1 ;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
17 ;; {{{ Vector iterators
; All GCN vector modes here are 64-lane (V64*) vectors; iterators group them
; by how many 32-bit VGPRs one element occupies (1 or 2 registers).
; NOTE(review): several iterator mode lists appear to be missing from this
; chunk (original line numbers skip) — confirm against the full file.
19 ; Vector modes for sub-dword modes
20 (define_mode_iterator VEC_SUBDWORD_MODE
23 ; Vector modes for one vector register
24 (define_mode_iterator VEC_1REG_MODE
26 (define_mode_iterator VEC_1REG_ALT
28 (define_mode_iterator VEC_ALL1REG_MODE
29 [V64QI V64HI V64SI V64HF V64SF])
31 (define_mode_iterator VEC_1REG_INT_MODE
33 (define_mode_iterator VEC_ALL1REG_INT_MODE
35 (define_mode_iterator VEC_ALL1REG_INT_ALT
38 ; Vector modes for two vector registers
39 (define_mode_iterator VEC_2REG_MODE
43 (define_mode_iterator VEC_REG_MODE
44 [V64SI V64HF V64SF ; Single reg
45 V64DI V64DF]) ; Double reg
46 (define_mode_iterator VEC_ALLREG_MODE
47 [V64QI V64HI V64SI V64HF V64SF ; Single reg
48 V64DI V64DF]) ; Double reg
49 (define_mode_iterator VEC_ALLREG_ALT
50 [V64QI V64HI V64SI V64HF V64SF ; Single reg
51 V64DI V64DF]) ; Double reg
52 (define_mode_iterator VEC_ALLREG_INT_MODE
53 [V64QI V64HI V64SI ; Single reg
; Mode attributes mapping each vector mode to its element (scalar) mode name,
; in lower case (for pattern names) and upper case (for mode macros).
56 (define_mode_attr scalar_mode
57 [(V64QI "qi") (V64HI "hi") (V64SI "si")
58 (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
60 (define_mode_attr SCALAR_MODE
61 [(V64QI "QI") (V64HI "HI") (V64SI "SI")
62 (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
; SDWA (sub-dword addressing) selector for each sub-dword element size.
64 (define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
; Substitution attributes: appending <exec>, <exec_clobber>, <exec_vcc> or
; <exec_scatter> to a pattern name generates an EXEC-masked variant of that
; pattern via the corresponding define_subst below.  The masked variants wrap
; the source in a vec_merge with operand DI "e" (the EXEC register).
; NOTE(review): the subst templates below are missing interior lines in this
; chunk (original numbering skips) — verify against the full file.
69 (define_subst_attr "exec" "vec_merge"
71 (define_subst_attr "exec_clobber" "vec_merge_with_clobber"
73 (define_subst_attr "exec_vcc" "vec_merge_with_vcc"
75 (define_subst_attr "exec_scatter" "scatter_store"
; Basic EXEC-masked form: merge the new result with operand 3 (the previous
; value, or an unspec placeholder "U0") under the EXEC mask in operand 4.
78 (define_subst "vec_merge"
79 [(set (match_operand:VEC_ALLREG_MODE 0)
80 (match_operand:VEC_ALLREG_MODE 1))]
83 (vec_merge:VEC_ALLREG_MODE
85 (match_operand:VEC_ALLREG_MODE 3
86 "gcn_register_or_unspec_operand" "U0")
87 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
; As above, for patterns that also carry a clobber (e.g. of VCC).
89 (define_subst "vec_merge_with_clobber"
90 [(set (match_operand:VEC_ALLREG_MODE 0)
91 (match_operand:VEC_ALLREG_MODE 1))
92 (clobber (match_operand 2))]
95 (vec_merge:VEC_ALLREG_MODE
97 (match_operand:VEC_ALLREG_MODE 3
98 "gcn_register_or_unspec_operand" "U0")
99 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
100 (clobber (match_dup 2))])
; For patterns with a second DI set (carry/VCC output): the VCC result is
; additionally masked with EXEC_REG so only enabled lanes update it.
102 (define_subst "vec_merge_with_vcc"
103 [(set (match_operand:VEC_ALLREG_MODE 0)
104 (match_operand:VEC_ALLREG_MODE 1))
105 (set (match_operand:DI 2)
106 (match_operand:DI 3))]
110 (vec_merge:VEC_ALLREG_MODE
112 (match_operand:VEC_ALLREG_MODE 4
113 "gcn_register_or_unspec_operand" "U0")
114 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
116 (and:DI (match_dup 3)
117 (reg:DI EXEC_REG)))])])
; Masked form for scatter stores, which set (mem:BLK (scratch)) rather than a
; register; the EXEC mask becomes an extra unspec operand.
119 (define_subst "scatter_store"
120 [(set (mem:BLK (scratch))
128 [(set (mem:BLK (scratch))
134 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
140 ; This is the entry point for all vector register moves.  Memory accesses can
141 ; come this way also, but will more usually use the reload_in/out,
142 ; gather/scatter, maskload/store, etc.
144 (define_expand "mov<mode>"
145 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
146 (match_operand:VEC_ALLREG_MODE 1 "general_operand"))]
; Three cases before register allocation:
; 1) store to memory -> force the source into a register, build a vector
;    address and emit a scatter, passing the MEM's address space and
;    volatility as const_int operands;
149 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
151 operands[1] = force_reg (<MODE>mode, operands[1]);
152 rtx scratch = gen_rtx_SCRATCH (V64DImode);
153 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
154 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
155 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
158 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
; 2) load from memory -> same, but emit a gather;
161 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
163 rtx scratch = gen_rtx_SCRATCH (V64DImode);
164 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
165 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
166 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
169 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
; 3) memory access during/after reload -> use the sgprbase pattern with a
;    real scratch register (pseudos are still allowed here, hence the assert).
172 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
174 gcc_assert (!reload_completed);
175 rtx scratch = gen_reg_rtx (V64DImode);
176 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
181 ; A pseudo instruction that helps LRA use the "U0" constraint.
; Emits no code (length 0): it exists only so an unspec "undefined value"
; source has a pattern LRA can match.
183 (define_insn "mov<mode>_unspec"
184 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand" "=v")
185 (match_operand:VEC_ALLREG_MODE 1 "gcn_unspec_operand" " U"))]
188 [(set_attr "type" "unknown")
189 (set_attr "length" "0")])
; Plain single-register vector move: v_mov_b32 with either a VGPR/inline
; constant source (4 bytes) or a literal constant (8 bytes).
191 (define_insn "*mov<mode>"
192 [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand" "=v,v")
193 (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B"))]
196 [(set_attr "type" "vop1,vop1")
197 (set_attr "length" "4,8")])
; EXEC-masked single-register move.  Alternatives select between a plain
; v_mov_b32 (mask in EXEC), v_cndmask_b32 with the mask in VCC or an SGPR
; pair, and memory forms needing a V64DI scratch for the address.
199 (define_insn "mov<mode>_exec"
200 [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
202 (vec_merge:VEC_ALL1REG_MODE
203 (match_operand:VEC_ALL1REG_MODE 1 "general_operand"
205 (match_operand:VEC_ALL1REG_MODE 3 "gcn_alu_or_unspec_operand"
207 (match_operand:DI 2 "register_operand" " e, e,cV,Sv, e, e")))
208 (clobber (match_scratch:V64DI 4 "=X, X, X, X,&v,&v"))]
; Reject mem-to-mem: at most one side may be memory.
209 "!MEM_P (operands[0]) || REG_P (operands[1])"
213 v_cndmask_b32\t%0, %3, %1, vcc
214 v_cndmask_b32\t%0, %3, %1, %2
217 [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
218 (set_attr "length" "4,8,4,8,16,16")])
220 ; This variant does not accept an unspec, but does permit MEM
221 ; read/modify/write which is necessary for maskstore.
; NOTE(review): the pattern below is deliberately commented out in the
; original source; kept for reference.
223 ;(define_insn "*mov<mode>_exec_match"
224 ; [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
226 ; (vec_merge:VEC_ALL1REG_MODE
227 ; (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B, m, v")
229 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
230 ; (clobber (match_scratch:V64DI 3 "=X,X,&v,&v"))]
231 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
237 ; [(set_attr "type" "vop1,vop1,*,*")
238 ; (set_attr "length" "4,8,16,16")])
; Double-register vector move: two v_mov_b32, one per 32-bit half (%L/%H).
; The halves are ordered so that an overlapping source is not clobbered
; before it is read (low-first when dest regno <= source regno).
240 (define_insn "*mov<mode>"
241 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
242 (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB"))]
245 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
246 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
248 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
250 [(set_attr "type" "vmult")
251 (set_attr "length" "16")])
; EXEC-masked double-register move.  Same overlap-safe ordering as above,
; with cndmask alternatives for VCC / SGPR-pair masks.
253 (define_insn "mov<mode>_exec"
254 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
256 (vec_merge:VEC_2REG_MODE
257 (match_operand:VEC_2REG_MODE 1 "general_operand"
259 (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
260 " U0,vDA0,vDA0,U0,U0")
261 (match_operand:DI 2 "register_operand" " e, cV, Sv, e, e")))
262 (clobber (match_scratch:V64DI 4 "= X, X, X,&v,&v"))]
263 "!MEM_P (operands[0]) || REG_P (operands[1])"
; Low half first when it is safe to do so...
265 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
266 switch (which_alternative)
269 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
271 return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
272 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
274 return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
275 "v_cndmask_b32\t%H0, %H3, %H1, %2";
; ...otherwise high half first to avoid clobbering an overlapping source.
278 switch (which_alternative)
281 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
283 return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
284 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
286 return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
287 "v_cndmask_b32\t%L0, %L3, %L1, %2";
292 [(set_attr "type" "vmult,vmult,vmult,*,*")
293 (set_attr "length" "16,16,16,16,16")])
295 ; This variant does not accept an unspec, but does permit MEM
296 ; read/modify/write which is necessary for maskstore.
; NOTE(review): commented out in the original source; kept for reference.
298 ;(define_insn "*mov<mode>_exec_match"
299 ; [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
300 ; (vec_merge:VEC_2REG_MODE
301 ; (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")
303 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
304 ; (clobber (match_scratch:V64DI 3 "=X,&v,&v"))]
305 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
307 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
308 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
310 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
313 ; [(set_attr "type" "vmult,*,*")
314 ; (set_attr "length" "16,16,16")])
316 ; A SGPR-base load looks like:
319 ; There's no hardware instruction that corresponds to this, but vector base
320 ; addresses are placed in an SGPR because it is easier to add to a vector.
321 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
324 ; vT = v1 << log2(element-size)
; SGPR-base move for single-register modes; only valid during/after reload
; (matching where the mov<mode> expander emits it).  Operand 2 is the V64DI
; scratch used to build the per-lane addresses.
328 (define_insn "mov<mode>_sgprbase"
329 [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
331 (unspec:VEC_ALL1REG_MODE
332 [(match_operand:VEC_ALL1REG_MODE 1 "general_operand"
335 (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v,&v"))]
336 "lra_in_progress || reload_completed"
342 [(set_attr "type" "vop1,vop1,*,*")
343 (set_attr "length" "4,8,12,12")])
; SGPR-base move for double-register modes; reg-reg case uses the same
; overlap-safe half ordering as *mov<mode> above.
345 (define_insn "mov<mode>_sgprbase"
346 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
347 (unspec:VEC_2REG_MODE
348 [(match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")]
350 (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v"))]
351 "lra_in_progress || reload_completed"
353 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
354 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
356 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
359 [(set_attr "type" "vmult,*,*")
360 (set_attr "length" "8,12,12")])
362 ; reload_in was once a standard name, but here it's only referenced by
363 ; gcn_secondary_reload.  It allows a reload with a scratch register.
; Both reload patterns simply delegate to mov<mode>_sgprbase, passing the
; caller-provided V64DI scratch as the address temporary.
365 (define_expand "reload_in<mode>"
366 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "= v")
367 (match_operand:VEC_ALLREG_MODE 1 "memory_operand" " m"))
368 (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
371 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
375 ; reload_out is similar to reload_in, above.
377 (define_expand "reload_out<mode>"
378 [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand" "= m")
379 (match_operand:VEC_ALLREG_MODE 1 "register_operand" " v"))
380 (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
383 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
387 ; Expand scalar addresses into gather/scatter patterns
; The four split patterns below rewrite sgprbase-style moves (unspec-wrapped
; MEM forms) into the scatter/gather unspec representation: operand 5 is the
; expanded vector address, operands 6/7 carry the MEM's address space and
; volatile flag as const_ints, and the exec-masked variants also pass the
; mask (operand 3).
; NOTE(review): the define_* opening lines and several interior lines of
; these patterns are missing from this chunk — confirm names/conditions
; against the full file.
390 [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
391 (unspec:VEC_ALLREG_MODE
392 [(match_operand:VEC_ALLREG_MODE 1 "general_operand")]
394 (clobber (match_scratch:V64DI 2))]
396 [(set (mem:BLK (scratch))
397 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
400 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
403 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
404 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
; Masked store form: vec_merge under an EXEC mask (operand 3).
408 [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
409 (vec_merge:VEC_ALLREG_MODE
410 (match_operand:VEC_ALLREG_MODE 1 "general_operand")
411 (match_operand:VEC_ALLREG_MODE 2 "")
412 (match_operand:DI 3 "gcn_exec_reg_operand")))
413 (clobber (match_scratch:V64DI 4))]
415 [(set (mem:BLK (scratch))
416 (unspec:BLK [(match_dup 5) (match_dup 1)
417 (match_dup 6) (match_dup 7) (match_dup 3)]
420 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
424 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
425 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
; Load (gather) form of the same rewrite.
429 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
430 (unspec:VEC_ALLREG_MODE
431 [(match_operand:VEC_ALLREG_MODE 1 "memory_operand")]
433 (clobber (match_scratch:V64DI 2))]
436 (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
440 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
443 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
444 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
; Masked load form.
448 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
449 (vec_merge:VEC_ALLREG_MODE
450 (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
451 (match_operand:VEC_ALLREG_MODE 2 "")
452 (match_operand:DI 3 "gcn_exec_reg_operand")))
453 (clobber (match_scratch:V64DI 4))]
456 (vec_merge:VEC_ALLREG_MODE
457 (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
463 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
467 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
468 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
471 ; TODO: Add zero/sign extending variants.
476 ; v_writelane and v_readlane work regardless of exec flags.
477 ; We allow source to be scratch.
479 ; FIXME these should take A immediates
; Insert a scalar into one lane of a single-register vector.  The lane is
; selected by the 1<<n vec_merge mask; v_writelane_b32 ignores EXEC (hence
; exec "none" and laneselect "yes").
481 (define_insn "*vec_set<mode>"
482 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "= v")
483 (vec_merge:VEC_ALL1REG_MODE
484 (vec_duplicate:VEC_ALL1REG_MODE
485 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
486 (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
488 (ashift (const_int 1)
489 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
491 "v_writelane_b32 %0, %1, %2"
492 [(set_attr "type" "vop3a")
493 (set_attr "length" "8")
494 (set_attr "exec" "none")
495 (set_attr "laneselect" "yes")])
497 ; FIXME: 64bit operations really should be splitters, but I am not sure how
498 ; to represent vertical subregs.
; Double-register variant: two writelanes, one per 32-bit half.
499 (define_insn "*vec_set<mode>"
500 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
501 (vec_merge:VEC_2REG_MODE
502 (vec_duplicate:VEC_2REG_MODE
503 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
504 (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
506 (ashift (const_int 1)
507 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
509 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
510 [(set_attr "type" "vmult")
511 (set_attr "length" "16")
512 (set_attr "exec" "none")
513 (set_attr "laneselect" "yes")])
; Standard-name expander; RTL matches the *vec_set<mode> insns above.
515 (define_expand "vec_set<mode>"
516 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
517 (vec_merge:VEC_ALLREG_MODE
518 (vec_duplicate:VEC_ALLREG_MODE
519 (match_operand:<SCALAR_MODE> 1 "register_operand"))
521 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
; Variant of vec_set matching a pre-computed constant mask (operand 2 is the
; merge mask 1<<lane); valid only when the mask is an exact power of two with
; the lane number in [0,63], and the output routine converts the mask back to
; the lane index with exact_log2.
524 (define_insn "*vec_set<mode>_1"
525 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
526 (vec_merge:VEC_ALL1REG_MODE
527 (vec_duplicate:VEC_ALL1REG_MODE
528 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
529 (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
531 (match_operand:SI 2 "const_int_operand" " i")))]
532 "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
534 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
535 return "v_writelane_b32 %0, %1, %2";
537 [(set_attr "type" "vop3a")
538 (set_attr "length" "8")
539 (set_attr "exec" "none")
540 (set_attr "laneselect" "yes")])
; Double-register form of the above: writelane each 32-bit half.
542 (define_insn "*vec_set<mode>_1"
543 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v")
544 (vec_merge:VEC_2REG_MODE
545 (vec_duplicate:VEC_2REG_MODE
546 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
547 (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
549 (match_operand:SI 2 "const_int_operand" " i")))]
550 "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
552 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
553 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
555 [(set_attr "type" "vmult")
556 (set_attr "length" "16")
557 (set_attr "exec" "none")
558 (set_attr "laneselect" "yes")])
; Broadcast a scalar to all lanes.  The <exec> subst suffix generates the
; EXEC-masked variant automatically.
560 (define_insn "vec_duplicate<mode><exec>"
561 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
562 (vec_duplicate:VEC_ALL1REG_MODE
563 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
566 [(set_attr "type" "vop3a")
567 (set_attr "length" "8")])
; Double-register broadcast: move each 32-bit half separately.
569 (define_insn "vec_duplicate<mode><exec>"
570 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
571 (vec_duplicate:VEC_2REG_MODE
572 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
574 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
575 [(set_attr "type" "vop3a")
576 (set_attr "length" "16")])
; Extract one lane of a vector into a scalar (SGPR) via v_readlane_b32,
; which ignores EXEC.
578 (define_insn "vec_extract<mode><scalar_mode>"
579 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
580 (vec_select:<SCALAR_MODE>
581 (match_operand:VEC_ALL1REG_MODE 1 "register_operand" " v")
582 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
584 "v_readlane_b32 %0, %1, %2"
585 [(set_attr "type" "vop3a")
586 (set_attr "length" "8")
587 (set_attr "exec" "none")
588 (set_attr "laneselect" "yes")])
; Double-register extract: read each half; "&Sg" earlyclobber keeps the
; destination from overlapping the source between the two readlanes.
590 (define_insn "vec_extract<mode><scalar_mode>"
591 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
592 (vec_select:<SCALAR_MODE>
593 (match_operand:VEC_2REG_MODE 1 "register_operand" " v")
594 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
596 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
597 [(set_attr "type" "vmult")
598 (set_attr "length" "16")
599 (set_attr "exec" "none")
600 (set_attr "laneselect" "yes")])
; Extract the element at the highest set bit of mask operand 1:
; lane = 63 - clz(mask), then vec_extract that lane.
602 (define_expand "extract_last_<mode>"
603 [(match_operand:<SCALAR_MODE> 0 "register_operand")
604 (match_operand:DI 1 "gcn_alu_operand")
605 (match_operand:VEC_ALLREG_MODE 2 "register_operand")]
606 "can_create_pseudo_p ()"
608 rtx dst = operands[0];
609 rtx mask = operands[1];
610 rtx vect = operands[2];
611 rtx tmpreg = gen_reg_rtx (SImode);
613 emit_insn (gen_clzdi2 (tmpreg, mask));
614 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
615 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
; As extract_last, but if the mask is all-zero return the default value
; (operand 1) instead; emits an explicit compare-and-branch diamond.
619 (define_expand "fold_extract_last_<mode>"
620 [(match_operand:<SCALAR_MODE> 0 "register_operand")
621 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
622 (match_operand:DI 2 "gcn_alu_operand")
623 (match_operand:VEC_ALLREG_MODE 3 "register_operand")]
624 "can_create_pseudo_p ()"
626 rtx dst = operands[0];
627 rtx default_value = operands[1];
628 rtx mask = operands[2];
629 rtx vect = operands[3];
630 rtx else_label = gen_label_rtx ();
631 rtx end_label = gen_label_rtx ();
633 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
634 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
635 emit_insn (gen_extract_last_<mode> (dst, mask, vect));
636 emit_jump_insn (gen_jump (end_label));
638 emit_label (else_label);
639 emit_move_insn (dst, default_value);
640 emit_label (end_label);
; Vector initialisation from a parallel of elements; all work is done in C.
644 (define_expand "vec_init<mode><scalar_mode>"
645 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
649 gcn_expand_vector_init (operands[0], operands[1]);
654 ;; {{{ Scatter / Gather
656 ;; GCN does not have an instruction for loading a vector from contiguous
657 ;; memory so *all* loads and stores are eventually converted to scatter
660 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
661 ;; unspec.  The unspec formats are as follows:
664 ;;     [(<address expression>)
667 ;;      (mem:BLK (scratch))]
671 ;;     [(<address expression>)
672 ;;      (<source register>)
678 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
679 ;; - The mem:BLK does not contain any real information, but indicates that an
680 ;;   unknown memory read is taking place.  Stores are expected to use a similar
681 ;;   mem:BLK outside the unspec.
682 ;; - The address space and glc (volatile) fields are there to replace the
683 ;;   fields normally found in a MEM.
684 ;; - Multiple forms of address expression are supported, below.
; Standard-name gather expander: base (op 1) + offsets (op 2) scaled by op 4;
; op 3 is a flag passed through to gcn_expand_scaled_offsets.  Chooses the
; 1-offset form when the computed address is a full V64DI vector, else the
; base+offsets 2-offset form.
686 (define_expand "gather_load<mode>v64si"
687 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
688 (match_operand:DI 1 "register_operand")
689 (match_operand:V64SI 2 "register_operand")
690 (match_operand 3 "immediate_operand")
691 (match_operand:SI 4 "gcn_alu_operand")]
694 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
695 operands[2], operands[4],
696 INTVAL (operands[3]), NULL);
698 if (GET_MODE (addr) == V64DImode)
699 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
700 const0_rtx, const0_rtx));
702 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
703 addr, const0_rtx, const0_rtx,
708 ; Allow any address expression
709 (define_expand "gather<mode>_expr<exec>"
710 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
711 (unspec:VEC_ALLREG_MODE
712 [(match_operand 1 "")
713 (match_operand 2 "immediate_operand")
714 (match_operand 3 "immediate_operand")
; Gather with V64DI vector address plus a constant offset (operand 2).
; Operand 3 = address space, operand 4 = glc/volatile flag.  The condition
; enforces the hardware offset ranges: GCN3 flat requires offset 0, GCN5 flat
; allows [0,0x1000), global allows signed [-0x1000,0x1000).
720 (define_insn "gather<mode>_insn_1offset<exec>"
721 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "=v")
722 (unspec:VEC_ALLREG_MODE
723 [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
725 (match_operand 2 "immediate_operand" " n")))
726 (match_operand 3 "immediate_operand" " n")
727 (match_operand 4 "immediate_operand" " n")
730 "(AS_FLAT_P (INTVAL (operands[3]))
731 && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
732 || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
733 || (AS_GLOBAL_P (INTVAL (operands[3]))
734 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
736 addr_space_t as = INTVAL (operands[3]);
737 const char *glc = INTVAL (operands[4]) ? " glc" : "";
739 static char buf[200];
742 if (TARGET_GCN5_PLUS)
743 sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
746 sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
748 else if (AS_GLOBAL_P (as))
749 sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
750 "s_waitcnt\tvmcnt(0)", glc);
756 [(set_attr "type" "flat")
757 (set_attr "length" "12")])
; LDS/GDS (DS) gather: 32-bit vector address plus constant offset < 0x10000;
; appends " gds" for the GDS address space.
759 (define_insn "gather<mode>_insn_1offset_ds<exec>"
760 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "=v")
761 (unspec:VEC_ALLREG_MODE
762 [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
764 (match_operand 2 "immediate_operand" " n")))
765 (match_operand 3 "immediate_operand" " n")
766 (match_operand 4 "immediate_operand" " n")
769 "(AS_ANY_DS_P (INTVAL (operands[3]))
770 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
772 addr_space_t as = INTVAL (operands[3]);
773 static char buf[200];
774 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
775 (AS_GDS_P (as) ? " gds" : ""));
778 [(set_attr "type" "ds")
779 (set_attr "length" "12")])
; Gather with scalar base (op 1) + 32-bit vector offsets (op 2) + constant
; offset (op 3); global address space only, signed offset in [-0x1000,0x1000).
781 (define_insn "gather<mode>_insn_2offsets<exec>"
782 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "=v")
783 (unspec:VEC_ALLREG_MODE
787 (match_operand:DI 1 "register_operand" "Sv"))
789 (match_operand:V64SI 2 "register_operand" " v")))
790 (vec_duplicate:V64DI (match_operand 3 "immediate_operand" " n")))
791 (match_operand 4 "immediate_operand" " n")
792 (match_operand 5 "immediate_operand" " n")
795 "(AS_GLOBAL_P (INTVAL (operands[4]))
796 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
798 addr_space_t as = INTVAL (operands[4]);
799 const char *glc = INTVAL (operands[5]) ? " glc" : "";
801 static char buf[200];
802 if (AS_GLOBAL_P (as))
804 /* Work around assembler bug in which a 64-bit register is expected,
805 but a 32-bit value would be correct.  */
806 int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
807 sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
808 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
815 [(set_attr "type" "flat")
816 (set_attr "length" "12")])
; Standard-name scatter expander: mirrors gather_load<mode>v64si — expand
; scaled offsets, then pick the 1-offset (full V64DI address) or 2-offset
; (base + 32-bit offsets) store form.
818 (define_expand "scatter_store<mode>v64si"
819 [(match_operand:DI 0 "register_operand")
820 (match_operand:V64SI 1 "register_operand")
821 (match_operand 2 "immediate_operand")
822 (match_operand:SI 3 "gcn_alu_operand")
823 (match_operand:VEC_ALLREG_MODE 4 "register_operand")]
826 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
827 operands[1], operands[3],
828 INTVAL (operands[2]), NULL);
830 if (GET_MODE (addr) == V64DImode)
831 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
832 const0_rtx, const0_rtx));
834 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
835 const0_rtx, operands[4],
836 const0_rtx, const0_rtx));
840 ; Allow any address expression
841 (define_expand "scatter<mode>_expr<exec_scatter>"
842 [(set (mem:BLK (scratch))
844 [(match_operand:V64DI 0 "")
845 (match_operand:VEC_ALLREG_MODE 1 "register_operand")
846 (match_operand 2 "immediate_operand")
847 (match_operand 3 "immediate_operand")]
; Scatter store with V64DI vector address plus constant offset; same address
; space / offset-range rules as the corresponding gather insn.
852 (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
853 [(set (mem:BLK (scratch))
855 [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
857 (match_operand 1 "immediate_operand" "n")))
858 (match_operand:VEC_ALLREG_MODE 2 "register_operand" "v")
859 (match_operand 3 "immediate_operand" "n")
860 (match_operand 4 "immediate_operand" "n")]
862 "(AS_FLAT_P (INTVAL (operands[3]))
863 && (INTVAL(operands[1]) == 0
865 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
866 || (AS_GLOBAL_P (INTVAL (operands[3]))
867 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
869 addr_space_t as = INTVAL (operands[3]);
870 const char *glc = INTVAL (operands[4]) ? " glc" : "";
872 static char buf[200];
875 if (TARGET_GCN5_PLUS)
876 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
878 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
880 else if (AS_GLOBAL_P (as))
881 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
887 [(set_attr "type" "flat")
888 (set_attr "length" "12")])
; LDS/GDS (DS) scatter: 32-bit address, constant offset < 0x10000.
890 (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
891 [(set (mem:BLK (scratch))
893 [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
895 (match_operand 1 "immediate_operand" "n")))
896 (match_operand:VEC_ALLREG_MODE 2 "register_operand" "v")
897 (match_operand 3 "immediate_operand" "n")
898 (match_operand 4 "immediate_operand" "n")]
900 "(AS_ANY_DS_P (INTVAL (operands[3]))
901 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
903 addr_space_t as = INTVAL (operands[3]);
904 static char buf[200];
905 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
906 (AS_GDS_P (as) ? " gds" : ""));
909 [(set_attr "type" "ds")
910 (set_attr "length" "12")])
; Scatter store with scalar base + 32-bit vector offsets + constant offset;
; global address space, signed constant offset in [-0x1000,0x1000).
912 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
913 [(set (mem:BLK (scratch))
918 (match_operand:DI 0 "register_operand" "Sv"))
920 (match_operand:V64SI 1 "register_operand" " v")))
921 (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
923 (match_operand:VEC_ALLREG_MODE 3 "register_operand" " v")
924 (match_operand 4 "immediate_operand" " n")
925 (match_operand 5 "immediate_operand" " n")]
927 "(AS_GLOBAL_P (INTVAL (operands[4]))
928 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
930 addr_space_t as = INTVAL (operands[4]);
931 const char *glc = INTVAL (operands[5]) ? " glc" : "";
933 static char buf[200];
934 if (AS_GLOBAL_P (as))
936 /* Work around assembler bug in which a 64-bit register is expected,
937 but a 32-bit value would be correct.  */
938 int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
939 sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
947 [(set_attr "type" "flat")
948 (set_attr "length" "12")])
; Cross-lane backward permute through LDS: each lane reads the element
; selected by the V64SI address operand.
953 (define_insn "ds_bpermute<mode>"
954 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
955 (unspec:VEC_ALL1REG_MODE
956 [(match_operand:VEC_ALL1REG_MODE 2 "register_operand" " v")
957 (match_operand:V64SI 1 "register_operand" " v")
958 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
961 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
962 [(set_attr "type" "vop2")
963 (set_attr "length" "12")])
; Double-register variant: splits into two single-register ds_bpermute
; insns, one per 32-bit half of source and destination.
965 (define_insn_and_split "ds_bpermute<mode>"
966 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v")
967 (unspec:VEC_2REG_MODE
968 [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
969 (match_operand:V64SI 1 "register_operand" " v")
970 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
975 [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
977 (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
980 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
981 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
982 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
983 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
985 [(set_attr "type" "vmult")
986 (set_attr "length" "24")])
989 ;; {{{ ALU special case: add/sub
; Vector add clobbering VCC (v_add writes carry to VCC); "%" marks operand 1
; as commutative with operand 2.
991 (define_insn "add<mode>3<exec_clobber>"
992 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
993 (plus:VEC_ALL1REG_INT_MODE
994 (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" "% v")
995 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" "vSvB")))
996 (clobber (reg:DI VCC_REG))]
998 "v_add%^_u32\t%0, vcc, %2, %1"
999 [(set_attr "type" "vop2")
1000 (set_attr "length" "8")])
; Add a broadcast scalar (operand 2) to a vector (operand 1).
1002 (define_insn "add<mode>3_dup<exec_clobber>"
1003 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1004 (plus:VEC_ALL1REG_INT_MODE
1005 (vec_duplicate:VEC_ALL1REG_INT_MODE
1006 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
1007 (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" " v")))
1008 (clobber (reg:DI VCC_REG))]
1010 "v_add%^_u32\t%0, vcc, %2, %1"
1011 [(set_attr "type" "vop2")
1012 (set_attr "length" "8")])
; Add producing an explicit carry output (operand 3): carry-out is the
; unsigned-overflow (ltu) of the sum; VCC or an SGPR pair may hold it.
1014 (define_insn "addv64si3_vcc<exec_vcc>"
1015 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
1017 (match_operand:V64SI 1 "register_operand" "% v, v")
1018 (match_operand:V64SI 2 "gcn_alu_operand" "vSvB,vSvB")))
1019 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
1020 (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
1023 "v_add%^_u32\t%0, %3, %2, %1"
1024 [(set_attr "type" "vop2,vop3b")
1025 (set_attr "length" "8")])
1027 ; This pattern only changes the VCC bits when the corresponding lane is
1028 ; enabled, so the set must be described as an ior.
; Broadcast-scalar add with carry output.
1030 (define_insn "addv64si3_vcc_dup<exec_vcc>"
1031 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
1033 (vec_duplicate:V64SI
1034 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
1035 (match_operand:V64SI 2 "register_operand" " v, v")))
1036 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
1037 (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
1039 (vec_duplicate:V64SI (match_dup 2))))]
1041 "v_add%^_u32\t%0, %3, %2, %1"
1042 [(set_attr "type" "vop2,vop3b")
1043 (set_attr "length" "8,8")])
1045 ; v_addc does not accept an SGPR because the VCC read already counts as an
1046 ; SGPR use and the number of SGPR operands is limited to 1.  It does not
1047 ; accept "B" immediate constants due to a related bus conflict.
; Add-with-carry-in (operand 3) and carry-out (operand 4); the carry-in bit
; is modelled as a per-lane vec_merge of 1/0 added into the sum.
1049 (define_insn "addcv64si3<exec_vcc>"
1050 [(set (match_operand:V64SI 0 "register_operand" "=v, v")
1054 (vec_duplicate:V64SI (const_int 1))
1055 (vec_duplicate:V64SI (const_int 0))
1056 (match_operand:DI 3 "register_operand" " cV,cVSv"))
1057 (match_operand:V64SI 1 "gcn_alu_operand" "% v, vA"))
1058 (match_operand:V64SI 2 "gcn_alu_operand" " vA, vA")))
1059 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
1060 (ior:DI (ltu:DI (plus:V64SI
1063 (vec_duplicate:V64SI (const_int 1))
1064 (vec_duplicate:V64SI (const_int 0))
1071 (vec_duplicate:V64SI (const_int 1))
1072 (vec_duplicate:V64SI (const_int 0))
1077 "v_addc%^_u32\t%0, %4, %2, %1, %3"
1078 [(set_attr "type" "vop2,vop3b")
1079 (set_attr "length" "4,8")])
; Vector subtract for all single-register integer vector modes.  The two
; alternatives let the non-VGPR operand appear on either side, using
; v_sub / v_subrev respectively; VCC is clobbered by the hardware carry.
1081 (define_insn "sub<mode>3<exec_clobber>"
1082 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v, v")
1083 (minus:VEC_ALL1REG_INT_MODE
1084 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "vSvB, v")
1085 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" " v,vSvB")))
1086 (clobber (reg:DI VCC_REG))]
1089 v_sub%^_u32\t%0, vcc, %1, %2
1090 v_subrev%^_u32\t%0, vcc, %2, %1"
1091 [(set_attr "type" "vop2")
1092 (set_attr "length" "8,8")])
; V64SI subtract that also exposes the per-lane borrow mask (gtu set) in
; VCC or an SGPR pair (operand 3).  Four alternatives cross the two
; operand orders (sub/subrev) with the two mask destinations.
1094 (define_insn "subv64si3_vcc<exec_vcc>"
1095 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
1097 (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1098 (match_operand:V64SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1099 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1100 (gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
1104 v_sub%^_u32\t%0, %3, %1, %2
1105 v_sub%^_u32\t%0, %3, %1, %2
1106 v_subrev%^_u32\t%0, %3, %2, %1
1107 v_subrev%^_u32\t%0, %3, %2, %1"
1108 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1109 (set_attr "length" "8")])
1111 ; v_subb does not accept an SGPR because the VCC read already counts as an
1112 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1113 ; accept "B" immediate constants due to a related bus conflict.
; Subtract-with-borrow-in: the DI borrow mask (operand 3) becomes a 0/1
; vector via vec_merge; borrow-out lanes are ior'd into operand 4.
; Alternatives cover sub/subrev order and VCC vs. SGPR mask registers.
; NOTE(review): line-number gaps (1129->1134, 1136->1142) indicate dropped
; RTL lines in this extract -- verify against upstream.
1115 (define_insn "subcv64si3<exec_vcc>"
1116 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
1120 (vec_duplicate:V64SI (const_int 1))
1121 (vec_duplicate:V64SI (const_int 0))
1122 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
1123 (match_operand:V64SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
1124 (match_operand:V64SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
1125 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
1126 (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
1128 (vec_duplicate:V64SI (const_int 1))
1129 (vec_duplicate:V64SI (const_int 0))
1134 (ltu:DI (minus:V64SI (vec_merge:V64SI
1135 (vec_duplicate:V64SI (const_int 1))
1136 (vec_duplicate:V64SI (const_int 0))
1142 v_subb%^_u32\t%0, %4, %1, %2, %3
1143 v_subb%^_u32\t%0, %4, %1, %2, %3
1144 v_subbrev%^_u32\t%0, %4, %2, %1, %3
1145 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
1146 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1147 (set_attr "length" "4,8,4,8")])
; 64-bit vector add, split after reload into a low-part add (producing the
; carry in VCC) followed by a high-part add-with-carry.
1149 (define_insn_and_split "addv64di3"
1150 [(set (match_operand:V64DI 0 "register_operand" "= v")
1152 (match_operand:V64DI 1 "register_operand" "%vDb")
1153 (match_operand:V64DI 2 "gcn_alu_operand" " vDb")))
1154 (clobber (reg:DI VCC_REG))]
1157 "gcn_can_split_p (V64DImode, operands[0])
1158 && gcn_can_split_p (V64DImode, operands[1])
1159 && gcn_can_split_p (V64DImode, operands[2])"
1162 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1163 emit_insn (gen_addv64si3_vcc
1164 (gcn_operand_part (V64DImode, operands[0], 0),
1165 gcn_operand_part (V64DImode, operands[1], 0),
1166 gcn_operand_part (V64DImode, operands[2], 0),
1168 emit_insn (gen_addcv64si3
1169 (gcn_operand_part (V64DImode, operands[0], 1),
1170 gcn_operand_part (V64DImode, operands[1], 1),
1171 gcn_operand_part (V64DImode, operands[2], 1),
1175 [(set_attr "type" "vmult")
1176 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3: operand 3 supplies the previous value
; for disabled lanes (or unspec/undefined), operand 4 is the exec mask.
; Splits into the _vcc_exec / addc _exec sub-patterns per 32-bit half.
1178 (define_insn_and_split "addv64di3_exec"
1179 [(set (match_operand:V64DI 0 "register_operand" "= v")
1182 (match_operand:V64DI 1 "register_operand" "%vDb")
1183 (match_operand:V64DI 2 "gcn_alu_operand" " vDb"))
1184 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1185 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1186 (clobber (reg:DI VCC_REG))]
1189 "gcn_can_split_p (V64DImode, operands[0])
1190 && gcn_can_split_p (V64DImode, operands[1])
1191 && gcn_can_split_p (V64DImode, operands[2])
1192 && gcn_can_split_p (V64DImode, operands[4])"
1195 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1196 emit_insn (gen_addv64si3_vcc_exec
1197 (gcn_operand_part (V64DImode, operands[0], 0),
1198 gcn_operand_part (V64DImode, operands[1], 0),
1199 gcn_operand_part (V64DImode, operands[2], 0),
1201 gcn_operand_part (V64DImode, operands[3], 0),
1203 emit_insn (gen_addcv64si3_exec
1204 (gcn_operand_part (V64DImode, operands[0], 1),
1205 gcn_operand_part (V64DImode, operands[1], 1),
1206 gcn_operand_part (V64DImode, operands[2], 1),
1208 gcn_operand_part (V64DImode, operands[3], 1),
1212 [(set_attr "type" "vmult")
1213 (set_attr "length" "8")])
; 64-bit vector subtract, split into low-part subtract (borrow into VCC)
; plus high-part subtract-with-borrow.  Two alternatives allow the
; non-VGPR operand on either side.
1215 (define_insn_and_split "subv64di3"
1216 [(set (match_operand:V64DI 0 "register_operand" "= v, v")
1218 (match_operand:V64DI 1 "gcn_alu_operand" "vDb, v")
1219 (match_operand:V64DI 2 "gcn_alu_operand" " v,vDb")))
1220 (clobber (reg:DI VCC_REG))]
1223 "gcn_can_split_p (V64DImode, operands[0])
1224 && gcn_can_split_p (V64DImode, operands[1])
1225 && gcn_can_split_p (V64DImode, operands[2])"
1228 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1229 emit_insn (gen_subv64si3_vcc
1230 (gcn_operand_part (V64DImode, operands[0], 0),
1231 gcn_operand_part (V64DImode, operands[1], 0),
1232 gcn_operand_part (V64DImode, operands[2], 0),
1234 emit_insn (gen_subcv64si3
1235 (gcn_operand_part (V64DImode, operands[0], 1),
1236 gcn_operand_part (V64DImode, operands[1], 1),
1237 gcn_operand_part (V64DImode, operands[2], 1),
1241 [(set_attr "type" "vmult")
1242 (set_attr "length" "8")])
; EXEC-masked 64-bit vector subtract.  The insn condition requires at
; least one register operand (both being non-register would be
; unencodable); split mirrors subv64di3 with _exec sub-patterns.
1244 (define_insn_and_split "subv64di3_exec"
1245 [(set (match_operand:V64DI 0 "register_operand" "= v, v")
1248 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB, v")
1249 (match_operand:V64DI 2 "gcn_alu_operand" " v,vSvB"))
1250 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1251 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1252 (clobber (reg:DI VCC_REG))]
1253 "register_operand (operands[1], VOIDmode)
1254 || register_operand (operands[2], VOIDmode)"
1256 "gcn_can_split_p (V64DImode, operands[0])
1257 && gcn_can_split_p (V64DImode, operands[1])
1258 && gcn_can_split_p (V64DImode, operands[2])
1259 && gcn_can_split_p (V64DImode, operands[3])"
1262 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1263 emit_insn (gen_subv64si3_vcc_exec
1264 (gcn_operand_part (V64DImode, operands[0], 0),
1265 gcn_operand_part (V64DImode, operands[1], 0),
1266 gcn_operand_part (V64DImode, operands[2], 0),
1268 gcn_operand_part (V64DImode, operands[3], 0),
1270 emit_insn (gen_subcv64si3_exec
1271 (gcn_operand_part (V64DImode, operands[0], 1),
1272 gcn_operand_part (V64DImode, operands[1], 1),
1273 gcn_operand_part (V64DImode, operands[2], 1),
1275 gcn_operand_part (V64DImode, operands[3], 1),
1279 [(set_attr "type" "vmult")
1280 (set_attr "length" "8")])
; V64DI add where operand 1 is a zero-extended V64SI: the high-part add
; only needs the carry (addc with const0 high input).
1282 (define_insn_and_split "addv64di3_zext"
1283 [(set (match_operand:V64DI 0 "register_operand" "= v, v")
1286 (match_operand:V64SI 1 "gcn_alu_operand" " vA, vB"))
1287 (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA")))
1288 (clobber (reg:DI VCC_REG))]
1291 "gcn_can_split_p (V64DImode, operands[0])
1292 && gcn_can_split_p (V64DImode, operands[2])"
1295 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1296 emit_insn (gen_addv64si3_vcc
1297 (gcn_operand_part (V64DImode, operands[0], 0),
1299 gcn_operand_part (V64DImode, operands[2], 0),
1301 emit_insn (gen_addcv64si3
1302 (gcn_operand_part (V64DImode, operands[0], 1),
1303 gcn_operand_part (V64DImode, operands[2], 1),
1304 const0_rtx, vcc, vcc));
1307 [(set_attr "type" "vmult")
1308 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_zext; operand 3 provides values for
; disabled lanes, operand 4 is the exec mask.
1310 (define_insn_and_split "addv64di3_zext_exec"
1311 [(set (match_operand:V64DI 0 "register_operand" "= v, v")
1315 (match_operand:V64SI 1 "gcn_alu_operand" " vA, vB"))
1316 (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA"))
1317 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1318 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1319 (clobber (reg:DI VCC_REG))]
1322 "gcn_can_split_p (V64DImode, operands[0])
1323 && gcn_can_split_p (V64DImode, operands[2])
1324 && gcn_can_split_p (V64DImode, operands[3])"
1327 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1328 emit_insn (gen_addv64si3_vcc_exec
1329 (gcn_operand_part (V64DImode, operands[0], 0),
1331 gcn_operand_part (V64DImode, operands[2], 0),
1333 gcn_operand_part (V64DImode, operands[3], 0),
1335 emit_insn (gen_addcv64si3_exec
1336 (gcn_operand_part (V64DImode, operands[0], 1),
1337 gcn_operand_part (V64DImode, operands[2], 1),
1338 const0_rtx, vcc, vcc,
1339 gcn_operand_part (V64DImode, operands[3], 1),
1343 [(set_attr "type" "vmult")
1344 (set_attr "length" "8")])
; As addv64di3_zext, but the zero-extended addend is a broadcast SI
; scalar, so the low-part add uses the _vcc_dup pattern.
1346 (define_insn_and_split "addv64di3_zext_dup"
1347 [(set (match_operand:V64DI 0 "register_operand" "= v, v")
1350 (vec_duplicate:V64SI
1351 (match_operand:SI 1 "gcn_alu_operand" "BSv,ASv")))
1352 (match_operand:V64DI 2 "gcn_alu_operand" "vDA,vDb")))
1353 (clobber (reg:DI VCC_REG))]
1356 "gcn_can_split_p (V64DImode, operands[0])
1357 && gcn_can_split_p (V64DImode, operands[2])"
1360 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1361 emit_insn (gen_addv64si3_vcc_dup
1362 (gcn_operand_part (V64DImode, operands[0], 0),
1363 gcn_operand_part (DImode, operands[1], 0),
1364 gcn_operand_part (V64DImode, operands[2], 0),
1366 emit_insn (gen_addcv64si3
1367 (gcn_operand_part (V64DImode, operands[0], 1),
1368 gcn_operand_part (V64DImode, operands[2], 1),
1369 const0_rtx, vcc, vcc));
1372 [(set_attr "type" "vmult")
1373 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_zext_dup.
1375 (define_insn_and_split "addv64di3_zext_dup_exec"
1376 [(set (match_operand:V64DI 0 "register_operand" "= v, v")
1380 (vec_duplicate:V64SI
1381 (match_operand:SI 1 "gcn_alu_operand" "ASv,BSv")))
1382 (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA"))
1383 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1384 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1385 (clobber (reg:DI VCC_REG))]
1388 "gcn_can_split_p (V64DImode, operands[0])
1389 && gcn_can_split_p (V64DImode, operands[2])
1390 && gcn_can_split_p (V64DImode, operands[3])"
1393 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1394 emit_insn (gen_addv64si3_vcc_dup_exec
1395 (gcn_operand_part (V64DImode, operands[0], 0),
1396 gcn_operand_part (DImode, operands[1], 0),
1397 gcn_operand_part (V64DImode, operands[2], 0),
1399 gcn_operand_part (V64DImode, operands[3], 0),
1401 emit_insn (gen_addcv64si3_exec
1402 (gcn_operand_part (V64DImode, operands[0], 1),
1403 gcn_operand_part (V64DImode, operands[2], 1),
1404 const0_rtx, vcc, vcc,
1405 gcn_operand_part (V64DImode, operands[3], 1),
1409 [(set_attr "type" "vmult")
1410 (set_attr "length" "8")])
; Zero-extended V64SI plus a broadcast DI scalar: the high part is the
; duplicated scalar high word plus carry, so the split first duplicates
; the high word into the destination, then adds carry in place.
1412 (define_insn_and_split "addv64di3_zext_dup2"
1413 [(set (match_operand:V64DI 0 "register_operand" "= v")
1415 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
1416 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "DbSv"))))
1417 (clobber (reg:DI VCC_REG))]
1420 "gcn_can_split_p (V64DImode, operands[0])"
1423 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1424 emit_insn (gen_addv64si3_vcc_dup
1425 (gcn_operand_part (V64DImode, operands[0], 0),
1426 gcn_operand_part (DImode, operands[2], 0),
1429 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1430 emit_insn (gen_vec_duplicatev64si
1431 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1432 emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
1435 [(set_attr "type" "vmult")
1436 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_zext_dup2; disabled lanes keep
; operand 3's values, exec mask in operand 4.
1438 (define_insn_and_split "addv64di3_zext_dup2_exec"
1439 [(set (match_operand:V64DI 0 "register_operand" "= v")
1442 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
1444 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1445 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1446 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1447 (clobber (reg:DI VCC_REG))]
1450 "gcn_can_split_p (V64DImode, operands[0])
1451 && gcn_can_split_p (V64DImode, operands[3])"
1454 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1455 emit_insn (gen_addv64si3_vcc_dup_exec
1456 (gcn_operand_part (V64DImode, operands[0], 0),
1457 gcn_operand_part (DImode, operands[2], 0),
1460 gcn_operand_part (V64DImode, operands[3], 0),
1462 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1463 emit_insn (gen_vec_duplicatev64si_exec
1464 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1465 gcn_gen_undef (V64SImode), operands[4]));
1466 emit_insn (gen_addcv64si3_exec
1467 (dsthi, dsthi, const0_rtx, vcc, vcc,
1468 gcn_operand_part (V64DImode, operands[3], 1),
1472 [(set_attr "type" "vmult")
1473 (set_attr "length" "8")])
; Sign-extended variant of addv64di3_zext_dup2: a scratch V64SI (op 3)
; holds the sign word (operand 1 arithmetic-shifted right by 31), which
; is added into the high part along with the carry.
1475 (define_insn_and_split "addv64di3_sext_dup2"
1476 [(set (match_operand:V64DI 0 "register_operand" "= v")
1478 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
1479 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1480 (clobber (match_scratch:V64SI 3 "=&v"))
1481 (clobber (reg:DI VCC_REG))]
1484 "gcn_can_split_p (V64DImode, operands[0])"
1487 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1488 emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
1489 emit_insn (gen_addv64si3_vcc_dup
1490 (gcn_operand_part (V64DImode, operands[0], 0),
1491 gcn_operand_part (DImode, operands[2], 0),
1494 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1495 emit_insn (gen_vec_duplicatev64si
1496 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1497 emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
1500 [(set_attr "type" "vmult")
1501 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_sext_dup2 (scratch is operand 5 here,
; exec mask operand 4, merge source operand 3).
1503 (define_insn_and_split "addv64di3_sext_dup2_exec"
1504 [(set (match_operand:V64DI 0 "register_operand" "= v")
1507 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
1509 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1510 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1511 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1512 (clobber (match_scratch:V64SI 5 "=&v"))
1513 (clobber (reg:DI VCC_REG))]
1516 "gcn_can_split_p (V64DImode, operands[0])
1517 && gcn_can_split_p (V64DImode, operands[3])"
1520 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1521 emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
1522 gcn_gen_undef (V64SImode), operands[4]));
1523 emit_insn (gen_addv64si3_vcc_dup_exec
1524 (gcn_operand_part (V64DImode, operands[0], 0),
1525 gcn_operand_part (DImode, operands[2], 0),
1528 gcn_operand_part (V64DImode, operands[3], 0),
1530 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1531 emit_insn (gen_vec_duplicatev64si_exec
1532 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1533 gcn_gen_undef (V64SImode), operands[4]));
1534 emit_insn (gen_addcv64si3_exec
1535 (dsthi, dsthi, operands[5], vcc, vcc,
1536 gcn_operand_part (V64DImode, operands[3], 1),
1540 [(set_attr "type" "vmult")
1541 (set_attr "length" "8")])
1544 ;; {{{ DS memory ALU: add/sub
; Modes accepted by the LDS ("DS") read-modify-write arithmetic patterns
; below: full vectors, and the corresponding scalar element modes.
1546 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
1547 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1549 ;; FIXME: the vector patterns probably need RD expanded to a vector of
1550 ;; addresses. For now, the only way a vector can get into LDS is
1551 ;; if the user puts it there manually.
1553 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1554 ;; checked to see if anything can ever use them.
; LDS in-place add: destination memory must equal source operand 1
; (read-modify-write), enforced by the rtx_equal_p condition.
1556 (define_insn "add<mode>3_ds<exec>"
1557 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1559 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1560 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1561 "rtx_equal_p (operands[0], operands[1])"
1562 "ds_add%u0\t%A0, %2%O0"
1563 [(set_attr "type" "ds")
1564 (set_attr "length" "8")])
; Scalar-mode counterpart of add<mode>3_ds (same in-place LDS add).
1566 (define_insn "add<mode>3_ds_scalar"
1567 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1568 (plus:DS_ARITH_SCALAR_MODE
1569 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1571 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1572 "rtx_equal_p (operands[0], operands[1])"
1573 "ds_add%u0\t%A0, %2%O0"
1574 [(set_attr "type" "ds")
1575 (set_attr "length" "8")])
; LDS in-place subtract: mem -= reg (ds_sub).
1577 (define_insn "sub<mode>3_ds<exec>"
1578 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1579 (minus:DS_ARITH_MODE
1580 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1581 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1582 "rtx_equal_p (operands[0], operands[1])"
1583 "ds_sub%u0\t%A0, %2%O0"
1584 [(set_attr "type" "ds")
1585 (set_attr "length" "8")])
; Scalar-mode counterpart of sub<mode>3_ds.
1587 (define_insn "sub<mode>3_ds_scalar"
1588 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1589 (minus:DS_ARITH_SCALAR_MODE
1590 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1592 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1593 "rtx_equal_p (operands[0], operands[1])"
1594 "ds_sub%u0\t%A0, %2%O0"
1595 [(set_attr "type" "ds")
1596 (set_attr "length" "8")])
; LDS in-place reverse subtract: mem = reg - mem (ds_rsub); note the
; operand order in the minus is reversed relative to sub<mode>3_ds.
1598 (define_insn "subr<mode>3_ds<exec>"
1599 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1600 (minus:DS_ARITH_MODE
1601 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1602 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1603 "rtx_equal_p (operands[0], operands[1])"
1604 "ds_rsub%u0\t%A0, %2%O0"
1605 [(set_attr "type" "ds")
1606 (set_attr "length" "8")])
; Scalar-mode counterpart of subr<mode>3_ds (reverse subtract in LDS).
1608 (define_insn "subr<mode>3_ds_scalar"
1609 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1610 (minus:DS_ARITH_SCALAR_MODE
1611 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1612 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1614 "rtx_equal_p (operands[0], operands[1])"
1615 "ds_rsub%u0\t%A0, %2%O0"
1616 [(set_attr "type" "ds")
1617 (set_attr "length" "8")])
1620 ;; {{{ ALU special case: mult
; High 32 bits of a signed/unsigned 32x32->64 multiply (v_mul_hi_i32/u32,
; selected by <sgnsuffix>).
; NOTE(review): line-number gaps (1623->1628 etc.) suggest RTL lines
; were dropped from this extract -- verify against upstream.
1622 (define_insn "<su>mulv64si3_highpart<exec>"
1623 [(set (match_operand:V64SI 0 "register_operand" "= v")
1628 (match_operand:V64SI 1 "gcn_alu_operand" " %v"))
1630 (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
1633 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1634 [(set_attr "type" "vop3a")
1635 (set_attr "length" "8")])
; Low 32 bits of a vector multiply for all single-register integer
; vector modes (v_mul_lo_u32 works for both signednesses).
1637 (define_insn "mul<mode>3<exec>"
1638 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1639 (mult:VEC_ALL1REG_INT_MODE
1640 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
1641 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" " vSvA")))]
1643 "v_mul_lo_u32\t%0, %1, %2"
1644 [(set_attr "type" "vop3a")
1645 (set_attr "length" "8")])
; Vector times broadcast scalar, low 32 bits.
1647 (define_insn "mul<mode>3_dup<exec>"
1648 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1649 (mult:VEC_ALL1REG_INT_MODE
1650 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
1651 (vec_duplicate:VEC_ALL1REG_INT_MODE
1652 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
1654 "v_mul_lo_u32\t%0, %1, %2"
1655 [(set_attr "type" "vop3a")
1656 (set_attr "length" "8")])
; 64x64->64 vector multiply composed from 32-bit pieces:
;   lo  = lo1*lo2
;   hi  = umulhi(lo1,lo2) + hi1*lo2 + lo1*hi2 + hi1*hi2
; (the hi1*hi2 term only contributes its low word; carries above bit 63
; are discarded).  Early-clobber dest and scratch avoid input overlap.
1658 (define_insn_and_split "mulv64di3"
1659 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1661 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1662 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1663 (clobber (match_scratch:V64SI 3 "=&v"))]
1669 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1670 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1671 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1672 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1673 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1674 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1675 rtx tmp = operands[3];
1677 emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
1678 emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
1679 emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
1680 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1681 emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
1682 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1683 emit_insn (gen_mulv64si3 (tmp, left_hi, right_hi));
1684 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp))
; EXEC-masked variant of mulv64di3.  Operand 3 supplies merge values for
; disabled lanes (or an unspec, meaning "undefined" -- handled by the
; GET_CODE check); intermediate results merge with undef since they are
; consumed immediately.
1688 (define_insn_and_split "mulv64di3_exec"
1689 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1692 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1693 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1694 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1695 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1696 (clobber (match_scratch:V64SI 5 "=&v"))]
1702 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1703 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1704 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1705 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1706 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1707 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1708 rtx exec = operands[4];
1709 rtx tmp = operands[5];
1712 if (GET_CODE (operands[3]) == UNSPEC)
1714 old_lo = old_hi = gcn_gen_undef (V64SImode);
1718 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1719 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1722 rtx undef = gcn_gen_undef (V64SImode);
1724 emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
1725 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
1727 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
1728 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1729 emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
1730 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1731 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_hi, undef, exec));
1732 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec))
; 64-bit multiply with a zero-extended 32-bit left operand: the hi1
; terms vanish, leaving lo*lo, umulhi(lo,lo) and lo*hi2.
1736 (define_insn_and_split "mulv64di3_zext"
1737 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1740 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1741 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1742 (clobber (match_scratch:V64SI 3 "=&v"))]
1748 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1749 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1750 rtx left = operands[1];
1751 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1752 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1753 rtx tmp = operands[3];
1755 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1756 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1757 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1758 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp))
; EXEC-masked variant of mulv64di3_zext.
1762 (define_insn_and_split "mulv64di3_zext_exec"
1763 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1767 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1768 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1769 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1770 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1771 (clobber (match_scratch:V64SI 5 "=&v"))]
1777 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1778 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1779 rtx left = operands[1];
1780 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1781 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1782 rtx exec = operands[4];
1783 rtx tmp = operands[5];
1786 if (GET_CODE (operands[3]) == UNSPEC)
1788 old_lo = old_hi = gcn_gen_undef (V64SImode);
1792 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1793 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1796 rtx undef = gcn_gen_undef (V64SImode);
1798 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1799 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1801 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1802 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec))
; As mulv64di3_zext, but the 64-bit right operand is a broadcast DI
; scalar; the split is structurally identical.
1806 (define_insn_and_split "mulv64di3_zext_dup2"
1807 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1810 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1811 (vec_duplicate:V64DI
1812 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
1813 (clobber (match_scratch:V64SI 3 "= &v"))]
1819 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1820 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1821 rtx left = operands[1];
1822 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1823 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1824 rtx tmp = operands[3];
1826 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1827 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1828 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1829 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp))
; EXEC-masked variant of mulv64di3_zext_dup2.
1833 (define_insn_and_split "mulv64di3_zext_dup2_exec"
1834 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1838 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1839 (vec_duplicate:V64DI
1840 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
1841 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1842 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1843 (clobber (match_scratch:V64SI 5 "= &v"))]
1849 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1850 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1851 rtx left = operands[1];
1852 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1853 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1854 rtx exec = operands[4];
1855 rtx tmp = operands[5];
1858 if (GET_CODE (operands[3]) == UNSPEC)
1860 old_lo = old_hi = gcn_gen_undef (V64SImode);
1864 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1865 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1868 rtx undef = gcn_gen_undef (V64SImode);
1870 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1871 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1873 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1874 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec))
1879 ;; {{{ ALU generic case
; Iterators for the generic integer ALU patterns: bitwise ops, shifts,
; and signed/unsigned min/max.
1881 (define_mode_iterator VEC_INT_MODE [V64SI V64DI])
1883 (define_code_iterator bitop [and ior xor])
1884 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
1885 (define_code_iterator minmaxop [smin smax umin umax])
; Unary bitwise operation (VOP1 encoding) over all single-register
; integer vector modes; <mnemonic> comes from the bitunop code iterator
; (declared outside this extract).
1887 (define_insn "<expander><mode>2<exec>"
1888 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v")
1889 (bitunop:VEC_ALL1REG_INT_MODE
1890 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
1892 "v_<mnemonic>0\t%0, %1"
1893 [(set_attr "type" "vop1")
1894 (set_attr "length" "8")])
; Binary and/ior/xor: either a VALU vop2 into a VGPR, or an in-place
; LDS (DS) memory operation.
1896 (define_insn "<expander><mode>3<exec>"
1897 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
1898 (bitop:VEC_ALL1REG_INT_MODE
1899 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_valu_src0_operand"
1901 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_valu_src1com_operand"
1905 v_<mnemonic>0\t%0, %2, %1
1906 ds_<mnemonic>0\t%A0, %2%O0"
1907 [(set_attr "type" "vop2,ds")
1908 (set_attr "length" "8,8")])
; 64-bit bitwise op: DS memory form stays whole; the register form is
; split after reload into two independent 32-bit halves (bitwise ops
; have no cross-word carries).
1910 (define_insn_and_split "<expander>v64di3"
1911 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD")
1913 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
1914 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
1918 ds_<mnemonic>0\t%A0, %2%O0"
1919 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
1921 (bitop:V64SI (match_dup 5) (match_dup 7)))
1923 (bitop:V64SI (match_dup 6) (match_dup 8)))]
1925 operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
1926 operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
1927 operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
1928 operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
1929 operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
1930 operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
1932 [(set_attr "type" "vmult,ds")
1933 (set_attr "length" "16,8")])
; EXEC-masked 64-bit bitwise op; split produces per-half masked ops with
; the corresponding halves of the merge operand (operands 11/12).
1935 (define_insn_and_split "<expander>v64di3_exec"
1936 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD")
1939 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
1940 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
1941 (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
1943 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
1944 "!memory_operand (operands[0], VOIDmode)
1945 || (rtx_equal_p (operands[0], operands[1])
1946 && register_operand (operands[2], VOIDmode))"
1949 ds_<mnemonic>0\t%A0, %2%O0"
1950 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
1953 (bitop:V64SI (match_dup 7) (match_dup 9))
1958 (bitop:V64SI (match_dup 8) (match_dup 10))
1962 operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
1963 operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
1964 operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
1965 operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
1966 operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
1967 operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
1968 operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
1969 operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
1971 [(set_attr "type" "vmult,ds")
1972 (set_attr "length" "16,8")])
; Sub-dword vector shift by a scalar: widen to V64SI, shift, narrow back.
; The local enum re-declares the code-iterator names so <code> can be
; compared as a plain identifier inside the C block.
1974 (define_expand "<expander><mode>3"
1975 [(set (match_operand:VEC_SUBDWORD_MODE 0 "register_operand" "= v")
1976 (shiftop:VEC_SUBDWORD_MODE
1977 (match_operand:VEC_SUBDWORD_MODE 1 "gcn_alu_operand" " v")
1978 (vec_duplicate:VEC_SUBDWORD_MODE
1979 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
1982 enum {ashift, lshiftrt, ashiftrt};
1983 bool unsignedp = (<code> == lshiftrt);
1984 rtx insi1 = gen_reg_rtx (V64SImode);
1985 rtx insi2 = gen_reg_rtx (SImode);
1986 rtx outsi = gen_reg_rtx (V64SImode);
1988 convert_move (insi1, operands[1], unsignedp);
1989 convert_move (insi2, operands[2], unsignedp);
1990 emit_insn (gen_<expander>v64si3 (outsi, insi1, insi2));
1991 convert_move (operands[0], outsi, unsignedp)
; V64SI shift by a broadcast scalar amount; <revmnemonic> encodes the
; operand-swapped hardware form (shift amount first).
1995 (define_insn "<expander>v64si3<exec>"
1996 [(set (match_operand:V64SI 0 "register_operand" "= v")
1998 (match_operand:V64SI 1 "gcn_alu_operand" " v")
1999 (vec_duplicate:V64SI
2000 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2002 "v_<revmnemonic>0\t%0, %2, %1"
2003 [(set_attr "type" "vop2")
2004 (set_attr "length" "8")])
; Sub-dword vector shift by a per-lane vector amount: widen both inputs
; to V64SI and use the V64SI pattern.
; NOTE(review): unsignedp here is true for ashift/ashiftrt, the opposite
; of the scalar-shift expand above -- looks suspicious but is preserved
; verbatim; confirm against upstream before changing.
2006 (define_expand "v<expander><mode>3"
2007 [(set (match_operand:VEC_SUBDWORD_MODE 0 "register_operand" "=v")
2008 (shiftop:VEC_SUBDWORD_MODE
2009 (match_operand:VEC_SUBDWORD_MODE 1 "gcn_alu_operand" " v")
2010 (match_operand:VEC_SUBDWORD_MODE 2 "gcn_alu_operand" "vB")))]
2013 enum {ashift, lshiftrt, ashiftrt};
2014 bool unsignedp = (<code> == ashift || <code> == ashiftrt);
2015 rtx insi1 = gen_reg_rtx (V64SImode);
2016 rtx insi2 = gen_reg_rtx (V64SImode);
2017 rtx outsi = gen_reg_rtx (V64SImode);
2019 convert_move (insi1, operands[1], unsignedp);
2020 convert_move (insi2, operands[2], unsignedp);
2021 emit_insn (gen_v<expander>v64si3 (outsi, insi1, insi2));
2022 convert_move (operands[0], outsi, unsignedp)
; V64SI shift by a per-lane vector amount.
2026 (define_insn "v<expander>v64si3<exec>"
2027 [(set (match_operand:V64SI 0 "register_operand" "=v")
2029 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2030 (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
2032 "v_<revmnemonic>0\t%0, %2, %1"
2033 [(set_attr "type" "vop2")
2034 (set_attr "length" "8")])
; Sub-dword min/max: widen to V64SI with the matching signedness, apply
; the V64SI pattern, then narrow back.
2036 (define_expand "<expander><mode>3"
2037 [(set (match_operand:VEC_SUBDWORD_MODE 0 "gcn_valu_dst_operand")
2038 (minmaxop:VEC_SUBDWORD_MODE
2039 (match_operand:VEC_SUBDWORD_MODE 1 "gcn_valu_src0_operand")
2040 (match_operand:VEC_SUBDWORD_MODE 2 "gcn_valu_src1com_operand")))]
2043 enum {smin, umin, smax, umax};
2044 bool unsignedp = (<code> == umax || <code> == umin);
2045 rtx insi1 = gen_reg_rtx (V64SImode);
2046 rtx insi2 = gen_reg_rtx (V64SImode);
2047 rtx outsi = gen_reg_rtx (V64SImode);
2049 convert_move (insi1, operands[1], unsignedp);
2050 convert_move (insi2, operands[2], unsignedp);
2051 emit_insn (gen_<code>v64si3 (outsi, insi1, insi2));
2052 convert_move (operands[0], outsi, unsignedp)
; V64SI min/max: VALU vop2 into a VGPR, or in-place LDS memory op
; (second alternative ties operand 1 to the destination).
2056 (define_insn "<expander>v64si3<exec>"
2057 [(set (match_operand:V64SI 0 "gcn_valu_dst_operand" "= v,RD")
2059 (match_operand:V64SI 1 "gcn_valu_src0_operand" "% v, 0")
2060 (match_operand:V64SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2063 v_<mnemonic>0\t%0, %2, %1
2064 ds_<mnemonic>0\t%A0, %2%O0"
2065 [(set_attr "type" "vop2,ds")
2066 (set_attr "length" "8,8")])
2069 ;; {{{ FP binops - special cases
2071 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2072 ; adding the negated second operand to the first.
; DF vector subtract implemented as add of the negated second operand
; (see comment above): v_add_f64 with a negation modifier on %2.
2074 (define_insn "subv64df3<exec>"
2075 [(set (match_operand:V64DF 0 "register_operand" "= v, v")
2077 (match_operand:V64DF 1 "gcn_alu_operand" "vSvB, v")
2078 (match_operand:V64DF 2 "gcn_alu_operand" " v,vSvB")))]
2081 v_add_f64\t%0, %1, -%2
2082 v_add_f64\t%0, -%2, %1"
2083 [(set_attr "type" "vop3a")
2084 (set_attr "length" "8,8")])
2086 (define_insn "subdf"
2087 [(set (match_operand:DF 0 "register_operand" "= v, v")
2089 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2090 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2093 v_add_f64\t%0, %1, -%2
2094 v_add_f64\t%0, -%2, %1"
2095 [(set_attr "type" "vop3a")
2096 (set_attr "length" "8,8")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.
2099 ;; {{{ FP binops - generic

;; FP mode iterators: VEC_* are the 64-lane vector modes (DF needs two
;; registers per lane, hence the separate *_1REG_* subsets), FP_* the scalars.
2101 (define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
2102 (define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
2103 (define_mode_iterator FP_MODE [HF SF DF])
2104 (define_mode_iterator FP_1REG_MODE [HF SF])

;; Commutative vs non-commutative FP code iterators; only minus is
;; non-commutative here.
2106 (define_code_iterator comm_fp [plus mult smin smax])
2107 (define_code_iterator nocomm_fp [minus])
2108 (define_code_iterator all_fp [plus mult minus smin smax])

;; Commutative FP binop on full vectors, vop2 encoding; '%' on operand 1
;; lets the register allocator swap sources so the vSvB operand lands in
;; the slot that accepts scalars/constants.
2110 (define_insn "<expander><mode>3<exec>"
2111 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2112 (comm_fp:VEC_FP_MODE
2113 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v")
2114 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
2116 "v_<mnemonic>0\t%0, %2, %1"
2117 [(set_attr "type" "vop2")
2118 (set_attr "length" "8")])

;; Scalar commutative FP binop; second alternative targets an LDS
;; destination (RL constraint, "ds" type, %O0 offset in the template).
2120 (define_insn "<expander><mode>3"
2121 [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL")
2123 (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0")
2124 (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2127 v_<mnemonic>0\t%0, %2, %1
2128 v_<mnemonic>0\t%0, %1%O0"
2129 [(set_attr "type" "vop2,ds")
2130 (set_attr "length" "8")])

;; Non-commutative (minus) vector binop, 1-reg FP modes only: alternative 1
;; keeps the natural operand order, alternative 2 uses the reverse mnemonic
;; so the constant/scalar operand can be in the other source position.
2132 (define_insn "<expander><mode>3<exec>"
2133 [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v")
2134 (nocomm_fp:VEC_FP_1REG_MODE
2135 (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2136 (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2139 v_<mnemonic>0\t%0, %1, %2
2140 v_<revmnemonic>0\t%0, %2, %1"
2141 [(set_attr "type" "vop2")
2142 (set_attr "length" "8,8")])

;; Scalar counterpart of the non-commutative pattern above.
2144 (define_insn "<expander><mode>3"
2145 [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v")
2146 (nocomm_fp:FP_1REG_MODE
2147 (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2148 (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2151 v_<mnemonic>0\t%0, %1, %2
2152 v_<revmnemonic>0\t%0, %2, %1"
2153 [(set_attr "type" "vop2")
2154 (set_attr "length" "8,8")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.

;; Scalar FP abs: implemented as 0 + |x| using the VOP3 abs source modifier
;; (no dedicated abs instruction); %i0 picks the type suffix from the mode.
2159 (define_insn "abs<mode>2"
2160 [(set (match_operand:FP_MODE 0 "register_operand" "=v")
2161 (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
2163 "v_add%i0\t%0, 0, |%1|"
2164 [(set_attr "type" "vop3a")
2165 (set_attr "length" "8")])

;; Vector FP abs, same 0 + |x| trick.
2167 (define_insn "abs<mode>2<exec>"
2168 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2170 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2172 "v_add%i0\t%0, 0, |%1|"
2173 [(set_attr "type" "vop3a")
2174 (set_attr "length" "8")])

;; Vector FP negate: 0 + (-x) via the neg source modifier.
2176 (define_insn "neg<mode>2<exec>"
2177 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2179 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2181 "v_add%i0\t%0, 0, -%1"
2182 [(set_attr "type" "vop3a")
2183 (set_attr "length" "8")])

;; Vector sqrt; gated on flag_unsafe_math_optimizations, presumably because
;; the hardware v_sqrt is not correctly rounded — TODO confirm vs ISA manual.
2185 (define_insn "sqrt<mode>2<exec>"
2186 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2188 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2189 "flag_unsafe_math_optimizations"
2191 [(set_attr "type" "vop1")
2192 (set_attr "length" "8")])

;; Scalar sqrt, same gating as the vector form.
2194 (define_insn "sqrt<mode>2"
2195 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2197 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2198 "flag_unsafe_math_optimizations"
2200 [(set_attr "type" "vop1")
2201 (set_attr "length" "8")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.
2204 ;; {{{ FP fused multiply and add

;; Vector FMA (a*b + c); the alternatives let exactly one of operands 2/3
;; accept a scalar-or-constant (vSvA); '%' on operand 1 allows swapping of
;; the two multiplicands.
2206 (define_insn "fma<mode>4<exec>"
2207 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v")
2209 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2210 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2211 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2213 "v_fma%i0\t%0, %1, %2, %3"
2214 [(set_attr "type" "vop3a")
2215 (set_attr "length" "8")])

;; Vector FMA with negated second multiplicand (a * -b + c), used e.g. by
;; the Newton-Raphson reciprocal refinement in div<mode>3 below; the neg
;; is folded into the VOP3 source modifier (-%2).
2217 (define_insn "fma<mode>4_negop2<exec>"
2218 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v")
2220 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2222 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2223 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2225 "v_fma%i0\t%0, %1, -%2, %3"
2226 [(set_attr "type" "vop3a")
2227 (set_attr "length" "8")])

;; Scalar FMA, mirroring the vector pattern.
2229 (define_insn "fma<mode>4"
2230 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v")
2232 (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2233 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2234 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2236 "v_fma%i0\t%0, %1, %2, %3"
2237 [(set_attr "type" "vop3a")
2238 (set_attr "length" "8")])

;; Scalar FMA with negated operand 2, mirroring the vector pattern.
2240 (define_insn "fma<mode>4_negop2"
2241 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v")
2243 (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2245 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2246 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2248 "v_fma%i0\t%0, %1, -%2, %3"
2249 [(set_attr "type" "vop3a")
2250 (set_attr "length" "8")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.

;; Vector reciprocal: RTL models 1/x as a division of a broadcast 1.0 by
;; operand 1; the emitted instruction (missing in this extraction) is the
;; hardware v_rcp — see the div<mode>3 comments below on its accuracy.
2255 (define_insn "recip<mode>2<exec>"
2256 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2258 (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
2259 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2262 [(set_attr "type" "vop1")
2263 (set_attr "length" "8")])

;; Scalar reciprocal, same structure without the vec_duplicate.
2265 (define_insn "recip<mode>2"
2266 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2268 (float:FP_MODE (const_int 1))
2269 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2272 [(set_attr "type" "vop1")
2273 (set_attr "length" "8")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines
;; (notably the is_rcp fast-path branch); verify against canonical gcn-valu.md.
2275 ;; Do division via a = b * 1/c
2276 ;; The v_rcp_* instructions are not sufficiently accurate on their own,
2277 ;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
2278 ;; which the ISA manual says is enough to improve the reciprocal accuracy.
2280 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.

;; Vector division, only under -freciprocal-math.  Sequence emitted:
;;   initrcp = rcp(c);  fma = 2 - initrcp*c;  rcp = initrcp*fma;  a = b*rcp
;; (one Newton-Raphson step).  is_rcp detects b == -1 constant vector,
;; presumably to emit a plain negated reciprocal — the branch using it is
;; among the missing lines, so confirm against the full source.
2282 (define_expand "div<mode>3"
2283 [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
2284 (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
2285 (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
2286 "flag_reciprocal_math"
2288 rtx two = gcn_vec_constant (<MODE>mode,
2289 const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
2290 rtx initrcp = gen_reg_rtx (<MODE>mode);
2291 rtx fma = gen_reg_rtx (<MODE>mode);
2294 bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
2296 (CONST_DOUBLE_REAL_VALUE
2297 (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
2302 rcp = gen_reg_rtx (<MODE>mode);
2304 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2305 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2306 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2309 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));

;; Scalar division, identical Newton-Raphson structure; is_rcp here checks
;; for a CONST_DOUBLE numerator (comparison target line is missing —
;; presumably dconstm1 as in the vector case; confirm).
2314 (define_expand "div<mode>3"
2315 [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
2316 (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
2317 (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
2318 "flag_reciprocal_math"
2320 rtx two = const_double_from_real_value (dconst2, <MODE>mode);
2321 rtx initrcp = gen_reg_rtx (<MODE>mode);
2322 rtx fma = gen_reg_rtx (<MODE>mode);
2325 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
2326 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
2332 rcp = gen_reg_rtx (<MODE>mode);
2334 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2335 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2336 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2339 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.
2345 ;; {{{ Int/FP conversions

;; Scalar and vector conversion mode iterators; VCVT_FMODE/VCVT_IMODE split
;; the vector set into FP and integer destinations for the two vector
;; patterns below.
2347 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2348 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2350 (define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
2351 (define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
2352 (define_mode_iterator VCVT_IMODE [V64HI V64SI])

;; All conversion RTL codes, the standard-pattern name each maps to
;; (fix_trunc/floatuns/extend/...), and the %i/%u suffix pair used to build
;; the v_cvt mnemonic for each (dst,src) signedness combination.
2354 (define_code_iterator cvt_op [fix unsigned_fix
2355 float unsigned_float
2356 float_extend float_truncate])
2357 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2358 (float "float") (unsigned_float "floatuns")
2359 (float_extend "extend") (float_truncate "trunc")])
2360 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2361 (float "%i0%i1") (unsigned_float "%i0%u1")
2362 (float_extend "%i0%i1")
2363 (float_truncate "%i0%i1")])

;; Scalar conversions; gcn_valid_cvt_p filters the iterator cross-product
;; down to conversions the hardware's v_cvt actually supports.
2365 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2366 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2368 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2369 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2371 "v_cvt<cvt_operands>\t%0, %1"
2372 [(set_attr "type" "vop1")
2373 (set_attr "length" "8")])

;; Vector conversions to FP destinations.
2375 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
2376 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
2378 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
2379 "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
2381 "v_cvt<cvt_operands>\t%0, %1"
2382 [(set_attr "type" "vop1")
2383 (set_attr "length" "8")])

;; Vector FP -> integer conversions.
2385 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
2386 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
2388 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
2389 "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
2391 "v_cvt<cvt_operands>\t%0, %1"
2392 [(set_attr "type" "vop1")
2393 (set_attr "length" "8")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.
2396 ;; {{{ Int/int conversions

;; truncate and zero_extend share one pattern (both are a plain sub-dword
;; move with SDWA lane selects); sign_extend needs the sext() form below.
2398 (define_code_iterator zero_convert [truncate zero_extend])
2399 (define_code_attr convop [
2400 (sign_extend "extend")
2401 (zero_extend "zero_extend")
2402 (truncate "trunc")])

;; Truncate / zero-extend between 1-register integer vector modes using a
;; v_mov_b32 with SDWA byte/word selects (<sdwa> attr maps the mode to
;; BYTE_0/WORD_0/DWORD — see the mode attrs at the top of the file).
2404 (define_insn "<convop><VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
2405 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2406 (zero_convert:VEC_ALL1REG_INT_MODE
2407 (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand" " v")))]
2409 "v_mov_b32_sdwa\t%0, %1 dst_sel:<VEC_ALL1REG_INT_MODE:sdwa> dst_unused:UNUSED_PAD src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
2410 [(set_attr "type" "vop_sdwa")
2411 (set_attr "length" "8")])

;; Sign-extend variant: SDWA sext() modifier on the source.
2413 (define_insn "extend<VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
2414 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2415 (sign_extend:VEC_ALL1REG_INT_MODE
2416 (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand" " v")))]
2418 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
2419 [(set_attr "type" "vop_sdwa")
2420 (set_attr "length" "8")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines
;; (split conditions, else-branches); verify against canonical gcn-valu.md.
2422 ;; GCC can already do these for scalar types, but not for vector types.
2423 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2424 ;; so there must be a few tricks here.

;; Truncate V64DI -> narrower int vector: split into taking the low 32-bit
;; half (gcn_operand_part ... 0) and, for sub-SI destinations, a further
;; truncv64si<mode>2; for V64SI a plain move suffices.
2426 (define_insn_and_split "truncv64di<mode>2"
2427 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2428 (truncate:VEC_ALL1REG_INT_MODE
2429 (match_operand:V64DI 1 "gcn_alu_operand" " v")))]
2435 rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
2436 rtx out = operands[0];
2438 if (<MODE>mode != V64SImode)
2439 emit_insn (gen_truncv64si<mode>2 (out, inlo));
2441 emit_move_insn (out, inlo);
2443 [(set_attr "type" "vop2")
2444 (set_attr "length" "4")])

;; Same truncation under an EXEC mask (vec_merge of result with operand 2
;; controlled by the DI exec operand 3); delegates to the _exec variants.
2446 (define_insn_and_split "truncv64di<mode>2_exec"
2447 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2448 (vec_merge:VEC_ALL1REG_INT_MODE
2449 (truncate:VEC_ALL1REG_INT_MODE
2450 (match_operand:V64DI 1 "gcn_alu_operand" " v"))
2451 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_or_unspec_operand"
2453 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2459 rtx out = operands[0];
2460 rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
2461 rtx merge = operands[2];
2462 rtx exec = operands[3];
2464 if (<MODE>mode != V64SImode)
2465 emit_insn (gen_truncv64si<mode>2_exec (out, inlo, merge, exec));
2467 emit_insn (gen_mov<mode>_exec (out, inlo, exec, merge));
2469 [(set_attr "type" "vop2")
2470 (set_attr "length" "4")])

;; Extend narrow int vector -> V64DI: produce the low half (converting or
;; moving), then fill the high half — ashr by 31 for sign_extend, zero
;; broadcast for zero_extend (the if/else lines around 2490/2492 are
;; partially missing in this extraction).
2472 (define_insn_and_split "<convop><mode>v64di2"
2473 [(set (match_operand:V64DI 0 "register_operand" "=v")
2475 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v")))]
2481 rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
2482 rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
2483 rtx in = operands[1];
2485 if (<MODE>mode != V64SImode)
2486 emit_insn (gen_<convop><mode>v64si2 (outlo, in));
2488 emit_move_insn (outlo, in);
2490 emit_insn (gen_ashrv64si3 (outhi, outlo, GEN_INT (31)));
2492 emit_insn (gen_vec_duplicatev64si (outhi, const0_rtx));
2494 [(set_attr "type" "mult")
2495 (set_attr "length" "12")])

;; EXEC-masked version of the extension above: each half uses the matching
;; half of the merge operand (mergelo/mergehi) and the exec mask.
2497 (define_insn_and_split "<convop><mode>v64di2_exec"
2498 [(set (match_operand:V64DI 0 "register_operand" "=v")
2501 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v"))
2502 (match_operand:V64DI 2 "gcn_alu_or_unspec_operand" "U0")
2503 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2509 rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
2510 rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
2511 rtx in = operands[1];
2512 rtx mergelo = gcn_operand_part (V64DImode, operands[2], 0);
2513 rtx mergehi = gcn_operand_part (V64DImode, operands[2], 1);
2514 rtx exec = operands[3];
2516 if (<MODE>mode != V64SImode)
2517 emit_insn (gen_<convop><mode>v64si2_exec (outlo, in, mergelo, exec));
2519 emit_insn (gen_mov<mode>_exec (outlo, in, exec, mergelo));
2521 emit_insn (gen_ashrv64si3_exec (outhi, outlo, GEN_INT (31), mergehi,
2524 emit_insn (gen_vec_duplicatev64si_exec (outhi, const0_rtx, mergehi,
2527 [(set_attr "type" "mult")
2528 (set_attr "length" "12")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.
2531 ;; {{{ Vector comparison/merge

2533 (define_mode_iterator VCMP_MODE [V64HI V64SI V64DI V64HF V64SF V64DF])
2534 (define_mode_iterator VCMP_MODE_INT [V64HI V64SI V64DI])

;; Vector compare producing a DI lane mask.  Alternatives: result in VCC
;; (cV), in EXEC via v_cmpx (e, clobbering VCC per the scratch), or in an
;; arbitrary SGPR pair via the VOP3 encoding (Sg).  %E1 prints the
;; comparison name from the operator in operand 1.
2536 (define_insn "vec_cmp<mode>di"
2537 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2538 (match_operator 1 "gcn_fp_compare_operator"
2539 [(match_operand:VCMP_MODE 2 "gcn_alu_operand"
2540 "vSv, B,vSv, B, v,vA")
2541 (match_operand:VCMP_MODE 3 "gcn_vop3_operand"
2542 " v, v, v, v,vA, v")]))
2543 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
2546 v_cmp%E1\tvcc, %2, %3
2547 v_cmp%E1\tvcc, %2, %3
2548 v_cmpx%E1\tvcc, %2, %3
2549 v_cmpx%E1\tvcc, %2, %3
2550 v_cmp%E1\t%0, %2, %3
2551 v_cmp%E1\t%0, %2, %3"
2552 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2553 (set_attr "length" "4,8,4,8,8,8")])

;; Unsigned integer compares reuse vec_cmp<mode>di: the operator in
;; operand 1 is already LTU/GTU/etc., and %E1 prints the right mnemonic.
2555 (define_expand "vec_cmpu<mode>di"
2556 [(match_operand:DI 0 "register_operand")
2557 (match_operator 1 "gcn_compare_operator"
2558 [(match_operand:VCMP_MODE_INT 2 "gcn_alu_operand")
2559 (match_operand:VCMP_MODE_INT 3 "gcn_vop3_operand")])]
2562 /* Unsigned comparisons use the same patterns as signed comparisons,
2563 except that they use unsigned operators (e.g. LTU vs LT).
2564 The '%E1' directive then does the Right Thing.  */
2565 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],

2570 ; There's no instruction for 8-bit vector comparison, so we need to extend.
;; V64QI compare: widen both operands to V64SI (sign- or zero-extending per
;; <u>/any_extend) and compare in SImode.
2571 (define_expand "vec_cmp<u>v64qidi"
2572 [(match_operand:DI 0 "register_operand")
2573 (match_operator 1 "gcn_compare_operator"
2574 [(any_extend:V64SI (match_operand:V64QI 2 "gcn_alu_operand"))
2575 (any_extend:V64SI (match_operand:V64QI 3 "gcn_vop3_operand"))])]
2576 "can_create_pseudo_p ()"
2578 rtx sitmp1 = gen_reg_rtx (V64SImode);
2579 rtx sitmp2 = gen_reg_rtx (V64SImode);
2581 emit_insn (gen_<expander>v64qiv64si2 (sitmp1, operands[2]));
2582 emit_insn (gen_<expander>v64qiv64si2 (sitmp2, operands[3]));
2583 emit_insn (gen_vec_cmpv64sidi (operands[0], operands[1], sitmp1, sitmp2));
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.

;; EXEC-masked vector compare: like vec_cmp<mode>di but the result mask is
;; ANDed (per the missing RTL line, presumably an AND with operand 4 — TODO
;; confirm) with the exec register operand so inactive lanes read as 0.
2587 (define_insn "vec_cmp<mode>di_exec"
2588 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2590 (match_operator 1 "gcn_fp_compare_operator"
2591 [(match_operand:VCMP_MODE 2 "gcn_alu_operand"
2592 "vSv, B,vSv, B, v,vA")
2593 (match_operand:VCMP_MODE 3 "gcn_vop3_operand"
2594 " v, v, v, v,vA, v")])
2595 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
2596 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
2599 v_cmp%E1\tvcc, %2, %3
2600 v_cmp%E1\tvcc, %2, %3
2601 v_cmpx%E1\tvcc, %2, %3
2602 v_cmpx%E1\tvcc, %2, %3
2603 v_cmp%E1\t%0, %2, %3
2604 v_cmp%E1\t%0, %2, %3"
2605 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2606 (set_attr "length" "4,8,4,8,8,8")])

;; Unsigned masked compare; note the visible line 2619 recurses into
;; gen_vec_cmpu<mode>di_exec — in the canonical source this call targets the
;; signed _exec pattern; the discrepancy may be an extraction artifact, check.
2608 (define_expand "vec_cmpu<mode>di_exec"
2609 [(match_operand:DI 0 "register_operand")
2610 (match_operator 1 "gcn_compare_operator"
2611 [(match_operand:VCMP_MODE_INT 2 "gcn_alu_operand")
2612 (match_operand:VCMP_MODE_INT 3 "gcn_vop3_operand")])
2613 (match_operand:DI 4 "gcn_exec_reg_operand")]
2616 /* Unsigned comparisons use the same patterns as signed comparisons,
2617 except that they use unsigned operators (e.g. LTU vs LT).
2618 The '%E1' directive then does the Right Thing.  */
2619 emit_insn (gen_vec_cmpu<mode>di_exec (operands[0], operands[1],
2620 operands[2], operands[3],

;; Masked V64QI compare: widen with the _exec extension patterns (merge
;; operand = the source itself) then do the masked V64SI compare.
2625 (define_expand "vec_cmp<u>v64qidi_exec"
2626 [(match_operand:DI 0 "register_operand")
2627 (match_operator 1 "gcn_compare_operator"
2628 [(any_extend:V64SI (match_operand:V64QI 2 "gcn_alu_operand"))
2629 (any_extend:V64SI (match_operand:V64QI 3 "gcn_vop3_operand"))])
2630 (match_operand:DI 4 "gcn_exec_reg_operand")]
2631 "can_create_pseudo_p ()"
2633 rtx sitmp1 = gen_reg_rtx (V64SImode);
2634 rtx sitmp2 = gen_reg_rtx (V64SImode);
2636 emit_insn (gen_<expander>v64qiv64si2_exec (sitmp1, operands[2],
2637 operands[2], operands[4]));
2638 emit_insn (gen_<expander>v64qiv64si2_exec (sitmp2, operands[3],
2639 operands[3], operands[4]));
2640 emit_insn (gen_vec_cmpv64sidi_exec (operands[0], operands[1], sitmp1,
2641 sitmp2, operands[4]));
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.

;; Compare a broadcast scalar (vec_duplicate of operand 2) against a vector;
;; same VCC / v_cmpx-to-EXEC / SGPR-destination alternatives as
;; vec_cmp<mode>di, minus one alternative (5 columns instead of 6).
2645 (define_insn "vec_cmp<mode>di_dup"
2646 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2647 (match_operator 1 "gcn_fp_compare_operator"
2648 [(vec_duplicate:VCMP_MODE
2649 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2651 (match_operand:VCMP_MODE 3 "gcn_vop3_operand" " v, v, v,v, v")]))
2652 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
2655 v_cmp%E1\tvcc, %2, %3
2656 v_cmp%E1\tvcc, %2, %3
2657 v_cmpx%E1\tvcc, %2, %3
2658 v_cmpx%E1\tvcc, %2, %3
2659 v_cmp%E1\t%0, %2, %3
2660 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2661 (set_attr "length" "4,8,4,8,8")])

;; EXEC-masked variant of the broadcast compare above (operand 4 = exec).
2663 (define_insn "vec_cmp<mode>di_dup_exec"
2664 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2666 (match_operator 1 "gcn_fp_compare_operator"
2667 [(vec_duplicate:VCMP_MODE
2668 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2670 (match_operand:VCMP_MODE 3 "gcn_vop3_operand" " v, v, v,v, v")])
2671 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
2672 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
2675 v_cmp%E1\tvcc, %2, %3
2676 v_cmp%E1\tvcc, %2, %3
2677 v_cmpx%E1\tvcc, %2, %3
2678 v_cmpx%E1\tvcc, %2, %3
2679 v_cmp%E1\t%0, %2, %3
2680 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2681 (set_attr "length" "4,8,4,8,8")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.

;; vcond_mask: select between vectors 1 and 2 per the DI lane-mask operand 3
;; via vec_merge; the V64DI scratch clobber matches what the underlying
;; masked-move pattern needs — TODO confirm against the full pattern.
2683 (define_expand "vcond_mask_<mode>di"
2685 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "")
2686 (vec_merge:VEC_ALLREG_MODE
2687 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand" "")
2688 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand" "")
2689 (match_operand:DI 3 "register_operand" "")))
2690 (clobber (scratch:V64DI))])]

;; vcond: compare operands 4/5 with operator 3 into a DI mask, then
;; vcond_mask-select between operands 1/2.  Data mode and comparison mode
;; iterate independently (VEC_ALLREG_MODE vs VEC_ALLREG_ALT).
2694 (define_expand "vcond<VEC_ALLREG_MODE:mode><VEC_ALLREG_ALT:mode>"
2695 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2696 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
2697 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
2698 (match_operator 3 "gcn_fp_compare_operator"
2699 [(match_operand:VEC_ALLREG_ALT 4 "gcn_alu_operand")
2700 (match_operand:VEC_ALLREG_ALT 5 "gcn_vop3_operand")])]
2703 rtx tmp = gen_reg_rtx (DImode);
2704 emit_insn (gen_vec_cmp<VEC_ALLREG_ALT:mode>di
2705 (tmp, operands[3], operands[4], operands[5]));
2706 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
2707 (operands[0], operands[1], operands[2], tmp));

;; _exec variant: the compare is masked by exec operand 6; the final select
;; still uses the unmasked vcond_mask (inactive lanes have a 0 mask bit).
2711 (define_expand "vcond<VEC_ALLREG_MODE:mode><VEC_ALLREG_ALT:mode>_exec"
2712 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2713 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
2714 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
2715 (match_operator 3 "gcn_fp_compare_operator"
2716 [(match_operand:VEC_ALLREG_ALT 4 "gcn_alu_operand")
2717 (match_operand:VEC_ALLREG_ALT 5 "gcn_vop3_operand")])
2718 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2721 rtx tmp = gen_reg_rtx (DImode);
2722 emit_insn (gen_vec_cmp<VEC_ALLREG_ALT:mode>di_exec
2723 (tmp, operands[3], operands[4], operands[5], operands[6]));
2724 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
2725 (operands[0], operands[1], operands[2], tmp));

;; Unsigned vcond: same shape but routes through vec_cmpu, integer
;; comparison modes only.
2729 (define_expand "vcondu<VEC_ALLREG_MODE:mode><VEC_ALLREG_INT_MODE:mode>"
2730 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2731 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
2732 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
2733 (match_operator 3 "gcn_fp_compare_operator"
2734 [(match_operand:VEC_ALLREG_INT_MODE 4 "gcn_alu_operand")
2735 (match_operand:VEC_ALLREG_INT_MODE 5 "gcn_vop3_operand")])]
2738 rtx tmp = gen_reg_rtx (DImode);
2739 emit_insn (gen_vec_cmpu<VEC_ALLREG_INT_MODE:mode>di
2740 (tmp, operands[3], operands[4], operands[5]));
2741 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
2742 (operands[0], operands[1], operands[2], tmp));

;; Unsigned masked vcond.
2746 (define_expand "vcondu<VEC_ALLREG_MODE:mode><VEC_ALLREG_INT_MODE:mode>_exec"
2747 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2748 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
2749 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
2750 (match_operator 3 "gcn_fp_compare_operator"
2751 [(match_operand:VEC_ALLREG_INT_MODE 4 "gcn_alu_operand")
2752 (match_operand:VEC_ALLREG_INT_MODE 5 "gcn_vop3_operand")])
2753 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2756 rtx tmp = gen_reg_rtx (DImode);
2757 emit_insn (gen_vec_cmpu<VEC_ALLREG_INT_MODE:mode>di_exec
2758 (tmp, operands[3], operands[4], operands[5], operands[6]));
2759 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
2760 (operands[0], operands[1], operands[2], tmp));
;; NOTE(review): extraction artifact — embedded line numbers / missing lines
;; (both branches of the top-level if are fragmentary); verify against
;; canonical gcn-valu.md.
2765 ;; {{{ Fully masked loop support

;; while_ult (SI bounds -> DI lane mask): when either bound is non-constant,
;; build the mask at runtime — VGPR1 is pre-loaded with lane IDs 0..63
;; (the _0_1_2_3 register), optionally offset by operands[1], then compared
;; against the limit with vec_cmpv64sidi_dup.  When both bounds are
;; constants, compute the mask directly: diff low bits set (all ones when
;; diff >= 64).
2767 (define_expand "while_ultsidi"
2768 [(match_operand:DI 0 "register_operand")
2769 (match_operand:SI 1 "")
2770 (match_operand:SI 2 "")]
2773 if (GET_CODE (operands[1]) != CONST_INT
2774 || GET_CODE (operands[2]) != CONST_INT)
2776 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2778 if (GET_CODE (operands[1]) != CONST_INT
2779 || INTVAL (operands[1]) != 0)
2781 tmp = gen_reg_rtx (V64SImode);
2782 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
2784 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
2785 gen_rtx_GT (VOIDmode, 0, 0),
2790 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
2791 HOST_WIDE_INT mask = (diff >= 64 ? -1
2792 : ~((unsigned HOST_WIDE_INT)-1 << diff));
2793 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.

;; Masked vector load: convert the scalar MEM to a per-lane vector address,
;; zero the destination first (masked-off lanes must read as zero), then
;; gather with the exec mask merging into the zeroed destination.
2798 (define_expand "maskload<mode>di"
2799 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2800 (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
2801 (match_operand 2 "")]
2804 rtx exec = force_reg (DImode, operands[2]);
2805 rtx addr = gcn_expand_scalar_to_vector_address
2806 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
2807 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
2808 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
2810 /* Masked lanes are required to hold zero.  */
2811 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
2813 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
2814 operands[0], exec));

;; Masked vector store: same address expansion, then a masked scatter;
;; address-space and volatility of the MEM are passed through as constants.
2818 (define_expand "maskstore<mode>di"
2819 [(match_operand:VEC_ALLREG_MODE 0 "memory_operand")
2820 (match_operand:VEC_ALLREG_MODE 1 "register_operand")
2821 (match_operand 2 "")]
2824 rtx exec = force_reg (DImode, operands[2]);
2825 rtx addr = gcn_expand_scalar_to_vector_address
2826 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
2827 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
2828 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
2829 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
;; NOTE(review): extraction artifact — embedded line numbers / missing lines
;; (the else-branch structure is fragmentary); verify against canonical
;; gcn-valu.md.

;; Masked gather: base (DI op 1) + scaled V64SI offsets (op 2, scale op 3,
;; element-size-ish op 4).  gcn_expand_scaled_offsets decides between a
;; full V64DI per-lane address and a base+offset form, selecting between the
;; 1offset and 2offsets gather insns.  Destination is pre-zeroed because
;; masked-off lanes must hold zero.
2833 (define_expand "mask_gather_load<mode>v64si"
2834 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2835 (match_operand:DI 1 "register_operand")
2836 (match_operand:V64SI 2 "register_operand")
2837 (match_operand 3 "immediate_operand")
2838 (match_operand:SI 4 "gcn_alu_operand")
2839 (match_operand:DI 5 "")]
2842 rtx exec = force_reg (DImode, operands[5]);
2844 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
2845 operands[2], operands[4],
2846 INTVAL (operands[3]), exec);
2848 /* Masked lanes are required to hold zero.  */
2849 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
2851 if (GET_MODE (addr) == V64DImode)
2852 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
2853 const0_rtx, const0_rtx,
2854 const0_rtx, operands[0],
2857 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
2859 const0_rtx, const0_rtx,
2860 operands[0], exec));

;; Masked scatter, mirror of the gather above (operand numbering differs:
;; base op 0, offsets op 1, scale op 2, size op 3, data op 4, mask op 5).
2864 (define_expand "mask_scatter_store<mode>v64si"
2865 [(match_operand:DI 0 "register_operand")
2866 (match_operand:V64SI 1 "register_operand")
2867 (match_operand 2 "immediate_operand")
2868 (match_operand:SI 3 "gcn_alu_operand")
2869 (match_operand:VEC_ALLREG_MODE 4 "register_operand")
2870 (match_operand:DI 5 "")]
2873 rtx exec = force_reg (DImode, operands[5]);
2875 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
2876 operands[1], operands[3],
2877 INTVAL (operands[2]), exec);
2879 if (GET_MODE (addr) == V64DImode)
2880 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
2881 operands[4], const0_rtx,
2885 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
2886 const0_rtx, operands[4],
2887 const0_rtx, const0_rtx,
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.
2892 ; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
2893 (define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
2894 (define_mode_iterator COND_INT_MODE [V64SI V64DI])

2896 (define_code_iterator cond_op [plus minus])

;; Conditional (masked) plus/minus for the vectorizer: delegates to the
;; corresponding <op><mode>3_exec pattern with operand 4 as the merge value
;; and operand 1 as the exec mask (forced into a register first).
2898 (define_expand "cond_<expander><mode>"
2899 [(match_operand:COND_MODE 0 "register_operand")
2900 (match_operand:DI 1 "register_operand")
2902 (match_operand:COND_MODE 2 "gcn_alu_operand")
2903 (match_operand:COND_MODE 3 "gcn_alu_operand"))
2904 (match_operand:COND_MODE 4 "register_operand")]
2907 operands[1] = force_reg (DImode, operands[1]);
2908 operands[2] = force_reg (<MODE>mode, operands[2]);
2910 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2911 operands[3], operands[4],

2916 (define_code_iterator cond_bitop [and ior xor])

;; Conditional bitwise and/ior/xor, integer modes only; same delegation
;; shape as the arithmetic version above.
2918 (define_expand "cond_<expander><mode>"
2919 [(match_operand:COND_INT_MODE 0 "register_operand")
2920 (match_operand:DI 1 "register_operand")
2921 (cond_bitop:COND_INT_MODE
2922 (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
2923 (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
2924 (match_operand:COND_INT_MODE 4 "register_operand")]
2927 operands[1] = force_reg (DImode, operands[1]);
2928 operands[2] = force_reg (<MODE>mode, operands[2]);
2930 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2931 operands[3], operands[4],
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.
2937 ;; {{{ Vector reductions

;; Reduction unspecs: the full set for 1-register modes; the 2-register
;; (V64DI) set is restricted to the operations splittable into low/high
;; 32-bit halves (plus is handled via the carry patterns further down).
2939 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
2940 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
2943 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2945 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
2947 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

2949 ; FIXME: Isn't there a better way of doing this?
;; Identity mapping so <reduc_unspec> can be used inside C condition code.
2950 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
2951 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
2952 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
2953 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
2954 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
2955 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
2956 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
2957 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])

;; Standard-pattern operation name for each reduction unspec.
2959 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
2960 (UNSPEC_SMAX_DPP_SHR "smax")
2961 (UNSPEC_UMIN_DPP_SHR "umin")
2962 (UNSPEC_UMAX_DPP_SHR "umax")
2963 (UNSPEC_PLUS_DPP_SHR "plus")
2964 (UNSPEC_AND_DPP_SHR "and")
2965 (UNSPEC_IOR_DPP_SHR "ior")
2966 (UNSPEC_XOR_DPP_SHR "xor")])

;; Instruction mnemonic stem for each reduction; %i0/%u0/%b0 pick the
;; signed/unsigned/bitwise type suffix at output time.
2968 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
2969 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
2970 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
2971 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
2972 (UNSPEC_PLUS_DPP_SHR "v_add%u0")
2973 (UNSPEC_AND_DPP_SHR "v_and%b0")
2974 (UNSPEC_IOR_DPP_SHR "v_or%b0")
2975 (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
;; NOTE(review): extraction artifact — embedded line numbers / missing lines;
;; verify against canonical gcn-valu.md.

;; Scalar reduction of a 1-register vector mode: gcn_expand_reduc_scalar
;; emits the log2(64) DPP shift-reduce steps; the final value ends up
;; replicated into lane 63, from which it is extracted.
2977 (define_expand "reduc_<reduc_op>_scal_<mode>"
2978 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
2979 (unspec:<SCALAR_MODE>
2980 [(match_operand:VEC_1REG_MODE 1 "register_operand")]
2984 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
2987 /* The result of the reduction is in lane 63 of tmp.  */
2988 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));

;; V64DI reduction, same shape, restricted to REDUC_2REG_UNSPEC operations.
2993 (define_expand "reduc_<reduc_op>_scal_v64di"
2994 [(set (match_operand:DI 0 "register_operand")
2996 [(match_operand:V64DI 1 "register_operand")]
2997 REDUC_2REG_UNSPEC))]
3000 rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
3003 /* The result of the reduction is in lane 63 of tmp.  */
3004 emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));

;; One DPP shift-reduce step (op2 shifted by operand 3 lanes, combined with
;; op1).  GCN3 lacks a carry-less VALU add usable here, hence the exclusion
;; of integer PLUS on GCN3 in the condition (handled by the carry patterns).
3009 (define_insn "*<reduc_op>_dpp_shr_<mode>"
3010 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
3011 (unspec:VEC_1REG_MODE
3012 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
3013 (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
3014 (match_operand:SI 3 "const_int_operand" "n")]
3016 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
3017 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
3019 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
3020 <reduc_unspec>, INTVAL (operands[3]));
3022 [(set_attr "type" "vop_dpp")
3023 (set_attr "length" "8")])

;; V64DI DPP step: split into two independent 32-bit half operations
;; (valid for min/max/and/ior/xor-style ops in REDUC_2REG_UNSPEC; 64-bit
;; plus goes through the carry-chained patterns below).
3025 (define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
3026 [(set (match_operand:V64DI 0 "register_operand" "=v")
3028 [(match_operand:V64DI 1 "register_operand" "v")
3029 (match_operand:V64DI 2 "register_operand" "v")
3030 (match_operand:SI 3 "const_int_operand" "n")]
3031 REDUC_2REG_UNSPEC))]
3037 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
3040 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
3042 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
3043 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
3044 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
3045 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
3046 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
3047 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
3049 [(set_attr "type" "vmult")
3050 (set_attr "length" "16")])
3052 ; Special cases for addition.
3054 (define_insn "*plus_carry_dpp_shr_v64si"
3055 [(set (match_operand:V64SI 0 "register_operand" "=v")
3057 [(match_operand:V64SI 1 "register_operand" "v")
3058 (match_operand:V64SI 2 "register_operand" "v")
3059 (match_operand:SI 3 "const_int_operand" "n")]
3060 UNSPEC_PLUS_CARRY_DPP_SHR))
3061 (clobber (reg:DI VCC_REG))]
3064 const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
3065 return gcn_expand_dpp_shr_insn (V64SImode, insn,
3066 UNSPEC_PLUS_CARRY_DPP_SHR,
3067 INTVAL (operands[3]));
3069 [(set_attr "type" "vop_dpp")
3070 (set_attr "length" "8")])
; 32-bit add-with-carry-in step with DPP row shift.  Operand 4 is the
; incoming carry (constraint "cV" — presumably restricts it to VCC, as
; the hardware addc reads carry from VCC; confirm against the "cV"
; constraint definition).  A new carry-out is written to VCC (clobber).
3072 (define_insn "*plus_carry_in_dpp_shr_v64si"
3073 [(set (match_operand:V64SI 0 "register_operand" "=v")
3075 [(match_operand:V64SI 1 "register_operand" "v")
3076 (match_operand:V64SI 2 "register_operand" "v")
3077 (match_operand:SI 3 "const_int_operand" "n")
3078 (match_operand:DI 4 "register_operand" "cV")]
3079 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3080 (clobber (reg:DI VCC_REG))]
; "v_addc" on GCN3, renamed "v_addc_co" on later ISAs.
3083 const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
3084 return gcn_expand_dpp_shr_insn (V64SImode, insn,
3085 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
3086 INTVAL (operands[3]));
3088 [(set_attr "type" "vop_dpp")
3089 (set_attr "length" "8")])
; 64-bit addition step with DPP row shift.  Split into a carry-out add
; on the low 32-bit parts followed by a carry-in add on the high parts;
; the carry is threaded through VCC between the two instructions, so
; nothing may be scheduled in between that clobbers VCC.
3091 (define_insn_and_split "*plus_carry_dpp_shr_v64di"
3092 [(set (match_operand:V64DI 0 "register_operand" "=v")
3094 [(match_operand:V64DI 1 "register_operand" "v")
3095 (match_operand:V64DI 2 "register_operand" "v")
3096 (match_operand:SI 3 "const_int_operand" "n")]
3097 UNSPEC_PLUS_CARRY_DPP_SHR))
3098 (clobber (reg:DI VCC_REG))]
; Low halves: add producing carry-out in VCC.
3102 [(parallel [(set (match_dup 4)
3104 [(match_dup 6) (match_dup 8) (match_dup 3)]
3105 UNSPEC_PLUS_CARRY_DPP_SHR))
3106 (clobber (reg:DI VCC_REG))])
; High halves: add consuming the carry-in from VCC.
3107 (parallel [(set (match_dup 5)
3109 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
3110 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3111 (clobber (reg:DI VCC_REG))])]
; Operands 4/5, 6/7, 8/9 are the low/high parts of operands 0, 1, 2.
3113 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
3114 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
3115 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
3116 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
3117 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
3118 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
3120 [(set_attr "type" "vmult")
3121 (set_attr "length" "16")])
3123 ; Instructions to move a scalar value from lane 63 of a vector register.
; Extract the lane-63 element of a single-register vector: after a
; full 64-lane reduction the result lives in lane 63.  Alternative 0
; reads it into a scalar register with v_readlane; alternative 1
; broadcasts it into every lane of a VGPR via a wave rotate by one
; (lane 63 rotates into lane 0, and — presumably by repetition of the
; source value across lanes after reduction — all lanes end up equal;
; confirm against the reduction patterns above).
3124 (define_insn "mov_from_lane63_<mode>"
3125 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
3126 (unspec:<SCALAR_MODE>
3127 [(match_operand:VEC_ALL1REG_MODE 1 "register_operand" "v,v")]
3128 UNSPEC_MOV_FROM_LANE63))]
3131 v_readlane_b32\t%0, %1, 63
3132 v_mov_b32\t%0, %1 wave_ror:1"
; v_readlane ignores EXEC ("none"); the DPP move uses the normal mask.
3133 [(set_attr "type" "vop3a,vop_dpp")
3134 (set_attr "exec" "none,*")
3135 (set_attr "length" "8")])
; 64-bit variant of mov_from_lane63: the value spans two 32-bit
; registers, so two v_readlane / v_mov operations are emitted.  For the
; DPP alternative the C fragment orders the low/high moves by register
; number so that an overlapping source/destination pair is not
; clobbered before it is read.
3137 (define_insn "mov_from_lane63_v64di"
3138 [(set (match_operand:DI 0 "register_operand" "=Sg,v")
3140 [(match_operand:V64DI 1 "register_operand" "v,v")]
3141 UNSPEC_MOV_FROM_LANE63))]
3144 v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
3145 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
3146 return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
3147 \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
3149 return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
3150 \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
3151 [(set_attr "type" "vop3a,vop_dpp")
3152 (set_attr "exec" "none,*")
3153 (set_attr "length" "8")])
3156 ;; {{{ Miscellaneous
; Expand vec_series: build the vector {base, base+step, base+2*step, ...}
; as v1 * step + base, where v1 is hard register VGPR 1 — presumably
; preloaded with the lane-number sequence 0..63 by the prologue/ABI
; setup elsewhere in the back end; confirm there.
; Operand 1 is the base, operand 2 the step.
3158 (define_expand "vec_seriesv64si"
3159 [(match_operand:V64SI 0 "register_operand")
3160 (match_operand:SI 1 "gcn_alu_operand")
3161 (match_operand:SI 2 "gcn_alu_operand")]
3164 rtx tmp = gen_reg_rtx (V64SImode);
3165 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
; tmp = v1 * dup(step);  result = tmp + dup(base).
3167 emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
3168 emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
3172 (define_expand "vec_seriesv64di"
3173 [(match_operand:V64DI 0 "register_operand")
3174 (match_operand:DI 1 "gcn_alu_operand")
3175 (match_operand:DI 2 "gcn_alu_operand")]
3178 rtx tmp = gen_reg_rtx (V64DImode);
3179 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3180 rtx op1vec = gen_reg_rtx (V64DImode);
3182 emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
3183 emit_insn (gen_vec_duplicatev64di (op1vec, operands[1]));
3184 emit_insn (gen_addv64di3 (operands[0], tmp, op1vec));