;; Copyright (C) 2016-2019 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; {{{ Vector iterators

; Vector modes for one vector register
(define_mode_iterator VEC_1REG_MODE
    [V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator VEC_1REG_ALT
    [V64QI V64HI V64SI V64HF V64SF])

(define_mode_iterator VEC_1REG_INT_MODE
    [V64QI V64HI V64SI])
(define_mode_iterator VEC_1REG_INT_ALT
    [V64QI V64HI V64SI])

; Vector modes for two vector registers
(define_mode_iterator VEC_2REG_MODE
    [V64DI V64DF])
; All of the above
(define_mode_iterator VEC_REG_MODE
    [V64QI V64HI V64SI V64HF V64SF    ; Single reg
     V64DI V64DF])                    ; Double reg

(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])

;; }}}
;; {{{ Substitutions

(define_subst_attr "exec" "vec_merge"
		   "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
		   "" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
		   "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
		   "" "_exec")

(define_subst "vec_merge"
  [(set (match_operand:VEC_REG_MODE 0)
	(match_operand:VEC_REG_MODE 1))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (match_dup 1)
	  (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])

(define_subst "vec_merge_with_clobber"
  [(set (match_operand:VEC_REG_MODE 0)
	(match_operand:VEC_REG_MODE 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (match_dup 1)
	  (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])

(define_subst "vec_merge_with_vcc"
  [(set (match_operand:VEC_REG_MODE 0)
	(match_operand:VEC_REG_MODE 1))
   (set (match_operand:DI 2)
	(match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
	   (vec_merge:VEC_REG_MODE
	     (match_dup 1)
	     (match_operand:VEC_REG_MODE 4
					 "gcn_register_or_unspec_operand" "U0")
	     (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
	   (and:DI (match_dup 3)
		   (reg:DI EXEC_REG)))])])

(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand 0)
	   (match_operand 1)
	   (match_operand 2)
	   (match_operand 3)]
	  UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 0)
	   (match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
	  UNSPEC_SCATTER))])
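
; As an illustration of how the machinery above is used: tagging a pattern
; name with one of these subst attributes, e.g. "add<mode>3<exec_clobber>",
; makes the insn generators emit both a plain "add<mode>3" pattern and an
; "add<mode>3_exec" variant whose source is wrapped in a vec_merge under
; EXEC.  A sketch of the generated _exec form (illustrative only; the real
; add patterns appear later in this file):
;
;   (define_insn "addv64si3_exec"
;     [(set (match_dup 0)
;           (vec_merge:V64SI
;             (plus:V64SI ...)                      ; the original source
;             (match_operand:V64SI 3 "gcn_register_or_unspec_operand" "U0")
;             (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
;      (clobber (reg:DI VCC_REG))]
;     ...)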

;; }}}
;; {{{ Vector moves

; This is the entry point for all vector register moves.  Memory accesses
; can also come this way, but they will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc. patterns.
122
123(define_expand "mov<mode>"
124 [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
125 (match_operand:VEC_REG_MODE 1 "general_operand"))]
126 ""
127 {
128 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
129 {
130 operands[1] = force_reg (<MODE>mode, operands[1]);
131 rtx scratch = gen_rtx_SCRATCH (V64DImode);
132 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
133 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
134 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
135 operands[0],
136 scratch);
137 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
138 DONE;
139 }
140 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
141 {
142 rtx scratch = gen_rtx_SCRATCH (V64DImode);
143 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
144 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
145 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
146 operands[1],
147 scratch);
148 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
149 DONE;
150 }
151 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
152 {
153 gcc_assert (!reload_completed);
154 rtx scratch = gen_reg_rtx (V64DImode);
155 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
156 DONE;
157 }
158 })

; A pseudo instruction that helps LRA use the "U0" constraint.

(define_insn "mov<mode>_unspec"
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_REG_MODE 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])

(define_insn "*mov<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v")
	(match_operand:VEC_1REG_MODE 1 "general_operand"      "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])

(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand"
						     "=v, v, v, v, v, m")
	(vec_merge:VEC_1REG_MODE
	  (match_operand:VEC_1REG_MODE 1 "general_operand"
						     "vA, B, v,vA, m, v")
	  (match_operand:VEC_1REG_MODE 3 "gcn_alu_or_unspec_operand"
						     "U0,U0,vA,vA,U0,U0")
	  (match_operand:DI 2 "register_operand"     " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:V64DI 4		     "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %3, %1, vcc
   v_cndmask_b32\t%0, %3, %1, %2
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write, which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v, v, m")
;	(vec_merge:VEC_1REG_MODE
;	  (match_operand:VEC_1REG_MODE 1 "general_operand"     "vA,B, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand"	       " e,e, e, e")))
;   (clobber (match_scratch:V64DI 3			       "=X,X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  v_mov_b32\t%0, %1
;  v_mov_b32\t%0, %1
;  #
;  #"
;  [(set_attr "type" "vop1,vop1,*,*")
;   (set_attr "length" "4,8,16,16")])

(define_insn "*mov<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_2REG_MODE 1 "general_operand"      "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
						    "= v,   v,   v, v, m")
	(vec_merge:VEC_2REG_MODE
	  (match_operand:VEC_2REG_MODE 1 "general_operand"
						    "vDB,  v0,  v0, m, v")
	  (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
						    " U0,vDA0,vDA0,U0,U0")
	  (match_operand:DI 2 "register_operand"    "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:V64DI 4		    "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
	case 1:
	  return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
	case 2:
	  return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, %2";
	}
    else
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
	case 1:
	  return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
	case 2:
	  return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, %2";
	}

    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write, which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
;	(vec_merge:VEC_2REG_MODE
;	  (match_operand:VEC_2REG_MODE 1 "general_operand"    "vDB, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand"	      "  e, e, e")))
;   (clobber (match_scratch:V64DI 3			      "=X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
;      return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
;    else \
;      return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
;  #
;  #"
;  [(set_attr "type" "vmult,*,*")
;   (set_attr "length" "16,16,16")])

; An SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because a scalar is easier to add to a
; vector.  We also use a temporary vT, and the vector v1 holding the
; numbered lanes.
;
; Rewrite as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT

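; As a concrete sketch of the rewrite above (illustrative only: V64SI with
; 4-byte elements, invented register names, and the 64-bit address add shown
; as its two 32-bit halves):
;
;   v_lshlrev_b32   vT, 2, v1             ; vT = v1 << log2(4)
;   v_add_u32       vTlo, vcc, Svlo, vTlo ; 64-bit add of the SGPR base,
;   v_addc_u32      vThi, vcc, Svhi, 0, vcc ; low part then high part
;   flat_load_dword v, vT                 ; vT is a 64-bit register pair
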
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "= v, v, v, m")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 1 "general_operand"   " vA,vB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "8,12,12")])

; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload.  It allows a reload with a scratch register.

(define_expand "reload_in<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand" "= v")
	(match_operand:VEC_REG_MODE 1 "memory_operand"	  "  m"))
   (clobber (match_operand:V64DI 2 "register_operand"	  "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; reload_out is similar to reload_in, above.

(define_expand "reload_out<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand"	  "= m")
	(match_operand:VEC_REG_MODE 1 "register_operand" "  v"))
   (clobber (match_operand:V64DI 2 "register_operand"	  "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })
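
; A sketch of how a secondary-reload hook can request these expanders
; (illustrative; the real logic lives in gcn_secondary_reload, and the test
; and mode choice shown here are hypothetical):
;
;   if (MEM_P (x) && vgpr_vector_mode_p (mode))    /* hypothetical test */
;     {
;       sri->icode = in_p ? CODE_FOR_reload_inv64si
;                         : CODE_FOR_reload_outv64si;
;       return NO_REGS;
;     }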

; Expand scalar addresses into gather/scatter patterns

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand:VEC_REG_MODE 1 "general_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[0],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
	(vec_merge:VEC_REG_MODE
	  (match_operand:VEC_REG_MODE 1 "general_operand")
	  (match_operand:VEC_REG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1)
		     (match_dup 6) (match_dup 7) (match_dup 3)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[0],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand:VEC_REG_MODE 1 "memory_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (match_dup 0)
	(unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
			      (mem:BLK (scratch))]
			     UNSPEC_GATHER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[1],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(vec_merge:VEC_REG_MODE
	  (match_operand:VEC_REG_MODE 1 "memory_operand")
	  (match_operand:VEC_REG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
				(mem:BLK (scratch))]
			       UNSPEC_GATHER)
	  (match_dup 2)
	  (match_dup 3)))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[1],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; TODO: Add zero/sign extending variants.

;; }}}
;; {{{ Lane moves

; v_writelane and v_readlane work regardless of exec flags.
; We allow the source to be scratch.
;
; FIXME: these should take A immediates

(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"	    "= v")
	(vec_merge:VEC_1REG_MODE
	  (vec_duplicate:VEC_1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	    " Sv"))
	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
								    " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		    "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
; FIXME: 64-bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"	    "= v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	    " Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
								    " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		    "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "vec_set<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
	(vec_merge:VEC_REG_MODE
	  (vec_duplicate:VEC_REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"))
	  (match_dup 0)
	  (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"	       "=v")
	(vec_merge:VEC_1REG_MODE
	  (vec_duplicate:VEC_1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	       "Sv"))
	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
								       "U0")
	  (match_operand:SI 2 "const_int_operand"		       " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"	       "=v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	       "Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
								       "U0")
	  (match_operand:SI 2 "const_int_operand"		       " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"  "=v")
	(vec_duplicate:VEC_1REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"  "=  v")
	(vec_duplicate:VEC_2REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])

(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_1REG_MODE 1 "register_operand" "  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_2REG_MODE 1 "register_operand" "  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand 1)]
  ""
  {
    gcn_expand_vector_init (operands[0], operands[1]);
    DONE;
  })

;; }}}
;; {{{ Scatter / Gather

;; GCN does not have an instruction for loading a vector from contiguous
;; memory, so *all* loads and stores are eventually converted to scatter
;; or gather.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec.  The unspec formats are as follows:
;;
;;     (unspec:V64??
;;       [(<address expression>)
;;        (<addr_space_t>)
;;        (<use_glc>)
;;        (mem:BLK (scratch))]
;;       UNSPEC_GATHER)
;;
;;     (unspec:BLK
;;       [(<address expression>)
;;        (<source register>)
;;        (<addr_space_t>)
;;        (<use_glc>)
;;        (<exec>)]
;;       UNSPEC_SCATTER)
;;
;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
;; - The mem:BLK does not contain any real information, but indicates that
;;   an unknown memory read is taking place.  Stores are expected to use a
;;   similar mem:BLK outside the unspec.
;; - The address space and glc (volatile) fields are there to replace the
;;   fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.

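;; For example, a flat-space V64SI gather in the single-offset form (matched
;; by gather<mode>_insn_1offset below) looks something like this; the
;; address-space and glc constants shown are placeholders:
;;
;;     (set (reg:V64SI <dst>)
;;          (unspec:V64SI
;;            [(plus:V64DI (reg:V64DI <addr>)
;;                         (vec_duplicate:V64DI (const_int 0)))
;;             (const_int 0)    ; addr_space_t
;;             (const_int 0)    ; use_glc
;;             (mem:BLK (scratch))]
;;            UNSPEC_GATHER))
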
(define_expand "gather_load<mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
						const0_rtx, const0_rtx));
    else
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
						 addr, const0_rtx, const0_rtx,
						 const0_rtx));
    DONE;
  })

(define_expand "gather<mode>_exec"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:V64SI 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    rtx undefmode = gcn_gen_undef (<MODE>mode);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
						     const0_rtx, const0_rtx,
						     const0_rtx, undefmode,
						     operands[5]));
    else
      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
						      addr, const0_rtx,
						      const0_rtx, const0_rtx,
						      undefmode, operands[5]));
    DONE;
  })

; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand 1 "")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  ""
  {})

(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_REG_MODE
	  [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
		       (vec_duplicate:V64DI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && ((TARGET_GCN3 && INTVAL (operands[2]) == 0)
	|| ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT) INTVAL (operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_load%%s0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
		   glc);
	else
	  sprintf (buf, "flat_load%%s0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%s0\t%%0, %%1, off offset:%%2%s\;"
	       "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
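
; Note on the insn condition above: flat addressing only allows a non-zero
; immediate offset on GCN5+ (0..4095; GCN3 requires zero), while global
; addressing takes a signed 13-bit offset, which is what the
; "(INTVAL (operands[2]) + 0x1000) < 0x2000" test encodes.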

(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_REG_MODE
	  [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
		       (vec_duplicate:V64SI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	       "=v")
	(unspec:VEC_REG_MODE
	  [(plus:V64DI
	     (plus:V64DI
	       (vec_duplicate:V64DI
		 (match_operand:DI 1 "register_operand"		       "Sv"))
	       (sign_extend:V64DI
		 (match_operand:V64SI 2 "register_operand"	       " v")))
	     (vec_duplicate:V64DI (match_operand 3 "immediate_operand" " n")))
	   (match_operand 4 "immediate_operand"			       " n")
	   (match_operand 5 "immediate_operand"			       " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT) INTVAL (operands[3]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
	/* Work around assembler bug in which a 64-bit register is expected,
	   but a 32-bit value would be correct.  */
	int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
	sprintf (buf, "global_load%%s0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
		 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

(define_expand "scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
						 const0_rtx, const0_rtx));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
						  const0_rtx, operands[4],
						  const0_rtx, const0_rtx));
    DONE;
  })

(define_expand "scatter<mode>_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    operands[5] = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
						      operands[4], const0_rtx,
						      const0_rtx,
						      operands[5]));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
						       const0_rtx, operands[4],
						       const0_rtx, const0_rtx,
						       operands[5]));
    DONE;
  })

; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:V64DI 0 "")
	   (match_operand:VEC_REG_MODE 1 "register_operand")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")]
	  UNSPEC_SCATTER))]
  ""
  {})

(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
		       (vec_duplicate:V64DI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL (operands[1]) == 0
	|| (TARGET_GCN5_PLUS
	    && (unsigned HOST_WIDE_INT) INTVAL (operands[1]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT) INTVAL (operands[1]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s\;"
		   "s_waitcnt\texpcnt(0)", glc);
	else
	  sprintf (buf, "flat_store%%s2\t%%0, %%2%s\;s_waitcnt\texpcnt(0)",
		   glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s\;"
	       "s_waitcnt\texpcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
		       (vec_duplicate:V64SI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT) INTVAL (operands[1]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\texpcnt(0)",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64DI
	     (plus:V64DI
	       (vec_duplicate:V64DI
		 (match_operand:DI 0 "register_operand"		      "Sv"))
	       (sign_extend:V64DI
		 (match_operand:V64SI 1 "register_operand"	      " v")))
	     (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
								      " n")))
	   (match_operand:VEC_REG_MODE 3 "register_operand"	      " v")
	   (match_operand 4 "immediate_operand"			      " n")
	   (match_operand 5 "immediate_operand"			      " n")]
	  UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT) INTVAL (operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
	/* Work around assembler bug in which a 64-bit register is expected,
	   but a 32-bit value would be correct.  */
	int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
	sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s\;"
		 "s_waitcnt\texpcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

;; }}}
;; {{{ Permutations

(define_insn "ds_bpermute<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"    "=v")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 2 "register_operand" " v")
	   (match_operand:V64SI 1 "register_operand"	     " v")
	   (match_operand:DI 3 "gcn_exec_reg_operand"	     " e")]
	  UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])

(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"    "=&v")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
	   (match_operand:V64SI 1 "register_operand"	     "  v")
	   (match_operand:DI 3 "gcn_exec_reg_operand"	     "  e")]
	  UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
				    UNSPEC_BPERMUTE))
   (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
				    UNSPEC_BPERMUTE))]
  {
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])
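
; A usage sketch (illustrative, not from this file): to broadcast lane <n>
; of %2 to all lanes, operand 1 would hold the byte address <n>*4 in every
; lane, since ds_bpermute_b32 selects its source lane by byte offset.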

;; }}}
;; {{{ ALU special case: add/sub

(define_insn "addv64si3<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"   "=  v")
	(plus:V64SI
	  (match_operand:V64SI 1 "register_operand" "%  v")
	  (match_operand:V64SI 2 "gcn_alu_operand"  "vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

(define_insn "addv64si3_dup<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"   "= v")
	(plus:V64SI
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"   "SvB"))
	  (match_operand:V64SI 1 "register_operand" "  v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

(define_insn "addv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"   "=  v,   v")
	(plus:V64SI
	  (match_operand:V64SI 1 "register_operand" "%  v,   v")
	  (match_operand:V64SI 2 "gcn_alu_operand"  "vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"	    "= cV,  Sg")
	(ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8")])

; This pattern only changes the VCC bits when the corresponding lane is
; enabled, so the set must be described as an ior.

(define_insn "addv64si3_vcc_dup<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"   "= v,  v")
	(plus:V64SI
	  (vec_duplicate:V64SI
	    (match_operand:SI 1 "gcn_alu_operand"   "SvB,SvB"))
	  (match_operand:V64SI 2 "register_operand" "  v,  v")))
   (set (match_operand:DI 3 "register_operand"	    "=cV, Sg")
	(ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
			    (match_dup 1))
		(vec_duplicate:V64SI (match_dup 2))))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8,8")])

; This pattern does not accept SGPR because the VCC read already counts as
; an SGPR use, and the number of SGPR operands is limited to 1.

(define_insn "addcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"    "=v,v")
	(plus:V64SI
	  (plus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "register_operand" " cV,Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
	  (match_operand:V64SI 2 "gcn_alu_operand"   " vB,vB")))
   (set (match_operand:DI 4 "register_operand"	     "=cV,Sg")
	(ior:DI (ltu:DI (plus:V64SI
			  (plus:V64SI
			    (vec_merge:V64SI
			      (vec_duplicate:V64SI (const_int 1))
			      (vec_duplicate:V64SI (const_int 0))
			      (match_dup 3))
			    (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:DI (plus:V64SI
			  (vec_merge:V64SI
			    (vec_duplicate:V64SI (const_int 1))
			    (vec_duplicate:V64SI (const_int 0))
			    (match_dup 3))
			  (match_dup 1))
			(match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])

(define_insn "addcv64si3_dup<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"    "=v,v")
	(plus:V64SI
	  (plus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "register_operand" " cV, Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"    "SvB,SvB"))))
   (set (match_operand:DI 4 "register_operand"	     "=cV, Sg")
	(ior:DI (ltu:DI (plus:V64SI (plus:V64SI
				      (vec_merge:V64SI
					(vec_duplicate:V64SI (const_int 1))
					(vec_duplicate:V64SI (const_int 0))
					(match_dup 3))
				      (match_dup 1))
				    (vec_duplicate:V64SI
				      (match_dup 2)))
			(vec_duplicate:V64SI
			  (match_dup 2)))
		(ltu:DI (plus:V64SI (vec_merge:V64SI
				      (vec_duplicate:V64SI (const_int 1))
				      (vec_duplicate:V64SI (const_int 0))
				      (match_dup 3))
				    (match_dup 1))
			(match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])

(define_insn "subv64si3<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v,   v")
	(minus:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "   v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])

(define_insn "subv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v,   v,   v,   v")
	(minus:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB,   v,   v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "   v,   v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"	   "= cV,  Sg,  cV,  Sg")
	(gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])

; This pattern does not accept SGPR because the VCC read already counts as
; an SGPR use, and the number of SGPR operands is limited to 1.

(define_insn "subcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"    "= v, v, v, v")
	(minus:V64SI
	  (minus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "gcn_alu_operand"  " cV,Sv,cV,Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
	  (match_operand:V64SI 2 "gcn_alu_operand"   " vB,vB,vA,vA")))
   (set (match_operand:DI 4 "register_operand"	     "=cV,Sg,cV,Sg")
	(ior:DI (gtu:DI (minus:V64SI (minus:V64SI
				       (vec_merge:V64SI
					 (vec_duplicate:V64SI (const_int 1))
					 (vec_duplicate:V64SI (const_int 0))
					 (match_dup 3))
				       (match_dup 1))
				     (match_dup 2))
			(match_dup 2))
		(ltu:DI (minus:V64SI (vec_merge:V64SI
				       (vec_duplicate:V64SI (const_int 1))
				       (vec_duplicate:V64SI (const_int 0))
				       (match_dup 3))
				     (match_dup 1))
			(match_dup 1))))]
  ""
  "@
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])

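; The 64-bit patterns below all split into a low-part add that writes the
; carry to VCC and a high-part add-with-carry that consumes it.  A sketch of
; the emitted sequence (illustrative register names):
;
;   v_add_u32  d.lo, vcc, a.lo, b.lo
;   v_addc_u32 d.hi, vcc, a.hi, b.hi, vcc
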
(define_insn_and_split "addv64di3"
  [(set (match_operand:V64DI 0 "register_operand"   "=  &v")
	(plus:V64DI
	  (match_operand:V64DI 1 "register_operand" "%  v0")
	  (match_operand:V64DI 2 "gcn_alu_operand"  "vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_addcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "=  &v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (match_operand:V64DI 1 "register_operand"		  "%  v0")
	    (match_operand:V64DI 2 "gcn_alu_operand"		  "vSvB0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "   U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "    e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[4])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "subv64di3"
  [(set (match_operand:V64DI 0 "register_operand"  "= &v,   &v")
	(minus:V64DI
	  (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0,   v0")
	  (match_operand:V64DI 2 "gcn_alu_operand" "  v0,vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_subcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

(define_insn_and_split "subv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "= &v,   &v")
	(vec_merge:V64DI
	  (minus:V64DI
	    (match_operand:V64DI 1 "gcn_alu_operand"		  "vSvB0,   v0")
	    (match_operand:V64DI 2 "gcn_alu_operand"		  "  v0,vSvB0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
								  "   U0,   U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "    e,    e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_subcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

(define_insn_and_split "addv64di3_dup"
  [(set (match_operand:V64DI 0 "register_operand"   "= &v")
	(plus:V64DI
	  (match_operand:V64DI 1 "register_operand" "  v0")
	  (vec_duplicate:V64DI
	    (match_operand:DI 2 "gcn_alu_operand"   "SvDB"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 vcc));
    emit_insn (gen_addcv64si3_dup
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (DImode, operands[2], 1),
		 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "= &v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (match_operand:V64DI 1 "register_operand"		  "  v0")
	    (vec_duplicate:V64DI
	      (match_operand:DI 2 "gcn_alu_operand"		  "SvDB")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "  U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "   e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (DImode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand"    "=&v,&v")
	(plus:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
	  (match_operand:V64DI 2 "gcn_alu_operand"   "0vB,0vA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
		(gcn_operand_part (V64DImode, operands[0], 0),
		 operands[1],
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_addcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

(define_insn_and_split "addv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "=&v,&v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand"		  "0vA,0vB"))
	    (match_operand:V64DI 2 "gcn_alu_operand"		  "0vB,0vA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "  e,  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 operands[1],
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

(define_insn_and_split "addv64di3_zext_dup"
  [(set (match_operand:V64DI 0 "register_operand"  "=&v")
	(plus:V64DI
	  (zero_extend:V64DI
	    (vec_duplicate:V64SI
	      (match_operand:SI 1 "gcn_alu_operand" "BSv")))
	  (match_operand:V64DI 2 "gcn_alu_operand"  "vA0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_addcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "=&v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI
	      (vec_duplicate:V64SI
		(match_operand:SI 1 "gcn_alu_operand"		  "BSv")))
	    (match_operand:V64DI 2 "gcn_alu_operand"		  "vA0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(plus:V64DI
	  (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"  "BSv"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
		(dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
								      " vA"))
	    (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"
								      "BSv")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"     " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		      "  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
		(dsthi, gcn_operand_part (DImode, operands[2], 1),
		 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
		(dsthi, dsthi, const0_rtx, vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_sext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(plus:V64DI
	  (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"  "BSv"))))
   (clobber (match_scratch:V64SI 3				      "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
		(dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_sext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
								      " vA"))
	    (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"
								      "BSv")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"     " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		      "  e")))
   (clobber (match_scratch:V64SI 5				      "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
				    gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
		(dsthi, gcn_operand_part (DImode, operands[2], 1),
		 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
		(dsthi, dsthi, operands[5], vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

;; }}}
;; {{{ DS memory ALU: add/sub

(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])

1642;; FIXME: the vector patterns probably need RD expanded to a vector of
1643;; addresses. For now, the only way a vector can get into LDS is
1644;; if the user puts it there manually.
1645;;
1646;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1647;; checked to see if anything can ever use them.
1648
1649(define_insn "add<mode>3_ds<exec>"
1650 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1651 (plus:DS_ARITH_MODE
1652 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1653 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1654 "rtx_equal_p (operands[0], operands[1])"
1655 "ds_add%u0\t%A0, %2%O0"
1656 [(set_attr "type" "ds")
1657 (set_attr "length" "8")])
1658
1659(define_insn "add<mode>3_ds_scalar"
1660 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1661 (plus:DS_ARITH_SCALAR_MODE
1662 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1663 "%RD")
1664 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1665 "rtx_equal_p (operands[0], operands[1])"
1666 "ds_add%u0\t%A0, %2%O0"
1667 [(set_attr "type" "ds")
1668 (set_attr "length" "8")])
1669
1670(define_insn "sub<mode>3_ds<exec>"
1671 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1672 (minus:DS_ARITH_MODE
1673 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1674 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1675 "rtx_equal_p (operands[0], operands[1])"
1676 "ds_sub%u0\t%A0, %2%O0"
1677 [(set_attr "type" "ds")
1678 (set_attr "length" "8")])
1679
1680(define_insn "sub<mode>3_ds_scalar"
1681 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1682 (minus:DS_ARITH_SCALAR_MODE
1683 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1684 " RD")
1685 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1686 "rtx_equal_p (operands[0], operands[1])"
1687 "ds_sub%u0\t%A0, %2%O0"
1688 [(set_attr "type" "ds")
1689 (set_attr "length" "8")])
1690
1691(define_insn "subr<mode>3_ds<exec>"
1692 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1693 (minus:DS_ARITH_MODE
1694 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1695 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1696 "rtx_equal_p (operands[0], operands[1])"
1697 "ds_rsub%u0\t%A0, %2%O0"
1698 [(set_attr "type" "ds")
1699 (set_attr "length" "8")])
1700
1701(define_insn "subr<mode>3_ds_scalar"
1702 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1703 (minus:DS_ARITH_SCALAR_MODE
1704 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1705 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1706 " RD")))]
1707 "rtx_equal_p (operands[0], operands[1])"
1708 "ds_rsub%u0\t%A0, %2%O0"
1709 [(set_attr "type" "ds")
1710 (set_attr "length" "8")])
1711
1712;; }}}
1713;; {{{ ALU special case: mult
1714
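;; The highpart patterns below yield bits 32..63 of the widened 64-bit
;; product of two 32-bit lanes; the mulv64di3 splitters further down use
;; them to assemble full 64-bit multiplies from 32-bit operations.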
1715(define_insn "<su>mulv64si3_highpart<exec>"
1716 [(set (match_operand:V64SI 0 "register_operand" "= v")
1717 (truncate:V64SI
1718 (lshiftrt:V64DI
1719 (mult:V64DI
1720 (any_extend:V64DI
1721 (match_operand:V64SI 1 "gcn_alu_operand" " %v"))
1722 (any_extend:V64DI
1723 (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
1724 (const_int 32))))]
1725 ""
1726 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1727 [(set_attr "type" "vop3a")
1728 (set_attr "length" "8")])
1729
1730(define_insn "mulv64si3<exec>"
1731 [(set (match_operand:V64SI 0 "register_operand" "= v")
1732 (mult:V64SI
1733 (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
1734 (match_operand:V64SI 2 "gcn_alu_operand" " vSvA")))]
1735 ""
1736 "v_mul_lo_u32\t%0, %1, %2"
1737 [(set_attr "type" "vop3a")
1738 (set_attr "length" "8")])
1739
1740(define_insn "mulv64si3_dup<exec>"
1741 [(set (match_operand:V64SI 0 "register_operand" "= v")
1742 (mult:V64SI
1743 (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
1744 (vec_duplicate:V64SI
1745 (match_operand:SI 2 "gcn_alu_operand" " SvA"))))]
1746 ""
1747 "v_mul_lo_u32\t%0, %1, %2"
1748 [(set_attr "type" "vop3a")
1749 (set_attr "length" "8")])
1750
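;; 64-bit vector multiplies are split after reload into 32-bit
;; operations: per lane, with a = a_hi:a_lo and b = b_hi:b_lo, the low
;; result is lo (a_lo * b_lo) and the high result accumulates the
;; remaining 32-bit partial products, keeping only the low 64 bits of
;; the full 128-bit product.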
1751(define_insn_and_split "mulv64di3"
1752 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1753 (mult:V64DI
1754 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1755 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1756 (clobber (match_scratch:V64SI 3 "=&v"))]
1757 ""
1758 "#"
1759 "reload_completed"
1760 [(const_int 0)]
1761 {
1762 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1763 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1764 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1765 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1766 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1767 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1768 rtx tmp = operands[3];
1769
1770 emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
1771 emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
1772 emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
1773 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1774 emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
1775 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1776 emit_insn (gen_mulv64si3 (tmp, left_hi, right_hi));
1777 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1778 DONE;
1779 })
1780
1781(define_insn_and_split "mulv64di3_exec"
1782 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1783 (vec_merge:V64DI
1784 (mult:V64DI
1785 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1786 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1787 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1788 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1789 (clobber (match_scratch:V64SI 5 "=&v"))]
1790 ""
1791 "#"
1792 "reload_completed"
1793 [(const_int 0)]
1794 {
1795 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1796 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1797 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1798 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1799 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1800 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1801 rtx exec = operands[4];
1802 rtx tmp = operands[5];
1803
1804 rtx old_lo, old_hi;
1805 if (GET_CODE (operands[3]) == UNSPEC)
1806 {
1807 old_lo = old_hi = gcn_gen_undef (V64SImode);
1808 }
1809 else
1810 {
1811 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1812 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1813 }
1814
1815 rtx undef = gcn_gen_undef (V64SImode);
1816
1817 emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
1818 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
1819 old_hi, exec));
1820 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
1821 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1822 emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
1823 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1824 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_hi, undef, exec));
1825 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1826 DONE;
1827 })
1828
1829(define_insn_and_split "mulv64di3_zext"
1830 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1831 (mult:V64DI
1832 (zero_extend:V64DI
1833 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1834 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1835 (clobber (match_scratch:V64SI 3 "=&v"))]
1836 ""
1837 "#"
1838 "reload_completed"
1839 [(const_int 0)]
1840 {
1841 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1842 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1843 rtx left = operands[1];
1844 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1845 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1846 rtx tmp = operands[3];
1847
1848 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1849 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1850 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1851 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1852 DONE;
1853 })
1854
1855(define_insn_and_split "mulv64di3_zext_exec"
1856 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1857 (vec_merge:V64DI
1858 (mult:V64DI
1859 (zero_extend:V64DI
1860 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1861 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1862 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1863 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1864 (clobber (match_scratch:V64SI 5 "=&v"))]
1865 ""
1866 "#"
1867 "reload_completed"
1868 [(const_int 0)]
1869 {
1870 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1871 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1872 rtx left = operands[1];
1873 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1874 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1875 rtx exec = operands[4];
1876 rtx tmp = operands[5];
1877
1878 rtx old_lo, old_hi;
1879 if (GET_CODE (operands[3]) == UNSPEC)
1880 {
1881 old_lo = old_hi = gcn_gen_undef (V64SImode);
1882 }
1883 else
1884 {
1885 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1886 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1887 }
1888
1889 rtx undef = gcn_gen_undef (V64SImode);
1890
1891 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1892 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1893 old_hi, exec));
1894 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1895 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1896 DONE;
1897 })
1898
1899(define_insn_and_split "mulv64di3_zext_dup2"
1900 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1901 (mult:V64DI
1902 (zero_extend:V64DI
1903 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1904 (vec_duplicate:V64DI
1905 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
1906 (clobber (match_scratch:V64SI 3 "= &v"))]
1907 ""
1908 "#"
1909 "reload_completed"
1910 [(const_int 0)]
1911 {
1912 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1913 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1914 rtx left = operands[1];
1915 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1916 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1917 rtx tmp = operands[3];
1918
1919 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1920 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1921 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1922 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1923 DONE;
1924 })
1925
1926(define_insn_and_split "mulv64di3_zext_dup2_exec"
1927 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1928 (vec_merge:V64DI
1929 (mult:V64DI
1930 (zero_extend:V64DI
1931 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1932 (vec_duplicate:V64DI
1933 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
1934 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1935 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1936 (clobber (match_scratch:V64SI 5 "= &v"))]
1937 ""
1938 "#"
1939 "reload_completed"
1940 [(const_int 0)]
1941 {
1942 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1943 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1944 rtx left = operands[1];
1945 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1946 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1947 rtx exec = operands[4];
1948 rtx tmp = operands[5];
1949
1950 rtx old_lo, old_hi;
1951 if (GET_CODE (operands[3]) == UNSPEC)
1952 {
1953 old_lo = old_hi = gcn_gen_undef (V64SImode);
1954 }
1955 else
1956 {
1957 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1958 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1959 }
1960
1961 rtx undef = gcn_gen_undef (V64SImode);
1962
1963 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1964 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1965 old_hi, exec));
1966 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1967 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1968 DONE;
1969 })
1970
1971;; }}}
1972;; {{{ ALU generic case
1973
1974(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI])
1975
1976(define_code_iterator bitop [and ior xor])
1977(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
1978(define_code_iterator minmaxop [smin smax umin umax])
1979
1980(define_insn "<expander><mode>2<exec>"
1981 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v")
1982 (bitunop:VEC_1REG_INT_MODE
1983 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
1984 ""
1985 "v_<mnemonic>0\t%0, %1"
1986 [(set_attr "type" "vop1")
1987 (set_attr "length" "8")])
1988
1989(define_insn "<expander><mode>3<exec>"
1990 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
1991 (bitop:VEC_1REG_INT_MODE
1992 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
1993 "% v, 0")
1994 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
1995 "vSvB, v")))]
1996 ""
1997 "@
1998 v_<mnemonic>0\t%0, %2, %1
1999 ds_<mnemonic>0\t%A0, %2%O0"
2000 [(set_attr "type" "vop2,ds")
2001 (set_attr "length" "8,8")])
2002
2003(define_insn_and_split "<expander>v64di3"
2004 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
2005 (bitop:V64DI
2006 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
2007 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2008 ""
2009 "@
2010 #
2011 ds_<mnemonic>0\t%A0, %2%O0"
2012 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
2013 [(set (match_dup 3)
2014 (bitop:V64SI (match_dup 5) (match_dup 7)))
2015 (set (match_dup 4)
2016 (bitop:V64SI (match_dup 6) (match_dup 8)))]
2017 {
2018 operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
2019 operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
2020 operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
2021 operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
2022 operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
2023 operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
2024 }
2025 [(set_attr "type" "vmult,ds")
2026 (set_attr "length" "16,8")])
2027
2028(define_insn_and_split "<expander>v64di3_exec"
2029 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
2030 (vec_merge:V64DI
2031 (bitop:V64DI
2032 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
2033 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2034 (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
2035 " U0,U0")
2036 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2037 "!memory_operand (operands[0], VOIDmode)
2038 || (rtx_equal_p (operands[0], operands[1])
2039 && register_operand (operands[2], VOIDmode))"
2040 "@
2041 #
2042 ds_<mnemonic>0\t%A0, %2%O0"
2043 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
2044 [(set (match_dup 5)
2045 (vec_merge:V64SI
2046 (bitop:V64SI (match_dup 7) (match_dup 9))
2047 (match_dup 11)
2048 (match_dup 4)))
2049 (set (match_dup 6)
2050 (vec_merge:V64SI
2051 (bitop:V64SI (match_dup 8) (match_dup 10))
2052 (match_dup 12)
2053 (match_dup 4)))]
2054 {
2055 operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
2056 operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
2057 operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
2058 operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
2059 operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
2060 operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
2061 operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
2062 operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
2063 }
2064 [(set_attr "type" "vmult,ds")
2065 (set_attr "length" "16,8")])
2066
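;; The hardware shift instructions take the shift amount as the first
;; source operand (the "rev" forms, e.g. v_lshlrev_b32), so the shift
;; patterns below print <revmnemonic> with %2 ahead of %1.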
2067(define_insn "<expander>v64si3<exec>"
2068 [(set (match_operand:V64SI 0 "register_operand" "= v")
2069 (shiftop:V64SI
2070 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2071 (vec_duplicate:V64SI
2072 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2073 ""
2074 "v_<revmnemonic>0\t%0, %2, %1"
2075 [(set_attr "type" "vop2")
2076 (set_attr "length" "8")])
2077
2078(define_insn "v<expander>v64si3<exec>"
2079 [(set (match_operand:V64SI 0 "register_operand" "=v")
2080 (shiftop:V64SI
2081 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2082 (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
2083 ""
2084 "v_<revmnemonic>0\t%0, %2, %1"
2085 [(set_attr "type" "vop2")
2086 (set_attr "length" "8")])
2087
2088(define_insn "<expander><mode>3<exec>"
2089 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
2090 (minmaxop:VEC_1REG_INT_MODE
2091 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
2092 "% v, 0")
2093 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
2094 "vSvB, v")))]
2095 ""
2096 "@
2097 v_<mnemonic>0\t%0, %2, %1
2098 ds_<mnemonic>0\t%A0, %2%O0"
2099 [(set_attr "type" "vop2,ds")
2100 (set_attr "length" "8,8")])
2101
2102;; }}}
2103;; {{{ FP binops - special cases
2104
2105; GCN does not directly provide a DFmode subtract instruction, so we do it by
2106; adding the negated second operand to the first.
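;
; For example, "x = y - z" becomes "v_add_f64 x, y, -z", using the VOP3
; source negation modifier rather than a separate negate instruction.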
2107
2108(define_insn "subv64df3<exec>"
2109 [(set (match_operand:V64DF 0 "register_operand" "= v, v")
2110 (minus:V64DF
2111 (match_operand:V64DF 1 "gcn_alu_operand" "vSvB, v")
2112 (match_operand:V64DF 2 "gcn_alu_operand" " v,vSvB")))]
2113 ""
2114 "@
2115 v_add_f64\t%0, %1, -%2
2116 v_add_f64\t%0, -%2, %1"
2117 [(set_attr "type" "vop3a")
2118 (set_attr "length" "8,8")])
2119
2120(define_insn "subdf"
2121 [(set (match_operand:DF 0 "register_operand" "= v, v")
2122 (minus:DF
2123 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2124 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2125 ""
2126 "@
2127 v_add_f64\t%0, %1, -%2
2128 v_add_f64\t%0, -%2, %1"
2129 [(set_attr "type" "vop3a")
2130 (set_attr "length" "8,8")])
2131
2132;; }}}
2133;; {{{ FP binops - generic
2134
2135(define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
2136(define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
2137(define_mode_iterator FP_MODE [HF SF DF])
2138(define_mode_iterator FP_1REG_MODE [HF SF])
2139
2140(define_code_iterator comm_fp [plus mult smin smax])
2141(define_code_iterator nocomm_fp [minus])
2142(define_code_iterator all_fp [plus mult minus smin smax])
2143
2144(define_insn "<expander><mode>3<exec>"
2145 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2146 (comm_fp:VEC_FP_MODE
2147 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v")
2148 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
2149 ""
2150 "v_<mnemonic>0\t%0, %2, %1"
2151 [(set_attr "type" "vop2")
2152 (set_attr "length" "8")])
2153
2154(define_insn "<expander><mode>3"
2155 [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL")
2156 (comm_fp:FP_MODE
2157 (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0")
2158 (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2159 ""
2160 "@
2161 v_<mnemonic>0\t%0, %2, %1
2162 v_<mnemonic>0\t%0, %1%O0"
2163 [(set_attr "type" "vop2,ds")
2164 (set_attr "length" "8")])
2165
2166(define_insn "<expander><mode>3<exec>"
2167 [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v")
2168 (nocomm_fp:VEC_FP_1REG_MODE
2169 (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2170 (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2171 ""
2172 "@
2173 v_<mnemonic>0\t%0, %1, %2
2174 v_<revmnemonic>0\t%0, %2, %1"
2175 [(set_attr "type" "vop2")
2176 (set_attr "length" "8,8")])
2177
2178(define_insn "<expander><mode>3"
2179 [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v")
2180 (nocomm_fp:FP_1REG_MODE
2181 (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2182 (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2183 ""
2184 "@
2185 v_<mnemonic>0\t%0, %1, %2
2186 v_<revmnemonic>0\t%0, %2, %1"
2187 [(set_attr "type" "vop2")
2188 (set_attr "length" "8,8")])
2189
2190;; }}}
2191;; {{{ FP unops
2192
2193(define_insn "abs<mode>2"
2194 [(set (match_operand:FP_MODE 0 "register_operand" "=v")
2195 (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
2196 ""
2197 "v_add%i0\t%0, 0, |%1|"
2198 [(set_attr "type" "vop3a")
2199 (set_attr "length" "8")])
2200
2201(define_insn "abs<mode>2<exec>"
2202 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2203 (abs:VEC_FP_MODE
2204 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2205 ""
2206 "v_add%i0\t%0, 0, |%1|"
2207 [(set_attr "type" "vop3a")
2208 (set_attr "length" "8")])
2209
2210(define_insn "neg<mode>2<exec>"
2211 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2212 (neg:VEC_FP_MODE
2213 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2214 ""
2215 "v_add%i0\t%0, 0, -%1"
2216 [(set_attr "type" "vop3a")
2217 (set_attr "length" "8")])
2218
2219(define_insn "sqrt<mode>2<exec>"
2220 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2221 (sqrt:VEC_FP_MODE
2222 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2223 "flag_unsafe_math_optimizations"
2224 "v_sqrt%i0\t%0, %1"
2225 [(set_attr "type" "vop1")
2226 (set_attr "length" "8")])
2227
2228(define_insn "sqrt<mode>2"
2229 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2230 (sqrt:FP_MODE
2231 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2232 "flag_unsafe_math_optimizations"
2233 "v_sqrt%i0\t%0, %1"
2234 [(set_attr "type" "vop1")
2235 (set_attr "length" "8")])
2236
2237;; }}}
2238;; {{{ FP fused multiply and add
2239
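;; fma (a, b, c) computes a*b+c with a single rounding.  The _negop2
;; variants match a*(-b)+c and fold the negation into the VOP3 source
;; modifier (-%2) rather than emitting a separate negate.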
2240(define_insn "fma<mode>4<exec>"
2241 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v")
2242 (fma:VEC_FP_MODE
2243 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2244 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2245 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2246 ""
2247 "v_fma%i0\t%0, %1, %2, %3"
2248 [(set_attr "type" "vop3a")
2249 (set_attr "length" "8")])
2250
2251(define_insn "fma<mode>4_negop2<exec>"
2252 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v")
2253 (fma:VEC_FP_MODE
2254 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2255 (neg:VEC_FP_MODE
2256 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2257 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2258 ""
2259 "v_fma%i0\t%0, %1, -%2, %3"
2260 [(set_attr "type" "vop3a")
2261 (set_attr "length" "8")])
2262
2263(define_insn "fma<mode>4"
2264 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v")
2265 (fma:FP_MODE
2266 (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2267 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2268 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2269 ""
2270 "v_fma%i0\t%0, %1, %2, %3"
2271 [(set_attr "type" "vop3a")
2272 (set_attr "length" "8")])
2273
2274(define_insn "fma<mode>4_negop2"
2275 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v")
2276 (fma:FP_MODE
2277 (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2278 (neg:FP_MODE
2279 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2280 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2281 ""
2282 "v_fma%i0\t%0, %1, -%2, %3"
2283 [(set_attr "type" "vop3a")
2284 (set_attr "length" "8")])
2285
2286;; }}}
2287;; {{{ FP division
2288
2289(define_insn "recip<mode>2<exec>"
2290 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2291 (div:VEC_FP_MODE
2292 (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
2293 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2294 ""
2295 "v_rcp%i0\t%0, %1"
2296 [(set_attr "type" "vop1")
2297 (set_attr "length" "8")])
2298
2299(define_insn "recip<mode>2"
2300 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2301 (div:FP_MODE
2302 (float:FP_MODE (const_int 1))
2303 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2304 ""
2305 "v_rcp%i0\t%0, %1"
2306 [(set_attr "type" "vop1")
2307 (set_attr "length" "8")])
2308
2309;; Do division via a = b * (1/c).
2310;; The v_rcp_* instructions are not sufficiently accurate on their own,
2311;; so we do one round of Newton-Raphson refinement (a v_fma_* and a v_mul_*
2312;; instruction), which the ISA manual says is enough to improve the accuracy.
2313;;
2314;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
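;;
;; In outline, the expansion below computes (a sketch of the math, not
;; the literal insn sequence):
;;   r0 = v_rcp (c)          ; initial approximation of 1/c
;;   e  = fma (r0, -c, 2)    ; correction term e = 2 - c*r0
;;   r1 = r0 * e             ; refined reciprocal
;;   a  = b * r1             ; final quotient
;; One refinement step roughly doubles the number of correct bits in r0.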
2315
2316(define_expand "div<mode>3"
2317 [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
2318 (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
2319 (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
2320 "flag_reciprocal_math"
2321 {
2322 rtx two = gcn_vec_constant (<MODE>mode,
2323 const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
2324 rtx initrcp = gen_reg_rtx (<MODE>mode);
2325 rtx fma = gen_reg_rtx (<MODE>mode);
2326 rtx rcp;
2327
2328 bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
2329 && real_identical
2330 (CONST_DOUBLE_REAL_VALUE
2331 (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
2332
2333 if (is_rcp)
2334 rcp = operands[0];
2335 else
2336 rcp = gen_reg_rtx (<MODE>mode);
2337
2338 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2339 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2340 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2341
2342 if (!is_rcp)
2343 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2344
2345 DONE;
2346 })
2347
2348(define_expand "div<mode>3"
2349 [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
2350 (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
2351 (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
2352 "flag_reciprocal_math"
2353 {
2354 rtx two = const_double_from_real_value (dconst2, <MODE>mode);
2355 rtx initrcp = gen_reg_rtx (<MODE>mode);
2356 rtx fma = gen_reg_rtx (<MODE>mode);
2357 rtx rcp;
2358
2359 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
2360 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
2361 &dconstm1));
2362
2363 if (is_rcp)
2364 rcp = operands[0];
2365 else
2366 rcp = gen_reg_rtx (<MODE>mode);
2367
2368 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2369 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2370 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2371
2372 if (!is_rcp)
2373 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2374
2375 DONE;
2376 })
2377
2378;; }}}
2379;; {{{ Int/FP conversions
2380
2381(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2382(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2383
2384(define_mode_iterator VCVT_FROM_MODE [V64HI V64SI V64HF V64SF V64DF])
2385(define_mode_iterator VCVT_TO_MODE [V64HI V64SI V64HF V64SF V64DF])
2386
2387(define_code_iterator cvt_op [fix unsigned_fix
2388 float unsigned_float
2389 float_extend float_truncate])
2390(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2391 (float "float") (unsigned_float "floatuns")
2392 (float_extend "extend") (float_truncate "trunc")])
2393(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2394 (float "%i0%i1") (unsigned_float "%i0%u1")
2395 (float_extend "%i0%i1")
2396 (float_truncate "%i0%i1")])
2397
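;; For example, fix_truncv64sfv64si2 should come out as
;; "v_cvt_i32_f32 %0, %1": the %i/%u print codes emit the
;; _i32/_u32/_f32/... suffix for the corresponding operand's mode.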
2398(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2399 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2400 (cvt_op:CVT_TO_MODE
2401 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2402 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2403 <cvt_name>_cvt)"
2404 "v_cvt<cvt_operands>\t%0, %1"
2405 [(set_attr "type" "vop1")
2406 (set_attr "length" "8")])
2407
2408(define_insn "<cvt_name><VCVT_FROM_MODE:mode><VCVT_TO_MODE:mode>2<exec>"
2409 [(set (match_operand:VCVT_TO_MODE 0 "register_operand" "= v")
2410 (cvt_op:VCVT_TO_MODE
2411 (match_operand:VCVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2412 "gcn_valid_cvt_p (<VCVT_FROM_MODE:MODE>mode, <VCVT_TO_MODE:MODE>mode,
2413 <cvt_name>_cvt)"
2414 "v_cvt<cvt_operands>\t%0, %1"
2415 [(set_attr "type" "vop1")
2416 (set_attr "length" "8")])
2417
2418;; }}}
2419;; {{{ Int/int conversions
2420
2421;; GCC can already do these for scalar types, but not for vector types.
2422;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2423;; so there must be a few tricks here.
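;; For example, truncating V64DI to V64SI below just selects the low
;; register of each 64-bit register pair, so after reload it becomes a
;; plain 32-bit vector move, or nothing at all when the operands already
;; overlap (the "0" alternative has length 0).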
2424
2425(define_insn_and_split "vec_truncatev64div64si"
2426 [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
2427 (truncate:V64SI
2428 (match_operand:V64DI 1 "register_operand" " 0, v")))]
2429 ""
2430 "#"
2431 "reload_completed"
2432 [(set (match_dup 0) (match_dup 1))]
2433 {
2434 operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
2435 }
2436 [(set_attr "type" "vop2")
2437 (set_attr "length" "0,4")])
2438
2439(define_insn_and_split "vec_truncatev64div64si_exec"
2440 [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
2441 (vec_merge:V64SI
2442 (truncate:V64SI
2443 (match_operand:V64DI 1 "register_operand" " 0, v"))
2444 (match_operand:V64SI 2 "gcn_alu_or_unspec_operand" "U0,U0")
2445 (match_operand:DI 3 "gcn_exec_operand" " e, e")))]
2446 ""
2447 "#"
2448 "reload_completed"
2449 [(parallel [(set (match_dup 0)
2450 (vec_merge:V64SI (match_dup 1) (match_dup 2) (match_dup 3)))
2451 (clobber (scratch:V64DI))])]
2452 {
2453 operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
2454 }
2455 [(set_attr "type" "vop2")
2456 (set_attr "length" "0,4")])
2457
2458;; }}}
2459;; {{{ Vector comparison/merge
2460
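;; Vector comparisons produce a scalar lane mask, not a vector of
;; booleans: bit i of the DImode result holds the comparison outcome for
;; lane i.  The v_cmpx alternatives additionally write the mask to EXEC.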
2461(define_insn "vec_cmp<mode>di"
2462 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2463 (match_operator 1 "comparison_operator"
2464 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
2465 "vSv, B,vSv, B, v,vA")
2466 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2467 " v, v, v, v,vA, v")]))
2468 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
2469 ""
2470 "@
2471 v_cmp%E1\tvcc, %2, %3
2472 v_cmp%E1\tvcc, %2, %3
2473 v_cmpx%E1\tvcc, %2, %3
2474 v_cmpx%E1\tvcc, %2, %3
2475 v_cmp%E1\t%0, %2, %3
2476 v_cmp%E1\t%0, %2, %3"
2477 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2478 (set_attr "length" "4,8,4,8,8,8")])
2479
2480(define_expand "vec_cmpu<mode>di"
2481 [(match_operand:DI 0 "register_operand")
2482 (match_operator 1 "comparison_operator"
2483 [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2484 (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
2485 ""
2486 {
2487 /* Unsigned comparisons use the same patterns as signed comparisons,
2488 except that they use unsigned operators (e.g. LTU vs LT).
2489 The '%E1' directive then does the Right Thing. */
2490 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
2491 operands[3]));
2492 DONE;
2493 })
2494
2495(define_insn "vec_cmp<mode>di_exec"
2496 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2497 (and:DI
2498 (match_operator 1 "comparison_operator"
2499 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
2500 "vSv, B,vSv, B, v,vA")
2501 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2502 " v, v, v, v,vA, v")])
2503 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
2504 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
2505 ""
2506 "@
2507 v_cmp%E1\tvcc, %2, %3
2508 v_cmp%E1\tvcc, %2, %3
2509 v_cmpx%E1\tvcc, %2, %3
2510 v_cmpx%E1\tvcc, %2, %3
2511 v_cmp%E1\t%0, %2, %3
2512 v_cmp%E1\t%0, %2, %3"
2513 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2514 (set_attr "length" "4,8,4,8,8,8")])
2515
2516(define_insn "vec_cmp<mode>di_dup"
2517 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2518 (match_operator 1 "comparison_operator"
2519 [(vec_duplicate:VEC_1REG_MODE
2520 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2521 " Sv, B,Sv,B, A"))
2522 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2523 " v, v, v,v, v")]))
2524 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
2525 ""
2526 "@
2527 v_cmp%E1\tvcc, %2, %3
2528 v_cmp%E1\tvcc, %2, %3
2529 v_cmpx%E1\tvcc, %2, %3
2530 v_cmpx%E1\tvcc, %2, %3
2531 v_cmp%E1\t%0, %2, %3"
2532 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2533 (set_attr "length" "4,8,4,8,8")])
2534
2535(define_insn "vec_cmp<mode>di_dup_exec"
2536 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2537 (and:DI
2538 (match_operator 1 "comparison_operator"
2539 [(vec_duplicate:VEC_1REG_MODE
2540 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2541 " Sv, B,Sv,B, A"))
2542 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2543 " v, v, v,v, v")])
2544 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
2545 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
2546 ""
2547 "@
2548 v_cmp%E1\tvcc, %2, %3
2549 v_cmp%E1\tvcc, %2, %3
2550 v_cmpx%E1\tvcc, %2, %3
2551 v_cmpx%E1\tvcc, %2, %3
2552 v_cmp%E1\t%0, %2, %3"
2553 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2554 (set_attr "length" "4,8,4,8,8")])
2555
2556(define_expand "vcond_mask_<mode>di"
2557 [(parallel
2558 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "")
2559 (vec_merge:VEC_REG_MODE
2560 (match_operand:VEC_REG_MODE 1 "gcn_vop3_operand" "")
2561 (match_operand:VEC_REG_MODE 2 "gcn_alu_operand" "")
2562 (match_operand:DI 3 "register_operand" "")))
2563 (clobber (scratch:V64DI))])]
2564 ""
2565 "")
2566
2567(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>"
2568 [(match_operand:VEC_1REG_MODE 0 "register_operand")
2569 (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
2570 (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
2571 (match_operator 3 "comparison_operator"
2572 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2573 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
2574 ""
2575 {
2576 rtx tmp = gen_reg_rtx (DImode);
2577 emit_insn (gen_vec_cmp<mode>di (tmp, operands[3], operands[4],
2578 operands[5]));
2579 emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2580 tmp));
2581 DONE;
2582 })
2583
2584(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
2585 [(match_operand:VEC_1REG_MODE 0 "register_operand")
2586 (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
2587 (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
2588 (match_operator 3 "comparison_operator"
2589 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2590 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
2591 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2592 ""
2593 {
2594 rtx tmp = gen_reg_rtx (DImode);
2595 emit_insn (gen_vec_cmp<mode>di_exec (tmp, operands[3], operands[4],
2596 operands[5], operands[6]));
2597 emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2598 tmp));
2599 DONE;
2600 })
2601
2602(define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>"
2603 [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
2604 (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
2605 (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2606 (match_operator 3 "comparison_operator"
2607 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2608 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
2609 ""
2610 {
2611 rtx tmp = gen_reg_rtx (DImode);
2612 emit_insn (gen_vec_cmp<mode>di (tmp, operands[3], operands[4],
2613 operands[5]));
2614 emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2615 tmp));
2616 DONE;
2617 })
2618
2619(define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
2620 [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
2621 (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
2622 (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2623 (match_operator 3 "comparison_operator"
2624 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2625 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
2626 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2627 ""
2628 {
2629 rtx tmp = gen_reg_rtx (DImode);
2630 emit_insn (gen_vec_cmp<mode>di_exec (tmp, operands[3], operands[4],
2631 operands[5], operands[6]));
2632 emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2633 tmp));
2634 DONE;
2635 })
2636
2637;; }}}
2638;; {{{ Fully masked loop support
2639
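;; while_ult (b, e) computes a lane mask with bit i set iff b+i < e.
;; With constant operands it folds to an immediate; e.g. b=0, e=5 gives
;; mask 0x1f (lanes 0..4 active).  Otherwise it compares the lane-id
;; vector (VGPR v1, which the backend sets up to hold 0, 1, ..., 63,
;; offset by b if necessary) against the end value.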
2640(define_expand "while_ultsidi"
2641 [(match_operand:DI 0 "register_operand")
2642 (match_operand:SI 1 "")
2643 (match_operand:SI 2 "")]
2644 ""
2645 {
2646 if (GET_CODE (operands[1]) != CONST_INT
2647 || GET_CODE (operands[2]) != CONST_INT)
2648 {
2649 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2650 rtx tmp = _0_1_2_3;
2651 if (GET_CODE (operands[1]) != CONST_INT
2652 || INTVAL (operands[1]) != 0)
2653 {
2654 tmp = gen_reg_rtx (V64SImode);
2655 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
2656 }
2657 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
2658 gen_rtx_GT (VOIDmode, 0, 0),
2659 operands[2], tmp));
2660 }
2661 else
2662 {
2663 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
2664 HOST_WIDE_INT mask = (diff >= 64 ? -1
2665 : ~((unsigned HOST_WIDE_INT)-1 << diff));
2666 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
2667 }
2668 DONE;
2669 })
2670
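;; Masked loads and stores are implemented as gathers and scatters: the
;; scalar base address is expanded to a vector of per-lane addresses and
;; the mask value becomes the EXEC register for the memory insn.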
2671(define_expand "maskload<mode>di"
2672 [(match_operand:VEC_REG_MODE 0 "register_operand")
2673 (match_operand:VEC_REG_MODE 1 "memory_operand")
2674 (match_operand 2 "")]
2675 ""
2676 {
2677 rtx exec = force_reg (DImode, operands[2]);
2678 rtx addr = gcn_expand_scalar_to_vector_address
2679 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
2680 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
2681 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
2682 rtx undef = gcn_gen_undef (<MODE>mode);
2683 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
2684 exec));
2685 DONE;
2686 })
2687
2688(define_expand "maskstore<mode>di"
2689 [(match_operand:VEC_REG_MODE 0 "memory_operand")
2690 (match_operand:VEC_REG_MODE 1 "register_operand")
2691 (match_operand 2 "")]
2692 ""
2693 {
2694 rtx exec = force_reg (DImode, operands[2]);
2695 rtx addr = gcn_expand_scalar_to_vector_address
2696 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
2697 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
2698 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
2699 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
2700 DONE;
2701 })
2702
2703(define_expand "mask_gather_load<mode>"
2704 [(match_operand:VEC_REG_MODE 0 "register_operand")
2705 (match_operand:DI 1 "register_operand")
2706 (match_operand 2 "register_operand")
2707 (match_operand 3 "immediate_operand")
2708 (match_operand:SI 4 "gcn_alu_operand")
2709 (match_operand:DI 5 "")]
2710 ""
2711 {
2712 rtx exec = force_reg (DImode, operands[5]);
2713
2714 /* TODO: more conversions will be needed when more types are vectorized. */
2715 if (GET_MODE (operands[2]) == V64DImode)
2716 {
2717 rtx tmp = gen_reg_rtx (V64SImode);
2718 emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[2],
2719 gcn_gen_undef (V64SImode),
2720 exec));
2721 operands[2] = tmp;
2722 }
2723
2724 emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
2725 operands[3], operands[4], exec));
2726 DONE;
2727 })
2728
2729(define_expand "mask_scatter_store<mode>"
2730 [(match_operand:DI 0 "register_operand")
2731 (match_operand 1 "register_operand")
2732 (match_operand 2 "immediate_operand")
2733 (match_operand:SI 3 "gcn_alu_operand")
2734 (match_operand:VEC_REG_MODE 4 "register_operand")
2735 (match_operand:DI 5 "")]
2736 ""
2737 {
2738 rtx exec = force_reg (DImode, operands[5]);
2739
2740 /* TODO: more conversions will be needed when more types are vectorized. */
2741 if (GET_MODE (operands[1]) == V64DImode)
2742 {
2743 rtx tmp = gen_reg_rtx (V64SImode);
2744 emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[1],
2745 gcn_gen_undef (V64SImode),
2746 exec));
2747 operands[1] = tmp;
2748 }
2749
2750 emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
2751 operands[3], operands[4], exec));
2752 DONE;
2753 })
2754
2755; FIXME: this should be VEC_REG_MODE, but not all of the dependent patterns
;        are implemented yet.
2756(define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
2757(define_mode_iterator COND_INT_MODE [V64SI V64DI])
2758
2759(define_code_iterator cond_op [plus minus])
2760
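;; cond_<op> maps directly onto the corresponding _exec pattern: lanes
;; selected by the mask (operand 1) receive the result of the operation,
;; while the remaining lanes keep the fallback value (operand 4).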
2761(define_expand "cond_<expander><mode>"
2762 [(match_operand:COND_MODE 0 "register_operand")
2763 (match_operand:DI 1 "register_operand")
2764 (cond_op:COND_MODE
2765 (match_operand:COND_MODE 2 "gcn_alu_operand")
2766 (match_operand:COND_MODE 3 "gcn_alu_operand"))
2767 (match_operand:COND_MODE 4 "register_operand")]
2768 ""
2769 {
2770 operands[1] = force_reg (DImode, operands[1]);
2771 operands[2] = force_reg (<MODE>mode, operands[2]);
2772
2773 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2774 operands[3], operands[4],
2775 operands[1]));
2776 DONE;
2777 })
2778
2779(define_code_iterator cond_bitop [and ior xor])
2780
2781(define_expand "cond_<expander><mode>"
2782 [(match_operand:COND_INT_MODE 0 "register_operand")
2783 (match_operand:DI 1 "register_operand")
2784 (cond_bitop:COND_INT_MODE
2785 (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
2786 (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
2787 (match_operand:COND_INT_MODE 4 "register_operand")]
2788 ""
2789 {
2790 operands[1] = force_reg (DImode, operands[1]);
2791 operands[2] = force_reg (<MODE>mode, operands[2]);
2792
2793 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2794 operands[3], operands[4],
2795 operands[1]));
2796 DONE;
2797 })
2798
2799;; }}}
2800;; {{{ Vector reductions
2801
2802(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
2803 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
2804 UNSPEC_PLUS_DPP_SHR
2805 UNSPEC_AND_DPP_SHR
2806 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2807
2808(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
2809 UNSPEC_AND_DPP_SHR
2810 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2811
2812; FIXME: Isn't there a better way of doing this?
2813(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
2814 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
2815 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
2816 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
2817 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
2818 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
2819 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
2820 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
2821
2822(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
2823 (UNSPEC_SMAX_DPP_SHR "smax")
2824 (UNSPEC_UMIN_DPP_SHR "umin")
2825 (UNSPEC_UMAX_DPP_SHR "umax")
2826 (UNSPEC_PLUS_DPP_SHR "plus")
2827 (UNSPEC_AND_DPP_SHR "and")
2828 (UNSPEC_IOR_DPP_SHR "ior")
2829 (UNSPEC_XOR_DPP_SHR "xor")])
2830
2831(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
2832 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
2833 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
2834 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
2835 (UNSPEC_PLUS_DPP_SHR "v_add%u0")
2836 (UNSPEC_AND_DPP_SHR "v_and%b0")
2837 (UNSPEC_IOR_DPP_SHR "v_or%b0")
2838 (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
2839
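;; In outline, gcn_expand_reduc_scalar emits a logarithmic sequence of
;; the *_dpp_shr patterns below with doubling shift amounts (1, 2, 4,
;; ..., 32), so each step combines lanes with their shifted neighbours
;; and lane 63 ends up holding the reduction of all 64 lanes; e.g. for
;; a sum, roughly:
;;   v += shr (v, 1);  v += shr (v, 2);  ...  v += shr (v, 32);
;; mov_from_lane63 then copies that lane to a scalar register.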
2840(define_expand "reduc_<reduc_op>_scal_<mode>"
2841 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
2842 (unspec:<SCALAR_MODE>
2843 [(match_operand:VEC_1REG_MODE 1 "register_operand")]
2844 REDUC_UNSPEC))]
2845 ""
2846 {
2847 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
2848 <reduc_unspec>);
2849
2850 /* The result of the reduction is in lane 63 of tmp. */
2851 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
2852
2853 DONE;
2854 })
2855
2856(define_expand "reduc_<reduc_op>_scal_v64di"
2857 [(set (match_operand:DI 0 "register_operand")
2858 (unspec:DI
2859 [(match_operand:V64DI 1 "register_operand")]
2860 REDUC_2REG_UNSPEC))]
2861 ""
2862 {
2863 rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
2864 <reduc_unspec>);
2865
2866 /* The result of the reduction is in lane 63 of tmp. */
2867 emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));
2868
2869 DONE;
2870 })
2871
2872(define_insn "*<reduc_op>_dpp_shr_<mode>"
2873 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
2874 (unspec:VEC_1REG_MODE
2875 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
2876 (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
2877 (match_operand:SI 3 "const_int_operand" "n")]
2878 REDUC_UNSPEC))]
2879 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
2880 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
2881 {
2882 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
2883 <reduc_unspec>, INTVAL (operands[3]));
2884 }
2885 [(set_attr "type" "vop_dpp")
2886 (set_attr "length" "8")])
2887
2888(define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
2889 [(set (match_operand:V64DI 0 "register_operand" "=&v")
2890 (unspec:V64DI
2891 [(match_operand:V64DI 1 "register_operand" "v0")
2892 (match_operand:V64DI 2 "register_operand" "v0")
2893 (match_operand:SI 3 "const_int_operand" "n")]
2894 REDUC_2REG_UNSPEC))]
2895 ""
2896 "#"
2897 "reload_completed"
2898 [(set (match_dup 4)
2899 (unspec:V64SI
2900 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
2901 (set (match_dup 5)
2902 (unspec:V64SI
2903 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
2904 {
2905 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
2906 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
2907 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
2908 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
2909 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
2910 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
2911 }
2912 [(set_attr "type" "vmult")
2913 (set_attr "length" "16")])
2914
2915; Special cases for addition.
2916
2917(define_insn "*plus_carry_dpp_shr_<mode>"
2918 [(set (match_operand:VEC_1REG_INT_MODE 0 "register_operand" "=v")
2919 (unspec:VEC_1REG_INT_MODE
2920 [(match_operand:VEC_1REG_INT_MODE 1 "register_operand" "v")
2921 (match_operand:VEC_1REG_INT_MODE 2 "register_operand" "v")
2922 (match_operand:SI 3 "const_int_operand" "n")]
2923 UNSPEC_PLUS_CARRY_DPP_SHR))
2924 (clobber (reg:DI VCC_REG))]
2925 ""
2926 {
2927 const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
2928 return gcn_expand_dpp_shr_insn (<MODE>mode, insn,
2929 UNSPEC_PLUS_CARRY_DPP_SHR,
2930 INTVAL (operands[3]));
2931 }
2932 [(set_attr "type" "vop_dpp")
2933 (set_attr "length" "8")])
2934
2935(define_insn "*plus_carry_in_dpp_shr_v64si"
2936 [(set (match_operand:V64SI 0 "register_operand" "=v")
2937 (unspec:V64SI
2938 [(match_operand:V64SI 1 "register_operand" "v")
2939 (match_operand:V64SI 2 "register_operand" "v")
2940 (match_operand:SI 3 "const_int_operand" "n")
2941 (match_operand:DI 4 "register_operand" "cV")]
2942 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
2943 (clobber (reg:DI VCC_REG))]
2944 ""
2945 {
2946 const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
2947 return gcn_expand_dpp_shr_insn (V64SImode, insn,
2948 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
2949 INTVAL (operands[3]));
2950 }
2951 [(set_attr "type" "vop_dpp")
2952 (set_attr "length" "8")])
2953
2954(define_insn_and_split "*plus_carry_dpp_shr_v64di"
2955 [(set (match_operand:V64DI 0 "register_operand" "=&v")
2956 (unspec:V64DI
2957 [(match_operand:V64DI 1 "register_operand" "v0")
2958 (match_operand:V64DI 2 "register_operand" "v0")
2959 (match_operand:SI 3 "const_int_operand" "n")]
2960 UNSPEC_PLUS_CARRY_DPP_SHR))
2961 (clobber (reg:DI VCC_REG))]
2962 ""
2963 "#"
2964 "reload_completed"
2965 [(parallel [(set (match_dup 4)
2966 (unspec:V64SI
2967 [(match_dup 6) (match_dup 8) (match_dup 3)]
2968 UNSPEC_PLUS_CARRY_DPP_SHR))
2969 (clobber (reg:DI VCC_REG))])
2970 (parallel [(set (match_dup 5)
2971 (unspec:V64SI
2972 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
2973 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
2974 (clobber (reg:DI VCC_REG))])]
2975 {
2976 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
2977 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
2978 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
2979 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
2980 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
2981 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
2982 }
2983 [(set_attr "type" "vmult")
2984 (set_attr "length" "16")])
2985
2986; Instructions to move a scalar value from lane 63 of a vector register.
2987(define_insn "mov_from_lane63_<mode>"
2988 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
2989 (unspec:<SCALAR_MODE>
2990 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v,v")]
2991 UNSPEC_MOV_FROM_LANE63))]
2992 ""
2993 "@
2994 v_readlane_b32\t%0, %1, 63
2995 v_mov_b32\t%0, %1 wave_ror:1"
2996 [(set_attr "type" "vop3a,vop_dpp")
2997 (set_attr "exec" "none,*")
2998 (set_attr "length" "8")])
2999
3000(define_insn "mov_from_lane63_v64di"
3001 [(set (match_operand:DI 0 "register_operand" "=Sg,v")
3002 (unspec:DI
3003 [(match_operand:V64DI 1 "register_operand" "v,v")]
3004 UNSPEC_MOV_FROM_LANE63))]
3005 ""
3006 "@
3007 v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
3008 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
3009 return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
3010 \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
3011 else \
3012 return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
3013 \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
3014 [(set_attr "type" "vop3a,vop_dpp")
3015 (set_attr "exec" "none,*")
3016 (set_attr "length" "8")])
3017
3018;; }}}
3019;; {{{ Miscellaneous
3020
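;; vec_series (base, step) builds the vector {base, base+step,
;; base+2*step, ...}; it is computed as lane_id * step + base, where
;; VGPR v1 is expected to hold the lane numbers 0 ... 63.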
3021(define_expand "vec_seriesv64si"
3022 [(match_operand:V64SI 0 "register_operand")
3023 (match_operand:SI 1 "gcn_alu_operand")
3024 (match_operand:SI 2 "gcn_alu_operand")]
3025 ""
3026 {
3027 rtx tmp = gen_reg_rtx (V64SImode);
3028 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3029
3030 emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
3031 emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
3032 DONE;
3033 })
3034
3035(define_expand "vec_seriesv64di"
3036 [(match_operand:V64DI 0 "register_operand")
3037 (match_operand:DI 1 "gcn_alu_operand")
3038 (match_operand:DI 2 "gcn_alu_operand")]
3039 ""
3040 {
3041 rtx tmp = gen_reg_rtx (V64DImode);
3042 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3043
3044 emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
3045 emit_insn (gen_addv64di3_dup (operands[0], tmp, operands[1]));
3046 DONE;
3047 })
3048
3049;; }}}