]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/gcn/gcn-valu.md
Enable QI/HImode vector moves
[thirdparty/gcc.git] / gcc / config / gcn / gcn-valu.md
CommitLineData
3d6275e3
AS
;; Copyright (C) 2016-2019 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; {{{ Vector iterators

; Vector modes for one vector register
(define_mode_iterator VEC_1REG_MODE
  [V64SI V64HF V64SF])
; Same mode set again, so one pattern can iterate two operands independently.
(define_mode_iterator VEC_1REG_ALT
  [V64SI V64HF V64SF])
; As VEC_1REG_MODE, but also including the sub-word integer vectors.
(define_mode_iterator VEC_ALL1REG_MODE
  [V64QI V64HI V64SI V64HF V64SF])

(define_mode_iterator VEC_1REG_INT_MODE
  [V64SI])
(define_mode_iterator VEC_1REG_INT_ALT
  [V64SI])
(define_mode_iterator VEC_ALL1REG_INT_MODE
  [V64QI V64HI V64SI])
(define_mode_iterator VEC_ALL1REG_INT_ALT
  [V64QI V64HI V64SI])

; Vector modes for two vector registers
(define_mode_iterator VEC_2REG_MODE
  [V64DI V64DF])

; All of above
(define_mode_iterator VEC_REG_MODE
  [V64SI V64HF V64SF		; Single reg
   V64DI V64DF])		; Double reg
(define_mode_iterator VEC_ALLREG_MODE
  [V64QI V64HI V64SI V64HF V64SF  ; Single reg
   V64DI V64DF])		  ; Double reg

; Map a vector mode to the mode of a single element (lowercase/uppercase
; forms, for use in pattern names and in mode suffixes respectively).
(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])

;; }}}
;; {{{ Substitutions

; Each define_subst below rewrites a plain vector pattern into a variant
; predicated on the EXEC mask; the matching define_subst_attr appends
; "_exec" to the pattern name when the substitution is applied.
(define_subst_attr "exec" "vec_merge"
		   "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
		   "" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
		   "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
		   "" "_exec")

; Wrap a simple (set dest src) in a vec_merge with an EXEC-register operand,
; merging with either a previous value or an undefined value ("U0").
(define_subst "vec_merge"
  [(set (match_operand:VEC_ALLREG_MODE 0)
	(match_operand:VEC_ALLREG_MODE 1))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_ALLREG_MODE
	  (match_dup 1)
	  (match_operand:VEC_ALLREG_MODE 3
	   "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])

; As "vec_merge", but for patterns that also clobber a scratch operand.
(define_subst "vec_merge_with_clobber"
  [(set (match_operand:VEC_ALLREG_MODE 0)
	(match_operand:VEC_ALLREG_MODE 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_ALLREG_MODE
	  (match_dup 1)
	  (match_operand:VEC_ALLREG_MODE 3
	   "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])

; As "vec_merge", but for patterns that also set a DImode condition value;
; the condition result is masked with the EXEC register.
(define_subst "vec_merge_with_vcc"
  [(set (match_operand:VEC_ALLREG_MODE 0)
	(match_operand:VEC_ALLREG_MODE 1))
   (set (match_operand:DI 2)
	(match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
	   (vec_merge:VEC_ALLREG_MODE
	     (match_dup 1)
	     (match_operand:VEC_ALLREG_MODE 4
	      "gcn_register_or_unspec_operand" "U0")
	     (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
	   (and:DI (match_dup 3)
		   (reg:DI EXEC_REG)))])])

; Append an explicit EXEC operand to the UNSPEC_SCATTER operand list.
(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand 0)
	   (match_operand 1)
	   (match_operand 2)
	   (match_operand 3)]
	  UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 0)
	   (match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
	  UNSPEC_SCATTER))])

;; }}}
;; {{{ Vector moves

; This is the entry point for all vector register moves.  Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.

(define_expand "mov<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
	(match_operand:VEC_ALLREG_MODE 1 "general_operand"))]
  ""
  {
    /* Before register allocation, lower vector stores/loads directly to
       scatter/gather expressions; the MEM's address space and volatility
       are carried as explicit const_int operands.  */
    if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
      {
	operands[1] = force_reg (<MODE>mode, operands[1]);
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[0],
							scratch);
	emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
	DONE;
      }
    else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
      {
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[1],
							scratch);
	emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
	DONE;
      }
    else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
      {
	/* During register allocation a real scratch register is available,
	   so use the sgprbase form instead.  */
	gcc_assert (!reload_completed);
	rtx scratch = gen_reg_rtx (V64DImode);
	emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
	DONE;
      }
  })

; A pseudo instruction that helps LRA use the "U0" constraint.

(define_insn "mov<mode>_unspec"
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_ALLREG_MODE 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])

; Single-register vector move (register or immediate source).
(define_insn "*mov<mode>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand" "=v,v")
	(match_operand:VEC_ALL1REG_MODE 1 "general_operand"      "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])

; EXEC-predicated single-register vector move; register-to-register merges
; use v_cndmask_b32, memory alternatives are split later ("#").
(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
						      "=v, v, v, v, v, m")
	(vec_merge:VEC_ALL1REG_MODE
	  (match_operand:VEC_ALL1REG_MODE 1 "general_operand"
						      "vA, B, v,vA, m, v")
	  (match_operand:VEC_ALL1REG_MODE 3 "gcn_alu_or_unspec_operand"
						      "U0,U0,vA,vA,U0,U0")
	  (match_operand:DI 2 "register_operand"      " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:V64DI 4		      "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %3, %1, vcc
   v_cndmask_b32\t%0, %3, %1, %2
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
;						      "=v,v, v, m")
;	(vec_merge:VEC_ALL1REG_MODE
;	  (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand"  " e,e, e, e")))
;   (clobber (match_scratch:V64DI 3		      "=X,X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  v_mov_b32\t%0, %1
;  v_mov_b32\t%0, %1
;  #
;  #"
;  [(set_attr "type" "vop1,vop1,*,*")
;   (set_attr "length" "4,8,16,16")])
229
; Double-register vector move; emitted as two v_mov_b32, ordered low-half
; first or high-half first so an overlapping register pair is not corrupted.
(define_insn "*mov<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_2REG_MODE 1 "general_operand"      "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

; EXEC-predicated double-register vector move; see the single-register
; "mov<mode>_exec" above for the alternative layout.
(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
						"= v,   v,   v, v, m")
	(vec_merge:VEC_2REG_MODE
	  (match_operand:VEC_2REG_MODE 1 "general_operand"
						"vDB,  v0,  v0, m, v")
	  (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
						" U0,vDA0,vDA0,U0,U0")
	  (match_operand:DI 2 "register_operand" " e,  cV,  Sv, e, e")))
   (clobber (match_scratch:V64DI 4		"= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    /* Choose the half ordering that is safe for overlapping registers.  */
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
	case 1:
	  return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
	case 2:
	  return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, %2";
	}
    else
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
	case 1:
	  return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
	case 2:
	  return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, %2";
	}

    /* Memory alternatives are split later.  */
    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
;	(vec_merge:VEC_2REG_MODE
;	  (match_operand:VEC_2REG_MODE 1 "general_operand"    "vDB, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand"	      " e, e, e")))
;   (clobber (match_scratch:V64DI 3			      "=X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
;      return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
;    else \
;      return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
;  #
;  #"
;  [(set_attr "type" "vmult,*,*")
;   (set_attr "length" "16,16,16")])
305
; A SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
						       "= v, v, v, m")
	(unspec:VEC_ALL1REG_MODE
	  [(match_operand:VEC_ALL1REG_MODE 1 "general_operand"
						       " vA,vB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])

; Double-register variant of mov<mode>_sgprbase; the register alternative
; orders the two halves to cope with overlapping register pairs.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "8,12,12")])

; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload.  It allows a reload with a scratch register.

(define_expand "reload_in<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "= v")
	(match_operand:VEC_ALLREG_MODE 1 "memory_operand"   "  m"))
   (clobber (match_operand:V64DI 2 "register_operand"	    "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; reload_out is similar to reload_in, above.

(define_expand "reload_out<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand"   "= m")
	(match_operand:VEC_ALLREG_MODE 1 "register_operand" "  v"))
   (clobber (match_operand:V64DI 2 "register_operand"	    "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })
376
; Expand scalar addresses into gather/scatter patterns

; Unpredicated store: rewrite the SGPRBASE unspec as an UNSPEC_SCATTER,
; with the address space and volatility passed as const_int operands.
(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
	(unspec:VEC_ALLREG_MODE
	  [(match_operand:VEC_ALLREG_MODE 1 "general_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[0],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

; EXEC-predicated store: as above, but the exec operand is forwarded into
; the scatter and into the address expansion.
(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
	(vec_merge:VEC_ALLREG_MODE
	  (match_operand:VEC_ALLREG_MODE 1 "general_operand")
	  (match_operand:VEC_ALLREG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1)
		     (match_dup 6) (match_dup 7) (match_dup 3)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[0],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

; Unpredicated load: rewrite the SGPRBASE unspec as an UNSPEC_GATHER.
(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
	(unspec:VEC_ALLREG_MODE
	  [(match_operand:VEC_ALLREG_MODE 1 "memory_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (match_dup 0)
	(unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
				 (mem:BLK (scratch))]
				UNSPEC_GATHER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[1],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; EXEC-predicated load: the gather result is merged under the exec mask.
(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
	(vec_merge:VEC_ALLREG_MODE
	  (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
	  (match_operand:VEC_ALLREG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_ALLREG_MODE
	  (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
				   (mem:BLK (scratch))]
				  UNSPEC_GATHER)
	  (match_dup 2)
	  (match_dup 3)))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[1],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; TODO: Add zero/sign extending variants.

;; }}}
;; {{{ Lane moves

; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates

; Write a scalar into one lane, selected by a register lane number.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "= v")
	(vec_merge:VEC_ALL1REG_MODE
	  (vec_duplicate:VEC_ALL1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
	  (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
							      " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"	      "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"    "= v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
							      " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"	      "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "vec_set<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
	(vec_merge:VEC_ALLREG_MODE
	  (vec_duplicate:VEC_ALLREG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"))
	  (match_dup 0)
	  (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")

; As "*vec_set<mode>", but the lane mask is a literal power of two; it is
; converted back to a lane number before output.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
	(vec_merge:VEC_ALL1REG_MODE
	  (vec_duplicate:VEC_ALL1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
	  (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
							      "U0")
	  (match_operand:SI 2 "const_int_operand"	      " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"    "=v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
							      "U0")
	  (match_operand:SI 2 "const_int_operand"	      " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Broadcast a scalar to all (active) lanes.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"  "=v")
	(vec_duplicate:VEC_ALL1REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand"    "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"  "=  v")
	(vec_duplicate:VEC_2REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])

; Read one lane into a scalar register.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"	"=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_ALL1REG_MODE 1 "register_operand"	"  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"	"SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"    "=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_2REG_MODE 1 "register_operand"  "  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"   "SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand 1)]
  ""
  {
    gcn_expand_vector_init (operands[0], operands[1]);
    DONE;
  })

;; }}}
;; {{{ Scatter / Gather

;; GCN does not have an instruction for loading a vector from contiguous
;; memory so *all* loads and stores are eventually converted to scatter
;; or gather.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec.  The unspec formats are as follows:
;;
;;     (unspec:V64??
;;	 [(<address expression>)
;;	  (<addr_space_t>)
;;	  (<use_glc>)
;;	  (mem:BLK (scratch))]
;;	 UNSPEC_GATHER)
;;
;;     (unspec:BLK
;;	  [(<address expression>)
;;	   (<source register>)
;;	   (<addr_space_t>)
;;	   (<use_glc>)
;;	   (<exec>)]
;;	  UNSPEC_SCATTER)
;;
;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
;; - The mem:BLK does not contain any real information, but indicates that an
;;   unknown memory read is taking place.  Stores are expected to use a similar
;;   mem:BLK outside the unspec.
;; - The address space and glc (volatile) fields are there to replace the
;;   fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.

(define_expand "gather_load<mode>"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), NULL);

    /* A V64DI address means the base was already folded into the offsets;
       otherwise emit the base-plus-offsets form.  */
    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
						const0_rtx, const0_rtx));
    else
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
						 addr, const0_rtx, const0_rtx,
						 const0_rtx));
    DONE;
  })

; EXEC-predicated variant of gather_load; inactive lanes are undefined.
(define_expand "gather<mode>_exec"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:V64SI 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    rtx undefmode = gcn_gen_undef (<MODE>mode);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
						     const0_rtx, const0_rtx,
						     const0_rtx, undefmode,
						     operands[5]));
    else
      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
						      addr, const0_rtx,
						      const0_rtx, const0_rtx,
						      undefmode, operands[5]));
    DONE;
  })
683
; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
	(unspec:VEC_ALLREG_MODE
	  [(match_operand 1 "")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  ""
  {})

; Gather from a vector of 64-bit addresses plus a constant offset, using
; flat_load (FLAT address space) or global_load (GLOBAL address space).
(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_ALLREG_MODE
	  [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
		       (vec_duplicate:V64DI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
	|| ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	/* Flat instructions only gained an offset field in GCN5.  */
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_load%%s0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
		   glc);
	else
	  sprintf (buf, "flat_load%%s0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%s0\t%%0, %%1, off offset:%%2%s\;"
	       "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; As above, but for LDS/GDS (DS) address spaces, which use 32-bit addresses.
(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_ALLREG_MODE
	  [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
		       (vec_duplicate:V64SI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

; Gather from a scalar 64-bit base plus sign-extended 32-bit offsets plus a
; constant offset (global_load only).
(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"	    "=v")
	(unspec:VEC_ALLREG_MODE
	  [(plus:V64DI
	     (plus:V64DI
	       (vec_duplicate:V64DI
		 (match_operand:DI 1 "register_operand"		    "Sv"))
	       (sign_extend:V64DI
		 (match_operand:V64SI 2 "register_operand"	    " v")))
	     (vec_duplicate:V64DI (match_operand 3 "immediate_operand"
								    " n")))
	   (match_operand 4 "immediate_operand"			    " n")
	   (match_operand 5 "immediate_operand"			    " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
	/* Work around assembler bug in which a 64-bit register is expected,
	   but a 32-bit value would be correct.  */
	int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
	sprintf (buf, "global_load%%s0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
		 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
793
(define_expand "scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 4 "register_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), NULL);

    /* A V64DI address means the base was already folded into the offsets;
       otherwise emit the base-plus-offsets form.  */
    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
						 const0_rtx, const0_rtx));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
						  const0_rtx, operands[4],
						  const0_rtx, const0_rtx));
    DONE;
  })

; EXEC-predicated variant of scatter_store.
(define_expand "scatter<mode>_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 4 "register_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    operands[5] = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
						      operands[4], const0_rtx,
						      const0_rtx,
						      operands[5]));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
						       const0_rtx, operands[4],
						       const0_rtx, const0_rtx,
						       operands[5]));
    DONE;
  })

; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:V64DI 0 "")
	   (match_operand:VEC_ALLREG_MODE 1 "register_operand")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")]
	  UNSPEC_SCATTER))]
  ""
  {})
855
856(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
857 [(set (mem:BLK (scratch))
858 (unspec:BLK
859 [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
860 (vec_duplicate:V64DI
861 (match_operand 1 "immediate_operand" "n")))
2b99bed8 862 (match_operand:VEC_ALLREG_MODE 2 "register_operand" "v")
3d6275e3
AS
863 (match_operand 3 "immediate_operand" "n")
864 (match_operand 4 "immediate_operand" "n")]
865 UNSPEC_SCATTER))]
866 "(AS_FLAT_P (INTVAL (operands[3]))
867 && (INTVAL(operands[1]) == 0
868 || (TARGET_GCN5_PLUS
869 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
870 || (AS_GLOBAL_P (INTVAL (operands[3]))
871 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
872 {
873 addr_space_t as = INTVAL (operands[3]);
874 const char *glc = INTVAL (operands[4]) ? " glc" : "";
875
876 static char buf[200];
877 if (AS_FLAT_P (as))
878 {
879 if (TARGET_GCN5_PLUS)
930c5599 880 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
3d6275e3 881 else
930c5599 882 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
3d6275e3
AS
883 }
884 else if (AS_GLOBAL_P (as))
930c5599 885 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
3d6275e3
AS
886 else
887 gcc_unreachable ();
888
889 return buf;
890 }
891 [(set_attr "type" "flat")
892 (set_attr "length" "12")])
893
; Scatter store to the LDS/GDS (DS) address spaces: writes vector %2 to
; the per-lane 32-bit addresses %0 plus constant offset %1 (the insn
; condition restricts the offset to 16 bits).  Operand 3 is the address
; space id; " gds" is appended when it names GDS.  Operand 4 (glc) is
; accepted for interface parity with the flat/global patterns but unused.
894(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
 895 [(set (mem:BLK (scratch))
 896 (unspec:BLK
 897 [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
 898 (vec_duplicate:V64SI
 899 (match_operand 1 "immediate_operand" "n")))
2b99bed8 900 (match_operand:VEC_ALLREG_MODE 2 "register_operand" "v")
3d6275e3
AS
 901 (match_operand 3 "immediate_operand" "n")
 902 (match_operand 4 "immediate_operand" "n")]
 903 UNSPEC_SCATTER))]
 904 "(AS_ANY_DS_P (INTVAL (operands[3]))
 905 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
 906 {
 907 addr_space_t as = INTVAL (operands[3]);
 908 static char buf[200];
930c5599 909 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
3d6275e3
AS
 910 (AS_GDS_P (as) ? " gds" : ""));
 911 return buf;
 912 }
 913 [(set_attr "type" "ds")
 914 (set_attr "length" "12")])
915
; Scatter store to the GLOBAL address space with a split address: scalar
; 64-bit base %0, per-lane sign-extended 32-bit offsets %1, and constant
; offset %2 (condition limits it to [-0x1000, 0xfff]).  The template
; prints the offset VGPR as a 64-bit register pair to work around an
; assembler bug that expects a 64-bit register operand.
916(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
 917 [(set (mem:BLK (scratch))
 918 (unspec:BLK
 919 [(plus:V64DI
 920 (plus:V64DI
 921 (vec_duplicate:V64DI
 922 (match_operand:DI 0 "register_operand" "Sv"))
 923 (sign_extend:V64DI
 924 (match_operand:V64SI 1 "register_operand" " v")))
 925 (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
 926 " n")))
2b99bed8 927 (match_operand:VEC_ALLREG_MODE 3 "register_operand" " v")
3d6275e3
AS
 928 (match_operand 4 "immediate_operand" " n")
 929 (match_operand 5 "immediate_operand" " n")]
 930 UNSPEC_SCATTER))]
 931 "(AS_GLOBAL_P (INTVAL (operands[4]))
 932 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
 933 {
 934 addr_space_t as = INTVAL (operands[4]);
 935 const char *glc = INTVAL (operands[5]) ? " glc" : "";
 936
 937 static char buf[200];
 938 if (AS_GLOBAL_P (as))
 939 {
 940 /* Work around assembler bug in which a 64-bit register is expected,
 941 but a 32-bit value would be correct. */
 942 int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
930c5599
AS
 943 sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
 944 reg, reg + 1, glc);
3d6275e3
AS
 945 }
 946 else
 947 gcc_unreachable ();
 948
 949 return buf;
 950 }
 951 [(set_attr "type" "flat")
 952 (set_attr "length" "12")])
953
954;; }}}
955;; {{{ Permutations
956
; Cross-lane permute for single-register vector modes via ds_bpermute_b32;
; the trailing s_waitcnt lgkmcnt(0) ensures the LDS-path result has landed
; before any consumer reads %0.  Operand 3 is the EXEC mask in use.
957(define_insn "ds_bpermute<mode>"
2b99bed8
AS
 958 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
 959 (unspec:VEC_ALL1REG_MODE
 960 [(match_operand:VEC_ALL1REG_MODE 2 "register_operand" " v")
 961 (match_operand:V64SI 1 "register_operand" " v")
 962 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
3d6275e3
AS
 963 UNSPEC_BPERMUTE))]
 964 ""
 965 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
 966 [(set_attr "type" "vop2")
 967 (set_attr "length" "12")])
968
; Double-register (64-bit element) bpermute: split after reload into two
; 32-bit bpermutes, one per register half (gcn_operand_part 0/1 of the
; output and of input 2).  The output is early-clobbered and input 2 is
; tied via "0" so the halves can be written independently.
969(define_insn_and_split "ds_bpermute<mode>"
 970 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v")
 971 (unspec:VEC_2REG_MODE
 972 [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
 973 (match_operand:V64SI 1 "register_operand" " v")
 974 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
 975 UNSPEC_BPERMUTE))]
 976 ""
 977 "#"
 978 "reload_completed"
 979 [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
 980 UNSPEC_BPERMUTE))
 981 (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
 982 UNSPEC_BPERMUTE))]
 983 {
 984 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
 985 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
 986 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
 987 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
 988 }
 989 [(set_attr "type" "vmult")
 990 (set_attr "length" "24")])
991
992;; }}}
993;; {{{ ALU special case: add/sub
994
; Vector SImode add.  v_add_u32 always writes the per-lane carry bits to
; VCC, hence the clobber even though the carry is unused here.
995(define_insn "addv64si3<exec_clobber>"
 996 [(set (match_operand:V64SI 0 "register_operand" "= v")
 997 (plus:V64SI
 998 (match_operand:V64SI 1 "register_operand" "% v")
 999 (match_operand:V64SI 2 "gcn_alu_operand" "vSvB")))
 1000 (clobber (reg:DI VCC_REG))]
 1001 ""
 1002 "v_add%^_u32\t%0, vcc, %2, %1"
 1003 [(set_attr "type" "vop2")
 1004 (set_attr "length" "8")])
1005
; Vector add of a broadcast scalar (%2) to vector %1; VCC is clobbered
; because v_add_u32 writes carry unconditionally.
1006(define_insn "addv64si3_dup<exec_clobber>"
 1007 [(set (match_operand:V64SI 0 "register_operand" "= v")
 1008 (plus:V64SI
 1009 (vec_duplicate:V64SI
 1010 (match_operand:SI 2 "gcn_alu_operand" "SvB"))
 1011 (match_operand:V64SI 1 "register_operand" " v")))
 1012 (clobber (reg:DI VCC_REG))]
 1013 ""
 1014 "v_add%^_u32\t%0, vcc, %2, %1"
 1015 [(set_attr "type" "vop2")
 1016 (set_attr "length" "8")])
1017
; Vector add that also exposes the per-lane unsigned carry-out, modelled
; as (%1 + %2) <u %1, in operand 3 (either VCC or an SGPR pair).
1018(define_insn "addv64si3_vcc<exec_vcc>"
 1019 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
 1020 (plus:V64SI
 1021 (match_operand:V64SI 1 "register_operand" "% v, v")
 1022 (match_operand:V64SI 2 "gcn_alu_operand" "vSvB,vSvB")))
 1023 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
 1024 (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
 1025 (match_dup 1)))]
 1026 ""
 1027 "v_add%^_u32\t%0, %3, %2, %1"
 1028 [(set_attr "type" "vop2,vop3b")
 1029 (set_attr "length" "8")])
1030
1031; This pattern only changes the VCC bits when the corresponding lane is
1032; enabled, so the set must be described as an ior.
1033
; Broadcast-scalar form of addv64si3_vcc: adds scalar %1 to vector %2
; and writes the per-lane carry-out to %3 (VCC or SGPR pair).
1034(define_insn "addv64si3_vcc_dup<exec_vcc>"
 1035 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
 1036 (plus:V64SI
 1037 (vec_duplicate:V64SI
 1038 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
 1039 (match_operand:V64SI 2 "register_operand" " v, v")))
 1040 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
 1041 (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
 1042 (match_dup 1))
 1043 (vec_duplicate:V64SI (match_dup 2))))]
 1044 ""
 1045 "v_add%^_u32\t%0, %3, %2, %1"
 1046 [(set_attr "type" "vop2,vop3b")
 1047 (set_attr "length" "8,8")])
1048
1049; This pattern does not accept SGPR because VCC read already counts as an
1050; SGPR use and number of SGPR operands is limited to 1.
1051
; Add-with-carry: %0 = %1 + %2 + carry-in, where the carry-in mask %3 is
; modelled as a vec_merge selecting 1 or 0 per lane.  The carry-out set
; is expressed as an IOR because v_addc only updates the condition bits
; of enabled lanes (see comment above addv64si3_vcc_dup).
1052(define_insn "addcv64si3<exec_vcc>"
 1053 [(set (match_operand:V64SI 0 "register_operand" "=v,v")
 1054 (plus:V64SI
 1055 (plus:V64SI
 1056 (vec_merge:V64SI
 1057 (vec_duplicate:V64SI (const_int 1))
 1058 (vec_duplicate:V64SI (const_int 0))
 1059 (match_operand:DI 3 "register_operand" " cV,Sv"))
 1060 (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
 1061 (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB")))
 1062 (set (match_operand:DI 4 "register_operand" "=cV,Sg")
 1063 (ior:DI (ltu:DI (plus:V64SI
 1064 (plus:V64SI
 1065 (vec_merge:V64SI
 1066 (vec_duplicate:V64SI (const_int 1))
 1067 (vec_duplicate:V64SI (const_int 0))
 1068 (match_dup 3))
 1069 (match_dup 1))
 1070 (match_dup 2))
 1071 (match_dup 2))
 1072 (ltu:DI (plus:V64SI
 1073 (vec_merge:V64SI
 1074 (vec_duplicate:V64SI (const_int 1))
 1075 (vec_duplicate:V64SI (const_int 0))
 1076 (match_dup 3))
 1077 (match_dup 1))
 1078 (match_dup 1))))]
 1079 ""
 1080 "v_addc%^_u32\t%0, %4, %1, %2, %3"
 1081 [(set_attr "type" "vop2,vop3b")
 1082 (set_attr "length" "4,8")])
1083
; Add-with-carry where the second addend is a broadcast scalar (%2);
; structurally identical to addcv64si3 otherwise, including the
; IOR-merged carry-out for enabled lanes.
1084(define_insn "addcv64si3_dup<exec_vcc>"
 1085 [(set (match_operand:V64SI 0 "register_operand" "=v,v")
 1086 (plus:V64SI
 1087 (plus:V64SI
 1088 (vec_merge:V64SI
 1089 (vec_duplicate:V64SI (const_int 1))
 1090 (vec_duplicate:V64SI (const_int 0))
 1091 (match_operand:DI 3 "register_operand" " cV, Sv"))
 1092 (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
 1093 (vec_duplicate:V64SI
 1094 (match_operand:SI 2 "gcn_alu_operand" "SvB,SvB"))))
 1095 (set (match_operand:DI 4 "register_operand" "=cV, Sg")
 1096 (ior:DI (ltu:DI (plus:V64SI (plus:V64SI
 1097 (vec_merge:V64SI
 1098 (vec_duplicate:V64SI (const_int 1))
 1099 (vec_duplicate:V64SI (const_int 0))
 1100 (match_dup 3))
 1101 (match_dup 1))
 1102 (vec_duplicate:V64SI
 1103 (match_dup 2)))
 1104 (vec_duplicate:V64SI
 1105 (match_dup 2)))
 1106 (ltu:DI (plus:V64SI (vec_merge:V64SI
 1107 (vec_duplicate:V64SI (const_int 1))
 1108 (vec_duplicate:V64SI (const_int 0))
 1109 (match_dup 3))
 1110 (match_dup 1))
 1111 (match_dup 1))))]
 1112 ""
 1113 "v_addc%^_u32\t%0, %4, %1, %2, %3"
 1114 [(set_attr "type" "vop2,vop3b")
 1115 (set_attr "length" "4,8")])
1116
; Vector SImode subtract.  Two alternatives select v_sub or v_subrev so
; whichever operand is a scalar/immediate can be the non-VGPR source.
; VCC is clobbered (borrow bits written unconditionally).
1117(define_insn "subv64si3<exec_clobber>"
 1118 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
 1119 (minus:V64SI
 1120 (match_operand:V64SI 1 "gcn_alu_operand" "vSvB, v")
 1121 (match_operand:V64SI 2 "gcn_alu_operand" " v,vSvB")))
 1122 (clobber (reg:DI VCC_REG))]
 1123 ""
 1124 "@
 1125 v_sub%^_u32\t%0, vcc, %1, %2
 1126 v_subrev%^_u32\t%0, vcc, %2, %1"
 1127 [(set_attr "type" "vop2")
 1128 (set_attr "length" "8,8")])
1129
; Subtract exposing the per-lane unsigned borrow-out, modelled as
; (%1 - %2) >u %1, in operand 3 (VCC or an SGPR pair); v_sub/v_subrev
; alternatives as in subv64si3.
1130(define_insn "subv64si3_vcc<exec_vcc>"
 1131 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
 1132 (minus:V64SI
 1133 (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
 1134 (match_operand:V64SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
 1135 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
 1136 (gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
 1137 (match_dup 1)))]
 1138 ""
 1139 "@
 1140 v_sub%^_u32\t%0, %3, %1, %2
 1141 v_sub%^_u32\t%0, %3, %1, %2
 1142 v_subrev%^_u32\t%0, %3, %2, %1
 1143 v_subrev%^_u32\t%0, %3, %2, %1"
 1144 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
 1145 (set_attr "length" "8")])
1146
1147; This pattern does not accept SGPR because VCC read already counts
1148; as a SGPR use and number of SGPR operands is limited to 1.
1149
; Subtract-with-borrow (v_subb / v_subbrev): the borrow-in mask %3 is a
; 1/0 per-lane vec_merge.  As with addc, the borrow-out is IOR-merged
; into %4 because only enabled lanes' condition bits are updated.
1150(define_insn "subcv64si3<exec_vcc>"
 1151 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
 1152 (minus:V64SI
 1153 (minus:V64SI
 1154 (vec_merge:V64SI
 1155 (vec_duplicate:V64SI (const_int 1))
 1156 (vec_duplicate:V64SI (const_int 0))
 1157 (match_operand:DI 3 "gcn_alu_operand" " cV,Sv,cV,Sv"))
 1158 (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
 1159 (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB,vA,vA")))
 1160 (set (match_operand:DI 4 "register_operand" "=cV,Sg,cV,Sg")
 1161 (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
 1162 (vec_merge:V64SI
 1163 (vec_duplicate:V64SI (const_int 1))
 1164 (vec_duplicate:V64SI (const_int 0))
 1165 (match_dup 3))
 1166 (match_dup 1))
 1167 (match_dup 2))
 1168 (match_dup 2))
 1169 (ltu:DI (minus:V64SI (vec_merge:V64SI
 1170 (vec_duplicate:V64SI (const_int 1))
 1171 (vec_duplicate:V64SI (const_int 0))
 1172 (match_dup 3))
 1173 (match_dup 1))
 1174 (match_dup 1))))]
 1175 ""
 1176 "@
 1177 v_subb%^_u32\t%0, %4, %1, %2, %3
 1178 v_subb%^_u32\t%0, %4, %1, %2, %3
 1179 v_subbrev%^_u32\t%0, %4, %2, %1, %3
 1180 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
 1181 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
 1182 (set_attr "length" "8")])
1183
; 64-bit vector add: split (once all operands can be halved, per
; gcn_can_split_p) into a low-word add producing carry in VCC followed
; by a high-word add-with-carry consuming it.
1184(define_insn_and_split "addv64di3"
 1185 [(set (match_operand:V64DI 0 "register_operand" "= &v")
 1186 (plus:V64DI
 1187 (match_operand:V64DI 1 "register_operand" "% v0")
 1188 (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0")))
 1189 (clobber (reg:DI VCC_REG))]
 1190 ""
 1191 "#"
 1192 "gcn_can_split_p (V64DImode, operands[0])
 1193 && gcn_can_split_p (V64DImode, operands[1])
 1194 && gcn_can_split_p (V64DImode, operands[2])"
 1195 [(const_int 0)]
 1196 {
 1197 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1198 emit_insn (gen_addv64si3_vcc
 1199 (gcn_operand_part (V64DImode, operands[0], 0),
 1200 gcn_operand_part (V64DImode, operands[1], 0),
 1201 gcn_operand_part (V64DImode, operands[2], 0),
 1202 vcc));
 1203 emit_insn (gen_addcv64si3
 1204 (gcn_operand_part (V64DImode, operands[0], 1),
 1205 gcn_operand_part (V64DImode, operands[1], 1),
 1206 gcn_operand_part (V64DImode, operands[2], 1),
 1207 vcc, vcc));
 1208 DONE;
 1209 }
 1210 [(set_attr "type" "vmult")
 1211 (set_attr "length" "8")])
1212
; EXEC-masked variant of addv64di3: operand 3 supplies the merge value
; for disabled lanes (or an undef unspec, "U0"), operand 4 the EXEC
; register; both halves are emitted with _exec sub-patterns.
1213(define_insn_and_split "addv64di3_exec"
 1214 [(set (match_operand:V64DI 0 "register_operand" "= &v")
 1215 (vec_merge:V64DI
 1216 (plus:V64DI
 1217 (match_operand:V64DI 1 "register_operand" "% v0")
 1218 (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0"))
 1219 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
 1220 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
 1221 (clobber (reg:DI VCC_REG))]
 1222 ""
 1223 "#"
 1224 "gcn_can_split_p (V64DImode, operands[0])
 1225 && gcn_can_split_p (V64DImode, operands[1])
 1226 && gcn_can_split_p (V64DImode, operands[2])
 1227 && gcn_can_split_p (V64DImode, operands[4])"
 1228 [(const_int 0)]
 1229 {
 1230 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1231 emit_insn (gen_addv64si3_vcc_exec
 1232 (gcn_operand_part (V64DImode, operands[0], 0),
 1233 gcn_operand_part (V64DImode, operands[1], 0),
 1234 gcn_operand_part (V64DImode, operands[2], 0),
 1235 vcc,
 1236 gcn_operand_part (V64DImode, operands[3], 0),
 1237 operands[4]));
 1238 emit_insn (gen_addcv64si3_exec
 1239 (gcn_operand_part (V64DImode, operands[0], 1),
 1240 gcn_operand_part (V64DImode, operands[1], 1),
 1241 gcn_operand_part (V64DImode, operands[2], 1),
 1242 vcc, vcc,
 1243 gcn_operand_part (V64DImode, operands[3], 1),
 1244 operands[4]));
 1245 DONE;
 1246 }
 1247 [(set_attr "type" "vmult")
 1248 (set_attr "length" "8")])
1249
; 64-bit vector subtract: split into a low-word subtract producing the
; borrow in VCC followed by a high-word subtract-with-borrow.
1250(define_insn_and_split "subv64di3"
 1251 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
 1252 (minus:V64DI
 1253 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
 1254 (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0")))
 1255 (clobber (reg:DI VCC_REG))]
 1256 ""
 1257 "#"
 1258 "gcn_can_split_p (V64DImode, operands[0])
 1259 && gcn_can_split_p (V64DImode, operands[1])
 1260 && gcn_can_split_p (V64DImode, operands[2])"
 1261 [(const_int 0)]
 1262 {
 1263 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1264 emit_insn (gen_subv64si3_vcc
 1265 (gcn_operand_part (V64DImode, operands[0], 0),
 1266 gcn_operand_part (V64DImode, operands[1], 0),
 1267 gcn_operand_part (V64DImode, operands[2], 0),
 1268 vcc));
 1269 emit_insn (gen_subcv64si3
 1270 (gcn_operand_part (V64DImode, operands[0], 1),
 1271 gcn_operand_part (V64DImode, operands[1], 1),
 1272 gcn_operand_part (V64DImode, operands[2], 1),
 1273 vcc, vcc));
 1274 DONE;
 1275 }
 1276 [(set_attr "type" "vmult")
 1277 (set_attr "length" "8,8")])
1278
; EXEC-masked 64-bit subtract; the insn condition requires at least one
; register input since both alternatives allow a non-register operand.
1279(define_insn_and_split "subv64di3_exec"
 1280 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
 1281 (vec_merge:V64DI
 1282 (minus:V64DI
 1283 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
 1284 (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0"))
 1285 (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
 1286 " U0, U0")
 1287 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
 1288 (clobber (reg:DI VCC_REG))]
 1289 "register_operand (operands[1], VOIDmode)
 1290 || register_operand (operands[2], VOIDmode)"
 1291 "#"
 1292 "gcn_can_split_p (V64DImode, operands[0])
 1293 && gcn_can_split_p (V64DImode, operands[1])
 1294 && gcn_can_split_p (V64DImode, operands[2])
 1295 && gcn_can_split_p (V64DImode, operands[3])"
 1296 [(const_int 0)]
 1297 {
 1298 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1299 emit_insn (gen_subv64si3_vcc_exec
 1300 (gcn_operand_part (V64DImode, operands[0], 0),
 1301 gcn_operand_part (V64DImode, operands[1], 0),
 1302 gcn_operand_part (V64DImode, operands[2], 0),
 1303 vcc,
 1304 gcn_operand_part (V64DImode, operands[3], 0),
 1305 operands[4]));
 1306 emit_insn (gen_subcv64si3_exec
 1307 (gcn_operand_part (V64DImode, operands[0], 1),
 1308 gcn_operand_part (V64DImode, operands[1], 1),
 1309 gcn_operand_part (V64DImode, operands[2], 1),
 1310 vcc, vcc,
 1311 gcn_operand_part (V64DImode, operands[3], 1),
 1312 operands[4]));
 1313 DONE;
 1314 }
 1315 [(set_attr "type" "vmult")
 1316 (set_attr "length" "8,8")])
1317
; 64-bit add of a broadcast DImode scalar: split into addv64si3_vcc_dup
; on the scalar's low word and addcv64si3_dup on its high word, with the
; carry passed through VCC.
1318(define_insn_and_split "addv64di3_dup"
 1319 [(set (match_operand:V64DI 0 "register_operand" "= &v")
 1320 (plus:V64DI
 1321 (match_operand:V64DI 1 "register_operand" " v0")
 1322 (vec_duplicate:V64DI
 1323 (match_operand:DI 2 "gcn_alu_operand" "SvDB"))))
 1324 (clobber (reg:DI VCC_REG))]
 1325 ""
 1326 "#"
 1327 "gcn_can_split_p (V64DImode, operands[0])
 1328 && gcn_can_split_p (V64DImode, operands[1])
 1329 && gcn_can_split_p (V64DImode, operands[2])"
 1330 [(const_int 0)]
 1331 {
 1332 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1333 emit_insn (gen_addv64si3_vcc_dup
 1334 (gcn_operand_part (V64DImode, operands[0], 0),
 1335 gcn_operand_part (DImode, operands[2], 0),
 1336 gcn_operand_part (V64DImode, operands[1], 0),
 1337 vcc));
 1338 emit_insn (gen_addcv64si3_dup
 1339 (gcn_operand_part (V64DImode, operands[0], 1),
 1340 gcn_operand_part (V64DImode, operands[1], 1),
 1341 gcn_operand_part (DImode, operands[2], 1),
 1342 vcc, vcc));
 1343 DONE;
 1344 }
 1345 [(set_attr "type" "vmult")
 1346 (set_attr "length" "8")])
1347
; EXEC-masked variant of addv64di3_dup: operand 3 is the merge value for
; disabled lanes, operand 4 the EXEC mask.
1348(define_insn_and_split "addv64di3_dup_exec"
 1349 [(set (match_operand:V64DI 0 "register_operand" "= &v")
 1350 (vec_merge:V64DI
 1351 (plus:V64DI
 1352 (match_operand:V64DI 1 "register_operand" " v0")
 1353 (vec_duplicate:V64DI
 1354 (match_operand:DI 2 "gcn_alu_operand" "SvDB")))
 1355 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
 1356 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
 1357 (clobber (reg:DI VCC_REG))]
 1358 ""
 1359 "#"
 1360 "gcn_can_split_p (V64DImode, operands[0])
 1361 && gcn_can_split_p (V64DImode, operands[1])
 1362 && gcn_can_split_p (V64DImode, operands[2])
 1363 && gcn_can_split_p (V64DImode, operands[3])"
 1364 [(const_int 0)]
 1365 {
 1366 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1367 emit_insn (gen_addv64si3_vcc_dup_exec
 1368 (gcn_operand_part (V64DImode, operands[0], 0),
 1369 gcn_operand_part (DImode, operands[2], 0),
 1370 gcn_operand_part (V64DImode, operands[1], 0),
 1371 vcc,
 1372 gcn_operand_part (V64DImode, operands[3], 0),
 1373 operands[4]));
 1374 emit_insn (gen_addcv64si3_dup_exec
 1375 (gcn_operand_part (V64DImode, operands[0], 1),
 1376 gcn_operand_part (V64DImode, operands[1], 1),
 1377 gcn_operand_part (DImode, operands[2], 1),
 1378 vcc, vcc,
 1379 gcn_operand_part (V64DImode, operands[3], 1),
 1380 operands[4]));
 1381 DONE;
 1382 }
 1383 [(set_attr "type" "vmult")
 1384 (set_attr "length" "8")])
1385
; V64DI += zero_extend (V64SI): the low words are added with carry-out;
; the high word of the result is %2's high word plus 0 plus carry (the
; zero-extended addend contributes nothing above bit 31).
1386(define_insn_and_split "addv64di3_zext"
 1387 [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
 1388 (plus:V64DI
 1389 (zero_extend:V64DI
 1390 (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
 1391 (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA")))
 1392 (clobber (reg:DI VCC_REG))]
 1393 ""
 1394 "#"
 1395 "gcn_can_split_p (V64DImode, operands[0])
 1396 && gcn_can_split_p (V64DImode, operands[2])"
 1397 [(const_int 0)]
 1398 {
 1399 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1400 emit_insn (gen_addv64si3_vcc
 1401 (gcn_operand_part (V64DImode, operands[0], 0),
 1402 operands[1],
 1403 gcn_operand_part (V64DImode, operands[2], 0),
 1404 vcc));
 1405 emit_insn (gen_addcv64si3
 1406 (gcn_operand_part (V64DImode, operands[0], 1),
 1407 gcn_operand_part (V64DImode, operands[2], 1),
 1408 const0_rtx, vcc, vcc));
 1409 DONE;
 1410 }
 1411 [(set_attr "type" "vmult")
 1412 (set_attr "length" "8,8")])
1413
; EXEC-masked variant of addv64di3_zext.
1414(define_insn_and_split "addv64di3_zext_exec"
 1415 [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
 1416 (vec_merge:V64DI
 1417 (plus:V64DI
 1418 (zero_extend:V64DI
 1419 (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
 1420 (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA"))
 1421 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
 1422 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
 1423 (clobber (reg:DI VCC_REG))]
 1424 ""
 1425 "#"
 1426 "gcn_can_split_p (V64DImode, operands[0])
 1427 && gcn_can_split_p (V64DImode, operands[2])
 1428 && gcn_can_split_p (V64DImode, operands[3])"
 1429 [(const_int 0)]
 1430 {
 1431 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1432 emit_insn (gen_addv64si3_vcc_exec
 1433 (gcn_operand_part (V64DImode, operands[0], 0),
 1434 operands[1],
 1435 gcn_operand_part (V64DImode, operands[2], 0),
 1436 vcc,
 1437 gcn_operand_part (V64DImode, operands[3], 0),
 1438 operands[4]));
 1439 emit_insn (gen_addcv64si3_exec
 1440 (gcn_operand_part (V64DImode, operands[0], 1),
 1441 gcn_operand_part (V64DImode, operands[2], 1),
 1442 const0_rtx, vcc, vcc,
 1443 gcn_operand_part (V64DImode, operands[3], 1),
 1444 operands[4]));
 1445 DONE;
 1446 }
 1447 [(set_attr "type" "vmult")
 1448 (set_attr "length" "8,8")])
1449
; V64DI += zero_extend (broadcast SI scalar): low words added via the
; _vcc_dup pattern; high word is %2's high word plus 0 plus carry.
1450(define_insn_and_split "addv64di3_zext_dup"
 1451 [(set (match_operand:V64DI 0 "register_operand" "=&v")
 1452 (plus:V64DI
 1453 (zero_extend:V64DI
 1454 (vec_duplicate:V64SI
 1455 (match_operand:SI 1 "gcn_alu_operand" "BSv")))
 1456 (match_operand:V64DI 2 "gcn_alu_operand" "vA0")))
 1457 (clobber (reg:DI VCC_REG))]
 1458 ""
 1459 "#"
 1460 "gcn_can_split_p (V64DImode, operands[0])
 1461 && gcn_can_split_p (V64DImode, operands[2])"
 1462 [(const_int 0)]
 1463 {
 1464 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1465 emit_insn (gen_addv64si3_vcc_dup
 1466 (gcn_operand_part (V64DImode, operands[0], 0),
 1467 gcn_operand_part (DImode, operands[1], 0),
 1468 gcn_operand_part (V64DImode, operands[2], 0),
 1469 vcc));
 1470 emit_insn (gen_addcv64si3
 1471 (gcn_operand_part (V64DImode, operands[0], 1),
 1472 gcn_operand_part (V64DImode, operands[2], 1),
 1473 const0_rtx, vcc, vcc));
 1474 DONE;
 1475 }
 1476 [(set_attr "type" "vmult")
 1477 (set_attr "length" "8")])
1478
; EXEC-masked variant of addv64di3_zext_dup.
1479(define_insn_and_split "addv64di3_zext_dup_exec"
 1480 [(set (match_operand:V64DI 0 "register_operand" "=&v")
 1481 (vec_merge:V64DI
 1482 (plus:V64DI
 1483 (zero_extend:V64DI
 1484 (vec_duplicate:V64SI
 1485 (match_operand:SI 1 "gcn_alu_operand" "BSv")))
 1486 (match_operand:V64DI 2 "gcn_alu_operand" "vA0"))
 1487 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
 1488 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
 1489 (clobber (reg:DI VCC_REG))]
 1490 ""
 1491 "#"
 1492 "gcn_can_split_p (V64DImode, operands[0])
 1493 && gcn_can_split_p (V64DImode, operands[2])
 1494 && gcn_can_split_p (V64DImode, operands[3])"
 1495 [(const_int 0)]
 1496 {
 1497 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1498 emit_insn (gen_addv64si3_vcc_dup_exec
 1499 (gcn_operand_part (V64DImode, operands[0], 0),
 1500 gcn_operand_part (DImode, operands[1], 0),
 1501 gcn_operand_part (V64DImode, operands[2], 0),
 1502 vcc,
 1503 gcn_operand_part (V64DImode, operands[3], 0),
 1504 operands[4]));
 1505 emit_insn (gen_addcv64si3_exec
 1506 (gcn_operand_part (V64DImode, operands[0], 1),
 1507 gcn_operand_part (V64DImode, operands[2], 1),
 1508 const0_rtx, vcc, vcc,
 1509 gcn_operand_part (V64DImode, operands[3], 1),
 1510 operands[4]));
 1511 DONE;
 1512 }
 1513 [(set_attr "type" "vmult")
 1514 (set_attr "length" "8")])
1515
; zero_extend (V64SI vector) + broadcast DI scalar: the low halves are
; added with carry-out; the scalar's high word is broadcast into the
; destination's high half and then add-with-carried with 0.
1516(define_insn_and_split "addv64di3_zext_dup2"
 1517 [(set (match_operand:V64DI 0 "register_operand" "= v")
 1518 (plus:V64DI
 1519 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
 1520 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
 1521 (clobber (reg:DI VCC_REG))]
 1522 ""
 1523 "#"
 1524 "gcn_can_split_p (V64DImode, operands[0])"
 1525 [(const_int 0)]
 1526 {
 1527 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1528 emit_insn (gen_addv64si3_vcc_dup
 1529 (gcn_operand_part (V64DImode, operands[0], 0),
 1530 gcn_operand_part (DImode, operands[2], 0),
 1531 operands[1],
 1532 vcc));
 1533 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
 1534 emit_insn (gen_vec_duplicatev64si
 1535 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
 1536 emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
 1537 DONE;
 1538 }
 1539 [(set_attr "type" "vmult")
 1540 (set_attr "length" "8")])
1541
; EXEC-masked variant of addv64di3_zext_dup2; the high-half broadcast is
; emitted with an undef merge value since it is fully overwritten by the
; following add-with-carry for enabled lanes.
1542(define_insn_and_split "addv64di3_zext_dup2_exec"
 1543 [(set (match_operand:V64DI 0 "register_operand" "= v")
 1544 (vec_merge:V64DI
 1545 (plus:V64DI
 1546 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
 1547 " vA"))
 1548 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
 1549 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
 1550 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
 1551 (clobber (reg:DI VCC_REG))]
 1552 ""
 1553 "#"
 1554 "gcn_can_split_p (V64DImode, operands[0])
 1555 && gcn_can_split_p (V64DImode, operands[3])"
 1556 [(const_int 0)]
 1557 {
 1558 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1559 emit_insn (gen_addv64si3_vcc_dup_exec
 1560 (gcn_operand_part (V64DImode, operands[0], 0),
 1561 gcn_operand_part (DImode, operands[2], 0),
 1562 operands[1],
 1563 vcc,
 1564 gcn_operand_part (V64DImode, operands[3], 0),
 1565 operands[4]));
 1566 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
 1567 emit_insn (gen_vec_duplicatev64si_exec
 1568 (dsthi, gcn_operand_part (DImode, operands[2], 1),
 1569 gcn_gen_undef (V64SImode), operands[4]));
 1570 emit_insn (gen_addcv64si3_exec
 1571 (dsthi, dsthi, const0_rtx, vcc, vcc,
 1572 gcn_operand_part (V64DImode, operands[3], 1),
 1573 operands[4]));
 1574 DONE;
 1575 }
 1576 [(set_attr "type" "vmult")
 1577 (set_attr "length" "8")])
1578
; sign_extend variant of addv64di3_zext_dup2: the sign word of %1
; (arithmetic shift right by 31) is materialized into scratch %3 and
; used as the high-half carry addend instead of 0.
1579(define_insn_and_split "addv64di3_sext_dup2"
 1580 [(set (match_operand:V64DI 0 "register_operand" "= v")
 1581 (plus:V64DI
 1582 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
 1583 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
 1584 (clobber (match_scratch:V64SI 3 "=&v"))
 1585 (clobber (reg:DI VCC_REG))]
 1586 ""
 1587 "#"
 1588 "gcn_can_split_p (V64DImode, operands[0])"
 1589 [(const_int 0)]
 1590 {
 1591 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1592 emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
 1593 emit_insn (gen_addv64si3_vcc_dup
 1594 (gcn_operand_part (V64DImode, operands[0], 0),
 1595 gcn_operand_part (DImode, operands[2], 0),
 1596 operands[1],
 1597 vcc));
 1598 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
 1599 emit_insn (gen_vec_duplicatev64si
 1600 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
 1601 emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
 1602 DONE;
 1603 }
 1604 [(set_attr "type" "vmult")
 1605 (set_attr "length" "8")])
1606
; EXEC-masked variant of addv64di3_sext_dup2.
1607(define_insn_and_split "addv64di3_sext_dup2_exec"
 1608 [(set (match_operand:V64DI 0 "register_operand" "= v")
 1609 (vec_merge:V64DI
 1610 (plus:V64DI
 1611 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
 1612 " vA"))
 1613 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
 1614 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
 1615 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
 1616 (clobber (match_scratch:V64SI 5 "=&v"))
 1617 (clobber (reg:DI VCC_REG))]
 1618 ""
 1619 "#"
 1620 "gcn_can_split_p (V64DImode, operands[0])
 1621 && gcn_can_split_p (V64DImode, operands[3])"
 1622 [(const_int 0)]
 1623 {
 1624 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
 1625 emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
 1626 gcn_gen_undef (V64SImode), operands[4]));
 1627 emit_insn (gen_addv64si3_vcc_dup_exec
 1628 (gcn_operand_part (V64DImode, operands[0], 0),
 1629 gcn_operand_part (DImode, operands[2], 0),
 1630 operands[1],
 1631 vcc,
 1632 gcn_operand_part (V64DImode, operands[3], 0),
 1633 operands[4]));
 1634 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
 1635 emit_insn (gen_vec_duplicatev64si_exec
 1636 (dsthi, gcn_operand_part (DImode, operands[2], 1),
 1637 gcn_gen_undef (V64SImode), operands[4]));
 1638 emit_insn (gen_addcv64si3_exec
 1639 (dsthi, dsthi, operands[5], vcc, vcc,
 1640 gcn_operand_part (V64DImode, operands[3], 1),
 1641 operands[4]));
 1642 DONE;
 1643 }
 1644 [(set_attr "type" "vmult")
 1645 (set_attr "length" "8")])
1646
1647;; }}}
1648;; {{{ DS memory ALU: add/sub
1649
; Modes supported by the DS (LDS/GDS) read-modify-write ALU patterns
; below, in vector and scalar forms.
1650(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
 1651(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1652
1653;; FIXME: the vector patterns probably need RD expanded to a vector of
1654;; addresses. For now, the only way a vector can get into LDS is
1655;; if the user puts it there manually.
1656;;
1657;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1658;; checked to see if anything can ever use them.
1659
; Read-modify-write add on a DS memory location (ds_add): operands 0 and
; 1 must name the same memory, enforced by the rtx_equal_p condition.
1660(define_insn "add<mode>3_ds<exec>"
 1661 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
 1662 (plus:DS_ARITH_MODE
 1663 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
 1664 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
 1665 "rtx_equal_p (operands[0], operands[1])"
 1666 "ds_add%u0\t%A0, %2%O0"
 1667 [(set_attr "type" "ds")
 1668 (set_attr "length" "8")])
1669
; Scalar-mode counterpart of add<mode>3_ds (same in-place constraint).
1670(define_insn "add<mode>3_ds_scalar"
 1671 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
 1672 (plus:DS_ARITH_SCALAR_MODE
 1673 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
 1674 "%RD")
 1675 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
 1676 "rtx_equal_p (operands[0], operands[1])"
 1677 "ds_add%u0\t%A0, %2%O0"
 1678 [(set_attr "type" "ds")
 1679 (set_attr "length" "8")])
1680
; Read-modify-write subtract on a DS memory location (ds_sub):
; mem = mem - %2; operands 0 and 1 must be the same memory.
1681(define_insn "sub<mode>3_ds<exec>"
 1682 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
 1683 (minus:DS_ARITH_MODE
 1684 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
 1685 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
 1686 "rtx_equal_p (operands[0], operands[1])"
 1687 "ds_sub%u0\t%A0, %2%O0"
 1688 [(set_attr "type" "ds")
 1689 (set_attr "length" "8")])
1690
; Scalar-mode counterpart of sub<mode>3_ds.
1691(define_insn "sub<mode>3_ds_scalar"
 1692 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
 1693 (minus:DS_ARITH_SCALAR_MODE
 1694 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
 1695 " RD")
 1696 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
 1697 "rtx_equal_p (operands[0], operands[1])"
 1698 "ds_sub%u0\t%A0, %2%O0"
 1699 [(set_attr "type" "ds")
 1700 (set_attr "length" "8")])
1701
; Reverse subtract on a DS memory location (ds_rsub): mem = %2 - mem;
; operands 0 and 1 must be the same memory.
1702(define_insn "subr<mode>3_ds<exec>"
 1703 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
 1704 (minus:DS_ARITH_MODE
 1705 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
 1706 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
 1707 "rtx_equal_p (operands[0], operands[1])"
 1708 "ds_rsub%u0\t%A0, %2%O0"
 1709 [(set_attr "type" "ds")
 1710 (set_attr "length" "8")])
1711
; Scalar-mode counterpart of subr<mode>3_ds.
1712(define_insn "subr<mode>3_ds_scalar"
 1713 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
 1714 (minus:DS_ARITH_SCALAR_MODE
 1715 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
 1716 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
 1717 " RD")))]
 1718 "rtx_equal_p (operands[0], operands[1])"
 1719 "ds_rsub%u0\t%A0, %2%O0"
 1720 [(set_attr "type" "ds")
 1721 (set_attr "length" "8")])
1722
1723;; }}}
1724;; {{{ ALU special case: mult
1725
; High 32 bits of a widening 32x32->64 multiply, signed or unsigned via
; the any_extend iterator; <sgnsuffix>0 selects the matching v_mul_hi
; suffix in the output template.
1726(define_insn "<su>mulv64si3_highpart<exec>"
 1727 [(set (match_operand:V64SI 0 "register_operand" "= v")
 1728 (truncate:V64SI
 1729 (lshiftrt:V64DI
 1730 (mult:V64DI
 1731 (any_extend:V64DI
 1732 (match_operand:V64SI 1 "gcn_alu_operand" " %v"))
 1733 (any_extend:V64DI
 1734 (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
 1735 (const_int 32))))]
 1736 ""
 1737 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
 1738 [(set_attr "type" "vop3a")
 1739 (set_attr "length" "8")])
1740
; Low 32 bits of a 32x32 multiply; v_mul_lo_u32 is correct regardless of
; signedness since the low word is the same for signed and unsigned.
1741(define_insn "mulv64si3<exec>"
 1742 [(set (match_operand:V64SI 0 "register_operand" "= v")
 1743 (mult:V64SI
 1744 (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
 1745 (match_operand:V64SI 2 "gcn_alu_operand" " vSvA")))]
 1746 ""
 1747 "v_mul_lo_u32\t%0, %1, %2"
 1748 [(set_attr "type" "vop3a")
 1749 (set_attr "length" "8")])
1750
; Broadcast-scalar form of mulv64si3.
1751(define_insn "mulv64si3_dup<exec>"
 1752 [(set (match_operand:V64SI 0 "register_operand" "= v")
 1753 (mult:V64SI
 1754 (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
 1755 (vec_duplicate:V64SI
 1756 (match_operand:SI 2 "gcn_alu_operand" " SvA"))))]
 1757 ""
 1758 "v_mul_lo_u32\t%0, %1, %2"
 1759 [(set_attr "type" "vop3a")
 1760 (set_attr "length" "8")])
1761
; 64-bit vector multiply, split into 32-bit schoolbook partial products:
;   lo(out) = lo(1) * lo(2)
;   hi(out) = umulhi (lo(1), lo(2)) + lo(hi(1)*lo(2)) + lo(lo(1)*hi(2))
; Fix: the previous code also added lo(hi(1)*hi(2)) into the high word,
; but that partial product contributes only to bits >= 64 of the full
; 128-bit product, so including it corrupted the 64-bit result whenever
; both high words were nonzero (e.g. 2^32 * 2^32 gave 2^32, not 0).
; Carries out of the high-word additions are likewise >= bit 64 and are
; correctly discarded.
1762(define_insn_and_split "mulv64di3"
 1763 [(set (match_operand:V64DI 0 "register_operand" "=&v")
 1764 (mult:V64DI
 1765 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
 1766 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
 1767 (clobber (match_scratch:V64SI 3 "=&v"))]
 1768 ""
 1769 "#"
 1770 "reload_completed"
 1771 [(const_int 0)]
 1772 {
 1773 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
 1774 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
 1775 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
 1776 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
 1777 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
 1778 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
 1779 rtx tmp = operands[3];
 1780
 1781 emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
 1782 emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
 1783 emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
 1784 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
 1785 emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
 1786 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
 1789 DONE;
 1790 })
1791
; EXEC-masked 64-bit vector multiply; same schoolbook decomposition as
; mulv64di3, with every sub-insn masked by operand 4 and the merge value
; (operand 3) split per half (undef unspec handled specially).
; Fix: as in mulv64di3, the lo(hi(1)*hi(2)) term was wrongly added into
; the high word even though it contributes only to bits >= 64 of the
; full product; the erroneous mul+add pair is removed.
1792(define_insn_and_split "mulv64di3_exec"
 1793 [(set (match_operand:V64DI 0 "register_operand" "=&v")
 1794 (vec_merge:V64DI
 1795 (mult:V64DI
 1796 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
 1797 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
 1798 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
 1799 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
 1800 (clobber (match_scratch:V64SI 5 "=&v"))]
 1801 ""
 1802 "#"
 1803 "reload_completed"
 1804 [(const_int 0)]
 1805 {
 1806 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
 1807 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
 1808 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
 1809 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
 1810 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
 1811 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
 1812 rtx exec = operands[4];
 1813 rtx tmp = operands[5];
 1814
 1815 rtx old_lo, old_hi;
 1816 if (GET_CODE (operands[3]) == UNSPEC)
 1817 {
 1818 old_lo = old_hi = gcn_gen_undef (V64SImode);
 1819 }
 1820 else
 1821 {
 1822 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
 1823 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
 1824 }
 1825
 1826 rtx undef = gcn_gen_undef (V64SImode);
 1827
 1828 emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
 1829 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
 1830 old_hi, exec));
 1831 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
 1832 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
 1833 emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
 1834 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
 1837 DONE;
 1838 })
1839
;; 64-bit multiply where operand 1 is a zero-extended 32-bit vector, so the
;; left-hand high part is known to be zero.  The 64-bit product therefore
;; needs only: lo = low32 (l*r_lo); hi = high32 (l*r_lo) + low32 (l*r_hi).
;; Split after reload; operand 3 is a V64SI scratch for the middle product.
 1840(define_insn_and_split "mulv64di3_zext"
 1841 [(set (match_operand:V64DI 0 "register_operand" "=&v")
 1842 (mult:V64DI
 1843 (zero_extend:V64DI
 1844 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
 1845 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
 1846 (clobber (match_scratch:V64SI 3 "=&v"))]
 1847 ""
 1848 "#"
 1849 "reload_completed"
 1850 [(const_int 0)]
 1851 {
 1852 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
 1853 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
 1854 rtx left = operands[1];
 1855 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
 1856 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
 1857 rtx tmp = operands[3];
 1858
 1859 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
 1860 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
 1861 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
 1862 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
 1863 DONE;
 1864 })
1865
;; EXEC-masked form of mulv64di3_zext: lanes with a clear bit in operand 4
;; retain operand 3's value (undefined when operand 3 is the U0 unspec).
;; The zero-extended left operand makes the left high part zero, so only
;; three masked 32-bit operations are needed after the split.
 1866(define_insn_and_split "mulv64di3_zext_exec"
 1867 [(set (match_operand:V64DI 0 "register_operand" "=&v")
 1868 (vec_merge:V64DI
 1869 (mult:V64DI
 1870 (zero_extend:V64DI
 1871 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
 1872 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
 1873 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
 1874 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
 1875 (clobber (match_scratch:V64SI 5 "=&v"))]
 1876 ""
 1877 "#"
 1878 "reload_completed"
 1879 [(const_int 0)]
 1880 {
 1881 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
 1882 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
 1883 rtx left = operands[1];
 1884 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
 1885 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
 1886 rtx exec = operands[4];
 1887 rtx tmp = operands[5];
 1888
 1889 rtx old_lo, old_hi;
 1890 if (GET_CODE (operands[3]) == UNSPEC)
 1891 {
 1892 old_lo = old_hi = gcn_gen_undef (V64SImode);
 1893 }
 1894 else
 1895 {
 1896 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
 1897 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
 1898 }
 1899
 1900 rtx undef = gcn_gen_undef (V64SImode);
 1901
 1902 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
 1903 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
 1904 old_hi, exec));
 1905 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
 1906 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
 1907 DONE;
 1908 })
1909
;; As mulv64di3_zext, but the right operand is a scalar DImode value
;; broadcast (vec_duplicate) across all 64 lanes.  gcn_operand_part is
;; used to pull the low/high 32-bit halves out of the scalar operand
;; (presumably an SGPR pair or immediate — confirm against gcn.c).
 1910(define_insn_and_split "mulv64di3_zext_dup2"
 1911 [(set (match_operand:V64DI 0 "register_operand" "= &v")
 1912 (mult:V64DI
 1913 (zero_extend:V64DI
 1914 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
 1915 (vec_duplicate:V64DI
 1916 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
 1917 (clobber (match_scratch:V64SI 3 "= &v"))]
 1918 ""
 1919 "#"
 1920 "reload_completed"
 1921 [(const_int 0)]
 1922 {
 1923 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
 1924 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
 1925 rtx left = operands[1];
 1926 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
 1927 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
 1928 rtx tmp = operands[3];
 1929
 1930 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
 1931 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
 1932 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
 1933 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
 1934 DONE;
 1935 })
1936
;; EXEC-masked form of mulv64di3_zext_dup2: zero-extended vector times a
;; broadcast scalar, with inactive lanes taking operand 3's value (or
;; undefined when operand 3 is the U0 unspec).
 1937(define_insn_and_split "mulv64di3_zext_dup2_exec"
 1938 [(set (match_operand:V64DI 0 "register_operand" "= &v")
 1939 (vec_merge:V64DI
 1940 (mult:V64DI
 1941 (zero_extend:V64DI
 1942 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
 1943 (vec_duplicate:V64DI
 1944 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
 1945 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
 1946 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
 1947 (clobber (match_scratch:V64SI 5 "= &v"))]
 1948 ""
 1949 "#"
 1950 "reload_completed"
 1951 [(const_int 0)]
 1952 {
 1953 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
 1954 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
 1955 rtx left = operands[1];
 1956 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
 1957 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
 1958 rtx exec = operands[4];
 1959 rtx tmp = operands[5];
 1960
 1961 rtx old_lo, old_hi;
 1962 if (GET_CODE (operands[3]) == UNSPEC)
 1963 {
 1964 old_lo = old_hi = gcn_gen_undef (V64SImode);
 1965 }
 1966 else
 1967 {
 1968 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
 1969 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
 1970 }
 1971
 1972 rtx undef = gcn_gen_undef (V64SImode);
 1973
 1974 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
 1975 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
 1976 old_hi, exec));
 1977 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
 1978 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
 1979 DONE;
 1980 })
1981
 1982;; }}}
 1983;; {{{ ALU generic case
 1984
2b99bed8 1985(define_mode_iterator VEC_INT_MODE [V64SI V64DI])
3d6275e3
AS
 1986
 1987(define_code_iterator bitop [and ior xor])
 1988(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
 1989(define_code_iterator minmaxop [smin smax umin umax])
 1990
;; Unary bit operation (the "bitunop" code iterator is defined earlier in
;; this file) on one-register integer vectors, as a single VOP1 insn.
 1991(define_insn "<expander><mode>2<exec>"
 1992 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v")
 1993 (bitunop:VEC_1REG_INT_MODE
 1994 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
 1995 ""
 1996 "v_<mnemonic>0\t%0, %1"
 1997 [(set_attr "type" "vop1")
 1998 (set_attr "length" "8")])
 1999
;; Binary and/ior/xor.  Alternative 1 is a VOP2 register form; alternative
;; 2 performs the operation directly on LDS memory ("RD") with a DS insn.
 2000(define_insn "<expander><mode>3<exec>"
 2001 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
 2002 (bitop:VEC_1REG_INT_MODE
 2003 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
 2004 "% v, 0")
 2005 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
 2006 "vSvB, v")))]
 2007 ""
 2008 "@
 2009 v_<mnemonic>0\t%0, %2, %1
 2010 ds_<mnemonic>0\t%A0, %2%O0"
 2011 [(set_attr "type" "vop2,ds")
 2012 (set_attr "length" "8,8")])
 2013
;; V64DI bitops: the register alternative is split after reload into two
;; independent V64SI operations on the low and high halves; the DS-memory
;; alternative stays as one DS instruction.
 2014(define_insn_and_split "<expander>v64di3"
 2015 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
 2016 (bitop:V64DI
 2017 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
 2018 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
 2019 ""
 2020 "@
 2021 #
 2022 ds_<mnemonic>0\t%A0, %2%O0"
 2023 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
 2024 [(set (match_dup 3)
 2025 (bitop:V64SI (match_dup 5) (match_dup 7)))
 2026 (set (match_dup 4)
 2027 (bitop:V64SI (match_dup 6) (match_dup 8)))]
 2028 {
 2029 operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
 2030 operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
 2031 operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
 2032 operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
 2033 operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
 2034 operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
 2035 }
 2036 [(set_attr "type" "vmult,ds")
 2037 (set_attr "length" "16,8")])
 2038
;; EXEC-masked V64DI bitop.  The insn condition restricts memory
;; destinations to the read-modify-write form the DS instruction can do.
 2039(define_insn_and_split "<expander>v64di3_exec"
 2040 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
 2041 (vec_merge:V64DI
 2042 (bitop:V64DI
 2043 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
 2044 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
 2045 (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
 2046 " U0,U0")
 2047 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
 2048 "!memory_operand (operands[0], VOIDmode)
 2049 || (rtx_equal_p (operands[0], operands[1])
 2050 && register_operand (operands[2], VOIDmode))"
 2051 "@
 2052 #
 2053 ds_<mnemonic>0\t%A0, %2%O0"
 2054 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
 2055 [(set (match_dup 5)
 2056 (vec_merge:V64SI
 2057 (bitop:V64SI (match_dup 7) (match_dup 9))
 2058 (match_dup 11)
 2059 (match_dup 4)))
 2060 (set (match_dup 6)
 2061 (vec_merge:V64SI
 2062 (bitop:V64SI (match_dup 8) (match_dup 10))
 2063 (match_dup 12)
 2064 (match_dup 4)))]
 2065 {
 2066 operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
 2067 operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
 2068 operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
 2069 operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
 2070 operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
 2071 operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
 2072 operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
 2073 operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
 2074 }
 2075 [(set_attr "type" "vmult,ds")
 2076 (set_attr "length" "16,8")])
 2077
;; Shift all lanes by a single scalar amount.  <revmnemonic> is used
;; because the hardware VOP2 form takes the shift amount first.
 2078(define_insn "<expander>v64si3<exec>"
 2079 [(set (match_operand:V64SI 0 "register_operand" "= v")
 2080 (shiftop:V64SI
 2081 (match_operand:V64SI 1 "gcn_alu_operand" " v")
 2082 (vec_duplicate:V64SI
 2083 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
 2084 ""
 2085 "v_<revmnemonic>0\t%0, %2, %1"
 2086 [(set_attr "type" "vop2")
 2087 (set_attr "length" "8")])
 2088
;; Shift each lane by a per-lane amount (vector shift-by-vector).
 2089(define_insn "v<expander>v64si3<exec>"
 2090 [(set (match_operand:V64SI 0 "register_operand" "=v")
 2091 (shiftop:V64SI
 2092 (match_operand:V64SI 1 "gcn_alu_operand" " v")
 2093 (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
 2094 ""
 2095 "v_<revmnemonic>0\t%0, %2, %1"
 2096 [(set_attr "type" "vop2")
 2097 (set_attr "length" "8")])
 2098
;; Signed/unsigned min/max; register VOP2 form or DS memory form.
 2099(define_insn "<expander><mode>3<exec>"
 2100 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
 2101 (minmaxop:VEC_1REG_INT_MODE
 2102 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
 2103 "% v, 0")
 2104 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
 2105 "vSvB, v")))]
 2106 ""
 2107 "@
 2108 v_<mnemonic>0\t%0, %2, %1
 2109 ds_<mnemonic>0\t%A0, %2%O0"
 2110 [(set_attr "type" "vop2,ds")
 2111 (set_attr "length" "8,8")])
 2112
 2113;; }}}
 2114;; {{{ FP binops - special cases
 2115
 2116; GCN does not directly provide a DFmode subtract instruction, so we do it by
 2117; adding the negated second operand to the first.
 2118
;; Vector DF subtract via v_add_f64 with source-negation modifier; the two
;; alternatives let either operand come from a scalar/constant source.
 2119(define_insn "subv64df3<exec>"
 2120 [(set (match_operand:V64DF 0 "register_operand" "= v, v")
 2121 (minus:V64DF
 2122 (match_operand:V64DF 1 "gcn_alu_operand" "vSvB, v")
 2123 (match_operand:V64DF 2 "gcn_alu_operand" " v,vSvB")))]
 2124 ""
 2125 "@
 2126 v_add_f64\t%0, %1, -%2
 2127 v_add_f64\t%0, -%2, %1"
 2128 [(set_attr "type" "vop3a")
 2129 (set_attr "length" "8,8")])
 2130
;; Scalar DF subtract, same trick (executes on the VALU with scalar modes).
 2131(define_insn "subdf"
 2132 [(set (match_operand:DF 0 "register_operand" "= v, v")
 2133 (minus:DF
 2134 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
 2135 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
 2136 ""
 2137 "@
 2138 v_add_f64\t%0, %1, -%2
 2139 v_add_f64\t%0, -%2, %1"
 2140 [(set_attr "type" "vop3a")
 2141 (set_attr "length" "8,8")])
 2142
 2143;; }}}
 2144;; {{{ FP binops - generic
 2145
 2146(define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
 2147(define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
 2148(define_mode_iterator FP_MODE [HF SF DF])
 2149(define_mode_iterator FP_1REG_MODE [HF SF])
 2150
 2151(define_code_iterator comm_fp [plus mult smin smax])
 2152(define_code_iterator nocomm_fp [minus])
 2153(define_code_iterator all_fp [plus mult minus smin smax])
 2154
;; Commutative FP binops on vectors; plain VOP2, operands commuted so the
;; flexible source can sit in src0.
 2155(define_insn "<expander><mode>3<exec>"
 2156 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
 2157 (comm_fp:VEC_FP_MODE
 2158 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v")
 2159 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
 2160 ""
 2161 "v_<mnemonic>0\t%0, %2, %1"
 2162 [(set_attr "type" "vop2")
 2163 (set_attr "length" "8")])
 2164
;; Scalar commutative FP binops; alternative 2 targets LDS memory ("RL").
;; NOTE(review): alternative 2 has type "ds" but still emits a
;; "v_<mnemonic>" template, and the "length" attribute lists only one
;; value for two alternatives — both look suspect; confirm against the
;; ds_* FP instruction set before relying on the DS alternative.
 2165(define_insn "<expander><mode>3"
 2166 [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL")
 2167 (comm_fp:FP_MODE
 2168 (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0")
 2169 (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
 2170 ""
 2171 "@
 2172 v_<mnemonic>0\t%0, %2, %1
 2173 v_<mnemonic>0\t%0, %1%O0"
 2174 [(set_attr "type" "vop2,ds")
 2175 (set_attr "length" "8")])
 2176
;; Non-commutative FP binop (subtract) on 1-register vectors; the second
;; alternative uses the reversed-operand mnemonic (e.g. v_subrev) so the
;; scalar/constant source can appear on either side.
 2177(define_insn "<expander><mode>3<exec>"
 2178 [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v")
 2179 (nocomm_fp:VEC_FP_1REG_MODE
 2180 (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
 2181 (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
 2182 ""
 2183 "@
 2184 v_<mnemonic>0\t%0, %1, %2
 2185 v_<revmnemonic>0\t%0, %2, %1"
 2186 [(set_attr "type" "vop2")
 2187 (set_attr "length" "8,8")])
 2188
;; Scalar HF/SF subtract, same reversed-mnemonic trick.
 2189(define_insn "<expander><mode>3"
 2190 [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v")
 2191 (nocomm_fp:FP_1REG_MODE
 2192 (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
 2193 (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
 2194 ""
 2195 "@
 2196 v_<mnemonic>0\t%0, %1, %2
 2197 v_<revmnemonic>0\t%0, %2, %1"
 2198 [(set_attr "type" "vop2")
 2199 (set_attr "length" "8,8")])
 2200
 2201;; }}}
 2202;; {{{ FP unops
 2203
;; abs/neg have no dedicated instructions; they are implemented as
;; "0 + |x|" / "0 + -x" using VOP3A source modifiers on an FP add.
 2204(define_insn "abs<mode>2"
 2205 [(set (match_operand:FP_MODE 0 "register_operand" "=v")
 2206 (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
 2207 ""
 2208 "v_add%i0\t%0, 0, |%1|"
 2209 [(set_attr "type" "vop3a")
 2210 (set_attr "length" "8")])
 2211
;; Vector form of abs, same add-with-modifier encoding.
 2212(define_insn "abs<mode>2<exec>"
 2213 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
 2214 (abs:VEC_FP_MODE
 2215 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
 2216 ""
 2217 "v_add%i0\t%0, 0, |%1|"
 2218 [(set_attr "type" "vop3a")
 2219 (set_attr "length" "8")])
 2220
;; Vector negate via "0 + -x".
 2221(define_insn "neg<mode>2<exec>"
 2222 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
 2223 (neg:VEC_FP_MODE
 2224 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
 2225 ""
 2226 "v_add%i0\t%0, 0, -%1"
 2227 [(set_attr "type" "vop3a")
 2228 (set_attr "length" "8")])
 2229
;; Hardware sqrt; gated on -funsafe-math-optimizations, presumably because
;; v_sqrt's accuracy/edge-case behavior does not meet IEEE requirements.
 2230(define_insn "sqrt<mode>2<exec>"
 2231 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
 2232 (sqrt:VEC_FP_MODE
 2233 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
 2234 "flag_unsafe_math_optimizations"
 2235 "v_sqrt%i0\t%0, %1"
 2236 [(set_attr "type" "vop1")
 2237 (set_attr "length" "8")])
 2238
;; Scalar sqrt, same gating.
 2239(define_insn "sqrt<mode>2"
 2240 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
 2241 (sqrt:FP_MODE
 2242 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
 2243 "flag_unsafe_math_optimizations"
 2244 "v_sqrt%i0\t%0, %1"
 2245 [(set_attr "type" "vop1")
 2246 (set_attr "length" "8")])
 2247
 2248;; }}}
 2249;; {{{ FP fused multiply and add
 2250
;; fma: %0 = %1 * %2 + %3.  Operands 1/2 commute ("%"); the alternatives
;; allow one scalar/constant source in either the multiplicand or addend.
 2251(define_insn "fma<mode>4<exec>"
 2252 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v")
 2253 (fma:VEC_FP_MODE
 2254 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA")
 2255 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
 2256 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
 2257 ""
 2258 "v_fma%i0\t%0, %1, %2, %3"
 2259 [(set_attr "type" "vop3a")
 2260 (set_attr "length" "8")])
 2261
;; fma with negated second operand: %0 = %1 * -%2 + %3, using the VOP3A
;; source negation modifier.  Used by the reciprocal/divide expansion.
 2262(define_insn "fma<mode>4_negop2<exec>"
 2263 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v")
 2264 (fma:VEC_FP_MODE
 2265 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
 2266 (neg:VEC_FP_MODE
 2267 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
 2268 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
 2269 ""
 2270 "v_fma%i0\t%0, %1, -%2, %3"
 2271 [(set_attr "type" "vop3a")
 2272 (set_attr "length" "8")])
 2273
;; Scalar fma, mirroring the vector pattern.
 2274(define_insn "fma<mode>4"
 2275 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v")
 2276 (fma:FP_MODE
 2277 (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA")
 2278 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
 2279 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
 2280 ""
 2281 "v_fma%i0\t%0, %1, %2, %3"
 2282 [(set_attr "type" "vop3a")
 2283 (set_attr "length" "8")])
 2284
;; Scalar fma with negated second operand.
 2285(define_insn "fma<mode>4_negop2"
 2286 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v")
 2287 (fma:FP_MODE
 2288 (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
 2289 (neg:FP_MODE
 2290 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
 2291 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
 2292 ""
 2293 "v_fma%i0\t%0, %1, -%2, %3"
 2294 [(set_attr "type" "vop3a")
 2295 (set_attr "length" "8")])
 2296
 2297;; }}}
 2298;; {{{ FP division
 2299
;; Hardware reciprocal approximation, matched as (div 1 x) on vectors.
 2300(define_insn "recip<mode>2<exec>"
 2301 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
 2302 (div:VEC_FP_MODE
 2303 (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
 2304 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
 2305 ""
 2306 "v_rcp%i0\t%0, %1"
 2307 [(set_attr "type" "vop1")
 2308 (set_attr "length" "8")])
 2309
;; Scalar reciprocal approximation, (div 1 x).
 2310(define_insn "recip<mode>2"
 2311 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
 2312 (div:FP_MODE
 2313 (float:FP_MODE (const_int 1))
 2314 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
 2315 ""
 2316 "v_rcp%i0\t%0, %1"
 2317 [(set_attr "type" "vop1")
 2318 (set_attr "length" "8")])
 2319
 2320;; Do division via a = b * 1/c
 2321;; The v_rcp_* instructions are not sufficiently accurate on their own,
 2322;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
 2323;; which the ISA manual says is enough to improve the reciprocal accuracy.
 2324;;
 2325;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
 2326
;; Vector divide, gated on -freciprocal-math.  Emits:
;;   r0 = rcp(c);  f = fma(r0, -c, 2);  r1 = r0 * f   (Newton step
;;   r1 = r0 * (2 - c*r0));  result = b * r1 unless b is the reciprocal
;;   constant, in which case r1 is written straight to the output.
;; NOTE(review): is_rcp tests the numerator against dconstm1 (-1.0) while
;; the recip patterns above encode 1/x — for a literal -1/x numerator this
;; would produce +1/x with no negation.  Looks like it should be dconst1;
;; confirm against upstream before changing.
 2327(define_expand "div<mode>3"
 2328 [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
 2329 (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
 2330 (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
 2331 "flag_reciprocal_math"
 2332 {
 2333 rtx two = gcn_vec_constant (<MODE>mode,
 2334 const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
 2335 rtx initrcp = gen_reg_rtx (<MODE>mode);
 2336 rtx fma = gen_reg_rtx (<MODE>mode);
 2337 rtx rcp;
 2338
 2339 bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
 2340 && real_identical
 2341 (CONST_DOUBLE_REAL_VALUE
 2342 (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
 2343
 2344 if (is_rcp)
 2345 rcp = operands[0];
 2346 else
 2347 rcp = gen_reg_rtx (<MODE>mode);
 2348
 2349 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
 2350 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
 2351 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
 2352
 2353 if (!is_rcp)
 2354 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
 2355
 2356 DONE;
 2357 })
 2358
;; Scalar divide, same refined-reciprocal scheme (and the same dconstm1
;; question as the vector expand above).
 2359(define_expand "div<mode>3"
 2360 [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
 2361 (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
 2362 (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
 2363 "flag_reciprocal_math"
 2364 {
 2365 rtx two = const_double_from_real_value (dconst2, <MODE>mode);
 2366 rtx initrcp = gen_reg_rtx (<MODE>mode);
 2367 rtx fma = gen_reg_rtx (<MODE>mode);
 2368 rtx rcp;
 2369
 2370 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
 2371 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
 2372 &dconstm1));
 2373
 2374 if (is_rcp)
 2375 rcp = operands[0];
 2376 else
 2377 rcp = gen_reg_rtx (<MODE>mode);
 2378
 2379 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
 2380 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
 2381 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
 2382
 2383 if (!is_rcp)
 2384 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
 2385
 2386 DONE;
 2387 })
 2388
 2389;; }}}
 2390;; {{{ Int/FP conversions
 2391
 2392(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
 2393(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
 2394
 2395(define_mode_iterator VCVT_FROM_MODE [V64HI V64SI V64HF V64SF V64DF])
 2396(define_mode_iterator VCVT_TO_MODE [V64HI V64SI V64HF V64SF V64DF])
 2397
;; cvt_name maps each conversion code to the GCC standard pattern-name
;; prefix; cvt_operands supplies the %-directives that render the source
;; and destination type suffixes (signed %i vs unsigned %u) of v_cvt.
 2398(define_code_iterator cvt_op [fix unsigned_fix
 2399 float unsigned_float
 2400 float_extend float_truncate])
 2401(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
 2402 (float "float") (unsigned_float "floatuns")
 2403 (float_extend "extend") (float_truncate "trunc")])
 2404(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
 2405 (float "%i0%i1") (unsigned_float "%i0%u1")
 2406 (float_extend "%i0%i1")
 2407 (float_truncate "%i0%i1")])
 2408
;; Scalar conversions.  The full from/to cross product is generated;
;; gcn_valid_cvt_p filters out combinations v_cvt cannot do.
 2409(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
 2410 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
 2411 (cvt_op:CVT_TO_MODE
 2412 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
 2413 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
 2414 <cvt_name>_cvt)"
 2415 "v_cvt<cvt_operands>\t%0, %1"
 2416 [(set_attr "type" "vop1")
 2417 (set_attr "length" "8")])
 2418
;; Vector conversions, identical structure with the <exec> masked variant.
 2419(define_insn "<cvt_name><VCVT_FROM_MODE:mode><VCVT_TO_MODE:mode>2<exec>"
 2420 [(set (match_operand:VCVT_TO_MODE 0 "register_operand" "= v")
 2421 (cvt_op:VCVT_TO_MODE
 2422 (match_operand:VCVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
 2423 "gcn_valid_cvt_p (<VCVT_FROM_MODE:MODE>mode, <VCVT_TO_MODE:MODE>mode,
 2424 <cvt_name>_cvt)"
 2425 "v_cvt<cvt_operands>\t%0, %1"
 2426 [(set_attr "type" "vop1")
 2427 (set_attr "length" "8")])
 2428
 2429;; }}}
 2430;; {{{ Int/int conversions
 2431
 2432;; GCC can already do these for scalar types, but not for vector types.
 2433;; Unfortunately you can't just do SUBREG on a vector to select the low part,
 2434;; so there must be a few tricks here.
 2435
;; Truncate V64DI to V64SI by copying the low 32-bit register of each
;; lane.  When the output is tied to the input ("0") the move is a no-op
;; (length 0); otherwise a 4-byte register move is emitted after reload.
 2436(define_insn_and_split "vec_truncatev64div64si"
 2437 [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
 2438 (truncate:V64SI
 2439 (match_operand:V64DI 1 "register_operand" " 0, v")))]
 2440 ""
 2441 "#"
 2442 "reload_completed"
 2443 [(set (match_dup 0) (match_dup 1))]
 2444 {
 2445 operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
 2446 }
 2447 [(set_attr "type" "vop2")
 2448 (set_attr "length" "0,4")])
 2449
;; EXEC-masked truncation: lanes outside the mask keep operand 2 (or are
;; undefined with U0).  Splits to a masked vec_merge move of the low part.
 2450(define_insn_and_split "vec_truncatev64div64si_exec"
 2451 [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
 2452 (vec_merge:V64SI
 2453 (truncate:V64SI
 2454 (match_operand:V64DI 1 "register_operand" " 0, v"))
 2455 (match_operand:V64SI 2 "gcn_alu_or_unspec_operand" "U0,U0")
 2456 (match_operand:DI 3 "gcn_exec_operand" " e, e")))]
 2457 ""
 2458 "#"
 2459 "reload_completed"
 2460 [(parallel [(set (match_dup 0)
 2461 (vec_merge:V64SI (match_dup 1) (match_dup 2) (match_dup 3)))
 2462 (clobber (scratch:V64DI))])]
 2463 {
 2464 operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
 2465 }
 2466 [(set_attr "type" "vop2")
 2467 (set_attr "length" "0,4")])
 2468
 2469;; }}}
 2470;; {{{ Vector comparison/merge
 2471
;; Lane-wise compare producing a 64-bit lane mask.  The destination may be
;; VCC ("cV"), the EXEC register ("e", using the v_cmpx form which writes
;; EXEC as a side effect and clobbers VCC), or a scalar register pair
;; ("Sg" via the VOP3A encoding).  %E1 renders the comparison operator.
 2472(define_insn "vec_cmp<mode>di"
 2473 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
 2474 (match_operator 1 "comparison_operator"
 2475 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
 2476 "vSv, B,vSv, B, v,vA")
 2477 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
 2478 " v, v, v, v,vA, v")]))
 2479 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
 2480 ""
 2481 "@
 2482 v_cmp%E1\tvcc, %2, %3
 2483 v_cmp%E1\tvcc, %2, %3
 2484 v_cmpx%E1\tvcc, %2, %3
 2485 v_cmpx%E1\tvcc, %2, %3
 2486 v_cmp%E1\t%0, %2, %3
 2487 v_cmp%E1\t%0, %2, %3"
 2488 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
 2489 (set_attr "length" "4,8,4,8,8,8")])
 2490
;; Unsigned compare expander — just forwards to vec_cmp<mode>di, since
;; the operator code (LTU vs LT) carries the signedness and %E1 emits the
;; right mnemonic.
 2491(define_expand "vec_cmpu<mode>di"
 2492 [(match_operand:DI 0 "register_operand")
 2493 (match_operator 1 "comparison_operator"
 2494 [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
 2495 (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
 2496 ""
 2497 {
 2498 /* Unsigned comparisons use the same patterns as signed comparisons,
 2499 except that they use unsigned operators (e.g. LTU vs LT).
 2500 The '%E1' directive then does the Right Thing. */
 2501 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
 2502 operands[3]));
 2503 DONE;
 2504 })
 2505
;; As vec_cmp<mode>di but the result is ANDed with the EXEC mask
;; (operand 4), so only active lanes contribute to the output mask.
 2506(define_insn "vec_cmp<mode>di_exec"
 2507 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
 2508 (and:DI
 2509 (match_operator 1 "comparison_operator"
 2510 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
 2511 "vSv, B,vSv, B, v,vA")
 2512 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
 2513 " v, v, v, v,vA, v")])
 2514 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
 2515 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
 2516 ""
 2517 "@
 2518 v_cmp%E1\tvcc, %2, %3
 2519 v_cmp%E1\tvcc, %2, %3
 2520 v_cmpx%E1\tvcc, %2, %3
 2521 v_cmpx%E1\tvcc, %2, %3
 2522 v_cmp%E1\t%0, %2, %3
 2523 v_cmp%E1\t%0, %2, %3"
 2524 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
 2525 (set_attr "length" "4,8,4,8,8,8")])
 2526
;; Compare against a broadcast scalar (first comparison operand is a
;; vec_duplicate of a scalar/constant).
 2527(define_insn "vec_cmp<mode>di_dup"
 2528 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
 2529 (match_operator 1 "comparison_operator"
 2530 [(vec_duplicate:VEC_1REG_MODE
 2531 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
 2532 " Sv, B,Sv,B, A"))
 2533 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
 2534 " v, v, v,v, v")]))
 2535 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
 2536 ""
 2537 "@
 2538 v_cmp%E1\tvcc, %2, %3
 2539 v_cmp%E1\tvcc, %2, %3
 2540 v_cmpx%E1\tvcc, %2, %3
 2541 v_cmpx%E1\tvcc, %2, %3
 2542 v_cmp%E1\t%0, %2, %3"
 2543 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
 2544 (set_attr "length" "4,8,4,8,8")])
 2545
;; Broadcast-scalar compare with the result masked by EXEC (operand 4).
 2546(define_insn "vec_cmp<mode>di_dup_exec"
 2547 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
 2548 (and:DI
 2549 (match_operator 1 "comparison_operator"
 2550 [(vec_duplicate:VEC_1REG_MODE
 2551 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
 2552 " Sv, B,Sv,B, A"))
 2553 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
 2554 " v, v, v,v, v")])
 2555 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
 2556 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
 2557 ""
 2558 "@
 2559 v_cmp%E1\tvcc, %2, %3
 2560 v_cmp%E1\tvcc, %2, %3
 2561 v_cmpx%E1\tvcc, %2, %3
 2562 v_cmpx%E1\tvcc, %2, %3
 2563 v_cmp%E1\t%0, %2, %3"
 2564 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
 2565 (set_attr "length" "4,8,4,8,8")])
 2566
;; Select between two vectors under a 64-bit lane-mask (operand 3):
;; lanes whose mask bit is set take operand 1, the rest operand 2.
;; Implemented elsewhere by the masked-move patterns; the V64DI scratch
;; covers the double-register modes in VEC_ALLREG_MODE.
 2567(define_expand "vcond_mask_<mode>di"
 2568 [(parallel
2b99bed8
AS
 2569 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "")
 2570 (vec_merge:VEC_ALLREG_MODE
 2571 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand" "")
 2572 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand" "")
 2573 (match_operand:DI 3 "register_operand" "")))
3d6275e3
AS
 2574 (clobber (scratch:V64DI))])]
 2575 ""
 2576 "")
 2577
;; Standard vcond: compare operands 4/5 (data mode may differ from the
;; value mode), then merge operands 1/2 under the resulting lane mask.
2b99bed8
AS
 2578(define_expand "vcond<VEC_ALL1REG_MODE:mode><VEC_1REG_ALT:mode>"
 2579 [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
 2580 (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
 2581 (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
3d6275e3
AS
 2582 (match_operator 3 "comparison_operator"
 2583 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
 2584 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
 2585 ""
 2586 {
 2587 rtx tmp = gen_reg_rtx (DImode);
96eb1765
RS
 2588 emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di
 2589 (tmp, operands[3], operands[4], operands[5]));
2b99bed8 2590 emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
96eb1765 2591 (operands[0], operands[1], operands[2], tmp));
3d6275e3
AS
 2592 DONE;
 2593 })
 2594
;; As above, but the comparison itself is restricted to the active lanes
;; given by the EXEC operand 6 (uses the _exec compare pattern).
2b99bed8
AS
 2595(define_expand "vcond<VEC_ALL1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
 2596 [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
 2597 (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
 2598 (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
3d6275e3
AS
 2599 (match_operator 3 "comparison_operator"
 2600 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
 2601 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
 2602 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
 2603 ""
 2604 {
 2605 rtx tmp = gen_reg_rtx (DImode);
96eb1765
RS
 2606 emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di_exec
 2607 (tmp, operands[3], operands[4], operands[5], operands[6]));
2b99bed8 2608 emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
96eb1765 2609 (operands[0], operands[1], operands[2], tmp));
3d6275e3
AS
 2610 DONE;
 2611 })
 2612
;; Unsigned vcondu: same shape as vcond; the unsigned operator code in
;; operand 3 selects the unsigned compare mnemonic.
2b99bed8
AS
 2613(define_expand "vcondu<VEC_ALL1REG_MODE:mode><VEC_1REG_INT_ALT:mode>"
 2614 [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
 2615 (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
 2616 (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
3d6275e3
AS
 2617 (match_operator 3 "comparison_operator"
 2618 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
 2619 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
 2620 ""
 2621 {
 2622 rtx tmp = gen_reg_rtx (DImode);
96eb1765
RS
 2623 emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di
 2624 (tmp, operands[3], operands[4], operands[5]));
2b99bed8 2625 emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
96eb1765 2626 (operands[0], operands[1], operands[2], tmp));
3d6275e3
AS
 2627 DONE;
 2628 })
 2629
;; Unsigned vcondu with EXEC masking (operand 6).
2b99bed8
AS
 2630(define_expand "vcondu<VEC_ALL1REG_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
 2631 [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
 2632 (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
 2633 (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
3d6275e3
AS
 2634 (match_operator 3 "comparison_operator"
 2635 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
 2636 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
 2637 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
 2638 ""
 2639 {
 2640 rtx tmp = gen_reg_rtx (DImode);
96eb1765
RS
 2641 emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di_exec
 2642 (tmp, operands[3], operands[4], operands[5], operands[6]));
2b99bed8 2643 emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
96eb1765 2644 (operands[0], operands[1], operands[2], tmp));
3d6275e3
AS
 2645 DONE;
 2646 })
 2647
 2648;; }}}
 2649;; {{{ Fully masked loop support
 2650
;; Produce the lane mask for a fully-masked loop: bit i of operand 0 is
;; set when operands[1]+i < operands[2].  With constant bounds the mask
;; is computed at compile time; otherwise it compares a lane-index vector
;; (VGPR 1 is presumed pre-loaded with 0,1,2,...,63 — hence the
;; _0_1_2_3 name; confirm against the backend's register setup) offset by
;; operand 1 against a broadcast of operand 2 using GT.
 2651(define_expand "while_ultsidi"
 2652 [(match_operand:DI 0 "register_operand")
 2653 (match_operand:SI 1 "")
 2654 (match_operand:SI 2 "")]
 2655 ""
 2656 {
 2657 if (GET_CODE (operands[1]) != CONST_INT
 2658 || GET_CODE (operands[2]) != CONST_INT)
 2659 {
 2660 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
 2661 rtx tmp = _0_1_2_3;
 2662 if (GET_CODE (operands[1]) != CONST_INT
 2663 || INTVAL (operands[1]) != 0)
 2664 {
 2665 tmp = gen_reg_rtx (V64SImode);
 2666 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
 2667 }
 2668 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
 2669 gen_rtx_GT (VOIDmode, 0, 0),
 2670 operands[2], tmp));
 2671 }
 2672 else
 2673 {
 2674 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
 2675 HOST_WIDE_INT mask = (diff >= 64 ? -1
 2676 : ~((unsigned HOST_WIDE_INT)-1 << diff));
 2677 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
 2678 }
 2679 DONE;
 2680 })
;; Masked vector load: load only the lanes enabled in the mask
;; (operand 2); disabled lanes of operand 0 are left undefined.
;; Implemented as a gather from a broadcast scalar address.
(define_expand "maskload<mode>di"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    /* Turn the scalar MEM address into a per-lane vector address.  */
    rtx addr = gcn_expand_scalar_to_vector_address
      (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
    /* Preserve the address space and volatility of the original MEM.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
    /* Disabled lanes merge from an undefined value.  */
    rtx undef = gcn_gen_undef (<MODE>mode);
    emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
					   exec));
    DONE;
  })
2698
;; Masked vector store: store only the lanes enabled in the mask
;; (operand 2).  Implemented as a scatter to a broadcast scalar address.
(define_expand "maskstore<mode>di"
  [(match_operand:VEC_ALLREG_MODE 0 "memory_operand")
   (match_operand:VEC_ALLREG_MODE 1 "register_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    /* Turn the scalar MEM address into a per-lane vector address.  */
    rtx addr = gcn_expand_scalar_to_vector_address
      (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
    /* Preserve the address space and volatility of the original MEM.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
    emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
    DONE;
  })
2713
;; Masked gather load: operand 0 receives elements loaded from
;; base (operand 1) + offsets (operand 2), scaled per operands 3/4,
;; for the lanes enabled in the mask (operand 5).
(define_expand "mask_gather_load<mode>"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);

    /* TODO: more conversions will be needed when more types are vectorized. */
    if (GET_MODE (operands[2]) == V64DImode)
      {
	/* The gather pattern wants SImode offsets; truncate DImode ones.  */
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[2],
						    gcn_gen_undef (V64SImode),
						    exec));
	operands[2] = tmp;
      }

    emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
				      operands[3], operands[4], exec));
    DONE;
  })
2739
;; Masked scatter store: store elements of operand 4 to
;; base (operand 0) + offsets (operand 1), scaled per operands 2/3,
;; for the lanes enabled in the mask (operand 5).
(define_expand "mask_scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 4 "register_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);

    /* TODO: more conversions will be needed when more types are vectorized. */
    if (GET_MODE (operands[1]) == V64DImode)
      {
	/* The scatter pattern wants SImode offsets; truncate DImode ones.  */
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[1],
						    gcn_gen_undef (V64SImode),
						    exec));
	operands[1] = tmp;
      }

    emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
				       operands[3], operands[4], exec));
    DONE;
  })
2765
;; Modes and operations supported by the cond_* (masked operation)
;; expanders below.
; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
(define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
(define_mode_iterator COND_INT_MODE [V64SI V64DI])

(define_code_iterator cond_op [plus minus])
2771
;; Conditional (masked) arithmetic: for each lane,
;; operand 0 = mask-bit ? operand2 <cond_op> operand3 : operand4.
;; Delegates to the corresponding *_exec pattern.
(define_expand "cond_<expander><mode>"
  [(match_operand:COND_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_op:COND_MODE
     (match_operand:COND_MODE 2 "gcn_alu_operand")
     (match_operand:COND_MODE 3 "gcn_alu_operand"))
   (match_operand:COND_MODE 4 "register_operand")]
  ""
  {
    /* The *_exec pattern requires the mask and first source in registers.  */
    operands[1] = force_reg (DImode, operands[1]);
    operands[2] = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
					   operands[3], operands[4],
					   operands[1]));
    DONE;
  })
2789
(define_code_iterator cond_bitop [and ior xor])

;; Conditional (masked) bitwise operations; integer modes only.
;; Same shape as the masked arithmetic expander above.
(define_expand "cond_<expander><mode>"
  [(match_operand:COND_INT_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_bitop:COND_INT_MODE
     (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
     (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
   (match_operand:COND_INT_MODE 4 "register_operand")]
  ""
  {
    /* The *_exec pattern requires the mask and first source in registers.  */
    operands[1] = force_reg (DImode, operands[1]);
    operands[2] = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
					   operands[3], operands[4],
					   operands[1]));
    DONE;
  })
2809
2810;; }}}
2811;; {{{ Vector reductions
2812
;; Unspecs implementing vector reductions via DPP (data-parallel
;; primitives) shifts.  REDUC_2REG_UNSPEC is the subset that can be
;; split into two independent 32-bit halves for double-register modes.
(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
				   UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
				   UNSPEC_PLUS_DPP_SHR
				   UNSPEC_AND_DPP_SHR
				   UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
					UNSPEC_AND_DPP_SHR
					UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

; FIXME: Isn't there a better way of doing this?
;; Maps each unspec to its own name, so <reduc_unspec> can be used as a
;; C token inside pattern conditions and preparation statements.
(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
			       (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
			       (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
			       (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
			       (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
			       (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
			       (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
			       (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])

;; Name fragment used in the reduc_*_scal_* optab pattern names.
(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
			   (UNSPEC_SMAX_DPP_SHR "smax")
			   (UNSPEC_UMIN_DPP_SHR "umin")
			   (UNSPEC_UMAX_DPP_SHR "umax")
			   (UNSPEC_PLUS_DPP_SHR "plus")
			   (UNSPEC_AND_DPP_SHR "and")
			   (UNSPEC_IOR_DPP_SHR "ior")
			   (UNSPEC_XOR_DPP_SHR "xor")])

;; Assembler mnemonic for each reduction step (%i0/%u0/%b0 select the
;; signed/unsigned/bitwise type suffix for operand 0's mode).
(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
			     (UNSPEC_SMAX_DPP_SHR "v_max%i0")
			     (UNSPEC_UMIN_DPP_SHR "v_min%u0")
			     (UNSPEC_UMAX_DPP_SHR "v_max%u0")
			     (UNSPEC_PLUS_DPP_SHR "v_add%u0")
			     (UNSPEC_AND_DPP_SHR "v_and%b0")
			     (UNSPEC_IOR_DPP_SHR "v_or%b0")
			     (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
2850
;; Reduce a single-register vector to a scalar.  The reduction tree is
;; built by gcn_expand_reduc_scalar using DPP shifts; its final result
;; accumulates in lane 63, from which it is moved to a scalar register.
(define_expand "reduc_<reduc_op>_scal_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
	(unspec:<SCALAR_MODE>
	  [(match_operand:VEC_1REG_MODE 1 "register_operand")]
	  REDUC_UNSPEC))]
  ""
  {
    rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
				       <reduc_unspec>);

    /* The result of the reduction is in lane 63 of tmp.  */
    emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));

    DONE;
  })
2866
;; As reduc_<reduc_op>_scal_<mode>, but for the double-register V64DI
;; mode; only the operations in REDUC_2REG_UNSPEC are supported.
(define_expand "reduc_<reduc_op>_scal_v64di"
  [(set (match_operand:DI 0 "register_operand")
	(unspec:DI
	  [(match_operand:V64DI 1 "register_operand")]
	  REDUC_2REG_UNSPEC))]
  ""
  {
    rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
				       <reduc_unspec>);

    /* The result of the reduction is in lane 63 of tmp.  */
    emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));

    DONE;
  })
2882
;; One step of a DPP-shift reduction: combine operand 1 with operand 2
;; shifted by operand 3 lanes.  The assembler text is built at output
;; time by gcn_expand_dpp_shr_insn.  Excluded on GCN3 for integer plus,
;; where the separate *plus_carry_dpp_shr_<mode> pattern is used instead
;; -- presumably because of differing add/carry mnemonics; confirm
;; against gcn_expand_dpp_shr_insn.
(define_insn "*<reduc_op>_dpp_shr_<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"   "=v")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
	   (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
	   (match_operand:SI 3 "const_int_operand"	     "n")]
	  REDUC_UNSPEC))]
  "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
     && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
  {
    return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
				    <reduc_unspec>, INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
2898
;; Double-register reduction step: after reload, split into two
;; independent single-register operations on the low and high halves.
;; NOTE(review): for UNSPEC_PLUS_DPP_SHR this ignores any carry between
;; the halves; the carry-propagating case is handled by the separate
;; *plus_carry_dpp_shr_v64di pattern below -- verify which one the
;; expander actually generates for 64-bit integer plus reductions.
(define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
  [(set (match_operand:V64DI 0 "register_operand"    "=&v")
	(unspec:V64DI
	  [(match_operand:V64DI 1 "register_operand" "v0")
	   (match_operand:V64DI 2 "register_operand" "v0")
	   (match_operand:SI 3 "const_int_operand"   "n")]
	  REDUC_2REG_UNSPEC))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4)
	(unspec:V64SI
	  [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
   (set (match_dup 5)
	(unspec:V64SI
	  [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
  {
    /* Low (even index 0) and high (index 1) halves of each operand.  */
    operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
2925
2926; Special cases for addition.
2927
;; Addition step of a DPP-shift reduction that produces a carry-out in
;; VCC (hence the clobber).  GCN3 and GCN5 use different mnemonics for
;; the carry-out add.
(define_insn "*plus_carry_dpp_shr_<mode>"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "register_operand"	"=v")
	(unspec:VEC_1REG_INT_MODE
	  [(match_operand:VEC_1REG_INT_MODE 1 "register_operand" "v")
	   (match_operand:VEC_1REG_INT_MODE 2 "register_operand" "v")
	   (match_operand:SI 3 "const_int_operand"		 "n")]
	  UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
    return gcn_expand_dpp_shr_insn (<MODE>mode, insn,
				    UNSPEC_PLUS_CARRY_DPP_SHR,
				    INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
2945
;; Add-with-carry-in step: as above, but also consumes the carry
;; produced by a previous step (operand 4, constrained to VCC) and
;; produces a new carry-out in VCC.
(define_insn "*plus_carry_in_dpp_shr_v64si"
  [(set (match_operand:V64SI 0 "register_operand"   "=v")
	(unspec:V64SI
	  [(match_operand:V64SI 1 "register_operand" "v")
	   (match_operand:V64SI 2 "register_operand" "v")
	   (match_operand:SI 3 "const_int_operand"   "n")
	   (match_operand:DI 4 "register_operand"    "cV")]
	  UNSPEC_PLUS_CARRY_IN_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
    return gcn_expand_dpp_shr_insn (V64SImode, insn,
				    UNSPEC_PLUS_CARRY_IN_DPP_SHR,
				    INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
2964
;; 64-bit addition reduction step: after reload, split into a low-half
;; add that produces a carry in VCC followed by a high-half add that
;; consumes it, so the carry propagates correctly between the halves.
(define_insn_and_split "*plus_carry_dpp_shr_v64di"
  [(set (match_operand:V64DI 0 "register_operand"    "=&v")
	(unspec:V64DI
	  [(match_operand:V64DI 1 "register_operand" "v0")
	   (match_operand:V64DI 2 "register_operand" "v0")
	   (match_operand:SI 3 "const_int_operand"   "n")]
	  UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 4)
		   (unspec:V64SI
		     [(match_dup 6) (match_dup 8) (match_dup 3)]
		     UNSPEC_PLUS_CARRY_DPP_SHR))
	      (clobber (reg:DI VCC_REG))])
   (parallel [(set (match_dup 5)
		   (unspec:V64SI
		     [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
		     UNSPEC_PLUS_CARRY_IN_DPP_SHR))
	      (clobber (reg:DI VCC_REG))])]
  {
    /* Low (index 0) and high (index 1) halves of each operand.  */
    operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
2996
; Instructions to move a scalar value from lane 63 of a vector register.
;; Alternative 0 reads lane 63 directly into a scalar register
;; (v_readlane_b32 needs no EXEC mask); alternative 1 rotates the
;; wavefront by one lane with DPP so that lane 63's value lands in the
;; destination VGPR's active lane.
(define_insn "mov_from_lane63_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"     "=Sg,v")
	(unspec:<SCALAR_MODE>
	  [(match_operand:VEC_ALL1REG_MODE 1 "register_operand" "v,v")]
	  UNSPEC_MOV_FROM_LANE63))]
  ""
  "@
   v_readlane_b32\t%0, %1, 63
   v_mov_b32\t%0, %1 wave_ror:1"
  [(set_attr "type" "vop3a,vop_dpp")
   (set_attr "exec" "none,*")
   (set_attr "length" "8")])
3010
;; As mov_from_lane63_<mode>, but for double-register V64DI values:
;; each half (%L/%H) is moved separately.  In the DPP alternative the
;; two moves are ordered by register number so that, when source and
;; destination overlap, the first move does not clobber a half the
;; second one still needs to read.
(define_insn "mov_from_lane63_v64di"
  [(set (match_operand:DI 0 "register_operand"	   "=Sg,v")
	(unspec:DI
	  [(match_operand:V64DI 1 "register_operand" "v,v")]
	  UNSPEC_MOV_FROM_LANE63))]
  ""
  "@
   v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
   * if (REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
	      \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
	      \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
  [(set_attr "type" "vop3a,vop_dpp")
   (set_attr "exec" "none,*")
   (set_attr "length" "8")])
3028
3029;; }}}
3030;; {{{ Miscellaneous
3031
;; Build the linear series operand0[n] = operand1 + n * operand2 using
;; the per-lane index vector in VGPR1 (presumably preloaded with
;; 0,1,...,63 -- see while_ultsidi above).
(define_expand "vec_seriesv64si"
  [(match_operand:V64SI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:SI 2 "gcn_alu_operand")]
  ""
  {
    rtx tmp = gen_reg_rtx (V64SImode);
    rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));

    emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
    emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
    DONE;
  })
3045
;; DImode variant of vec_seriesv64si: the lane indices are
;; zero-extended to 64 bits as part of the multiply.
(define_expand "vec_seriesv64di"
  [(match_operand:V64DI 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx tmp = gen_reg_rtx (V64DImode);
    rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));

    emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
    emit_insn (gen_addv64di3_dup (operands[0], tmp, operands[1]));
    DONE;
  })
3059
3060;; }}}