;; gcc/config/gcn/gcn-valu.md — AMD GCN vector-ALU machine description.
;; Copyright (C) 2016-2020 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; {{{ Vector iterators

; Vector modes that fit in one 64-lane vector register.
(define_mode_iterator VEC_1REG_MODE
  [V64SI V64HF V64SF])
(define_mode_iterator VEC_1REG_ALT
  [V64SI V64HF V64SF])
(define_mode_iterator VEC_ALL1REG_MODE
  [V64QI V64HI V64SI V64HF V64SF])

; Integer-only subsets of the above.
(define_mode_iterator VEC_1REG_INT_MODE
  [V64SI])
(define_mode_iterator VEC_1REG_INT_ALT
  [V64SI])
(define_mode_iterator VEC_ALL1REG_INT_MODE
  [V64QI V64HI V64SI])
(define_mode_iterator VEC_ALL1REG_INT_ALT
  [V64QI V64HI V64SI])

; Vector modes for two vector registers
(define_mode_iterator VEC_2REG_MODE
  [V64DI V64DF])

; All of above
(define_mode_iterator VEC_REG_MODE
  [V64SI V64HF V64SF	  ; Single reg
   V64DI V64DF])	  ; Double reg
(define_mode_iterator VEC_ALLREG_MODE
  [V64QI V64HI V64SI V64HF V64SF  ; Single reg
   V64DI V64DF])		  ; Double reg

; Map each vector mode to its element mode (lower- and upper-case forms),
; used to build pattern names and operand modes.
(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])

; Sub-dword addressing selector matching the element width.
(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])

;; }}}
;; {{{ Substitutions

; Each subst attribute appends "_exec" to the pattern name when the
; corresponding define_subst below has been applied, creating the
; EXEC-masked variant of the instruction.
(define_subst_attr "exec" "vec_merge"
		   "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
		   "" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
		   "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
		   "" "_exec")

; Wrap a plain vector set in a vec_merge controlled by the EXEC register,
; merging with either the previous value ("U0") or another register.
(define_subst "vec_merge"
  [(set (match_operand:VEC_ALLREG_MODE 0)
	(match_operand:VEC_ALLREG_MODE 1))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_ALLREG_MODE
	  (match_dup 1)
	  (match_operand:VEC_ALLREG_MODE 3
	   "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])

; As "vec_merge", but preserve a clobber present in the base pattern.
(define_subst "vec_merge_with_clobber"
  [(set (match_operand:VEC_ALLREG_MODE 0)
	(match_operand:VEC_ALLREG_MODE 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_ALLREG_MODE
	  (match_dup 1)
	  (match_operand:VEC_ALLREG_MODE 3
	   "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])

; As "vec_merge", but the base pattern also sets a DImode condition
; output; the substituted form masks that output with EXEC.
(define_subst "vec_merge_with_vcc"
  [(set (match_operand:VEC_ALLREG_MODE 0)
	(match_operand:VEC_ALLREG_MODE 1))
   (set (match_operand:DI 2)
	(match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
	   (vec_merge:VEC_ALLREG_MODE
	     (match_dup 1)
	     (match_operand:VEC_ALLREG_MODE 4
	      "gcn_register_or_unspec_operand" "U0")
	     (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
	   (and:DI (match_dup 3)
		   (reg:DI EXEC_REG)))])])

; Append an explicit EXEC operand to an UNSPEC_SCATTER store.
(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand 0)
	   (match_operand 1)
	   (match_operand 2)
	   (match_operand 3)]
	  UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 0)
	   (match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
	  UNSPEC_SCATTER))])

;; }}}
;; {{{ Vector moves

; This is the entry point for all vector register moves.  Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.

(define_expand "mov<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
	(match_operand:VEC_ALLREG_MODE 1 "general_operand"))]
  ""
  {
    /* Before register allocation, lower vector stores/loads to explicit
       scatter/gather expressions; the address-space and volatile flags
       normally carried by the MEM become explicit const_int operands.  */
    if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
      {
	operands[1] = force_reg (<MODE>mode, operands[1]);
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[0],
							scratch);
	emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
	DONE;
      }
    else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
      {
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[1],
							scratch);
	emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
	DONE;
      }
    else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
      {
	/* During register allocation use the sgprbase pseudo, which
	   carries its own V64DI address scratch.  */
	gcc_assert (!reload_completed);
	rtx scratch = gen_reg_rtx (V64DImode);
	emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
	DONE;
      }
  })

; A pseudo instruction that helps LRA use the "U0" constraint.
; It emits no code (length 0) and exists only for register allocation.

(define_insn "mov<mode>_unspec"
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_ALLREG_MODE 1 "gcn_unspec_operand"	" U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])

; Plain single-register vector move; the longer encoding (length 8) is for
; literal-constant ("B") operands.
(define_insn "*mov<mode>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand" "=v,v")
	(match_operand:VEC_ALL1REG_MODE 1 "general_operand"	 "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])

; EXEC-masked single-register move.  Merging with a real register value
; uses v_cndmask_b32 (via VCC or an arbitrary SGPR pair); memory
; alternatives are split later (hence "#") and need a V64DI scratch.
(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
							 "=v, v, v, v, v, m")
	(vec_merge:VEC_ALL1REG_MODE
	  (match_operand:VEC_ALL1REG_MODE 1 "general_operand"
							 "vA, B, v,vA, m, v")
	  (match_operand:VEC_ALL1REG_MODE 3 "gcn_alu_or_unspec_operand"
							 "U0,U0,vA,vA,U0,U0")
	  (match_operand:DI 2 "register_operand"	 " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:V64DI 4			 "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %3, %1, vcc
   v_cndmask_b32\t%0, %3, %1, %2
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
;						      "=v,v, v, m")
;	(vec_merge:VEC_ALL1REG_MODE
;	  (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand"	      " e,e, e, e")))
;   (clobber (match_scratch:V64DI 3			      "=X,X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;   v_mov_b32\t%0, %1
;   v_mov_b32\t%0, %1
;   #
;   #"
;  [(set_attr "type" "vop1,vop1,*,*")
;   (set_attr "length" "4,8,16,16")])

; Double-register vector move, emitted as two v_mov_b32.  The halves are
; ordered so that an overlapping source is not clobbered before it is read.
(define_insn "*mov<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_2REG_MODE 1 "general_operand"      "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

; EXEC-masked double-register move.  As with *mov<mode> above, the low/high
; halves are emitted in an order that avoids clobbering an overlapping
; source.  Memory alternatives are split later ("#").
(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
						       "= v,   v,   v, v, m")
	(vec_merge:VEC_2REG_MODE
	  (match_operand:VEC_2REG_MODE 1 "general_operand"
						       "vDB,  v0,  v0, m, v")
	  (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
						       " U0,vDA0,vDA0,U0,U0")
	  (match_operand:DI 2 "register_operand"       "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:V64DI 4		       "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
	case 1:
	  return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
	case 2:
	  return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, %2";
	}
    else
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
	case 1:
	  return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
	case 2:
	  return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, %2";
	}

    /* Memory alternatives: leave for the splitter.  */
    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
;	(vec_merge:VEC_2REG_MODE
;	  (match_operand:VEC_2REG_MODE 1 "general_operand"     "vDB, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand"	       " e, e, e")))
;   (clobber (match_scratch:V64DI 3			       "=X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
;       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
;     else \
;       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
;   #
;   #"
;  [(set_attr "type" "vmult,*,*")
;   (set_attr "length" "16,16,16")])

; A SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT

; Single-register sgprbase move; the memory alternatives are split into
; scatter/gather patterns later ("#"), using operand 2 as address scratch.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
						      "= v, v, v, m")
	(unspec:VEC_ALL1REG_MODE
	  [(match_operand:VEC_ALL1REG_MODE 1 "general_operand"
						      " vA,vB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])

; Double-register sgprbase move; halves ordered to cope with overlap, as in
; the plain double-register move above.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "8,12,12")])

; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload.  It allows a reload with a scratch register.

(define_expand "reload_in<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "= v")
	(match_operand:VEC_ALLREG_MODE 1 "memory_operand"   "  m"))
   (clobber (match_operand:V64DI 2 "register_operand"	    "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; reload_out is similar to reload_in, above.

(define_expand "reload_out<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand"   "= m")
	(match_operand:VEC_ALLREG_MODE 1 "register_operand" "  v"))
   (clobber (match_operand:V64DI 2 "register_operand"	    "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; Expand scalar addresses into gather/scatter patterns

; Split an sgprbase store into an UNSPEC_SCATTER, materializing the vector
; address in the scratch and the MEM's address-space/volatile flags as
; const_int operands.
(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
	(unspec:VEC_ALLREG_MODE
	  [(match_operand:VEC_ALLREG_MODE 1 "general_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[0],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

; Split an EXEC-masked store into an UNSPEC_SCATTER that carries the EXEC
; operand explicitly (operand 3).
(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
	(vec_merge:VEC_ALLREG_MODE
	  (match_operand:VEC_ALLREG_MODE 1 "general_operand")
	  (match_operand:VEC_ALLREG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1)
		     (match_dup 6) (match_dup 7) (match_dup 3)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[0],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

; Split an sgprbase load into an UNSPEC_GATHER, mirroring the store split
; above.
(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
	(unspec:VEC_ALLREG_MODE
	  [(match_operand:VEC_ALLREG_MODE 1 "memory_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (match_dup 0)
	(unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
				 (mem:BLK (scratch))]
				UNSPEC_GATHER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[1],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; Split an EXEC-masked load into a vec_merge around an UNSPEC_GATHER;
; the EXEC mask stays on the vec_merge rather than inside the gather.
(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
	(vec_merge:VEC_ALLREG_MODE
	  (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
	  (match_operand:VEC_ALLREG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_ALLREG_MODE
	  (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
				   (mem:BLK (scratch))]
				   UNSPEC_GATHER)
	  (match_dup 2)
	  (match_dup 3)))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[1],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; TODO: Add zero/sign extending variants.

;; }}}
;; {{{ Lane moves

; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates

; Set one lane of a single-register vector; the lane is selected by the
; (ashift 1 N) mask form matched here.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"	     "= v")
	(vec_merge:VEC_ALL1REG_MODE
	  (vec_duplicate:VEC_ALL1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	     " Sv"))
	  (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
								     " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		     "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.

; Double-register variant: write the low and high halves of the lane
; separately.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"	     "= v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	     " Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
								     " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		     "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Standard vec_set entry point; merges the new lane into the destination
; itself (match_dup 0), matching the *vec_set patterns above.
(define_expand "vec_set<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
	(vec_merge:VEC_ALLREG_MODE
	  (vec_duplicate:VEC_ALLREG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"))
	  (match_dup 0)
	  (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")

; As *vec_set<mode>, but with the mask as a pre-computed constant
; (a single power of two); recover the lane number with exact_log2.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"	       "=v")
	(vec_merge:VEC_ALL1REG_MODE
	  (vec_duplicate:VEC_ALL1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	       "Sv"))
	  (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
								       "U0")
	  (match_operand:SI 2 "const_int_operand"		       " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Double-register variant of *vec_set<mode>_1.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"	       "=v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	       "Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
								       "U0")
	  (match_operand:SI 2 "const_int_operand"		       " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Broadcast a scalar to all lanes of a single-register vector.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"  "=v")
	(vec_duplicate:VEC_ALL1REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand"    "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Broadcast a 64-bit scalar: move low and high halves separately.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"  "= v")
	(vec_duplicate:VEC_2REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])

; Read one lane of a single-register vector into an SGPR.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_ALL1REG_MODE 1 "register_operand" "  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"     "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Read one 64-bit lane: two v_readlane_b32, one per half.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_2REG_MODE 1 "register_operand" "  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Extract the element at the highest set bit of MASK: lane = 63 - clz(mask).
(define_expand "extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 2 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx dst = operands[0];
    rtx mask = operands[1];
    rtx vect = operands[2];
    rtx tmpreg = gen_reg_rtx (SImode);

    emit_insn (gen_clzdi2 (tmpreg, mask));
    emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
    emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
    DONE;
  })

; As extract_last, but fall back to a default value when the mask is empty;
; expands to a compare-and-branch around the extraction.
(define_expand "fold_extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 3 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx dst = operands[0];
    rtx default_value = operands[1];
    rtx mask = operands[2];
    rtx vect = operands[3];
    rtx else_label = gen_label_rtx ();
    rtx end_label = gen_label_rtx ();

    rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
    emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
    emit_insn (gen_extract_last_<mode> (dst, mask, vect));
    emit_jump_insn (gen_jump (end_label));
    emit_barrier ();
    emit_label (else_label);
    emit_move_insn (dst, default_value);
    emit_label (end_label);
    DONE;
  })

; Standard vector initialization; all the work is done in C.
(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand 1)]
  ""
  {
    gcn_expand_vector_init (operands[0], operands[1]);
    DONE;
  })

;; }}}
;; {{{ Scatter / Gather

;; GCN does not have an instruction for loading a vector from contiguous
;; memory so *all* loads and stores are eventually converted to scatter
;; or gather.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec.  The unspec formats are as follows:
;;
;;     (unspec:V64??
;;	 [(<address expression>)
;;	  (<addr_space_t>)
;;	  (<use_glc>)
;;	  (mem:BLK (scratch))]
;;	 UNSPEC_GATHER)
;;
;;     (unspec:BLK
;;	  [(<address expression>)
;;	   (<source register>)
;;	   (<addr_space_t>)
;;	   (<use_glc>)
;;	   (<exec>)]
;;	  UNSPEC_SCATTER)
;;
;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
;; - The mem:BLK does not contain any real information, but indicates that an
;;   unknown memory read is taking place.  Stores are expected to use a similar
;;   mem:BLK outside the unspec.
;; - The address space and glc (volatile) fields are there to replace the
;;   fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.

; Standard gather entry point: base (op 1) + scaled offsets (op 2 * op 4),
; element size in op 3.  Chooses the 1-offset or 2-offsets insn depending
; on whether the computed address vector is already 64-bit.
(define_expand "gather_load<mode>"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
						const0_rtx, const0_rtx));
    else
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
						 addr, const0_rtx, const0_rtx,
						 const0_rtx));
    DONE;
  })

; EXEC-masked gather; inactive lanes keep an undefined value (gcn_gen_undef).
(define_expand "gather<mode>_exec"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:V64SI 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    rtx undefmode = gcn_gen_undef (<MODE>mode);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
						     const0_rtx, const0_rtx,
						     const0_rtx, undefmode,
						     operands[5]));
    else
      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
						      addr, const0_rtx,
						      const0_rtx, const0_rtx,
						      undefmode, operands[5]));
    DONE;
  })

; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
	(unspec:VEC_ALLREG_MODE
	  [(match_operand 1 "")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
    ""
    {})

; Gather via a vector of 64-bit addresses plus a constant offset.
; FLAT addressing requires offset 0 on GCN3 or < 0x1000 otherwise;
; GLOBAL allows a signed 13-bit offset.
(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_ALLREG_MODE
	  [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
		       (vec_duplicate:V64DI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
	|| ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
		   glc);
	else
	  sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
	       "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; LDS/GDS (DS) variant: 32-bit addresses, 16-bit unsigned offset.
(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_ALLREG_MODE
	  [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
		       (vec_duplicate:V64SI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

; Gather from a scalar 64-bit base plus sign-extended 32-bit per-lane
; offsets plus a constant offset (GLOBAL address space only).
(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"	       "=v")
	(unspec:VEC_ALLREG_MODE
	  [(plus:V64DI
	     (plus:V64DI
	       (vec_duplicate:V64DI
		 (match_operand:DI 1 "register_operand"		       "Sv"))
	       (sign_extend:V64DI
		 (match_operand:V64SI 2 "register_operand"	       " v")))
	     (vec_duplicate:V64DI (match_operand 3 "immediate_operand" " n")))
	   (match_operand 4 "immediate_operand"			       " n")
	   (match_operand 5 "immediate_operand"			       " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
	/* Work around assembler bug in which a 64-bit register is expected,
	   but a 32-bit value would be correct.  */
	int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
	sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
		 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; Standard scatter entry point, mirroring gather_load above.
(define_expand "scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 4 "register_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
						 const0_rtx, const0_rtx));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
						  const0_rtx, operands[4],
						  const0_rtx, const0_rtx));
    DONE;
  })

; EXEC-masked scatter; the mask register is forced into a DImode reg first.
(define_expand "scatter<mode>_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 4 "register_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    operands[5] = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
						      operands[4], const0_rtx,
						      const0_rtx,
						      operands[5]));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
						       const0_rtx, operands[4],
						       const0_rtx, const0_rtx,
						       operands[5]));
    DONE;
  })

; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:V64DI 0 "")
	   (match_operand:VEC_ALLREG_MODE 1 "register_operand")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")]
	  UNSPEC_SCATTER))]
  ""
  {})

; Scatter via a vector of 64-bit addresses plus constant offset; same
; address-space / offset-range rules as the corresponding gather.
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
		       (vec_duplicate:V64DI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_ALLREG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
	|| (TARGET_GCN5_PLUS
	    && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
	else
	  sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; LDS/GDS (DS) scatter variant: 32-bit addresses, 16-bit unsigned offset.
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
		       (vec_duplicate:V64SI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_ALLREG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

; GLOBAL scatter-store with a split address: uniform 64-bit scalar base
; (operand 0), per-lane sign-extended 32-bit offsets (operand 1), and a
; uniform signed 13-bit immediate offset (operand 2).
960 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
961 [(set (mem:BLK (scratch))
962 (unspec:BLK
963 [(plus:V64DI
964 (plus:V64DI
965 (vec_duplicate:V64DI
966 (match_operand:DI 0 "register_operand" "Sv"))
967 (sign_extend:V64DI
968 (match_operand:V64SI 1 "register_operand" " v")))
969 (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
970 " n")))
971 (match_operand:VEC_ALLREG_MODE 3 "register_operand" " v")
972 (match_operand 4 "immediate_operand" " n")
973 (match_operand 5 "immediate_operand" " n")]
974 UNSPEC_SCATTER))]
975 "(AS_GLOBAL_P (INTVAL (operands[4]))
976 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
977 {
978 addr_space_t as = INTVAL (operands[4]);
979 const char *glc = INTVAL (operands[5]) ? " glc" : "";
980
981 static char buf[200];
982 if (AS_GLOBAL_P (as))
983 {
984 /* Work around assembler bug in which a 64-bit register is expected,
985 but a 32-bit value would be correct. */
986 int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
987 sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
988 reg, reg + 1, glc);
989 }
990 else
991 gcc_unreachable ();
992
993 return buf;
994 }
995 [(set_attr "type" "flat")
996 (set_attr "length" "12")])
997
998 ;; }}}
999 ;; {{{ Permutations
1000
; Cross-lane "backwards permute" through LDS (ds_bpermute_b32): result
; lanes are selected from operand 2 according to the per-lane values in
; operand 1, under the exec mask in operand 3.  The trailing s_waitcnt
; ensures the LDS operation has completed before the result is consumed.
1001 (define_insn "ds_bpermute<mode>"
1002 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
1003 (unspec:VEC_ALL1REG_MODE
1004 [(match_operand:VEC_ALL1REG_MODE 2 "register_operand" " v")
1005 (match_operand:V64SI 1 "register_operand" " v")
1006 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1007 UNSPEC_BPERMUTE))]
1008 ""
1009 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
1010 [(set_attr "type" "vop2")
1011 (set_attr "length" "12")])
1012
; Double-register (64-bit element) variant of ds_bpermute: split after
; reload into two single-register bpermutes, one per 32-bit half.
; The output is early-clobber, with the source tied via "v0" so the
; halves can be processed independently without corrupting the input.
1013 (define_insn_and_split "ds_bpermute<mode>"
1014 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v")
1015 (unspec:VEC_2REG_MODE
1016 [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
1017 (match_operand:V64SI 1 "register_operand" " v")
1018 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1019 UNSPEC_BPERMUTE))]
1020 ""
1021 "#"
1022 "reload_completed"
1023 [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
1024 UNSPEC_BPERMUTE))
1025 (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
1026 UNSPEC_BPERMUTE))]
1027 {
/* Operands 4/5 are the low/high halves of the destination; 6/7 the
   low/high halves of the source.  */
1028 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
1029 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
1030 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
1031 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
1032 }
1033 [(set_attr "type" "vmult")
1034 (set_attr "length" "24")])
1035
1036 ;; }}}
1037 ;; {{{ ALU special case: add/sub
1038
; Vector integer add (commutative); v_add writes the per-lane carry to
; VCC, which callers cannot see, hence the clobber.
1039 (define_insn "add<mode>3<exec_clobber>"
1040 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1041 (plus:VEC_ALL1REG_INT_MODE
1042 (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" "% v")
1043 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" "vSvB")))
1044 (clobber (reg:DI VCC_REG))]
1045 ""
1046 "v_add%^_u32\t%0, vcc, %2, %1"
1047 [(set_attr "type" "vop2")
1048 (set_attr "length" "8")])
1049
; Vector add of a broadcast scalar (operand 2) to a vector (operand 1);
; carry in VCC is clobbered as in add<mode>3.
1050 (define_insn "add<mode>3_dup<exec_clobber>"
1051 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1052 (plus:VEC_ALL1REG_INT_MODE
1053 (vec_duplicate:VEC_ALL1REG_INT_MODE
1054 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
1055 (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" " v")))
1056 (clobber (reg:DI VCC_REG))]
1057 ""
1058 "v_add%^_u32\t%0, vcc, %2, %1"
1059 [(set_attr "type" "vop2")
1060 (set_attr "length" "8")])
1061
; 32-bit vector add with explicit carry-out: operand 3 (VCC or an SGPR
; pair) receives one bit per lane, set when the unsigned sum wrapped
; (sum < addend).  Used as the low-part step of 64-bit adds below.
1062 (define_insn "addv64si3_vcc<exec_vcc>"
1063 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
1064 (plus:V64SI
1065 (match_operand:V64SI 1 "register_operand" "% v, v")
1066 (match_operand:V64SI 2 "gcn_alu_operand" "vSvB,vSvB")))
1067 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
1068 (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
1069 (match_dup 1)))]
1070 ""
1071 "v_add%^_u32\t%0, %3, %2, %1"
1072 [(set_attr "type" "vop2,vop3b")
1073 (set_attr "length" "8")])
1074
1075 ; This pattern only changes the VCC bits when the corresponding lane is
1076 ; enabled, so the set must be described as an ior.
1077
; Like addv64si3_vcc, but the first addend is a scalar broadcast to all
; lanes.  Operand 3 receives the per-lane carry-out bits.
1078 (define_insn "addv64si3_vcc_dup<exec_vcc>"
1079 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
1080 (plus:V64SI
1081 (vec_duplicate:V64SI
1082 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
1083 (match_operand:V64SI 2 "register_operand" " v, v")))
1084 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
; The carry expression mirrors the main set: operand 1 is the SI scalar
; (so it is the operand wrapped in vec_duplicate) and operand 2 is
; already V64SI.  Previously the roles were swapped, giving
; mode-inconsistent RTL (vec_duplicate:V64SI of a V64SI value and a
; bare SI used as a vector addend).
1085 (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 1))
1086 (match_dup 2))
1087 (match_dup 2)))]
1088 ""
1089 "v_add%^_u32\t%0, %3, %2, %1"
1090 [(set_attr "type" "vop2,vop3b")
1091 (set_attr "length" "8,8")])
1092
1093 ; This pattern does not accept SGPR because VCC read already counts as an
1094 ; SGPR use and number of SGPR operands is limited to 1.
1095
; Add-with-carry: sum = carry_in (operand 3, one bit per lane, expanded
; to 0/1 via vec_merge) + operand 1 + operand 2.  The carry-out set is
; an ior of the two places a carry can arise (adding op2, or adding the
; carry bit to op1), matching the "must be described as an ior" note.
1096 (define_insn "addcv64si3<exec_vcc>"
1097 [(set (match_operand:V64SI 0 "register_operand" "=v,v")
1098 (plus:V64SI
1099 (plus:V64SI
1100 (vec_merge:V64SI
1101 (vec_duplicate:V64SI (const_int 1))
1102 (vec_duplicate:V64SI (const_int 0))
1103 (match_operand:DI 3 "register_operand" " cV,Sv"))
1104 (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
1105 (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB")))
1106 (set (match_operand:DI 4 "register_operand" "=cV,Sg")
1107 (ior:DI (ltu:DI (plus:V64SI
1108 (plus:V64SI
1109 (vec_merge:V64SI
1110 (vec_duplicate:V64SI (const_int 1))
1111 (vec_duplicate:V64SI (const_int 0))
1112 (match_dup 3))
1113 (match_dup 1))
1114 (match_dup 2))
1115 (match_dup 2))
1116 (ltu:DI (plus:V64SI
1117 (vec_merge:V64SI
1118 (vec_duplicate:V64SI (const_int 1))
1119 (vec_duplicate:V64SI (const_int 0))
1120 (match_dup 3))
1121 (match_dup 1))
1122 (match_dup 1))))]
1123 ""
1124 "v_addc%^_u32\t%0, %4, %1, %2, %3"
1125 [(set_attr "type" "vop2,vop3b")
1126 (set_attr "length" "4,8")])
1127
; Add-with-carry where the second addend is a broadcast scalar
; (operand 2).  Carry-out logic parallels addcv64si3.
1128 (define_insn "addcv64si3_dup<exec_vcc>"
1129 [(set (match_operand:V64SI 0 "register_operand" "=v,v")
1130 (plus:V64SI
1131 (plus:V64SI
1132 (vec_merge:V64SI
1133 (vec_duplicate:V64SI (const_int 1))
1134 (vec_duplicate:V64SI (const_int 0))
1135 (match_operand:DI 3 "register_operand" " cV, Sv"))
1136 (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
1137 (vec_duplicate:V64SI
1138 (match_operand:SI 2 "gcn_alu_operand" "SvB,SvB"))))
1139 (set (match_operand:DI 4 "register_operand" "=cV, Sg")
1140 (ior:DI (ltu:DI (plus:V64SI (plus:V64SI
1141 (vec_merge:V64SI
1142 (vec_duplicate:V64SI (const_int 1))
1143 (vec_duplicate:V64SI (const_int 0))
1144 (match_dup 3))
1145 (match_dup 1))
1146 (vec_duplicate:V64SI
1147 (match_dup 2)))
1148 (vec_duplicate:V64SI
1149 (match_dup 2)))
1150 (ltu:DI (plus:V64SI (vec_merge:V64SI
1151 (vec_duplicate:V64SI (const_int 1))
1152 (vec_duplicate:V64SI (const_int 0))
1153 (match_dup 3))
1154 (match_dup 1))
1155 (match_dup 1))))]
1156 ""
1157 "v_addc%^_u32\t%0, %4, %1, %2, %3"
1158 [(set_attr "type" "vop2,vop3b")
1159 (set_attr "length" "4,8")])
1160
; Vector subtract; the two alternatives pick v_sub or v_subrev depending
; on which operand may be a scalar/immediate.  Borrow goes to VCC,
; clobbered here.
1161 (define_insn "sub<mode>3<exec_clobber>"
1162 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v, v")
1163 (minus:VEC_ALL1REG_INT_MODE
1164 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "vSvB, v")
1165 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" " v,vSvB")))
1166 (clobber (reg:DI VCC_REG))]
1167 ""
1168 "@
1169 v_sub%^_u32\t%0, vcc, %1, %2
1170 v_subrev%^_u32\t%0, vcc, %2, %1"
1171 [(set_attr "type" "vop2")
1172 (set_attr "length" "8,8")])
1173
; 32-bit vector subtract with explicit borrow-out in operand 3: set when
; the unsigned difference wrapped (diff > minuend).
1174 (define_insn "subv64si3_vcc<exec_vcc>"
1175 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
1176 (minus:V64SI
1177 (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1178 (match_operand:V64SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1179 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1180 (gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
1181 (match_dup 1)))]
1182 ""
1183 "@
1184 v_sub%^_u32\t%0, %3, %1, %2
1185 v_sub%^_u32\t%0, %3, %1, %2
1186 v_subrev%^_u32\t%0, %3, %2, %1
1187 v_subrev%^_u32\t%0, %3, %2, %1"
1188 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1189 (set_attr "length" "8")])
1190
1191 ; This pattern does not accept SGPR because VCC read already counts
1192 ; as a SGPR use and number of SGPR operands is limited to 1.
1193
; Subtract-with-borrow: result = op1 - borrow_in (operand 3, expanded to
; 0/1 per lane) - op2; borrow-out in operand 4 is the ior of the two
; possible borrow sources.
1194 (define_insn "subcv64si3<exec_vcc>"
1195 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
1196 (minus:V64SI
1197 (minus:V64SI
1198 (vec_merge:V64SI
1199 (vec_duplicate:V64SI (const_int 1))
1200 (vec_duplicate:V64SI (const_int 0))
1201 (match_operand:DI 3 "gcn_alu_operand" " cV,Sv,cV,Sv"))
1202 (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
1203 (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB,vA,vA")))
1204 (set (match_operand:DI 4 "register_operand" "=cV,Sg,cV,Sg")
1205 (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
1206 (vec_merge:V64SI
1207 (vec_duplicate:V64SI (const_int 1))
1208 (vec_duplicate:V64SI (const_int 0))
1209 (match_dup 3))
1210 (match_dup 1))
1211 (match_dup 2))
1212 (match_dup 2))
1213 (ltu:DI (minus:V64SI (vec_merge:V64SI
1214 (vec_duplicate:V64SI (const_int 1))
1215 (vec_duplicate:V64SI (const_int 0))
1216 (match_dup 3))
1217 (match_dup 1))
1218 (match_dup 1))))]
1219 ""
1220 "@
1221 v_subb%^_u32\t%0, %4, %1, %2, %3
1222 v_subb%^_u32\t%0, %4, %1, %2, %3
1223 v_subbrev%^_u32\t%0, %4, %2, %1, %3
1224 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
1225 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1226 (set_attr "length" "8")])
1227
; 64-bit vector add, split into a 32-bit add-with-carry-out (low halves)
; followed by an add-with-carry-in/out (high halves) linked through VCC.
1228 (define_insn_and_split "addv64di3"
1229 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1230 (plus:V64DI
1231 (match_operand:V64DI 1 "register_operand" "% v0")
1232 (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0")))
1233 (clobber (reg:DI VCC_REG))]
1234 ""
1235 "#"
1236 "gcn_can_split_p (V64DImode, operands[0])
1237 && gcn_can_split_p (V64DImode, operands[1])
1238 && gcn_can_split_p (V64DImode, operands[2])"
1239 [(const_int 0)]
1240 {
1241 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1242 emit_insn (gen_addv64si3_vcc
1243 (gcn_operand_part (V64DImode, operands[0], 0),
1244 gcn_operand_part (V64DImode, operands[1], 0),
1245 gcn_operand_part (V64DImode, operands[2], 0),
1246 vcc));
1247 emit_insn (gen_addcv64si3
1248 (gcn_operand_part (V64DImode, operands[0], 1),
1249 gcn_operand_part (V64DImode, operands[1], 1),
1250 gcn_operand_part (V64DImode, operands[2], 1),
1251 vcc, vcc));
1252 DONE;
1253 }
1254 [(set_attr "type" "vmult")
1255 (set_attr "length" "8")])
1256
; Exec-masked variant of addv64di3: the vec_merge keeps operand 3's
; value (or undefined, "U") in lanes disabled by the exec mask.
1257 (define_insn_and_split "addv64di3_exec"
1258 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1259 (vec_merge:V64DI
1260 (plus:V64DI
1261 (match_operand:V64DI 1 "register_operand" "% v0")
1262 (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0"))
1263 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1264 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1265 (clobber (reg:DI VCC_REG))]
1266 ""
1267 "#"
; NOTE(review): the last clause checks operands[4] (the DI exec mask),
; whereas the sibling *_exec splitters (e.g. subv64di3_exec,
; addv64di3_dup_exec) check operands[3], the V64DI merge operand.
; Looks like a typo for operands[3] -- confirm against upstream.
1268 "gcn_can_split_p (V64DImode, operands[0])
1269 && gcn_can_split_p (V64DImode, operands[1])
1270 && gcn_can_split_p (V64DImode, operands[2])
1271 && gcn_can_split_p (V64DImode, operands[4])"
1272 [(const_int 0)]
1273 {
1274 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1275 emit_insn (gen_addv64si3_vcc_exec
1276 (gcn_operand_part (V64DImode, operands[0], 0),
1277 gcn_operand_part (V64DImode, operands[1], 0),
1278 gcn_operand_part (V64DImode, operands[2], 0),
1279 vcc,
1280 gcn_operand_part (V64DImode, operands[3], 0),
1281 operands[4]));
1282 emit_insn (gen_addcv64si3_exec
1283 (gcn_operand_part (V64DImode, operands[0], 1),
1284 gcn_operand_part (V64DImode, operands[1], 1),
1285 gcn_operand_part (V64DImode, operands[2], 1),
1286 vcc, vcc,
1287 gcn_operand_part (V64DImode, operands[3], 1),
1288 operands[4]));
1289 DONE;
1290 }
1291 [(set_attr "type" "vmult")
1292 (set_attr "length" "8")])
1293
; 64-bit vector subtract, split into subtract-with-borrow-out (low) and
; subtract-with-borrow-in/out (high) linked through VCC.
1294 (define_insn_and_split "subv64di3"
1295 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
1296 (minus:V64DI
1297 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
1298 (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0")))
1299 (clobber (reg:DI VCC_REG))]
1300 ""
1301 "#"
1302 "gcn_can_split_p (V64DImode, operands[0])
1303 && gcn_can_split_p (V64DImode, operands[1])
1304 && gcn_can_split_p (V64DImode, operands[2])"
1305 [(const_int 0)]
1306 {
1307 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1308 emit_insn (gen_subv64si3_vcc
1309 (gcn_operand_part (V64DImode, operands[0], 0),
1310 gcn_operand_part (V64DImode, operands[1], 0),
1311 gcn_operand_part (V64DImode, operands[2], 0),
1312 vcc));
1313 emit_insn (gen_subcv64si3
1314 (gcn_operand_part (V64DImode, operands[0], 1),
1315 gcn_operand_part (V64DImode, operands[1], 1),
1316 gcn_operand_part (V64DImode, operands[2], 1),
1317 vcc, vcc));
1318 DONE;
1319 }
1320 [(set_attr "type" "vmult")
1321 (set_attr "length" "8,8")])
1322
; Exec-masked 64-bit subtract; at least one of the two inputs must be a
; register (insn condition) so the vSvB/v0 alternatives stay valid.
1323 (define_insn_and_split "subv64di3_exec"
1324 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
1325 (vec_merge:V64DI
1326 (minus:V64DI
1327 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
1328 (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0"))
1329 (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
1330 " U0, U0")
1331 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1332 (clobber (reg:DI VCC_REG))]
1333 "register_operand (operands[1], VOIDmode)
1334 || register_operand (operands[2], VOIDmode)"
1335 "#"
1336 "gcn_can_split_p (V64DImode, operands[0])
1337 && gcn_can_split_p (V64DImode, operands[1])
1338 && gcn_can_split_p (V64DImode, operands[2])
1339 && gcn_can_split_p (V64DImode, operands[3])"
1340 [(const_int 0)]
1341 {
1342 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1343 emit_insn (gen_subv64si3_vcc_exec
1344 (gcn_operand_part (V64DImode, operands[0], 0),
1345 gcn_operand_part (V64DImode, operands[1], 0),
1346 gcn_operand_part (V64DImode, operands[2], 0),
1347 vcc,
1348 gcn_operand_part (V64DImode, operands[3], 0),
1349 operands[4]));
1350 emit_insn (gen_subcv64si3_exec
1351 (gcn_operand_part (V64DImode, operands[0], 1),
1352 gcn_operand_part (V64DImode, operands[1], 1),
1353 gcn_operand_part (V64DImode, operands[2], 1),
1354 vcc, vcc,
1355 gcn_operand_part (V64DImode, operands[3], 1),
1356 operands[4]));
1357 DONE;
1358 }
1359 [(set_attr "type" "vmult")
1360 (set_attr "length" "8,8")])
1361
; 64-bit add of a broadcast DI scalar: the scalar is split with
; gcn_operand_part (DImode, ...) into its 32-bit halves.
1362 (define_insn_and_split "addv64di3_dup"
1363 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1364 (plus:V64DI
1365 (match_operand:V64DI 1 "register_operand" " v0")
1366 (vec_duplicate:V64DI
1367 (match_operand:DI 2 "gcn_alu_operand" "SvDB"))))
1368 (clobber (reg:DI VCC_REG))]
1369 ""
1370 "#"
1371 "gcn_can_split_p (V64DImode, operands[0])
1372 && gcn_can_split_p (V64DImode, operands[1])
1373 && gcn_can_split_p (V64DImode, operands[2])"
1374 [(const_int 0)]
1375 {
1376 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1377 emit_insn (gen_addv64si3_vcc_dup
1378 (gcn_operand_part (V64DImode, operands[0], 0),
1379 gcn_operand_part (DImode, operands[2], 0),
1380 gcn_operand_part (V64DImode, operands[1], 0),
1381 vcc));
1382 emit_insn (gen_addcv64si3_dup
1383 (gcn_operand_part (V64DImode, operands[0], 1),
1384 gcn_operand_part (V64DImode, operands[1], 1),
1385 gcn_operand_part (DImode, operands[2], 1),
1386 vcc, vcc));
1387 DONE;
1388 }
1389 [(set_attr "type" "vmult")
1390 (set_attr "length" "8")])
1391
; Exec-masked variant of addv64di3_dup.
1392 (define_insn_and_split "addv64di3_dup_exec"
1393 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1394 (vec_merge:V64DI
1395 (plus:V64DI
1396 (match_operand:V64DI 1 "register_operand" " v0")
1397 (vec_duplicate:V64DI
1398 (match_operand:DI 2 "gcn_alu_operand" "SvDB")))
1399 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1400 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1401 (clobber (reg:DI VCC_REG))]
1402 ""
1403 "#"
1404 "gcn_can_split_p (V64DImode, operands[0])
1405 && gcn_can_split_p (V64DImode, operands[1])
1406 && gcn_can_split_p (V64DImode, operands[2])
1407 && gcn_can_split_p (V64DImode, operands[3])"
1408 [(const_int 0)]
1409 {
1410 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1411 emit_insn (gen_addv64si3_vcc_dup_exec
1412 (gcn_operand_part (V64DImode, operands[0], 0),
1413 gcn_operand_part (DImode, operands[2], 0),
1414 gcn_operand_part (V64DImode, operands[1], 0),
1415 vcc,
1416 gcn_operand_part (V64DImode, operands[3], 0),
1417 operands[4]));
1418 emit_insn (gen_addcv64si3_dup_exec
1419 (gcn_operand_part (V64DImode, operands[0], 1),
1420 gcn_operand_part (V64DImode, operands[1], 1),
1421 gcn_operand_part (DImode, operands[2], 1),
1422 vcc, vcc,
1423 gcn_operand_part (V64DImode, operands[3], 1),
1424 operands[4]));
1425 DONE;
1426 }
1427 [(set_attr "type" "vmult")
1428 (set_attr "length" "8")])
1429
; 64-bit add where one input is a zero-extended 32-bit vector: the high
; half of the extended input is known zero, so the high-part add uses
; const0_rtx plus the carry from the low halves.
1430 (define_insn_and_split "addv64di3_zext"
1431 [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
1432 (plus:V64DI
1433 (zero_extend:V64DI
1434 (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
1435 (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA")))
1436 (clobber (reg:DI VCC_REG))]
1437 ""
1438 "#"
1439 "gcn_can_split_p (V64DImode, operands[0])
1440 && gcn_can_split_p (V64DImode, operands[2])"
1441 [(const_int 0)]
1442 {
1443 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1444 emit_insn (gen_addv64si3_vcc
1445 (gcn_operand_part (V64DImode, operands[0], 0),
1446 operands[1],
1447 gcn_operand_part (V64DImode, operands[2], 0),
1448 vcc));
1449 emit_insn (gen_addcv64si3
1450 (gcn_operand_part (V64DImode, operands[0], 1),
1451 gcn_operand_part (V64DImode, operands[2], 1),
1452 const0_rtx, vcc, vcc));
1453 DONE;
1454 }
1455 [(set_attr "type" "vmult")
1456 (set_attr "length" "8,8")])
1457
; Exec-masked variant of addv64di3_zext.
1458 (define_insn_and_split "addv64di3_zext_exec"
1459 [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
1460 (vec_merge:V64DI
1461 (plus:V64DI
1462 (zero_extend:V64DI
1463 (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
1464 (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA"))
1465 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1466 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1467 (clobber (reg:DI VCC_REG))]
1468 ""
1469 "#"
1470 "gcn_can_split_p (V64DImode, operands[0])
1471 && gcn_can_split_p (V64DImode, operands[2])
1472 && gcn_can_split_p (V64DImode, operands[3])"
1473 [(const_int 0)]
1474 {
1475 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1476 emit_insn (gen_addv64si3_vcc_exec
1477 (gcn_operand_part (V64DImode, operands[0], 0),
1478 operands[1],
1479 gcn_operand_part (V64DImode, operands[2], 0),
1480 vcc,
1481 gcn_operand_part (V64DImode, operands[3], 0),
1482 operands[4]));
1483 emit_insn (gen_addcv64si3_exec
1484 (gcn_operand_part (V64DImode, operands[0], 1),
1485 gcn_operand_part (V64DImode, operands[2], 1),
1486 const0_rtx, vcc, vcc,
1487 gcn_operand_part (V64DImode, operands[3], 1),
1488 operands[4]));
1489 DONE;
1490 }
1491 [(set_attr "type" "vmult")
1492 (set_attr "length" "8,8")])
1493
; 64-bit add of a zero-extended broadcast 32-bit scalar; the high-part
; addend is just the carry (const0_rtx + vcc).
1494 (define_insn_and_split "addv64di3_zext_dup"
1495 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1496 (plus:V64DI
1497 (zero_extend:V64DI
1498 (vec_duplicate:V64SI
1499 (match_operand:SI 1 "gcn_alu_operand" "BSv")))
1500 (match_operand:V64DI 2 "gcn_alu_operand" "vA0")))
1501 (clobber (reg:DI VCC_REG))]
1502 ""
1503 "#"
1504 "gcn_can_split_p (V64DImode, operands[0])
1505 && gcn_can_split_p (V64DImode, operands[2])"
1506 [(const_int 0)]
1507 {
1508 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1509 emit_insn (gen_addv64si3_vcc_dup
1510 (gcn_operand_part (V64DImode, operands[0], 0),
1511 gcn_operand_part (DImode, operands[1], 0),
1512 gcn_operand_part (V64DImode, operands[2], 0),
1513 vcc));
1514 emit_insn (gen_addcv64si3
1515 (gcn_operand_part (V64DImode, operands[0], 1),
1516 gcn_operand_part (V64DImode, operands[2], 1),
1517 const0_rtx, vcc, vcc));
1518 DONE;
1519 }
1520 [(set_attr "type" "vmult")
1521 (set_attr "length" "8")])
1522
; Exec-masked variant of addv64di3_zext_dup.
1523 (define_insn_and_split "addv64di3_zext_dup_exec"
1524 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1525 (vec_merge:V64DI
1526 (plus:V64DI
1527 (zero_extend:V64DI
1528 (vec_duplicate:V64SI
1529 (match_operand:SI 1 "gcn_alu_operand" "BSv")))
1530 (match_operand:V64DI 2 "gcn_alu_operand" "vA0"))
1531 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1532 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1533 (clobber (reg:DI VCC_REG))]
1534 ""
1535 "#"
1536 "gcn_can_split_p (V64DImode, operands[0])
1537 && gcn_can_split_p (V64DImode, operands[2])
1538 && gcn_can_split_p (V64DImode, operands[3])"
1539 [(const_int 0)]
1540 {
1541 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1542 emit_insn (gen_addv64si3_vcc_dup_exec
1543 (gcn_operand_part (V64DImode, operands[0], 0),
1544 gcn_operand_part (DImode, operands[1], 0),
1545 gcn_operand_part (V64DImode, operands[2], 0),
1546 vcc,
1547 gcn_operand_part (V64DImode, operands[3], 0),
1548 operands[4]));
1549 emit_insn (gen_addcv64si3_exec
1550 (gcn_operand_part (V64DImode, operands[0], 1),
1551 gcn_operand_part (V64DImode, operands[2], 1),
1552 const0_rtx, vcc, vcc,
1553 gcn_operand_part (V64DImode, operands[3], 1),
1554 operands[4]));
1555 DONE;
1556 }
1557 [(set_attr "type" "vmult")
1558 (set_attr "length" "8")])
1559
; Zero-extended 32-bit vector plus broadcast DI scalar: the high word of
; the scalar is broadcast first, then the low-part carry is added in.
1560 (define_insn_and_split "addv64di3_zext_dup2"
1561 [(set (match_operand:V64DI 0 "register_operand" "= v")
1562 (plus:V64DI
1563 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
1564 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1565 (clobber (reg:DI VCC_REG))]
1566 ""
1567 "#"
1568 "gcn_can_split_p (V64DImode, operands[0])"
1569 [(const_int 0)]
1570 {
1571 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1572 emit_insn (gen_addv64si3_vcc_dup
1573 (gcn_operand_part (V64DImode, operands[0], 0),
1574 gcn_operand_part (DImode, operands[2], 0),
1575 operands[1],
1576 vcc));
1577 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1578 emit_insn (gen_vec_duplicatev64si
1579 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1580 emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
1581 DONE;
1582 }
1583 [(set_attr "type" "vmult")
1584 (set_attr "length" "8")])
1585
; Exec-masked variant of addv64di3_zext_dup2.
1586 (define_insn_and_split "addv64di3_zext_dup2_exec"
1587 [(set (match_operand:V64DI 0 "register_operand" "= v")
1588 (vec_merge:V64DI
1589 (plus:V64DI
1590 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
1591 " vA"))
1592 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1593 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1594 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1595 (clobber (reg:DI VCC_REG))]
1596 ""
1597 "#"
1598 "gcn_can_split_p (V64DImode, operands[0])
1599 && gcn_can_split_p (V64DImode, operands[3])"
1600 [(const_int 0)]
1601 {
1602 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1603 emit_insn (gen_addv64si3_vcc_dup_exec
1604 (gcn_operand_part (V64DImode, operands[0], 0),
1605 gcn_operand_part (DImode, operands[2], 0),
1606 operands[1],
1607 vcc,
1608 gcn_operand_part (V64DImode, operands[3], 0),
1609 operands[4]));
1610 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1611 emit_insn (gen_vec_duplicatev64si_exec
1612 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1613 gcn_gen_undef (V64SImode), operands[4]));
1614 emit_insn (gen_addcv64si3_exec
1615 (dsthi, dsthi, const0_rtx, vcc, vcc,
1616 gcn_operand_part (V64DImode, operands[3], 1),
1617 operands[4]));
1618 DONE;
1619 }
1620 [(set_attr "type" "vmult")
1621 (set_attr "length" "8")])
1622
; Sign-extended 32-bit vector plus broadcast DI scalar: the scratch
; (operand 3) holds the sign words, computed by an arithmetic shift
; right of 31, and is added into the high part with the carry.
1623 (define_insn_and_split "addv64di3_sext_dup2"
1624 [(set (match_operand:V64DI 0 "register_operand" "= v")
1625 (plus:V64DI
1626 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
1627 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1628 (clobber (match_scratch:V64SI 3 "=&v"))
1629 (clobber (reg:DI VCC_REG))]
1630 ""
1631 "#"
1632 "gcn_can_split_p (V64DImode, operands[0])"
1633 [(const_int 0)]
1634 {
1635 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1636 emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
1637 emit_insn (gen_addv64si3_vcc_dup
1638 (gcn_operand_part (V64DImode, operands[0], 0),
1639 gcn_operand_part (DImode, operands[2], 0),
1640 operands[1],
1641 vcc));
1642 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1643 emit_insn (gen_vec_duplicatev64si
1644 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1645 emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
1646 DONE;
1647 }
1648 [(set_attr "type" "vmult")
1649 (set_attr "length" "8")])
1650
; Exec-masked variant of addv64di3_sext_dup2.
1651 (define_insn_and_split "addv64di3_sext_dup2_exec"
1652 [(set (match_operand:V64DI 0 "register_operand" "= v")
1653 (vec_merge:V64DI
1654 (plus:V64DI
1655 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
1656 " vA"))
1657 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1658 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1659 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1660 (clobber (match_scratch:V64SI 5 "=&v"))
1661 (clobber (reg:DI VCC_REG))]
1662 ""
1663 "#"
1664 "gcn_can_split_p (V64DImode, operands[0])
1665 && gcn_can_split_p (V64DImode, operands[3])"
1666 [(const_int 0)]
1667 {
1668 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1669 emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
1670 gcn_gen_undef (V64SImode), operands[4]));
1671 emit_insn (gen_addv64si3_vcc_dup_exec
1672 (gcn_operand_part (V64DImode, operands[0], 0),
1673 gcn_operand_part (DImode, operands[2], 0),
1674 operands[1],
1675 vcc,
1676 gcn_operand_part (V64DImode, operands[3], 0),
1677 operands[4]));
1678 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1679 emit_insn (gen_vec_duplicatev64si_exec
1680 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1681 gcn_gen_undef (V64SImode), operands[4]));
1682 emit_insn (gen_addcv64si3_exec
1683 (dsthi, dsthi, operands[5], vcc, vcc,
1684 gcn_operand_part (V64DImode, operands[3], 1),
1685 operands[4]));
1686 DONE;
1687 }
1688 [(set_attr "type" "vmult")
1689 (set_attr "length" "8")])
1690
1691 ;; }}}
1692 ;; {{{ DS memory ALU: add/sub
1693
; Modes supported by the read-modify-write LDS arithmetic patterns below
; (vector and scalar forms respectively).
1694 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
1695 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1696
1697 ;; FIXME: the vector patterns probably need RD expanded to a vector of
1698 ;; addresses. For now, the only way a vector can get into LDS is
1699 ;; if the user puts it there manually.
1700 ;;
1701 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1702 ;; checked to see if anything can ever use them.
1703
; In-place add to an LDS location (ds_add); operands 0 and 1 must be the
; same memory, enforced by the rtx_equal_p condition.
1704 (define_insn "add<mode>3_ds<exec>"
1705 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1706 (plus:DS_ARITH_MODE
1707 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1708 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1709 "rtx_equal_p (operands[0], operands[1])"
1710 "ds_add%u0\t%A0, %2%O0"
1711 [(set_attr "type" "ds")
1712 (set_attr "length" "8")])
1713
; Scalar-mode counterpart of add<mode>3_ds (same in-place constraint).
1714 (define_insn "add<mode>3_ds_scalar"
1715 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1716 (plus:DS_ARITH_SCALAR_MODE
1717 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1718 "%RD")
1719 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1720 "rtx_equal_p (operands[0], operands[1])"
1721 "ds_add%u0\t%A0, %2%O0"
1722 [(set_attr "type" "ds")
1723 (set_attr "length" "8")])
1724
; In-place subtract from an LDS location (mem -= reg).
1725 (define_insn "sub<mode>3_ds<exec>"
1726 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1727 (minus:DS_ARITH_MODE
1728 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1729 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1730 "rtx_equal_p (operands[0], operands[1])"
1731 "ds_sub%u0\t%A0, %2%O0"
1732 [(set_attr "type" "ds")
1733 (set_attr "length" "8")])
1734
; Scalar-mode counterpart of sub<mode>3_ds.
1735 (define_insn "sub<mode>3_ds_scalar"
1736 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1737 (minus:DS_ARITH_SCALAR_MODE
1738 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1739 " RD")
1740 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1741 "rtx_equal_p (operands[0], operands[1])"
1742 "ds_sub%u0\t%A0, %2%O0"
1743 [(set_attr "type" "ds")
1744 (set_attr "length" "8")])
1745
; Reverse in-place subtract (mem = reg - mem), using ds_rsub.
1746 (define_insn "subr<mode>3_ds<exec>"
1747 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1748 (minus:DS_ARITH_MODE
1749 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1750 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1751 "rtx_equal_p (operands[0], operands[1])"
1752 "ds_rsub%u0\t%A0, %2%O0"
1753 [(set_attr "type" "ds")
1754 (set_attr "length" "8")])
1755
; Scalar-mode counterpart of subr<mode>3_ds.
1756 (define_insn "subr<mode>3_ds_scalar"
1757 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1758 (minus:DS_ARITH_SCALAR_MODE
1759 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1760 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1761 " RD")))]
1762 "rtx_equal_p (operands[0], operands[1])"
1763 "ds_rsub%u0\t%A0, %2%O0"
1764 [(set_attr "type" "ds")
1765 (set_attr "length" "8")])
1766
1767 ;; }}}
1768 ;; {{{ ALU special case: mult
1769
; High 32 bits of the 64-bit signed/unsigned product of two 32-bit
; vectors (v_mul_hi_i32 / v_mul_hi_u32 via the any_extend iterator).
1770 (define_insn "<su>mulv64si3_highpart<exec>"
1771 [(set (match_operand:V64SI 0 "register_operand" "= v")
1772 (truncate:V64SI
1773 (lshiftrt:V64DI
1774 (mult:V64DI
1775 (any_extend:V64DI
1776 (match_operand:V64SI 1 "gcn_alu_operand" " %v"))
1777 (any_extend:V64DI
1778 (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
1779 (const_int 32))))]
1780 ""
1781 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1782 [(set_attr "type" "vop3a")
1783 (set_attr "length" "8")])
1784
; Low-part vector multiply (v_mul_lo_u32; low bits are the same for
; signed and unsigned).
1785 (define_insn "mul<mode>3<exec>"
1786 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1787 (mult:VEC_ALL1REG_INT_MODE
1788 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
1789 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" " vSvA")))]
1790 ""
1791 "v_mul_lo_u32\t%0, %1, %2"
1792 [(set_attr "type" "vop3a")
1793 (set_attr "length" "8")])
1794
; Low-part multiply by a broadcast scalar (operand 2).
1795 (define_insn "mul<mode>3_dup<exec>"
1796 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1797 (mult:VEC_ALL1REG_INT_MODE
1798 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
1799 (vec_duplicate:VEC_ALL1REG_INT_MODE
1800 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
1801 ""
1802 "v_mul_lo_u32\t%0, %1, %2"
1803 [(set_attr "type" "vop3a")
1804 (set_attr "length" "8")])
1805
; 64-bit vector multiply (mod 2^64), split after reload into 32-bit
; partial products:
;   lo(result)  = lo(a) * lo(b)                          (low 32 bits)
;   hi(result)  = highpart(lo(a) * lo(b))
;               + lo(hi(a) * lo(b)) + lo(lo(a) * hi(b))  (mod 2^32)
; The hi(a) * hi(b) term contributes only at bit 64 and so vanishes
; mod 2^64; accumulating it (as the previous version did) corrupted the
; high word, e.g. (2^32 * 2^32) mod 2^64 came out as 2^32 instead of 0.
1806 (define_insn_and_split "mulv64di3"
1807 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1808 (mult:V64DI
1809 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1810 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1811 (clobber (match_scratch:V64SI 3 "=&v"))]
1812 ""
1813 "#"
1814 "reload_completed"
1815 [(const_int 0)]
1816 {
1817 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1818 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1819 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1820 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1821 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1822 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1823 rtx tmp = operands[3];
1824
1825 emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
1826 emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
1827 emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
1828 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1829 emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
1830 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1833 DONE;
1834 })
1835
1836 (define_insn_and_split "mulv64di3_exec"
1837 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1838 (vec_merge:V64DI
1839 (mult:V64DI
1840 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1841 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1842 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1843 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1844 (clobber (match_scratch:V64SI 5 "=&v"))]
1845 ""
1846 "#"
1847 "reload_completed"
1848 [(const_int 0)]
1849 {
1850 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1851 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1852 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1853 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1854 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1855 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1856 rtx exec = operands[4];
1857 rtx tmp = operands[5];
1858
1859 rtx old_lo, old_hi;
1860 if (GET_CODE (operands[3]) == UNSPEC)
1861 {
1862 old_lo = old_hi = gcn_gen_undef (V64SImode);
1863 }
1864 else
1865 {
1866 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1867 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1868 }
1869
1870 rtx undef = gcn_gen_undef (V64SImode);
1871
1872 emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
1873 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
1874 old_hi, exec));
1875 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
1876 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1877 emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
1878 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1879 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_hi, undef, exec));
1880 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1881 DONE;
1882 })
1883
; 64-bit multiply where operand 1 is a zero-extended 32-bit value: its
; high word is known zero, so the hi*lo and hi*hi partial products
; vanish and only two multiplies plus one add remain.
(define_insn_and_split "mulv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand"    "=&v")
	(mult:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" "  v"))
	  (match_operand:V64DI 2 "gcn_alu_operand"   "vDA")))
   (clobber (match_scratch:V64SI 3                   "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];

    emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })

; EXEC-masked variant of mulv64di3_zext; see mulv64di3_exec for the
; handling of the merge operand.
(define_insn_and_split "mulv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand"                "=&v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand"           "  v"))
	    (match_operand:V64DI 2 "gcn_alu_operand"             "vDA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"              "  e")))
   (clobber (match_scratch:V64SI 5                               "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* An UNSPEC merge operand means "previous value undefined".  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (V64SImode);

    emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
1953
; As mulv64di3_zext, but operand 2 is a 64-bit scalar broadcast to all
; lanes; its low/high words are extracted with gcn_operand_part just
; like a vector operand.
(define_insn_and_split "mulv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"    "= &v")
	(mult:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" "   v"))
	  (vec_duplicate:V64DI
	    (match_operand:DI 2 "gcn_alu_operand"    "SvDA"))))
   (clobber (match_scratch:V64SI 3                   "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];

    emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })

; EXEC-masked variant of mulv64di3_zext_dup2.
(define_insn_and_split "mulv64di3_zext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"                "= &v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand"           "   v"))
	    (vec_duplicate:V64DI
	      (match_operand:DI 2 "gcn_alu_operand"              "SvDA")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "  U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"              "   e")))
   (clobber (match_scratch:V64SI 5                               "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* An UNSPEC merge operand means "previous value undefined".  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (V64SImode);

    emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
2025
;; }}}
;; {{{ ALU generic case

; Integer vector modes covered by the generic ALU patterns below.
(define_mode_iterator VEC_INT_MODE [V64SI V64DI])

; Code iterators: each instantiates one pattern per operation.
(define_code_iterator bitop [and ior xor])
(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
(define_code_iterator minmaxop [smin smax umin umax])
2034
; One-operand bit operations.  (The "bitunop" code iterator and the
; mnemonic/expander attributes are defined elsewhere in this file.)
(define_insn "<expander><mode>2<exec>"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand"    "=  v")
	(bitunop:VEC_1REG_INT_MODE
	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

; Two-operand bit operations (and/ior/xor).  The second alternative
; performs the operation directly on an LDS location with a DS
; instruction (destination tied to operand 1 via the "0" constraint).
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand"    "= v,RD")
	(bitop:VEC_1REG_INT_MODE
	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
								     "% v, 0")
	  (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
								     "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
2057
; 64-bit bitwise operations.  Register destinations are split after
; reload into two independent 32-bit operations on the low and high
; halves; an LDS (DS memory) destination is handled whole by the DS
; instruction and is not split.
(define_insn_and_split "<expander>v64di3"
  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand"      "=&v,RD")
	(bitop:V64DI
	  (match_operand:V64DI 1 "gcn_valu_src0_operand"   "% v,RD")
	  (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
  [(set (match_dup 3)
	(bitop:V64SI (match_dup 5) (match_dup 7)))
   (set (match_dup 4)
	(bitop:V64SI (match_dup 6) (match_dup 8)))]
  {
    operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])

; EXEC-masked 64-bit bitwise operations.  The insn condition restricts
; the memory alternative to the read-modify-write form the DS
; instruction can implement (output same as operand 1, register RHS).
(define_insn_and_split "<expander>v64di3_exec"
  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand"          "=&v,RD")
	(vec_merge:V64DI
	  (bitop:V64DI
	    (match_operand:V64DI 1 "gcn_valu_src0_operand"     "% v,RD")
	    (match_operand:V64DI 2 "gcn_valu_src1com_operand"  "vSvB, v"))
	  (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
							       " U0,U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"           "  e, e")))]
  "!memory_operand (operands[0], VOIDmode)
   || (rtx_equal_p (operands[0], operands[1])
       && register_operand (operands[2], VOIDmode))"
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
  [(set (match_dup 5)
	(vec_merge:V64SI
	  (bitop:V64SI (match_dup 7) (match_dup 9))
	  (match_dup 11)
	  (match_dup 4)))
   (set (match_dup 6)
	(vec_merge:V64SI
	  (bitop:V64SI (match_dup 8) (match_dup 10))
	  (match_dup 12)
	  (match_dup 4)))]
  {
    operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
    operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
    operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
2121
; Vector shift by a single scalar amount broadcast to all lanes.  The
; <revmnemonic> template emits %2 before %1 — presumably the
; reversed-operand form of the instruction takes the shift amount
; first.
(define_insn "<expander>v64si3<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "= v")
	(shiftop:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "  v")
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"  "SvB"))))]
  ""
  "v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

; Vector shift with a per-lane vector of shift amounts.
(define_insn "v<expander>v64si3<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "=v")
	(shiftop:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" " v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
  ""
  "v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

; Signed/unsigned min/max; second alternative operates on LDS via DS.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand"    "= v,RD")
	(minmaxop:VEC_1REG_INT_MODE
	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
								     "% v, 0")
	  (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
								     "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
2156
;; }}}
;; {{{ FP binops - special cases

; GCN does not directly provide a DFmode subtract instruction, so we do it by
; adding the negated second operand to the first.
; Both alternatives negate operand 2 via the source modifier; they
; differ only in which side may be a non-register (vSvB) operand.

(define_insn "subv64df3<exec>"
  [(set (match_operand:V64DF 0 "register_operand"  "=  v,   v")
	(minus:V64DF
	  (match_operand:V64DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V64DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])

; Scalar (single-lane) DFmode subtract, same negated-add trick.
(define_insn "subdf"
  [(set (match_operand:DF 0 "register_operand"  "=  v,   v")
	(minus:DF
	  (match_operand:DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])
2186
;; }}}
;; {{{ FP binops - generic

; Floating-point mode iterators: vector and scalar, with and without
; the two-register V64DF/DF modes.
(define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
(define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
(define_mode_iterator FP_MODE [HF SF DF])
(define_mode_iterator FP_1REG_MODE [HF SF])

; Commutative vs. non-commutative FP operations get separate patterns
; below; minus needs the operand-swapped <revmnemonic> alternative.
(define_code_iterator comm_fp [plus mult smin smax])
(define_code_iterator nocomm_fp [minus])
(define_code_iterator all_fp [plus mult minus smin smax])

; Commutative FP vector operations.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"  "=  v")
	(comm_fp:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "%  v")
	  (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

; Commutative FP scalar operations; the second alternative targets an
; LDS destination (operand 1 tied to it via "0", type "ds").
(define_insn "<expander><mode>3"
  [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand"    "=  v,  RL")
	(comm_fp:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "%  v,   0")
	  (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   v_<mnemonic>0\t%0, %1%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8")])

; Non-commutative FP vector operations (minus): the second alternative
; uses the reversed-operand mnemonic so the vSvB operand can be either
; source.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand"  "=  v,   v")
	(nocomm_fp:VEC_FP_1REG_MODE
	  (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])

; Non-commutative FP scalar operations.
(define_insn "<expander><mode>3"
  [(set (match_operand:FP_1REG_MODE 0 "register_operand"  "=  v,   v")
	(nocomm_fp:FP_1REG_MODE
	  (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
2244
;; }}}
;; {{{ FP unops

; Absolute value implemented as "0 + |x|" using the VOP3 source
; modifier; likewise negation below uses the "-x" modifier.
(define_insn "abs<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"            "=v")
	(abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Vector absolute value.
(define_insn "abs<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"   "=v")
	(abs:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Vector negation, as "0 + (-x)".
(define_insn "neg<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"   "=v")
	(neg:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, -%1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Hardware square root; only offered under -funsafe-math-optimizations
; (the instruction's accuracy is presumably below IEEE requirements —
; consistent with the reciprocal comment further down).
(define_insn "sqrt<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"  "=  v")
	(sqrt:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

; Scalar square root, same gating.
(define_insn "sqrt<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v")
	(sqrt:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2291
;; }}}
;; {{{ FP fused multiply and add

; Fused multiply-add, vector form.  The alternatives let the single
; non-register (vSvA) operand appear as either operand 2 or operand 3.
(define_insn "fma<mode>4<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"  "=  v,   v")
	(fma:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; FMA with negated multiplicand (a * -b + c), folding the negation into
; the VOP3 source modifier.
(define_insn "fma<mode>4_negop2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"    "=  v,   v,   v")
	(fma:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:VEC_FP_MODE
	    (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:VEC_FP_MODE 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Scalar FMA.
(define_insn "fma<mode>4"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v,   v")
	(fma:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:FP_MODE 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Scalar FMA with negated multiplicand.
(define_insn "fma<mode>4_negop2"
  [(set (match_operand:FP_MODE 0 "register_operand"    "=  v,   v,   v")
	(fma:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:FP_MODE
	    (match_operand:FP_MODE 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:FP_MODE 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2340
;; }}}
;; {{{ FP division

; Hardware reciprocal approximation, 1.0/x, vector form.  The constant
; 1.0 is expressed as (float (const_int 1)) duplicated over the lanes.
(define_insn "recip<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"  "=  v")
	(div:VEC_FP_MODE
	  (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

; Scalar reciprocal approximation.
(define_insn "recip<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v")
	(div:FP_MODE
	  (float:FP_MODE (const_int 1))
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2363
;; Do division via a = b * 1/c
;; The v_rcp_* instructions are not sufficiently accurate on their own,
;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
;; which the ISA manual says is enough to improve the reciprocal accuracy.
;;
;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.

; Vector division expander (only under -freciprocal-math):
;   initrcp = rcp(c)                 -- hardware approximation
;   fma     = 2 - initrcp * c        -- Newton-Raphson correction factor
;   rcp     = initrcp * fma          -- refined 1/c
;   result  = b * rcp                -- skipped when b is exactly -1
;                                       (pure-reciprocal fast path)
(define_expand "div<mode>3"
  [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
   (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
   (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = gcn_vec_constant (<MODE>mode,
		  const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;

    /* Detect a constant -1 numerator: then the final multiply is a
       plain negated reciprocal and can target operand 0 directly.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
		   && real_identical
		        (CONST_DOUBLE_REAL_VALUE
			  (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));

    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));

    DONE;
  })
2402
; Scalar division expander; same Newton-Raphson refinement as the
; vector version above (see the comment there, including the FIXME
; about denormals/NaN/divide-by-zero).
(define_expand "div<mode>3"
  [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
   (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
   (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = const_double_from_real_value (dconst2, <MODE>mode);
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;

    /* A constant -1 numerator makes this a pure (negated) reciprocal.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
		   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
				      &dconstm1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));

    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));

    DONE;
  })
2432
;; }}}
;; {{{ Int/FP conversions

; Scalar source/destination modes for conversions.
(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])

; Vector conversion modes: any -> float, and float -> integer.
(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
(define_mode_iterator VCVT_IMODE [V64HI V64SI])

; Conversion operations, with attributes giving the pattern-name stem
; and the signed/unsigned v_cvt operand-suffix pair for each.
(define_code_iterator cvt_op [fix unsigned_fix
			      float unsigned_float
			      float_extend float_truncate])
(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
			    (float "float") (unsigned_float "floatuns")
			    (float_extend "extend") (float_truncate "trunc")])
(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
				(float "%i0%i1") (unsigned_float "%i0%u1")
				(float_extend "%i0%i1")
				(float_truncate "%i0%i1")])
2453
; Scalar conversions; gcn_valid_cvt_p filters the iterator cross
; product down to the mode pairs the hardware supports.
(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
  [(set (match_operand:CVT_TO_MODE 0 "register_operand"    "=  v")
	(cvt_op:CVT_TO_MODE
	  (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

; Vector conversions to floating-point destinations.
(define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
  [(set (match_operand:VCVT_FMODE 0 "register_operand" "=  v")
	(cvt_op:VCVT_FMODE
	  (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

; Vector conversions from floating-point to integer destinations.
(define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
  [(set (match_operand:VCVT_IMODE 0 "register_operand"  "=  v")
	(cvt_op:VCVT_IMODE
	  (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2483
;; }}}
;; {{{ Int/int conversions

; Truncation and zero-extension share one SDWA pattern; sign-extension
; needs the sext() source modifier and gets its own pattern below.
(define_code_iterator zero_convert [truncate zero_extend])
(define_code_attr convop [
	(sign_extend "extend")
	(zero_extend "zero_extend")
	(truncate "trunc")])

; Truncate or zero-extend between the single-register integer vector
; modes using an SDWA (sub-dword addressing) move with explicit
; source/destination byte-lane selects.
(define_insn "<convop><VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand"    "=v")
	(zero_convert:VEC_ALL1REG_INT_MODE
	  (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand"    " v")))]
  ""
  "v_mov_b32_sdwa\t%0, %1 dst_sel:<VEC_ALL1REG_INT_MODE:sdwa> dst_unused:UNUSED_PAD src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])

; Sign-extend via the SDWA sext() source modifier.
(define_insn "extend<VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand"    "=v")
	(sign_extend:VEC_ALL1REG_INT_MODE
	  (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand"    " v")))]
  ""
  "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])
2510
;; GCC can already do these for scalar types, but not for vector types.
;; Unfortunately you can't just do SUBREG on a vector to select the low part,
;; so there must be a few tricks here.

; Truncate a 64-bit vector: take the low-word register pair, then
; either move it directly (target is V64SI) or truncate it further
; with the 32-bit truncation pattern.
(define_insn_and_split "truncv64di<mode>2"
  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
	(truncate:VEC_ALL1REG_INT_MODE
	  (match_operand:V64DI 1 "gcn_alu_operand"               " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
    rtx out = operands[0];

    if (<MODE>mode != V64SImode)
      emit_insn (gen_truncv64si<mode>2 (out, inlo));
    else
      emit_move_insn (out, inlo);
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2534
; EXEC-masked 64-bit truncation; inactive lanes keep operand 2's value.
(define_insn_and_split "truncv64di<mode>2_exec"
  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand"      "=v")
	(vec_merge:VEC_ALL1REG_INT_MODE
	  (truncate:VEC_ALL1REG_INT_MODE
	    (match_operand:V64DI 1 "gcn_alu_operand"                  " v"))
	  (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_or_unspec_operand"
								      "U0")
	  (match_operand:DI 3 "gcn_exec_operand"                      " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out = operands[0];
    rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
    rtx merge = operands[2];
    rtx exec = operands[3];

    if (<MODE>mode != V64SImode)
      emit_insn (gen_truncv64si<mode>2_exec (out, inlo, merge, exec));
    else
      emit_insn (gen_mov<mode>_exec (out, inlo, exec, merge));
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2560
; Extend a narrow integer vector to 64 bits: extend (or move) into the
; low word, then synthesise the high word.  The compile-time character
; test on <su> picks sign-extension (arithmetic shift of the low word
; by 31) versus zero-extension (broadcast zero).
(define_insn_and_split "<convop><mode>v64di2"
  [(set (match_operand:V64DI 0 "register_operand"                 "=v")
	(any_extend:V64DI
	  (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx in = operands[1];

    if (<MODE>mode != V64SImode)
      emit_insn (gen_<convop><mode>v64si2 (outlo, in));
    else
      emit_move_insn (outlo, in);
    if ('<su>' == 's')
      emit_insn (gen_ashrv64si3 (outhi, outlo, GEN_INT (31)));
    else
      emit_insn (gen_vec_duplicatev64si (outhi, const0_rtx));
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
2585
; EXEC-masked widening; each half merges with the matching half of
; operand 2 in the inactive lanes.
(define_insn_and_split "<convop><mode>v64di2_exec"
  [(set (match_operand:V64DI 0 "register_operand"                   "=v")
	(vec_merge:V64DI
	  (any_extend:V64DI
	    (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v"))
	  (match_operand:V64DI 2 "gcn_alu_or_unspec_operand"        "U0")
	  (match_operand:DI 3 "gcn_exec_operand"                    " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx in = operands[1];
    rtx mergelo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx mergehi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[3];

    if (<MODE>mode != V64SImode)
      emit_insn (gen_<convop><mode>v64si2_exec (outlo, in, mergelo, exec));
    else
      emit_insn (gen_mov<mode>_exec (outlo, in, exec, mergelo));
    if ('<su>' == 's')
      emit_insn (gen_ashrv64si3_exec (outhi, outlo, GEN_INT (31), mergehi,
				      exec));
    else
      emit_insn (gen_vec_duplicatev64si_exec (outhi, const0_rtx, mergehi,
					      exec));
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
2618
;; }}}
;; {{{ Vector comparison/merge

; Vector compare producing a 64-bit lane mask.  The alternatives cover
; the implicit VCC destination, the EXEC destination (v_cmpx, which
; clobbers VCC — hence the scratch), and an arbitrary scalar register
; pair via the VOP3 encoding.  %E1 emits the comparison-specific
; mnemonic suffix.
(define_insn "vec_cmp<mode>di"
  [(set (match_operand:DI 0 "register_operand"	      "=cV,cV,  e, e, Sg,Sg")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
						      "vSv, B,vSv, B,  v,vA")
	   (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
						      "  v, v,  v, v, vA, v")]))
   (clobber (match_scratch:DI 4		      "= X, X, cV,cV,  X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])

(define_expand "vec_cmpu<mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "comparison_operator"
     [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
  ""
  {
    /* Unsigned comparisons use the same patterns as signed comparisons,
       except that they use unsigned operators (e.g. LTU vs LT).
       The '%E1' directive then does the Right Thing.  */
    emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
				    operands[3]));
    DONE;
  })
2655
; As vec_cmp<mode>di, but the result mask is ANDed with EXEC so only
; active lanes contribute.  The AND is implicit in the hardware
; compare's lane masking, so the templates are unchanged.
(define_insn "vec_cmp<mode>di_exec"
  [(set (match_operand:DI 0 "register_operand"	       "=cV,cV,  e, e, Sg,Sg")
	(and:DI
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
						       "vSv, B,vSv, B,  v,vA")
	     (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
						       "  v, v,  v, v, vA, v")])
	  (match_operand:DI 4 "gcn_exec_reg_operand"   "  e, e,  e, e,  e, e")))
   (clobber (match_scratch:DI 5		       "= X, X, cV,cV,  X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])

; Compare against a scalar broadcast to all lanes.
(define_insn "vec_cmp<mode>di_dup"
  [(set (match_operand:DI 0 "register_operand"		   "=cV,cV, e,e,Sg")
	(match_operator 1 "comparison_operator"
	  [(vec_duplicate:VEC_1REG_MODE
	     (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
							   " Sv, B,Sv,B, A"))
	   (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
							   "  v, v, v,v, v")]))
   (clobber (match_scratch:DI 4			   "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])

; EXEC-masked compare against a scalar broadcast.
(define_insn "vec_cmp<mode>di_dup_exec"
  [(set (match_operand:DI 0 "register_operand"		    "=cV,cV, e,e,Sg")
	(and:DI
	  (match_operator 1 "comparison_operator"
	    [(vec_duplicate:VEC_1REG_MODE
	       (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
							    " Sv, B,Sv,B, A"))
	     (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
							    "  v, v, v,v, v")])
	  (match_operand:DI 4 "gcn_exec_reg_operand"	    "  e, e, e,e, e")))
   (clobber (match_scratch:DI 5			    "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])
2716
2717 (define_expand "vcond_mask_<mode>di"
2718 [(parallel
2719 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "")
2720 (vec_merge:VEC_ALLREG_MODE
2721 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand" "")
2722 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand" "")
2723 (match_operand:DI 3 "register_operand" "")))
2724 (clobber (scratch:V64DI))])]
2725 ""
2726 "")
2727
; Signed vector conditional: compute the comparison operands[3..5] into a
; DImode lane mask, then merge operands 1 and 2 under that mask.
(define_expand "vcond<VEC_ALL1REG_MODE:mode><VEC_1REG_ALT:mode>"
  [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
   (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di
	       (tmp, operands[3], operands[4], operands[5]));
    emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
	       (operands[0], operands[1], operands[2], tmp));
    DONE;
  })
2744
; As vcond, but the comparison is restricted to the lanes enabled in the
; execution mask (operand 6); masked-off lanes compare as false, so the
; merge takes them from operand 2.
(define_expand "vcond<VEC_ALL1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
  [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
   (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di_exec
	       (tmp, operands[3], operands[4], operands[5], operands[6]));
    emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
	       (operands[0], operands[1], operands[2], tmp));
    DONE;
  })
2762
; Unsigned vector conditional; identical expansion to vcond but restricted
; to the integer comparison modes (the comparison code in operand 3 carries
; the signedness).
(define_expand "vcondu<VEC_ALL1REG_MODE:mode><VEC_1REG_INT_ALT:mode>"
  [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
   (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di
	       (tmp, operands[3], operands[4], operands[5]));
    emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
	       (operands[0], operands[1], operands[2], tmp));
    DONE;
  })
2779
; As vcondu, but the comparison honours the execution mask in operand 6;
; masked-off lanes take their value from operand 2.
(define_expand "vcondu<VEC_ALL1REG_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
  [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
   (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di_exec
	       (tmp, operands[3], operands[4], operands[5], operands[6]));
    emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
	       (operands[0], operands[1], operands[2], tmp));
    DONE;
  })
2797
2798 ;; }}}
2799 ;; {{{ Fully masked loop support
2800
; while_ult pattern: set bit I of the DImode mask (operand 0) iff
; operands[1] + I < operands[2] — used to build loop masks for
; fully-masked (predicated) vector loops.
(define_expand "while_ultsidi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:SI 1 "")
   (match_operand:SI 2 "")]
  ""
  {
    if (GET_CODE (operands[1]) != CONST_INT
	|| GET_CODE (operands[2]) != CONST_INT)
      {
	/* V1 is preloaded with the lane-index vector 0,1,2,...  */
	rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
	rtx tmp = _0_1_2_3;
	if (GET_CODE (operands[1]) != CONST_INT
	    || INTVAL (operands[1]) != 0)
	  {
	    /* Bias the lane indices by the start value.  */
	    tmp = gen_reg_rtx (V64SImode);
	    emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
	  }
	/* Lane I active iff operands[2] > operands[1] + I.  */
	emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
					   gen_rtx_GT (VOIDmode, 0, 0),
					   operands[2], tmp));
      }
    else
      {
	/* Both bounds constant: compute the mask at compile time.  Clamp
	   the shift count to [0,63] — "diff >= 64" means all 64 lanes are
	   active and "diff <= 0" means none; shifting by a negative or
	   over-wide count would be undefined behaviour in the compiler.
	   (diff == 0 gave 0 before as well; the guard only removes the
	   negative-shift UB.)  */
	HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
	HOST_WIDE_INT mask = (diff >= 64 ? -1
			      : diff <= 0 ? 0
			      : ~((unsigned HOST_WIDE_INT)-1 << diff));
	emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
      }
    DONE;
  })
2831
; Masked vector load: expand to a gather whose per-lane addresses are built
; from the scalar base address in the MEM, executed under the mask in
; operand 2.  Inactive lanes receive an undefined value.
(define_expand "maskload<mode>di"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
		(<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
    /* Carry the address space and volatility of the original MEM through
       to the gather pattern as constant operands.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
    rtx undef = gcn_gen_undef (<MODE>mode);
    emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
					   exec));
    DONE;
  })
2848
; Masked vector store: expand to a scatter under the mask in operand 2;
; only active lanes write memory.
(define_expand "maskstore<mode>di"
  [(match_operand:VEC_ALLREG_MODE 0 "memory_operand")
   (match_operand:VEC_ALLREG_MODE 1 "register_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
		(<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
    /* Propagate address space and volatility of the destination MEM.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
    emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
    DONE;
  })
2863
; Masked gather load: base (operand 1), per-lane offsets (operand 2),
; scale/sign info (operands 3-4), executed under the mask in operand 5.
(define_expand "mask_gather_load<mode>"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);

    /* TODO: more conversions will be needed when more types are vectorized. */
    if (GET_MODE (operands[2]) == V64DImode)
      {
	/* Narrow 64-bit offsets to 32-bit, which is what the gather
	   pattern expects.  */
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_truncv64div64si2_exec (tmp, operands[2],
					      gcn_gen_undef (V64SImode),
					      exec));
	operands[2] = tmp;
      }

    emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
				      operands[3], operands[4], exec));
    DONE;
  })
2889
; Masked scatter store: base (operand 0), per-lane offsets (operand 1),
; scale/sign info (operands 2-3), source vector (operand 4), executed
; under the mask in operand 5.
(define_expand "mask_scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 4 "register_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);

    /* TODO: more conversions will be needed when more types are vectorized. */
    if (GET_MODE (operands[1]) == V64DImode)
      {
	/* Narrow 64-bit offsets to 32-bit for the scatter pattern.  */
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_truncv64div64si2_exec (tmp, operands[1],
					      gcn_gen_undef (V64SImode),
					      exec));
	operands[1] = tmp;
      }

    emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
				       operands[3], operands[4], exec));
    DONE;
  })
2915
; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
(define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
(define_mode_iterator COND_INT_MODE [V64SI V64DI])

; Binary operations supported by the predicated cond_* expanders below.
(define_code_iterator cond_op [plus minus])

; Conditional (predicated) arithmetic: perform op2 <cond_op> op3 in the
; lanes enabled by the mask in operand 1; the _exec pattern receives
; operand 4, which supplies the values for disabled lanes.
(define_expand "cond_<expander><mode>"
  [(match_operand:COND_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_op:COND_MODE
     (match_operand:COND_MODE 2 "gcn_alu_operand")
     (match_operand:COND_MODE 3 "gcn_alu_operand"))
   (match_operand:COND_MODE 4 "register_operand")]
  ""
  {
    ; Legitimize operands the _exec insn requires in registers.
    operands[1] = force_reg (DImode, operands[1]);
    operands[2] = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
					   operands[3], operands[4],
					   operands[1]));
    DONE;
  })
2939
; Bitwise operations for the predicated expander; integer modes only.
(define_code_iterator cond_bitop [and ior xor])

; Conditional (predicated) bitwise operation; same expansion scheme as the
; arithmetic cond_* pattern: operand 1 is the lane mask, operand 4 supplies
; values for disabled lanes via the _exec insn.
(define_expand "cond_<expander><mode>"
  [(match_operand:COND_INT_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_bitop:COND_INT_MODE
     (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
     (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
   (match_operand:COND_INT_MODE 4 "register_operand")]
  ""
  {
    ; Legitimize operands the _exec insn requires in registers.
    operands[1] = force_reg (DImode, operands[1]);
    operands[2] = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
					   operands[3], operands[4],
					   operands[1]));
    DONE;
  })
2959
2960 ;; }}}
2961 ;; {{{ Vector reductions
2962
; Reduction operations implemented via DPP row-shift unspecs.
(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
				   UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
				   UNSPEC_PLUS_DPP_SHR
				   UNSPEC_AND_DPP_SHR
				   UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

; Subset usable on two-register (64-bit element) vectors.
(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
					UNSPEC_AND_DPP_SHR
					UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

; FIXME: Isn't there a better way of doing this?
; Maps each unspec back to its own name, so C fragments can say
; <reduc_unspec> and get the enum value.
(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
			       (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
			       (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
			       (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
			       (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
			       (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
			       (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
			       (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])

; Operation name used to build the reduc_*_scal_* pattern names.
(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
			   (UNSPEC_SMAX_DPP_SHR "smax")
			   (UNSPEC_UMIN_DPP_SHR "umin")
			   (UNSPEC_UMAX_DPP_SHR "umax")
			   (UNSPEC_PLUS_DPP_SHR "plus")
			   (UNSPEC_AND_DPP_SHR "and")
			   (UNSPEC_IOR_DPP_SHR "ior")
			   (UNSPEC_XOR_DPP_SHR "xor")])

; Assembler mnemonic template for each reduction step; the %i0/%u0/%b0
; escapes expand to a mode suffix in the output code.
(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
			     (UNSPEC_SMAX_DPP_SHR "v_max%i0")
			     (UNSPEC_UMIN_DPP_SHR "v_min%u0")
			     (UNSPEC_UMAX_DPP_SHR "v_max%u0")
			     (UNSPEC_PLUS_DPP_SHR "v_add%u0")
			     (UNSPEC_AND_DPP_SHR "v_and%b0")
			     (UNSPEC_IOR_DPP_SHR "v_or%b0")
			     (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
3000
; Reduce a vector to a scalar: gcn_expand_reduc_scalar emits the log2(64)
; DPP shift/combine steps, leaving the result in lane 63, which is then
; extracted into the scalar destination.
(define_expand "reduc_<reduc_op>_scal_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
	(unspec:<SCALAR_MODE>
	  [(match_operand:VEC_1REG_MODE 1 "register_operand")]
	  REDUC_UNSPEC))]
  ""
  {
    rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
				       <reduc_unspec>);

    /* The result of the reduction is in lane 63 of tmp.  */
    emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));

    DONE;
  })
3016
; As reduc_<reduc_op>_scal_<mode>, but for 64-bit elements (two-register
; vectors); restricted to the REDUC_2REG_UNSPEC operations.
(define_expand "reduc_<reduc_op>_scal_v64di"
  [(set (match_operand:DI 0 "register_operand")
	(unspec:DI
	  [(match_operand:V64DI 1 "register_operand")]
	  REDUC_2REG_UNSPEC))]
  ""
  {
    rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
				       <reduc_unspec>);

    /* The result of the reduction is in lane 63 of tmp.  */
    emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));

    DONE;
  })
3032
; One DPP row-shift reduction step: combine operand 1 with operand 2
; shifted by the constant lane count in operand 3.  Excluded for integer
; addition on GCN3, which lacks a suitable carry-less form here —
; presumably covered by the plus_carry patterns below (TODO confirm).
(define_insn "*<reduc_op>_dpp_shr_<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"   "=v")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
	   (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
	   (match_operand:SI 3 "const_int_operand"	     "n")]
	  REDUC_UNSPEC))]
  "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
     && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
  {
    return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
				    <reduc_unspec>, INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
3048
; Two-register (64-bit element) variant: after reload, split into two
; independent single-register operations on the low and high halves.
; NOTE(review): for UNSPEC_PLUS_DPP_SHR the halves are added without any
; carry propagation; DImode add reductions are presumably routed through
; *plus_carry_dpp_shr_v64di instead — confirm against
; gcn_expand_reduc_scalar.
(define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
  [(set (match_operand:V64DI 0 "register_operand"    "=&v")
	(unspec:V64DI
	  [(match_operand:V64DI 1 "register_operand" "v0")
	   (match_operand:V64DI 2 "register_operand" "v0")
	   (match_operand:SI 3 "const_int_operand"    "n")]
	  REDUC_2REG_UNSPEC))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4)
	(unspec:V64SI
	  [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
   (set (match_dup 5)
	(unspec:V64SI
	  [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
  {
    /* Low/high 32-bit halves of destination and both sources.  */
    operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
3075
3076 ; Special cases for addition.
3077
; Addition DPP step that produces a carry-out in VCC (hence the clobber);
; the mnemonic differs between GCN3 (v_add) and later ISAs (v_add_co).
(define_insn "*plus_carry_dpp_shr_v64si"
  [(set (match_operand:V64SI 0 "register_operand"    "=v")
	(unspec:V64SI
	  [(match_operand:V64SI 1 "register_operand" "v")
	   (match_operand:V64SI 2 "register_operand" "v")
	   (match_operand:SI 3 "const_int_operand"   "n")]
	  UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
    return gcn_expand_dpp_shr_insn (V64SImode, insn,
				    UNSPEC_PLUS_CARRY_DPP_SHR,
				    INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
3095
; Add-with-carry-in DPP step: operand 4 is the incoming carry (VCC), and a
; new carry-out is produced in VCC (the clobber).  Mnemonic differs between
; GCN3 (v_addc) and later ISAs (v_addc_co).
(define_insn "*plus_carry_in_dpp_shr_v64si"
  [(set (match_operand:V64SI 0 "register_operand"    "=v")
	(unspec:V64SI
	  [(match_operand:V64SI 1 "register_operand" "v")
	   (match_operand:V64SI 2 "register_operand" "v")
	   (match_operand:SI 3 "const_int_operand"   "n")
	   (match_operand:DI 4 "register_operand"    "cV")]
	  UNSPEC_PLUS_CARRY_IN_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
    return gcn_expand_dpp_shr_insn (V64SImode, insn,
				    UNSPEC_PLUS_CARRY_IN_DPP_SHR,
				    INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
3114
; 64-bit add DPP step: after reload, split into a low-half add that
; produces a carry in VCC followed by a high-half add that consumes it —
; this is the carry-correct counterpart of the generic v64di split above.
(define_insn_and_split "*plus_carry_dpp_shr_v64di"
  [(set (match_operand:V64DI 0 "register_operand"    "=&v")
	(unspec:V64DI
	  [(match_operand:V64DI 1 "register_operand" "v0")
	   (match_operand:V64DI 2 "register_operand" "v0")
	   (match_operand:SI 3 "const_int_operand"   "n")]
	  UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 4)
		   (unspec:V64SI
		     [(match_dup 6) (match_dup 8) (match_dup 3)]
		     UNSPEC_PLUS_CARRY_DPP_SHR))
	      (clobber (reg:DI VCC_REG))])
   (parallel [(set (match_dup 5)
		   (unspec:V64SI
		     [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
		     UNSPEC_PLUS_CARRY_IN_DPP_SHR))
	      (clobber (reg:DI VCC_REG))])]
  {
    /* Low/high 32-bit halves of destination and both sources.  */
    operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
3146
3147 ; Instructions to move a scalar value from lane 63 of a vector register.
3148 (define_insn "mov_from_lane63_<mode>"
3149 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
3150 (unspec:<SCALAR_MODE>
3151 [(match_operand:VEC_ALL1REG_MODE 1 "register_operand" "v,v")]
3152 UNSPEC_MOV_FROM_LANE63))]
3153 ""
3154 "@
3155 v_readlane_b32\t%0, %1, 63
3156 v_mov_b32\t%0, %1 wave_ror:1"
3157 [(set_attr "type" "vop3a,vop_dpp")
3158 (set_attr "exec" "none,*")
3159 (set_attr "length" "8")])
3160
; 64-bit variant of mov_from_lane63: moves both 32-bit halves.  For the
; VGPR alternative the halves are moved low-first or high-first depending
; on register ordering — presumably to cope with overlapping source and
; destination register pairs (TODO confirm).
(define_insn "mov_from_lane63_v64di"
  [(set (match_operand:DI 0 "register_operand"	   "=Sg,v")
	(unspec:DI
	  [(match_operand:V64DI 1 "register_operand" "v,v")]
	  UNSPEC_MOV_FROM_LANE63))]
  ""
  "@
   v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
   * if (REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
	      \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
	      \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
  [(set_attr "type" "vop3a,vop_dpp")
   (set_attr "exec" "none,*")
   (set_attr "length" "8")])
3178
3179 ;; }}}
3180 ;; {{{ Miscellaneous
3181
; Linear series: lane I of the result = operands[1] + I * operands[2].
; V1 is preloaded with the lane-index vector 0,1,2,...,63.
(define_expand "vec_seriesv64si"
  [(match_operand:V64SI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:SI 2 "gcn_alu_operand")]
  ""
  {
    rtx tmp = gen_reg_rtx (V64SImode);
    rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));

    emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
    emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
    DONE;
  })
3195
; 64-bit linear series: as vec_seriesv64si, but the lane index (at most 63)
; is zero-extended before the DImode multiply.
(define_expand "vec_seriesv64di"
  [(match_operand:V64DI 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx tmp = gen_reg_rtx (V64DImode);
    rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));

    emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
    emit_insn (gen_addv64di3_dup (operands[0], tmp, operands[1]));
    DONE;
  })
3209
3210 ;; }}}