;; Copyright (C) 2016-2019 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; {{{ Vector iterators

; Vector modes for one vector register
(define_mode_iterator VEC_1REG_MODE
                      [V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator VEC_1REG_ALT
                      [V64QI V64HI V64SI V64HF V64SF])

(define_mode_iterator VEC_1REG_INT_MODE
                      [V64QI V64HI V64SI])
(define_mode_iterator VEC_1REG_INT_ALT
                      [V64QI V64HI V64SI])

; Vector modes for two vector registers
(define_mode_iterator VEC_2REG_MODE
                      [V64DI V64DF])

; All of the above
(define_mode_iterator VEC_REG_MODE
                      [V64QI V64HI V64SI V64HF V64SF    ; Single reg
                       V64DI V64DF])                    ; Double reg

(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])

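; A mode attribute substitutes per-mode text into any pattern that uses the
; iterators; for example (standard GCC iterator semantics), the pattern
; named "vec_extract<mode><scalar_mode>" below instantiates for V64SI as
; "vec_extractv64sisi", with <SCALAR_MODE> naming the SI element mode.
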
;; }}}
;; {{{ Substitutions

(define_subst_attr "exec" "vec_merge"
                   "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
                   "" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
                   "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
                   "" "_exec")

(define_subst "vec_merge"
  [(set (match_operand:VEC_REG_MODE 0)
        (match_operand:VEC_REG_MODE 1))]
  ""
  [(set (match_dup 0)
        (vec_merge:VEC_REG_MODE
          (match_dup 1)
          (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])

(define_subst "vec_merge_with_clobber"
  [(set (match_operand:VEC_REG_MODE 0)
        (match_operand:VEC_REG_MODE 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
        (vec_merge:VEC_REG_MODE
          (match_dup 1)
          (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])

(define_subst "vec_merge_with_vcc"
  [(set (match_operand:VEC_REG_MODE 0)
        (match_operand:VEC_REG_MODE 1))
   (set (match_operand:DI 2)
        (match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
           (vec_merge:VEC_REG_MODE
             (match_dup 1)
             (match_operand:VEC_REG_MODE 4
                                         "gcn_register_or_unspec_operand" "U0")
             (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
           (and:DI (match_dup 3)
                   (reg:DI EXEC_REG)))])])

(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand 0)
           (match_operand 1)
           (match_operand 2)
           (match_operand 3)]
          UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 0)
           (match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
          UNSPEC_SCATTER))])

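; As an illustration of how the substitutions above are used (a sketch of
; the standard define_subst behaviour): a pattern written with "<exec>" in
; its name, such as "mulv64si3<exec>" below, generates both a plain
; "mulv64si3" variant and a "mulv64si3_exec" variant in which the result
; is wrapped in a vec_merge taking two extra operands -- the previous
; destination value (constraint "U0") and the EXEC register (constraint
; "e") that selects the active lanes.
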
;; }}}
;; {{{ Vector moves

; This is the entry point for all vector register moves.  Memory accesses
; can also come this way, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc. patterns.
122
123 (define_expand "mov<mode>"
124 [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
125 (match_operand:VEC_REG_MODE 1 "general_operand"))]
126 ""
127 {
128 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
129 {
130 operands[1] = force_reg (<MODE>mode, operands[1]);
131 rtx scratch = gen_rtx_SCRATCH (V64DImode);
132 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
133 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
134 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
135 operands[0],
136 scratch);
137 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
138 DONE;
139 }
140 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
141 {
142 rtx scratch = gen_rtx_SCRATCH (V64DImode);
143 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
144 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
145 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
146 operands[1],
147 scratch);
148 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
149 DONE;
150 }
151 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
152 {
153 gcc_assert (!reload_completed);
154 rtx scratch = gen_reg_rtx (V64DImode);
155 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
156 DONE;
157 }
158 })
159
160 ; A pseudo instruction that helps LRA use the "U0" constraint.
161
162 (define_insn "mov<mode>_unspec"
163 [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand" "=v")
164 (match_operand:VEC_REG_MODE 1 "gcn_unspec_operand" " U"))]
165 ""
166 ""
167 [(set_attr "type" "unknown")
168 (set_attr "length" "0")])
169
170 (define_insn "*mov<mode>"
171 [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v")
172 (match_operand:VEC_1REG_MODE 1 "general_operand" "vA,B"))]
173 ""
174 "v_mov_b32\t%0, %1"
175 [(set_attr "type" "vop1,vop1")
176 (set_attr "length" "4,8")])
177
178 (define_insn "mov<mode>_exec"
179 [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand"
180 "=v, v, v, v, v, m")
181 (vec_merge:VEC_1REG_MODE
182 (match_operand:VEC_1REG_MODE 1 "general_operand"
183 "vA, B, v,vA, m, v")
184 (match_operand:VEC_1REG_MODE 3 "gcn_alu_or_unspec_operand"
185 "U0,U0,vA,vA,U0,U0")
186 (match_operand:DI 2 "register_operand" " e, e,cV,Sv, e, e")))
187 (clobber (match_scratch:V64DI 4 "=X, X, X, X,&v,&v"))]
188 "!MEM_P (operands[0]) || REG_P (operands[1])"
189 "@
190 v_mov_b32\t%0, %1
191 v_mov_b32\t%0, %1
192 v_cndmask_b32\t%0, %3, %1, vcc
193 v_cndmask_b32\t%0, %3, %1, %2
194 #
195 #"
196 [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
197 (set_attr "length" "4,8,4,8,16,16")])
198
; This variant does not accept an unspec, but does permit MEM
; read/modify/write, which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v, v, m")
;        (vec_merge:VEC_1REG_MODE
;          (match_operand:VEC_1REG_MODE 1 "general_operand"    "vA,B, m, v")
;          (match_dup 0)
;          (match_operand:DI 2 "gcn_exec_reg_operand"          " e,e, e, e")))
;   (clobber (match_scratch:V64DI 3                            "=X,X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  v_mov_b32\t%0, %1
;  v_mov_b32\t%0, %1
;  #
;  #"
;  [(set_attr "type" "vop1,vop1,*,*")
;   (set_attr "length" "4,8,16,16")])

(define_insn "*mov<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
        (match_operand:VEC_2REG_MODE 1 "general_operand"      "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
                                                 "= v,   v,   v, v, m")
        (vec_merge:VEC_2REG_MODE
          (match_operand:VEC_2REG_MODE 1 "general_operand"
                                                 "vDB,  v0,  v0, m, v")
          (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
                                                 " U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 2 "register_operand" "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:V64DI 4               "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
        case 1:
          return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
                 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
        case 2:
          return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
                 "v_cndmask_b32\t%H0, %H3, %H1, %2";
        }
    else
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
        case 1:
          return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
                 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
        case 2:
          return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
                 "v_cndmask_b32\t%L0, %L3, %L1, %2";
        }

    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write, which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
;        (vec_merge:VEC_2REG_MODE
;          (match_operand:VEC_2REG_MODE 1 "general_operand"    "vDB, m, v")
;          (match_dup 0)
;          (match_operand:DI 2 "gcn_exec_reg_operand"          " e, e, e")))
;   (clobber (match_scratch:V64DI 3                            "=X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
;      return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
;    else \
;      return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
;  #
;  #"
;  [(set_attr "type" "vmult,*,*")
;   (set_attr "length" "16,16,16")])

; An SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "= v, v, v, m")
        (unspec:VEC_1REG_MODE
          [(match_operand:VEC_1REG_MODE 1 "general_operand"   " vA,vB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"         "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
        (unspec:VEC_2REG_MODE
          [(match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"         "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "8,12,12")])

; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload.  It allows a reload with a scratch register.

(define_expand "reload_in<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand" "= v")
        (match_operand:VEC_REG_MODE 1 "memory_operand"   "  m"))
   (clobber (match_operand:V64DI 2 "register_operand"    "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; reload_out is similar to reload_in, above.

(define_expand "reload_out<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand"   "= m")
        (match_operand:VEC_REG_MODE 1 "register_operand" "  v"))
   (clobber (match_operand:V64DI 2 "register_operand"    "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; Expand scalar addresses into gather/scatter patterns

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
        (unspec:VEC_REG_MODE
          [(match_operand:VEC_REG_MODE 1 "general_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
                    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       operands[0],
                                                       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
        (vec_merge:VEC_REG_MODE
          (match_operand:VEC_REG_MODE 1 "general_operand")
          (match_operand:VEC_REG_MODE 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1)
                     (match_dup 6) (match_dup 7) (match_dup 3)]
                    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3],
                                                       operands[0],
                                                       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
        (unspec:VEC_REG_MODE
          [(match_operand:VEC_REG_MODE 1 "memory_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (match_dup 0)
        (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
                              (mem:BLK (scratch))]
                             UNSPEC_GATHER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       operands[1],
                                                       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
        (vec_merge:VEC_REG_MODE
          (match_operand:VEC_REG_MODE 1 "memory_operand")
          (match_operand:VEC_REG_MODE 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (match_dup 0)
        (vec_merge:VEC_REG_MODE
          (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
                                (mem:BLK (scratch))]
                               UNSPEC_GATHER)
          (match_dup 2)
          (match_dup 3)))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3],
                                                       operands[1],
                                                       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; TODO: Add zero/sign extending variants.

;; }}}
;; {{{ Lane moves

; v_writelane and v_readlane work regardless of exec flags.
; We allow the source to be scratch.
;
; FIXME: these should take A immediates.

(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"       "= v")
        (vec_merge:VEC_1REG_MODE
          (vec_duplicate:VEC_1REG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand"   " Sv"))
          (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
                                                                " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"         "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; FIXME: 64-bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"       "= v")
        (vec_merge:VEC_2REG_MODE
          (vec_duplicate:VEC_2REG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand"   " Sv"))
          (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
                                                                " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"         "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "vec_set<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
        (vec_merge:VEC_REG_MODE
          (vec_duplicate:VEC_REG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand"))
          (match_dup 0)
          (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"       "=v")
        (vec_merge:VEC_1REG_MODE
          (vec_duplicate:VEC_1REG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand"   "Sv"))
          (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
                                                                "U0")
          (match_operand:SI 2 "const_int_operand"               " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"       "=v")
        (vec_merge:VEC_2REG_MODE
          (vec_duplicate:VEC_2REG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand"   "Sv"))
          (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
                                                                "U0")
          (match_operand:SI 2 "const_int_operand"               " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"  "=v")
        (vec_duplicate:VEC_1REG_MODE
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"  "= v")
        (vec_duplicate:VEC_2REG_MODE
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])

(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:VEC_1REG_MODE 1 "register_operand" "  v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:VEC_2REG_MODE 1 "register_operand" "  v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand 1)]
  ""
  {
    gcn_expand_vector_init (operands[0], operands[1]);
    DONE;
  })

;; }}}
;; {{{ Scatter / Gather

;; GCN does not have an instruction for loading a vector from contiguous
;; memory, so *all* loads and stores are eventually converted to scatter
;; or gather operations.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec.  The unspec formats are as follows:
;;
;;     (unspec:V64??
;;       [(<address expression>)
;;        (<addr_space_t>)
;;        (<use_glc>)
;;        (mem:BLK (scratch))]
;;       UNSPEC_GATHER)
;;
;;     (unspec:BLK
;;       [(<address expression>)
;;        (<source register>)
;;        (<addr_space_t>)
;;        (<use_glc>)
;;        (<exec>)]
;;       UNSPEC_SCATTER)
;;
;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
;; - The mem:BLK does not contain any real information, but indicates that an
;;   unknown memory read is taking place.  Stores are expected to use a
;;   similar mem:BLK outside the unspec.
;; - The address space and glc (volatile) fields are there to replace the
;;   fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.

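;; For example, a non-volatile V64SI gather from a vector of 64-bit flat
;; addresses might look as follows (a sketch; "vdst" and "vaddr" are
;; placeholder registers and FLAT_AS stands for the flat address-space
;; number):
;;
;;     (set (reg:V64SI vdst)
;;          (unspec:V64SI
;;            [(reg:V64DI vaddr)        ; address expression
;;             (const_int FLAT_AS)      ; addr_space_t
;;             (const_int 0)            ; use_glc: not volatile
;;             (mem:BLK (scratch))]
;;            UNSPEC_GATHER))
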
(define_expand "gather_load<mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
                                                const0_rtx, const0_rtx));
    else
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
                                                 addr, const0_rtx, const0_rtx,
                                                 const0_rtx));
    DONE;
  })
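
; Note the dispatch above: gcn_expand_scaled_offsets either returns a full
; V64DI vector of absolute addresses, which the "1offset" pattern handles,
; or a vector of 32-bit offsets to be combined with the scalar base in
; operands[1], which the "2offsets" pattern handles.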

(define_expand "gather<mode>_exec"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:V64SI 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    rtx undefmode = gcn_gen_undef (<MODE>mode);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
                                                     const0_rtx, const0_rtx,
                                                     const0_rtx, undefmode,
                                                     operands[5]));
    else
      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
                                                      addr, const0_rtx,
                                                      const0_rtx, const0_rtx,
                                                      undefmode, operands[5]));
    DONE;
  })

; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
        (unspec:VEC_REG_MODE
          [(match_operand 1 "")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  ""
  {})

(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"         "=v")
        (unspec:VEC_REG_MODE
          [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
                       (vec_duplicate:V64DI
                         (match_operand 2 "immediate_operand"    " n")))
           (match_operand 3 "immediate_operand"                  " n")
           (match_operand 4 "immediate_operand"                  " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
        || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
        if (TARGET_GCN5_PLUS)
          sprintf (buf, "flat_load%%s0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
                   glc);
        else
          sprintf (buf, "flat_load%%s0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%s0\t%%0, %%1, off offset:%%2%s\;"
               "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"         "=v")
        (unspec:VEC_REG_MODE
          [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
                       (vec_duplicate:V64SI
                         (match_operand 2 "immediate_operand"    " n")))
           (match_operand 3 "immediate_operand"                  " n")
           (match_operand 4 "immediate_operand"                  " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
             (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"           "=v")
        (unspec:VEC_REG_MODE
          [(plus:V64DI
             (plus:V64DI
               (vec_duplicate:V64DI
                 (match_operand:DI 1 "register_operand"            "Sv"))
               (sign_extend:V64DI
                 (match_operand:V64SI 2 "register_operand"         " v")))
             (vec_duplicate:V64DI (match_operand 3 "immediate_operand"
                                                                   " n")))
           (match_operand 4 "immediate_operand"                    " n")
           (match_operand 5 "immediate_operand"                    " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
        /* Work around assembler bug in which a 64-bit register is expected,
           but a 32-bit value would be correct.  */
        int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
        sprintf (buf, "global_load%%s0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
                 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

(define_expand "scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
                                                 const0_rtx, const0_rtx));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
                                                  const0_rtx, operands[4],
                                                  const0_rtx, const0_rtx));
    DONE;
  })

(define_expand "scatter<mode>_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    operands[5] = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
                                                      operands[4], const0_rtx,
                                                      const0_rtx,
                                                      operands[5]));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
                                                       const0_rtx, operands[4],
                                                       const0_rtx, const0_rtx,
                                                       operands[5]));
    DONE;
  })

; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:V64DI 0 "")
           (match_operand:VEC_REG_MODE 1 "register_operand")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")]
          UNSPEC_SCATTER))]
  ""
  {})

(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
                       (vec_duplicate:V64DI
                         (match_operand 1 "immediate_operand"    "n")))
           (match_operand:VEC_REG_MODE 2 "register_operand"      "v")
           (match_operand 3 "immediate_operand"                  "n")
           (match_operand 4 "immediate_operand"                  "n")]
          UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
        || (TARGET_GCN5_PLUS
            && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
        if (TARGET_GCN5_PLUS)
          sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
        else
          sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
                       (vec_duplicate:V64SI
                         (match_operand 1 "immediate_operand"    "n")))
           (match_operand:VEC_REG_MODE 2 "register_operand"      "v")
           (match_operand 3 "immediate_operand"                  "n")
           (match_operand 4 "immediate_operand"                  "n")]
          UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
             (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:V64DI
             (plus:V64DI
               (vec_duplicate:V64DI
                 (match_operand:DI 0 "register_operand"            "Sv"))
               (sign_extend:V64DI
                 (match_operand:V64SI 1 "register_operand"         " v")))
             (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
                                                                   " n")))
           (match_operand:VEC_REG_MODE 3 "register_operand"        " v")
           (match_operand 4 "immediate_operand"                    " n")
           (match_operand 5 "immediate_operand"                    " n")]
          UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
        /* Work around assembler bug in which a 64-bit register is expected,
           but a 32-bit value would be correct.  */
        int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
        sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
                 reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

;; }}}
;; {{{ Permutations

(define_insn "ds_bpermute<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"    "=v")
        (unspec:VEC_1REG_MODE
          [(match_operand:VEC_1REG_MODE 2 "register_operand" " v")
           (match_operand:V64SI 1 "register_operand"         " v")
           (match_operand:DI 3 "gcn_exec_reg_operand"        " e")]
          UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])

(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"    "=&v")
        (unspec:VEC_2REG_MODE
          [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
           (match_operand:V64SI 1 "register_operand"         "  v")
           (match_operand:DI 3 "gcn_exec_reg_operand"        "  e")]
          UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
                                    UNSPEC_BPERMUTE))
   (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
                                    UNSPEC_BPERMUTE))]
  {
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])
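
; The 64-bit ds_bpermute above is split after reload into two 32-bit
; ds_bpermute_b32 operations, one per register of the pair, reusing the
; same lane-index vector for both halves.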

;; }}}
;; {{{ ALU special case: add/sub

(define_insn "addv64si3<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"   "=  v")
        (plus:V64SI
          (match_operand:V64SI 1 "register_operand" "%  v")
          (match_operand:V64SI 2 "gcn_alu_operand"  "vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

(define_insn "addv64si3_dup<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"   "= v")
        (plus:V64SI
          (vec_duplicate:V64SI
            (match_operand:SI 2 "gcn_alu_operand"   "SvB"))
          (match_operand:V64SI 1 "register_operand" "  v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

(define_insn "addv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"   "=  v,   v")
        (plus:V64SI
          (match_operand:V64SI 1 "register_operand" "%  v,   v")
          (match_operand:V64SI 2 "gcn_alu_operand"  "vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"      "= cV,  Sg")
        (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8")])

; This pattern only changes the VCC bits when the corresponding lane is
; enabled, so the set must be described as an ior.

(define_insn "addv64si3_vcc_dup<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"    "= v,  v")
        (plus:V64SI
          (vec_duplicate:V64SI
            (match_operand:SI 1 "gcn_alu_operand"    "SvB,SvB"))
          (match_operand:V64SI 2 "register_operand"  "  v,  v")))
   (set (match_operand:DI 3 "register_operand"       "=cV, Sg")
        (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
                            (match_dup 1))
                (vec_duplicate:V64SI (match_dup 2))))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8,8")])

; This pattern does not accept SGPR because the VCC read already counts as an
; SGPR use and the number of SGPR operands is limited to 1.

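; The carry-in (operand 3) is a DImode lane mask; the vec_merge of a
; broadcast 1 with a broadcast 0 below expresses it as the per-lane vector
; of 0/1 values that is actually added.
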
(define_insn "addcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"      "=v,v")
        (plus:V64SI
          (plus:V64SI
            (vec_merge:V64SI
              (vec_duplicate:V64SI (const_int 1))
              (vec_duplicate:V64SI (const_int 0))
              (match_operand:DI 3 "register_operand"   " cV,Sv"))
            (match_operand:V64SI 1 "gcn_alu_operand"   "%vA,vA"))
          (match_operand:V64SI 2 "gcn_alu_operand"     " vB,vB")))
   (set (match_operand:DI 4 "register_operand"         "=cV,Sg")
        (ior:DI (ltu:DI (plus:V64SI
                          (plus:V64SI
                            (vec_merge:V64SI
                              (vec_duplicate:V64SI (const_int 1))
                              (vec_duplicate:V64SI (const_int 0))
                              (match_dup 3))
                            (match_dup 1))
                          (match_dup 2))
                        (match_dup 2))
                (ltu:DI (plus:V64SI
                          (vec_merge:V64SI
                            (vec_duplicate:V64SI (const_int 1))
                            (vec_duplicate:V64SI (const_int 0))
                            (match_dup 3))
                          (match_dup 1))
                        (match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])

(define_insn "addcv64si3_dup<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"      "=v,v")
        (plus:V64SI
          (plus:V64SI
            (vec_merge:V64SI
              (vec_duplicate:V64SI (const_int 1))
              (vec_duplicate:V64SI (const_int 0))
              (match_operand:DI 3 "register_operand"   " cV, Sv"))
            (match_operand:V64SI 1 "gcn_alu_operand"   "%vA, vA"))
          (vec_duplicate:V64SI
            (match_operand:SI 2 "gcn_alu_operand"      "SvB,SvB"))))
   (set (match_operand:DI 4 "register_operand"         "=cV, Sg")
        (ior:DI (ltu:DI (plus:V64SI (plus:V64SI
                                      (vec_merge:V64SI
                                        (vec_duplicate:V64SI (const_int 1))
                                        (vec_duplicate:V64SI (const_int 0))
                                        (match_dup 3))
                                      (match_dup 1))
                                    (vec_duplicate:V64SI
                                      (match_dup 2)))
                        (vec_duplicate:V64SI
                          (match_dup 2)))
                (ltu:DI (plus:V64SI (vec_merge:V64SI
                                      (vec_duplicate:V64SI (const_int 1))
                                      (vec_duplicate:V64SI (const_int 0))
                                      (match_dup 3))
                                    (match_dup 1))
                        (match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])

(define_insn "subv64si3<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v,   v")
        (minus:V64SI
          (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,   v")
          (match_operand:V64SI 2 "gcn_alu_operand" "   v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])

(define_insn "subv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v,   v,   v,   v")
        (minus:V64SI
          (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB,   v,   v")
          (match_operand:V64SI 2 "gcn_alu_operand" "   v,   v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"     "= cV,  Sg,  cV,  Sg")
        (gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])

; This pattern does not accept SGPR because the VCC read already counts
; as an SGPR use and the number of SGPR operands is limited to 1.

(define_insn "subcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"    "= v, v, v, v")
        (minus:V64SI
          (minus:V64SI
            (vec_merge:V64SI
              (vec_duplicate:V64SI (const_int 1))
              (vec_duplicate:V64SI (const_int 0))
              (match_operand:DI 3 "gcn_alu_operand"  " cV,Sv,cV,Sv"))
            (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
          (match_operand:V64SI 2 "gcn_alu_operand"   " vB,vB,vA,vA")))
   (set (match_operand:DI 4 "register_operand"       "=cV,Sg,cV,Sg")
        (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
                                       (vec_merge:V64SI
                                         (vec_duplicate:V64SI (const_int 1))
                                         (vec_duplicate:V64SI (const_int 0))
                                         (match_dup 3))
                                       (match_dup 1))
                                     (match_dup 2))
                        (match_dup 2))
                (ltu:DI (minus:V64SI (vec_merge:V64SI
                                       (vec_duplicate:V64SI (const_int 1))
                                       (vec_duplicate:V64SI (const_int 0))
                                       (match_dup 3))
                                     (match_dup 1))
                        (match_dup 1))))]
  ""
  "@
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3"
  [(set (match_operand:V64DI 0 "register_operand"   "=  &v")
        (plus:V64DI
          (match_operand:V64DI 1 "register_operand" "%  v0")
          (match_operand:V64DI 2 "gcn_alu_operand"  "vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc));
    emit_insn (gen_addcv64si3
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
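
; The splitter above lowers the 64-bit vector add into a low-part add that
; writes its carry to VCC, followed by an add-with-carry on the high parts.
; The emitted sequence is therefore along these lines (a sketch; "%^"
; expands to a target-dependent suffix, e.g. "_co" on GCN5):
;
;   v_add%^_u32  %L0, vcc, %L2, %L1
;   v_addc%^_u32 %H0, vcc, %H1, %H2, vcc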

(define_insn_and_split "addv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "=  &v")
        (vec_merge:V64DI
          (plus:V64DI
            (match_operand:V64DI 1 "register_operand"             "%  v0")
            (match_operand:V64DI 2 "gcn_alu_operand"              "vSvB0"))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "   U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "    e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[4])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addcv64si3_exec
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "subv64di3"
  [(set (match_operand:V64DI 0 "register_operand"  "=  &v,   &v")
        (minus:V64DI
          (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0,   v0")
          (match_operand:V64DI 2 "gcn_alu_operand" "   v0,vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc));
    emit_insn (gen_subcv64si3
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

(define_insn_and_split "subv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "= &v,  &v")
        (vec_merge:V64DI
          (minus:V64DI
            (match_operand:V64DI 1 "gcn_alu_operand"            "vSvB0,   v0")
            (match_operand:V64DI 2 "gcn_alu_operand"            "   v0,vSvB0"))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
                                                                  " U0,  U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "  e,   e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_subcv64si3_exec
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

(define_insn_and_split "addv64di3_dup"
  [(set (match_operand:V64DI 0 "register_operand"   "= &v")
        (plus:V64DI
          (match_operand:V64DI 1 "register_operand" "  v0")
          (vec_duplicate:V64DI
            (match_operand:DI 2 "gcn_alu_operand"   "SvDB"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 vcc));
    emit_insn (gen_addcv64si3_dup
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (DImode, operands[2], 1),
                 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "= &v")
        (vec_merge:V64DI
          (plus:V64DI
            (match_operand:V64DI 1 "register_operand"             "  v0")
            (vec_duplicate:V64DI
              (match_operand:DI 2 "gcn_alu_operand"               "SvDB")))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "  U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "   e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addcv64si3_dup_exec
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (DImode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand"    "=&v,&v")
        (plus:V64DI
          (zero_extend:V64DI
            (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
          (match_operand:V64DI 2 "gcn_alu_operand"   "0vB,0vA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
                (gcn_operand_part (V64DImode, operands[0], 0),
                 operands[1],
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc));
    emit_insn (gen_addcv64si3
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

(define_insn_and_split "addv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "=&v,&v")
        (vec_merge:V64DI
          (plus:V64DI
            (zero_extend:V64DI
              (match_operand:V64SI 1 "gcn_alu_operand"            "0vA,0vB"))
            (match_operand:V64DI 2 "gcn_alu_operand"              "0vB,0vA"))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "  e,  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 operands[1],
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addcv64si3_exec
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 const0_rtx, vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

(define_insn_and_split "addv64di3_zext_dup"
  [(set (match_operand:V64DI 0 "register_operand"     "=&v")
        (plus:V64DI
          (zero_extend:V64DI
            (vec_duplicate:V64SI
              (match_operand:SI 1 "gcn_alu_operand"   "BSv")))
          (match_operand:V64DI 2 "gcn_alu_operand"    "vA0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc));
    emit_insn (gen_addcv64si3
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "=&v")
        (vec_merge:V64DI
          (plus:V64DI
            (zero_extend:V64DI
              (vec_duplicate:V64SI
                (match_operand:SI 1 "gcn_alu_operand"             "BSv")))
            (match_operand:V64DI 2 "gcn_alu_operand"              "vA0"))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addcv64si3_exec
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 const0_rtx, vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"                        "= v")
        (plus:V64DI
          (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"    " vA"))
          (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"     "BSv"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
                (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "= v")
        (vec_merge:V64DI
          (plus:V64DI
            (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
                                                                  " vA"))
            (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"
                                                                  "BSv")))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
                (dsthi, gcn_operand_part (DImode, operands[2], 1),
                 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
                (dsthi, dsthi, const0_rtx, vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_sext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"                        "= v")
        (plus:V64DI
          (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"    " vA"))
          (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"     "BSv"))))
   (clobber (match_scratch:V64SI 3                                       "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
    emit_insn (gen_addv64si3_vcc_dup
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
                (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_sext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "= v")
        (vec_merge:V64DI
          (plus:V64DI
            (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
                                                                  " vA"))
            (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"
                                                                  "BSv")))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "  e")))
   (clobber (match_scratch:V64SI 5                                "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
                                    gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addv64si3_vcc_dup_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
                (dsthi, gcn_operand_part (DImode, operands[2], 1),
                 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
                (dsthi, dsthi, operands[5], vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

;; }}}
;; {{{ DS memory ALU: add/sub

(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])

;; FIXME: the vector patterns probably need RD expanded to a vector of
;;   addresses.  For now, the only way a vector can get into LDS is
;;   if the user puts it there manually.
;;
;; FIXME: the scalar patterns are probably fine in themselves, but need to be
;;   checked to see if anything can ever use them.

(define_insn "add<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"   "=RD")
        (plus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand"      "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "add<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (plus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                                      "%RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "sub<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"   "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand"      "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "sub<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                                      " RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "subr<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"   "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 2 "register_operand"      "  v")
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "subr<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                                      " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

;; }}}
;; {{{ ALU special case: mult

(define_insn "<su>mulv64si3_highpart<exec>"
  [(set (match_operand:V64SI 0 "register_operand"          "=  v")
        (truncate:V64SI
          (lshiftrt:V64DI
            (mult:V64DI
              (any_extend:V64DI
                (match_operand:V64SI 1 "gcn_alu_operand"   "  %v"))
              (any_extend:V64DI
                (match_operand:V64SI 2 "gcn_alu_operand"   "vSvA")))
            (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "mulv64si3<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v")
        (mult:V64SI
          (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
          (match_operand:V64SI 2 "gcn_alu_operand" " vSvA")))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "mulv64si3_dup<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v")
        (mult:V64SI
          (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
          (vec_duplicate:V64SI
            (match_operand:SI 2 "gcn_alu_operand"  "  SvA"))))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
1747
1748 (define_insn_and_split "mulv64di3"
1749 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1750 (mult:V64DI
1751 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1752 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1753 (clobber (match_scratch:V64SI 3 "=&v"))]
1754 ""
1755 "#"
1756 "reload_completed"
1757 [(const_int 0)]
1758 {
1759 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1760 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1761 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1762 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1763 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1764 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1765 rtx tmp = operands[3];
1766
1767 emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
1768 emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
1769 emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
1770 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1771 emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
1772 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1773 /* No left_hi * right_hi term is needed: it contributes only at bit 64
1774    and above, beyond the 64-bit result (see the sketch below).  */
1775 DONE;
1776 })
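
;; The split above is the schoolbook decomposition of a 64-bit low-part
;; product into 32-bit multiplies; left_hi * right_hi is omitted because it
;; contributes only at bit 64 and above.  A per-lane sketch in C
;; (illustrative, not part of the port):
;;
;;   #include <stdint.h>
;;
;;   uint64_t
;;   mul64 (uint64_t a, uint64_t b)
;;   {
;;     uint32_t a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
;;     uint64_t lo = (uint64_t) a_lo * b_lo;      /* v_mul_lo + v_mul_hi */
;;     uint32_t hi = (uint32_t) (lo >> 32)
;;                   + a_hi * b_lo + a_lo * b_hi; /* two more v_mul_lo */
;;     return ((uint64_t) hi << 32) | (uint32_t) lo;
;;   }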
1777
1778 (define_insn_and_split "mulv64di3_exec"
1779 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1780 (vec_merge:V64DI
1781 (mult:V64DI
1782 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1783 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1784 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1785 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1786 (clobber (match_scratch:V64SI 5 "=&v"))]
1787 ""
1788 "#"
1789 "reload_completed"
1790 [(const_int 0)]
1791 {
1792 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1793 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1794 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1795 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1796 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1797 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1798 rtx exec = operands[4];
1799 rtx tmp = operands[5];
1800
1801 rtx old_lo, old_hi;
1802 if (GET_CODE (operands[3]) == UNSPEC)
1803 {
1804 old_lo = old_hi = gcn_gen_undef (V64SImode);
1805 }
1806 else
1807 {
1808 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1809 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1810 }
1811
1812 rtx undef = gcn_gen_undef (V64SImode);
1813
1814 emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
1815 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
1816 old_hi, exec));
1817 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
1818 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1819 emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
1820 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1821 /* As above, left_hi * right_hi cannot affect the low 64 bits of the
1822    result and is omitted.  */
1823 DONE;
1824 })
1825
1826 (define_insn_and_split "mulv64di3_zext"
1827 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1828 (mult:V64DI
1829 (zero_extend:V64DI
1830 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1831 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1832 (clobber (match_scratch:V64SI 3 "=&v"))]
1833 ""
1834 "#"
1835 "reload_completed"
1836 [(const_int 0)]
1837 {
1838 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1839 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1840 rtx left = operands[1];
1841 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1842 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1843 rtx tmp = operands[3];
1844
1845 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1846 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1847 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1848 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1849 DONE;
1850 })
1851
1852 (define_insn_and_split "mulv64di3_zext_exec"
1853 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1854 (vec_merge:V64DI
1855 (mult:V64DI
1856 (zero_extend:V64DI
1857 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1858 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1859 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1860 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1861 (clobber (match_scratch:V64SI 5 "=&v"))]
1862 ""
1863 "#"
1864 "reload_completed"
1865 [(const_int 0)]
1866 {
1867 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1868 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1869 rtx left = operands[1];
1870 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1871 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1872 rtx exec = operands[4];
1873 rtx tmp = operands[5];
1874
1875 rtx old_lo, old_hi;
1876 if (GET_CODE (operands[3]) == UNSPEC)
1877 {
1878 old_lo = old_hi = gcn_gen_undef (V64SImode);
1879 }
1880 else
1881 {
1882 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1883 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1884 }
1885
1886 rtx undef = gcn_gen_undef (V64SImode);
1887
1888 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1889 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1890 old_hi, exec));
1891 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1892 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1893 DONE;
1894 })
1895
1896 (define_insn_and_split "mulv64di3_zext_dup2"
1897 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1898 (mult:V64DI
1899 (zero_extend:V64DI
1900 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1901 (vec_duplicate:V64DI
1902 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
1903 (clobber (match_scratch:V64SI 3 "= &v"))]
1904 ""
1905 "#"
1906 "reload_completed"
1907 [(const_int 0)]
1908 {
1909 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1910 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1911 rtx left = operands[1];
1912 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1913 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1914 rtx tmp = operands[3];
1915
1916 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1917 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1918 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1919 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1920 DONE;
1921 })
1922
1923 (define_insn_and_split "mulv64di3_zext_dup2_exec"
1924 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1925 (vec_merge:V64DI
1926 (mult:V64DI
1927 (zero_extend:V64DI
1928 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1929 (vec_duplicate:V64DI
1930 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
1931 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1932 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1933 (clobber (match_scratch:V64SI 5 "= &v"))]
1934 ""
1935 "#"
1936 "reload_completed"
1937 [(const_int 0)]
1938 {
1939 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1940 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1941 rtx left = operands[1];
1942 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1943 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1944 rtx exec = operands[4];
1945 rtx tmp = operands[5];
1946
1947 rtx old_lo, old_hi;
1948 if (GET_CODE (operands[3]) == UNSPEC)
1949 {
1950 old_lo = old_hi = gcn_gen_undef (V64SImode);
1951 }
1952 else
1953 {
1954 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1955 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1956 }
1957
1958 rtx undef = gcn_gen_undef (V64SImode);
1959
1960 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1961 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1962 old_hi, exec));
1963 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1964 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1965 DONE;
1966 })
1967
1968 ;; }}}
1969 ;; {{{ ALU generic case
1970
1971 (define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI])
1972
1973 (define_code_iterator bitop [and ior xor])
1974 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
1975 (define_code_iterator minmaxop [smin smax umin umax])
1976
1977 (define_insn "<expander><mode>2<exec>"
1978 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v")
1979 (bitunop:VEC_1REG_INT_MODE
1980 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
1981 ""
1982 "v_<mnemonic>0\t%0, %1"
1983 [(set_attr "type" "vop1")
1984 (set_attr "length" "8")])
1985
1986 (define_insn "<expander><mode>3<exec>"
1987 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
1988 (bitop:VEC_1REG_INT_MODE
1989 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
1990 "% v, 0")
1991 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
1992 "vSvB, v")))]
1993 ""
1994 "@
1995 v_<mnemonic>0\t%0, %2, %1
1996 ds_<mnemonic>0\t%A0, %2%O0"
1997 [(set_attr "type" "vop2,ds")
1998 (set_attr "length" "8,8")])
1999
2000 (define_insn_and_split "<expander>v64di3"
2001 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
2002 (bitop:V64DI
2003 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
2004 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2005 ""
2006 "@
2007 #
2008 ds_<mnemonic>0\t%A0, %2%O0"
2009 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
2010 [(set (match_dup 3)
2011 (bitop:V64SI (match_dup 5) (match_dup 7)))
2012 (set (match_dup 4)
2013 (bitop:V64SI (match_dup 6) (match_dup 8)))]
2014 {
2015 operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
2016 operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
2017 operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
2018 operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
2019 operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
2020 operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
2021 }
2022 [(set_attr "type" "vmult,ds")
2023 (set_attr "length" "16,8")])
2024
2025 (define_insn_and_split "<expander>v64di3_exec"
2026 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
2027 (vec_merge:V64DI
2028 (bitop:V64DI
2029 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
2030 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2031 (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
2032 " U0,U0")
2033 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2034 "!memory_operand (operands[0], VOIDmode)
2035 || (rtx_equal_p (operands[0], operands[1])
2036 && register_operand (operands[2], VOIDmode))"
2037 "@
2038 #
2039 ds_<mnemonic>0\t%A0, %2%O0"
2040 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
2041 [(set (match_dup 5)
2042 (vec_merge:V64SI
2043 (bitop:V64SI (match_dup 7) (match_dup 9))
2044 (match_dup 11)
2045 (match_dup 4)))
2046 (set (match_dup 6)
2047 (vec_merge:V64SI
2048 (bitop:V64SI (match_dup 8) (match_dup 10))
2049 (match_dup 12)
2050 (match_dup 4)))]
2051 {
2052 operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
2053 operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
2054 operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
2055 operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
2056 operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
2057 operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
2058 operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
2059 operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
2060 }
2061 [(set_attr "type" "vmult,ds")
2062 (set_attr "length" "16,8")])
2063
2064 (define_insn "<expander>v64si3<exec>"
2065 [(set (match_operand:V64SI 0 "register_operand" "= v")
2066 (shiftop:V64SI
2067 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2068 (vec_duplicate:V64SI
2069 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2070 ""
2071 "v_<revmnemonic>0\t%0, %2, %1"
2072 [(set_attr "type" "vop2")
2073 (set_attr "length" "8")])
2074
2075 (define_insn "v<expander>v64si3<exec>"
2076 [(set (match_operand:V64SI 0 "register_operand" "=v")
2077 (shiftop:V64SI
2078 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2079 (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
2080 ""
2081 "v_<revmnemonic>0\t%0, %2, %1"
2082 [(set_attr "type" "vop2")
2083 (set_attr "length" "8")])
2084
2085 (define_insn "<expander><mode>3<exec>"
2086 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
2087 (minmaxop:VEC_1REG_INT_MODE
2088 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
2089 "% v, 0")
2090 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
2091 "vSvB, v")))]
2092 ""
2093 "@
2094 v_<mnemonic>0\t%0, %2, %1
2095 ds_<mnemonic>0\t%A0, %2%O0"
2096 [(set_attr "type" "vop2,ds")
2097 (set_attr "length" "8,8")])
2098
2099 ;; }}}
2100 ;; {{{ FP binops - special cases
2101
2102 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2103 ; adding the negated second operand to the first.
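;
; That is, the patterns rely on the identity a - b == a + (-b), with the
; negation folded into a v_add_f64 source modifier rather than emitted as a
; separate instruction, e.g. (with illustrative registers):
;
;   v_add_f64  v[0:1], v[2:3], -v[4:5]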
2104
2105 (define_insn "subv64df3<exec>"
2106 [(set (match_operand:V64DF 0 "register_operand" "= v, v")
2107 (minus:V64DF
2108 (match_operand:V64DF 1 "gcn_alu_operand" "vSvB, v")
2109 (match_operand:V64DF 2 "gcn_alu_operand" " v,vSvB")))]
2110 ""
2111 "@
2112 v_add_f64\t%0, %1, -%2
2113 v_add_f64\t%0, -%2, %1"
2114 [(set_attr "type" "vop3a")
2115 (set_attr "length" "8,8")])
2116
2117 (define_insn "subdf"
2118 [(set (match_operand:DF 0 "register_operand" "= v, v")
2119 (minus:DF
2120 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2121 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2122 ""
2123 "@
2124 v_add_f64\t%0, %1, -%2
2125 v_add_f64\t%0, -%2, %1"
2126 [(set_attr "type" "vop3a")
2127 (set_attr "length" "8,8")])
2128
2129 ;; }}}
2130 ;; {{{ FP binops - generic
2131
2132 (define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
2133 (define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
2134 (define_mode_iterator FP_MODE [HF SF DF])
2135 (define_mode_iterator FP_1REG_MODE [HF SF])
2136
2137 (define_code_iterator comm_fp [plus mult smin smax])
2138 (define_code_iterator nocomm_fp [minus])
2139 (define_code_iterator all_fp [plus mult minus smin smax])
2140
2141 (define_insn "<expander><mode>3<exec>"
2142 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2143 (comm_fp:VEC_FP_MODE
2144 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v")
2145 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
2146 ""
2147 "v_<mnemonic>0\t%0, %2, %1"
2148 [(set_attr "type" "vop2")
2149 (set_attr "length" "8")])
2150
2151 (define_insn "<expander><mode>3"
2152 [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL")
2153 (comm_fp:FP_MODE
2154 (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0")
2155 (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2156 ""
2157 "@
2158 v_<mnemonic>0\t%0, %2, %1
2159 v_<mnemonic>0\t%0, %1%O0"
2160 [(set_attr "type" "vop2,ds")
2161 (set_attr "length" "8")])
2162
2163 (define_insn "<expander><mode>3<exec>"
2164 [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v")
2165 (nocomm_fp:VEC_FP_1REG_MODE
2166 (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2167 (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2168 ""
2169 "@
2170 v_<mnemonic>0\t%0, %1, %2
2171 v_<revmnemonic>0\t%0, %2, %1"
2172 [(set_attr "type" "vop2")
2173 (set_attr "length" "8,8")])
2174
2175 (define_insn "<expander><mode>3"
2176 [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v")
2177 (nocomm_fp:FP_1REG_MODE
2178 (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2179 (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2180 ""
2181 "@
2182 v_<mnemonic>0\t%0, %1, %2
2183 v_<revmnemonic>0\t%0, %2, %1"
2184 [(set_attr "type" "vop2")
2185 (set_attr "length" "8,8")])
2186
2187 ;; }}}
2188 ;; {{{ FP unops
2189
2190 (define_insn "abs<mode>2"
2191 [(set (match_operand:FP_MODE 0 "register_operand" "=v")
2192 (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
2193 ""
2194 "v_add%i0\t%0, 0, |%1|"
2195 [(set_attr "type" "vop3a")
2196 (set_attr "length" "8")])
2197
2198 (define_insn "abs<mode>2<exec>"
2199 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2200 (abs:VEC_FP_MODE
2201 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2202 ""
2203 "v_add%i0\t%0, 0, |%1|"
2204 [(set_attr "type" "vop3a")
2205 (set_attr "length" "8")])
2206
2207 (define_insn "neg<mode>2<exec>"
2208 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2209 (neg:VEC_FP_MODE
2210 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2211 ""
2212 "v_add%i0\t%0, 0, -%1"
2213 [(set_attr "type" "vop3a")
2214 (set_attr "length" "8")])
2215
2216 (define_insn "sqrt<mode>2<exec>"
2217 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2218 (sqrt:VEC_FP_MODE
2219 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2220 "flag_unsafe_math_optimizations"
2221 "v_sqrt%i0\t%0, %1"
2222 [(set_attr "type" "vop1")
2223 (set_attr "length" "8")])
2224
2225 (define_insn "sqrt<mode>2"
2226 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2227 (sqrt:FP_MODE
2228 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2229 "flag_unsafe_math_optimizations"
2230 "v_sqrt%i0\t%0, %1"
2231 [(set_attr "type" "vop1")
2232 (set_attr "length" "8")])
2233
2234 ;; }}}
2235 ;; {{{ FP fused multiply and add
2236
2237 (define_insn "fma<mode>4<exec>"
2238 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v")
2239 (fma:VEC_FP_MODE
2240 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2241 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2242 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2243 ""
2244 "v_fma%i0\t%0, %1, %2, %3"
2245 [(set_attr "type" "vop3a")
2246 (set_attr "length" "8")])
2247
2248 (define_insn "fma<mode>4_negop2<exec>"
2249 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v")
2250 (fma:VEC_FP_MODE
2251 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2252 (neg:VEC_FP_MODE
2253 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2254 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2255 ""
2256 "v_fma%i0\t%0, %1, -%2, %3"
2257 [(set_attr "type" "vop3a")
2258 (set_attr "length" "8")])
2259
2260 (define_insn "fma<mode>4"
2261 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v")
2262 (fma:FP_MODE
2263 (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2264 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2265 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2266 ""
2267 "v_fma%i0\t%0, %1, %2, %3"
2268 [(set_attr "type" "vop3a")
2269 (set_attr "length" "8")])
2270
2271 (define_insn "fma<mode>4_negop2"
2272 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v")
2273 (fma:FP_MODE
2274 (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2275 (neg:FP_MODE
2276 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2277 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2278 ""
2279 "v_fma%i0\t%0, %1, -%2, %3"
2280 [(set_attr "type" "vop3a")
2281 (set_attr "length" "8")])
2282
2283 ;; }}}
2284 ;; {{{ FP division
2285
2286 (define_insn "recip<mode>2<exec>"
2287 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2288 (div:VEC_FP_MODE
2289 (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
2290 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2291 ""
2292 "v_rcp%i0\t%0, %1"
2293 [(set_attr "type" "vop1")
2294 (set_attr "length" "8")])
2295
2296 (define_insn "recip<mode>2"
2297 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2298 (div:FP_MODE
2299 (float:FP_MODE (const_int 1))
2300 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2301 ""
2302 "v_rcp%i0\t%0, %1"
2303 [(set_attr "type" "vop1")
2304 (set_attr "length" "8")])
2305
2306 ;; Do division via a = b * (1/c).
2307 ;; The v_rcp_* instructions are not sufficiently accurate on their own, so
2308 ;; we do one round of Newton-Raphson refinement (sketched below), which the
2309 ;; ISA manual says is enough to improve the reciprocal accuracy.
2310 ;;
2311 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
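;;
;; One refinement round computes x1 = x0 * (2 - c * x0) from the initial
;; estimate x0 ~= 1/c.  A scalar model of what the expanders below emit
;; (illustrative C; approx_rcp stands in for v_rcp_*):
;;
;;   #include <math.h>
;;
;;   float
;;   divide (float b, float c)
;;   {
;;     float x0 = approx_rcp (c);       /* recip<mode>2: v_rcp */
;;     float e = fmaf (x0, -c, 2.0f);   /* fma<mode>4_negop2: 2 - c*x0 */
;;     float x1 = x0 * e;               /* refined reciprocal */
;;     return b * x1;                   /* a = b * (1/c) */
;;   }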
2312
2313 (define_expand "div<mode>3"
2314 [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
2315 (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
2316 (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
2317 "flag_reciprocal_math"
2318 {
2319 rtx two = gcn_vec_constant (<MODE>mode,
2320 const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
2321 rtx initrcp = gen_reg_rtx (<MODE>mode);
2322 rtx fma = gen_reg_rtx (<MODE>mode);
2323 rtx rcp;
2324
2325 bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
2326 && real_identical
2327 (CONST_DOUBLE_REAL_VALUE
2328 (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
2329
2330 if (is_rcp)
2331 rcp = operands[0];
2332 else
2333 rcp = gen_reg_rtx (<MODE>mode);
2334
2335 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2336 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2337 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2338
2339 if (!is_rcp)
2340 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2341
2342 DONE;
2343 })
2344
2345 (define_expand "div<mode>3"
2346 [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
2347 (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
2348 (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
2349 "flag_reciprocal_math"
2350 {
2351 rtx two = const_double_from_real_value (dconst2, <MODE>mode);
2352 rtx initrcp = gen_reg_rtx (<MODE>mode);
2353 rtx fma = gen_reg_rtx (<MODE>mode);
2354 rtx rcp;
2355
2356 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
2357 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
2358 &dconstm1));
2359
2360 if (is_rcp)
2361 rcp = operands[0];
2362 else
2363 rcp = gen_reg_rtx (<MODE>mode);
2364
2365 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2366 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2367 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2368
2369 if (!is_rcp)
2370 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2371
2372 DONE;
2373 })
2374
2375 ;; }}}
2376 ;; {{{ Int/FP conversions
2377
2378 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2379 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2380
2381 (define_mode_iterator VCVT_FROM_MODE [V64HI V64SI V64HF V64SF V64DF])
2382 (define_mode_iterator VCVT_TO_MODE [V64HI V64SI V64HF V64SF V64DF])
2383
2384 (define_code_iterator cvt_op [fix unsigned_fix
2385 float unsigned_float
2386 float_extend float_truncate])
2387 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2388 (float "float") (unsigned_float "floatuns")
2389 (float_extend "extend") (float_truncate "trunc")])
2390 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2391 (float "%i0%i1") (unsigned_float "%i0%u1")
2392 (float_extend "%i0%i1")
2393 (float_truncate "%i0%i1")])
2394
2395 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2396 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2397 (cvt_op:CVT_TO_MODE
2398 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2399 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2400 <cvt_name>_cvt)"
2401 "v_cvt<cvt_operands>\t%0, %1"
2402 [(set_attr "type" "vop1")
2403 (set_attr "length" "8")])
2404
2405 (define_insn "<cvt_name><VCVT_FROM_MODE:mode><VCVT_TO_MODE:mode>2<exec>"
2406 [(set (match_operand:VCVT_TO_MODE 0 "register_operand" "= v")
2407 (cvt_op:VCVT_TO_MODE
2408 (match_operand:VCVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2409 "gcn_valid_cvt_p (<VCVT_FROM_MODE:MODE>mode, <VCVT_TO_MODE:MODE>mode,
2410 <cvt_name>_cvt)"
2411 "v_cvt<cvt_operands>\t%0, %1"
2412 [(set_attr "type" "vop1")
2413 (set_attr "length" "8")])
2414
2415 ;; }}}
2416 ;; {{{ Int/int conversions
2417
2418 ;; GCC can already do these for scalar types, but not for vector types.
2419 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2420 ;; so a few tricks are needed here.
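;;
;; For example, the V64DI-to-V64SI truncation below moves no data: each
;; lane's low 32 bits already live in the first register of its 64-bit
;; register pair, so the split simply copies that low-part register (via
;; gcn_operand_part), which per lane amounts to:
;;
;;   out[lane] = (uint32_t) in[lane];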
2421
2422 (define_insn_and_split "vec_truncatev64div64si"
2423 [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
2424 (truncate:V64SI
2425 (match_operand:V64DI 1 "register_operand" " 0, v")))]
2426 ""
2427 "#"
2428 "reload_completed"
2429 [(set (match_dup 0) (match_dup 1))]
2430 {
2431 operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
2432 }
2433 [(set_attr "type" "vop2")
2434 (set_attr "length" "0,4")])
2435
2436 (define_insn_and_split "vec_truncatev64div64si_exec"
2437 [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
2438 (vec_merge:V64SI
2439 (truncate:V64SI
2440 (match_operand:V64DI 1 "register_operand" " 0, v"))
2441 (match_operand:V64SI 2 "gcn_alu_or_unspec_operand" "U0,U0")
2442 (match_operand:DI 3 "gcn_exec_operand" " e, e")))]
2443 ""
2444 "#"
2445 "reload_completed"
2446 [(parallel [(set (match_dup 0)
2447 (vec_merge:V64SI (match_dup 1) (match_dup 2) (match_dup 3)))
2448 (clobber (scratch:V64DI))])]
2449 {
2450 operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
2451 }
2452 [(set_attr "type" "vop2")
2453 (set_attr "length" "0,4")])
2454
2455 ;; }}}
2456 ;; {{{ Vector comparison/merge
2457
2458 (define_insn "vec_cmp<mode>di"
2459 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2460 (match_operator 1 "comparison_operator"
2461 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
2462 "vSv, B,vSv, B, v,vA")
2463 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2464 " v, v, v, v,vA, v")]))
2465 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
2466 ""
2467 "@
2468 v_cmp%E1\tvcc, %2, %3
2469 v_cmp%E1\tvcc, %2, %3
2470 v_cmpx%E1\tvcc, %2, %3
2471 v_cmpx%E1\tvcc, %2, %3
2472 v_cmp%E1\t%0, %2, %3
2473 v_cmp%E1\t%0, %2, %3"
2474 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2475 (set_attr "length" "4,8,4,8,8,8")])
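
; The DImode result is a lane mask: bit N of operand 0 holds the outcome of
; the comparison in lane N.  Roughly (illustrative C; 'cmp' stands for the
; condition that %E1 prints):
;
;   uint64_t mask = 0;
;   for (int lane = 0; lane < 64; lane++)
;     if (cmp (a[lane], b[lane]))
;       mask |= (uint64_t) 1 << lane;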
2476
2477 (define_expand "vec_cmpu<mode>di"
2478 [(match_operand:DI 0 "register_operand")
2479 (match_operator 1 "comparison_operator"
2480 [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2481 (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
2482 ""
2483 {
2484 /* Unsigned comparisons use the same patterns as signed comparisons,
2485 except that they use unsigned operators (e.g. LTU vs LT).
2486 The '%E1' directive then does the Right Thing. */
2487 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
2488 operands[3]));
2489 DONE;
2490 })
2491
2492 (define_insn "vec_cmp<mode>di_exec"
2493 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2494 (and:DI
2495 (match_operator 1 "comparison_operator"
2496 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
2497 "vSv, B,vSv, B, v,vA")
2498 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2499 " v, v, v, v,vA, v")])
2500 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
2501 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
2502 ""
2503 "@
2504 v_cmp%E1\tvcc, %2, %3
2505 v_cmp%E1\tvcc, %2, %3
2506 v_cmpx%E1\tvcc, %2, %3
2507 v_cmpx%E1\tvcc, %2, %3
2508 v_cmp%E1\t%0, %2, %3
2509 v_cmp%E1\t%0, %2, %3"
2510 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2511 (set_attr "length" "4,8,4,8,8,8")])
2512
2513 (define_insn "vec_cmp<mode>di_dup"
2514 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2515 (match_operator 1 "comparison_operator"
2516 [(vec_duplicate:VEC_1REG_MODE
2517 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2518 " Sv, B,Sv,B, A"))
2519 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2520 " v, v, v,v, v")]))
2521 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
2522 ""
2523 "@
2524 v_cmp%E1\tvcc, %2, %3
2525 v_cmp%E1\tvcc, %2, %3
2526 v_cmpx%E1\tvcc, %2, %3
2527 v_cmpx%E1\tvcc, %2, %3
2528 v_cmp%E1\t%0, %2, %3"
2529 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2530 (set_attr "length" "4,8,4,8,8")])
2531
2532 (define_insn "vec_cmp<mode>di_dup_exec"
2533 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2534 (and:DI
2535 (match_operator 1 "comparison_operator"
2536 [(vec_duplicate:VEC_1REG_MODE
2537 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2538 " Sv, B,Sv,B, A"))
2539 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2540 " v, v, v,v, v")])
2541 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
2542 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
2543 ""
2544 "@
2545 v_cmp%E1\tvcc, %2, %3
2546 v_cmp%E1\tvcc, %2, %3
2547 v_cmpx%E1\tvcc, %2, %3
2548 v_cmpx%E1\tvcc, %2, %3
2549 v_cmp%E1\t%0, %2, %3"
2550 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2551 (set_attr "length" "4,8,4,8,8")])
2552
2553 (define_expand "vcond_mask_<mode>di"
2554 [(parallel
2555 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "")
2556 (vec_merge:VEC_REG_MODE
2557 (match_operand:VEC_REG_MODE 1 "gcn_vop3_operand" "")
2558 (match_operand:VEC_REG_MODE 2 "gcn_alu_operand" "")
2559 (match_operand:DI 3 "register_operand" "")))
2560 (clobber (scratch:V64DI))])]
2561 ""
2562 "")
2563
2564 (define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>"
2565 [(match_operand:VEC_1REG_MODE 0 "register_operand")
2566 (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
2567 (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
2568 (match_operator 3 "comparison_operator"
2569 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2570 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
2571 ""
2572 {
2573 rtx tmp = gen_reg_rtx (DImode);
2574 emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di
2575 (tmp, operands[3], operands[4], operands[5]));
2576 emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di
2577 (operands[0], operands[1], operands[2], tmp));
2578 DONE;
2579 })
2580
2581 (define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
2582 [(match_operand:VEC_1REG_MODE 0 "register_operand")
2583 (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
2584 (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
2585 (match_operator 3 "comparison_operator"
2586 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2587 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
2588 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2589 ""
2590 {
2591 rtx tmp = gen_reg_rtx (DImode);
2592 emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di_exec
2593 (tmp, operands[3], operands[4], operands[5], operands[6]));
2594 emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di
2595 (operands[0], operands[1], operands[2], tmp));
2596 DONE;
2597 })
2598
2599 (define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>"
2600 [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
2601 (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
2602 (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2603 (match_operator 3 "comparison_operator"
2604 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2605 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
2606 ""
2607 {
2608 rtx tmp = gen_reg_rtx (DImode);
2609 emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di
2610 (tmp, operands[3], operands[4], operands[5]));
2611 emit_insn (gen_vcond_mask_<VEC_1REG_INT_MODE:mode>di
2612 (operands[0], operands[1], operands[2], tmp));
2613 DONE;
2614 })
2615
2616 (define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
2617 [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
2618 (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
2619 (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2620 (match_operator 3 "comparison_operator"
2621 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2622 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
2623 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2624 ""
2625 {
2626 rtx tmp = gen_reg_rtx (DImode);
2627 emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di_exec
2628 (tmp, operands[3], operands[4], operands[5], operands[6]));
2629 emit_insn (gen_vcond_mask_<VEC_1REG_INT_MODE:mode>di
2630 (operands[0], operands[1], operands[2], tmp));
2631 DONE;
2632 })
2633
2634 ;; }}}
2635 ;; {{{ Fully masked loop support
2636
2637 (define_expand "while_ultsidi"
2638 [(match_operand:DI 0 "register_operand")
2639 (match_operand:SI 1 "")
2640 (match_operand:SI 2 "")]
2641 ""
2642 {
2643 if (GET_CODE (operands[1]) != CONST_INT
2644 || GET_CODE (operands[2]) != CONST_INT)
2645 {
2646 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2647 rtx tmp = _0_1_2_3;
2648 if (GET_CODE (operands[1]) != CONST_INT
2649 || INTVAL (operands[1]) != 0)
2650 {
2651 tmp = gen_reg_rtx (V64SImode);
2652 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
2653 }
2654 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
2655 gen_rtx_GT (VOIDmode, 0, 0),
2656 operands[2], tmp));
2657 }
2658 else
2659 {
2660 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
2661 HOST_WIDE_INT mask = (diff >= 64 ? -1
2662 : ~((unsigned HOST_WIDE_INT)-1 << diff));
2663 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
2664 }
2665 DONE;
2666 })
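
; In the constant case the mask is simply the lowest 'diff' bits set.  For
; example, bounds 0 and 3 give diff = 3, so the mask is
; ~(-1 << 3) = 0b111 and only the first three lanes are active.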
2667
2668 (define_expand "maskload<mode>di"
2669 [(match_operand:VEC_REG_MODE 0 "register_operand")
2670 (match_operand:VEC_REG_MODE 1 "memory_operand")
2671 (match_operand 2 "")]
2672 ""
2673 {
2674 rtx exec = force_reg (DImode, operands[2]);
2675 rtx addr = gcn_expand_scalar_to_vector_address
2676 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
2677 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
2678 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
2679 rtx undef = gcn_gen_undef (<MODE>mode);
2680 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
2681 exec));
2682 DONE;
2683 })
2684
2685 (define_expand "maskstore<mode>di"
2686 [(match_operand:VEC_REG_MODE 0 "memory_operand")
2687 (match_operand:VEC_REG_MODE 1 "register_operand")
2688 (match_operand 2 "")]
2689 ""
2690 {
2691 rtx exec = force_reg (DImode, operands[2]);
2692 rtx addr = gcn_expand_scalar_to_vector_address
2693 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
2694 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
2695 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
2696 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
2697 DONE;
2698 })
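
; Both expanders above reduce masked loads/stores to the gather/scatter
; patterns with the mask supplied as EXEC: inactive lanes neither fault nor
; write, and for loads the inactive result lanes are left undefined
; (gcn_gen_undef).  Per lane, roughly:
;
;   if ((exec >> lane) & 1)
;     dst[lane] = src[lane];   /* load shown; the store mirrors it */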
2699
2700 (define_expand "mask_gather_load<mode>"
2701 [(match_operand:VEC_REG_MODE 0 "register_operand")
2702 (match_operand:DI 1 "register_operand")
2703 (match_operand 2 "register_operand")
2704 (match_operand 3 "immediate_operand")
2705 (match_operand:SI 4 "gcn_alu_operand")
2706 (match_operand:DI 5 "")]
2707 ""
2708 {
2709 rtx exec = force_reg (DImode, operands[5]);
2710
2711 /* TODO: more conversions will be needed when more types are vectorized. */
2712 if (GET_MODE (operands[2]) == V64DImode)
2713 {
2714 rtx tmp = gen_reg_rtx (V64SImode);
2715 emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[2],
2716 gcn_gen_undef (V64SImode),
2717 exec));
2718 operands[2] = tmp;
2719 }
2720
2721 emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
2722 operands[3], operands[4], exec));
2723 DONE;
2724 })
2725
2726 (define_expand "mask_scatter_store<mode>"
2727 [(match_operand:DI 0 "register_operand")
2728 (match_operand 1 "register_operand")
2729 (match_operand 2 "immediate_operand")
2730 (match_operand:SI 3 "gcn_alu_operand")
2731 (match_operand:VEC_REG_MODE 4 "register_operand")
2732 (match_operand:DI 5 "")]
2733 ""
2734 {
2735 rtx exec = force_reg (DImode, operands[5]);
2736
2737 /* TODO: more conversions will be needed when more types are vectorized. */
2738 if (GET_MODE (operands[1]) == V64DImode)
2739 {
2740 rtx tmp = gen_reg_rtx (V64SImode);
2741 emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[1],
2742 gcn_gen_undef (V64SImode),
2743 exec));
2744 operands[1] = tmp;
2745 }
2746
2747 emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
2748 operands[3], operands[4], exec));
2749 DONE;
2750 })
2751
2752 ; FIXME: this should be VEC_REG_MODE, but not all dependencies are implemented.
2753 (define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
2754 (define_mode_iterator COND_INT_MODE [V64SI V64DI])
2755
2756 (define_code_iterator cond_op [plus minus])
2757
2758 (define_expand "cond_<expander><mode>"
2759 [(match_operand:COND_MODE 0 "register_operand")
2760 (match_operand:DI 1 "register_operand")
2761 (cond_op:COND_MODE
2762 (match_operand:COND_MODE 2 "gcn_alu_operand")
2763 (match_operand:COND_MODE 3 "gcn_alu_operand"))
2764 (match_operand:COND_MODE 4 "register_operand")]
2765 ""
2766 {
2767 operands[1] = force_reg (DImode, operands[1]);
2768 operands[2] = force_reg (<MODE>mode, operands[2]);
2769
2770 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2771 operands[3], operands[4],
2772 operands[1]));
2773 DONE;
2774 })
2775
2776 (define_code_iterator cond_bitop [and ior xor])
2777
2778 (define_expand "cond_<expander><mode>"
2779 [(match_operand:COND_INT_MODE 0 "register_operand")
2780 (match_operand:DI 1 "register_operand")
2781 (cond_bitop:COND_INT_MODE
2782 (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
2783 (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
2784 (match_operand:COND_INT_MODE 4 "register_operand")]
2785 ""
2786 {
2787 operands[1] = force_reg (DImode, operands[1]);
2788 operands[2] = force_reg (<MODE>mode, operands[2]);
2789
2790 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2791 operands[3], operands[4],
2792 operands[1]));
2793 DONE;
2794 })
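
; Both cond_* expanders above implement per-lane selection by reusing the
; _exec patterns, with operand 1 as the lane mask and operand 4 as the
; fallback value.  Roughly:
;
;   for (int lane = 0; lane < 64; lane++)
;     out[lane] = (mask >> lane) & 1 ? a[lane] OP b[lane] : fallback[lane];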
2795
2796 ;; }}}
2797 ;; {{{ Vector reductions
2798
2799 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
2800 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
2801 UNSPEC_PLUS_DPP_SHR
2802 UNSPEC_AND_DPP_SHR
2803 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2804
2805 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
2806 UNSPEC_AND_DPP_SHR
2807 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2808
2809 ; FIXME: Isn't there a better way of doing this?
2810 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
2811 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
2812 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
2813 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
2814 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
2815 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
2816 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
2817 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
2818
2819 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
2820 (UNSPEC_SMAX_DPP_SHR "smax")
2821 (UNSPEC_UMIN_DPP_SHR "umin")
2822 (UNSPEC_UMAX_DPP_SHR "umax")
2823 (UNSPEC_PLUS_DPP_SHR "plus")
2824 (UNSPEC_AND_DPP_SHR "and")
2825 (UNSPEC_IOR_DPP_SHR "ior")
2826 (UNSPEC_XOR_DPP_SHR "xor")])
2827
2828 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
2829 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
2830 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
2831 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
2832 (UNSPEC_PLUS_DPP_SHR "v_add%u0")
2833 (UNSPEC_AND_DPP_SHR "v_and%b0")
2834 (UNSPEC_IOR_DPP_SHR "v_or%b0")
2835 (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
2836
2837 (define_expand "reduc_<reduc_op>_scal_<mode>"
2838 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
2839 (unspec:<SCALAR_MODE>
2840 [(match_operand:VEC_1REG_MODE 1 "register_operand")]
2841 REDUC_UNSPEC))]
2842 ""
2843 {
2844 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
2845 <reduc_unspec>);
2846
2847 /* The result of the reduction is in lane 63 of tmp. */
2848 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
2849
2850 DONE;
2851 })
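
; gcn_expand_reduc_scalar (defined elsewhere in the port) is assumed here to
; emit a log2 tree of the *_dpp_shr patterns below: at each step every lane
; combines its value with the value 'shift' lanes below it, for shift = 1, 2,
; 4, 8, 16, 32, leaving the full 64-lane result in lane 63.  A scalar model
; (illustrative C; OP is the reduction operation):
;
;   for (int shift = 1; shift < 64; shift <<= 1)
;     for (int lane = 63; lane >= shift; lane--)  /* descend: read old values */
;       v[lane] = OP (v[lane], v[lane - shift]);
;   /* result in v[63] */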
2852
2853 (define_expand "reduc_<reduc_op>_scal_v64di"
2854 [(set (match_operand:DI 0 "register_operand")
2855 (unspec:DI
2856 [(match_operand:V64DI 1 "register_operand")]
2857 REDUC_2REG_UNSPEC))]
2858 ""
2859 {
2860 rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
2861 <reduc_unspec>);
2862
2863 /* The result of the reduction is in lane 63 of tmp. */
2864 emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));
2865
2866 DONE;
2867 })
2868
2869 (define_insn "*<reduc_op>_dpp_shr_<mode>"
2870 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
2871 (unspec:VEC_1REG_MODE
2872 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
2873 (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
2874 (match_operand:SI 3 "const_int_operand" "n")]
2875 REDUC_UNSPEC))]
2876 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
2877 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
2878 {
2879 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
2880 <reduc_unspec>, INTVAL (operands[3]));
2881 }
2882 [(set_attr "type" "vop_dpp")
2883 (set_attr "length" "8")])
2884
2885 (define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
2886 [(set (match_operand:V64DI 0 "register_operand" "=&v")
2887 (unspec:V64DI
2888 [(match_operand:V64DI 1 "register_operand" "v0")
2889 (match_operand:V64DI 2 "register_operand" "v0")
2890 (match_operand:SI 3 "const_int_operand" "n")]
2891 REDUC_2REG_UNSPEC))]
2892 ""
2893 "#"
2894 "reload_completed"
2895 [(set (match_dup 4)
2896 (unspec:V64SI
2897 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
2898 (set (match_dup 5)
2899 (unspec:V64SI
2900 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
2901 {
2902 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
2903 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
2904 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
2905 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
2906 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
2907 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
2908 }
2909 [(set_attr "type" "vmult")
2910 (set_attr "length" "16")])
2911
2912 ; Special cases for addition.
2913
2914 (define_insn "*plus_carry_dpp_shr_<mode>"
2915 [(set (match_operand:VEC_1REG_INT_MODE 0 "register_operand" "=v")
2916 (unspec:VEC_1REG_INT_MODE
2917 [(match_operand:VEC_1REG_INT_MODE 1 "register_operand" "v")
2918 (match_operand:VEC_1REG_INT_MODE 2 "register_operand" "v")
2919 (match_operand:SI 3 "const_int_operand" "n")]
2920 UNSPEC_PLUS_CARRY_DPP_SHR))
2921 (clobber (reg:DI VCC_REG))]
2922 ""
2923 {
2924 const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
2925 return gcn_expand_dpp_shr_insn (<MODE>mode, insn,
2926 UNSPEC_PLUS_CARRY_DPP_SHR,
2927 INTVAL (operands[3]));
2928 }
2929 [(set_attr "type" "vop_dpp")
2930 (set_attr "length" "8")])
2931
2932 (define_insn "*plus_carry_in_dpp_shr_v64si"
2933 [(set (match_operand:V64SI 0 "register_operand" "=v")
2934 (unspec:V64SI
2935 [(match_operand:V64SI 1 "register_operand" "v")
2936 (match_operand:V64SI 2 "register_operand" "v")
2937 (match_operand:SI 3 "const_int_operand" "n")
2938 (match_operand:DI 4 "register_operand" "cV")]
2939 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
2940 (clobber (reg:DI VCC_REG))]
2941 ""
2942 {
2943 const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
2944 return gcn_expand_dpp_shr_insn (V64SImode, insn,
2945 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
2946 INTVAL (operands[3]));
2947 }
2948 [(set_attr "type" "vop_dpp")
2949 (set_attr "length" "8")])
2950
2951 (define_insn_and_split "*plus_carry_dpp_shr_v64di"
2952 [(set (match_operand:V64DI 0 "register_operand" "=&v")
2953 (unspec:V64DI
2954 [(match_operand:V64DI 1 "register_operand" "v0")
2955 (match_operand:V64DI 2 "register_operand" "v0")
2956 (match_operand:SI 3 "const_int_operand" "n")]
2957 UNSPEC_PLUS_CARRY_DPP_SHR))
2958 (clobber (reg:DI VCC_REG))]
2959 ""
2960 "#"
2961 "reload_completed"
2962 [(parallel [(set (match_dup 4)
2963 (unspec:V64SI
2964 [(match_dup 6) (match_dup 8) (match_dup 3)]
2965 UNSPEC_PLUS_CARRY_DPP_SHR))
2966 (clobber (reg:DI VCC_REG))])
2967 (parallel [(set (match_dup 5)
2968 (unspec:V64SI
2969 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
2970 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
2971 (clobber (reg:DI VCC_REG))])]
2972 {
2973 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
2974 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
2975 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
2976 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
2977 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
2978 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
2979 }
2980 [(set_attr "type" "vmult")
2981 (set_attr "length" "16")])
2982
2983 ; Instructions to move a scalar value from lane 63 of a vector register.
2984 (define_insn "mov_from_lane63_<mode>"
2985 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
2986 (unspec:<SCALAR_MODE>
2987 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v,v")]
2988 UNSPEC_MOV_FROM_LANE63))]
2989 ""
2990 "@
2991 v_readlane_b32\t%0, %1, 63
2992 v_mov_b32\t%0, %1 wave_ror:1"
2993 [(set_attr "type" "vop3a,vop_dpp")
2994 (set_attr "exec" "none,*")
2995 (set_attr "length" "8")])
2996
2997 (define_insn "mov_from_lane63_v64di"
2998 [(set (match_operand:DI 0 "register_operand" "=Sg,v")
2999 (unspec:DI
3000 [(match_operand:V64DI 1 "register_operand" "v,v")]
3001 UNSPEC_MOV_FROM_LANE63))]
3002 ""
3003 "@
3004 v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
3005 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
3006 return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
3007 \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
3008 else \
3009 return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
3010 \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
3011 [(set_attr "type" "vop3a,vop_dpp")
3012 (set_attr "exec" "none,*")
3013 (set_attr "length" "8")])
3014
3015 ;; }}}
3016 ;; {{{ Miscellaneous
3017
3018 (define_expand "vec_seriesv64si"
3019 [(match_operand:V64SI 0 "register_operand")
3020 (match_operand:SI 1 "gcn_alu_operand")
3021 (match_operand:SI 2 "gcn_alu_operand")]
3022 ""
3023 {
3024 rtx tmp = gen_reg_rtx (V64SImode);
3025 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3026
3027 emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
3028 emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
3029 DONE;
3030 })
3031
3032 (define_expand "vec_seriesv64di"
3033 [(match_operand:V64DI 0 "register_operand")
3034 (match_operand:DI 1 "gcn_alu_operand")
3035 (match_operand:DI 2 "gcn_alu_operand")]
3036 ""
3037 {
3038 rtx tmp = gen_reg_rtx (V64DImode);
3039 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3040
3041 emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
3042 emit_insn (gen_addv64di3_dup (operands[0], tmp, operands[1]));
3043 DONE;
3044 })
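
;; In both expanders above, lane I of the result is operands[1] plus
;; I * operands[2]: VGPR 1 is assumed (as in while_ultsidi above) to be
;; preloaded with the lane IDs 0..63, so a multiply by the step followed by
;; a broadcast add of the base yields the series.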
3045
3046 ;; }}}