;; Copyright (C) 2016-2020 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; {{{ Vector iterators

; Vector modes for sub-dword modes
(define_mode_iterator VEC_SUBDWORD_MODE
  [V64QI V64HI])

; Vector modes for one vector register
(define_mode_iterator VEC_1REG_MODE
  [V64SI V64HF V64SF])
(define_mode_iterator VEC_1REG_ALT
  [V64SI V64HF V64SF])
(define_mode_iterator VEC_ALL1REG_MODE
  [V64QI V64HI V64SI V64HF V64SF])

(define_mode_iterator VEC_1REG_INT_MODE
  [V64SI])
(define_mode_iterator VEC_ALL1REG_INT_MODE
  [V64QI V64HI V64SI])
(define_mode_iterator VEC_ALL1REG_INT_ALT
  [V64QI V64HI V64SI])

; Vector modes for two vector registers
(define_mode_iterator VEC_2REG_MODE
  [V64DI V64DF])

; All of above
(define_mode_iterator VEC_REG_MODE
  [V64SI V64HF V64SF    ; Single reg
   V64DI V64DF])        ; Double reg
(define_mode_iterator VEC_ALLREG_MODE
  [V64QI V64HI V64SI V64HF V64SF   ; Single reg
   V64DI V64DF])                   ; Double reg
(define_mode_iterator VEC_ALLREG_ALT
  [V64QI V64HI V64SI V64HF V64SF   ; Single reg
   V64DI V64DF])                   ; Double reg
(define_mode_iterator VEC_ALLREG_INT_MODE
  [V64QI V64HI V64SI               ; Single reg
   V64DI])                         ; Double reg

(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])

(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])

;; }}}
;; {{{ Substitutions

(define_subst_attr "exec" "vec_merge"
  "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
  "" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
  "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
  "" "_exec")

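; Each define_subst below generates a second "_exec" variant of any pattern
; whose name uses the matching subst attribute.  For example, the
; "vec_duplicate<mode><exec>" insn later in this file yields both
; gen_vec_duplicatev64si and gen_vec_duplicatev64si_exec: the _exec variant
; wraps the source in a vec_merge guarded by a "gcn_exec_reg_operand", so
; that only the lanes enabled in the EXEC register are written.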
(define_subst "vec_merge"
  [(set (match_operand:VEC_ALLREG_MODE 0)
        (match_operand:VEC_ALLREG_MODE 1))]
  ""
  [(set (match_dup 0)
        (vec_merge:VEC_ALLREG_MODE
          (match_dup 1)
          (match_operand:VEC_ALLREG_MODE 3
            "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])

(define_subst "vec_merge_with_clobber"
  [(set (match_operand:VEC_ALLREG_MODE 0)
        (match_operand:VEC_ALLREG_MODE 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
        (vec_merge:VEC_ALLREG_MODE
          (match_dup 1)
          (match_operand:VEC_ALLREG_MODE 3
            "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])

(define_subst "vec_merge_with_vcc"
  [(set (match_operand:VEC_ALLREG_MODE 0)
        (match_operand:VEC_ALLREG_MODE 1))
   (set (match_operand:DI 2)
        (match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
           (vec_merge:VEC_ALLREG_MODE
             (match_dup 1)
             (match_operand:VEC_ALLREG_MODE 4
               "gcn_register_or_unspec_operand" "U0")
             (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
           (and:DI (match_dup 3)
                   (reg:DI EXEC_REG)))])])

(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand 0)
           (match_operand 1)
           (match_operand 2)
           (match_operand 3)]
          UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 0)
           (match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
          UNSPEC_SCATTER))])

;; }}}
;; {{{ Vector moves

; This is the entry point for all vector register moves.  Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.

(define_expand "mov<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
        (match_operand:VEC_ALLREG_MODE 1 "general_operand"))]
  ""
  {
    if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
      {
        operands[1] = force_reg (<MODE>mode, operands[1]);
        rtx scratch = gen_rtx_SCRATCH (V64DImode);
        rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
        rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
        rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                        operands[0],
                                                        scratch);
        emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
        DONE;
      }
    else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
      {
        rtx scratch = gen_rtx_SCRATCH (V64DImode);
        rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
        rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
        rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                        operands[1],
                                                        scratch);
        emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
        DONE;
      }
    else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
      {
        gcc_assert (!reload_completed);
        rtx scratch = gen_reg_rtx (V64DImode);
        emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
        DONE;
      }
  })

; A pseudo instruction that helps LRA use the "U0" constraint.

(define_insn "mov<mode>_unspec"
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand" "=v")
        (match_operand:VEC_ALLREG_MODE 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])

(define_insn "*mov<mode>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand" "=v,v")
        (match_operand:VEC_ALL1REG_MODE 1 "general_operand"      "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])

(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
                                                  "=v, v, v, v, v, m")
        (vec_merge:VEC_ALL1REG_MODE
          (match_operand:VEC_ALL1REG_MODE 1 "general_operand"
                                                  "vA, B, v,vA, m, v")
          (match_operand:VEC_ALL1REG_MODE 3 "gcn_alu_or_unspec_operand"
                                                  "U0,U0,vA,vA,U0,U0")
          (match_operand:DI 2 "register_operand"  " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:V64DI 4               "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %3, %1, vcc
   v_cndmask_b32\t%0, %3, %1, %2
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
;                                                       "=v,v, v, m")
;        (vec_merge:VEC_ALL1REG_MODE
;          (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B, m, v")
;          (match_dup 0)
;          (match_operand:DI 2 "gcn_exec_reg_operand"          " e,e, e, e")))
;   (clobber (match_scratch:V64DI 3                            "=X,X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  v_mov_b32\t%0, %1
;  v_mov_b32\t%0, %1
;  #
;  #"
;  [(set_attr "type" "vop1,vop1,*,*")
;   (set_attr "length" "4,8,16,16")])

(define_insn "*mov<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
        (match_operand:VEC_2REG_MODE 1 "general_operand"      "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
                                                   "= v,   v,   v, v, m")
        (vec_merge:VEC_2REG_MODE
          (match_operand:VEC_2REG_MODE 1 "general_operand"
                                                   "vDB,  v0,  v0, m, v")
          (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
                                                   " U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 2 "register_operand"   "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:V64DI 4                 "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
        case 1:
          return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
                 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
        case 2:
          return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
                 "v_cndmask_b32\t%H0, %H3, %H1, %2";
        }
    else
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
        case 1:
          return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
                 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
        case 2:
          return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
                 "v_cndmask_b32\t%L0, %L3, %L1, %2";
        }

    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
;        (vec_merge:VEC_2REG_MODE
;          (match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")
;          (match_dup 0)
;          (match_operand:DI 2 "gcn_exec_reg_operand"         "  e, e, e")))
;   (clobber (match_scratch:V64DI 3                           "=X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
;      return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
;    else \
;      return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
;  #
;  #"
;  [(set_attr "type" "vmult,*,*")
;   (set_attr "length" "16,16,16")])

; A SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT
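;
; For example (an illustrative sketch only), a V64SI load would use a shift
; of log2(4) = 2 for the four-byte element size:
;   vT = v1 << 2        ; per-lane byte offsets 0, 4, 8, ...
;   vT += Sv            ; add the scalar base address to every lane
;   flat_load_dword v, vT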

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
                                                       "= v, v, v, m")
        (unspec:VEC_ALL1REG_MODE
          [(match_operand:VEC_ALL1REG_MODE 1 "general_operand"
                                                       " vA,vB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
        (unspec:VEC_2REG_MODE
          [(match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"         "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "8,12,12")])

; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload.  It allows a reload with a scratch register.

(define_expand "reload_in<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "= v")
        (match_operand:VEC_ALLREG_MODE 1 "memory_operand"   "  m"))
   (clobber (match_operand:V64DI 2 "register_operand"       "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; reload_out is similar to reload_in, above.

(define_expand "reload_out<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand"   "= m")
        (match_operand:VEC_ALLREG_MODE 1 "register_operand" "  v"))
   (clobber (match_operand:V64DI 2 "register_operand"       "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; Expand scalar addresses into gather/scatter patterns

(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
        (unspec:VEC_ALLREG_MODE
          [(match_operand:VEC_ALLREG_MODE 1 "general_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
                    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       operands[0],
                                                       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
        (vec_merge:VEC_ALLREG_MODE
          (match_operand:VEC_ALLREG_MODE 1 "general_operand")
          (match_operand:VEC_ALLREG_MODE 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1)
                     (match_dup 6) (match_dup 7) (match_dup 3)]
                    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3],
                                                       operands[0],
                                                       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
        (unspec:VEC_ALLREG_MODE
          [(match_operand:VEC_ALLREG_MODE 1 "memory_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (match_dup 0)
        (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
                                 (mem:BLK (scratch))]
                                UNSPEC_GATHER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       operands[1],
                                                       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

(define_split
  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
        (vec_merge:VEC_ALLREG_MODE
          (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
          (match_operand:VEC_ALLREG_MODE 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (match_dup 0)
        (vec_merge:VEC_ALLREG_MODE
          (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
                                   (mem:BLK (scratch))]
                                  UNSPEC_GATHER)
          (match_dup 2)
          (match_dup 3)))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3],
                                                       operands[1],
                                                       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; TODO: Add zero/sign extending variants.

;; }}}
;; {{{ Lane moves

; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates
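;
; The merge mask below is the one-hot value (ashift (const_int 1) (lane)).
; As an illustration, setting lane 5 of a V64SI vector uses the mask 1<<5
; and assembles to "v_writelane_b32 %0, %1, 5"; all other lanes keep the
; value of operand 3.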

(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"       "= v")
        (vec_merge:VEC_ALL1REG_MODE
          (vec_duplicate:VEC_ALL1REG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand"      " Sv"))
          (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
                                                                   " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"            "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"          "= v")
        (vec_merge:VEC_2REG_MODE
          (vec_duplicate:VEC_2REG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand"      " Sv"))
          (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
                                                                   " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"            "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "vec_set<mode>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
        (vec_merge:VEC_ALLREG_MODE
          (vec_duplicate:VEC_ALLREG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand"))
          (match_dup 0)
          (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"  "=v")
        (vec_merge:VEC_ALL1REG_MODE
          (vec_duplicate:VEC_ALL1REG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
          (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
                                                              "U0")
          (match_operand:SI 2 "const_int_operand"             " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"     "=v")
        (vec_merge:VEC_2REG_MODE
          (vec_duplicate:VEC_2REG_MODE
            (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
          (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
                                                              "U0")
          (match_operand:SI 2 "const_int_operand"             " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
        (vec_duplicate:VEC_ALL1REG_MODE
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand"   "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"    "= v")
        (vec_duplicate:VEC_2REG_MODE
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand"   "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])

(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"      "=Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:VEC_ALL1REG_MODE 1 "register_operand" "  v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand"     "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"    "=&Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:VEC_2REG_MODE 1 "register_operand"  "   v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand"   " SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 2 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx dst = operands[0];
    rtx mask = operands[1];
    rtx vect = operands[2];
    rtx tmpreg = gen_reg_rtx (SImode);

    emit_insn (gen_clzdi2 (tmpreg, mask));
    emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
    emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
    DONE;
  })

(define_expand "fold_extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 3 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx dst = operands[0];
    rtx default_value = operands[1];
    rtx mask = operands[2];
    rtx vect = operands[3];
    rtx else_label = gen_label_rtx ();
    rtx end_label = gen_label_rtx ();

    rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
    emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
    emit_insn (gen_extract_last_<mode> (dst, mask, vect));
    emit_jump_insn (gen_jump (end_label));
    emit_barrier ();
    emit_label (else_label);
    emit_move_insn (dst, default_value);
    emit_label (end_label);
    DONE;
  })

(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand 1)]
  ""
  {
    gcn_expand_vector_init (operands[0], operands[1]);
    DONE;
  })

;; }}}
;; {{{ Scatter / Gather

;; GCN does not have an instruction for loading a vector from contiguous
;; memory so *all* loads and stores are eventually converted to scatter
;; or gather.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec.  The unspec formats are as follows:
;;
;;     (unspec:V64??
;;       [(<address expression>)
;;        (<addr_space_t>)
;;        (<use_glc>)
;;        (mem:BLK (scratch))]
;;       UNSPEC_GATHER)
;;
;;     (unspec:BLK
;;       [(<address expression>)
;;        (<source register>)
;;        (<addr_space_t>)
;;        (<use_glc>)
;;        (<exec>)]
;;       UNSPEC_SCATTER)
;;
;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
;; - The mem:BLK does not contain any real information, but indicates that an
;;   unknown memory read is taking place.  Stores are expected to use a
;;   similar mem:BLK outside the unspec.
;; - The address space and glc (volatile) fields are there to replace the
;;   fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.
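;;
;; As a purely illustrative instance, a V64SI gather from a vector of lane
;; addresses held in a V64DI register, in address space AS with glc unset,
;; would be written:
;;
;;     (unspec:V64SI
;;       [(reg:V64DI <addrs>)   ; the 64 lane addresses
;;        (const_int AS)        ; addr_space_t
;;        (const_int 0)         ; glc off
;;        (mem:BLK (scratch))]
;;       UNSPEC_GATHER)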

(define_expand "gather_load<mode>v64si"
  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:V64SI 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
                                                const0_rtx, const0_rtx));
    else
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
                                                 addr, const0_rtx, const0_rtx,
                                                 const0_rtx));
    DONE;
  })

; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
        (unspec:VEC_ALLREG_MODE
          [(match_operand 1 "")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  ""
  {})

(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"      "=v")
        (unspec:VEC_ALLREG_MODE
          [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
                       (vec_duplicate:V64DI
                         (match_operand 2 "immediate_operand"    " n")))
           (match_operand 3 "immediate_operand"                  " n")
           (match_operand 4 "immediate_operand"                  " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
        || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
        if (TARGET_GCN5_PLUS)
          sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
                   glc);
        else
          sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
               "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"      "=v")
        (unspec:VEC_ALLREG_MODE
          [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
                       (vec_duplicate:V64SI
                         (match_operand 2 "immediate_operand"    " n")))
           (match_operand 3 "immediate_operand"                  " n")
           (match_operand 4 "immediate_operand"                  " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
             (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"        "=v")
        (unspec:VEC_ALLREG_MODE
          [(plus:V64DI
             (plus:V64DI
               (vec_duplicate:V64DI
                 (match_operand:DI 1 "register_operand"            "Sv"))
               (sign_extend:V64DI
                 (match_operand:V64SI 2 "register_operand"         " v")))
             (vec_duplicate:V64DI (match_operand 3 "immediate_operand"
                                                                   " n")))
           (match_operand 4 "immediate_operand"                    " n")
           (match_operand 5 "immediate_operand"                    " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
        /* Work around assembler bug in which a 64-bit register is expected,
           but a 32-bit value would be correct.  */
        int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
        sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
                 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

(define_expand "scatter_store<mode>v64si"
  [(match_operand:DI 0 "register_operand")
   (match_operand:V64SI 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_ALLREG_MODE 4 "register_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
                                                 const0_rtx, const0_rtx));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
                                                  const0_rtx, operands[4],
                                                  const0_rtx, const0_rtx));
    DONE;
  })

; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:V64DI 0 "")
           (match_operand:VEC_ALLREG_MODE 1 "register_operand")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")]
          UNSPEC_SCATTER))]
  ""
  {})

(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
                       (vec_duplicate:V64DI
                         (match_operand 1 "immediate_operand"    "n")))
           (match_operand:VEC_ALLREG_MODE 2 "register_operand"   "v")
           (match_operand 3 "immediate_operand"                  "n")
           (match_operand 4 "immediate_operand"                  "n")]
          UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
        || (TARGET_GCN5_PLUS
            && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
        if (TARGET_GCN5_PLUS)
          sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
        else
          sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
                       (vec_duplicate:V64SI
                         (match_operand 1 "immediate_operand"    "n")))
           (match_operand:VEC_ALLREG_MODE 2 "register_operand"   "v")
           (match_operand 3 "immediate_operand"                  "n")
           (match_operand 4 "immediate_operand"                  "n")]
          UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
             (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:V64DI
             (plus:V64DI
               (vec_duplicate:V64DI
                 (match_operand:DI 0 "register_operand"          "Sv"))
               (sign_extend:V64DI
                 (match_operand:V64SI 1 "register_operand"       " v")))
             (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
                                                                 " n")))
           (match_operand:VEC_ALLREG_MODE 3 "register_operand"   " v")
           (match_operand 4 "immediate_operand"                  " n")
           (match_operand 5 "immediate_operand"                  " n")]
          UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
        /* Work around assembler bug in which a 64-bit register is expected,
           but a 32-bit value would be correct.  */
        int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
        sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
                 reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

;; }}}
;; {{{ Permutations

(define_insn "ds_bpermute<mode>"
  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"    "=v")
        (unspec:VEC_ALL1REG_MODE
          [(match_operand:VEC_ALL1REG_MODE 2 "register_operand" " v")
           (match_operand:V64SI 1 "register_operand"            " v")
           (match_operand:DI 3 "gcn_exec_reg_operand"           " e")]
          UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])

(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"    "=&v")
        (unspec:VEC_2REG_MODE
          [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
           (match_operand:V64SI 1 "register_operand"         "  v")
           (match_operand:DI 3 "gcn_exec_reg_operand"        "  e")]
          UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
                                    UNSPEC_BPERMUTE))
   (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
                                    UNSPEC_BPERMUTE))]
  {
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])

;; }}}
;; {{{ ALU special case: add/sub

(define_insn "add<mode>3<exec_clobber>"
  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand"   "=  v")
        (plus:VEC_ALL1REG_INT_MODE
          (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" "%  v")
          (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand"  "vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

(define_insn "add<mode>3_dup<exec_clobber>"
  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand"   "= v")
        (plus:VEC_ALL1REG_INT_MODE
          (vec_duplicate:VEC_ALL1REG_INT_MODE
            (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"       "SvB"))
          (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" "  v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

(define_insn "addv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"   "=   v,   v")
        (plus:V64SI
          (match_operand:V64SI 1 "register_operand" "%   v,   v")
          (match_operand:V64SI 2 "gcn_alu_operand"  " vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"      "=  cV,  Sg")
        (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8")])

; This pattern only changes the VCC bits when the corresponding lane is
; enabled, so the set must be described as an ior.

(define_insn "addv64si3_vcc_dup<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"   "= v,  v")
        (plus:V64SI
          (vec_duplicate:V64SI
            (match_operand:SI 1 "gcn_alu_operand"   "SvB,SvB"))
          (match_operand:V64SI 2 "register_operand" "  v,  v")))
   (set (match_operand:DI 3 "register_operand"      "=cV, Sg")
        (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 1))
                            (match_dup 2))
                (vec_duplicate:V64SI (match_dup 1))))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8,8")])

; v_addc does not accept an SGPR because the VCC read already counts as an
; SGPR use and the number of SGPR operands is limited to 1.  It does not
; accept "B" immediate constants due to a related bus conflict.
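;
; Note how the carry-in below is modelled: a vector of per-lane 1s and 0s
; selected by the carry mask in operand 3, i.e.
; (vec_merge (vec_duplicate 1) (vec_duplicate 0) <mask>), which is then
; simply added to the two addends.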

(define_insn "addcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"    "=v,   v")
        (plus:V64SI
          (plus:V64SI
            (vec_merge:V64SI
              (vec_duplicate:V64SI (const_int 1))
              (vec_duplicate:V64SI (const_int 0))
              (match_operand:DI 3 "register_operand" " cV,cVSv"))
            (match_operand:V64SI 1 "gcn_alu_operand" "% v,  vA"))
          (match_operand:V64SI 2 "gcn_alu_operand"   " vA,  vA")))
   (set (match_operand:DI 4 "register_operand"       "=cV,cVSg")
        (ior:DI (ltu:DI (plus:V64SI
                          (plus:V64SI
                            (vec_merge:V64SI
                              (vec_duplicate:V64SI (const_int 1))
                              (vec_duplicate:V64SI (const_int 0))
                              (match_dup 3))
                            (match_dup 1))
                          (match_dup 2))
                        (match_dup 2))
                (ltu:DI (plus:V64SI
                          (vec_merge:V64SI
                            (vec_duplicate:V64SI (const_int 1))
                            (vec_duplicate:V64SI (const_int 0))
                            (match_dup 3))
                          (match_dup 1))
                        (match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])

(define_insn "sub<mode>3<exec_clobber>"
  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand"  "=  v,   v")
        (minus:VEC_ALL1REG_INT_MODE
          (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "vSvB,   v")
          (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" "   v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])

(define_insn "subv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v,   v,   v,   v")
        (minus:V64SI
          (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB,   v,   v")
          (match_operand:V64SI 2 "gcn_alu_operand" "   v,   v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"     "= cV,  Sg,  cV,  Sg")
        (gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])

; v_subb does not accept an SGPR because the VCC read already counts as an
; SGPR use and the number of SGPR operands is limited to 1.  It does not
; accept "B" immediate constants due to a related bus conflict.

(define_insn "subcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"    "= v,   v, v,   v")
        (minus:V64SI
          (minus:V64SI
            (vec_merge:V64SI
              (vec_duplicate:V64SI (const_int 1))
              (vec_duplicate:V64SI (const_int 0))
              (match_operand:DI 3 "gcn_alu_operand"  " cV,cVSv,cV,cVSv"))
            (match_operand:V64SI 1 "gcn_alu_operand" " vA,  vA, v,  vA"))
          (match_operand:V64SI 2 "gcn_alu_operand"   "  v,  vA,vA,  vA")))
   (set (match_operand:DI 4 "register_operand"       "=cV,cVSg,cV,cVSg")
        (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
                                       (vec_merge:V64SI
                                         (vec_duplicate:V64SI (const_int 1))
                                         (vec_duplicate:V64SI (const_int 0))
                                         (match_dup 3))
                                       (match_dup 1))
                                     (match_dup 2))
                        (match_dup 2))
                (ltu:DI (minus:V64SI (vec_merge:V64SI
                                       (vec_duplicate:V64SI (const_int 1))
                                       (vec_duplicate:V64SI (const_int 0))
                                       (match_dup 3))
                                     (match_dup 1))
                        (match_dup 1))))]
  ""
  "@
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "4,8,4,8")])

(define_insn_and_split "addv64di3"
  [(set (match_operand:V64DI 0 "register_operand"   "=  v")
        (plus:V64DI
          (match_operand:V64DI 1 "register_operand" "%vDb")
          (match_operand:V64DI 2 "gcn_alu_operand"  " vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc));
    emit_insn (gen_addcv64si3
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "=  v")
        (vec_merge:V64DI
          (plus:V64DI
            (match_operand:V64DI 1 "register_operand"             "%vDb")
            (match_operand:V64DI 2 "gcn_alu_operand"              " vDb"))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "  U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "   e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[4])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addcv64si3_exec
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "subv64di3"
  [(set (match_operand:V64DI 0 "register_operand"  "= v,  v")
        (minus:V64DI
          (match_operand:V64DI 1 "gcn_alu_operand" "vDb,  v")
          (match_operand:V64DI 2 "gcn_alu_operand" "  v,vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc));
    emit_insn (gen_subcv64si3
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "subv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "=  v,   v")
        (vec_merge:V64DI
          (minus:V64DI
            (match_operand:V64DI 1 "gcn_alu_operand"              "vSvB,   v")
            (match_operand:V64DI 2 "gcn_alu_operand"              "   v,vSvB"))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "  U0,  U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "   e,   e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (V64DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_subcv64si3_exec
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[1], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand"    "= v,  v")
        (plus:V64DI
          (zero_extend:V64DI
            (match_operand:V64SI 1 "gcn_alu_operand" " vA, vB"))
          (match_operand:V64DI 2 "gcn_alu_operand"   "vDb,vDA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
                (gcn_operand_part (V64DImode, operands[0], 0),
                 operands[1],
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc));
    emit_insn (gen_addcv64si3
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "= v,  v")
        (vec_merge:V64DI
          (plus:V64DI
            (zero_extend:V64DI
              (match_operand:V64SI 1 "gcn_alu_operand"            " vA, vB"))
            (match_operand:V64DI 2 "gcn_alu_operand"              "vDb,vDA"))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "  e,  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 operands[1],
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addcv64si3_exec
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 const0_rtx, vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup"
  [(set (match_operand:V64DI 0 "register_operand"      "= v,  v")
        (plus:V64DI
          (zero_extend:V64DI
            (vec_duplicate:V64SI
              (match_operand:SI 1 "gcn_alu_operand"    "BSv,ASv")))
          (match_operand:V64DI 2 "gcn_alu_operand"     "vDA,vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc));
    emit_insn (gen_addcv64si3
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand"                 "= v,  v")
        (vec_merge:V64DI
          (plus:V64DI
            (zero_extend:V64DI
              (vec_duplicate:V64SI
                (match_operand:SI 1 "gcn_alu_operand"             "ASv,BSv")))
            (match_operand:V64DI 2 "gcn_alu_operand"              "vDb,vDA"))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "  e,  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[1], 0),
                 gcn_operand_part (V64DImode, operands[2], 0),
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addcv64si3_exec
                (gcn_operand_part (V64DImode, operands[0], 1),
                 gcn_operand_part (V64DImode, operands[2], 1),
                 const0_rtx, vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"                     "= v")
        (plus:V64DI
          (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
          (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "DbSv"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
                (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_zext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"                  "= v")
        (vec_merge:V64DI
          (plus:V64DI
            (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
                                                                   " vA"))
            (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"
                                                                   "BSv")))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand"  " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"               "  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
                (dsthi, gcn_operand_part (DImode, operands[2], 1),
                 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
                (dsthi, dsthi, const0_rtx, vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_sext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"                     "= v")
        (plus:V64DI
          (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
          (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"  "BSv"))))
   (clobber (match_scratch:V64SI 3                                    "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
    emit_insn (gen_addv64si3_vcc_dup
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
                (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "addv64di3_sext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"                  "= v")
        (vec_merge:V64DI
          (plus:V64DI
            (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
                                                                   " vA"))
            (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"
                                                                   "BSv")))
          (match_operand:V64DI 3 "gcn_register_or_unspec_operand"  " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"               "  e")))
   (clobber (match_scratch:V64SI 5                                 "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
                                    gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addv64si3_vcc_dup_exec
                (gcn_operand_part (V64DImode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc,
                 gcn_operand_part (V64DImode, operands[3], 0),
                 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
                (dsthi, gcn_operand_part (DImode, operands[2], 1),
                 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
                (dsthi, dsthi, operands[5], vcc, vcc,
                 gcn_operand_part (V64DImode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

;; }}}
;; {{{ DS memory ALU: add/sub

(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])

;; FIXME: the vector patterns probably need RD expanded to a vector of
;;        addresses.  For now, the only way a vector can get into LDS is
;;        if the user puts it there manually.
;;
;; FIXME: the scalar patterns are probably fine in themselves, but need to be
;;        checked to see if anything can ever use them.
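;;
;; Note that these insns only cover read-modify-write updates where the
;; source and destination are the same LDS location, for instance
;; (set (mem:SI A) (plus:SI (mem:SI A) (reg:SI vN))); the rtx_equal_p
;; insn condition below enforces that.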

(define_insn "add<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"   "=RD")
        (plus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand"      "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "add<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand"  "=RD")
        (plus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                                       "%RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"     "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "sub<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"   "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand"      "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "sub<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand"  "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                                       " RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"     "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "subr<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"   "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 2 "register_operand"      "  v")
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "subr<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand"  "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"     "  v")
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                                       " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

;; }}}
;; {{{ ALU special case: mult

(define_insn "<su>mulv64si3_highpart<exec>"
  [(set (match_operand:V64SI 0 "register_operand"        "=  v")
        (truncate:V64SI
          (lshiftrt:V64DI
            (mult:V64DI
              (any_extend:V64DI
                (match_operand:V64SI 1 "gcn_alu_operand" "  %v"))
              (any_extend:V64DI
                (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
            (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "mul<mode>3<exec>"
  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand"  "=   v")
        (mult:VEC_ALL1REG_INT_MODE
          (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
          (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" " vSvA")))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "mul<mode>3_dup<exec>"
  [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand"  "=   v")
        (mult:VEC_ALL1REG_INT_MODE
          (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
          (vec_duplicate:VEC_ALL1REG_INT_MODE
            (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"      "  SvA"))))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

1658 (define_insn_and_split "mulv64di3"
1659 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1660 (mult:V64DI
1661 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1662 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1663 (clobber (match_scratch:V64SI 3 "=&v"))]
1664 ""
1665 "#"
1666 "reload_completed"
1667 [(const_int 0)]
1668 {
1669 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1670 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1671 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1672 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1673 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1674 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1675 rtx tmp = operands[3];
1676
1677 emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
1678 emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
1679 emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
1680 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1681 emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
1682 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1685 DONE;
1686 })
1687
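;; This split (and its _exec variant below) computes only the low 64 bits
;; of the 128-bit product: (A + B*2^32)*(C + D*2^32) mod 2^64
;; = A*C + (A*D + B*C)*2^32, so the B*D term never contributes.
;; A host-side C sketch of the same decomposition (illustrative only,
;; assuming <stdint.h> types; not part of the compiler):
;;
;;   uint64_t mul64_lowpart (uint64_t a, uint64_t b)
;;   {
;;     uint32_t a_lo = a, a_hi = a >> 32;
;;     uint32_t b_lo = b, b_hi = b >> 32;
;;     uint64_t ll = (uint64_t) a_lo * b_lo;   /* v_mul_lo_u32/v_mul_hi_u32 */
;;     uint32_t hi = (uint32_t) (ll >> 32)
;;                   + a_hi * b_lo             /* truncating 32x32 multiplies */
;;                   + a_lo * b_hi;
;;     return (uint32_t) ll | ((uint64_t) hi << 32);
;;   }
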
1688 (define_insn_and_split "mulv64di3_exec"
1689 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1690 (vec_merge:V64DI
1691 (mult:V64DI
1692 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1693 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1694 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1695 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1696 (clobber (match_scratch:V64SI 5 "=&v"))]
1697 ""
1698 "#"
1699 "reload_completed"
1700 [(const_int 0)]
1701 {
1702 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1703 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1704 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1705 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1706 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1707 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1708 rtx exec = operands[4];
1709 rtx tmp = operands[5];
1710
1711 rtx old_lo, old_hi;
1712 if (GET_CODE (operands[3]) == UNSPEC)
1713 {
1714 old_lo = old_hi = gcn_gen_undef (V64SImode);
1715 }
1716 else
1717 {
1718 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1719 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1720 }
1721
1722 rtx undef = gcn_gen_undef (V64SImode);
1723
1724 emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
1725 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
1726 old_hi, exec));
1727 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
1728 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1729 emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
1730 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1733 DONE;
1734 })
1735
1736 (define_insn_and_split "mulv64di3_zext"
1737 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1738 (mult:V64DI
1739 (zero_extend:V64DI
1740 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1741 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1742 (clobber (match_scratch:V64SI 3 "=&v"))]
1743 ""
1744 "#"
1745 "reload_completed"
1746 [(const_int 0)]
1747 {
1748 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1749 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1750 rtx left = operands[1];
1751 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1752 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1753 rtx tmp = operands[3];
1754
1755 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1756 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1757 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1758 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1759 DONE;
1760 })
1761
1762 (define_insn_and_split "mulv64di3_zext_exec"
1763 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1764 (vec_merge:V64DI
1765 (mult:V64DI
1766 (zero_extend:V64DI
1767 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1768 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1769 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1770 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1771 (clobber (match_scratch:V64SI 5 "=&v"))]
1772 ""
1773 "#"
1774 "reload_completed"
1775 [(const_int 0)]
1776 {
1777 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1778 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1779 rtx left = operands[1];
1780 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1781 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1782 rtx exec = operands[4];
1783 rtx tmp = operands[5];
1784
1785 rtx old_lo, old_hi;
1786 if (GET_CODE (operands[3]) == UNSPEC)
1787 {
1788 old_lo = old_hi = gcn_gen_undef (V64SImode);
1789 }
1790 else
1791 {
1792 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1793 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1794 }
1795
1796 rtx undef = gcn_gen_undef (V64SImode);
1797
1798 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1799 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1800 old_hi, exec));
1801 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1802 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1803 DONE;
1804 })
1805
1806 (define_insn_and_split "mulv64di3_zext_dup2"
1807 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1808 (mult:V64DI
1809 (zero_extend:V64DI
1810 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1811 (vec_duplicate:V64DI
1812 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
1813 (clobber (match_scratch:V64SI 3 "= &v"))]
1814 ""
1815 "#"
1816 "reload_completed"
1817 [(const_int 0)]
1818 {
1819 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1820 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1821 rtx left = operands[1];
1822 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1823 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1824 rtx tmp = operands[3];
1825
1826 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1827 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1828 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1829 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1830 DONE;
1831 })
1832
1833 (define_insn_and_split "mulv64di3_zext_dup2_exec"
1834 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1835 (vec_merge:V64DI
1836 (mult:V64DI
1837 (zero_extend:V64DI
1838 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1839 (vec_duplicate:V64DI
1840 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
1841 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1842 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1843 (clobber (match_scratch:V64SI 5 "= &v"))]
1844 ""
1845 "#"
1846 "reload_completed"
1847 [(const_int 0)]
1848 {
1849 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1850 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1851 rtx left = operands[1];
1852 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1853 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1854 rtx exec = operands[4];
1855 rtx tmp = operands[5];
1856
1857 rtx old_lo, old_hi;
1858 if (GET_CODE (operands[3]) == UNSPEC)
1859 {
1860 old_lo = old_hi = gcn_gen_undef (V64SImode);
1861 }
1862 else
1863 {
1864 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1865 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1866 }
1867
1868 rtx undef = gcn_gen_undef (V64SImode);
1869
1870 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1871 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1872 old_hi, exec));
1873 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1874 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1875 DONE;
1876 })
1877
1878 ;; }}}
1879 ;; {{{ ALU generic case
1880
1881 (define_mode_iterator VEC_INT_MODE [V64SI V64DI])
1882
1883 (define_code_iterator bitop [and ior xor])
1884 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
1885 (define_code_iterator minmaxop [smin smax umin umax])
1886
1887 (define_insn "<expander><mode>2<exec>"
1888 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v")
1889 (bitunop:VEC_ALL1REG_INT_MODE
1890 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
1891 ""
1892 "v_<mnemonic>0\t%0, %1"
1893 [(set_attr "type" "vop1")
1894 (set_attr "length" "8")])
1895
1896 (define_insn "<expander><mode>3<exec>"
1897 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
1898 (bitop:VEC_ALL1REG_INT_MODE
1899 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_valu_src0_operand"
1900 "% v, 0")
1901 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_valu_src1com_operand"
1902 "vSvB, v")))]
1903 ""
1904 "@
1905 v_<mnemonic>0\t%0, %2, %1
1906 ds_<mnemonic>0\t%A0, %2%O0"
1907 [(set_attr "type" "vop2,ds")
1908 (set_attr "length" "8,8")])
1909
1910 (define_insn_and_split "<expander>v64di3"
1911 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD")
1912 (bitop:V64DI
1913 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
1914 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
1915 ""
1916 "@
1917 #
1918 ds_<mnemonic>0\t%A0, %2%O0"
1919 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
1920 [(set (match_dup 3)
1921 (bitop:V64SI (match_dup 5) (match_dup 7)))
1922 (set (match_dup 4)
1923 (bitop:V64SI (match_dup 6) (match_dup 8)))]
1924 {
1925 operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
1926 operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
1927 operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
1928 operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
1929 operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
1930 operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
1931 }
1932 [(set_attr "type" "vmult,ds")
1933 (set_attr "length" "16,8")])
1934
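;; A V64DI bitwise operation in VGPRs simply splits into two V64SI
;; operations, one per 32-bit register half; unlike the 64-bit add and
;; multiply splits above, there is no carry or cross-half term.
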
1935 (define_insn_and_split "<expander>v64di3_exec"
1936 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD")
1937 (vec_merge:V64DI
1938 (bitop:V64DI
1939 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
1940 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
1941 (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
1942 " U0,U0")
1943 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
1944 "!memory_operand (operands[0], VOIDmode)
1945 || (rtx_equal_p (operands[0], operands[1])
1946 && register_operand (operands[2], VOIDmode))"
1947 "@
1948 #
1949 ds_<mnemonic>0\t%A0, %2%O0"
1950 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
1951 [(set (match_dup 5)
1952 (vec_merge:V64SI
1953 (bitop:V64SI (match_dup 7) (match_dup 9))
1954 (match_dup 11)
1955 (match_dup 4)))
1956 (set (match_dup 6)
1957 (vec_merge:V64SI
1958 (bitop:V64SI (match_dup 8) (match_dup 10))
1959 (match_dup 12)
1960 (match_dup 4)))]
1961 {
1962 operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
1963 operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
1964 operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
1965 operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
1966 operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
1967 operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
1968 operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
1969 operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
1970 }
1971 [(set_attr "type" "vmult,ds")
1972 (set_attr "length" "16,8")])
1973
1974 (define_expand "<expander><mode>3"
1975 [(set (match_operand:VEC_SUBDWORD_MODE 0 "register_operand" "= v")
1976 (shiftop:VEC_SUBDWORD_MODE
1977 (match_operand:VEC_SUBDWORD_MODE 1 "gcn_alu_operand" " v")
1978 (vec_duplicate:VEC_SUBDWORD_MODE
1979 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
1980 ""
1981 {
1982 enum {ashift, lshiftrt, ashiftrt};
1983 bool unsignedp = (<code> == lshiftrt);
1984 rtx insi1 = gen_reg_rtx (V64SImode);
1985 rtx insi2 = gen_reg_rtx (SImode);
1986 rtx outsi = gen_reg_rtx (V64SImode);
1987
1988 convert_move (insi1, operands[1], unsignedp);
1989 convert_move (insi2, operands[2], unsignedp);
1990 emit_insn (gen_<expander>v64si3 (outsi, insi1, insi2));
1991 convert_move (operands[0], outsi, unsignedp);
1992 DONE;
1993 })
1994
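;; The signedness of the widening matters only for the value being
;; shifted: e.g. a V64QI logical shift right of 0x80 by 1 must widen to
;; 0x00000080 (zero-extend) so 0x40 comes out, whereas an arithmetic
;; shift right must widen to 0xffffff80 (sign-extend) to produce 0xc0.
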
1995 (define_insn "<expander>v64si3<exec>"
1996 [(set (match_operand:V64SI 0 "register_operand" "= v")
1997 (shiftop:V64SI
1998 (match_operand:V64SI 1 "gcn_alu_operand" " v")
1999 (vec_duplicate:V64SI
2000 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2001 ""
2002 "v_<revmnemonic>0\t%0, %2, %1"
2003 [(set_attr "type" "vop2")
2004 (set_attr "length" "8")])
2005
2006 (define_expand "v<expander><mode>3"
2007 [(set (match_operand:VEC_SUBDWORD_MODE 0 "register_operand" "=v")
2008 (shiftop:VEC_SUBDWORD_MODE
2009 (match_operand:VEC_SUBDWORD_MODE 1 "gcn_alu_operand" " v")
2010 (match_operand:VEC_SUBDWORD_MODE 2 "gcn_alu_operand" "vB")))]
2011 ""
2012 {
2013 enum {ashift, lshiftrt, ashiftrt};
2014 bool unsignedp = (<code> == lshiftrt);
2015 rtx insi1 = gen_reg_rtx (V64SImode);
2016 rtx insi2 = gen_reg_rtx (V64SImode);
2017 rtx outsi = gen_reg_rtx (V64SImode);
2018
2019 convert_move (insi1, operands[1], unsignedp);
2020 convert_move (insi2, operands[2], unsignedp);
2021 emit_insn (gen_v<expander>v64si3 (outsi, insi1, insi2));
2022 convert_move (operands[0], outsi, unsignedp);
2023 DONE;
2024 })
2025
2026 (define_insn "v<expander>v64si3<exec>"
2027 [(set (match_operand:V64SI 0 "register_operand" "=v")
2028 (shiftop:V64SI
2029 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2030 (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
2031 ""
2032 "v_<revmnemonic>0\t%0, %2, %1"
2033 [(set_attr "type" "vop2")
2034 (set_attr "length" "8")])
2035
2036 (define_expand "<expander><mode>3"
2037 [(set (match_operand:VEC_SUBDWORD_MODE 0 "gcn_valu_dst_operand")
2038 (minmaxop:VEC_SUBDWORD_MODE
2039 (match_operand:VEC_SUBDWORD_MODE 1 "gcn_valu_src0_operand")
2040 (match_operand:VEC_SUBDWORD_MODE 2 "gcn_valu_src1com_operand")))]
2041 ""
2042 {
2043 enum {smin, umin, smax, umax};
2044 bool unsignedp = (<code> == umax || <code> == umin);
2045 rtx insi1 = gen_reg_rtx (V64SImode);
2046 rtx insi2 = gen_reg_rtx (V64SImode);
2047 rtx outsi = gen_reg_rtx (V64SImode);
2048
2049 convert_move (insi1, operands[1], unsignedp);
2050 convert_move (insi2, operands[2], unsignedp);
2051 emit_insn (gen_<code>v64si3 (outsi, insi1, insi2));
2052 convert_move (operands[0], outsi, unsignedp);
2053 DONE;
2054 })
2055
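;; Likewise for min/max: e.g. umax on V64QI lanes 0x01 and 0xff must
;; zero-extend so the V64SI compare sees 1 < 255 and picks 0xff, while
;; smax on the same bits must sign-extend so it sees 1 > -1 and
;; picks 0x01.
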
2056 (define_insn "<expander>v64si3<exec>"
2057 [(set (match_operand:V64SI 0 "gcn_valu_dst_operand" "= v,RD")
2058 (minmaxop:V64SI
2059 (match_operand:V64SI 1 "gcn_valu_src0_operand" "% v, 0")
2060 (match_operand:V64SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2061 ""
2062 "@
2063 v_<mnemonic>0\t%0, %2, %1
2064 ds_<mnemonic>0\t%A0, %2%O0"
2065 [(set_attr "type" "vop2,ds")
2066 (set_attr "length" "8,8")])
2067
2068 ;; }}}
2069 ;; {{{ FP binops - special cases
2070
2071 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2072 ; adding the negated second operand to the first.
2073
2074 (define_insn "subv64df3<exec>"
2075 [(set (match_operand:V64DF 0 "register_operand" "= v, v")
2076 (minus:V64DF
2077 (match_operand:V64DF 1 "gcn_alu_operand" "vSvB, v")
2078 (match_operand:V64DF 2 "gcn_alu_operand" " v,vSvB")))]
2079 ""
2080 "@
2081 v_add_f64\t%0, %1, -%2
2082 v_add_f64\t%0, -%2, %1"
2083 [(set_attr "type" "vop3a")
2084 (set_attr "length" "8,8")])
2085
2086 (define_insn "subdf"
2087 [(set (match_operand:DF 0 "register_operand" "= v, v")
2088 (minus:DF
2089 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2090 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2091 ""
2092 "@
2093 v_add_f64\t%0, %1, -%2
2094 v_add_f64\t%0, -%2, %1"
2095 [(set_attr "type" "vop3a")
2096 (set_attr "length" "8,8")])
2097
2098 ;; }}}
2099 ;; {{{ FP binops - generic
2100
2101 (define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
2102 (define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
2103 (define_mode_iterator FP_MODE [HF SF DF])
2104 (define_mode_iterator FP_1REG_MODE [HF SF])
2105
2106 (define_code_iterator comm_fp [plus mult smin smax])
2107 (define_code_iterator nocomm_fp [minus])
2108 (define_code_iterator all_fp [plus mult minus smin smax])
2109
2110 (define_insn "<expander><mode>3<exec>"
2111 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2112 (comm_fp:VEC_FP_MODE
2113 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v")
2114 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
2115 ""
2116 "v_<mnemonic>0\t%0, %2, %1"
2117 [(set_attr "type" "vop2")
2118 (set_attr "length" "8")])
2119
2120 (define_insn "<expander><mode>3"
2121 [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL")
2122 (comm_fp:FP_MODE
2123 (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0")
2124 (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2125 ""
2126 "@
2127 v_<mnemonic>0\t%0, %2, %1
2128 v_<mnemonic>0\t%0, %1%O0"
2129 [(set_attr "type" "vop2,ds")
2130 (set_attr "length" "8")])
2131
2132 (define_insn "<expander><mode>3<exec>"
2133 [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v")
2134 (nocomm_fp:VEC_FP_1REG_MODE
2135 (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2136 (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2137 ""
2138 "@
2139 v_<mnemonic>0\t%0, %1, %2
2140 v_<revmnemonic>0\t%0, %2, %1"
2141 [(set_attr "type" "vop2")
2142 (set_attr "length" "8,8")])
2143
2144 (define_insn "<expander><mode>3"
2145 [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v")
2146 (nocomm_fp:FP_1REG_MODE
2147 (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2148 (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2149 ""
2150 "@
2151 v_<mnemonic>0\t%0, %1, %2
2152 v_<revmnemonic>0\t%0, %2, %1"
2153 [(set_attr "type" "vop2")
2154 (set_attr "length" "8,8")])
2155
2156 ;; }}}
2157 ;; {{{ FP unops
2158
2159 (define_insn "abs<mode>2"
2160 [(set (match_operand:FP_MODE 0 "register_operand" "=v")
2161 (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
2162 ""
2163 "v_add%i0\t%0, 0, |%1|"
2164 [(set_attr "type" "vop3a")
2165 (set_attr "length" "8")])
2166
2167 (define_insn "abs<mode>2<exec>"
2168 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2169 (abs:VEC_FP_MODE
2170 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2171 ""
2172 "v_add%i0\t%0, 0, |%1|"
2173 [(set_attr "type" "vop3a")
2174 (set_attr "length" "8")])
2175
2176 (define_insn "neg<mode>2<exec>"
2177 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2178 (neg:VEC_FP_MODE
2179 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2180 ""
2181 "v_add%i0\t%0, 0, -%1"
2182 [(set_attr "type" "vop3a")
2183 (set_attr "length" "8")])
2184
2185 (define_insn "sqrt<mode>2<exec>"
2186 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2187 (sqrt:VEC_FP_MODE
2188 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2189 "flag_unsafe_math_optimizations"
2190 "v_sqrt%i0\t%0, %1"
2191 [(set_attr "type" "vop1")
2192 (set_attr "length" "8")])
2193
2194 (define_insn "sqrt<mode>2"
2195 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2196 (sqrt:FP_MODE
2197 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2198 "flag_unsafe_math_optimizations"
2199 "v_sqrt%i0\t%0, %1"
2200 [(set_attr "type" "vop1")
2201 (set_attr "length" "8")])
2202
2203 ;; }}}
2204 ;; {{{ FP fused multiply and add
2205
2206 (define_insn "fma<mode>4<exec>"
2207 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v")
2208 (fma:VEC_FP_MODE
2209 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2210 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2211 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2212 ""
2213 "v_fma%i0\t%0, %1, %2, %3"
2214 [(set_attr "type" "vop3a")
2215 (set_attr "length" "8")])
2216
2217 (define_insn "fma<mode>4_negop2<exec>"
2218 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v")
2219 (fma:VEC_FP_MODE
2220 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2221 (neg:VEC_FP_MODE
2222 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2223 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2224 ""
2225 "v_fma%i0\t%0, %1, -%2, %3"
2226 [(set_attr "type" "vop3a")
2227 (set_attr "length" "8")])
2228
2229 (define_insn "fma<mode>4"
2230 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v")
2231 (fma:FP_MODE
2232 (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2233 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2234 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2235 ""
2236 "v_fma%i0\t%0, %1, %2, %3"
2237 [(set_attr "type" "vop3a")
2238 (set_attr "length" "8")])
2239
2240 (define_insn "fma<mode>4_negop2"
2241 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v")
2242 (fma:FP_MODE
2243 (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2244 (neg:FP_MODE
2245 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2246 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2247 ""
2248 "v_fma%i0\t%0, %1, -%2, %3"
2249 [(set_attr "type" "vop3a")
2250 (set_attr "length" "8")])
2251
2252 ;; }}}
2253 ;; {{{ FP division
2254
2255 (define_insn "recip<mode>2<exec>"
2256 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2257 (div:VEC_FP_MODE
2258 (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
2259 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2260 ""
2261 "v_rcp%i0\t%0, %1"
2262 [(set_attr "type" "vop1")
2263 (set_attr "length" "8")])
2264
2265 (define_insn "recip<mode>2"
2266 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2267 (div:FP_MODE
2268 (float:FP_MODE (const_int 1))
2269 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2270 ""
2271 "v_rcp%i0\t%0, %1"
2272 [(set_attr "type" "vop1")
2273 (set_attr "length" "8")])
2274
2275 ;; Do division via a = b * 1/c
2276 ;; The v_rcp_* instructions are not sufficiently accurate on their own,
2277 ;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
2278 ;; which the ISA manual says is enough to improve the reciprocal accuracy.
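;;
;; As a sketch of the arithmetic: one Newton-Raphson step refines an
;; estimate r0 ~= 1/c as r1 = r0 * (2 - c*r0); writing r0 = (1 - e)/c
;; gives r1 = (1 - e^2)/c, squaring the relative error of the initial
;; v_rcp estimate.  The fma below computes 2 - c*r0 as fma (r0, -c, 2).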
2279 ;;
2280 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
2281
2282 (define_expand "div<mode>3"
2283 [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
2284 (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
2285 (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
2286 "flag_reciprocal_math"
2287 {
2288 rtx two = gcn_vec_constant (<MODE>mode,
2289 const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
2290 rtx initrcp = gen_reg_rtx (<MODE>mode);
2291 rtx fma = gen_reg_rtx (<MODE>mode);
2292 rtx rcp;
2293
2294 bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
2295 && real_identical
2296 (CONST_DOUBLE_REAL_VALUE
2297 (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
2298
2299 if (is_rcp)
2300 rcp = operands[0];
2301 else
2302 rcp = gen_reg_rtx (<MODE>mode);
2303
2304 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2305 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2306 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2307
2308 if (!is_rcp)
2309 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2310
2311 DONE;
2312 })
2313
2314 (define_expand "div<mode>3"
2315 [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
2316 (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
2317 (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
2318 "flag_reciprocal_math"
2319 {
2320 rtx two = const_double_from_real_value (dconst2, <MODE>mode);
2321 rtx initrcp = gen_reg_rtx (<MODE>mode);
2322 rtx fma = gen_reg_rtx (<MODE>mode);
2323 rtx rcp;
2324
2325 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
2326 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
2327 &dconstm1));
2328
2329 if (is_rcp)
2330 rcp = operands[0];
2331 else
2332 rcp = gen_reg_rtx (<MODE>mode);
2333
2334 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2335 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2336 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2337
2338 if (!is_rcp)
2339 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2340
2341 DONE;
2342 })
2343
2344 ;; }}}
2345 ;; {{{ Int/FP conversions
2346
2347 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2348 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2349
2350 (define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
2351 (define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
2352 (define_mode_iterator VCVT_IMODE [V64HI V64SI])
2353
2354 (define_code_iterator cvt_op [fix unsigned_fix
2355 float unsigned_float
2356 float_extend float_truncate])
2357 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2358 (float "float") (unsigned_float "floatuns")
2359 (float_extend "extend") (float_truncate "trunc")])
2360 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2361 (float "%i0%i1") (unsigned_float "%i0%u1")
2362 (float_extend "%i0%i1")
2363 (float_truncate "%i0%i1")])
2364
2365 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2366 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2367 (cvt_op:CVT_TO_MODE
2368 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2369 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2370 <cvt_name>_cvt)"
2371 "v_cvt<cvt_operands>\t%0, %1"
2372 [(set_attr "type" "vop1")
2373 (set_attr "length" "8")])
2374
2375 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
2376 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
2377 (cvt_op:VCVT_FMODE
2378 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
2379 "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
2380 <cvt_name>_cvt)"
2381 "v_cvt<cvt_operands>\t%0, %1"
2382 [(set_attr "type" "vop1")
2383 (set_attr "length" "8")])
2384
2385 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
2386 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
2387 (cvt_op:VCVT_IMODE
2388 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
2389 "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
2390 <cvt_name>_cvt)"
2391 "v_cvt<cvt_operands>\t%0, %1"
2392 [(set_attr "type" "vop1")
2393 (set_attr "length" "8")])
2394
2395 ;; }}}
2396 ;; {{{ Int/int conversions
2397
2398 (define_code_iterator zero_convert [truncate zero_extend])
2399 (define_code_attr convop [
2400 (sign_extend "extend")
2401 (zero_extend "zero_extend")
2402 (truncate "trunc")])
2403
2404 (define_insn "<convop><VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
2405 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2406 (zero_convert:VEC_ALL1REG_INT_MODE
2407 (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand" " v")))]
2408 ""
2409 "v_mov_b32_sdwa\t%0, %1 dst_sel:<VEC_ALL1REG_INT_MODE:sdwa> dst_unused:UNUSED_PAD src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
2410 [(set_attr "type" "vop_sdwa")
2411 (set_attr "length" "8")])
2412
2413 (define_insn "extend<VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
2414 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2415 (sign_extend:VEC_ALL1REG_INT_MODE
2416 (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand" " v")))]
2417 ""
2418 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
2419 [(set_attr "type" "vop_sdwa")
2420 (set_attr "length" "8")])
2421
2422 ;; GCC can already do these for scalar types, but not for vector types.
2423 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2424 ;; so there must be a few tricks here.
2425
2426 (define_insn_and_split "truncv64di<mode>2"
2427 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2428 (truncate:VEC_ALL1REG_INT_MODE
2429 (match_operand:V64DI 1 "gcn_alu_operand" " v")))]
2430 ""
2431 "#"
2432 "reload_completed"
2433 [(const_int 0)]
2434 {
2435 rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
2436 rtx out = operands[0];
2437
2438 if (<MODE>mode != V64SImode)
2439 emit_insn (gen_truncv64si<mode>2 (out, inlo));
2440 else
2441 emit_move_insn (out, inlo);
DONE;
2442 }
2443 [(set_attr "type" "vop2")
2444 (set_attr "length" "4")])
2445
2446 (define_insn_and_split "truncv64di<mode>2_exec"
2447 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2448 (vec_merge:VEC_ALL1REG_INT_MODE
2449 (truncate:VEC_ALL1REG_INT_MODE
2450 (match_operand:V64DI 1 "gcn_alu_operand" " v"))
2451 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_or_unspec_operand"
2452 "U0")
2453 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2454 ""
2455 "#"
2456 "reload_completed"
2457 [(const_int 0)]
2458 {
2459 rtx out = operands[0];
2460 rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
2461 rtx merge = operands[2];
2462 rtx exec = operands[3];
2463
2464 if (<MODE>mode != V64SImode)
2465 emit_insn (gen_truncv64si<mode>2_exec (out, inlo, merge, exec));
2466 else
2467 emit_insn (gen_mov<mode>_exec (out, inlo, exec, merge));
DONE;
2468 }
2469 [(set_attr "type" "vop2")
2470 (set_attr "length" "4")])
2471
2472 (define_insn_and_split "<convop><mode>v64di2"
2473 [(set (match_operand:V64DI 0 "register_operand" "=v")
2474 (any_extend:V64DI
2475 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v")))]
2476 ""
2477 "#"
2478 "reload_completed"
2479 [(const_int 0)]
2480 {
2481 rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
2482 rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
2483 rtx in = operands[1];
2484
2485 if (<MODE>mode != V64SImode)
2486 emit_insn (gen_<convop><mode>v64si2 (outlo, in));
2487 else
2488 emit_move_insn (outlo, in);
2489 if ('<su>' == 's')
2490 emit_insn (gen_ashrv64si3 (outhi, outlo, GEN_INT (31)));
2491 else
2492 emit_insn (gen_vec_duplicatev64si (outhi, const0_rtx));
DONE;
2493 }
2494 [(set_attr "type" "mult")
2495 (set_attr "length" "12")])
2496
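;; For example, sign-extending an SImode lane holding -5 (0xfffffffb)
;; copies it to the low half and fills the high half with copies of the
;; sign bit, 0xfffffffb >> 31 = 0xffffffff (arithmetic), giving
;; 0xfffffffffffffffb; zero-extension broadcasts 0 into the high half
;; instead.
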
2497 (define_insn_and_split "<convop><mode>v64di2_exec"
2498 [(set (match_operand:V64DI 0 "register_operand" "=v")
2499 (vec_merge:V64DI
2500 (any_extend:V64DI
2501 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v"))
2502 (match_operand:V64DI 2 "gcn_alu_or_unspec_operand" "U0")
2503 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2504 ""
2505 "#"
2506 "reload_completed"
2507 [(const_int 0)]
2508 {
2509 rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
2510 rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
2511 rtx in = operands[1];
2512 rtx mergelo = gcn_operand_part (V64DImode, operands[2], 0);
2513 rtx mergehi = gcn_operand_part (V64DImode, operands[2], 1);
2514 rtx exec = operands[3];
2515
2516 if (<MODE>mode != V64SImode)
2517 emit_insn (gen_<convop><mode>v64si2_exec (outlo, in, mergelo, exec));
2518 else
2519 emit_insn (gen_mov<mode>_exec (outlo, in, exec, mergelo));
2520 if ('<su>' == 's')
2521 emit_insn (gen_ashrv64si3_exec (outhi, outlo, GEN_INT (31), mergehi,
2522 exec));
2523 else
2524 emit_insn (gen_vec_duplicatev64si_exec (outhi, const0_rtx, mergehi,
2525 exec));
DONE;
2526 }
2527 [(set_attr "type" "mult")
2528 (set_attr "length" "12")])
2529
2530 ;; }}}
2531 ;; {{{ Vector comparison/merge
2532
2533 (define_mode_iterator VCMP_MODE [V64HI V64SI V64DI V64HF V64SF V64DF])
2534 (define_mode_iterator VCMP_MODE_INT [V64HI V64SI V64DI])
2535
2536 (define_insn "vec_cmp<mode>di"
2537 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2538 (match_operator 1 "gcn_fp_compare_operator"
2539 [(match_operand:VCMP_MODE 2 "gcn_alu_operand"
2540 "vSv, B,vSv, B, v,vA")
2541 (match_operand:VCMP_MODE 3 "gcn_vop3_operand"
2542 " v, v, v, v,vA, v")]))
2543 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
2544 ""
2545 "@
2546 v_cmp%E1\tvcc, %2, %3
2547 v_cmp%E1\tvcc, %2, %3
2548 v_cmpx%E1\tvcc, %2, %3
2549 v_cmpx%E1\tvcc, %2, %3
2550 v_cmp%E1\t%0, %2, %3
2551 v_cmp%E1\t%0, %2, %3"
2552 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2553 (set_attr "length" "4,8,4,8,8,8")])
2554
2555 (define_expand "vec_cmpu<mode>di"
2556 [(match_operand:DI 0 "register_operand")
2557 (match_operator 1 "gcn_compare_operator"
2558 [(match_operand:VCMP_MODE_INT 2 "gcn_alu_operand")
2559 (match_operand:VCMP_MODE_INT 3 "gcn_vop3_operand")])]
2560 ""
2561 {
2562 /* Unsigned comparisons use the same patterns as signed comparisons,
2563 except that they use unsigned operators (e.g. LTU vs LT).
2564 The '%E1' directive then does the Right Thing. */
2565 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
2566 operands[3]));
2567 DONE;
2568 })
2569
2570 ; There's no instruction for 8-bit vector comparison, so we need to extend.
2571 (define_expand "vec_cmp<u>v64qidi"
2572 [(match_operand:DI 0 "register_operand")
2573 (match_operator 1 "gcn_compare_operator"
2574 [(any_extend:V64SI (match_operand:V64QI 2 "gcn_alu_operand"))
2575 (any_extend:V64SI (match_operand:V64QI 3 "gcn_vop3_operand"))])]
2576 "can_create_pseudo_p ()"
2577 {
2578 rtx sitmp1 = gen_reg_rtx (V64SImode);
2579 rtx sitmp2 = gen_reg_rtx (V64SImode);
2580
2581 emit_insn (gen_<expander>v64qiv64si2 (sitmp1, operands[2]));
2582 emit_insn (gen_<expander>v64qiv64si2 (sitmp2, operands[3]));
2583 emit_insn (gen_vec_cmpv64sidi (operands[0], operands[1], sitmp1, sitmp2));
2584 DONE;
2585 })
2586
2587 (define_insn "vec_cmp<mode>di_exec"
2588 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2589 (and:DI
2590 (match_operator 1 "gcn_fp_compare_operator"
2591 [(match_operand:VCMP_MODE 2 "gcn_alu_operand"
2592 "vSv, B,vSv, B, v,vA")
2593 (match_operand:VCMP_MODE 3 "gcn_vop3_operand"
2594 " v, v, v, v,vA, v")])
2595 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
2596 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
2597 ""
2598 "@
2599 v_cmp%E1\tvcc, %2, %3
2600 v_cmp%E1\tvcc, %2, %3
2601 v_cmpx%E1\tvcc, %2, %3
2602 v_cmpx%E1\tvcc, %2, %3
2603 v_cmp%E1\t%0, %2, %3
2604 v_cmp%E1\t%0, %2, %3"
2605 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2606 (set_attr "length" "4,8,4,8,8,8")])
2607
2608 (define_expand "vec_cmpu<mode>di_exec"
2609 [(match_operand:DI 0 "register_operand")
2610 (match_operator 1 "gcn_compare_operator"
2611 [(match_operand:VCMP_MODE_INT 2 "gcn_alu_operand")
2612 (match_operand:VCMP_MODE_INT 3 "gcn_vop3_operand")])
2613 (match_operand:DI 4 "gcn_exec_reg_operand")]
2614 ""
2615 {
2616 /* Unsigned comparisons use the same patterns as signed comparisons,
2617 except that they use unsigned operators (e.g. LTU vs LT).
2618 The '%E1' directive then does the Right Thing. */
2619 emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
2620 operands[2], operands[3],
2621 operands[4]));
2622 DONE;
2623 })
2624
2625 (define_expand "vec_cmp<u>v64qidi_exec"
2626 [(match_operand:DI 0 "register_operand")
2627 (match_operator 1 "gcn_compare_operator"
2628 [(any_extend:V64SI (match_operand:V64QI 2 "gcn_alu_operand"))
2629 (any_extend:V64SI (match_operand:V64QI 3 "gcn_vop3_operand"))])
2630 (match_operand:DI 4 "gcn_exec_reg_operand")]
2631 "can_create_pseudo_p ()"
2632 {
2633 rtx sitmp1 = gen_reg_rtx (V64SImode);
2634 rtx sitmp2 = gen_reg_rtx (V64SImode);
2635
2636 emit_insn (gen_<expander>v64qiv64si2_exec (sitmp1, operands[2],
2637 sitmp1, operands[4]));
2638 emit_insn (gen_<expander>v64qiv64si2_exec (sitmp2, operands[3],
2639 sitmp2, operands[4]));
2640 emit_insn (gen_vec_cmpv64sidi_exec (operands[0], operands[1], sitmp1,
2641 sitmp2, operands[4]));
2642 DONE;
2643 })
2644
2645 (define_insn "vec_cmp<mode>di_dup"
2646 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2647 (match_operator 1 "gcn_fp_compare_operator"
2648 [(vec_duplicate:VCMP_MODE
2649 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2650 " Sv, B,Sv,B, A"))
2651 (match_operand:VCMP_MODE 3 "gcn_vop3_operand" " v, v, v,v, v")]))
2652 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
2653 ""
2654 "@
2655 v_cmp%E1\tvcc, %2, %3
2656 v_cmp%E1\tvcc, %2, %3
2657 v_cmpx%E1\tvcc, %2, %3
2658 v_cmpx%E1\tvcc, %2, %3
2659 v_cmp%E1\t%0, %2, %3"
2660 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2661 (set_attr "length" "4,8,4,8,8")])
2662
2663 (define_insn "vec_cmp<mode>di_dup_exec"
2664 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2665 (and:DI
2666 (match_operator 1 "gcn_fp_compare_operator"
2667 [(vec_duplicate:VCMP_MODE
2668 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2669 " Sv, B,Sv,B, A"))
2670 (match_operand:VCMP_MODE 3 "gcn_vop3_operand" " v, v, v,v, v")])
2671 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
2672 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
2673 ""
2674 "@
2675 v_cmp%E1\tvcc, %2, %3
2676 v_cmp%E1\tvcc, %2, %3
2677 v_cmpx%E1\tvcc, %2, %3
2678 v_cmpx%E1\tvcc, %2, %3
2679 v_cmp%E1\t%0, %2, %3"
2680 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2681 (set_attr "length" "4,8,4,8,8")])
2682
2683 (define_expand "vcond_mask_<mode>di"
2684 [(parallel
2685 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "")
2686 (vec_merge:VEC_ALLREG_MODE
2687 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand" "")
2688 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand" "")
2689 (match_operand:DI 3 "register_operand" "")))
2690 (clobber (scratch:V64DI))])]
2691 ""
2692 "")
2693
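;; The DImode operand 3 is the lane mask: lane N of the result comes from
;; operand 1 when bit N is set and from operand 2 otherwise, which is
;; exactly the vec_merge semantics used throughout this file.
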
2694 (define_expand "vcond<VEC_ALLREG_MODE:mode><VEC_ALLREG_ALT:mode>"
2695 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2696 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
2697 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
2698 (match_operator 3 "gcn_fp_compare_operator"
2699 [(match_operand:VEC_ALLREG_ALT 4 "gcn_alu_operand")
2700 (match_operand:VEC_ALLREG_ALT 5 "gcn_vop3_operand")])]
2701 ""
2702 {
2703 rtx tmp = gen_reg_rtx (DImode);
2704 emit_insn (gen_vec_cmp<VEC_ALLREG_ALT:mode>di
2705 (tmp, operands[3], operands[4], operands[5]));
2706 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
2707 (operands[0], operands[1], operands[2], tmp));
2708 DONE;
2709 })
2710
2711 (define_expand "vcond<VEC_ALLREG_MODE:mode><VEC_ALLREG_ALT:mode>_exec"
2712 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2713 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
2714 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
2715 (match_operator 3 "gcn_fp_compare_operator"
2716 [(match_operand:VEC_ALLREG_ALT 4 "gcn_alu_operand")
2717 (match_operand:VEC_ALLREG_ALT 5 "gcn_vop3_operand")])
2718 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2719 ""
2720 {
2721 rtx tmp = gen_reg_rtx (DImode);
2722 emit_insn (gen_vec_cmp<VEC_ALLREG_ALT:mode>di_exec
2723 (tmp, operands[3], operands[4], operands[5], operands[6]));
2724 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
2725 (operands[0], operands[1], operands[2], tmp));
2726 DONE;
2727 })
2728
2729 (define_expand "vcondu<VEC_ALLREG_MODE:mode><VEC_ALLREG_INT_MODE:mode>"
2730 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2731 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
2732 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
2733 (match_operator 3 "gcn_fp_compare_operator"
2734 [(match_operand:VEC_ALLREG_INT_MODE 4 "gcn_alu_operand")
2735 (match_operand:VEC_ALLREG_INT_MODE 5 "gcn_vop3_operand")])]
2736 ""
2737 {
2738 rtx tmp = gen_reg_rtx (DImode);
2739 emit_insn (gen_vec_cmpu<VEC_ALLREG_INT_MODE:mode>di
2740 (tmp, operands[3], operands[4], operands[5]));
2741 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
2742 (operands[0], operands[1], operands[2], tmp));
2743 DONE;
2744 })
2745
2746 (define_expand "vcondu<VEC_ALLREG_MODE:mode><VEC_ALLREG_INT_MODE:mode>_exec"
2747 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2748 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
2749 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
2750 (match_operator 3 "gcn_fp_compare_operator"
2751 [(match_operand:VEC_ALLREG_INT_MODE 4 "gcn_alu_operand")
2752 (match_operand:VEC_ALLREG_INT_MODE 5 "gcn_vop3_operand")])
2753 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2754 ""
2755 {
2756 rtx tmp = gen_reg_rtx (DImode);
2757 emit_insn (gen_vec_cmpu<VEC_ALLREG_INT_MODE:mode>di_exec
2758 (tmp, operands[3], operands[4], operands[5], operands[6]));
2759 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
2760 (operands[0], operands[1], operands[2], tmp));
2761 DONE;
2762 })
2763
2764 ;; }}}
2765 ;; {{{ Fully masked loop support
2766
2767 (define_expand "while_ultsidi"
2768 [(match_operand:DI 0 "register_operand")
2769 (match_operand:SI 1 "")
2770 (match_operand:SI 2 "")]
2771 ""
2772 {
2773 if (GET_CODE (operands[1]) != CONST_INT
2774 || GET_CODE (operands[2]) != CONST_INT)
2775 {
2776 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2777 rtx tmp = _0_1_2_3;
2778 if (GET_CODE (operands[1]) != CONST_INT
2779 || INTVAL (operands[1]) != 0)
2780 {
2781 tmp = gen_reg_rtx (V64SImode);
2782 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
2783 }
2784 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
2785 gen_rtx_GT (VOIDmode, 0, 0),
2786 operands[2], tmp));
2787 }
2788 else
2789 {
2790 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
2791 HOST_WIDE_INT mask = (diff >= 64 ? -1
2792 : ~((unsigned HOST_WIDE_INT)-1 << diff));
2793 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
2794 }
2795 DONE;
2796 })
2797
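;; Worked example for the constant case: while_ult (3, 10) gives
;; diff = 7 and mask = ~(-1 << 7) = 0x7f, activating lanes 0-6, i.e.
;; exactly those lanes for which 3 + lane < 10.
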
2798 (define_expand "maskload<mode>di"
2799 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2800 (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
2801 (match_operand 2 "")]
2802 ""
2803 {
2804 rtx exec = force_reg (DImode, operands[2]);
2805 rtx addr = gcn_expand_scalar_to_vector_address
2806 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
2807 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
2808 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
2809
2810 /* Masked lanes are required to hold zero. */
2811 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
2812
2813 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
2814 operands[0], exec));
2815 DONE;
2816 })
2817
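;; In other words, a masked load is expanded as: zero the whole
;; destination, turn the scalar base address into a vector of per-lane
;; addresses, then gather with the mask as EXEC so that inactive lanes
;; keep their zeros.
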
2818 (define_expand "maskstore<mode>di"
2819 [(match_operand:VEC_ALLREG_MODE 0 "memory_operand")
2820 (match_operand:VEC_ALLREG_MODE 1 "register_operand")
2821 (match_operand 2 "")]
2822 ""
2823 {
2824 rtx exec = force_reg (DImode, operands[2]);
2825 rtx addr = gcn_expand_scalar_to_vector_address
2826 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
2827 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
2828 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
2829 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
2830 DONE;
2831 })
2832
2833 (define_expand "mask_gather_load<mode>v64si"
2834 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2835 (match_operand:DI 1 "register_operand")
2836 (match_operand:V64SI 2 "register_operand")
2837 (match_operand 3 "immediate_operand")
2838 (match_operand:SI 4 "gcn_alu_operand")
2839 (match_operand:DI 5 "")]
2840 ""
2841 {
2842 rtx exec = force_reg (DImode, operands[5]);
2843
2844 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
2845 operands[2], operands[4],
2846 INTVAL (operands[3]), exec);
2847
2848 /* Masked lanes are required to hold zero. */
2849 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
2850
2851 if (GET_MODE (addr) == V64DImode)
2852 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
2853 const0_rtx, const0_rtx,
2854 const0_rtx, operands[0],
2855 exec));
2856 else
2857 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
2858 addr, const0_rtx,
2859 const0_rtx, const0_rtx,
2860 operands[0], exec));
2861 DONE;
2862 })
2863
2864 (define_expand "mask_scatter_store<mode>v64si"
2865 [(match_operand:DI 0 "register_operand")
2866 (match_operand:V64SI 1 "register_operand")
2867 (match_operand 2 "immediate_operand")
2868 (match_operand:SI 3 "gcn_alu_operand")
2869 (match_operand:VEC_ALLREG_MODE 4 "register_operand")
2870 (match_operand:DI 5 "")]
2871 ""
2872 {
2873 rtx exec = force_reg (DImode, operands[5]);
2874
2875 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
2876 operands[1], operands[3],
2877 INTVAL (operands[2]), exec);
2878
2879 if (GET_MODE (addr) == V64DImode)
2880 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
2881 operands[4], const0_rtx,
2882 const0_rtx,
2883 exec));
2884 else
2885 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
2886 const0_rtx, operands[4],
2887 const0_rtx, const0_rtx,
2888 exec));
2889 DONE;
2890 })
2891
2892 ; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
2893 (define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
2894 (define_mode_iterator COND_INT_MODE [V64SI V64DI])
2895
2896 (define_code_iterator cond_op [plus minus])
2897
2898 (define_expand "cond_<expander><mode>"
2899 [(match_operand:COND_MODE 0 "register_operand")
2900 (match_operand:DI 1 "register_operand")
2901 (cond_op:COND_MODE
2902 (match_operand:COND_MODE 2 "gcn_alu_operand")
2903 (match_operand:COND_MODE 3 "gcn_alu_operand"))
2904 (match_operand:COND_MODE 4 "register_operand")]
2905 ""
2906 {
2907 operands[1] = force_reg (DImode, operands[1]);
2908 operands[2] = force_reg (<MODE>mode, operands[2]);
2909
2910 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2911 operands[3], operands[4],
2912 operands[1]));
2913 DONE;
2914 })
2915
2916 (define_code_iterator cond_bitop [and ior xor])
2917
2918 (define_expand "cond_<expander><mode>"
2919 [(match_operand:COND_INT_MODE 0 "register_operand")
2920 (match_operand:DI 1 "register_operand")
2921 (cond_bitop:COND_INT_MODE
2922 (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
2923 (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
2924 (match_operand:COND_INT_MODE 4 "register_operand")]
2925 ""
2926 {
2927 operands[1] = force_reg (DImode, operands[1]);
2928 operands[2] = force_reg (<MODE>mode, operands[2]);
2929
2930 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2931 operands[3], operands[4],
2932 operands[1]));
2933 DONE;
2934 })
2935
2936 ;; }}}
2937 ;; {{{ Vector reductions
2938
2939 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
2940 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
2941 UNSPEC_PLUS_DPP_SHR
2942 UNSPEC_AND_DPP_SHR
2943 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2944
2945 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
2946 UNSPEC_AND_DPP_SHR
2947 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2948
2949 ; FIXME: Isn't there a better way of doing this?
2950 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
2951 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
2952 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
2953 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
2954 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
2955 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
2956 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
2957 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
2958
2959 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
2960 (UNSPEC_SMAX_DPP_SHR "smax")
2961 (UNSPEC_UMIN_DPP_SHR "umin")
2962 (UNSPEC_UMAX_DPP_SHR "umax")
2963 (UNSPEC_PLUS_DPP_SHR "plus")
2964 (UNSPEC_AND_DPP_SHR "and")
2965 (UNSPEC_IOR_DPP_SHR "ior")
2966 (UNSPEC_XOR_DPP_SHR "xor")])
2967
2968 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
2969 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
2970 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
2971 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
2972 (UNSPEC_PLUS_DPP_SHR "v_add%u0")
2973 (UNSPEC_AND_DPP_SHR "v_and%b0")
2974 (UNSPEC_IOR_DPP_SHR "v_or%b0")
2975 (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
2976
2977 (define_expand "reduc_<reduc_op>_scal_<mode>"
2978 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
2979 (unspec:<SCALAR_MODE>
2980 [(match_operand:VEC_1REG_MODE 1 "register_operand")]
2981 REDUC_UNSPEC))]
2982 ""
2983 {
2984 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
2985 <reduc_unspec>);
2986
2987 /* The result of the reduction is in lane 63 of tmp. */
2988 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
2989
2990 DONE;
2991 })
2992
2993 (define_expand "reduc_<reduc_op>_scal_v64di"
2994 [(set (match_operand:DI 0 "register_operand")
2995 (unspec:DI
2996 [(match_operand:V64DI 1 "register_operand")]
2997 REDUC_2REG_UNSPEC))]
2998 ""
2999 {
3000 rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
3001 <reduc_unspec>);
3002
3003 /* The result of the reduction is in lane 63 of tmp. */
3004 emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));
3005
3006 DONE;
3007 })
3008
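;; gcn_expand_reduc_scalar builds a shift-and-combine tree from the
;; *_dpp_shr patterns below.  Illustratively, for plus (a sketch of the
;; lane arithmetic, not of the exact DPP controls):
;;
;;   v += shr (v, 1);  v += shr (v, 2);  v += shr (v, 4);
;;   v += shr (v, 8);  v += shr (v, 16); v += shr (v, 32);
;;
;; where shr moves lane data towards higher lane numbers, so after six
;; steps lane 63 has combined all 64 lanes.
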
3009 (define_insn "*<reduc_op>_dpp_shr_<mode>"
3010 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
3011 (unspec:VEC_1REG_MODE
3012 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
3013 (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
3014 (match_operand:SI 3 "const_int_operand" "n")]
3015 REDUC_UNSPEC))]
3016 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
3017 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
3018 {
3019 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
3020 <reduc_unspec>, INTVAL (operands[3]));
3021 }
3022 [(set_attr "type" "vop_dpp")
3023 (set_attr "length" "8")])
3024
3025 (define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
3026 [(set (match_operand:V64DI 0 "register_operand" "=v")
3027 (unspec:V64DI
3028 [(match_operand:V64DI 1 "register_operand" "v")
3029 (match_operand:V64DI 2 "register_operand" "v")
3030 (match_operand:SI 3 "const_int_operand" "n")]
3031 REDUC_2REG_UNSPEC))]
3032 ""
3033 "#"
3034 "reload_completed"
3035 [(set (match_dup 4)
3036 (unspec:V64SI
3037 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
3038 (set (match_dup 5)
3039 (unspec:V64SI
3040 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
3041 {
3042 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
3043 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
3044 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
3045 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
3046 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
3047 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
3048 }
3049 [(set_attr "type" "vmult")
3050 (set_attr "length" "16")])
3051
3052 ; Special cases for addition.
3053
3054 (define_insn "*plus_carry_dpp_shr_v64si"
3055 [(set (match_operand:V64SI 0 "register_operand" "=v")
3056 (unspec:V64SI
3057 [(match_operand:V64SI 1 "register_operand" "v")
3058 (match_operand:V64SI 2 "register_operand" "v")
3059 (match_operand:SI 3 "const_int_operand" "n")]
3060 UNSPEC_PLUS_CARRY_DPP_SHR))
3061 (clobber (reg:DI VCC_REG))]
3062 ""
3063 {
3064 const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
3065 return gcn_expand_dpp_shr_insn (V64SImode, insn,
3066 UNSPEC_PLUS_CARRY_DPP_SHR,
3067 INTVAL (operands[3]));
3068 }
3069 [(set_attr "type" "vop_dpp")
3070 (set_attr "length" "8")])
3071
3072 (define_insn "*plus_carry_in_dpp_shr_v64si"
3073 [(set (match_operand:V64SI 0 "register_operand" "=v")
3074 (unspec:V64SI
3075 [(match_operand:V64SI 1 "register_operand" "v")
3076 (match_operand:V64SI 2 "register_operand" "v")
3077 (match_operand:SI 3 "const_int_operand" "n")
3078 (match_operand:DI 4 "register_operand" "cV")]
3079 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3080 (clobber (reg:DI VCC_REG))]
3081 ""
3082 {
3083 const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
3084 return gcn_expand_dpp_shr_insn (V64SImode, insn,
3085 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
3086 INTVAL (operands[3]));
3087 }
3088 [(set_attr "type" "vop_dpp")
3089 (set_attr "length" "8")])
3090
3091 (define_insn_and_split "*plus_carry_dpp_shr_v64di"
3092 [(set (match_operand:V64DI 0 "register_operand" "=v")
3093 (unspec:V64DI
3094 [(match_operand:V64DI 1 "register_operand" "v")
3095 (match_operand:V64DI 2 "register_operand" "v")
3096 (match_operand:SI 3 "const_int_operand" "n")]
3097 UNSPEC_PLUS_CARRY_DPP_SHR))
3098 (clobber (reg:DI VCC_REG))]
3099 ""
3100 "#"
3101 "reload_completed"
3102 [(parallel [(set (match_dup 4)
3103 (unspec:V64SI
3104 [(match_dup 6) (match_dup 8) (match_dup 3)]
3105 UNSPEC_PLUS_CARRY_DPP_SHR))
3106 (clobber (reg:DI VCC_REG))])
3107 (parallel [(set (match_dup 5)
3108 (unspec:V64SI
3109 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
3110 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3111 (clobber (reg:DI VCC_REG))])]
3112 {
3113 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
3114 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
3115 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
3116 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
3117 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
3118 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
3119 }
3120 [(set_attr "type" "vmult")
3121 (set_attr "length" "16")])
3122
3123 ; Instructions to move a scalar value from lane 63 of a vector register.
3124 (define_insn "mov_from_lane63_<mode>"
3125 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
3126 (unspec:<SCALAR_MODE>
3127 [(match_operand:VEC_ALL1REG_MODE 1 "register_operand" "v,v")]
3128 UNSPEC_MOV_FROM_LANE63))]
3129 ""
3130 "@
3131 v_readlane_b32\t%0, %1, 63
3132 v_mov_b32\t%0, %1 wave_ror:1"
3133 [(set_attr "type" "vop3a,vop_dpp")
3134 (set_attr "exec" "none,*")
3135 (set_attr "length" "8")])
3136
3137 (define_insn "mov_from_lane63_v64di"
3138 [(set (match_operand:DI 0 "register_operand" "=Sg,v")
3139 (unspec:DI
3140 [(match_operand:V64DI 1 "register_operand" "v,v")]
3141 UNSPEC_MOV_FROM_LANE63))]
3142 ""
3143 "@
3144 v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
3145 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
3146 return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
3147 \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
3148 else \
3149 return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
3150 \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
3151 [(set_attr "type" "vop3a,vop_dpp")
3152 (set_attr "exec" "none,*")
3153 (set_attr "length" "8")])
3154
3155 ;; }}}
3156 ;; {{{ Miscellaneous
3157
3158 (define_expand "vec_seriesv64si"
3159 [(match_operand:V64SI 0 "register_operand")
3160 (match_operand:SI 1 "gcn_alu_operand")
3161 (match_operand:SI 2 "gcn_alu_operand")]
3162 ""
3163 {
3164 rtx tmp = gen_reg_rtx (V64SImode);
3165 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3166
3167 emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
3168 emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
3169 DONE;
3170 })
3171
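;; For example, vec_series (100, 3) multiplies the lane-index vector
;; {0, 1, 2, ...} held in v1 by 3 and adds 100, yielding
;; {100, 103, 106, ...}.
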
3172 (define_expand "vec_seriesv64di"
3173 [(match_operand:V64DI 0 "register_operand")
3174 (match_operand:DI 1 "gcn_alu_operand")
3175 (match_operand:DI 2 "gcn_alu_operand")]
3176 ""
3177 {
3178 rtx tmp = gen_reg_rtx (V64DImode);
3179 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3180 rtx op1vec = gen_reg_rtx (V64DImode);
3181
3182 emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
3183 emit_insn (gen_vec_duplicatev64di (op1vec, operands[1]));
3184 emit_insn (gen_addv64di3 (operands[0], tmp, op1vec));
3185 DONE;
3186 })
3187
3188 ;; }}}