1 ;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
2
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
6 ;; any later version.
7
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 ;; for more details.
12
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
16
17 ;; {{{ Vector iterators
18
19 ; Vector modes for specific types
20 ; (This will make more sense when there are multiple vector sizes)
21 (define_mode_iterator V_QI
22 [V64QI])
23 (define_mode_iterator V_HI
24 [V64HI])
25 (define_mode_iterator V_HF
26 [V64HF])
27 (define_mode_iterator V_SI
28 [V64SI])
29 (define_mode_iterator V_SF
30 [V64SF])
31 (define_mode_iterator V_DI
32 [V64DI])
33 (define_mode_iterator V_DF
34 [V64DF])
35
36 ; Vector modes with sub-dword element types
37 (define_mode_iterator V_QIHI
38 [V64QI V64HI])
39
40 ; Vector modes for one vector register
41 (define_mode_iterator V_1REG
42 [V64QI V64HI V64SI V64HF V64SF])
43
44 (define_mode_iterator V_INT_1REG
45 [V64QI V64HI V64SI])
46 (define_mode_iterator V_INT_1REG_ALT
47 [V64QI V64HI V64SI])
48 (define_mode_iterator V_FP_1REG
49 [V64HF V64SF])
50
51 ; Vector modes for two vector registers
52 (define_mode_iterator V_2REG
53 [V64DI V64DF])
54
55 ; Vector modes with native support
56 (define_mode_iterator V_noQI
57 [V64HI V64HF V64SI V64SF V64DI V64DF])
58 (define_mode_iterator V_noHI
59 [V64HF V64SI V64SF V64DI V64DF])
60
61 (define_mode_iterator V_INT_noQI
62 [V64HI V64SI V64DI])
63
64 ; All of the above
65 (define_mode_iterator V_ALL
66 [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
67 (define_mode_iterator V_ALL_ALT
68 [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
69
70 (define_mode_iterator V_INT
71 [V64QI V64HI V64SI V64DI])
72 (define_mode_iterator V_FP
73 [V64HF V64SF V64DF])
74
75 (define_mode_attr scalar_mode
76 [(V64QI "qi") (V64HI "hi") (V64SI "si")
77 (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
78
79 (define_mode_attr SCALAR_MODE
80 [(V64QI "QI") (V64HI "HI") (V64SI "SI")
81 (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
82
83 (define_mode_attr vnsi
84 [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
85 (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
86
87 (define_mode_attr VnSI
88 [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
89 (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
90
91 (define_mode_attr vndi
92 [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
93 (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
94
95 (define_mode_attr VnDI
96 [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
97 (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
98
99 (define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
100
101 ;; }}}
102 ;; {{{ Substitutions
103
104 (define_subst_attr "exec" "vec_merge"
105 "" "_exec")
106 (define_subst_attr "exec_clobber" "vec_merge_with_clobber"
107 "" "_exec")
108 (define_subst_attr "exec_vcc" "vec_merge_with_vcc"
109 "" "_exec")
110 (define_subst_attr "exec_scatter" "scatter_store"
111 "" "_exec")
112
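; As an illustrative sketch only (the mode and register names here are
; invented for the example), applying the "vec_merge" substitution below to
; a pattern such as "vec_duplicate<mode><exec>" yields an "_exec" variant
; whose SET source is wrapped like this:
;
;   (set (reg:V64SI v0)
;        (vec_merge:V64SI
;          (vec_duplicate:V64SI (reg:SI s0))   ; the original source
;          (reg:V64SI v1)                      ; previous value (or an unspec)
;          (reg:DI exec)))                     ; per-lane enable mask
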
113 (define_subst "vec_merge"
114 [(set (match_operand:V_ALL 0)
115 (match_operand:V_ALL 1))]
116 ""
117 [(set (match_dup 0)
118 (vec_merge:V_ALL
119 (match_dup 1)
120 (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
121 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
122
123 (define_subst "vec_merge_with_clobber"
124 [(set (match_operand:V_ALL 0)
125 (match_operand:V_ALL 1))
126 (clobber (match_operand 2))]
127 ""
128 [(set (match_dup 0)
129 (vec_merge:V_ALL
130 (match_dup 1)
131 (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
132 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
133 (clobber (match_dup 2))])
134
135 (define_subst "vec_merge_with_vcc"
136 [(set (match_operand:V_ALL 0)
137 (match_operand:V_ALL 1))
138 (set (match_operand:DI 2)
139 (match_operand:DI 3))]
140 ""
141 [(parallel
142 [(set (match_dup 0)
143 (vec_merge:V_ALL
144 (match_dup 1)
145 (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
146 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
147 (set (match_dup 2)
148 (and:DI (match_dup 3)
149 (reg:DI EXEC_REG)))])])
150
151 (define_subst "scatter_store"
152 [(set (mem:BLK (scratch))
153 (unspec:BLK
154 [(match_operand 0)
155 (match_operand 1)
156 (match_operand 2)
157 (match_operand 3)]
158 UNSPEC_SCATTER))]
159 ""
160 [(set (mem:BLK (scratch))
161 (unspec:BLK
162 [(match_dup 0)
163 (match_dup 1)
164 (match_dup 2)
165 (match_dup 3)
166 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
167 UNSPEC_SCATTER))])
168
169 ;; }}}
170 ;; {{{ Vector moves
171
172 ; This is the entry point for all vector register moves. Memory accesses can
173 ; also come this way, but will more usually use the reload_in/out,
174 ; gather/scatter, maskload/store, etc.
175
176 (define_expand "mov<mode>"
177 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
178 (match_operand:V_ALL 1 "general_operand"))]
179 ""
180 {
181 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
182 {
183 operands[1] = force_reg (<MODE>mode, operands[1]);
184 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
185 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
186 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
187 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
188 operands[0],
189 scratch);
190 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
191 DONE;
192 }
193 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
194 {
195 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
196 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
197 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
198 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
199 operands[1],
200 scratch);
201 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
202 DONE;
203 }
204 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
205 {
206 gcc_assert (!reload_completed);
207 rtx scratch = gen_reg_rtx (<VnDI>mode);
208 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
209 DONE;
210 }
211 })
212
213 ; A pseudo instruction that helps LRA use the "U0" constraint.
214
215 (define_insn "mov<mode>_unspec"
216 [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
217 (match_operand:V_ALL 1 "gcn_unspec_operand" " U"))]
218 ""
219 ""
220 [(set_attr "type" "unknown")
221 (set_attr "length" "0")])
222
223 (define_insn "*mov<mode>"
224 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
225 (match_operand:V_1REG 1 "general_operand" "vA,B"))]
226 ""
227 "v_mov_b32\t%0, %1"
228 [(set_attr "type" "vop1,vop1")
229 (set_attr "length" "4,8")])
230
231 (define_insn "mov<mode>_exec"
232 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
233 (vec_merge:V_1REG
234 (match_operand:V_1REG 1 "general_operand" "vA, B, v,vA, m, v")
235 (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
236 "U0,U0,vA,vA,U0,U0")
237 (match_operand:DI 3 "register_operand" " e, e,cV,Sv, e, e")))
238 (clobber (match_scratch:<VnDI> 4 "=X, X, X, X,&v,&v"))]
239 "!MEM_P (operands[0]) || REG_P (operands[1])"
240 "@
241 v_mov_b32\t%0, %1
242 v_mov_b32\t%0, %1
243 v_cndmask_b32\t%0, %2, %1, vcc
244 v_cndmask_b32\t%0, %2, %1, %3
245 #
246 #"
247 [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
248 (set_attr "length" "4,8,4,8,16,16")])
249
250 ; This variant does not accept an unspec, but does permit MEM
251 ; read/modify/write, which is necessary for maskstore.
252
253 ;(define_insn "*mov<mode>_exec_match"
254 ; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
255 ; (vec_merge:V_1REG
256 ; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
257 ; (match_dup 0)
258 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
259 ; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
260 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
261 ; "@
262 ; v_mov_b32\t%0, %1
263 ; v_mov_b32\t%0, %1
264 ; #
265 ; #"
266 ; [(set_attr "type" "vop1,vop1,*,*")
267 ; (set_attr "length" "4,8,16,16")])
268
269 (define_insn "*mov<mode>"
270 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
271 (match_operand:V_2REG 1 "general_operand" "vDB"))]
272 ""
273 {
274 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
275 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
276 else
277 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
278 }
279 [(set_attr "type" "vmult")
280 (set_attr "length" "16")])
281
282 (define_insn "mov<mode>_exec"
283 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
284 (vec_merge:V_2REG
285 (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
286 (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
287 " U0,vDA0,vDA0,U0,U0")
288 (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
289 (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
290 "!MEM_P (operands[0]) || REG_P (operands[1])"
291 {
292 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
293 switch (which_alternative)
294 {
295 case 0:
296 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
297 case 1:
298 return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
299 "v_cndmask_b32\t%H0, %H2, %H1, vcc";
300 case 2:
301 return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
302 "v_cndmask_b32\t%H0, %H2, %H1, %3";
303 }
304 else
305 switch (which_alternative)
306 {
307 case 0:
308 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
309 case 1:
310 return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
311 "v_cndmask_b32\t%L0, %L2, %L1, vcc";
312 case 2:
313 return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
314 "v_cndmask_b32\t%L0, %L2, %L1, %3";
315 }
316
317 return "#";
318 }
319 [(set_attr "type" "vmult,vmult,vmult,*,*")
320 (set_attr "length" "16,16,16,16,16")])
321
322 ; This variant does not accept an unspec, but does permit MEM
323 ; read/modify/write, which is necessary for maskstore.
324
325 ;(define_insn "*mov<mode>_exec_match"
326 ; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
327 ; (vec_merge:V_2REG
328 ; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
329 ; (match_dup 0)
330 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
331 ; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
332 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
333 ; "@
334 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
335 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
336 ; else \
337 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
338 ; #
339 ; #"
340 ; [(set_attr "type" "vmult,*,*")
341 ; (set_attr "length" "16,16,16")])
342
343 ; An SGPR-base load looks like:
344 ; <load> v, Sv
345 ;
346 ; There's no hardware instruction that corresponds to this, but vector base
347 ; addresses are placed in an SGPR because a scalar is easier to add to a vector.
348 ; We also have a temporary vT, and the vector v1 holding the lane numbers.
349 ;
350 ; Rewrite as:
351 ; vT = v1 << log2(element-size)
352 ; vT += Sv
353 ; flat_load v, vT
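;
; As a rough, illustrative sketch only (assuming V64SI, i.e. 4-byte elements,
; GCN5-style mnemonics, and invented register names; the real expansion is
; produced by gcn_expand_scalar_to_vector_address), this corresponds to
; something like:
;
;   v_lshlrev_b32    vT.lo, 2, v1               ; per-lane byte offset
;   v_mov_b32        vT.hi, 0
;   v_add_co_u32     vT.lo, vcc, Sv.lo, vT.lo   ; 64-bit add of the SGPR base
;   v_addc_co_u32    vT.hi, vcc, Sv.hi, vT.hi, vcc
;   flat_load_dword  v, vT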
354
355 (define_insn "mov<mode>_sgprbase"
356 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
357 (unspec:V_1REG
358 [(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")]
359 UNSPEC_SGPRBASE))
360 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
361 "lra_in_progress || reload_completed"
362 "@
363 v_mov_b32\t%0, %1
364 v_mov_b32\t%0, %1
365 #
366 #"
367 [(set_attr "type" "vop1,vop1,*,*")
368 (set_attr "length" "4,8,12,12")])
369
370 (define_insn "mov<mode>_sgprbase"
371 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
372 (unspec:V_2REG
373 [(match_operand:V_2REG 1 "general_operand" "vDB, m, v")]
374 UNSPEC_SGPRBASE))
375 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
376 "lra_in_progress || reload_completed"
377 "@
378 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
379 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
380 else \
381 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
382 #
383 #"
384 [(set_attr "type" "vmult,*,*")
385 (set_attr "length" "8,12,12")])
386
387 ; reload_in was once a standard name, but here it's only referenced by
388 ; gcn_secondary_reload. It allows a reload with a scratch register.
389
390 (define_expand "reload_in<mode>"
391 [(set (match_operand:V_ALL 0 "register_operand" "= v")
392 (match_operand:V_ALL 1 "memory_operand" " m"))
393 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
394 ""
395 {
396 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
397 DONE;
398 })
399
400 ; reload_out is similar to reload_in, above.
401
402 (define_expand "reload_out<mode>"
403 [(set (match_operand:V_ALL 0 "memory_operand" "= m")
404 (match_operand:V_ALL 1 "register_operand" " v"))
405 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
406 ""
407 {
408 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
409 DONE;
410 })
411
412 ; Expand scalar addresses into gather/scatter patterns
413
414 (define_split
415 [(set (match_operand:V_ALL 0 "memory_operand")
416 (unspec:V_ALL
417 [(match_operand:V_ALL 1 "general_operand")]
418 UNSPEC_SGPRBASE))
419 (clobber (match_scratch:<VnDI> 2))]
420 ""
421 [(set (mem:BLK (scratch))
422 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
423 UNSPEC_SCATTER))]
424 {
425 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
426 operands[0],
427 operands[2]);
428 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
429 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
430 })
431
432 (define_split
433 [(set (match_operand:V_ALL 0 "memory_operand")
434 (vec_merge:V_ALL
435 (match_operand:V_ALL 1 "general_operand")
436 (match_operand:V_ALL 2 "")
437 (match_operand:DI 3 "gcn_exec_reg_operand")))
438 (clobber (match_scratch:<VnDI> 4))]
439 ""
440 [(set (mem:BLK (scratch))
441 (unspec:BLK [(match_dup 5) (match_dup 1)
442 (match_dup 6) (match_dup 7) (match_dup 3)]
443 UNSPEC_SCATTER))]
444 {
445 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
446 operands[3],
447 operands[0],
448 operands[4]);
449 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
450 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
451 })
452
453 (define_split
454 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
455 (unspec:V_ALL
456 [(match_operand:V_ALL 1 "memory_operand")]
457 UNSPEC_SGPRBASE))
458 (clobber (match_scratch:<VnDI> 2))]
459 ""
460 [(set (match_dup 0)
461 (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
462 (mem:BLK (scratch))]
463 UNSPEC_GATHER))]
464 {
465 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
466 operands[1],
467 operands[2]);
468 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
469 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
470 })
471
472 (define_split
473 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
474 (vec_merge:V_ALL
475 (match_operand:V_ALL 1 "memory_operand")
476 (match_operand:V_ALL 2 "")
477 (match_operand:DI 3 "gcn_exec_reg_operand")))
478 (clobber (match_scratch:<VnDI> 4))]
479 ""
480 [(set (match_dup 0)
481 (vec_merge:V_ALL
482 (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
483 (mem:BLK (scratch))]
484 UNSPEC_GATHER)
485 (match_dup 2)
486 (match_dup 3)))]
487 {
488 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
489 operands[3],
490 operands[1],
491 operands[4]);
492 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
493 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
494 })
495
496 ; TODO: Add zero/sign extending variants.
497
498 ;; }}}
499 ;; {{{ Lane moves
500
501 ; v_writelane and v_readlane work regardless of exec flags.
502 ; We allow source to be scratch.
503 ;
504 ; FIXME these should take A immediates
505
506 (define_insn "*vec_set<mode>"
507 [(set (match_operand:V_1REG 0 "register_operand" "= v")
508 (vec_merge:V_1REG
509 (vec_duplicate:V_1REG
510 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
511 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
512 (ashift (const_int 1)
513 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
514 ""
515 "v_writelane_b32 %0, %1, %2"
516 [(set_attr "type" "vop3a")
517 (set_attr "length" "8")
518 (set_attr "exec" "none")
519 (set_attr "laneselect" "yes")])
520
521 ; FIXME: 64-bit operations really should be splitters, but I am not sure how
522 ; to represent vertical subregs.
523 (define_insn "*vec_set<mode>"
524 [(set (match_operand:V_2REG 0 "register_operand" "= v")
525 (vec_merge:V_2REG
526 (vec_duplicate:V_2REG
527 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
528 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
529 (ashift (const_int 1)
530 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
531 ""
532 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
533 [(set_attr "type" "vmult")
534 (set_attr "length" "16")
535 (set_attr "exec" "none")
536 (set_attr "laneselect" "yes")])
537
538 (define_expand "vec_set<mode>"
539 [(set (match_operand:V_ALL 0 "register_operand")
540 (vec_merge:V_ALL
541 (vec_duplicate:V_ALL
542 (match_operand:<SCALAR_MODE> 1 "register_operand"))
543 (match_dup 0)
544 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
545 "")
546
547 (define_insn "*vec_set<mode>_1"
548 [(set (match_operand:V_1REG 0 "register_operand" "=v")
549 (vec_merge:V_1REG
550 (vec_duplicate:V_1REG
551 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
552 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
553 (match_operand:SI 2 "const_int_operand" " i")))]
554 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
555 {
556 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
557 return "v_writelane_b32 %0, %1, %2";
558 }
559 [(set_attr "type" "vop3a")
560 (set_attr "length" "8")
561 (set_attr "exec" "none")
562 (set_attr "laneselect" "yes")])
563
564 (define_insn "*vec_set<mode>_1"
565 [(set (match_operand:V_2REG 0 "register_operand" "=v")
566 (vec_merge:V_2REG
567 (vec_duplicate:V_2REG
568 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
569 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
570 (match_operand:SI 2 "const_int_operand" " i")))]
571 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
572 {
573 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
574 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
575 }
576 [(set_attr "type" "vmult")
577 (set_attr "length" "16")
578 (set_attr "exec" "none")
579 (set_attr "laneselect" "yes")])
580
581 (define_insn "vec_duplicate<mode><exec>"
582 [(set (match_operand:V_1REG 0 "register_operand" "=v")
583 (vec_duplicate:V_1REG
584 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
585 ""
586 "v_mov_b32\t%0, %1"
587 [(set_attr "type" "vop3a")
588 (set_attr "length" "8")])
589
590 (define_insn "vec_duplicate<mode><exec>"
591 [(set (match_operand:V_2REG 0 "register_operand" "= v")
592 (vec_duplicate:V_2REG
593 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
594 ""
595 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
596 [(set_attr "type" "vop3a")
597 (set_attr "length" "16")])
598
599 (define_insn "vec_extract<mode><scalar_mode>"
600 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
601 (vec_select:<SCALAR_MODE>
602 (match_operand:V_1REG 1 "register_operand" " v")
603 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
604 ""
605 "v_readlane_b32 %0, %1, %2"
606 [(set_attr "type" "vop3a")
607 (set_attr "length" "8")
608 (set_attr "exec" "none")
609 (set_attr "laneselect" "yes")])
610
611 (define_insn "vec_extract<mode><scalar_mode>"
612 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
613 (vec_select:<SCALAR_MODE>
614 (match_operand:V_2REG 1 "register_operand" " v")
615 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
616 ""
617 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
618 [(set_attr "type" "vmult")
619 (set_attr "length" "16")
620 (set_attr "exec" "none")
621 (set_attr "laneselect" "yes")])
622
623 (define_expand "extract_last_<mode>"
624 [(match_operand:<SCALAR_MODE> 0 "register_operand")
625 (match_operand:DI 1 "gcn_alu_operand")
626 (match_operand:V_ALL 2 "register_operand")]
627 "can_create_pseudo_p ()"
628 {
629 rtx dst = operands[0];
630 rtx mask = operands[1];
631 rtx vect = operands[2];
632 rtx tmpreg = gen_reg_rtx (SImode);
633
634 emit_insn (gen_clzdi2 (tmpreg, mask));
635 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
636 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
637 DONE;
638 })
639
640 (define_expand "fold_extract_last_<mode>"
641 [(match_operand:<SCALAR_MODE> 0 "register_operand")
642 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
643 (match_operand:DI 2 "gcn_alu_operand")
644 (match_operand:V_ALL 3 "register_operand")]
645 "can_create_pseudo_p ()"
646 {
647 rtx dst = operands[0];
648 rtx default_value = operands[1];
649 rtx mask = operands[2];
650 rtx vect = operands[3];
651 rtx else_label = gen_label_rtx ();
652 rtx end_label = gen_label_rtx ();
653
654 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
655 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
656 emit_insn (gen_extract_last_<mode> (dst, mask, vect));
657 emit_jump_insn (gen_jump (end_label));
658 emit_barrier ();
659 emit_label (else_label);
660 emit_move_insn (dst, default_value);
661 emit_label (end_label);
662 DONE;
663 })
664
665 (define_expand "vec_init<mode><scalar_mode>"
666 [(match_operand:V_ALL 0 "register_operand")
667 (match_operand 1)]
668 ""
669 {
670 gcn_expand_vector_init (operands[0], operands[1]);
671 DONE;
672 })
673
674 ;; }}}
675 ;; {{{ Scatter / Gather
676
677 ;; GCN does not have an instruction for loading a vector from contiguous
678 ;; memory, so *all* loads and stores are eventually converted to scatter
679 ;; or gather.
680 ;;
681 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
682 ;; unspec. The unspec formats are as follows:
683 ;;
684 ;; (unspec:V??
685 ;; [(<address expression>)
686 ;; (<addr_space_t>)
687 ;; (<use_glc>)
688 ;; (mem:BLK (scratch))]
689 ;; UNSPEC_GATHER)
690 ;;
691 ;; (unspec:BLK
692 ;; [(<address expression>)
693 ;; (<source register>)
694 ;; (<addr_space_t>)
695 ;; (<use_glc>)
696 ;; (<exec>)]
697 ;; UNSPEC_SCATTER)
698 ;;
699 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
700 ;; - The mem:BLK does not contain any real information, but indicates that an
701 ;; unknown memory read is taking place. Stores are expected to use a similar
702 ;; mem:BLK outside the unspec.
703 ;; - The address space and glc (volatile) fields are there to replace the
704 ;; fields normally found in a MEM.
705 ;; - Multiple forms of address expression are supported, below.
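;;
;; As a purely illustrative instance (the address-space and flag values are
;; invented for the example), a V64SI gather in the "1offset" form accepted
;; by gather<mode>_insn_1offset might look like:
;;
;;   (unspec:V64SI
;;     [(plus:V64DI (reg:V64DI v[8:9])
;;                  (vec_duplicate:V64DI (const_int 16)))
;;      (const_int 1)                ; addr_space_t
;;      (const_int 0)                ; glc (volatile) flag off
;;      (mem:BLK (scratch))]
;;     UNSPEC_GATHER)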
706
707 (define_expand "gather_load<mode><vnsi>"
708 [(match_operand:V_ALL 0 "register_operand")
709 (match_operand:DI 1 "register_operand")
710 (match_operand:<VnSI> 2 "register_operand")
711 (match_operand 3 "immediate_operand")
712 (match_operand:SI 4 "gcn_alu_operand")]
713 ""
714 {
715 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
716 operands[2], operands[4],
717 INTVAL (operands[3]), NULL);
718
719 if (GET_MODE (addr) == <VnDI>mode)
720 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
721 const0_rtx, const0_rtx));
722 else
723 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
724 addr, const0_rtx, const0_rtx,
725 const0_rtx));
726 DONE;
727 })
728
729 ; Allow any address expression
730 (define_expand "gather<mode>_expr<exec>"
731 [(set (match_operand:V_ALL 0 "register_operand")
732 (unspec:V_ALL
733 [(match_operand 1 "")
734 (match_operand 2 "immediate_operand")
735 (match_operand 3 "immediate_operand")
736 (mem:BLK (scratch))]
737 UNSPEC_GATHER))]
738 ""
739 {})
740
741 (define_insn "gather<mode>_insn_1offset<exec>"
742 [(set (match_operand:V_ALL 0 "register_operand" "=v")
743 (unspec:V_ALL
744 [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
745 (vec_duplicate:<VnDI>
746 (match_operand 2 "immediate_operand" " n")))
747 (match_operand 3 "immediate_operand" " n")
748 (match_operand 4 "immediate_operand" " n")
749 (mem:BLK (scratch))]
750 UNSPEC_GATHER))]
751 "(AS_FLAT_P (INTVAL (operands[3]))
752 && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
753 || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
754 || (AS_GLOBAL_P (INTVAL (operands[3]))
755 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
756 {
757 addr_space_t as = INTVAL (operands[3]);
758 const char *glc = INTVAL (operands[4]) ? " glc" : "";
759
760 static char buf[200];
761 if (AS_FLAT_P (as))
762 {
763 if (TARGET_GCN5_PLUS)
764 sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
765 glc);
766 else
767 sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
768 }
769 else if (AS_GLOBAL_P (as))
770 sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
771 "s_waitcnt\tvmcnt(0)", glc);
772 else
773 gcc_unreachable ();
774
775 return buf;
776 }
777 [(set_attr "type" "flat")
778 (set_attr "length" "12")])
779
780 (define_insn "gather<mode>_insn_1offset_ds<exec>"
781 [(set (match_operand:V_ALL 0 "register_operand" "=v")
782 (unspec:V_ALL
783 [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
784 (vec_duplicate:<VnSI>
785 (match_operand 2 "immediate_operand" " n")))
786 (match_operand 3 "immediate_operand" " n")
787 (match_operand 4 "immediate_operand" " n")
788 (mem:BLK (scratch))]
789 UNSPEC_GATHER))]
790 "(AS_ANY_DS_P (INTVAL (operands[3]))
791 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
792 {
793 addr_space_t as = INTVAL (operands[3]);
794 static char buf[200];
795 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
796 (AS_GDS_P (as) ? " gds" : ""));
797 return buf;
798 }
799 [(set_attr "type" "ds")
800 (set_attr "length" "12")])
801
802 (define_insn "gather<mode>_insn_2offsets<exec>"
803 [(set (match_operand:V_ALL 0 "register_operand" "=v")
804 (unspec:V_ALL
805 [(plus:<VnDI>
806 (plus:<VnDI>
807 (vec_duplicate:<VnDI>
808 (match_operand:DI 1 "register_operand" "Sv"))
809 (sign_extend:<VnDI>
810 (match_operand:<VnSI> 2 "register_operand" " v")))
811 (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
812 (match_operand 4 "immediate_operand" " n")
813 (match_operand 5 "immediate_operand" " n")
814 (mem:BLK (scratch))]
815 UNSPEC_GATHER))]
816 "(AS_GLOBAL_P (INTVAL (operands[4]))
817 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
818 {
819 addr_space_t as = INTVAL (operands[4]);
820 const char *glc = INTVAL (operands[5]) ? " glc" : "";
821
822 static char buf[200];
823 if (AS_GLOBAL_P (as))
824 {
825 /* Work around an assembler bug in which a 64-bit register is expected,
826 but a 32-bit value would be correct. */
827 int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
828 sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
829 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
830 }
831 else
832 gcc_unreachable ();
833
834 return buf;
835 }
836 [(set_attr "type" "flat")
837 (set_attr "length" "12")])
838
839 (define_expand "scatter_store<mode><vnsi>"
840 [(match_operand:DI 0 "register_operand")
841 (match_operand:<VnSI> 1 "register_operand")
842 (match_operand 2 "immediate_operand")
843 (match_operand:SI 3 "gcn_alu_operand")
844 (match_operand:V_ALL 4 "register_operand")]
845 ""
846 {
847 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
848 operands[1], operands[3],
849 INTVAL (operands[2]), NULL);
850
851 if (GET_MODE (addr) == <VnDI>mode)
852 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
853 const0_rtx, const0_rtx));
854 else
855 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
856 const0_rtx, operands[4],
857 const0_rtx, const0_rtx));
858 DONE;
859 })
860
861 ; Allow any address expression
862 (define_expand "scatter<mode>_expr<exec_scatter>"
863 [(set (mem:BLK (scratch))
864 (unspec:BLK
865 [(match_operand:<VnDI> 0 "")
866 (match_operand:V_ALL 1 "register_operand")
867 (match_operand 2 "immediate_operand")
868 (match_operand 3 "immediate_operand")]
869 UNSPEC_SCATTER))]
870 ""
871 {})
872
873 (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
874 [(set (mem:BLK (scratch))
875 (unspec:BLK
876 [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
877 (vec_duplicate:<VnDI>
878 (match_operand 1 "immediate_operand" "n")))
879 (match_operand:V_ALL 2 "register_operand" "v")
880 (match_operand 3 "immediate_operand" "n")
881 (match_operand 4 "immediate_operand" "n")]
882 UNSPEC_SCATTER))]
883 "(AS_FLAT_P (INTVAL (operands[3]))
884 && (INTVAL(operands[1]) == 0
885 || (TARGET_GCN5_PLUS
886 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
887 || (AS_GLOBAL_P (INTVAL (operands[3]))
888 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
889 {
890 addr_space_t as = INTVAL (operands[3]);
891 const char *glc = INTVAL (operands[4]) ? " glc" : "";
892
893 static char buf[200];
894 if (AS_FLAT_P (as))
895 {
896 if (TARGET_GCN5_PLUS)
897 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
898 else
899 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
900 }
901 else if (AS_GLOBAL_P (as))
902 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
903 else
904 gcc_unreachable ();
905
906 return buf;
907 }
908 [(set_attr "type" "flat")
909 (set_attr "length" "12")])
910
911 (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
912 [(set (mem:BLK (scratch))
913 (unspec:BLK
914 [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
915 (vec_duplicate:<VnSI>
916 (match_operand 1 "immediate_operand" "n")))
917 (match_operand:V_ALL 2 "register_operand" "v")
918 (match_operand 3 "immediate_operand" "n")
919 (match_operand 4 "immediate_operand" "n")]
920 UNSPEC_SCATTER))]
921 "(AS_ANY_DS_P (INTVAL (operands[3]))
922 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
923 {
924 addr_space_t as = INTVAL (operands[3]);
925 static char buf[200];
926 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
927 (AS_GDS_P (as) ? " gds" : ""));
928 return buf;
929 }
930 [(set_attr "type" "ds")
931 (set_attr "length" "12")])
932
933 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
934 [(set (mem:BLK (scratch))
935 (unspec:BLK
936 [(plus:<VnDI>
937 (plus:<VnDI>
938 (vec_duplicate:<VnDI>
939 (match_operand:DI 0 "register_operand" "Sv"))
940 (sign_extend:<VnDI>
941 (match_operand:<VnSI> 1 "register_operand" " v")))
942 (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
943 (match_operand:V_ALL 3 "register_operand" " v")
944 (match_operand 4 "immediate_operand" " n")
945 (match_operand 5 "immediate_operand" " n")]
946 UNSPEC_SCATTER))]
947 "(AS_GLOBAL_P (INTVAL (operands[4]))
948 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
949 {
950 addr_space_t as = INTVAL (operands[4]);
951 const char *glc = INTVAL (operands[5]) ? " glc" : "";
952
953 static char buf[200];
954 if (AS_GLOBAL_P (as))
955 {
956 /* Work around an assembler bug in which a 64-bit register is expected,
957 but a 32-bit value would be correct. */
958 int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
959 sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
960 reg, reg + 1, glc);
961 }
962 else
963 gcc_unreachable ();
964
965 return buf;
966 }
967 [(set_attr "type" "flat")
968 (set_attr "length" "12")])
969
970 ;; }}}
971 ;; {{{ Permutations
972
973 (define_insn "ds_bpermute<mode>"
974 [(set (match_operand:V_1REG 0 "register_operand" "=v")
975 (unspec:V_1REG
976 [(match_operand:V_1REG 2 "register_operand" " v")
977 (match_operand:<VnSI> 1 "register_operand" " v")
978 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
979 UNSPEC_BPERMUTE))]
980 ""
981 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
982 [(set_attr "type" "vop2")
983 (set_attr "length" "12")])
984
985 (define_insn_and_split "ds_bpermute<mode>"
986 [(set (match_operand:V_2REG 0 "register_operand" "=&v")
987 (unspec:V_2REG
988 [(match_operand:V_2REG 2 "register_operand" " v0")
989 (match_operand:<VnSI> 1 "register_operand" " v")
990 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
991 UNSPEC_BPERMUTE))]
992 ""
993 "#"
994 "reload_completed"
995 [(set (match_dup 4) (unspec:<VnSI>
996 [(match_dup 6) (match_dup 1) (match_dup 3)]
997 UNSPEC_BPERMUTE))
998 (set (match_dup 5) (unspec:<VnSI>
999 [(match_dup 7) (match_dup 1) (match_dup 3)]
1000 UNSPEC_BPERMUTE))]
1001 {
1002 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
1003 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
1004 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
1005 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
1006 }
1007 [(set_attr "type" "vmult")
1008 (set_attr "length" "24")])
1009
1010 (define_insn "@dpp_move<mode>"
1011 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1012 (unspec:V_noHI
1013 [(match_operand:V_noHI 1 "register_operand" " v")
1014 (match_operand:SI 2 "const_int_operand" " n")]
1015 UNSPEC_MOV_DPP_SHR))]
1016 ""
1017 {
1018 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
1019 UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
1020 }
1021 [(set_attr "type" "vop_dpp")
1022 (set_attr "length" "16")])
1023
1024 ;; }}}
1025 ;; {{{ ALU special case: add/sub
1026
1027 (define_insn "add<mode>3<exec_clobber>"
1028 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1029 (plus:V_INT_1REG
1030 (match_operand:V_INT_1REG 1 "register_operand" "% v")
1031 (match_operand:V_INT_1REG 2 "gcn_alu_operand" "vSvB")))
1032 (clobber (reg:DI VCC_REG))]
1033 ""
1034 "v_add%^_u32\t%0, vcc, %2, %1"
1035 [(set_attr "type" "vop2")
1036 (set_attr "length" "8")])
1037
1038 (define_insn "add<mode>3_dup<exec_clobber>"
1039 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1040 (plus:V_INT_1REG
1041 (vec_duplicate:V_INT_1REG
1042 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
1043 (match_operand:V_INT_1REG 1 "register_operand" " v")))
1044 (clobber (reg:DI VCC_REG))]
1045 ""
1046 "v_add%^_u32\t%0, vcc, %2, %1"
1047 [(set_attr "type" "vop2")
1048 (set_attr "length" "8")])
1049
1050 (define_insn "add<mode>3_vcc<exec_vcc>"
1051 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1052 (plus:V_SI
1053 (match_operand:V_SI 1 "register_operand" "% v, v")
1054 (match_operand:V_SI 2 "gcn_alu_operand" "vSvB,vSvB")))
1055 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
1056 (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
1057 (match_dup 1)))]
1058 ""
1059 "v_add%^_u32\t%0, %3, %2, %1"
1060 [(set_attr "type" "vop2,vop3b")
1061 (set_attr "length" "8")])
1062
1063 ; This pattern only changes the VCC bits when the corresponding lane is
1064 ; enabled, so the set must be described as an ior.
1065
1066 (define_insn "add<mode>3_vcc_dup<exec_vcc>"
1067 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1068 (plus:V_SI
1069 (vec_duplicate:V_SI
1070 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
1071 (match_operand:V_SI 2 "register_operand" " v, v")))
1072 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
1073 (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
1074 (match_dup 1))
1075 (vec_duplicate:V_SI (match_dup 2))))]
1076 ""
1077 "v_add%^_u32\t%0, %3, %2, %1"
1078 [(set_attr "type" "vop2,vop3b")
1079 (set_attr "length" "8,8")])
1080
1081 ; v_addc does not accept an SGPR because the VCC read already counts as an
1082 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1083 ; accept "B" immediate constants due to a related bus conflict.
1084
1085 (define_insn "addc<mode>3<exec_vcc>"
1086 [(set (match_operand:V_SI 0 "register_operand" "=v, v")
1087 (plus:V_SI
1088 (plus:V_SI
1089 (vec_merge:V_SI
1090 (vec_duplicate:V_SI (const_int 1))
1091 (vec_duplicate:V_SI (const_int 0))
1092 (match_operand:DI 3 "register_operand" " cV,cVSv"))
1093 (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA"))
1094 (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA")))
1095 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
1096 (ior:DI (ltu:DI (plus:V_SI
1097 (plus:V_SI
1098 (vec_merge:V_SI
1099 (vec_duplicate:V_SI (const_int 1))
1100 (vec_duplicate:V_SI (const_int 0))
1101 (match_dup 3))
1102 (match_dup 1))
1103 (match_dup 2))
1104 (match_dup 2))
1105 (ltu:DI (plus:V_SI
1106 (vec_merge:V_SI
1107 (vec_duplicate:V_SI (const_int 1))
1108 (vec_duplicate:V_SI (const_int 0))
1109 (match_dup 3))
1110 (match_dup 1))
1111 (match_dup 1))))]
1112 ""
1113 "v_addc%^_u32\t%0, %4, %2, %1, %3"
1114 [(set_attr "type" "vop2,vop3b")
1115 (set_attr "length" "4,8")])
1116
1117 (define_insn "sub<mode>3<exec_clobber>"
1118 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
1119 (minus:V_INT_1REG
1120 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
1121 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
1122 (clobber (reg:DI VCC_REG))]
1123 ""
1124 "@
1125 v_sub%^_u32\t%0, vcc, %1, %2
1126 v_subrev%^_u32\t%0, vcc, %2, %1"
1127 [(set_attr "type" "vop2")
1128 (set_attr "length" "8,8")])
1129
1130 (define_insn "sub<mode>3_vcc<exec_vcc>"
1131 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1132 (minus:V_SI
1133 (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1134 (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1135 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1136 (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
1137 (match_dup 1)))]
1138 ""
1139 "@
1140 v_sub%^_u32\t%0, %3, %1, %2
1141 v_sub%^_u32\t%0, %3, %1, %2
1142 v_subrev%^_u32\t%0, %3, %2, %1
1143 v_subrev%^_u32\t%0, %3, %2, %1"
1144 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1145 (set_attr "length" "8")])
1146
1147 ; v_subb does not accept an SGPR because the VCC read already counts as an
1148 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1149 ; accept "B" immediate constants due to a related bus conflict.
1150
1151 (define_insn "subc<mode>3<exec_vcc>"
1152 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1153 (minus:V_SI
1154 (minus:V_SI
1155 (vec_merge:V_SI
1156 (vec_duplicate:V_SI (const_int 1))
1157 (vec_duplicate:V_SI (const_int 0))
1158 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
1159 (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
1160 (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
1161 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
1162 (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
1163 (vec_merge:V_SI
1164 (vec_duplicate:V_SI (const_int 1))
1165 (vec_duplicate:V_SI (const_int 0))
1166 (match_dup 3))
1167 (match_dup 1))
1168 (match_dup 2))
1169 (match_dup 2))
1170 (ltu:DI (minus:V_SI (vec_merge:V_SI
1171 (vec_duplicate:V_SI (const_int 1))
1172 (vec_duplicate:V_SI (const_int 0))
1173 (match_dup 3))
1174 (match_dup 1))
1175 (match_dup 1))))]
1176 ""
1177 "@
1178 v_subb%^_u32\t%0, %4, %1, %2, %3
1179 v_subb%^_u32\t%0, %4, %1, %2, %3
1180 v_subbrev%^_u32\t%0, %4, %2, %1, %3
1181 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
1182 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1183 (set_attr "length" "4,8,4,8")])
1184
1185 (define_insn_and_split "add<mode>3"
1186 [(set (match_operand:V_DI 0 "register_operand" "= v")
1187 (plus:V_DI
1188 (match_operand:V_DI 1 "register_operand" "%vDb")
1189 (match_operand:V_DI 2 "gcn_alu_operand" " vDb")))
1190 (clobber (reg:DI VCC_REG))]
1191 ""
1192 "#"
1193 "gcn_can_split_p (<MODE>mode, operands[0])
1194 && gcn_can_split_p (<MODE>mode, operands[1])
1195 && gcn_can_split_p (<MODE>mode, operands[2])"
1196 [(const_int 0)]
1197 {
1198 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1199 emit_insn (gen_add<vnsi>3_vcc
1200 (gcn_operand_part (<MODE>mode, operands[0], 0),
1201 gcn_operand_part (<MODE>mode, operands[1], 0),
1202 gcn_operand_part (<MODE>mode, operands[2], 0),
1203 vcc));
1204 emit_insn (gen_addc<vnsi>3
1205 (gcn_operand_part (<MODE>mode, operands[0], 1),
1206 gcn_operand_part (<MODE>mode, operands[1], 1),
1207 gcn_operand_part (<MODE>mode, operands[2], 1),
1208 vcc, vcc));
1209 DONE;
1210 }
1211 [(set_attr "type" "vmult")
1212 (set_attr "length" "8")])
1213
1214 (define_insn_and_split "add<mode>3_exec"
1215 [(set (match_operand:V_DI 0 "register_operand" "= v")
1216 (vec_merge:V_DI
1217 (plus:V_DI
1218 (match_operand:V_DI 1 "register_operand" "%vDb")
1219 (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))
1220 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1221 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1222 (clobber (reg:DI VCC_REG))]
1223 ""
1224 "#"
1225 "gcn_can_split_p (<MODE>mode, operands[0])
1226 && gcn_can_split_p (<MODE>mode, operands[1])
1227 && gcn_can_split_p (<MODE>mode, operands[2])
1228 && gcn_can_split_p (<MODE>mode, operands[4])"
1229 [(const_int 0)]
1230 {
1231 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1232 emit_insn (gen_add<vnsi>3_vcc_exec
1233 (gcn_operand_part (<MODE>mode, operands[0], 0),
1234 gcn_operand_part (<MODE>mode, operands[1], 0),
1235 gcn_operand_part (<MODE>mode, operands[2], 0),
1236 vcc,
1237 gcn_operand_part (<MODE>mode, operands[3], 0),
1238 operands[4]));
1239 emit_insn (gen_addc<vnsi>3_exec
1240 (gcn_operand_part (<MODE>mode, operands[0], 1),
1241 gcn_operand_part (<MODE>mode, operands[1], 1),
1242 gcn_operand_part (<MODE>mode, operands[2], 1),
1243 vcc, vcc,
1244 gcn_operand_part (<MODE>mode, operands[3], 1),
1245 operands[4]));
1246 DONE;
1247 }
1248 [(set_attr "type" "vmult")
1249 (set_attr "length" "8")])
1250
1251 (define_insn_and_split "sub<mode>3"
1252 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1253 (minus:V_DI
1254 (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v")
1255 (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb")))
1256 (clobber (reg:DI VCC_REG))]
1257 ""
1258 "#"
1259 "gcn_can_split_p (<MODE>mode, operands[0])
1260 && gcn_can_split_p (<MODE>mode, operands[1])
1261 && gcn_can_split_p (<MODE>mode, operands[2])"
1262 [(const_int 0)]
1263 {
1264 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1265 emit_insn (gen_sub<vnsi>3_vcc
1266 (gcn_operand_part (<MODE>mode, operands[0], 0),
1267 gcn_operand_part (<MODE>mode, operands[1], 0),
1268 gcn_operand_part (<MODE>mode, operands[2], 0),
1269 vcc));
1270 emit_insn (gen_subc<vnsi>3
1271 (gcn_operand_part (<MODE>mode, operands[0], 1),
1272 gcn_operand_part (<MODE>mode, operands[1], 1),
1273 gcn_operand_part (<MODE>mode, operands[2], 1),
1274 vcc, vcc));
1275 DONE;
1276 }
1277 [(set_attr "type" "vmult")
1278 (set_attr "length" "8")])
1279
1280 (define_insn_and_split "sub<mode>3_exec"
1281 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1282 (vec_merge:V_DI
1283 (minus:V_DI
1284 (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v")
1285 (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB"))
1286 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1287 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1288 (clobber (reg:DI VCC_REG))]
1289 "register_operand (operands[1], VOIDmode)
1290 || register_operand (operands[2], VOIDmode)"
1291 "#"
1292 "gcn_can_split_p (<MODE>mode, operands[0])
1293 && gcn_can_split_p (<MODE>mode, operands[1])
1294 && gcn_can_split_p (<MODE>mode, operands[2])
1295 && gcn_can_split_p (<MODE>mode, operands[3])"
1296 [(const_int 0)]
1297 {
1298 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1299 emit_insn (gen_sub<vnsi>3_vcc_exec
1300 (gcn_operand_part (<MODE>mode, operands[0], 0),
1301 gcn_operand_part (<MODE>mode, operands[1], 0),
1302 gcn_operand_part (<MODE>mode, operands[2], 0),
1303 vcc,
1304 gcn_operand_part (<MODE>mode, operands[3], 0),
1305 operands[4]));
1306 emit_insn (gen_subc<vnsi>3_exec
1307 (gcn_operand_part (<MODE>mode, operands[0], 1),
1308 gcn_operand_part (<MODE>mode, operands[1], 1),
1309 gcn_operand_part (<MODE>mode, operands[2], 1),
1310 vcc, vcc,
1311 gcn_operand_part (<MODE>mode, operands[3], 1),
1312 operands[4]));
1313 DONE;
1314 }
1315 [(set_attr "type" "vmult")
1316 (set_attr "length" "8")])
1317
1318 (define_insn_and_split "add<mode>3_zext"
1319 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1320 (plus:V_DI
1321 (zero_extend:V_DI
1322 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1323 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")))
1324 (clobber (reg:DI VCC_REG))]
1325 ""
1326 "#"
1327 "gcn_can_split_p (<MODE>mode, operands[0])
1328 && gcn_can_split_p (<MODE>mode, operands[2])"
1329 [(const_int 0)]
1330 {
1331 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1332 emit_insn (gen_add<vnsi>3_vcc
1333 (gcn_operand_part (<MODE>mode, operands[0], 0),
1334 operands[1],
1335 gcn_operand_part (<MODE>mode, operands[2], 0),
1336 vcc));
1337 emit_insn (gen_addc<vnsi>3
1338 (gcn_operand_part (<MODE>mode, operands[0], 1),
1339 gcn_operand_part (<MODE>mode, operands[2], 1),
1340 const0_rtx, vcc, vcc));
1341 DONE;
1342 }
1343 [(set_attr "type" "vmult")
1344 (set_attr "length" "8")])
1345
1346 (define_insn_and_split "add<mode>3_zext_exec"
1347 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1348 (vec_merge:V_DI
1349 (plus:V_DI
1350 (zero_extend:V_DI
1351 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1352 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
1353 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1354 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1355 (clobber (reg:DI VCC_REG))]
1356 ""
1357 "#"
1358 "gcn_can_split_p (<MODE>mode, operands[0])
1359 && gcn_can_split_p (<MODE>mode, operands[2])
1360 && gcn_can_split_p (<MODE>mode, operands[3])"
1361 [(const_int 0)]
1362 {
1363 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1364 emit_insn (gen_add<vnsi>3_vcc_exec
1365 (gcn_operand_part (<MODE>mode, operands[0], 0),
1366 operands[1],
1367 gcn_operand_part (<MODE>mode, operands[2], 0),
1368 vcc,
1369 gcn_operand_part (<MODE>mode, operands[3], 0),
1370 operands[4]));
1371 emit_insn (gen_addc<vnsi>3_exec
1372 (gcn_operand_part (<MODE>mode, operands[0], 1),
1373 gcn_operand_part (<MODE>mode, operands[2], 1),
1374 const0_rtx, vcc, vcc,
1375 gcn_operand_part (<MODE>mode, operands[3], 1),
1376 operands[4]));
1377 DONE;
1378 }
1379 [(set_attr "type" "vmult")
1380 (set_attr "length" "8")])
1381
1382 (define_insn_and_split "add<mode>3_vcc_zext_dup"
1383 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1384 (plus:V_DI
1385 (zero_extend:V_DI
1386 (vec_duplicate:<VnSI>
1387 (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
1388 (match_operand:V_DI 2 "gcn_alu_operand" " vDA, vDb")))
1389 (set (match_operand:DI 3 "register_operand" "=SgcV,SgcV")
1390 (ltu:DI (plus:V_DI
1391 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1392 (match_dup 2))
1393 (match_dup 1)))]
1394 ""
1395 "#"
1396 "gcn_can_split_p (<MODE>mode, operands[0])
1397 && gcn_can_split_p (<MODE>mode, operands[2])"
1398 [(const_int 0)]
1399 {
1400 emit_insn (gen_add<vnsi>3_vcc_dup
1401 (gcn_operand_part (<MODE>mode, operands[0], 0),
1402 gcn_operand_part (DImode, operands[1], 0),
1403 gcn_operand_part (<MODE>mode, operands[2], 0),
1404 operands[3]));
1405 emit_insn (gen_addc<vnsi>3
1406 (gcn_operand_part (<MODE>mode, operands[0], 1),
1407 gcn_operand_part (<MODE>mode, operands[2], 1),
1408 const0_rtx, operands[3], operands[3]));
1409 DONE;
1410 }
1411 [(set_attr "type" "vmult")
1412 (set_attr "length" "8")])
1413
1414 (define_expand "add<mode>3_zext_dup"
1415 [(match_operand:V_DI 0 "register_operand")
1416 (match_operand:SI 1 "gcn_alu_operand")
1417 (match_operand:V_DI 2 "gcn_alu_operand")]
1418 ""
1419 {
1420 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1421 emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
1422 operands[2], vcc));
1423 DONE;
1424 })
1425
1426 (define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
1427 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1428 (vec_merge:V_DI
1429 (plus:V_DI
1430 (zero_extend:V_DI
1431 (vec_duplicate:<VnSI>
1432 (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv")))
1433 (match_operand:V_DI 2 "gcn_alu_operand" " vDb, vDA"))
1434 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0, U0")
1435 (match_operand:DI 5 "gcn_exec_reg_operand" " e, e")))
1436 (set (match_operand:DI 3 "register_operand" "=SgcV,SgcV")
1437 (and:DI
1438 (ltu:DI (plus:V_DI
1439 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1440 (match_dup 2))
1441 (match_dup 1))
1442 (match_dup 5)))]
1443 ""
1444 "#"
1445 "gcn_can_split_p (<MODE>mode, operands[0])
1446 && gcn_can_split_p (<MODE>mode, operands[2])
1447 && gcn_can_split_p (<MODE>mode, operands[4])"
1448 [(const_int 0)]
1449 {
1450 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1451 (gcn_operand_part (<MODE>mode, operands[0], 0),
1452 gcn_operand_part (DImode, operands[1], 0),
1453 gcn_operand_part (<MODE>mode, operands[2], 0),
1454 operands[3],
1455 gcn_operand_part (<MODE>mode, operands[4], 0),
1456 operands[5]));
1457 emit_insn (gen_addc<vnsi>3_exec
1458 (gcn_operand_part (<MODE>mode, operands[0], 1),
1459 gcn_operand_part (<MODE>mode, operands[2], 1),
1460 const0_rtx, operands[3], operands[3],
1461 gcn_operand_part (<MODE>mode, operands[4], 1),
1462 operands[5]));
1463 DONE;
1464 }
1465 [(set_attr "type" "vmult")
1466 (set_attr "length" "8")])
1467
1468 (define_expand "add<mode>3_zext_dup_exec"
1469 [(match_operand:V_DI 0 "register_operand")
1470 (match_operand:SI 1 "gcn_alu_operand")
1471 (match_operand:V_DI 2 "gcn_alu_operand")
1472 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1473 (match_operand:DI 4 "gcn_exec_reg_operand")]
1474 ""
1475 {
1476 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1477 emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
1478 operands[2], vcc, operands[3],
1479 operands[4]));
1480 DONE;
1481 })
1482
1483 (define_insn_and_split "add<mode>3_vcc_zext_dup2"
1484 [(set (match_operand:V_DI 0 "register_operand" "= v")
1485 (plus:V_DI
1486 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
1487 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" " DbSv"))))
1488 (set (match_operand:DI 3 "register_operand" "=SgcV")
1489 (ltu:DI (plus:V_DI
1490 (zero_extend:V_DI (match_dup 1))
1491 (vec_duplicate:V_DI (match_dup 2)))
1492 (match_dup 1)))]
1493 ""
1494 "#"
1495 "gcn_can_split_p (<MODE>mode, operands[0])"
1496 [(const_int 0)]
1497 {
1498 emit_insn (gen_add<vnsi>3_vcc_dup
1499 (gcn_operand_part (<MODE>mode, operands[0], 0),
1500 gcn_operand_part (DImode, operands[2], 0),
1501 operands[1],
1502 operands[3]));
1503 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1504 emit_insn (gen_vec_duplicate<vnsi>
1505 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1506 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
1507 operands[3]));
1508 DONE;
1509 }
1510 [(set_attr "type" "vmult")
1511 (set_attr "length" "8")])
1512
1513 (define_expand "add<mode>3_zext_dup2"
1514 [(match_operand:V_DI 0 "register_operand")
1515 (match_operand:<VnSI> 1 "gcn_alu_operand")
1516 (match_operand:DI 2 "gcn_alu_operand")]
1517 ""
1518 {
1519 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1520 emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
1521 operands[2], vcc));
1522 DONE;
1523 })
1524
1525 (define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
1526 [(set (match_operand:V_DI 0 "register_operand" "= v")
1527 (vec_merge:V_DI
1528 (plus:V_DI
1529 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1530 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1531 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0")
1532 (match_operand:DI 5 "gcn_exec_reg_operand" " e")))
1533 (set (match_operand:DI 3 "register_operand" "=SgcV")
1534 (and:DI
1535 (ltu:DI (plus:V_DI
1536 (zero_extend:V_DI (match_dup 1))
1537 (vec_duplicate:V_DI (match_dup 2)))
1538 (match_dup 1))
1539 (match_dup 5)))]
1540 ""
1541 "#"
1542 "gcn_can_split_p (<MODE>mode, operands[0])
1543 && gcn_can_split_p (<MODE>mode, operands[4])"
1544 [(const_int 0)]
1545 {
1546 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1547 (gcn_operand_part (<MODE>mode, operands[0], 0),
1548 gcn_operand_part (DImode, operands[2], 0),
1549 operands[1],
1550 operands[3],
1551 gcn_operand_part (<MODE>mode, operands[4], 0),
1552 operands[5]));
1553 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1554 emit_insn (gen_vec_duplicate<vnsi>_exec
1555 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1556 gcn_operand_part (<MODE>mode, operands[4], 1),
1557 operands[5]));
1558 emit_insn (gen_addc<vnsi>3_exec
1559 (dsthi, dsthi, const0_rtx, operands[3], operands[3],
1560 gcn_operand_part (<MODE>mode, operands[4], 1),
1561 operands[5]));
1562 DONE;
1563 }
1564 [(set_attr "type" "vmult")
1565 (set_attr "length" "8")])
1566
1567 (define_expand "add<mode>3_zext_dup2_exec"
1568 [(match_operand:V_DI 0 "register_operand")
1569 (match_operand:<VnSI> 1 "gcn_alu_operand")
1570 (match_operand:DI 2 "gcn_alu_operand")
1571 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1572 (match_operand:DI 4 "gcn_exec_reg_operand")]
1573 ""
1574 {
1575 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1576 emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
1577 operands[2], vcc,
1578 operands[3], operands[4]));
1579 DONE;
1580 })
1581
1582 (define_insn_and_split "add<mode>3_sext_dup2"
1583 [(set (match_operand:V_DI 0 "register_operand" "= v")
1584 (plus:V_DI
1585 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
1586 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1587 (clobber (match_scratch:<VnSI> 3 "=&v"))
1588 (clobber (reg:DI VCC_REG))]
1589 ""
1590 "#"
1591 "gcn_can_split_p (<MODE>mode, operands[0])"
1592 [(const_int 0)]
1593 {
1594 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1595 emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31)));
1596 emit_insn (gen_add<vnsi>3_vcc_dup
1597 (gcn_operand_part (<MODE>mode, operands[0], 0),
1598 gcn_operand_part (DImode, operands[2], 0),
1599 operands[1],
1600 vcc));
1601 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1602 emit_insn (gen_vec_duplicate<vnsi>
1603 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1604 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc));
1605 DONE;
1606 }
1607 [(set_attr "type" "vmult")
1608 (set_attr "length" "8")])
1609
1610 (define_insn_and_split "add<mode>3_sext_dup2_exec"
1611 [(set (match_operand:V_DI 0 "register_operand" "= v")
1612 (vec_merge:V_DI
1613 (plus:V_DI
1614 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1615 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1616 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1617 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1618 (clobber (match_scratch:<VnSI> 5 "=&v"))
1619 (clobber (reg:DI VCC_REG))]
1620 ""
1621 "#"
1622 "gcn_can_split_p (<MODE>mode, operands[0])
1623 && gcn_can_split_p (<MODE>mode, operands[3])"
1624 [(const_int 0)]
1625 {
1626 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1627 emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31),
1628 gcn_gen_undef (<VnSI>mode), operands[4]));
1629 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1630 (gcn_operand_part (<MODE>mode, operands[0], 0),
1631 gcn_operand_part (DImode, operands[2], 0),
1632 operands[1],
1633 vcc,
1634 gcn_operand_part (<MODE>mode, operands[3], 0),
1635 operands[4]));
1636 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1637 emit_insn (gen_vec_duplicate<vnsi>_exec
1638 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1639 gcn_operand_part (<MODE>mode, operands[3], 1),
1640 operands[4]));
1641 emit_insn (gen_addc<vnsi>3_exec
1642 (dsthi, dsthi, operands[5], vcc, vcc,
1643 gcn_operand_part (<MODE>mode, operands[3], 1),
1644 operands[4]));
1645 DONE;
1646 }
1647 [(set_attr "type" "vmult")
1648 (set_attr "length" "8")])
1649
1650 ;; }}}
1651 ;; {{{ DS memory ALU: add/sub
1652
1653 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
1654 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1655
1656 ;; FIXME: the vector patterns probably need RD expanded to a vector of
1657 ;; addresses. For now, the only way a vector can get into LDS is
1658 ;; if the user puts it there manually.
1659 ;;
1660 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1661 ;; checked to see if anything can ever use them.
1662
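;; These patterns implement the arithmetic directly in LDS memory using the
;; DS read-modify-write instructions.  The destination must therefore be the
;; same memory location as the memory input; the rtx_equal_p insn conditions
;; below enforce this.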
1663 (define_insn "add<mode>3_ds<exec>"
1664 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1665 (plus:DS_ARITH_MODE
1666 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1667 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1668 "rtx_equal_p (operands[0], operands[1])"
1669 "ds_add%u0\t%A0, %2%O0"
1670 [(set_attr "type" "ds")
1671 (set_attr "length" "8")])
1672
1673 (define_insn "add<mode>3_ds_scalar"
1674 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1675 (plus:DS_ARITH_SCALAR_MODE
1676 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1677 "%RD")
1678 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1679 "rtx_equal_p (operands[0], operands[1])"
1680 "ds_add%u0\t%A0, %2%O0"
1681 [(set_attr "type" "ds")
1682 (set_attr "length" "8")])
1683
1684 (define_insn "sub<mode>3_ds<exec>"
1685 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1686 (minus:DS_ARITH_MODE
1687 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1688 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1689 "rtx_equal_p (operands[0], operands[1])"
1690 "ds_sub%u0\t%A0, %2%O0"
1691 [(set_attr "type" "ds")
1692 (set_attr "length" "8")])
1693
1694 (define_insn "sub<mode>3_ds_scalar"
1695 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1696 (minus:DS_ARITH_SCALAR_MODE
1697 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1698 " RD")
1699 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1700 "rtx_equal_p (operands[0], operands[1])"
1701 "ds_sub%u0\t%A0, %2%O0"
1702 [(set_attr "type" "ds")
1703 (set_attr "length" "8")])
1704
1705 (define_insn "subr<mode>3_ds<exec>"
1706 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1707 (minus:DS_ARITH_MODE
1708 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1709 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1710 "rtx_equal_p (operands[0], operands[1])"
1711 "ds_rsub%u0\t%A0, %2%O0"
1712 [(set_attr "type" "ds")
1713 (set_attr "length" "8")])
1714
1715 (define_insn "subr<mode>3_ds_scalar"
1716 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1717 (minus:DS_ARITH_SCALAR_MODE
1718 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1719 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1720 " RD")))]
1721 "rtx_equal_p (operands[0], operands[1])"
1722 "ds_rsub%u0\t%A0, %2%O0"
1723 [(set_attr "type" "ds")
1724 (set_attr "length" "8")])
1725
1726 ;; }}}
1727 ;; {{{ ALU special case: mult
1728
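;; The high-part multiply returns the upper 32 bits of the full 64-bit
;; product of two 32-bit lanes; the RTL below describes this as a widening
;; multiply whose result is shifted right by 32 and truncated back to SI.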
1729 (define_insn "<su>mul<mode>3_highpart<exec>"
1730 [(set (match_operand:V_SI 0 "register_operand" "= v")
1731 (truncate:V_SI
1732 (lshiftrt:<VnDI>
1733 (mult:<VnDI>
1734 (any_extend:<VnDI>
1735 (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
1736 (any_extend:<VnDI>
1737 (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
1738 (const_int 32))))]
1739 ""
1740 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1741 [(set_attr "type" "vop3a")
1742 (set_attr "length" "8")])
1743
1744 (define_insn "mul<mode>3<exec>"
1745 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1746 (mult:V_INT_1REG
1747 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1748 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
1749 ""
1750 "v_mul_lo_u32\t%0, %1, %2"
1751 [(set_attr "type" "vop3a")
1752 (set_attr "length" "8")])
1753
1754 (define_insn "mul<mode>3_dup<exec>"
1755 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1756 (mult:V_INT_1REG
1757 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1758 (vec_duplicate:V_INT_1REG
1759 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
1760 ""
1761 "v_mul_lo_u32\t%0, %1, %2"
1762 [(set_attr "type" "vop3a")
1763 (set_attr "length" "8")])
1764
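;; Full 64-bit x 64-bit -> 64-bit multiply, split into 32-bit operations
;; after reload.  With A = hi(a)*2^32 + lo(a) and B = hi(b)*2^32 + lo(b):
;;   A*B mod 2^64 = lo(a)*lo(b) + (lo(a)*hi(b) + hi(a)*lo(b)) * 2^32
;; so the low result word is the low product of the low halves, and the high
;; result word is the high half of that product plus the low halves of the
;; two cross products.  The hi(a)*hi(b) term only affects bits 64 and above
;; and so is not needed.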
1765 (define_insn_and_split "mul<mode>3"
1766 [(set (match_operand:V_DI 0 "register_operand" "=&v")
1767 (mult:V_DI
1768 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
1769 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
1770 (clobber (match_scratch:<VnSI> 3 "=&v"))]
1771 ""
1772 "#"
1773 "reload_completed"
1774 [(const_int 0)]
1775 {
1776 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1777 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1778 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
1779 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
1780 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1781 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1782 rtx tmp = operands[3];
1783
1784 emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
1785 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));
1786 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
1787 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1788 emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
1789 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1792 DONE;
1793 })
1794
1795 (define_insn_and_split "mul<mode>3_exec"
1796 [(set (match_operand:V_DI 0 "register_operand" "=&v")
1797 (vec_merge:V_DI
1798 (mult:V_DI
1799 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
1800 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
1801 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1802 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1803 (clobber (match_scratch:<VnSI> 5 "=&v"))]
1804 ""
1805 "#"
1806 "reload_completed"
1807 [(const_int 0)]
1808 {
1809 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1810 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1811 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
1812 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
1813 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1814 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1815 rtx exec = operands[4];
1816 rtx tmp = operands[5];
1817
1818 rtx old_lo, old_hi;
1819 if (GET_CODE (operands[3]) == UNSPEC)
1820 {
1821 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
1822 }
1823 else
1824 {
1825 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
1826 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
1827 }
1828
1829 rtx undef = gcn_gen_undef (<VnSI>mode);
1830
1831 emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
1832 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
1833 old_hi, exec));
1834 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
1835 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
1836 emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
1837 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
1840 DONE;
1841 })
1842
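;; When the first operand is a zero-extended SI vector its high half is
;; known to be zero, so one cross product disappears: the high result word
;; is just the high half of a*lo(b) plus the low half of a*hi(b).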
1843 (define_insn_and_split "mul<mode>3_zext"
1844 [(set (match_operand:V_DI 0 "register_operand" "=&v")
1845 (mult:V_DI
1846 (zero_extend:V_DI
1847 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
1848 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
1849 (clobber (match_scratch:<VnSI> 3 "=&v"))]
1850 ""
1851 "#"
1852 "reload_completed"
1853 [(const_int 0)]
1854 {
1855 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1856 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1857 rtx left = operands[1];
1858 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1859 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1860 rtx tmp = operands[3];
1861
1862 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
1863 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
1864 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
1865 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1866 DONE;
1867 })
1868
1869 (define_insn_and_split "mul<mode>3_zext_exec"
1870 [(set (match_operand:V_DI 0 "register_operand" "=&v")
1871 (vec_merge:V_DI
1872 (mult:V_DI
1873 (zero_extend:V_DI
1874 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
1875 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
1876 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1877 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1878 (clobber (match_scratch:<VnSI> 5 "=&v"))]
1879 ""
1880 "#"
1881 "reload_completed"
1882 [(const_int 0)]
1883 {
1884 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1885 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1886 rtx left = operands[1];
1887 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1888 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1889 rtx exec = operands[4];
1890 rtx tmp = operands[5];
1891
1892 rtx old_lo, old_hi;
1893 if (GET_CODE (operands[3]) == UNSPEC)
1894 {
1895 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
1896 }
1897 else
1898 {
1899 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
1900 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
1901 }
1902
1903 rtx undef = gcn_gen_undef (<VnSI>mode);
1904
1905 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
1906 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
1907 old_hi, exec));
1908 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
1909 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
1910 DONE;
1911 })
1912
1913 (define_insn_and_split "mul<mode>3_zext_dup2"
1914 [(set (match_operand:V_DI 0 "register_operand" "= &v")
1915 (mult:V_DI
1916 (zero_extend:V_DI
1917 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
1918 (vec_duplicate:V_DI
1919 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
1920 (clobber (match_scratch:<VnSI> 3 "= &v"))]
1921 ""
1922 "#"
1923 "reload_completed"
1924 [(const_int 0)]
1925 {
1926 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1927 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1928 rtx left = operands[1];
1929 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1930 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1931 rtx tmp = operands[3];
1932
1933 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
1934 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
1935 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
1936 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1937 DONE;
1938 })
1939
1940 (define_insn_and_split "mul<mode>3_zext_dup2_exec"
1941 [(set (match_operand:V_DI 0 "register_operand" "= &v")
1942 (vec_merge:V_DI
1943 (mult:V_DI
1944 (zero_extend:V_DI
1945 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
1946 (vec_duplicate:V_DI
1947 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
1948 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1949 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1950 (clobber (match_scratch:<VnSI> 5 "= &v"))]
1951 ""
1952 "#"
1953 "reload_completed"
1954 [(const_int 0)]
1955 {
1956 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1957 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1958 rtx left = operands[1];
1959 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1960 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1961 rtx exec = operands[4];
1962 rtx tmp = operands[5];
1963
1964 rtx old_lo, old_hi;
1965 if (GET_CODE (operands[3]) == UNSPEC)
1966 {
1967 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
1968 }
1969 else
1970 {
1971 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
1972 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
1973 }
1974
1975 rtx undef = gcn_gen_undef (<VnSI>mode);
1976
1977 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
1978 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
1979 old_hi, exec));
1980 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
1981 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
1982 DONE;
1983 })
1984
1985 ;; }}}
1986 ;; {{{ ALU generic case
1987
1988 (define_code_iterator bitop [and ior xor])
1989 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
1990 (define_code_iterator minmaxop [smin smax umin umax])
1991
1992 (define_insn "<expander><mode>2<exec>"
1993 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
1994 (bitunop:V_INT_1REG
1995 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
1996 ""
1997 "v_<mnemonic>0\t%0, %1"
1998 [(set_attr "type" "vop1")
1999 (set_attr "length" "8")])
2000
2001 (define_insn "<expander><mode>3<exec>"
2002 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
2003 (bitop:V_INT_1REG
2004 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
2005 (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2006 ""
2007 "@
2008 v_<mnemonic>0\t%0, %2, %1
2009 ds_<mnemonic>0\t%A0, %2%O0"
2010 [(set_attr "type" "vop2,ds")
2011 (set_attr "length" "8,8")])
2012
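;; 64-bit bitwise operations are simply performed independently on the two
;; 32-bit halves, so the register form is split into two <VnSI> operations
;; once reload has assigned hard registers.  The DS (LDS memory) alternative
;; is left whole; the split condition below only fires when the destination
;; is not LDS memory.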
2013 (define_insn_and_split "<expander><mode>3"
2014 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2015 (bitop:V_DI
2016 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2017 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2018 ""
2019 "@
2020 #
2021 ds_<mnemonic>0\t%A0, %2%O0"
2022 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2023 [(set (match_dup 3)
2024 (bitop:<VnSI> (match_dup 5) (match_dup 7)))
2025 (set (match_dup 4)
2026 (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
2027 {
2028 operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0);
2029 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1);
2030 operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0);
2031 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1);
2032 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0);
2033 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1);
2034 }
2035 [(set_attr "type" "vmult,ds")
2036 (set_attr "length" "16,8")])
2037
2038 (define_insn_and_split "<expander><mode>3_exec"
2039 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2040 (vec_merge:V_DI
2041 (bitop:V_DI
2042 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2043 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2044 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
2045 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2046 "!memory_operand (operands[0], VOIDmode)
2047 || (rtx_equal_p (operands[0], operands[1])
2048 && register_operand (operands[2], VOIDmode))"
2049 "@
2050 #
2051 ds_<mnemonic>0\t%A0, %2%O0"
2052 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2053 [(set (match_dup 5)
2054 (vec_merge:<VnSI>
2055 (bitop:<VnSI> (match_dup 7) (match_dup 9))
2056 (match_dup 11)
2057 (match_dup 4)))
2058 (set (match_dup 6)
2059 (vec_merge:<VnSI>
2060 (bitop:<VnSI> (match_dup 8) (match_dup 10))
2061 (match_dup 12)
2062 (match_dup 4)))]
2063 {
2064 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0);
2065 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1);
2066 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0);
2067 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1);
2068 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0);
2069 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1);
2070 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0);
2071 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1);
2072 }
2073 [(set_attr "type" "vmult,ds")
2074 (set_attr "length" "16,8")])
2075
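;; Sub-word vector shifts are implemented by widening to SImode vectors,
;; shifting there, and converting the result back.  For lshiftrt the
;; widening uses zero extension so that the vacated high bits are zero;
;; ashiftrt relies on sign extension, and for ashift either extension would
;; do.  The local enum redeclares the rtx code names so that <code> can be
;; compared as an ordinary C constant.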
2076 (define_expand "<expander><mode>3"
2077 [(set (match_operand:V_QIHI 0 "register_operand" "= v")
2078 (shiftop:V_QIHI
2079 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2080 (vec_duplicate:V_QIHI
2081 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2082 ""
2083 {
2084 enum {ashift, lshiftrt, ashiftrt};
2085 bool unsignedp = (<code> == lshiftrt);
2086 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2087 rtx insi2 = gen_reg_rtx (SImode);
2088 rtx outsi = gen_reg_rtx (<VnSI>mode);
2089
2090 convert_move (insi1, operands[1], unsignedp);
2091 convert_move (insi2, operands[2], unsignedp);
2092 emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2));
2093 convert_move (operands[0], outsi, unsignedp);
2094 DONE;
2095 })
2096
2097 (define_insn "<expander><mode>3<exec>"
2098 [(set (match_operand:V_SI 0 "register_operand" "= v")
2099 (shiftop:V_SI
2100 (match_operand:V_SI 1 "gcn_alu_operand" " v")
2101 (vec_duplicate:V_SI
2102 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2103 ""
2104 "v_<revmnemonic>0\t%0, %2, %1"
2105 [(set_attr "type" "vop2")
2106 (set_attr "length" "8")])
2107
2108 (define_expand "v<expander><mode>3"
2109 [(set (match_operand:V_QIHI 0 "register_operand" "=v")
2110 (shiftop:V_QIHI
2111 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2112 (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
2113 ""
2114 {
2115 enum {ashift, lshiftrt, ashiftrt};
2116 bool unsignedp = (<code> == lshiftrt);
2117 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2118 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2119 rtx outsi = gen_reg_rtx (<VnSI>mode);
2120
2121 convert_move (insi1, operands[1], unsignedp);
2122 convert_move (insi2, operands[2], unsignedp);
2123 emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2));
2124 convert_move (operands[0], outsi, unsignedp);
2125 DONE;
2126 })
2127
2128 (define_insn "v<expander><mode>3<exec>"
2129 [(set (match_operand:V_SI 0 "register_operand" "=v")
2130 (shiftop:V_SI
2131 (match_operand:V_SI 1 "gcn_alu_operand" " v")
2132 (match_operand:V_SI 2 "gcn_alu_operand" "vB")))]
2133 ""
2134 "v_<revmnemonic>0\t%0, %2, %1"
2135 [(set_attr "type" "vop2")
2136 (set_attr "length" "8")])
2137
2138 (define_expand "<expander><mode>3"
2139 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
2140 (minmaxop:V_QIHI
2141 (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
2142 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
2143 ""
2144 {
2145 enum {smin, umin, smax, umax};
2146 bool unsignedp = (<code> == umax || <code> == umin);
2147 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2148 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2149 rtx outsi = gen_reg_rtx (<VnSI>mode);
2150
2151 convert_move (insi1, operands[1], unsignedp);
2152 convert_move (insi2, operands[2], unsignedp);
2153 emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2));
2154 convert_move (operands[0], outsi, unsignedp);
2155 DONE;
2156 })
2157
2158 (define_insn "<expander><vnsi>3<exec>"
2159 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD")
2160 (minmaxop:V_SI
2161 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0")
2162 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2163 ""
2164 "@
2165 v_<mnemonic>0\t%0, %2, %1
2166 ds_<mnemonic>0\t%A0, %2%O0"
2167 [(set_attr "type" "vop2,ds")
2168 (set_attr "length" "8,8")])
2169
2170 ;; }}}
2171 ;; {{{ FP binops - special cases
2172
2173 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2174 ; adding the negated second operand to the first.
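; The negation costs nothing extra: it is folded into a source modifier of
; v_add_f64 (note the "-%2" in the templates below).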
2175
2176 (define_insn "sub<mode>3<exec>"
2177 [(set (match_operand:V_DF 0 "register_operand" "= v, v")
2178 (minus:V_DF
2179 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v")
2180 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))]
2181 ""
2182 "@
2183 v_add_f64\t%0, %1, -%2
2184 v_add_f64\t%0, -%2, %1"
2185 [(set_attr "type" "vop3a")
2186 (set_attr "length" "8,8")])
2187
2188 (define_insn "subdf"
2189 [(set (match_operand:DF 0 "register_operand" "= v, v")
2190 (minus:DF
2191 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2192 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2193 ""
2194 "@
2195 v_add_f64\t%0, %1, -%2
2196 v_add_f64\t%0, -%2, %1"
2197 [(set_attr "type" "vop3a")
2198 (set_attr "length" "8,8")])
2199
2200 ;; }}}
2201 ;; {{{ FP binops - generic
2202
2203 (define_code_iterator comm_fp [plus mult smin smax])
2204 (define_code_iterator nocomm_fp [minus])
2205 (define_code_iterator all_fp [plus mult minus smin smax])
2206
2207 (define_insn "<expander><mode>3<exec>"
2208 [(set (match_operand:V_FP 0 "register_operand" "= v")
2209 (comm_fp:V_FP
2210 (match_operand:V_FP 1 "gcn_alu_operand" "% v")
2211 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
2212 ""
2213 "v_<mnemonic>0\t%0, %2, %1"
2214 [(set_attr "type" "vop2")
2215 (set_attr "length" "8")])
2216
2217 (define_insn "<expander><mode>3"
2218 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
2219 (comm_fp:FP
2220 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
2221 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2222 ""
2223 "@
2224 v_<mnemonic>0\t%0, %2, %1
2225 v_<mnemonic>0\t%0, %1%O0"
2226 [(set_attr "type" "vop2,ds")
2227 (set_attr "length" "8")])
2228
2229 (define_insn "<expander><mode>3<exec>"
2230 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
2231 (nocomm_fp:V_FP_1REG
2232 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2233 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2234 ""
2235 "@
2236 v_<mnemonic>0\t%0, %1, %2
2237 v_<revmnemonic>0\t%0, %2, %1"
2238 [(set_attr "type" "vop2")
2239 (set_attr "length" "8,8")])
2240
2241 (define_insn "<expander><mode>3"
2242 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v")
2243 (nocomm_fp:FP_1REG
2244 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2245 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2246 ""
2247 "@
2248 v_<mnemonic>0\t%0, %1, %2
2249 v_<revmnemonic>0\t%0, %2, %1"
2250 [(set_attr "type" "vop2")
2251 (set_attr "length" "8,8")])
2252
2253 ;; }}}
2254 ;; {{{ FP unops
2255
2256 (define_insn "abs<mode>2"
2257 [(set (match_operand:FP 0 "register_operand" "=v")
2258 (abs:FP (match_operand:FP 1 "register_operand" " v")))]
2259 ""
2260 "v_add%i0\t%0, 0, |%1|"
2261 [(set_attr "type" "vop3a")
2262 (set_attr "length" "8")])
2263
2264 (define_insn "abs<mode>2<exec>"
2265 [(set (match_operand:V_FP 0 "register_operand" "=v")
2266 (abs:V_FP
2267 (match_operand:V_FP 1 "register_operand" " v")))]
2268 ""
2269 "v_add%i0\t%0, 0, |%1|"
2270 [(set_attr "type" "vop3a")
2271 (set_attr "length" "8")])
2272
2273 (define_insn "neg<mode>2<exec>"
2274 [(set (match_operand:V_FP 0 "register_operand" "=v")
2275 (neg:V_FP
2276 (match_operand:V_FP 1 "register_operand" " v")))]
2277 ""
2278 "v_add%i0\t%0, 0, -%1"
2279 [(set_attr "type" "vop3a")
2280 (set_attr "length" "8")])
2281
2282 (define_insn "sqrt<mode>2<exec>"
2283 [(set (match_operand:V_FP 0 "register_operand" "= v")
2284 (sqrt:V_FP
2285 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
2286 "flag_unsafe_math_optimizations"
2287 "v_sqrt%i0\t%0, %1"
2288 [(set_attr "type" "vop1")
2289 (set_attr "length" "8")])
2290
2291 (define_insn "sqrt<mode>2"
2292 [(set (match_operand:FP 0 "register_operand" "= v")
2293 (sqrt:FP
2294 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
2295 "flag_unsafe_math_optimizations"
2296 "v_sqrt%i0\t%0, %1"
2297 [(set_attr "type" "vop1")
2298 (set_attr "length" "8")])
2299
2300 ;; }}}
2301 ;; {{{ FP fused multiply and add
2302
2303 (define_insn "fma<mode>4<exec>"
2304 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
2305 (fma:V_FP
2306 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
2307 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
2308 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
2309 ""
2310 "v_fma%i0\t%0, %1, %2, %3"
2311 [(set_attr "type" "vop3a")
2312 (set_attr "length" "8")])
2313
2314 (define_insn "fma<mode>4_negop2<exec>"
2315 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
2316 (fma:V_FP
2317 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2318 (neg:V_FP
2319 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2320 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2321 ""
2322 "v_fma%i0\t%0, %1, -%2, %3"
2323 [(set_attr "type" "vop3a")
2324 (set_attr "length" "8")])
2325
2326 (define_insn "fma<mode>4"
2327 [(set (match_operand:FP 0 "register_operand" "= v, v")
2328 (fma:FP
2329 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
2330 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
2331 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
2332 ""
2333 "v_fma%i0\t%0, %1, %2, %3"
2334 [(set_attr "type" "vop3a")
2335 (set_attr "length" "8")])
2336
2337 (define_insn "fma<mode>4_negop2"
2338 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
2339 (fma:FP
2340 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2341 (neg:FP
2342 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2343 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2344 ""
2345 "v_fma%i0\t%0, %1, -%2, %3"
2346 [(set_attr "type" "vop3a")
2347 (set_attr "length" "8")])
2348
2349 ;; }}}
2350 ;; {{{ FP division
2351
2352 (define_insn "recip<mode>2<exec>"
2353 [(set (match_operand:V_FP 0 "register_operand" "= v")
2354 (div:V_FP
2355 (vec_duplicate:V_FP (float:<SCALAR_MODE> (const_int 1)))
2356 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
2357 ""
2358 "v_rcp%i0\t%0, %1"
2359 [(set_attr "type" "vop1")
2360 (set_attr "length" "8")])
2361
2362 (define_insn "recip<mode>2"
2363 [(set (match_operand:FP 0 "register_operand" "= v")
2364 (div:FP
2365 (float:FP (const_int 1))
2366 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
2367 ""
2368 "v_rcp%i0\t%0, %1"
2369 [(set_attr "type" "vop1")
2370 (set_attr "length" "8")])
2371
2372 ;; Do division via a = b * 1/c
2373 ;; The v_rcp_* instructions are not sufficiently accurate on their own,
2374 ;; so we use a v_fma_* and a v_mul_* instruction to do one round of Newton-Raphson
2375 ;; which the ISA manual says is enough to improve the reciprocal accuracy.
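;; One Newton-Raphson step refines the initial estimate x0 = rcp(c) as
;;   x1 = x0 * (2 - c * x0)
;; (the fma computes 2 - c * x0, the multiply applies it), and a / c is then
;; approximated by a * x1.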
2376 ;;
2377 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
2378
2379 (define_expand "div<mode>3"
2380 [(match_operand:V_FP 0 "gcn_valu_dst_operand")
2381 (match_operand:V_FP 1 "gcn_valu_src0_operand")
2382 (match_operand:V_FP 2 "gcn_valu_src0_operand")]
2383 "flag_reciprocal_math"
2384 {
2385 rtx two = gcn_vec_constant (<MODE>mode,
2386 const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
2387 rtx initrcp = gen_reg_rtx (<MODE>mode);
2388 rtx fma = gen_reg_rtx (<MODE>mode);
2389 rtx rcp;
2390
2391 bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
2392 && real_identical
2393 (CONST_DOUBLE_REAL_VALUE
2394 (CONST_VECTOR_ELT (operands[1], 0)), &dconst1));
2395
2396 if (is_rcp)
2397 rcp = operands[0];
2398 else
2399 rcp = gen_reg_rtx (<MODE>mode);
2400
2401 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2402 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2403 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2404
2405 if (!is_rcp)
2406 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2407
2408 DONE;
2409 })
2410
2411 (define_expand "div<mode>3"
2412 [(match_operand:FP 0 "gcn_valu_dst_operand")
2413 (match_operand:FP 1 "gcn_valu_src0_operand")
2414 (match_operand:FP 2 "gcn_valu_src0_operand")]
2415 "flag_reciprocal_math"
2416 {
2417 rtx two = const_double_from_real_value (dconst2, <MODE>mode);
2418 rtx initrcp = gen_reg_rtx (<MODE>mode);
2419 rtx fma = gen_reg_rtx (<MODE>mode);
2420 rtx rcp;
2421
2422 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
2423 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
2424 &dconst1));
2425
2426 if (is_rcp)
2427 rcp = operands[0];
2428 else
2429 rcp = gen_reg_rtx (<MODE>mode);
2430
2431 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2432 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2433 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2434
2435 if (!is_rcp)
2436 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2437
2438 DONE;
2439 })
2440
2441 ;; }}}
2442 ;; {{{ Int/FP conversions
2443
2444 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2445 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2446
2447 (define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
2448 (define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
2449 (define_mode_iterator VCVT_IMODE [V64HI V64SI])
2450
2451 (define_code_iterator cvt_op [fix unsigned_fix
2452 float unsigned_float
2453 float_extend float_truncate])
2454 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2455 (float "float") (unsigned_float "floatuns")
2456 (float_extend "extend") (float_truncate "trunc")])
2457 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2458 (float "%i0%i1") (unsigned_float "%i0%u1")
2459 (float_extend "%i0%i1")
2460 (float_truncate "%i0%i1")])
2461
2462 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2463 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2464 (cvt_op:CVT_TO_MODE
2465 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2466 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2467 <cvt_name>_cvt)"
2468 "v_cvt<cvt_operands>\t%0, %1"
2469 [(set_attr "type" "vop1")
2470 (set_attr "length" "8")])
2471
2472 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
2473 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
2474 (cvt_op:VCVT_FMODE
2475 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
2476 "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
2477 <cvt_name>_cvt)"
2478 "v_cvt<cvt_operands>\t%0, %1"
2479 [(set_attr "type" "vop1")
2480 (set_attr "length" "8")])
2481
2482 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
2483 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
2484 (cvt_op:VCVT_IMODE
2485 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
2486 "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
2487 <cvt_name>_cvt)"
2488 "v_cvt<cvt_operands>\t%0, %1"
2489 [(set_attr "type" "vop1")
2490 (set_attr "length" "8")])
2491
2492 ;; }}}
2493 ;; {{{ Int/int conversions
2494
2495 (define_code_iterator zero_convert [truncate zero_extend])
2496 (define_code_attr convop [
2497 (sign_extend "extend")
2498 (zero_extend "zero_extend")
2499 (truncate "trunc")])
2500
2501 (define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2502 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2503 (zero_convert:V_INT_1REG
2504 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
2505 ""
2506 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
2507 [(set_attr "type" "vop_sdwa")
2508 (set_attr "length" "8")])
2509
2510 (define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2511 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2512 (sign_extend:V_INT_1REG
2513 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
2514 ""
2515 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
2516 [(set_attr "type" "vop_sdwa")
2517 (set_attr "length" "8")])
2518
2519 ;; GCC can already do these for scalar types, but not for vector types.
2520 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2521 ;; so a few tricks are needed here.
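;; Truncation simply takes the lower of the two 32-bit registers that make
;; up the 64-bit value (via gcn_operand_part); extension writes the low
;; part and then fills the high part either with zero or with copies of the
;; sign bit (a 31-bit arithmetic shift of the low part).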
2522
2523 (define_insn_and_split "trunc<vndi><mode>2"
2524 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2525 (truncate:V_INT_1REG
2526 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))]
2527 ""
2528 "#"
2529 "reload_completed"
2530 [(const_int 0)]
2531 {
2532 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
2533 rtx out = operands[0];
2534
2535 if (<MODE>mode != <VnSI>mode)
2536 emit_insn (gen_trunc<vnsi><mode>2 (out, inlo));
2537 else
2538 emit_move_insn (out, inlo);
2539 }
2540 [(set_attr "type" "vop2")
2541 (set_attr "length" "4")])
2542
2543 (define_insn_and_split "trunc<vndi><mode>2_exec"
2544 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2545 (vec_merge:V_INT_1REG
2546 (truncate:V_INT_1REG
2547 (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
2548 (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
2549 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2550 ""
2551 "#"
2552 "reload_completed"
2553 [(const_int 0)]
2554 {
2555 rtx out = operands[0];
2556 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
2557 rtx merge = operands[2];
2558 rtx exec = operands[3];
2559
2560 if (<MODE>mode != <VnSI>mode)
2561 emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec));
2562 else
2563 emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec));
2564 }
2565 [(set_attr "type" "vop2")
2566 (set_attr "length" "4")])
2567
2568 (define_insn_and_split "<convop><mode><vndi>2"
2569 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
2570 (any_extend:<VnDI>
2571 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
2572 ""
2573 "#"
2574 "reload_completed"
2575 [(const_int 0)]
2576 {
2577 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
2578 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
2579 rtx in = operands[1];
2580
2581 if (<MODE>mode != <VnSI>mode)
2582 emit_insn (gen_<convop><mode><vnsi>2 (outlo, in));
2583 else
2584 emit_move_insn (outlo, in);
2585 if ('<su>' == 's')
2586 emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31)));
2587 else
2588 emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx));
2589 }
2590 [(set_attr "type" "mult")
2591 (set_attr "length" "12")])
2592
2593 (define_insn_and_split "<convop><mode><vndi>2_exec"
2594 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
2595 (vec_merge:<VnDI>
2596 (any_extend:<VnDI>
2597 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
2598 (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
2599 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2600 ""
2601 "#"
2602 "reload_completed"
2603 [(const_int 0)]
2604 {
2605 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
2606 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
2607 rtx in = operands[1];
2608 rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0);
2609 rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1);
2610 rtx exec = operands[3];
2611
2612 if (<MODE>mode != <VnSI>mode)
2613 emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec));
2614 else
2615 emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec));
2616 if ('<su>' == 's')
2617 emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi,
2618 exec));
2619 else
2620 emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi,
2621 exec));
2622 }
2623 [(set_attr "type" "mult")
2624 (set_attr "length" "12")])
2625
2626 ;; }}}
2627 ;; {{{ Vector comparison/merge
2628
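;; Vector comparisons do not produce a vector result: they set one bit per
;; lane in a 64-bit scalar mask, which the constraints below allow to live
;; in VCC, in EXEC (via v_cmpx), or in an SGPR pair.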
2629 (define_insn "vec_cmp<mode>di"
2630 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2631 (match_operator:DI 1 "gcn_fp_compare_operator"
2632 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
2633 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]))
2634 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
2635 ""
2636 "@
2637 v_cmp%E1\tvcc, %2, %3
2638 v_cmp%E1\tvcc, %2, %3
2639 v_cmpx%E1\tvcc, %2, %3
2640 v_cmpx%E1\tvcc, %2, %3
2641 v_cmp%E1\t%0, %2, %3
2642 v_cmp%E1\t%0, %2, %3"
2643 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2644 (set_attr "length" "4,8,4,8,8,8")])
2645
2646 (define_expand "vec_cmpu<mode>di"
2647 [(match_operand:DI 0 "register_operand")
2648 (match_operator 1 "gcn_compare_operator"
2649 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
2650 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
2651 ""
2652 {
2653 /* Unsigned comparisons use the same patterns as signed comparisons,
2654 except that they use unsigned operators (e.g. LTU vs LT).
2655 The '%E1' directive then does the Right Thing. */
2656 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
2657 operands[3]));
2658 DONE;
2659 })
2660
2661 ; There's no instruction for 8-bit vector comparison, so we need to extend.
2662 (define_expand "vec_cmp<u><mode>di"
2663 [(match_operand:DI 0 "register_operand")
2664 (match_operator 1 "gcn_compare_operator"
2665 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
2666 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
2667 "can_create_pseudo_p ()"
2668 {
2669 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
2670 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
2671
2672 emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2]));
2673 emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3]));
2674 emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2));
2675 DONE;
2676 })
2677
2678 (define_insn "vec_cmp<mode>di_exec"
2679 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2680 (and:DI
2681 (match_operator 1 "gcn_fp_compare_operator"
2682 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
2683 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])
2684 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
2685 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
2686 ""
2687 "@
2688 v_cmp%E1\tvcc, %2, %3
2689 v_cmp%E1\tvcc, %2, %3
2690 v_cmpx%E1\tvcc, %2, %3
2691 v_cmpx%E1\tvcc, %2, %3
2692 v_cmp%E1\t%0, %2, %3
2693 v_cmp%E1\t%0, %2, %3"
2694 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2695 (set_attr "length" "4,8,4,8,8,8")])
2696
2697 (define_expand "vec_cmpu<mode>di_exec"
2698 [(match_operand:DI 0 "register_operand")
2699 (match_operator 1 "gcn_compare_operator"
2700 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
2701 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
2702 (match_operand:DI 4 "gcn_exec_reg_operand")]
2703 ""
2704 {
2705 /* Unsigned comparisons use the same patterns as signed comparisons,
2706 except that they use unsigned operators (e.g. LTU vs LT).
2707 The '%E1' directive then does the Right Thing. */
2708 emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
2709 operands[2], operands[3],
2710 operands[4]));
2711 DONE;
2712 })
2713
2714 (define_expand "vec_cmp<u><mode>di_exec"
2715 [(match_operand:DI 0 "register_operand")
2716 (match_operator 1 "gcn_compare_operator"
2717 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
2718 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
2719 (match_operand:DI 4 "gcn_exec_reg_operand")]
2720 "can_create_pseudo_p ()"
2721 {
2722 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
2723 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
2724
2725 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
2726 sitmp1, operands[4]));
2727 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
2728 sitmp2, operands[4]));
2729 emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
2730 sitmp2, operands[4]));
2731 DONE;
2732 })
2733
2734 (define_insn "vec_cmp<mode>di_dup"
2735 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2736 (match_operator:DI 1 "gcn_fp_compare_operator"
2737 [(vec_duplicate:V_noQI
2738 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2739 " Sv, B,Sv,B, A"))
2740 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]))
2741 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
2742 ""
2743 "@
2744 v_cmp%E1\tvcc, %2, %3
2745 v_cmp%E1\tvcc, %2, %3
2746 v_cmpx%E1\tvcc, %2, %3
2747 v_cmpx%E1\tvcc, %2, %3
2748 v_cmp%E1\t%0, %2, %3"
2749 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2750 (set_attr "length" "4,8,4,8,8")])
2751
2752 (define_insn "vec_cmp<mode>di_dup_exec"
2753 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2754 (and:DI
2755 (match_operator 1 "gcn_fp_compare_operator"
2756 [(vec_duplicate:V_noQI
2757 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2758 " Sv, B,Sv,B, A"))
2759 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
2760 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
2761 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
2762 ""
2763 "@
2764 v_cmp%E1\tvcc, %2, %3
2765 v_cmp%E1\tvcc, %2, %3
2766 v_cmpx%E1\tvcc, %2, %3
2767 v_cmpx%E1\tvcc, %2, %3
2768 v_cmp%E1\t%0, %2, %3"
2769 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2770 (set_attr "length" "4,8,4,8,8")])
2771
2772 (define_expand "vcond_mask_<mode>di"
2773 [(parallel
2774 [(set (match_operand:V_ALL 0 "register_operand" "")
2775 (vec_merge:V_ALL
2776 (match_operand:V_ALL 1 "gcn_vop3_operand" "")
2777 (match_operand:V_ALL 2 "gcn_alu_operand" "")
2778 (match_operand:DI 3 "register_operand" "")))
2779 (clobber (scratch:<VnDI>))])]
2780 ""
2781 "")
2782
2783 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
2784 [(match_operand:V_ALL 0 "register_operand")
2785 (match_operand:V_ALL 1 "gcn_vop3_operand")
2786 (match_operand:V_ALL 2 "gcn_alu_operand")
2787 (match_operator 3 "gcn_fp_compare_operator"
2788 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
2789 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
2790 ""
2791 {
2792 rtx tmp = gen_reg_rtx (DImode);
2793 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di
2794 (tmp, operands[3], operands[4], operands[5]));
2795 emit_insn (gen_vcond_mask_<V_ALL:mode>di
2796 (operands[0], operands[1], operands[2], tmp));
2797 DONE;
2798 })
2799
2800 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
2801 [(match_operand:V_ALL 0 "register_operand")
2802 (match_operand:V_ALL 1 "gcn_vop3_operand")
2803 (match_operand:V_ALL 2 "gcn_alu_operand")
2804 (match_operator 3 "gcn_fp_compare_operator"
2805 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
2806 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
2807 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2808 ""
2809 {
2810 rtx tmp = gen_reg_rtx (DImode);
2811 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec
2812 (tmp, operands[3], operands[4], operands[5], operands[6]));
2813 emit_insn (gen_vcond_mask_<V_ALL:mode>di
2814 (operands[0], operands[1], operands[2], tmp));
2815 DONE;
2816 })
2817
2818 (define_expand "vcondu<V_ALL:mode><V_INT:mode>"
2819 [(match_operand:V_ALL 0 "register_operand")
2820 (match_operand:V_ALL 1 "gcn_vop3_operand")
2821 (match_operand:V_ALL 2 "gcn_alu_operand")
2822 (match_operator 3 "gcn_fp_compare_operator"
2823 [(match_operand:V_INT 4 "gcn_alu_operand")
2824 (match_operand:V_INT 5 "gcn_vop3_operand")])]
2825 ""
2826 {
2827 rtx tmp = gen_reg_rtx (DImode);
2828 emit_insn (gen_vec_cmpu<V_INT:mode>di
2829 (tmp, operands[3], operands[4], operands[5]));
2830 emit_insn (gen_vcond_mask_<V_ALL:mode>di
2831 (operands[0], operands[1], operands[2], tmp));
2832 DONE;
2833 })
2834
2835 (define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
2836 [(match_operand:V_ALL 0 "register_operand")
2837 (match_operand:V_ALL 1 "gcn_vop3_operand")
2838 (match_operand:V_ALL 2 "gcn_alu_operand")
2839 (match_operator 3 "gcn_fp_compare_operator"
2840 [(match_operand:V_INT 4 "gcn_alu_operand")
2841 (match_operand:V_INT 5 "gcn_vop3_operand")])
2842 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2843 ""
2844 {
2845 rtx tmp = gen_reg_rtx (DImode);
2846 emit_insn (gen_vec_cmpu<V_INT:mode>di_exec
2847 (tmp, operands[3], operands[4], operands[5], operands[6]));
2848 emit_insn (gen_vcond_mask_<V_ALL:mode>di
2849 (operands[0], operands[1], operands[2], tmp));
2850 DONE;
2851 })
2852
2853 ;; }}}
2854 ;; {{{ Fully masked loop support
2855
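;; while_ult computes the mask of lanes for which op1 + lane < op2.  With
;; constant bounds this is just a constant with the low (op2 - op1) bits set
;; (all ones once the difference covers the whole vector).  Otherwise op1 is
;; added to the lane-index vector (0, 1, 2, ...) that the backend keeps in
;; VGPR 1, as the _0_1_2_3 name suggests, and the sums are compared against
;; a duplicate of op2.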
2856 (define_expand "while_ultsidi"
2857 [(match_operand:DI 0 "register_operand")
2858 (match_operand:SI 1 "")
2859 (match_operand:SI 2 "")]
2860 ""
2861 {
2862 if (GET_CODE (operands[1]) != CONST_INT
2863 || GET_CODE (operands[2]) != CONST_INT)
2864 {
2865 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2866 rtx tmp = _0_1_2_3;
2867 if (GET_CODE (operands[1]) != CONST_INT
2868 || INTVAL (operands[1]) != 0)
2869 {
2870 tmp = gen_reg_rtx (V64SImode);
2871 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
2872 }
2873 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
2874 gen_rtx_GT (VOIDmode, 0, 0),
2875 operands[2], tmp));
2876 }
2877 else
2878 {
2879 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
2880 HOST_WIDE_INT mask = (diff >= 64 ? -1
2881 : ~((unsigned HOST_WIDE_INT)-1 << diff));
2882 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
2883 }
2884 DONE;
2885 })
2886
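;; maskload zeroes the whole destination first and then gathers with the
;; mask as EXEC, using the destination itself as the merge input, so that
;; inactive lanes keep the required zero value.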
2887 (define_expand "maskload<mode>di"
2888 [(match_operand:V_ALL 0 "register_operand")
2889 (match_operand:V_ALL 1 "memory_operand")
2890 (match_operand 2 "")]
2891 ""
2892 {
2893 rtx exec = force_reg (DImode, operands[2]);
2894 rtx addr = gcn_expand_scalar_to_vector_address
2895 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode));
2896 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
2897 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
2898
2899 /* Masked lanes are required to hold zero. */
2900 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
2901
2902 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
2903 operands[0], exec));
2904 DONE;
2905 })
2906
2907 (define_expand "maskstore<mode>di"
2908 [(match_operand:V_ALL 0 "memory_operand")
2909 (match_operand:V_ALL 1 "register_operand")
2910 (match_operand 2 "")]
2911 ""
2912 {
2913 rtx exec = force_reg (DImode, operands[2]);
2914 rtx addr = gcn_expand_scalar_to_vector_address
2915 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode));
2916 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
2917 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
2918 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
2919 DONE;
2920 })
2921
2922 (define_expand "mask_gather_load<mode><vnsi>"
2923 [(match_operand:V_ALL 0 "register_operand")
2924 (match_operand:DI 1 "register_operand")
2925 (match_operand:<VnSI> 2 "register_operand")
2926 (match_operand 3 "immediate_operand")
2927 (match_operand:SI 4 "gcn_alu_operand")
2928 (match_operand:DI 5 "")]
2929 ""
2930 {
2931 rtx exec = force_reg (DImode, operands[5]);
2932
2933 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
2934 operands[2], operands[4],
2935 INTVAL (operands[3]), exec);
2936
2937 /* Masked lanes are required to hold zero. */
2938 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
2939
2940 if (GET_MODE (addr) == <VnDI>mode)
2941 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
2942 const0_rtx, const0_rtx,
2943 const0_rtx, operands[0],
2944 exec));
2945 else
2946 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
2947 addr, const0_rtx,
2948 const0_rtx, const0_rtx,
2949 operands[0], exec));
2950 DONE;
2951 })
2952
2953 (define_expand "mask_scatter_store<mode><vnsi>"
2954 [(match_operand:DI 0 "register_operand")
2955 (match_operand:<VnSI> 1 "register_operand")
2956 (match_operand 2 "immediate_operand")
2957 (match_operand:SI 3 "gcn_alu_operand")
2958 (match_operand:V_ALL 4 "register_operand")
2959 (match_operand:DI 5 "")]
2960 ""
2961 {
2962 rtx exec = force_reg (DImode, operands[5]);
2963
2964 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
2965 operands[1], operands[3],
2966 INTVAL (operands[2]), exec);
2967
2968 if (GET_MODE (addr) == <VnDI>mode)
2969 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
2970 operands[4], const0_rtx,
2971 const0_rtx,
2972 exec));
2973 else
2974 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
2975 const0_rtx, operands[4],
2976 const0_rtx, const0_rtx,
2977 exec));
2978 DONE;
2979 })
2980
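;; Conditional operations map directly onto the _exec variants of the
;; arithmetic patterns: the DImode mask becomes the EXEC operand and
;; operand 4 supplies the values for the inactive lanes.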
2981 (define_code_iterator cond_op [plus minus mult])
2982
2983 (define_expand "cond_<expander><mode>"
2984 [(match_operand:V_ALL 0 "register_operand")
2985 (match_operand:DI 1 "register_operand")
2986 (cond_op:V_ALL
2987 (match_operand:V_ALL 2 "gcn_alu_operand")
2988 (match_operand:V_ALL 3 "gcn_alu_operand"))
2989 (match_operand:V_ALL 4 "register_operand")]
2990 ""
2991 {
2992 operands[1] = force_reg (DImode, operands[1]);
2993 operands[2] = force_reg (<MODE>mode, operands[2]);
2994
2995 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2996 operands[3], operands[4],
2997 operands[1]));
2998 DONE;
2999 })
3000
3001 ;; TODO smin umin smax umax
3002 (define_code_iterator cond_bitop [and ior xor])
3003
3004 (define_expand "cond_<expander><mode>"
3005 [(match_operand:V_INT 0 "register_operand")
3006 (match_operand:DI 1 "register_operand")
3007 (cond_bitop:V_INT
3008 (match_operand:V_INT 2 "gcn_alu_operand")
3009 (match_operand:V_INT 3 "gcn_alu_operand"))
3010 (match_operand:V_INT 4 "register_operand")]
3011 ""
3012 {
3013 operands[1] = force_reg (DImode, operands[1]);
3014 operands[2] = force_reg (<MODE>mode, operands[2]);
3015
3016 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
3017 operands[3], operands[4],
3018 operands[1]));
3019 DONE;
3020 })
3021
3022 ;; }}}
3023 ;; {{{ Vector reductions
3024
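;; Reductions are built from DPP operations that combine each lane with a
;; lane some power-of-two distance away; gcn_expand_reduc_scalar presumably
;; emits one such step per doubling until, after log2(64) = 6 steps, lane 63
;; holds the reduced value, which is then copied out with mov_from_lane63.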
3025 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
3026 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
3027 UNSPEC_PLUS_DPP_SHR
3028 UNSPEC_AND_DPP_SHR
3029 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3030
3031 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
3032 UNSPEC_AND_DPP_SHR
3033 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3034
3035 ; FIXME: Isn't there a better way of doing this?
3036 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
3037 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
3038 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
3039 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
3040 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
3041 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
3042 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
3043 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
3044
3045 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
3046 (UNSPEC_SMAX_DPP_SHR "smax")
3047 (UNSPEC_UMIN_DPP_SHR "umin")
3048 (UNSPEC_UMAX_DPP_SHR "umax")
3049 (UNSPEC_PLUS_DPP_SHR "plus")
3050 (UNSPEC_AND_DPP_SHR "and")
3051 (UNSPEC_IOR_DPP_SHR "ior")
3052 (UNSPEC_XOR_DPP_SHR "xor")])
3053
3054 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
3055 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
3056 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
3057 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
3058 (UNSPEC_PLUS_DPP_SHR "v_add%U0")
3059 (UNSPEC_AND_DPP_SHR "v_and%B0")
3060 (UNSPEC_IOR_DPP_SHR "v_or%B0")
3061 (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
3062
3063 (define_expand "reduc_<reduc_op>_scal_<mode>"
3064 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
3065 (unspec:<SCALAR_MODE>
3066 [(match_operand:V_ALL 1 "register_operand")]
3067 REDUC_UNSPEC))]
3068 ""
3069 {
3070 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
3071 <reduc_unspec>);
3072
3073 /* The result of the reduction is in lane 63 of tmp. */
3074 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
3075
3076 DONE;
3077 })
3078
3079
3080 (define_insn "*<reduc_op>_dpp_shr_<mode>"
3081 [(set (match_operand:V_1REG 0 "register_operand" "=v")
3082 (unspec:V_1REG
3083 [(match_operand:V_1REG 1 "register_operand" "v")
3084 (match_operand:V_1REG 2 "register_operand" "v")
3085 (match_operand:SI 3 "const_int_operand" "n")]
3086 REDUC_UNSPEC))]
3087 ; GCN3 requires a carry out, GCN5 does not
3088 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
3089 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
3090 {
3091 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
3092 <reduc_unspec>, INTVAL (operands[3]));
3093 }
3094 [(set_attr "type" "vop_dpp")
3095 (set_attr "length" "8")])
3096
3097 (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
3098 [(set (match_operand:V_DI 0 "register_operand" "=v")
3099 (unspec:V_DI
3100 [(match_operand:V_DI 1 "register_operand" "v")
3101 (match_operand:V_DI 2 "register_operand" "v")
3102 (match_operand:SI 3 "const_int_operand" "n")]
3103 REDUC_2REG_UNSPEC))]
3104 ""
3105 "#"
3106 "reload_completed"
3107 [(set (match_dup 4)
3108 (unspec:<VnSI>
3109 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
3110 (set (match_dup 5)
3111 (unspec:<VnSI>
3112 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
3113 {
3114 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
3115 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
3116 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
3117 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
3118 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
3119 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3120 }
3121 [(set_attr "type" "vmult")
3122 (set_attr "length" "16")])
3123
3124 ; Special cases for addition.
3125
3126 (define_insn "*plus_carry_dpp_shr_<mode>"
3127 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3128 (unspec:V_INT_1REG
3129 [(match_operand:V_INT_1REG 1 "register_operand" "v")
3130 (match_operand:V_INT_1REG 2 "register_operand" "v")
3131 (match_operand:SI 3 "const_int_operand" "n")]
3132 UNSPEC_PLUS_CARRY_DPP_SHR))
3133 (clobber (reg:DI VCC_REG))]
3134 ""
3135 {
3136 return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
3137 UNSPEC_PLUS_CARRY_DPP_SHR,
3138 INTVAL (operands[3]));
3139 }
3140 [(set_attr "type" "vop_dpp")
3141 (set_attr "length" "8")])
3142
3143 (define_insn "*plus_carry_in_dpp_shr_<mode>"
3144 [(set (match_operand:V_SI 0 "register_operand" "=v")
3145 (unspec:V_SI
3146 [(match_operand:V_SI 1 "register_operand" "v")
3147 (match_operand:V_SI 2 "register_operand" "v")
3148 (match_operand:SI 3 "const_int_operand" "n")
3149 (match_operand:DI 4 "register_operand" "cV")]
3150 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3151 (clobber (reg:DI VCC_REG))]
3152 ""
3153 {
3154 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
3155 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
3156 INTVAL (operands[3]));
3157 }
3158 [(set_attr "type" "vop_dpp")
3159 (set_attr "length" "8")])
3160
3161 (define_insn_and_split "*plus_carry_dpp_shr_<mode>"
3162 [(set (match_operand:V_DI 0 "register_operand" "=v")
3163 (unspec:V_DI
3164 [(match_operand:V_DI 1 "register_operand" "v")
3165 (match_operand:V_DI 2 "register_operand" "v")
3166 (match_operand:SI 3 "const_int_operand" "n")]
3167 UNSPEC_PLUS_CARRY_DPP_SHR))
3168 (clobber (reg:DI VCC_REG))]
3169 ""
3170 "#"
3171 "reload_completed"
3172 [(parallel [(set (match_dup 4)
3173 (unspec:<VnSI>
3174 [(match_dup 6) (match_dup 8) (match_dup 3)]
3175 UNSPEC_PLUS_CARRY_DPP_SHR))
3176 (clobber (reg:DI VCC_REG))])
3177 (parallel [(set (match_dup 5)
3178 (unspec:<VnSI>
3179 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
3180 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3181 (clobber (reg:DI VCC_REG))])]
3182 {
3183 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
3184 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
3185 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
3186 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
3187 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
3188 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3189 }
3190 [(set_attr "type" "vmult")
3191 (set_attr "length" "16")])
3192
3193 ; Instructions to move a scalar value from lane 63 of a vector register.
3194 (define_insn "mov_from_lane63_<mode>"
3195 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
3196 (unspec:<SCALAR_MODE>
3197 [(match_operand:V_1REG 1 "register_operand" " v,v")]
3198 UNSPEC_MOV_FROM_LANE63))]
3199 ""
3200 "@
3201 v_readlane_b32\t%0, %1, 63
3202 v_mov_b32\t%0, %1 wave_ror:1"
3203 [(set_attr "type" "vop3a,vop_dpp")
3204 (set_attr "exec" "none,*")
3205 (set_attr "length" "8")])
3206
3207 (define_insn "mov_from_lane63_<mode>"
3208 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
3209 (unspec:<SCALAR_MODE>
3210 [(match_operand:V_2REG 1 "register_operand" " v,v")]
3211 UNSPEC_MOV_FROM_LANE63))]
3212 ""
3213 "@
3214 v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
3215 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
3216 return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
3217 \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
3218 else \
3219 return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
3220 \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
3221 [(set_attr "type" "vop3a,vop_dpp")
3222 (set_attr "exec" "none,*")
3223 (set_attr "length" "8")])
3224
3225 ;; }}}
3226 ;; {{{ Miscellaneous
3227
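;; vec_series: result[i] = op1 + i * op2, formed by multiplying the
;; lane-index vector in VGPR 1 by the step and then adding the base.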
3228 (define_expand "vec_series<mode>"
3229 [(match_operand:V_SI 0 "register_operand")
3230 (match_operand:SI 1 "gcn_alu_operand")
3231 (match_operand:SI 2 "gcn_alu_operand")]
3232 ""
3233 {
3234 rtx tmp = gen_reg_rtx (<MODE>mode);
3235 rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
3236
3237 emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
3238 emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
3239 DONE;
3240 })
3241
3242 (define_expand "vec_series<mode>"
3243 [(match_operand:V_DI 0 "register_operand")
3244 (match_operand:DI 1 "gcn_alu_operand")
3245 (match_operand:DI 2 "gcn_alu_operand")]
3246 ""
3247 {
3248 rtx tmp = gen_reg_rtx (<MODE>mode);
3249 rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
3250 rtx op1vec = gen_reg_rtx (<MODE>mode);
3251
3252 emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2]));
3253 emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1]));
3254 emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec));
3255 DONE;
3256 })
3257
3258 ;; }}}