]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/gcn/gcn-valu.md
vect: while_ult for integer masks
[thirdparty/gcc.git] / gcc / config / gcn / gcn-valu.md
1 ;; Copyright (C) 2016-2022 Free Software Foundation, Inc.
2
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
6 ;; any later version.
7
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 ;; for more details.
12
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
16
17 ;; {{{ Vector iterators
18
19 ; Vector modes for specific types
20 ; (This will make more sense when there are multiple vector sizes)
(define_mode_iterator V_QI [V64QI])
(define_mode_iterator V_HI [V64HI])
(define_mode_iterator V_HF [V64HF])
(define_mode_iterator V_SI [V64SI])
(define_mode_iterator V_SF [V64SF])
(define_mode_iterator V_DI [V64DI])
(define_mode_iterator V_DF [V64DF])

; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI [V64QI V64HI])

; Vector modes for one vector register
(define_mode_iterator V_1REG [V64QI V64HI V64SI V64HF V64SF])

; Integer-only and floating-point-only subsets of V_1REG.
; V_INT_1REG_ALT lists the same modes as V_INT_1REG under a second name.
(define_mode_iterator V_INT_1REG     [V64QI V64HI V64SI])
(define_mode_iterator V_INT_1REG_ALT [V64QI V64HI V64SI])
(define_mode_iterator V_FP_1REG      [V64HF V64SF])

; Vector modes for two vector registers
(define_mode_iterator V_2REG [V64DI V64DF])

; Vector modes with native support
(define_mode_iterator V_noQI [V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_noHI [V64HF V64SI V64SF V64DI V64DF])

(define_mode_iterator V_INT_noQI [V64HI V64SI V64DI])
(define_mode_iterator V_INT_noHI [V64SI V64DI])

; All of above
; (V_ALL_ALT lists the same modes as V_ALL under a second name.)
(define_mode_iterator V_ALL     [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_ALL_ALT [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])

(define_mode_iterator V_INT [V64QI V64HI V64SI V64DI])
(define_mode_iterator V_FP  [V64HF V64SF V64DF])
76
; Element mode name of each vector mode, in lower and upper case.
(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf")
   (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF")
   (V64DI "DI") (V64DF "DF")])

; Every vector mode maps to the V64SI mode name (lower and upper case).
(define_mode_attr vnsi
  [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])

(define_mode_attr VnSI
  [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])

; Every vector mode maps to the V64DI mode name (lower and upper case).
(define_mode_attr vndi
  [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])

(define_mode_attr VnDI
  [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])

; Defined only for the one-register integer modes.
; NOTE(review): presumably the SDWA operand selector -- confirm at the uses.
(define_mode_attr sdwa
  [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
102
103 ;; }}}
104 ;; {{{ Substitutions
105
; Pattern-name suffixes.  Each attribute is empty for the original pattern
; and "_exec" for the copy generated by the named define_subst.
(define_subst_attr "exec"         "vec_merge"              "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber" "" "_exec")
(define_subst_attr "exec_vcc"     "vec_merge_with_vcc"     "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"          "" "_exec")
114
; Turn a plain vector set into a masked one: the new value (operand 1) is
; merged with operand 3 under the DImode mask in operand 4.
(define_subst "vec_merge"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (match_dup 1)
          (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
124
; As "vec_merge", but for patterns that also carry a clobber of operand 2;
; the clobber is kept unchanged in the masked variant.
(define_subst "vec_merge_with_clobber"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (match_dup 1)
          (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])
136
; As "vec_merge", but for patterns that also set a DImode result (operand 2).
; In the masked variant the vector result is merged with operand 4 under the
; mask in operand 5, and the DImode result is ANDed with EXEC_REG.
(define_subst "vec_merge_with_vcc"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))
   (set (match_operand:DI 2)
        (match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
           (vec_merge:V_ALL
             (match_dup 1)
             (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
             (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
           (and:DI (match_dup 3)
                   (reg:DI EXEC_REG)))])])
152
; Masked variant of a four-operand UNSPEC_SCATTER store: the same unspec
; with a DImode exec mask appended as a fifth element (operand 4).
(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand 0)
           (match_operand 1)
           (match_operand 2)
           (match_operand 3)]
          UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 0)
           (match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
          UNSPEC_SCATTER))])
170
171 ;; }}}
172 ;; {{{ Vector moves
173
174 ; This is the entry point for all vector register moves. Memory accesses can
175 ; come this way also, but will more usually use the reload_in/out,
176 ; gather/scatter, maskload/store, etc.
177
(define_expand "mov<mode>"
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (match_operand:V_ALL 1 "general_operand"))]
  ""
  {
    /* While neither LRA nor post-reload passes are running, memory accesses
       are expanded straight into scatter/gather patterns.  */
    const bool before_ra = !lra_in_progress && !reload_completed;

    if (before_ra && MEM_P (operands[0]))
      {
        /* Vector store: the source is forced into a register first.  */
        operands[1] = force_reg (<MODE>mode, operands[1]);

        rtx dest_mem = operands[0];
        rtx tmp = gen_rtx_SCRATCH (<VnDI>mode);
        rtx addr_space = GEN_INT (MEM_ADDR_SPACE (dest_mem));
        rtx is_volatile = GEN_INT (MEM_VOLATILE_P (dest_mem));
        rtx addr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                        dest_mem, tmp);
        emit_insn (gen_scatter<mode>_expr (addr, operands[1], addr_space,
                                           is_volatile));
        DONE;
      }

    if (before_ra && MEM_P (operands[1]))
      {
        /* Vector load.  */
        rtx src_mem = operands[1];
        rtx tmp = gen_rtx_SCRATCH (<VnDI>mode);
        rtx addr_space = GEN_INT (MEM_ADDR_SPACE (src_mem));
        rtx is_volatile = GEN_INT (MEM_VOLATILE_P (src_mem));
        rtx addr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                        src_mem, tmp);
        emit_insn (gen_gather<mode>_expr (operands[0], addr, addr_space,
                                          is_volatile));
        DONE;
      }

    if (MEM_P (operands[0]) || MEM_P (operands[1]))
      {
        /* A memory move requested during LRA: use the sgprbase form, which
           carries a <VnDI>mode register for the address computation.  */
        gcc_assert (!reload_completed);
        rtx addr_tmp = gen_reg_rtx (<VnDI>mode);
        emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1],
                                           addr_tmp));
        DONE;
      }

    /* Otherwise fall through and emit the plain (set ...) pattern.  */
  })
214
215 ; A pseudo instruction that helps LRA use the "U0" constraint.
216
; Emits no assembly at all: the output template is empty and the length is 0.
(define_insn "mov<mode>_unspec"
  [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
        (match_operand:V_ALL 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])
224
; Unmasked move for one-register vector modes: a single v_mov_b32.
; The second alternative (constraint "B") has length 8 instead of 4.
(define_insn "*mov<mode>"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
        (match_operand:V_1REG 1 "general_operand"      "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])
232
; Masked move for one-register vector modes.
;   alternatives 0-1: v_mov_b32 (the else-value is U0, the mask is "e")
;   alternatives 2-3: v_cndmask_b32 selecting between operands 2 and 1,
;                     with the mask in vcc or in the register %3
;   alternatives 4-5: memory load/store, emitted as "#" so they are split
;                     later; these use the <VnDI> scratch in operand 4.
; A store from anything but a register is rejected by the insn condition.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
        (vec_merge:V_1REG
          (match_operand:V_1REG 1 "general_operand" "vA, B, v,vA, m, v")
          (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
                                                    "U0,U0,vA,vA,U0,U0")
          (match_operand:DI 3 "register_operand" " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4 "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %2, %1, vcc
   v_cndmask_b32\t%0, %2, %1, %3
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])
251
252 ; This variant does not accept an unspec, but does permit MEM
253 ; read/modify/write which is necessary for maskstore.
254
255 ;(define_insn "*mov<mode>_exec_match"
256 ; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
257 ; (vec_merge:V_1REG
258 ; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
259 ; (match_dup 0)
260 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
261 ; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
262 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
263 ; "@
264 ; v_mov_b32\t%0, %1
265 ; v_mov_b32\t%0, %1
266 ; #
267 ; #"
268 ; [(set_attr "type" "vop1,vop1,*,*")
269 ; (set_attr "length" "4,8,16,16")])
270
; Unmasked move for two-register vector modes: one v_mov_b32 per half.
(define_insn "*mov<mode>"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
        (match_operand:V_2REG 1 "general_operand"      "vDB"))]
  ""
  {
    /* The low half is copied first unless the source is a register with a
       lower number than the destination; then the high half goes first.
       NOTE(review): presumably this keeps overlapping register pairs from
       being overwritten before they are read.  */
    const bool low_half_first = (!REG_P (operands[1])
                                 || REGNO (operands[0]) <= REGNO (operands[1]));

    return (low_half_first
            ? "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
            : "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1");
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
283
; Masked move for two-register vector modes.
;   alternative 0:    two v_mov_b32
;   alternatives 1-2: two v_cndmask_b32, with the mask in vcc or in %3
;   alternatives 3-4: memory load/store, emitted as "#" so they are split
;                     later.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
        (vec_merge:V_2REG
          (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
          (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
                                                    " U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    /* Same half-ordering rule as the unmasked two-register move: low half
       first, unless the source register number is below the destination's.  */
    const bool low_half_first = (!REG_P (operands[1])
                                 || REGNO (operands[0]) <= REGNO (operands[1]));

    switch (which_alternative)
      {
      case 0:
        return (low_half_first
                ? "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
                : "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1");
      case 1:
        return (low_half_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, vcc"
                : "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, vcc");
      case 2:
        return (low_half_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, %3"
                : "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, %3");
      default:
        /* Memory alternatives: left for the splitters.  */
        return "#";
      }
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])
323
324 ; This variant does not accept an unspec, but does permit MEM
325 ; read/modify/write which is necessary for maskstore.
326
327 ;(define_insn "*mov<mode>_exec_match"
328 ; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
329 ; (vec_merge:V_2REG
330 ; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
331 ; (match_dup 0)
332 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
333 ; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
334 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
335 ; "@
336 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
337 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
338 ; else \
339 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
340 ; #
341 ; #"
342 ; [(set_attr "type" "vmult,*,*")
343 ; (set_attr "length" "16,16,16")])
344
345 ; A SGPR-base load looks like:
346 ; <load> v, Sv
347 ;
348 ; There's no hardware instruction that corresponds to this, but vector base
349 ; addresses are placed in an SGPR because it is easier to add to a vector.
350 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
351 ;
352 ; Rewrite as:
353 ; vT = v1 << log2(element-size)
354 ; vT += Sv
355 ; flat_load v, vT
356
; One-register form.  Only valid while LRA is running or after reload.
; Register/constant alternatives emit v_mov_b32; the memory alternatives
; are "#" and are rewritten by the UNSPEC_SGPRBASE splitters, using the
; <VnDI> register in operand 2.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
        (unspec:V_1REG
          [(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])
371
; Two-register form.  Only valid while LRA is running or after reload.
; Alternative 0 copies both halves with v_mov_b32; the memory alternatives
; are "#" and are rewritten by the UNSPEC_SGPRBASE splitters, using the
; <VnDI> register in operand 2.
;
; The length of alternative 0 is 16, not 8: it emits the same two-instruction
; sequence, with the same "vDB" source constraint, as the unmasked
; "*mov<mode>" for V_2REG, which is also 16.  Over-estimating a length is
; harmless; under-estimating it is not.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
        (unspec:V_2REG
          [(match_operand:V_2REG 1 "general_operand" "vDB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  {
    /* Memory alternatives are handled by the splitters.  */
    if (which_alternative != 0)
      return "#";

    /* Low half first, unless the source register number is below the
       destination's; then the high half goes first.  */
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "16,12,12")])
388
389 ; reload_in was once a standard name, but here it's only referenced by
390 ; gcn_secondary_reload. It allows a reload with a scratch register.
391
; Load from memory (operand 1) into a vector register (operand 0) by
; emitting mov<mode>_sgprbase with the <VnDI> register in operand 2.
(define_expand "reload_in<mode>"
  [(set (match_operand:V_ALL 0 "register_operand" "= v")
        (match_operand:V_ALL 1 "memory_operand"   " m"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
  ""
  {
    rtx dest = operands[0];
    rtx src_mem = operands[1];
    rtx addr_tmp = operands[2];

    emit_insn (gen_mov<mode>_sgprbase (dest, src_mem, addr_tmp));
    DONE;
  })
401
402 ; reload_out is similar to reload_in, above.
403
; Store a vector register (operand 1) to memory (operand 0) by emitting
; mov<mode>_sgprbase with the <VnDI> register in operand 2.
(define_expand "reload_out<mode>"
  [(set (match_operand:V_ALL 0 "memory_operand"   "= m")
        (match_operand:V_ALL 1 "register_operand" " v"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
  ""
  {
    rtx dest_mem = operands[0];
    rtx src = operands[1];
    rtx addr_tmp = operands[2];

    emit_insn (gen_mov<mode>_sgprbase (dest_mem, src, addr_tmp));
    DONE;
  })
413
414 ; Expand scalar addresses into gather/scatter patterns
415
; Unmasked UNSPEC_SGPRBASE store -> four-operand UNSPEC_SCATTER.
(define_split
  [(set (match_operand:V_ALL 0 "memory_operand")
        (unspec:V_ALL
          [(match_operand:V_ALL 1 "general_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
                    UNSPEC_SCATTER))]
  {
    rtx dest_mem = operands[0];

    /* operand 5: vector address expression, built with scratch operand 2.
       operands 6/7: address space and volatile flag taken from the MEM.  */
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       dest_mem, operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (dest_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (dest_mem));
  })
433
; Masked (vec_merge) store -> five-operand UNSPEC_SCATTER, with the exec
; mask (operand 3) as the last element.  Operand 2 of the vec_merge does not
; appear in the replacement.
(define_split
  [(set (match_operand:V_ALL 0 "memory_operand")
        (vec_merge:V_ALL
          (match_operand:V_ALL 1 "general_operand")
          (match_operand:V_ALL 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1)
                     (match_dup 6) (match_dup 7) (match_dup 3)]
                    UNSPEC_SCATTER))]
  {
    rtx dest_mem = operands[0];
    rtx exec = operands[3];

    /* operand 5: vector address expression, built with scratch operand 4.
       operands 6/7: address space and volatile flag taken from the MEM.  */
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, exec,
                                                       dest_mem, operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (dest_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (dest_mem));
  })
454
; Unmasked UNSPEC_SGPRBASE load -> UNSPEC_GATHER.
(define_split
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (unspec:V_ALL
          [(match_operand:V_ALL 1 "memory_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (match_dup 0)
        (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
                       (mem:BLK (scratch))]
                      UNSPEC_GATHER))]
  {
    rtx src_mem = operands[1];

    /* operand 5: vector address expression, built with scratch operand 2.
       operands 6/7: address space and volatile flag taken from the MEM.  */
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       src_mem, operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (src_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (src_mem));
  })
473
; Masked (vec_merge) load -> vec_merge of an UNSPEC_GATHER with the same
; else-value (operand 2) and mask (operand 3).
(define_split
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (vec_merge:V_ALL
          (match_operand:V_ALL 1 "memory_operand")
          (match_operand:V_ALL 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
                         (mem:BLK (scratch))]
                        UNSPEC_GATHER)
          (match_dup 2)
          (match_dup 3)))]
  {
    rtx src_mem = operands[1];
    rtx exec = operands[3];

    /* operand 5: vector address expression, built with scratch operand 4.
       operands 6/7: address space and volatile flag taken from the MEM.  */
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, exec,
                                                       src_mem, operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (src_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (src_mem));
  })
497
498 ; TODO: Add zero/sign extending variants.
499
500 ;; }}}
501 ;; {{{ Lane moves
502
503 ; v_writelane and v_readlane work regardless of exec flags.
504 ; We allow source to be scratch.
505 ;
506 ; FIXME these should take A immediates
507
; Write scalar operand 1 into the lane selected by operand 2 (the merge mask
; is 1 << operand 2) using v_writelane_b32.  One-register modes.
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_1REG 0 "register_operand" "= v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
522
523 ; FIXME: 64bit operations really should be splitters, but I am not sure how
524 ; to represent vertical subregs.
; Two-register variant: one v_writelane_b32 per half of the element, both
; using the same lane number (operand 2).
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_2REG 0 "register_operand" "= v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
539
; Expander: set lane operand 2 of vector operand 0 to scalar operand 1.
; The other lanes keep their value (the else-value is operand 0 itself).
(define_expand "vec_set<mode>"
  [(set (match_operand:V_ALL 0 "register_operand")
        (vec_merge:V_ALL
          (vec_duplicate:V_ALL
            (match_operand:<SCALAR_MODE> 1 "register_operand"))
          (match_dup 0)
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"))))]
  "")
548
; As "*vec_set<mode>", but the merge mask is a constant.  It matches only
; when the constant is a power of two whose log2 is a valid lane number.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_1REG 0 "register_operand" "=v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:SI 2 "const_int_operand" " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    /* The pattern holds a one-bit mask; the instruction takes the lane
       number, so print log2 of the mask instead.  */
    const int lane = exact_log2 (INTVAL (operands[2]));
    operands[2] = GEN_INT (lane);
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
565
; Two-register variant of the constant-mask lane write: one v_writelane_b32
; per half of the element.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_2REG 0 "register_operand" "=v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:SI 2 "const_int_operand" " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    /* The pattern holds a one-bit mask; the instruction takes the lane
       number, so print log2 of the mask instead.  */
    const int lane = exact_log2 (INTVAL (operands[2]));
    operands[2] = GEN_INT (lane);
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
582
; Broadcast scalar operand 1 into a one-register vector with v_mov_b32.
; An "_exec" variant is generated through the "vec_merge" substitution.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_1REG 0 "register_operand" "=v")
        (vec_duplicate:V_1REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
591
; Broadcast scalar operand 1 into a two-register vector: one v_mov_b32 for
; each half.  An "_exec" variant is generated through the "vec_merge"
; substitution.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_2REG 0 "register_operand" "= v")
        (vec_duplicate:V_2REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])
600
; Read lane operand 2 of one-register vector operand 1 into a scalar
; register using v_readlane_b32.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:V_1REG 1 "register_operand" " v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
612
; Two-register variant: one v_readlane_b32 per half.  The destination is
; early-clobber ("&"), since the first instruction writes it while operand 2
; is still needed by the second.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:V_2REG 1 "register_operand" " v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
624
; Extract the element of vector operand 2 whose lane number is the highest
; set bit of the DImode mask in operand 1, into scalar operand 0.
(define_expand "extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:V_ALL 2 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx lane = gen_reg_rtx (SImode);

    /* lane = 63 - clz (mask), i.e. the index of the highest set mask bit.  */
    emit_insn (gen_clzdi2 (lane, operands[1]));
    emit_insn (gen_subsi3 (lane, GEN_INT (63), lane));

    emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], operands[2],
                                                   lane));
    DONE;
  })
641
; Like extract_last_<mode>, but yields the scalar in operand 1 when the mask
; (operand 2) is zero.  Emitted as a branch around the extraction.
(define_expand "fold_extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_ALL 3 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx use_default = gen_label_rtx ();
    rtx finished = gen_label_rtx ();

    /* if (mask == 0) goto use_default;  */
    rtx mask_is_zero = gen_rtx_EQ (VOIDmode, operands[2], const0_rtx);
    emit_jump_insn (gen_cbranchdi4 (mask_is_zero, operands[2], const0_rtx,
                                    use_default));

    /* Non-empty mask: take the last active element.  */
    emit_insn (gen_extract_last_<mode> (operands[0], operands[2],
                                        operands[3]));
    emit_jump_insn (gen_jump (finished));
    emit_barrier ();

    /* Empty mask: fall back to the supplied default.  */
    emit_label (use_default);
    emit_move_insn (operands[0], operands[1]);

    emit_label (finished);
    DONE;
  })
666
; Vector initialisation; all of the work is done by gcn_expand_vector_init.
(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand 1)]
  ""
  {
    rtx dest = operands[0];
    rtx initializer = operands[1];

    gcn_expand_vector_init (dest, initializer);
    DONE;
  })
675
676 ;; }}}
677 ;; {{{ Scatter / Gather
678
679 ;; GCN does not have an instruction for loading a vector from contiguous
680 ;; memory so *all* loads and stores are eventually converted to scatter
681 ;; or gather.
682 ;;
683 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
684 ;; unspec. The unspec formats are as follows:
685 ;;
686 ;; (unspec:V??
687 ;; [(<address expression>)
688 ;; (<addr_space_t>)
689 ;; (<use_glc>)
690 ;; (mem:BLK (scratch))]
691 ;; UNSPEC_GATHER)
692 ;;
693 ;; (unspec:BLK
694 ;; [(<address expression>)
695 ;; (<source register>)
696 ;; (<addr_space_t>)
697 ;; (<use_glc>)
698 ;; (<exec>)]
699 ;; UNSPEC_SCATTER)
700 ;;
701 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
702 ;; - The mem:BLK does not contain any real information, but indicates that an
703 ;; unknown memory read is taking place. Stores are expected to use a similar
704 ;; mem:BLK outside the unspec.
705 ;; - The address space and glc (volatile) fields are there to replace the
706 ;; fields normally found in a MEM.
707 ;; - Multiple forms of address expression are supported, below.
708 ;;
709 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
710
; Operands: 0 = destination vector, 1 = DImode base, 2 = <VnSI> offsets,
; 3 = immediate and 4 = SImode value, both passed on to
; gcn_expand_scaled_offsets.  The load is always emitted with constant
; offset 0 and with 0 in the address-space and glc fields.
(define_expand "gather_load<mode><vnsi>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx scaled = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
                                            operands[2], operands[4],
                                            INTVAL (operands[3]), NULL);

    /* A <VnDI> result is a complete vector of addresses; anything else is
       used as the per-lane offset alongside the scalar base.  */
    emit_insn (GET_MODE (scaled) == <VnDI>mode
               ? gen_gather<mode>_insn_1offset (operands[0], scaled,
                                                const0_rtx, const0_rtx,
                                                const0_rtx)
               : gen_gather<mode>_insn_2offsets (operands[0], operands[1],
                                                 scaled, const0_rtx,
                                                 const0_rtx, const0_rtx));
    DONE;
  })
732
733 ; Allow any address expression
; Operand 1 is the address expression (no predicate); operands 2 and 3 are
; the immediate address-space and glc fields of the gather unspec.
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:V_ALL 0 "register_operand")
        (unspec:V_ALL
          [(match_operand 1 "")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  ""
  {})
744
; Gather from a vector of 64-bit addresses (operand 1) plus a constant
; offset (operand 2).  Operand 3 is the address space, operand 4 the glc
; flag.
;
; For flat addresses the output code only prints the offset when
; TARGET_GCN5_PLUS, so a non-zero offset is accepted only in that case.
; This is the same rule as the matching scatter pattern.  The condition
; used to accept any offset below 0x1000 on every target, which would have
; emitted a flat_load with the offset silently dropped.
; Global addresses accept offsets in the range [-0x1000, 0xfff].
(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:V_ALL 0 "register_operand" "=v")
        (unspec:V_ALL
          [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
                        (vec_duplicate:<VnDI>
                          (match_operand 2 "immediate_operand" " n")))
           (match_operand 3 "immediate_operand" " n")
           (match_operand 4 "immediate_operand" " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[2]) == 0
        || (TARGET_GCN5_PLUS
            && (unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
        && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    /* The template is built at output time because the glc suffix and the
       instruction form depend on operand values.  */
    static char buf[200];
    if (AS_FLAT_P (as))
      {
        if (TARGET_GCN5_PLUS)
          sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
                   glc);
        else
          /* No offset is printed here; the insn condition guarantees that
             it is zero.  */
          sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
               "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
783
; Gather through ds_read from a vector of 32-bit addresses (operand 1) plus
; a constant offset below 0x10000 (operand 2).  Matches only when operand 3
; satisfies AS_ANY_DS_P; " gds" is appended when it satisfies AS_GDS_P.
(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:V_ALL 0 "register_operand" "=v")
        (unspec:V_ALL
          [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
                        (vec_duplicate:<VnSI>
                          (match_operand 2 "immediate_operand" " n")))
           (match_operand 3 "immediate_operand" " n")
           (match_operand 4 "immediate_operand" " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    static char buf[200];
    const char *gds_suffix = AS_GDS_P (INTVAL (operands[3])) ? " gds" : "";

    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
             gds_suffix);
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])
805
; Global-address gather from: scalar 64-bit base (operand 1) + sign-extended
; 32-bit per-lane offsets (operand 2) + constant offset (operand 3) in the
; range [-0x1000, 0xfff].  Operand 4 is the address space, operand 5 the glc
; flag.
(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:V_ALL 0 "register_operand" "=v")
        (unspec:V_ALL
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
                 (match_operand:DI 1 "register_operand" "Sv"))
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 2 "register_operand" " v")))
             (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
           (match_operand 4 "immediate_operand" " n")
           (match_operand 5 "immediate_operand" " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    static char buf[200];
    const char *glc_suffix = INTVAL (operands[5]) ? " glc" : "";

    /* The insn condition only admits global addresses.  */
    if (!AS_GLOBAL_P (INTVAL (operands[4])))
      gcc_unreachable ();

    sprintf (buf, "global_load%%o0\t%%0, %%2, %%1 offset:%%3%s\;"
             "s_waitcnt\tvmcnt(0)", glc_suffix);
    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
837
; Operands: 0 = DImode base, 1 = <VnSI> offsets, 2 = immediate and
; 3 = SImode value, both passed on to gcn_expand_scaled_offsets, 4 = vector
; to store.  The store is always emitted with constant offset 0 and with 0
; in the address-space and glc fields.
(define_expand "scatter_store<mode><vnsi>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:<VnSI> 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:V_ALL 4 "register_operand")]
  ""
  {
    rtx scaled = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
                                            operands[1], operands[3],
                                            INTVAL (operands[2]), NULL);

    /* A <VnDI> result is a complete vector of addresses; anything else is
       used as the per-lane offset alongside the scalar base.  */
    emit_insn (GET_MODE (scaled) == <VnDI>mode
               ? gen_scatter<mode>_insn_1offset (scaled, const0_rtx,
                                                 operands[4], const0_rtx,
                                                 const0_rtx)
               : gen_scatter<mode>_insn_2offsets (operands[0], scaled,
                                                  const0_rtx, operands[4],
                                                  const0_rtx, const0_rtx));
    DONE;
  })
859
860 ; Allow any address expression
; Operand 0 is the <VnDI> address expression (no predicate), operand 1 the
; vector to store; operands 2 and 3 are the immediate address-space and glc
; fields of the scatter unspec.
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VnDI> 0 "")
           (match_operand:V_ALL 1 "register_operand")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")]
          UNSPEC_SCATTER))]
  ""
  {})
871
; Scatter to a vector of 64-bit addresses (operand 0) plus a constant offset
; (operand 1).  Operand 2 is the data, operand 3 the address space and
; operand 4 the glc flag.  Flat addresses take a non-zero offset (below
; 0x1000) only when TARGET_GCN5_PLUS; global addresses take offsets in the
; range [-0x1000, 0xfff].
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
                        (vec_duplicate:<VnDI>
                          (match_operand 1 "immediate_operand" "n")))
           (match_operand:V_ALL 2 "register_operand" "v")
           (match_operand 3 "immediate_operand" "n")
           (match_operand 4 "immediate_operand" "n")]
          UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
        || (TARGET_GCN5_PLUS
            && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
        && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    static char buf[200];
    const addr_space_t space = INTVAL (operands[3]);
    const char *glc_suffix = INTVAL (operands[4]) ? " glc" : "";

    if (AS_FLAT_P (space) && TARGET_GCN5_PLUS)
      sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc_suffix);
    else if (AS_FLAT_P (space))
      /* No offset is printed; the insn condition guarantees it is zero.  */
      sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc_suffix);
    else if (AS_GLOBAL_P (space))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s",
               glc_suffix);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
909
; Scatter through ds_write to a vector of 32-bit addresses (operand 0) plus
; a constant offset below 0x10000 (operand 1).  Matches only when operand 3
; satisfies AS_ANY_DS_P; " gds" is appended when it satisfies AS_GDS_P.
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
                        (vec_duplicate:<VnSI>
                          (match_operand 1 "immediate_operand" "n")))
           (match_operand:V_ALL 2 "register_operand" "v")
           (match_operand 3 "immediate_operand" "n")
           (match_operand 4 "immediate_operand" "n")]
          UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    static char buf[200];
    const char *gds_suffix = AS_GDS_P (INTVAL (operands[3])) ? " gds" : "";

    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
             gds_suffix);
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])
931
; Global-address scatter to: scalar 64-bit base (operand 0) + sign-extended
; 32-bit per-lane offsets (operand 1) + constant offset (operand 2) in the
; range [-0x1000, 0xfff].  Operand 3 is the data, operand 4 the address
; space and operand 5 the glc flag.
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
                 (match_operand:DI 0 "register_operand" "Sv"))
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 1 "register_operand" " v")))
             (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
           (match_operand:V_ALL 3 "register_operand" " v")
           (match_operand 4 "immediate_operand" " n")
           (match_operand 5 "immediate_operand" " n")]
          UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    static char buf[200];
    const char *glc_suffix = INTVAL (operands[5]) ? " glc" : "";

    /* The insn condition only admits global addresses.  */
    if (!AS_GLOBAL_P (INTVAL (operands[4])))
      gcc_unreachable ();

    sprintf (buf, "global_store%%s3\t%%1, %%3, %%0 offset:%%2%s", glc_suffix);
    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
962
963 ;; }}}
964 ;; {{{ Permutations
965
;; Permute for modes that fit one vector register.  Operands 2, 1 and 3
;; (the EXEC mask) are wrapped in UNSPEC_BPERMUTE, and the output is a
;; ds_bpermute_b32 followed by an s_waitcnt on lgkmcnt.  Note that the
;; assembly prints operand 1 before operand 2, the reverse of their order
;; inside the unspec.
966 (define_insn "ds_bpermute<mode>"
967 [(set (match_operand:V_1REG 0 "register_operand" "=v")
968 (unspec:V_1REG
969 [(match_operand:V_1REG 2 "register_operand" " v")
970 (match_operand:<VnSI> 1 "register_operand" " v")
971 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
972 UNSPEC_BPERMUTE))]
973 ""
974 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
975 [(set_attr "type" "vop2")
976 (set_attr "length" "12")])
977
;; Permute for modes that occupy two vector registers.  The insn is emitted
;; as "#" and split once reload has completed into two <VnSI>
;; UNSPEC_BPERMUTE sets, one per half (parts 0 and 1) of operands 0 and 2.
;; Both halves reuse operand 1 and the EXEC mask in operand 3.
978 (define_insn_and_split "ds_bpermute<mode>"
979 [(set (match_operand:V_2REG 0 "register_operand" "=&v")
980 (unspec:V_2REG
981 [(match_operand:V_2REG 2 "register_operand" " v0")
982 (match_operand:<VnSI> 1 "register_operand" " v")
983 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
984 UNSPEC_BPERMUTE))]
985 ""
986 "#"
987 "reload_completed"
988 [(set (match_dup 4) (unspec:<VnSI>
989 [(match_dup 6) (match_dup 1) (match_dup 3)]
990 UNSPEC_BPERMUTE))
991 (set (match_dup 5) (unspec:<VnSI>
992 [(match_dup 7) (match_dup 1) (match_dup 3)]
993 UNSPEC_BPERMUTE))]
994 {
/* Operands 4/5 are the two halves of the destination, 6/7 the two halves
   of the source.  */
995 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
996 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
997 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
998 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
999 }
1000 [(set_attr "type" "vmult")
1001 (set_attr "length" "24")])
1002
;; DPP move of operand 1 into operand 0, parameterised by the constant in
;; operand 2.  The assembly text is not a fixed template: it is produced by
;; gcn_expand_dpp_shr_insn from the "v_mov_b32" mnemonic,
;; UNSPEC_MOV_DPP_SHR and the value of operand 2.
1003 (define_insn "@dpp_move<mode>"
1004 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1005 (unspec:V_noHI
1006 [(match_operand:V_noHI 1 "register_operand" " v")
1007 (match_operand:SI 2 "const_int_operand" " n")]
1008 UNSPEC_MOV_DPP_SHR))]
1009 ""
1010 {
1011 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
1012 UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
1013 }
1014 [(set_attr "type" "vop_dpp")
1015 (set_attr "length" "16")])
1016
1017 ;; }}}
1018 ;; {{{ ALU special case: add/sub
1019
;; Integer add for single-register vector modes.  The template names vcc
;; as the instruction's second destination, which is why VCC_REG is
;; clobbered.  Operand 2, the only source with a constraint wider than "v",
;; is printed as the first source.
1020 (define_insn "add<mode>3<exec_clobber>"
1021 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1022 (plus:V_INT_1REG
1023 (match_operand:V_INT_1REG 1 "register_operand" "% v")
1024 (match_operand:V_INT_1REG 2 "gcn_alu_operand" "vSvB")))
1025 (clobber (reg:DI VCC_REG))]
1026 ""
1027 "v_add%^_u32\t%0, vcc, %2, %1"
1028 [(set_attr "type" "vop2")
1029 (set_attr "length" "8")])
1030
;; As add<mode>3, but the first addend is a scalar (operand 2) broadcast to
;; every lane with vec_duplicate.  The scalar is printed as the first
;; source and the vector register (operand 1) as the second.
1031 (define_insn "add<mode>3_dup<exec_clobber>"
1032 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1033 (plus:V_INT_1REG
1034 (vec_duplicate:V_INT_1REG
1035 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
1036 (match_operand:V_INT_1REG 1 "register_operand" " v")))
1037 (clobber (reg:DI VCC_REG))]
1038 ""
1039 "v_add%^_u32\t%0, vcc, %2, %1"
1040 [(set_attr "type" "vop2")
1041 (set_attr "length" "8")])
1042
;; V_SI add that also exposes the carry-out: operand 3 is set to the
;; unsigned comparison (operand 1 + operand 2) < operand 1.  The two
;; alternatives differ only in the carry destination ("cV" with type vop2,
;; "Sg" with type vop3b).
1043 (define_insn "add<mode>3_vcc<exec_vcc>"
1044 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1045 (plus:V_SI
1046 (match_operand:V_SI 1 "register_operand" "% v, v")
1047 (match_operand:V_SI 2 "gcn_alu_operand" "vSvB,vSvB")))
1048 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
1049 (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
1050 (match_dup 1)))]
1051 ""
1052 "v_add%^_u32\t%0, %3, %2, %1"
1053 [(set_attr "type" "vop2,vop3b")
1054 (set_attr "length" "8")])
1055
1056 ; This pattern only changes the VCC bits when the corresponding lane is
1057 ; enabled, so the set must be described as an ior.
1058
;; V_SI add of a broadcast scalar (operand 1) to a vector (operand 2), with
;; the carry-out written to operand 3 ("cV" with type vop2, "Sg" with type
;; vop3b).
;;
;; Operand 1 may be an SGPR or a constant ("SvB") whereas operand 2 is
;; always a vector register, so operand 1 must be printed as the first
;; source, exactly as the sibling add patterns print their scalar-capable
;; operand first.  Printing "%2, %1" would place the scalar/constant in the
;; second source slot, which the VOP2 encoding reserves for a VGPR.
1059 (define_insn "add<mode>3_vcc_dup<exec_vcc>"
1060 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1061 (plus:V_SI
1062 (vec_duplicate:V_SI
1063 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
1064 (match_operand:V_SI 2 "register_operand" " v, v")))
1065 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
1066 (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
1067 (match_dup 1))
1068 (vec_duplicate:V_SI (match_dup 2))))]
1069 ""
1070 "v_add%^_u32\t%0, %3, %1, %2"
1071 [(set_attr "type" "vop2,vop3b")
1072 (set_attr "length" "8,8")])
1073
1074 ; v_addc does not accept an SGPR because the VCC read already counts as an
1075 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1076 ; accept "B" immediate constants due to a related bus conflict.
1077
;; V_SI add with carry-in and carry-out.  Operand 3 is the carry-in mask,
;; modelled as a vec_merge that selects 1 or 0 per lane; it is added to
;; operands 1 and 2.  Operand 4 receives the carry-out, described as the
;; IOR of two unsigned comparisons: the full sum against operand 2, and
;; the partial sum (carry-in + operand 1) against operand 1.
1078 (define_insn "addc<mode>3<exec_vcc>"
1079 [(set (match_operand:V_SI 0 "register_operand" "=v, v")
1080 (plus:V_SI
1081 (plus:V_SI
1082 (vec_merge:V_SI
1083 (vec_duplicate:V_SI (const_int 1))
1084 (vec_duplicate:V_SI (const_int 0))
1085 (match_operand:DI 3 "register_operand" " cV,cVSv"))
1086 (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA"))
1087 (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA")))
1088 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
1089 (ior:DI (ltu:DI (plus:V_SI
1090 (plus:V_SI
1091 (vec_merge:V_SI
1092 (vec_duplicate:V_SI (const_int 1))
1093 (vec_duplicate:V_SI (const_int 0))
1094 (match_dup 3))
1095 (match_dup 1))
1096 (match_dup 2))
1097 (match_dup 2))
1098 (ltu:DI (plus:V_SI
1099 (vec_merge:V_SI
1100 (vec_duplicate:V_SI (const_int 1))
1101 (vec_duplicate:V_SI (const_int 0))
1102 (match_dup 3))
1103 (match_dup 1))
1104 (match_dup 1))))]
1105 ""
1106 "v_addc%^_u32\t%0, %4, %2, %1, %3"
1107 [(set_attr "type" "vop2,vop3b")
1108 (set_attr "length" "4,8")])
1109
;; Integer subtract (operand 1 - operand 2) for single-register vector
;; modes.  Alternative 0, where operand 1 has the wider constraint, uses
;; v_sub; alternative 1, where operand 2 has it, uses v_subrev with the
;; sources printed in the opposite order.  Both templates name vcc as a
;; destination, hence the VCC_REG clobber.
1110 (define_insn "sub<mode>3<exec_clobber>"
1111 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
1112 (minus:V_INT_1REG
1113 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
1114 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
1115 (clobber (reg:DI VCC_REG))]
1116 ""
1117 "@
1118 v_sub%^_u32\t%0, vcc, %1, %2
1119 v_subrev%^_u32\t%0, vcc, %2, %1"
1120 [(set_attr "type" "vop2")
1121 (set_attr "length" "8,8")])
1122
;; V_SI subtract that also exposes the borrow: operand 3 is set to the
;; unsigned comparison (operand 1 - operand 2) > operand 1.  Alternatives
;; 0 and 1 use v_sub, alternatives 2 and 3 use v_subrev with the sources
;; printed in the opposite order; within each pair only the borrow
;; destination ("cV" or "Sg") and the insn type differ.
1123 (define_insn "sub<mode>3_vcc<exec_vcc>"
1124 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1125 (minus:V_SI
1126 (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1127 (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1128 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1129 (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
1130 (match_dup 1)))]
1131 ""
1132 "@
1133 v_sub%^_u32\t%0, %3, %1, %2
1134 v_sub%^_u32\t%0, %3, %1, %2
1135 v_subrev%^_u32\t%0, %3, %2, %1
1136 v_subrev%^_u32\t%0, %3, %2, %1"
1137 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1138 (set_attr "length" "8")])
1139
1140 ; v_subb does not accept an SGPR because the VCC read already counts as an
1141 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1142 ; accept "B" immediate constants due to a related bus conflict.
1143
;; V_SI subtract with borrow-in and borrow-out.  Operand 3 is the borrow-in
;; mask, modelled as a vec_merge that selects 1 or 0 per lane; operand 4
;; receives the borrow-out, described as the IOR of two unsigned
;; comparisons on the final and the intermediate difference.  Alternatives
;; 0 and 1 use v_subb, alternatives 2 and 3 use v_subbrev with operands 1
;; and 2 printed in the opposite order.
1144 (define_insn "subc<mode>3<exec_vcc>"
1145 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1146 (minus:V_SI
1147 (minus:V_SI
1148 (vec_merge:V_SI
1149 (vec_duplicate:V_SI (const_int 1))
1150 (vec_duplicate:V_SI (const_int 0))
1151 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
1152 (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
1153 (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
1154 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
1155 (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
1156 (vec_merge:V_SI
1157 (vec_duplicate:V_SI (const_int 1))
1158 (vec_duplicate:V_SI (const_int 0))
1159 (match_dup 3))
1160 (match_dup 1))
1161 (match_dup 2))
1162 (match_dup 2))
1163 (ltu:DI (minus:V_SI (vec_merge:V_SI
1164 (vec_duplicate:V_SI (const_int 1))
1165 (vec_duplicate:V_SI (const_int 0))
1166 (match_dup 3))
1167 (match_dup 1))
1168 (match_dup 1))))]
1169 ""
1170 "@
1171 v_subb%^_u32\t%0, %4, %1, %2, %3
1172 v_subb%^_u32\t%0, %4, %1, %2, %3
1173 v_subbrev%^_u32\t%0, %4, %2, %1, %3
1174 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
1175 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1176 (set_attr "length" "4,8,4,8")])
1177
;; 64-bit vector add.  Emitted as "#" and split, once operands 0, 1 and 2
;; all pass gcn_can_split_p, into add<vnsi>3_vcc on the low parts followed
;; by addc<vnsi>3 on the high parts.  The carry travels between the two
;; through the hard VCC register, which the pattern therefore clobbers.
1178 (define_insn_and_split "add<mode>3"
1179 [(set (match_operand:V_DI 0 "register_operand" "= v")
1180 (plus:V_DI
1181 (match_operand:V_DI 1 "register_operand" "%vDb")
1182 (match_operand:V_DI 2 "gcn_alu_operand" " vDb")))
1183 (clobber (reg:DI VCC_REG))]
1184 ""
1185 "#"
1186 "gcn_can_split_p (<MODE>mode, operands[0])
1187 && gcn_can_split_p (<MODE>mode, operands[1])
1188 && gcn_can_split_p (<MODE>mode, operands[2])"
1189 [(const_int 0)]
1190 {
1191 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
/* Low halves: produces the carry in VCC.  */
1192 emit_insn (gen_add<vnsi>3_vcc
1193 (gcn_operand_part (<MODE>mode, operands[0], 0),
1194 gcn_operand_part (<MODE>mode, operands[1], 0),
1195 gcn_operand_part (<MODE>mode, operands[2], 0),
1196 vcc));
/* High halves: VCC is both the carry-in and the carry-out.  */
1197 emit_insn (gen_addc<vnsi>3
1198 (gcn_operand_part (<MODE>mode, operands[0], 1),
1199 gcn_operand_part (<MODE>mode, operands[1], 1),
1200 gcn_operand_part (<MODE>mode, operands[2], 1),
1201 vcc, vcc));
1202 DONE;
1203 }
1204 [(set_attr "type" "vmult")
1205 (set_attr "length" "8")])
1206
;; 64-bit vector add under an EXEC mask: lanes enabled in operand 4 receive
;; operand 1 + operand 2, the others take operand 3.  Split into
;; add<vnsi>3_vcc_exec on the low parts and addc<vnsi>3_exec on the high
;; parts, with the carry passed through the hard VCC register.
;;
;; The split condition guards every operand that the splitter takes apart
;; with gcn_operand_part, i.e. operands 0, 1, 2 and the merge source in
;; operand 3.  Operand 4 is the DI EXEC mask; it is passed through whole
;; and needs no check.  This matches sub<mode>3_exec and
;; add<mode>3_zext_exec.
1207 (define_insn_and_split "add<mode>3_exec"
1208 [(set (match_operand:V_DI 0 "register_operand" "= v")
1209 (vec_merge:V_DI
1210 (plus:V_DI
1211 (match_operand:V_DI 1 "register_operand" "%vDb")
1212 (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))
1213 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1214 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1215 (clobber (reg:DI VCC_REG))]
1216 ""
1217 "#"
1218 "gcn_can_split_p (<MODE>mode, operands[0])
1219 && gcn_can_split_p (<MODE>mode, operands[1])
1220 && gcn_can_split_p (<MODE>mode, operands[2])
1221 && gcn_can_split_p (<MODE>mode, operands[3])"
1222 [(const_int 0)]
1223 {
1224 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
/* Low halves: produces the carry in VCC.  */
1225 emit_insn (gen_add<vnsi>3_vcc_exec
1226 (gcn_operand_part (<MODE>mode, operands[0], 0),
1227 gcn_operand_part (<MODE>mode, operands[1], 0),
1228 gcn_operand_part (<MODE>mode, operands[2], 0),
1229 vcc,
1230 gcn_operand_part (<MODE>mode, operands[3], 0),
1231 operands[4]));
/* High halves: VCC is both the carry-in and the carry-out.  */
1232 emit_insn (gen_addc<vnsi>3_exec
1233 (gcn_operand_part (<MODE>mode, operands[0], 1),
1234 gcn_operand_part (<MODE>mode, operands[1], 1),
1235 gcn_operand_part (<MODE>mode, operands[2], 1),
1236 vcc, vcc,
1237 gcn_operand_part (<MODE>mode, operands[3], 1),
1238 operands[4]));
1239 DONE;
1240 }
1241 [(set_attr "type" "vmult")
1242 (set_attr "length" "8")])
1243
;; 64-bit vector subtract.  Emitted as "#" and split, once operands 0, 1
;; and 2 all pass gcn_can_split_p, into sub<vnsi>3_vcc on the low parts
;; followed by subc<vnsi>3 on the high parts.  The borrow travels between
;; the two through the hard VCC register, which the pattern clobbers.
1244 (define_insn_and_split "sub<mode>3"
1245 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1246 (minus:V_DI
1247 (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v")
1248 (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb")))
1249 (clobber (reg:DI VCC_REG))]
1250 ""
1251 "#"
1252 "gcn_can_split_p (<MODE>mode, operands[0])
1253 && gcn_can_split_p (<MODE>mode, operands[1])
1254 && gcn_can_split_p (<MODE>mode, operands[2])"
1255 [(const_int 0)]
1256 {
1257 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
/* Low halves: produces the borrow in VCC.  */
1258 emit_insn (gen_sub<vnsi>3_vcc
1259 (gcn_operand_part (<MODE>mode, operands[0], 0),
1260 gcn_operand_part (<MODE>mode, operands[1], 0),
1261 gcn_operand_part (<MODE>mode, operands[2], 0),
1262 vcc));
/* High halves: VCC is both the borrow-in and the borrow-out.  */
1263 emit_insn (gen_subc<vnsi>3
1264 (gcn_operand_part (<MODE>mode, operands[0], 1),
1265 gcn_operand_part (<MODE>mode, operands[1], 1),
1266 gcn_operand_part (<MODE>mode, operands[2], 1),
1267 vcc, vcc));
1268 DONE;
1269 }
1270 [(set_attr "type" "vmult")
1271 (set_attr "length" "8")])
1272
;; 64-bit vector subtract under an EXEC mask: lanes enabled in operand 4
;; receive operand 1 - operand 2, the others take operand 3.  The insn
;; condition requires at least one of operands 1 and 2 to be a register.
;; Split into sub<vnsi>3_vcc_exec on the low parts and subc<vnsi>3_exec on
;; the high parts, with the borrow passed through the hard VCC register.
1273 (define_insn_and_split "sub<mode>3_exec"
1274 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1275 (vec_merge:V_DI
1276 (minus:V_DI
1277 (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v")
1278 (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB"))
1279 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1280 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1281 (clobber (reg:DI VCC_REG))]
1282 "register_operand (operands[1], VOIDmode)
1283 || register_operand (operands[2], VOIDmode)"
1284 "#"
1285 "gcn_can_split_p (<MODE>mode, operands[0])
1286 && gcn_can_split_p (<MODE>mode, operands[1])
1287 && gcn_can_split_p (<MODE>mode, operands[2])
1288 && gcn_can_split_p (<MODE>mode, operands[3])"
1289 [(const_int 0)]
1290 {
1291 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
/* Low halves: produces the borrow in VCC.  */
1292 emit_insn (gen_sub<vnsi>3_vcc_exec
1293 (gcn_operand_part (<MODE>mode, operands[0], 0),
1294 gcn_operand_part (<MODE>mode, operands[1], 0),
1295 gcn_operand_part (<MODE>mode, operands[2], 0),
1296 vcc,
1297 gcn_operand_part (<MODE>mode, operands[3], 0),
1298 operands[4]));
/* High halves: VCC is both the borrow-in and the borrow-out.  */
1299 emit_insn (gen_subc<vnsi>3_exec
1300 (gcn_operand_part (<MODE>mode, operands[0], 1),
1301 gcn_operand_part (<MODE>mode, operands[1], 1),
1302 gcn_operand_part (<MODE>mode, operands[2], 1),
1303 vcc, vcc,
1304 gcn_operand_part (<MODE>mode, operands[3], 1),
1305 operands[4]));
1306 DONE;
1307 }
1308 [(set_attr "type" "vmult")
1309 (set_attr "length" "8")])
1310
;; 64-bit vector add where the first addend is a zero-extended <VnSI>
;; vector (operand 1).  Because the extension contributes nothing to the
;; high word, the split adds operand 1 to the low part of operand 2 and
;; then adds only constant zero plus the carry to the high part of
;; operand 2.  The carry goes through the hard VCC register.
1311 (define_insn_and_split "add<mode>3_zext"
1312 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1313 (plus:V_DI
1314 (zero_extend:V_DI
1315 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1316 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")))
1317 (clobber (reg:DI VCC_REG))]
1318 ""
1319 "#"
1320 "gcn_can_split_p (<MODE>mode, operands[0])
1321 && gcn_can_split_p (<MODE>mode, operands[2])"
1322 [(const_int 0)]
1323 {
1324 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
/* Low word: operand 1 is already 32 bits wide and is used whole.  */
1325 emit_insn (gen_add<vnsi>3_vcc
1326 (gcn_operand_part (<MODE>mode, operands[0], 0),
1327 operands[1],
1328 gcn_operand_part (<MODE>mode, operands[2], 0),
1329 vcc));
/* High word: high part of operand 2 + 0 + carry.  */
1330 emit_insn (gen_addc<vnsi>3
1331 (gcn_operand_part (<MODE>mode, operands[0], 1),
1332 gcn_operand_part (<MODE>mode, operands[2], 1),
1333 const0_rtx, vcc, vcc));
1334 DONE;
1335 }
1336 [(set_attr "type" "vmult")
1337 (set_attr "length" "8")])
1338
;; EXEC-masked form of add<mode>3_zext: lanes enabled in operand 4 receive
;; zero_extend (operand 1) + operand 2, the others take operand 3.  The
;; split mirrors the unmasked pattern, using the _exec generators and the
;; matching half of operand 3 as the merge source for each word.
1339 (define_insn_and_split "add<mode>3_zext_exec"
1340 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1341 (vec_merge:V_DI
1342 (plus:V_DI
1343 (zero_extend:V_DI
1344 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1345 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
1346 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1347 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1348 (clobber (reg:DI VCC_REG))]
1349 ""
1350 "#"
1351 "gcn_can_split_p (<MODE>mode, operands[0])
1352 && gcn_can_split_p (<MODE>mode, operands[2])
1353 && gcn_can_split_p (<MODE>mode, operands[3])"
1354 [(const_int 0)]
1355 {
1356 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
/* Low word: operand 1 is already 32 bits wide and is used whole.  */
1357 emit_insn (gen_add<vnsi>3_vcc_exec
1358 (gcn_operand_part (<MODE>mode, operands[0], 0),
1359 operands[1],
1360 gcn_operand_part (<MODE>mode, operands[2], 0),
1361 vcc,
1362 gcn_operand_part (<MODE>mode, operands[3], 0),
1363 operands[4]));
/* High word: high part of operand 2 + 0 + carry.  */
1364 emit_insn (gen_addc<vnsi>3_exec
1365 (gcn_operand_part (<MODE>mode, operands[0], 1),
1366 gcn_operand_part (<MODE>mode, operands[2], 1),
1367 const0_rtx, vcc, vcc,
1368 gcn_operand_part (<MODE>mode, operands[3], 1),
1369 operands[4]));
1370 DONE;
1371 }
1372 [(set_attr "type" "vmult")
1373 (set_attr "length" "8")])
1374
;; 64-bit vector add where the first addend is a scalar SI (operand 1)
;; broadcast to every lane and zero-extended.  Unlike the patterns that
;; clobber VCC_REG, the carry register is an explicit, earlyclobbered
;; operand (3).  The split adds the scalar to the low part of operand 2 via
;; add<vnsi>3_vcc_dup, then adds zero plus the carry to the high part.
;; NOTE(review): operand 1 is SImode but its part is requested with DImode
;; below; confirm gcn_operand_part handles this as intended.
1375 (define_insn_and_split "add<mode>3_vcc_zext_dup"
1376 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1377 (plus:V_DI
1378 (zero_extend:V_DI
1379 (vec_duplicate:<VnSI>
1380 (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
1381 (match_operand:V_DI 2 "gcn_alu_operand" " vDA, vDb")))
1382 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
1383 (ltu:DI (plus:V_DI
1384 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1385 (match_dup 2))
1386 (match_dup 1)))]
1387 ""
1388 "#"
1389 "gcn_can_split_p (<MODE>mode, operands[0])
1390 && gcn_can_split_p (<MODE>mode, operands[2])"
1391 [(const_int 0)]
1392 {
/* Low word: broadcast scalar + low part of operand 2, carry to operand 3.  */
1393 emit_insn (gen_add<vnsi>3_vcc_dup
1394 (gcn_operand_part (<MODE>mode, operands[0], 0),
1395 gcn_operand_part (DImode, operands[1], 0),
1396 gcn_operand_part (<MODE>mode, operands[2], 0),
1397 operands[3]));
/* High word: high part of operand 2 + 0 + carry.  */
1398 emit_insn (gen_addc<vnsi>3
1399 (gcn_operand_part (<MODE>mode, operands[0], 1),
1400 gcn_operand_part (<MODE>mode, operands[2], 1),
1401 const0_rtx, operands[3], operands[3]));
1402 DONE;
1403 }
1404 [(set_attr "type" "vmult")
1405 (set_attr "length" "8")])
1406
;; Convenience expander for add<mode>3_vcc_zext_dup: forwards operands 0-2
;; unchanged and supplies the hard VCC register as the carry operand.
1407 (define_expand "add<mode>3_zext_dup"
1408 [(match_operand:V_DI 0 "register_operand")
1409 (match_operand:SI 1 "gcn_alu_operand")
1410 (match_operand:V_DI 2 "gcn_alu_operand")]
1411 ""
1412 {
1413 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1414 emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
1415 operands[2], vcc));
1416 DONE;
1417 })
1418
;; EXEC-masked form of add<mode>3_vcc_zext_dup.  Operand 4 is the merge
;; source for disabled lanes and operand 5 the EXEC mask; the carry written
;; to operand 3 is ANDed with that mask in the RTL description.  The split
;; mirrors the unmasked pattern using the _exec generators.
;; NOTE(review): operand 1 is SImode but its part is requested with DImode
;; below; confirm gcn_operand_part handles this as intended.
1419 (define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
1420 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1421 (vec_merge:V_DI
1422 (plus:V_DI
1423 (zero_extend:V_DI
1424 (vec_duplicate:<VnSI>
1425 (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv")))
1426 (match_operand:V_DI 2 "gcn_alu_operand" " vDb, vDA"))
1427 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0, U0")
1428 (match_operand:DI 5 "gcn_exec_reg_operand" " e, e")))
1429 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
1430 (and:DI
1431 (ltu:DI (plus:V_DI
1432 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1433 (match_dup 2))
1434 (match_dup 1))
1435 (match_dup 5)))]
1436 ""
1437 "#"
1438 "gcn_can_split_p (<MODE>mode, operands[0])
1439 && gcn_can_split_p (<MODE>mode, operands[2])
1440 && gcn_can_split_p (<MODE>mode, operands[4])"
1441 [(const_int 0)]
1442 {
/* Low word: broadcast scalar + low part of operand 2, carry to operand 3.  */
1443 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1444 (gcn_operand_part (<MODE>mode, operands[0], 0),
1445 gcn_operand_part (DImode, operands[1], 0),
1446 gcn_operand_part (<MODE>mode, operands[2], 0),
1447 operands[3],
1448 gcn_operand_part (<MODE>mode, operands[4], 0),
1449 operands[5]));
/* High word: high part of operand 2 + 0 + carry.  */
1450 emit_insn (gen_addc<vnsi>3_exec
1451 (gcn_operand_part (<MODE>mode, operands[0], 1),
1452 gcn_operand_part (<MODE>mode, operands[2], 1),
1453 const0_rtx, operands[3], operands[3],
1454 gcn_operand_part (<MODE>mode, operands[4], 1),
1455 operands[5]));
1456 DONE;
1457 }
1458 [(set_attr "type" "vmult")
1459 (set_attr "length" "8")])
1460
;; Convenience expander for add<mode>3_vcc_zext_dup_exec: supplies the hard
;; VCC register as the carry operand.  Note the renumbering: this
;; expander's operands 3 (merge source) and 4 (EXEC mask) become operands 4
;; and 5 of the insn, because the insn's operand 3 is the carry.
1461 (define_expand "add<mode>3_zext_dup_exec"
1462 [(match_operand:V_DI 0 "register_operand")
1463 (match_operand:SI 1 "gcn_alu_operand")
1464 (match_operand:V_DI 2 "gcn_alu_operand")
1465 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1466 (match_operand:DI 4 "gcn_exec_reg_operand")]
1467 ""
1468 {
1469 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1470 emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
1471 operands[2], vcc, operands[3],
1472 operands[4]));
1473 DONE;
1474 })
1475
;; 64-bit vector add of a zero-extended <VnSI> vector (operand 1) and a
;; scalar DI (operand 2) broadcast to every lane, with the carry in the
;; explicit, earlyclobbered operand 3.  The split adds the low word of the
;; scalar to operand 1, broadcasts the high word of the scalar into the
;; high half of the destination, and then adds zero plus the carry to it.
1476 (define_insn_and_split "add<mode>3_vcc_zext_dup2"
1477 [(set (match_operand:V_DI 0 "register_operand" "= v")
1478 (plus:V_DI
1479 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
1480 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" " DbSv"))))
1481 (set (match_operand:DI 3 "register_operand" "=&SgcV")
1482 (ltu:DI (plus:V_DI
1483 (zero_extend:V_DI (match_dup 1))
1484 (vec_duplicate:V_DI (match_dup 2)))
1485 (match_dup 1)))]
1486 ""
1487 "#"
1488 "gcn_can_split_p (<MODE>mode, operands[0])"
1489 [(const_int 0)]
1490 {
/* Low word: low part of the scalar + operand 1, carry to operand 3.  */
1491 emit_insn (gen_add<vnsi>3_vcc_dup
1492 (gcn_operand_part (<MODE>mode, operands[0], 0),
1493 gcn_operand_part (DImode, operands[2], 0),
1494 operands[1],
1495 operands[3]));
/* High word: broadcast the scalar's high part, then add 0 + carry.  */
1496 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1497 emit_insn (gen_vec_duplicate<vnsi>
1498 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1499 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
1500 operands[3]));
1501 DONE;
1502 }
1503 [(set_attr "type" "vmult")
1504 (set_attr "length" "8")])
1505
;; Convenience expander for add<mode>3_vcc_zext_dup2: forwards operands 0-2
;; unchanged and supplies the hard VCC register as the carry operand.
1506 (define_expand "add<mode>3_zext_dup2"
1507 [(match_operand:V_DI 0 "register_operand")
1508 (match_operand:<VnSI> 1 "gcn_alu_operand")
1509 (match_operand:DI 2 "gcn_alu_operand")]
1510 ""
1511 {
1512 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1513 emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
1514 operands[2], vcc));
1515 DONE;
1516 })
1517
;; EXEC-masked form of add<mode>3_vcc_zext_dup2.  Operand 4 is the merge
;; source for disabled lanes and operand 5 the EXEC mask; the carry written
;; to operand 3 is ANDed with that mask in the RTL description.  The split
;; mirrors the unmasked pattern using the _exec generators; both the
;; broadcast and the final add-with-carry on the high word use the high
;; part of operand 4 as their merge source.
1518 (define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
1519 [(set (match_operand:V_DI 0 "register_operand" "= v")
1520 (vec_merge:V_DI
1521 (plus:V_DI
1522 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1523 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1524 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0")
1525 (match_operand:DI 5 "gcn_exec_reg_operand" " e")))
1526 (set (match_operand:DI 3 "register_operand" "=&SgcV")
1527 (and:DI
1528 (ltu:DI (plus:V_DI
1529 (zero_extend:V_DI (match_dup 1))
1530 (vec_duplicate:V_DI (match_dup 2)))
1531 (match_dup 1))
1532 (match_dup 5)))]
1533 ""
1534 "#"
1535 "gcn_can_split_p (<MODE>mode, operands[0])
1536 && gcn_can_split_p (<MODE>mode, operands[4])"
1537 [(const_int 0)]
1538 {
/* Low word: low part of the scalar + operand 1, carry to operand 3.  */
1539 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1540 (gcn_operand_part (<MODE>mode, operands[0], 0),
1541 gcn_operand_part (DImode, operands[2], 0),
1542 operands[1],
1543 operands[3],
1544 gcn_operand_part (<MODE>mode, operands[4], 0),
1545 operands[5]));
/* High word: broadcast the scalar's high part, then add 0 + carry.  */
1546 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1547 emit_insn (gen_vec_duplicate<vnsi>_exec
1548 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1549 gcn_operand_part (<MODE>mode, operands[4], 1),
1550 operands[5]));
1551 emit_insn (gen_addc<vnsi>3_exec
1552 (dsthi, dsthi, const0_rtx, operands[3], operands[3],
1553 gcn_operand_part (<MODE>mode, operands[4], 1),
1554 operands[5]));
1555 DONE;
1556 }
1557 [(set_attr "type" "vmult")
1558 (set_attr "length" "8")])
1559
;; Convenience expander for add<mode>3_vcc_zext_dup2_exec: supplies the
;; hard VCC register as the carry operand.  Note the renumbering: this
;; expander's operands 3 (merge source) and 4 (EXEC mask) become operands 4
;; and 5 of the insn, because the insn's operand 3 is the carry.
1560 (define_expand "add<mode>3_zext_dup2_exec"
1561 [(match_operand:V_DI 0 "register_operand")
1562 (match_operand:<VnSI> 1 "gcn_alu_operand")
1563 (match_operand:DI 2 "gcn_alu_operand")
1564 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1565 (match_operand:DI 4 "gcn_exec_reg_operand")]
1566 ""
1567 {
1568 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1569 emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
1570 operands[2], vcc,
1571 operands[3], operands[4]));
1572 DONE;
1573 })
1574
;; 64-bit vector add of a sign-extended <VnSI> vector (operand 1) and a
;; scalar DI (operand 2) broadcast to every lane.  The scratch operand 3
;; receives operand 1 shifted right arithmetically by 31, i.e. the high
;; word of the sign extension.  The low words are then added with carry
;; into VCC, the scalar's high word is broadcast into the destination's
;; high half, and the scratch plus the carry are added to that.
1575 (define_insn_and_split "add<mode>3_sext_dup2"
1576 [(set (match_operand:V_DI 0 "register_operand" "= v")
1577 (plus:V_DI
1578 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
1579 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1580 (clobber (match_scratch:<VnSI> 3 "=&v"))
1581 (clobber (reg:DI VCC_REG))]
1582 ""
1583 "#"
1584 "gcn_can_split_p (<MODE>mode, operands[0])"
1585 [(const_int 0)]
1586 {
1587 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
/* High word of sign_extend (operand 1): all copies of its sign bit.  */
1588 emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31)));
/* Low word: low part of the scalar + operand 1, carry to VCC.  */
1589 emit_insn (gen_add<vnsi>3_vcc_dup
1590 (gcn_operand_part (<MODE>mode, operands[0], 0),
1591 gcn_operand_part (DImode, operands[2], 0),
1592 operands[1],
1593 vcc));
/* High word: broadcast scalar high part + sign word + carry.  */
1594 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1595 emit_insn (gen_vec_duplicate<vnsi>
1596 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1597 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc));
1598 DONE;
1599 }
1600 [(set_attr "type" "vmult")
1601 (set_attr "length" "8")])
1602
;; EXEC-masked form of add<mode>3_sext_dup2.  Operand 3 is the merge source
;; for disabled lanes, operand 4 the EXEC mask and operand 5 the scratch
;; that receives the sign word of operand 1.  The sign word is computed
;; with an undefined merge source (gcn_gen_undef); the remaining steps
;; mirror the unmasked pattern using the _exec generators.
1603 (define_insn_and_split "add<mode>3_sext_dup2_exec"
1604 [(set (match_operand:V_DI 0 "register_operand" "= v")
1605 (vec_merge:V_DI
1606 (plus:V_DI
1607 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1608 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1609 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1610 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1611 (clobber (match_scratch:<VnSI> 5 "=&v"))
1612 (clobber (reg:DI VCC_REG))]
1613 ""
1614 "#"
1615 "gcn_can_split_p (<MODE>mode, operands[0])
1616 && gcn_can_split_p (<MODE>mode, operands[3])"
1617 [(const_int 0)]
1618 {
1619 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
/* High word of sign_extend (operand 1): all copies of its sign bit.  */
1620 emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31),
1621 gcn_gen_undef (<VnSI>mode), operands[4]));
/* Low word: low part of the scalar + operand 1, carry to VCC.  */
1622 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1623 (gcn_operand_part (<MODE>mode, operands[0], 0),
1624 gcn_operand_part (DImode, operands[2], 0),
1625 operands[1],
1626 vcc,
1627 gcn_operand_part (<MODE>mode, operands[3], 0),
1628 operands[4]));
/* High word: broadcast scalar high part + sign word + carry.  */
1629 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1630 emit_insn (gen_vec_duplicate<vnsi>_exec
1631 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1632 gcn_operand_part (<MODE>mode, operands[3], 1),
1633 operands[4]));
1634 emit_insn (gen_addc<vnsi>3_exec
1635 (dsthi, dsthi, operands[5], vcc, vcc,
1636 gcn_operand_part (<MODE>mode, operands[3], 1),
1637 operands[4]));
1638 DONE;
1639 }
1640 [(set_attr "type" "vmult")
1641 (set_attr "length" "8")])
1642
1643 ;; }}}
1644 ;; {{{ DS memory ALU: add/sub
1645
;; Modes for which the DS memory add/sub patterns below are instantiated:
;; one iterator for the vector forms and one for the scalar forms.
1646 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
1647 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1648
1649 ;; FIXME: the vector patterns probably need RD expanded to a vector of
1650 ;; addresses. For now, the only way a vector can get into LDS is
1651 ;; if the user puts it there manually.
1652 ;;
1653 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1654 ;; checked to see if anything can ever use them.
1655
;; In-place add on DS memory (vector modes).  The insn condition requires
;; the destination and the first source to be the same memory operand, so
;; the template only prints that address once, together with the register
;; addend in operand 2.
1656 (define_insn "add<mode>3_ds<exec>"
1657 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1658 (plus:DS_ARITH_MODE
1659 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1660 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1661 "rtx_equal_p (operands[0], operands[1])"
1662 "ds_add%u0\t%A0, %2%O0"
1663 [(set_attr "type" "ds")
1664 (set_attr "length" "8")])
1665
;; Scalar-mode counterpart of add<mode>3_ds: the same in-place DS memory
;; add, instantiated for DS_ARITH_SCALAR_MODE, with the same condition and
;; output template.
1666 (define_insn "add<mode>3_ds_scalar"
1667 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1668 (plus:DS_ARITH_SCALAR_MODE
1669 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1670 "%RD")
1671 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1672 "rtx_equal_p (operands[0], operands[1])"
1673 "ds_add%u0\t%A0, %2%O0"
1674 [(set_attr "type" "ds")
1675 (set_attr "length" "8")])
1676
;; In-place subtract on DS memory (vector modes): memory = memory - operand
;; 2.  The insn condition requires the destination and the minuend to be
;; the same memory operand.
1677 (define_insn "sub<mode>3_ds<exec>"
1678 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1679 (minus:DS_ARITH_MODE
1680 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1681 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1682 "rtx_equal_p (operands[0], operands[1])"
1683 "ds_sub%u0\t%A0, %2%O0"
1684 [(set_attr "type" "ds")
1685 (set_attr "length" "8")])
1686
;; Scalar-mode counterpart of sub<mode>3_ds: memory = memory - operand 2,
;; instantiated for DS_ARITH_SCALAR_MODE.
1687 (define_insn "sub<mode>3_ds_scalar"
1688 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1689 (minus:DS_ARITH_SCALAR_MODE
1690 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1691 " RD")
1692 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1693 "rtx_equal_p (operands[0], operands[1])"
1694 "ds_sub%u0\t%A0, %2%O0"
1695 [(set_attr "type" "ds")
1696 (set_attr "length" "8")])
1697
;; In-place reverse subtract on DS memory (vector modes): memory = operand
;; 2 - memory.  Here the memory operand is the subtrahend; the insn
;; condition still requires it to be the same as the destination.
1698 (define_insn "subr<mode>3_ds<exec>"
1699 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1700 (minus:DS_ARITH_MODE
1701 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1702 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1703 "rtx_equal_p (operands[0], operands[1])"
1704 "ds_rsub%u0\t%A0, %2%O0"
1705 [(set_attr "type" "ds")
1706 (set_attr "length" "8")])
1707
;; Scalar-mode counterpart of subr<mode>3_ds: memory = operand 2 - memory,
;; instantiated for DS_ARITH_SCALAR_MODE.
1708 (define_insn "subr<mode>3_ds_scalar"
1709 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1710 (minus:DS_ARITH_SCALAR_MODE
1711 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1712 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1713 " RD")))]
1714 "rtx_equal_p (operands[0], operands[1])"
1715 "ds_rsub%u0\t%A0, %2%O0"
1716 [(set_attr "type" "ds")
1717 (set_attr "length" "8")])
1718
1719 ;; }}}
1720 ;; {{{ ALU special case: mult
1721
;; High half of a widening V_SI multiply: both operands are extended to
;; <VnDI> (signed or unsigned, per the any_extend iterator), multiplied,
;; shifted right by 32 and truncated back to V_SI.  Operand 2 is printed
;; before operand 1 in the assembly.
1722 (define_insn "<su>mul<mode>3_highpart<exec>"
1723 [(set (match_operand:V_SI 0 "register_operand" "= v")
1724 (truncate:V_SI
1725 (lshiftrt:<VnDI>
1726 (mult:<VnDI>
1727 (any_extend:<VnDI>
1728 (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
1729 (any_extend:<VnDI>
1730 (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
1731 (const_int 32))))]
1732 ""
1733 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1734 [(set_attr "type" "vop3a")
1735 (set_attr "length" "8")])
1736
;; Integer multiply for single-register vector modes.  Every mode in
;; V_INT_1REG is emitted with the same v_mul_lo_u32 instruction.
1737 (define_insn "mul<mode>3<exec>"
1738 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1739 (mult:V_INT_1REG
1740 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1741 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
1742 ""
1743 "v_mul_lo_u32\t%0, %1, %2"
1744 [(set_attr "type" "vop3a")
1745 (set_attr "length" "8")])
1746
;; As mul<mode>3, but the second factor is a scalar (operand 2) broadcast
;; to every lane with vec_duplicate.  The output template is unchanged.
1747 (define_insn "mul<mode>3_dup<exec>"
1748 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1749 (mult:V_INT_1REG
1750 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1751 (vec_duplicate:V_INT_1REG
1752 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
1753 ""
1754 "v_mul_lo_u32\t%0, %1, %2"
1755 [(set_attr "type" "vop3a")
1756 (set_attr "length" "8")])
1757
;; 64-bit vector multiply, split after reload into 32-bit operations using
;; the scratch vector in operand 3.  Writing each factor as
;; lo + hi * 2^32, the product modulo 2^64 is
;;   low word  = lo (left_lo * right_lo)
;;   high word = hi (left_lo * right_lo)
;;               + lo (left_hi * right_lo)
;;               + lo (left_lo * right_hi).
;; The left_hi * right_hi term is scaled by 2^64, so it contributes nothing
;; to a 64-bit result and must not be added to the high word.  (Adding it
;; would, for example, make 2^32 * 2^32 yield 2^32 instead of 0.)
1758 (define_insn_and_split "mul<mode>3"
1759 [(set (match_operand:V_DI 0 "register_operand" "=&v")
1760 (mult:V_DI
1761 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
1762 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
1763 (clobber (match_scratch:<VnSI> 3 "=&v"))]
1764 ""
1765 "#"
1766 "reload_completed"
1767 [(const_int 0)]
1768 {
1769 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1770 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1771 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
1772 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
1773 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1774 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1775 rtx tmp = operands[3];
1776
/* Low word, and the part of lo * lo that spills into the high word.  */
1777 emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
1778 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));
/* The two cross products only affect the high word.  */
1779 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
1780 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1781 emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
1782 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1785 DONE;
1786 })
1787
;; EXEC-masked 64-bit vector multiply: lanes enabled in operand 4 receive
;; operand 1 * operand 2, the others take operand 3.  Split after reload
;; into 32-bit operations using the scratch vector in operand 5.  Writing
;; each factor as lo + hi * 2^32, the product modulo 2^64 is
;;   low word  = lo (left_lo * right_lo)
;;   high word = hi (left_lo * right_lo)
;;               + lo (left_hi * right_lo)
;;               + lo (left_lo * right_hi).
;; The left_hi * right_hi term is scaled by 2^64, so it contributes nothing
;; to a 64-bit result and must not be added to the high word.
1788 (define_insn_and_split "mul<mode>3_exec"
1789 [(set (match_operand:V_DI 0 "register_operand" "=&v")
1790 (vec_merge:V_DI
1791 (mult:V_DI
1792 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
1793 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
1794 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1795 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1796 (clobber (match_scratch:<VnSI> 5 "=&v"))]
1797 ""
1798 "#"
1799 "reload_completed"
1800 [(const_int 0)]
1801 {
1802 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1803 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1804 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
1805 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
1806 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1807 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1808 rtx exec = operands[4];
1809 rtx tmp = operands[5];
1810
/* Merge sources for the two result words: undefined when operand 3 is an
   UNSPEC, otherwise the matching half of operand 3.  */
1811 rtx old_lo, old_hi;
1812 if (GET_CODE (operands[3]) == UNSPEC)
1813 {
1814 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
1815 }
1816 else
1817 {
1818 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
1819 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
1820 }
1821
/* Disabled lanes of the scratch are never read, so it merges with undef.  */
1822 rtx undef = gcn_gen_undef (<VnSI>mode);
1823
/* Low word, and the part of lo * lo that spills into the high word.  */
1824 emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
1825 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
1826 old_hi, exec));
/* The two cross products only affect the high word.  The accumulating
   adds merge with out_hi itself so disabled lanes keep their old value.  */
1827 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
1828 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
1829 emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
1830 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
1833 DONE;
1834 })
1835
;; 64-bit vector multiply where the left factor is a zero-extended <VnSI>
;; vector (operand 1).  Split after reload: the low word is
;; left * right_lo, and the high word is the unsigned high part of that
;; same product plus the low 32 bits of left * right_hi, computed through
;; the scratch vector in operand 3.
1836 (define_insn_and_split "mul<mode>3_zext"
1837 [(set (match_operand:V_DI 0 "register_operand" "=&v")
1838 (mult:V_DI
1839 (zero_extend:V_DI
1840 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
1841 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
1842 (clobber (match_scratch:<VnSI> 3 "=&v"))]
1843 ""
1844 "#"
1845 "reload_completed"
1846 [(const_int 0)]
1847 {
1848 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1849 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1850 rtx left = operands[1];
1851 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1852 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1853 rtx tmp = operands[3];
1854
/* Low word, and the part of left * right_lo that spills into the high
   word.  */
1855 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
1856 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
/* The single cross product only affects the high word.  */
1857 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
1858 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1859 DONE;
1860 })
1861
;; Masked form of the zero-extended multiply: operand 4 is the exec mask and
;; operand 3 supplies the previous destination value, or is an UNSPEC when
;; there is none.  Operand 5 is a <VnSI> scratch.  Split after reload.
(define_insn_and_split "mul<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
	(vec_merge:V_DI
	  (mult:V_DI
	    (zero_extend:V_DI
	      (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
	    (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx narrow = operands[1];
    rtx wide = operands[2];
    rtx prev = operands[3];
    rtx mask = operands[4];
    rtx scratch = operands[5];

    rtx dest_lo = gcn_operand_part (<MODE>mode, dest, 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, dest, 1);
    rtx wide_lo = gcn_operand_part (<MODE>mode, wide, 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, wide, 1);

    /* Halves of the merge source; one shared undef value when the
       previous value is an UNSPEC.  */
    rtx prev_lo, prev_hi;
    if (GET_CODE (prev) != UNSPEC)
      {
	prev_lo = gcn_operand_part (<MODE>mode, prev, 0);
	prev_hi = gcn_operand_part (<MODE>mode, prev, 1);
      }
    else
      prev_lo = prev_hi = gcn_gen_undef (<VnSI>mode);

    /* The scratch has no previous value to merge with.  */
    rtx no_prev = gcn_gen_undef (<VnSI>mode);

    emit_insn (gen_mul<vnsi>3_exec (dest_lo, narrow, wide_lo, prev_lo, mask));
    emit_insn (gen_umul<vnsi>3_highpart_exec (dest_hi, narrow, wide_lo,
					      prev_hi, mask));
    emit_insn (gen_mul<vnsi>3_exec (scratch, narrow, wide_hi, no_prev, mask));
    /* dest_hi is its own merge source here.  */
    emit_insn (gen_add<vnsi>3_exec (dest_hi, dest_hi, scratch, dest_hi, mask));
    DONE;
  })
1905
;; Multiply a zero-extended <VnSI> vector (operand 1) by a vec_duplicate of
;; the DImode scalar operand 2; operand 3 is a <VnSI> scratch.  Split after
;; reload into 32-bit multiplies plus an add.
;; NOTE(review): operand 2 is DImode, yet it is split with
;; gcn_operand_part (<MODE>mode, ...) and passed to the vector mul<vnsi>3
;; generators -- confirm this is the intended handling of the scalar.
1906 (define_insn_and_split "mul<mode>3_zext_dup2"
1907 [(set (match_operand:V_DI 0 "register_operand" "= &v")
1908 (mult:V_DI
1909 (zero_extend:V_DI
1910 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
1911 (vec_duplicate:V_DI
1912 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
1913 (clobber (match_scratch:<VnSI> 3 "= &v"))]
1914 ""
1915 "#"
1916 "reload_completed"
1917 [(const_int 0)]
1918 {
1919 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1920 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1921 rtx left = operands[1];
1922 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1923 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1924 rtx tmp = operands[3];
1925
1926 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
1927 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
1928 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
1929 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1930 DONE;
1931 })
1932
;; Masked form of mul<mode>3_zext_dup2: operand 4 is the exec mask, operand 3
;; the previous destination value (or an UNSPEC, in which case undef halves
;; are used), operand 5 a <VnSI> scratch.  Split after reload.
;; NOTE(review): as in the unmasked pattern, the DImode operand 2 is split
;; with the vector mode <MODE>mode -- confirm this is intended.
1933 (define_insn_and_split "mul<mode>3_zext_dup2_exec"
1934 [(set (match_operand:V_DI 0 "register_operand" "= &v")
1935 (vec_merge:V_DI
1936 (mult:V_DI
1937 (zero_extend:V_DI
1938 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
1939 (vec_duplicate:V_DI
1940 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
1941 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1942 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1943 (clobber (match_scratch:<VnSI> 5 "= &v"))]
1944 ""
1945 "#"
1946 "reload_completed"
1947 [(const_int 0)]
1948 {
1949 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1950 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1951 rtx left = operands[1];
1952 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1953 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1954 rtx exec = operands[4];
1955 rtx tmp = operands[5];
1956
1957 rtx old_lo, old_hi;
1958 if (GET_CODE (operands[3]) == UNSPEC)
1959 {
1960 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
1961 }
1962 else
1963 {
1964 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
1965 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
1966 }
1967
1968 rtx undef = gcn_gen_undef (<VnSI>mode);
1969
1970 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
1971 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
1972 old_hi, exec));
1973 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
1974 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
1975 DONE;
1976 })
1977
1978 ;; }}}
1979 ;; {{{ ALU generic case
1980
; RTL code groups iterated over by the integer ALU patterns below:
; bitwise operations, shifts, and signed/unsigned min/max.
1981 (define_code_iterator bitop [and ior xor])
1982 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
1983 (define_code_iterator minmaxop [smin smax umin umax])
1984
; One-input integer operation on single-register integer vectors.  The
; bitunop iterator is not defined in this part of the file.
1985 (define_insn "<expander><mode>2<exec>"
1986 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
1987 (bitunop:V_INT_1REG
1988 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
1989 ""
1990 "v_<mnemonic>0\t%0, %1"
1991 [(set_attr "type" "vop1")
1992 (set_attr "length" "8")])
1993
; Bitwise and/ior/xor on single-register integer vectors.  The first
; alternative prints a v_ instruction with the sources swapped (%2, %1);
; the second ties operand 1 to the destination ("0") and prints a ds_
; instruction.
1994 (define_insn "<expander><mode>3<exec>"
1995 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
1996 (bitop:V_INT_1REG
1997 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
1998 (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
1999 ""
2000 "@
2001 v_<mnemonic>0\t%0, %2, %1
2002 ds_<mnemonic>0\t%A0, %2%O0"
2003 [(set_attr "type" "vop2,ds")
2004 (set_attr "length" "8,8")])
2005
; Bitwise and/ior/xor on V_DI vectors.  The first alternative ("#") is split
; after reload into two <VnSI> operations, one per half, unless operand 0 is
; a DS memory operand; the second alternative prints a single ds_
; instruction.  The preparation code fills operands 3-8 with the low (part 0)
; and high (part 1) halves of operands 0, 1 and 2.
2006 (define_insn_and_split "<expander><mode>3"
2007 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2008 (bitop:V_DI
2009 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2010 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2011 ""
2012 "@
2013 #
2014 ds_<mnemonic>0\t%A0, %2%O0"
2015 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2016 [(set (match_dup 3)
2017 (bitop:<VnSI> (match_dup 5) (match_dup 7)))
2018 (set (match_dup 4)
2019 (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
2020 {
2021 operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0);
2022 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1);
2023 operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0);
2024 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1);
2025 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0);
2026 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1);
2027 }
2028 [(set_attr "type" "vmult,ds")
2029 (set_attr "length" "16,8")])
2030
; Masked V_DI bitwise operation: operand 4 is the exec mask and operand 3 the
; merge source.  The insn condition rejects a memory destination unless it
; equals operand 1 and operand 2 is a register.  After reload the first
; alternative is split into two masked <VnSI> operations; the preparation
; code fills operands 5-12 with the low/high halves of operands 0-3.
2031 (define_insn_and_split "<expander><mode>3_exec"
2032 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2033 (vec_merge:V_DI
2034 (bitop:V_DI
2035 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2036 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2037 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
2038 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2039 "!memory_operand (operands[0], VOIDmode)
2040 || (rtx_equal_p (operands[0], operands[1])
2041 && register_operand (operands[2], VOIDmode))"
2042 "@
2043 #
2044 ds_<mnemonic>0\t%A0, %2%O0"
2045 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2046 [(set (match_dup 5)
2047 (vec_merge:<VnSI>
2048 (bitop:<VnSI> (match_dup 7) (match_dup 9))
2049 (match_dup 11)
2050 (match_dup 4)))
2051 (set (match_dup 6)
2052 (vec_merge:<VnSI>
2053 (bitop:<VnSI> (match_dup 8) (match_dup 10))
2054 (match_dup 12)
2055 (match_dup 4)))]
2056 {
2057 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0);
2058 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1);
2059 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0);
2060 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1);
2061 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0);
2062 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1);
2063 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0);
2064 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1);
2065 }
2066 [(set_attr "type" "vmult,ds")
2067 (set_attr "length" "16,8")])
2068
;; Shift a QI/HI vector by one SImode amount shared by all lanes.  The
;; operation is carried out in <VnSI>: the input is widened (zero-extended
;; for lshiftrt, sign-extended otherwise), shifted there, and the result is
;; converted back to the narrow mode.
(define_expand "<expander><mode>3"
  [(set (match_operand:V_QIHI 0 "register_operand" "= v")
	(shiftop:V_QIHI
	  (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
	  (vec_duplicate:V_QIHI
	    (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
  ""
  {
    /* Local names for the lower-case iterator value to compare against.  */
    enum {ashift, lshiftrt, ashiftrt};
    const bool zero_fill = (<code> == lshiftrt);

    rtx wide_src = gen_reg_rtx (<VnSI>mode);
    rtx amount = gen_reg_rtx (SImode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_src, operands[1], zero_fill);
    convert_move (amount, operands[2], zero_fill);
    emit_insn (gen_<expander><vnsi>3 (wide_dst, wide_src, amount));
    convert_move (operands[0], wide_dst, zero_fill);
    DONE;
  })
2089
; Shift every lane of operand 1 by the single SImode amount in operand 2
; (wrapped in vec_duplicate).  The <revmnemonic> form is printed with the
; shift amount (%2) ahead of the shifted value (%1).
2090 (define_insn "<expander><mode>3<exec>"
2091 [(set (match_operand:V_INT_noHI 0 "register_operand" "= v")
2092 (shiftop:V_INT_noHI
2093 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2094 (vec_duplicate:<VnSI>
2095 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2096 ""
2097 "v_<revmnemonic>0\t%0, %2, %1"
2098 [(set_attr "type" "vop2")
2099 (set_attr "length" "8")])
2100
;; Shift a QI/HI vector by per-lane amounts held in a second QI/HI vector.
;; Both inputs are widened to <VnSI> (zero-extended for lshiftrt,
;; sign-extended otherwise), shifted there, and the result is converted back.
(define_expand "v<expander><mode>3"
  [(set (match_operand:V_QIHI 0 "register_operand" "=v")
	(shiftop:V_QIHI
	  (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
	  (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
  ""
  {
    /* Local names for the lower-case iterator value to compare against.  */
    enum {ashift, lshiftrt, ashiftrt};
    const bool zero_fill = (<code> == lshiftrt);

    rtx wide_src = gen_reg_rtx (<VnSI>mode);
    rtx wide_amounts = gen_reg_rtx (<VnSI>mode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_src, operands[1], zero_fill);
    convert_move (wide_amounts, operands[2], zero_fill);
    emit_insn (gen_v<expander><vnsi>3 (wide_dst, wide_src, wide_amounts));
    convert_move (operands[0], wide_dst, zero_fill);
    DONE;
  })
2120
; Shift each lane of operand 1 by the matching lane of the <VnSI> vector in
; operand 2.  The <revmnemonic> form is printed with the amounts (%2) ahead
; of the shifted value (%1).
2121 (define_insn "v<expander><mode>3<exec>"
2122 [(set (match_operand:V_INT_noHI 0 "register_operand" "=v")
2123 (shiftop:V_INT_noHI
2124 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2125 (match_operand:<VnSI> 2 "gcn_alu_operand" "vB")))]
2126 ""
2127 "v_<revmnemonic>0\t%0, %2, %1"
2128 [(set_attr "type" "vop2")
2129 (set_attr "length" "8")])
2130
;; Min/max on QI/HI vectors, carried out in <VnSI>: both inputs are widened
;; (zero-extended for umin/umax, sign-extended for smin/smax), the <VnSI>
;; min/max is emitted, and the result is converted back to the narrow mode.
(define_expand "<expander><mode>3"
  [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
	(minmaxop:V_QIHI
	  (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
	  (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
  ""
  {
    /* Local names for the lower-case iterator value to compare against.  */
    enum {smin, umin, smax, umax};
    const bool zero_extend_p = (<code> == umax || <code> == umin);

    rtx wide_a = gen_reg_rtx (<VnSI>mode);
    rtx wide_b = gen_reg_rtx (<VnSI>mode);
    rtx wide_result = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_a, operands[1], zero_extend_p);
    convert_move (wide_b, operands[2], zero_extend_p);
    emit_insn (gen_<code><vnsi>3 (wide_result, wide_a, wide_b));
    convert_move (operands[0], wide_result, zero_extend_p);
    DONE;
  })
2150
; Signed/unsigned min/max on V_SI vectors.  The first alternative prints a
; v_ instruction with the sources swapped (%2, %1); the second ties operand 1
; to the destination ("0") and prints a ds_ instruction.
2151 (define_insn "<expander><vnsi>3<exec>"
2152 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD")
2153 (minmaxop:V_SI
2154 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0")
2155 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2156 ""
2157 "@
2158 v_<mnemonic>0\t%0, %2, %1
2159 ds_<mnemonic>0\t%A0, %2%O0"
2160 [(set_attr "type" "vop2,ds")
2161 (set_attr "length" "8,8")])
2162
2163 ;; }}}
2164 ;; {{{ FP binops - special cases
2165
2166 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2167 ; adding the negated second operand to the first.
2168
; Vector DF subtraction, printed as v_add_f64 with operand 2 negated.  The
; two alternatives differ only in which source slot the negated operand
; occupies.
2169 (define_insn "sub<mode>3<exec>"
2170 [(set (match_operand:V_DF 0 "register_operand" "= v, v")
2171 (minus:V_DF
2172 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v")
2173 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))]
2174 ""
2175 "@
2176 v_add_f64\t%0, %1, -%2
2177 v_add_f64\t%0, -%2, %1"
2178 [(set_attr "type" "vop3a")
2179 (set_attr "length" "8,8")])
2180
; Scalar DF subtraction, printed as v_add_f64 with operand 2 negated.  The
; two alternatives differ only in which source slot the negated operand
; occupies.
2181 (define_insn "subdf3"
2182 [(set (match_operand:DF 0 "register_operand" "= v, v")
2183 (minus:DF
2184 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2185 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2186 ""
2187 "@
2188 v_add_f64\t%0, %1, -%2
2189 v_add_f64\t%0, -%2, %1"
2190 [(set_attr "type" "vop3a")
2191 (set_attr "length" "8,8")])
2192
2193 ;; }}}
2194 ;; {{{ FP binops - generic
2195
; FP binary operation groups: comm_fp holds the commutative codes, nocomm_fp
; holds minus, and all_fp is the union of the two.
2196 (define_code_iterator comm_fp [plus mult smin smax])
2197 (define_code_iterator nocomm_fp [minus])
2198 (define_code_iterator all_fp [plus mult minus smin smax])
2199
; Commutative FP operations (plus, mult, smin, smax) on FP vectors; the
; sources are printed swapped (%2, %1).
2200 (define_insn "<expander><mode>3<exec>"
2201 [(set (match_operand:V_FP 0 "register_operand" "= v")
2202 (comm_fp:V_FP
2203 (match_operand:V_FP 1 "gcn_alu_operand" "% v")
2204 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
2205 ""
2206 "v_<mnemonic>0\t%0, %2, %1"
2207 [(set_attr "type" "vop2")
2208 (set_attr "length" "8")])
2209
; Commutative FP operations on scalar FP modes.
; NOTE(review): the second alternative is typed "ds" but its template uses
; a v_ mnemonic together with %O0, and "length" lists one value for two
; alternatives -- confirm against the other ds alternatives in this file.
2210 (define_insn "<expander><mode>3"
2211 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
2212 (comm_fp:FP
2213 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
2214 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2215 ""
2216 "@
2217 v_<mnemonic>0\t%0, %2, %1
2218 v_<mnemonic>0\t%0, %1%O0"
2219 [(set_attr "type" "vop2,ds")
2220 (set_attr "length" "8")])
2221
; Non-commutative FP operation (minus) on single-register FP vectors.  The
; first alternative prints <mnemonic> with the operands in order; the second
; prints <revmnemonic> with them swapped.
2222 (define_insn "<expander><mode>3<exec>"
2223 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
2224 (nocomm_fp:V_FP_1REG
2225 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2226 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2227 ""
2228 "@
2229 v_<mnemonic>0\t%0, %1, %2
2230 v_<revmnemonic>0\t%0, %2, %1"
2231 [(set_attr "type" "vop2")
2232 (set_attr "length" "8,8")])
2233
; Non-commutative FP operation (minus) on single-register scalar FP modes.
; The first alternative prints <mnemonic> with the operands in order; the
; second prints <revmnemonic> with them swapped.
2234 (define_insn "<expander><mode>3"
2235 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v")
2236 (nocomm_fp:FP_1REG
2237 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2238 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2239 ""
2240 "@
2241 v_<mnemonic>0\t%0, %1, %2
2242 v_<revmnemonic>0\t%0, %2, %1"
2243 [(set_attr "type" "vop2")
2244 (set_attr "length" "8,8")])
2245
2246 ;; }}}
2247 ;; {{{ FP unops
2248
; Scalar FP absolute value, printed as an add of 0 and |operand 1|.
2249 (define_insn "abs<mode>2"
2250 [(set (match_operand:FP 0 "register_operand" "=v")
2251 (abs:FP (match_operand:FP 1 "register_operand" " v")))]
2252 ""
2253 "v_add%i0\t%0, 0, |%1|"
2254 [(set_attr "type" "vop3a")
2255 (set_attr "length" "8")])
2256
; Vector FP absolute value, printed as an add of 0 and |operand 1|.
2257 (define_insn "abs<mode>2<exec>"
2258 [(set (match_operand:V_FP 0 "register_operand" "=v")
2259 (abs:V_FP
2260 (match_operand:V_FP 1 "register_operand" " v")))]
2261 ""
2262 "v_add%i0\t%0, 0, |%1|"
2263 [(set_attr "type" "vop3a")
2264 (set_attr "length" "8")])
2265
; Vector FP negation, printed as an add of 0 and -operand 1.
2266 (define_insn "neg<mode>2<exec>"
2267 [(set (match_operand:V_FP 0 "register_operand" "=v")
2268 (neg:V_FP
2269 (match_operand:V_FP 1 "register_operand" " v")))]
2270 ""
2271 "v_add%i0\t%0, 0, -%1"
2272 [(set_attr "type" "vop3a")
2273 (set_attr "length" "8")])
2274
; Vector FP square root; only enabled under flag_unsafe_math_optimizations.
2275 (define_insn "sqrt<mode>2<exec>"
2276 [(set (match_operand:V_FP 0 "register_operand" "= v")
2277 (sqrt:V_FP
2278 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
2279 "flag_unsafe_math_optimizations"
2280 "v_sqrt%i0\t%0, %1"
2281 [(set_attr "type" "vop1")
2282 (set_attr "length" "8")])
2283
; Scalar FP square root; only enabled under flag_unsafe_math_optimizations.
2284 (define_insn "sqrt<mode>2"
2285 [(set (match_operand:FP 0 "register_operand" "= v")
2286 (sqrt:FP
2287 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
2288 "flag_unsafe_math_optimizations"
2289 "v_sqrt%i0\t%0, %1"
2290 [(set_attr "type" "vop1")
2291 (set_attr "length" "8")])
2292
2293 ; These FP unops have f64, f32 and f16 versions.
2294 (define_int_iterator MATH_UNOP_1OR2REG
2295 [UNSPEC_FLOOR UNSPEC_CEIL])
2296
2297 ; These FP unops only have f16/f32 versions.
2298 (define_int_iterator MATH_UNOP_1REG
2299 [UNSPEC_EXP2 UNSPEC_LOG2])
2300
; Sine and cosine; the patterns using this iterator are defined for the
; one-register FP modes and go through an input-scaling expander.
2301 (define_int_iterator MATH_UNOP_TRIG
2302 [UNSPEC_SIN UNSPEC_COS])
2303
; Maps each unspec above to the name used both in the pattern names and in
; the v_<math_unop> mnemonics.
2304 (define_int_attr math_unop
2305 [(UNSPEC_FLOOR "floor")
2306 (UNSPEC_CEIL "ceil")
2307 (UNSPEC_EXP2 "exp2")
2308 (UNSPEC_LOG2 "log2")
2309 (UNSPEC_SIN "sin")
2310 (UNSPEC_COS "cos")])
2311
; floor/ceil on scalar FP modes.
2312 (define_insn "<math_unop><mode>2"
2313 [(set (match_operand:FP 0 "register_operand" "= v")
2314 (unspec:FP
2315 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
2316 MATH_UNOP_1OR2REG))]
2317 ""
2318 "v_<math_unop>%i0\t%0, %1"
2319 [(set_attr "type" "vop1")
2320 (set_attr "length" "8")])
2321
; floor/ceil on FP vector modes.
2322 (define_insn "<math_unop><mode>2<exec>"
2323 [(set (match_operand:V_FP 0 "register_operand" "= v")
2324 (unspec:V_FP
2325 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
2326 MATH_UNOP_1OR2REG))]
2327 ""
2328 "v_<math_unop>%i0\t%0, %1"
2329 [(set_attr "type" "vop1")
2330 (set_attr "length" "8")])
2331
; exp2/log2 on one-register scalar FP modes; only enabled under
; flag_unsafe_math_optimizations.
2332 (define_insn "<math_unop><mode>2"
2333 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
2334 (unspec:FP_1REG
2335 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
2336 MATH_UNOP_1REG))]
2337 "flag_unsafe_math_optimizations"
2338 "v_<math_unop>%i0\t%0, %1"
2339 [(set_attr "type" "vop1")
2340 (set_attr "length" "8")])
2341
; exp2/log2 on one-register FP vector modes; only enabled under
; flag_unsafe_math_optimizations.
2342 (define_insn "<math_unop><mode>2<exec>"
2343 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
2344 (unspec:V_FP_1REG
2345 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
2346 MATH_UNOP_1REG))]
2347 "flag_unsafe_math_optimizations"
2348 "v_<math_unop>%i0\t%0, %1"
2349 [(set_attr "type" "vop1")
2350 (set_attr "length" "8")])
2351
; Raw scalar sin/cos instruction.  The name starts with "*", so the standard
; <math_unop><mode>2 name is left to the expander that pre-scales the input.
2352 (define_insn "*<math_unop><mode>2_insn"
2353 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
2354 (unspec:FP_1REG
2355 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
2356 MATH_UNOP_TRIG))]
2357 "flag_unsafe_math_optimizations"
2358 "v_<math_unop>%i0\t%0, %1"
2359 [(set_attr "type" "vop1")
2360 (set_attr "length" "8")])
2361
; Raw vector sin/cos instruction.  The name starts with "*", so the standard
; <math_unop><mode>2 name is left to the expander that pre-scales the input.
2362 (define_insn "*<math_unop><mode>2<exec>_insn"
2363 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
2364 (unspec:V_FP_1REG
2365 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
2366 MATH_UNOP_TRIG))]
2367 "flag_unsafe_math_optimizations"
2368 "v_<math_unop>%i0\t%0, %1"
2369 [(set_attr "type" "vop1")
2370 (set_attr "length" "8")])
2371
2372 ; Trigonometric functions need their input scaled by 1/(2*PI) first.
2373
;; Scalar sin/cos: multiply the input by the 1/(2*PI) constant into a fresh
;; register (operand 2), then apply the trig unspec to that product.
(define_expand "<math_unop><mode>2"
  [(set (match_dup 2)
	(mult:FP_1REG
	  (match_dup 3)
	  (match_operand:FP_1REG 1 "gcn_alu_operand")))
   (set (match_operand:FP_1REG 0 "register_operand")
	(unspec:FP_1REG
	  [(match_dup 2)]
	  MATH_UNOP_TRIG))]
  "flag_unsafe_math_optimizations"
  {
    rtx scaled_input = gen_reg_rtx (<MODE>mode);
    rtx inv_two_pi = const_double_from_real_value (gcn_dconst1over2pi (),
						   <MODE>mode);

    operands[2] = scaled_input;
    operands[3] = inv_two_pi;
  })
2389
;; Vector sin/cos: multiply the input by a vector constant built from
;; 1/(2*PI) into a fresh register (operand 2), then apply the trig unspec to
;; that product.
(define_expand "<math_unop><mode>2<exec>"
  [(set (match_dup 2)
	(mult:V_FP_1REG
	  (match_dup 3)
	  (match_operand:V_FP_1REG 1 "gcn_alu_operand")))
   (set (match_operand:V_FP_1REG 0 "register_operand")
	(unspec:V_FP_1REG
	  [(match_dup 2)]
	  MATH_UNOP_TRIG))]
  "flag_unsafe_math_optimizations"
  {
    rtx scaled_input = gen_reg_rtx (<MODE>mode);
    rtx inv_two_pi = const_double_from_real_value (gcn_dconst1over2pi (),
						   <SCALAR_MODE>mode);

    operands[2] = scaled_input;
    operands[3] = gcn_vec_constant (<MODE>mode, inv_two_pi);
  })
2407
2408 ; Implement ldexp pattern
2409
; Scalar ldexp: operand 1 is the FP value, operand 2 the SImode exponent.
2410 (define_insn "ldexp<mode>3"
2411 [(set (match_operand:FP 0 "register_operand" "=v")
2412 (unspec:FP
2413 [(match_operand:FP 1 "gcn_alu_operand" "vB")
2414 (match_operand:SI 2 "gcn_alu_operand" "vSvA")]
2415 UNSPEC_LDEXP))]
2416 ""
2417 "v_ldexp%i0\t%0, %1, %2"
2418 [(set_attr "type" "vop3a")
2419 (set_attr "length" "8")])
2420
; Vector ldexp: operand 1 is the FP vector, operand 2 a V64SI vector of
; exponents.
; NOTE(review): the exponent mode is spelled V64SI rather than <VnSI> as in
; the other patterns -- revisit if further vector lengths are added.
2421 (define_insn "ldexp<mode>3<exec>"
2422 [(set (match_operand:V_FP 0 "register_operand" "=v")
2423 (unspec:V_FP
2424 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")
2425 (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")]
2426 UNSPEC_LDEXP))]
2427 ""
2428 "v_ldexp%i0\t%0, %1, %2"
2429 [(set_attr "type" "vop3a")
2430 (set_attr "length" "8")])
2431
2432 ; Implement frexp patterns
2433
; Scalar frexp, exponent part: yields an SImode result from FP operand 1.
2434 (define_insn "frexp<mode>_exp2"
2435 [(set (match_operand:SI 0 "register_operand" "=v")
2436 (unspec:SI
2437 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
2438 UNSPEC_FREXP_EXP))]
2439 ""
2440 "v_frexp_exp_i32%i1\t%0, %1"
2441 [(set_attr "type" "vop1")
2442 (set_attr "length" "8")])
2443
; Scalar frexp, mantissa part: result has the same FP mode as operand 1.
2444 (define_insn "frexp<mode>_mant2"
2445 [(set (match_operand:FP 0 "register_operand" "=v")
2446 (unspec:FP
2447 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
2448 UNSPEC_FREXP_MANT))]
2449 ""
2450 "v_frexp_mant%i1\t%0, %1"
2451 [(set_attr "type" "vop1")
2452 (set_attr "length" "8")])
2453
; Vector frexp, exponent part: yields a V64SI result from FP vector
; operand 1.
2454 (define_insn "frexp<mode>_exp2<exec>"
2455 [(set (match_operand:V64SI 0 "register_operand" "=v")
2456 (unspec:V64SI
2457 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
2458 UNSPEC_FREXP_EXP))]
2459 ""
2460 "v_frexp_exp_i32%i1\t%0, %1"
2461 [(set_attr "type" "vop1")
2462 (set_attr "length" "8")])
2463
; Vector frexp, mantissa part: result has the same FP vector mode as
; operand 1.
2464 (define_insn "frexp<mode>_mant2<exec>"
2465 [(set (match_operand:V_FP 0 "register_operand" "=v")
2466 (unspec:V_FP
2467 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
2468 UNSPEC_FREXP_MANT))]
2469 ""
2470 "v_frexp_mant%i1\t%0, %1"
2471 [(set_attr "type" "vop3a")
2472 (set_attr "length" "8")])
2473
2474 ;; }}}
2475 ;; {{{ FP fused multiply and add
2476
; Vector fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
; Both alternatives print the same template; they differ only in which
; source carries the wider "vSvA" constraint.
2477 (define_insn "fma<mode>4<exec>"
2478 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
2479 (fma:V_FP
2480 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
2481 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
2482 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
2483 ""
2484 "v_fma%i0\t%0, %1, %2, %3"
2485 [(set_attr "type" "vop3a")
2486 (set_attr "length" "8")])
2487
; Vector fused multiply-add with operand 2 negated:
; operand 0 = operand 1 * -operand 2 + operand 3.  The three alternatives
; print the same template and differ only in which source carries the wider
; "vSvA" constraint.
2488 (define_insn "fma<mode>4_negop2<exec>"
2489 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
2490 (fma:V_FP
2491 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2492 (neg:V_FP
2493 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2494 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2495 ""
2496 "v_fma%i0\t%0, %1, -%2, %3"
2497 [(set_attr "type" "vop3a")
2498 (set_attr "length" "8")])
2499
; Scalar fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
; Both alternatives print the same template; they differ only in which
; source carries the wider "vSvA" constraint.
2500 (define_insn "fma<mode>4"
2501 [(set (match_operand:FP 0 "register_operand" "= v, v")
2502 (fma:FP
2503 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
2504 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
2505 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
2506 ""
2507 "v_fma%i0\t%0, %1, %2, %3"
2508 [(set_attr "type" "vop3a")
2509 (set_attr "length" "8")])
2510
; Scalar fused multiply-add with operand 2 negated:
; operand 0 = operand 1 * -operand 2 + operand 3.  The three alternatives
; print the same template and differ only in which source carries the wider
; "vSvA" constraint.
2511 (define_insn "fma<mode>4_negop2"
2512 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
2513 (fma:FP
2514 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2515 (neg:FP
2516 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2517 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2518 ""
2519 "v_fma%i0\t%0, %1, -%2, %3"
2520 [(set_attr "type" "vop3a")
2521 (set_attr "length" "8")])
2522
2523 ;; }}}
2524 ;; {{{ FP division
2525
; Vector reciprocal instruction (v_rcp), used by the division expander as
; its starting estimate.
2526 (define_insn "recip<mode>2<exec>"
2527 [(set (match_operand:V_FP 0 "register_operand" "= v")
2528 (unspec:V_FP
2529 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
2530 UNSPEC_RCP))]
2531 ""
2532 "v_rcp%i0\t%0, %1"
2533 [(set_attr "type" "vop1")
2534 (set_attr "length" "8")])
2535
; Scalar reciprocal instruction (v_rcp), used by the division expander as
; its starting estimate.
2536 (define_insn "recip<mode>2"
2537 [(set (match_operand:FP 0 "register_operand" "= v")
2538 (unspec:FP
2539 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
2540 UNSPEC_RCP))]
2541 ""
2542 "v_rcp%i0\t%0, %1"
2543 [(set_attr "type" "vop1")
2544 (set_attr "length" "8")])
2545
2546 ;; Do division via a = b * 1/c
2547 ;; The v_rcp_* instructions are not sufficiently accurate on their own,
2548 ;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
2549 ;; which the ISA manual says is enough to improve the reciprocal accuracy.
2550 ;;
2551 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
2552
;; Vector FP division, only under flag_reciprocal_math.
;; Operand 0 = operand 1 (numerator) / operand 2 (denominator).
;; A refined reciprocal of the denominator is computed first.  When the
;; numerator is the constant 1.0 in every lane, that reciprocal is the
;; result; otherwise it is multiplied by the numerator and the quotient is
;; corrected with two further fma pairs.
(define_expand "div<mode>3"
  [(match_operand:V_FP 0 "gcn_valu_dst_operand")
   (match_operand:V_FP 1 "gcn_valu_src0_operand")
   (match_operand:V_FP 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx one = gcn_vec_constant (<MODE>mode,
		  const_double_from_real_value (dconst1, <SCALAR_MODE>mode));
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;
    rtx num = operands[1], denom = operands[2];

    /* The shortcut is only valid for 1.0 / x, so compare against +1.0
       and require every lane of the constant to hold that value.  */
    bool is_rcp = (const_vec_duplicate_p (num)
		   && real_identical
			(CONST_DOUBLE_REAL_VALUE (CONST_VECTOR_ELT (num, 0)),
			 &dconst1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    /* rcp = initrcp + initrcp * (1 - initrcp * denom).  */
    emit_insn (gen_recip<mode>2 (initrcp, denom));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
    emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));

    if (!is_rcp)
      {
	rtx div_est = gen_reg_rtx (<MODE>mode);
	rtx fma2 = gen_reg_rtx (<MODE>mode);
	rtx fma3 = gen_reg_rtx (<MODE>mode);
	rtx fma4 = gen_reg_rtx (<MODE>mode);

	/* Quotient estimate, then two rounds of
	   q = q + rcp * (num - q * denom).  */
	emit_insn (gen_mul<mode>3 (div_est, num, rcp));
	emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
	emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
	emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
	emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
      }

    DONE;
  })
2595
;; Scalar FP division, only under flag_reciprocal_math.
;; Operand 0 = operand 1 (numerator) / operand 2 (denominator).
;; A refined reciprocal of the denominator is computed first.  When the
;; numerator is the constant 1.0, that reciprocal is the result; otherwise
;; it is multiplied by the numerator and the quotient is corrected with two
;; further fma pairs.
(define_expand "div<mode>3"
  [(match_operand:FP 0 "gcn_valu_dst_operand")
   (match_operand:FP 1 "gcn_valu_src0_operand")
   (match_operand:FP 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx one = const_double_from_real_value (dconst1, <MODE>mode);
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;
    rtx num = operands[1], denom = operands[2];

    /* The shortcut is only valid for 1.0 / x, so compare against +1.0.  */
    bool is_rcp = (GET_CODE (num) == CONST_DOUBLE
		   && real_identical (CONST_DOUBLE_REAL_VALUE (num),
				      &dconst1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    /* rcp = initrcp + initrcp * (1 - initrcp * denom).  */
    emit_insn (gen_recip<mode>2 (initrcp, denom));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
    emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));

    if (!is_rcp)
      {
	rtx div_est = gen_reg_rtx (<MODE>mode);
	rtx fma2 = gen_reg_rtx (<MODE>mode);
	rtx fma3 = gen_reg_rtx (<MODE>mode);
	rtx fma4 = gen_reg_rtx (<MODE>mode);

	/* Quotient estimate, then two rounds of
	   q = q + rcp * (num - q * denom).  */
	emit_insn (gen_mul<mode>3 (div_est, num, rcp));
	emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
	emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
	emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
	emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
      }

    DONE;
  })
2636
2637 ;; }}}
2638 ;; {{{ Int/FP conversions
2639
; Scalar source and destination modes for the conversion patterns.
2640 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2641 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2642
; Vector modes for the conversion patterns: all of them, the FP ones, and
; the integer ones.
2643 (define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
2644 (define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
2645 (define_mode_iterator VCVT_IMODE [V64HI V64SI])
2646
; Conversion codes, with the pattern-name stem (cvt_name) and the operand
; suffix pair appended to "v_cvt" (cvt_operands) for each of them.
2647 (define_code_iterator cvt_op [fix unsigned_fix
2648 float unsigned_float
2649 float_extend float_truncate])
2650 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2651 (float "float") (unsigned_float "floatuns")
2652 (float_extend "extend") (float_truncate "trunc")])
2653 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2654 (float "%i0%i1") (unsigned_float "%i0%u1")
2655 (float_extend "%i0%i1")
2656 (float_truncate "%i0%i1")])
2657
; Scalar conversions.  Every from/to/code combination is generated;
; gcn_valid_cvt_p in the insn condition selects which ones are enabled.
2658 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2659 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2660 (cvt_op:CVT_TO_MODE
2661 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2662 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2663 <cvt_name>_cvt)"
2664 "v_cvt<cvt_operands>\t%0, %1"
2665 [(set_attr "type" "vop1")
2666 (set_attr "length" "8")])
2667
; Vector conversions whose result is an FP vector; gcn_valid_cvt_p in the
; insn condition selects which combinations are enabled.
2668 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
2669 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
2670 (cvt_op:VCVT_FMODE
2671 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
2672 "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
2673 <cvt_name>_cvt)"
2674 "v_cvt<cvt_operands>\t%0, %1"
2675 [(set_attr "type" "vop1")
2676 (set_attr "length" "8")])
2677
; Vector conversions from an FP vector to an integer vector;
; gcn_valid_cvt_p in the insn condition selects which combinations are
; enabled.
2678 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
2679 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
2680 (cvt_op:VCVT_IMODE
2681 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
2682 "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
2683 <cvt_name>_cvt)"
2684 "v_cvt<cvt_operands>\t%0, %1"
2685 [(set_attr "type" "vop1")
2686 (set_attr "length" "8")])
2687
2688 ;; }}}
2689 ;; {{{ Int/int conversions
2690
; zero_convert groups the two conversions that share the plain sdwa move
; pattern below; convop gives the pattern-name stem for each conversion
; code.
2691 (define_code_iterator zero_convert [truncate zero_extend])
2692 (define_code_attr convop [
2693 (sign_extend "extend")
2694 (zero_extend "zero_extend")
2695 (truncate "trunc")])
2696
; Truncate or zero-extend between single-register integer vector modes with
; one v_mov_b32_sdwa; the dst_sel/src0_sel fields come from the <sdwa>
; attribute of the destination and source modes.
2697 (define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2698 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2699 (zero_convert:V_INT_1REG
2700 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
2701 ""
2702 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
2703 [(set_attr "type" "vop_sdwa")
2704 (set_attr "length" "8")])
2705
; Sign-extend between single-register integer vector modes with one
; v_mov_b32_sdwa using the sext() source modifier; src0_sel comes from the
; <sdwa> attribute of the source mode.
2706 (define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2707 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2708 (sign_extend:V_INT_1REG
2709 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
2710 ""
2711 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
2712 [(set_attr "type" "vop_sdwa")
2713 (set_attr "length" "8")])
2714
2715 ;; GCC can already do these for scalar types, but not for vector types.
2716 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2717 ;; so there must be a few tricks here.
2718
;; Truncate a <VnDI> vector to a single-register integer vector.  Split
;; after reload: the low half of operand 1 is either moved directly (when
;; the destination mode is <VnSI>) or truncated further with the
;; <VnSI>-to-<MODE> pattern.
(define_insn_and_split "trunc<vndi><mode>2"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
	(truncate:V_INT_1REG
	  (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
    rtx out = operands[0];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_trunc<vnsi><mode>2 (out, inlo));
    else
      emit_move_insn (out, inlo);

    /* The replacement insns were emitted by hand above; DONE keeps the
       placeholder (const_int 0) template from being emitted as well.  */
    DONE;
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2738
;; Masked truncation of a <VnDI> vector to a single-register integer
;; vector: operand 3 is the exec mask and operand 2 the merge source.
;; Split after reload into a masked move or a masked <VnSI>-to-<MODE>
;; truncation of the low half of operand 1.
(define_insn_and_split "trunc<vndi><mode>2_exec"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
	(vec_merge:V_INT_1REG
	  (truncate:V_INT_1REG
	    (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
	  (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
	  (match_operand:DI 3 "gcn_exec_operand" " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out = operands[0];
    rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
    rtx merge = operands[2];
    rtx exec = operands[3];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec));
    else
      emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec));

    /* The replacement insns were emitted by hand above; DONE keeps the
       placeholder (const_int 0) template from being emitted as well.  */
    DONE;
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2763
;; Sign- or zero-extend a single-register integer vector to <VnDI>.  Split
;; after reload: the low half receives the input (extended to <VnSI> first
;; when it is narrower); the high half is the low half shifted right
;; arithmetically by 31 for sign extension, or zero for zero extension.
(define_insn_and_split "<convop><mode><vndi>2"
  [(set (match_operand:<VnDI> 0 "register_operand" "=v")
	(any_extend:<VnDI>
	  (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
    rtx in = operands[1];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_<convop><mode><vnsi>2 (outlo, in));
    else
      emit_move_insn (outlo, in);
    if ('<su>' == 's')
      emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31)));
    else
      emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx));

    /* The replacement insns were emitted by hand above; DONE keeps the
       placeholder (const_int 0) template from being emitted as well.  */
    DONE;
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
2788
;; Masked variant of the extension to DImode elements: operand 2 supplies
;; the value merged in by the vec_merge and operand 3 is the DImode mask.
;; Split after reload into masked writes of the low and high parts.
(define_insn_and_split "<convop><mode><vndi>2_exec"
  [(set (match_operand:<VnDI> 0 "register_operand" "=v")
        (vec_merge:<VnDI>
          (any_extend:<VnDI>
            (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
          (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
          (match_operand:DI 3 "gcn_exec_operand" " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
    rtx in = operands[1];
    rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0);
    rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1);
    rtx exec = operands[3];

    /* Low part: masked extend to SImode elements, or masked move.  */
    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec));
    else
      emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec));

    /* High part: sign copies (shift of the low part by 31) for signed
       extension, zero otherwise; both merged with the high merge part.  */
    if ('<su>' == 's')
      emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi,
                                       exec));
    else
      emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi,
                                               exec));

    /* Everything has been emitted by hand.  Without DONE the placeholder
       (const_int 0) replacement template would be emitted as well.  */
    DONE;
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
2821
2822 ;; }}}
2823 ;; {{{ Vector comparison/merge
2824
;; Compare two vectors (any V_noQI mode) and set a DImode result.
;; Alternatives 0-1 target "cV" (printed as vcc) with v_cmp, alternatives
;; 2-3 target "e" with v_cmpx and need a "cV" scratch (operand 4), and
;; alternatives 4-5 print an arbitrary "Sg" destination (vop3a type).
;; The comparison code is printed through the %E1 directive.
(define_insn "vec_cmp<mode>di"
  [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
        (match_operator:DI 1 "gcn_fp_compare_operator"
          [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
           (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]))
   (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])
2841
;; Unsigned integer vector comparison; forwards to vec_cmp<mode>di.
(define_expand "vec_cmpu<mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
  ""
  {
    rtx result = operands[0];
    rtx comparison = operands[1];
    rtx lhs = operands[2];
    rtx rhs = operands[3];

    /* The signed and unsigned comparisons share one insn pattern; only the
       operator code differs (e.g. LTU vs LT), and the '%E1' output
       directive prints whichever one was given.  */
    emit_insn (gen_vec_cmp<mode>di (result, comparison, lhs, rhs));
    DONE;
  })
2856
2857 ; There's no instruction for 8-bit vector comparison, so we need to extend.
;; V_QI comparison: both inputs are widened to <VnSI> temporaries, which
;; are then compared with the SImode-element pattern.
(define_expand "vec_cmp<u><mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
  "can_create_pseudo_p ()"
  {
    rtx wide_lhs = gen_reg_rtx (<VnSI>mode);
    rtx wide_rhs = gen_reg_rtx (<VnSI>mode);
    rtx narrow_lhs = operands[2];
    rtx narrow_rhs = operands[3];

    /* Widen each input, then compare the widened values.  */
    emit_insn (gen_<expander><mode><vnsi>2 (wide_lhs, narrow_lhs));
    emit_insn (gen_<expander><mode><vnsi>2 (wide_rhs, narrow_rhs));
    emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1],
                                    wide_lhs, wide_rhs));
    DONE;
  })
2873
;; As vec_cmp<mode>di, but the RTL result is the comparison ANDed with the
;; DImode operand 4 (constraint "e" in every alternative).  The scratch is
;; operand 5 here; the output templates are the same as the unmasked insn.
(define_insn "vec_cmp<mode>di_exec"
  [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
        (and:DI
          (match_operator 1 "gcn_fp_compare_operator"
            [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
             (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])
          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
   (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])
2892
;; Masked unsigned integer vector comparison; operand 4 is the DImode
;; mask.  Forwards to the vec_cmp<mode>di_exec insn.
(define_expand "vec_cmpu<mode>di_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    /* Unsigned comparisons use the same patterns as signed comparisons,
       except that they use unsigned operators (e.g. LTU vs LT).
       The '%E1' directive then does the Right Thing.

       This must call the vec_cmp (not vec_cmpu) generator: calling
       gen_vec_cmpu<mode>di_exec here would re-enter this very expander
       and recurse without end.  */
    emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
                                         operands[2], operands[3],
                                         operands[4]));
    DONE;
  })
2909
;; Masked V_QI comparison: both inputs are widened to <VnSI> temporaries
;; under the mask in operand 4, then compared with the masked
;; SImode-element pattern.
(define_expand "vec_cmp<u><mode>di_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  "can_create_pseudo_p ()"
  {
    rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
    rtx sitmp2 = gen_reg_rtx (<VnSI>mode);

    /* NOTE(review): the third argument of each extension below is the
       V_QI input itself, yet the destination is <VnSI>.  If that argument
       is the merge value (as in the other *_exec patterns, where it has
       the destination's mode) this is a mode mismatch -- confirm, and
       consider passing an undefined <VnSI> value instead.  */
    emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
                                                 operands[2], operands[4]));
    emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
                                                 operands[3], operands[4]));
    emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
                                         sitmp2, operands[4]));
    DONE;
  })
2929
;; Vector comparison whose first input is a scalar broadcast with
;; vec_duplicate.  Five alternatives: "cV" destinations print vcc with
;; v_cmp, "e" destinations use v_cmpx with a "cV" scratch (operand 4), and
;; the last alternative prints an "Sg" destination (vop3a type).
(define_insn "vec_cmp<mode>di_dup"
  [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
        (match_operator:DI 1 "gcn_fp_compare_operator"
          [(vec_duplicate:V_noQI
             (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
                                          " Sv, B,Sv,B, A"))
           (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]))
   (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])
2947
;; As vec_cmp<mode>di_dup, but the RTL result is the comparison ANDed with
;; the DImode operand 4 (constraint "e" in every alternative); the scratch
;; becomes operand 5.
(define_insn "vec_cmp<mode>di_dup_exec"
  [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
        (and:DI
          (match_operator 1 "gcn_fp_compare_operator"
            [(vec_duplicate:V_noQI
               (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
                                            " Sv, B,Sv,B, A"))
             (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
   (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])
2967
;; Select between operands 1 and 2 according to the DImode mask in
;; operand 3, expressed as a vec_merge.  There is no preparation code, so
;; the RTL template (including the <VnDI> scratch clobber) is emitted
;; as written.
(define_expand "vcond_mask_<mode>di"
  [(parallel
    [(set (match_operand:V_ALL 0 "register_operand" "")
          (vec_merge:V_ALL
            (match_operand:V_ALL 1 "gcn_vop3_operand" "")
            (match_operand:V_ALL 2 "gcn_alu_operand" "")
            (match_operand:DI 3 "register_operand" "")))
     (clobber (scratch:<VnDI>))])]
  ""
  "")
2978
;; Vector conditional select: compare operands 4 and 5 with operator 3
;; into a DImode mask, then use that mask to choose between operands 1
;; and 2.
(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx mask = gen_reg_rtx (DImode);

    /* Step 1: evaluate the comparison into the mask.  */
    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di (mask, operands[3],
                                              operands[4], operands[5]));
    /* Step 2: merge the two value vectors under that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], mask));
    DONE;
  })
2995
;; As vcond, but the comparison is performed with the masked (_exec)
;; compare pattern using operand 6.  The final merge is the ordinary
;; vcond_mask.
(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx mask = gen_reg_rtx (DImode);

    /* Step 1: evaluate the comparison, qualified by operand 6.  */
    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec (mask, operands[3],
                                                   operands[4], operands[5],
                                                   operands[6]));
    /* Step 2: merge the two value vectors under the resulting mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], mask));
    DONE;
  })
3013
;; Vector conditional select on an unsigned integer comparison: compare
;; operands 4 and 5 via vec_cmpu into a DImode mask, then choose between
;; operands 1 and 2 with it.
(define_expand "vcondu<V_ALL:mode><V_INT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx mask = gen_reg_rtx (DImode);

    /* Step 1: evaluate the unsigned comparison into the mask.  */
    emit_insn (gen_vec_cmpu<V_INT:mode>di (mask, operands[3],
                                           operands[4], operands[5]));
    /* Step 2: merge the two value vectors under that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], mask));
    DONE;
  })
3030
;; As vcondu, but the comparison is performed with the masked (_exec)
;; unsigned compare expander using operand 6.  The final merge is the
;; ordinary vcond_mask.
(define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx mask = gen_reg_rtx (DImode);

    /* Step 1: evaluate the unsigned comparison, qualified by operand 6.  */
    emit_insn (gen_vec_cmpu<V_INT:mode>di_exec (mask, operands[3],
                                                operands[4], operands[5],
                                                operands[6]));
    /* Step 2: merge the two value vectors under the resulting mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], mask));
    DONE;
  })
3048
3049 ;; }}}
3050 ;; {{{ Fully masked loop support
3051
;; Build a DImode lane mask from the SImode bounds in operands 1 and 2;
;; operand 3 limits how many low bits of the mask may be set.
(define_expand "while_ultsidi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:SI 1 "")
   (match_operand:SI 2 "")
   (match_operand:SI 3 "")]
  ""
  {
    if (GET_CODE (operands[1]) != CONST_INT
        || GET_CODE (operands[2]) != CONST_INT)
      {
        /* At least one bound is not constant: compute the mask at run time
           with a vector comparison.  Presumably VGPR 1 holds the per-lane
           sequence 0, 1, 2, 3, ... (as the variable name suggests) --
           TODO confirm.  */
        rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
        rtx tmp = _0_1_2_3;
        /* Add operand 1 to every lane unless it is the constant zero.  */
        if (GET_CODE (operands[1]) != CONST_INT
            || INTVAL (operands[1]) != 0)
          {
            tmp = gen_reg_rtx (V64SImode);
            emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
          }
        /* Mask = (broadcast operand 2) GT tmp.  The GT rtx only carries
           the comparison code.  NOTE(review): GT is the signed code
           although the pattern is named "ult" -- confirm this is
           intended.  */
        emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
                                           gen_rtx_GT (VOIDmode, 0, 0),
                                           operands[2], tmp));
      }
    else
      {
        /* Both bounds are constants: set the low DIFF bits of the mask, or
           all 64 bits when DIFF is 64 or more.
           NOTE(review): a negative DIFF would make the shift count
           negative -- confirm callers never pass operand 2 < operand 1.  */
        HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
        HOST_WIDE_INT mask = (diff >= 64 ? -1
                              : ~((unsigned HOST_WIDE_INT)-1 << diff));
        emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
      }
    /* Clear every mask bit at or above position operand 3.  INTVAL is
       applied unconditionally, so operand 3 is assumed to be a
       CONST_INT.  */
    if (INTVAL (operands[3]) < 64)
      emit_insn (gen_anddi3 (operands[0], operands[0],
                             gen_rtx_CONST_INT (VOIDmode,
                                                ~((unsigned HOST_WIDE_INT)-1
                                                  << INTVAL (operands[3])))));
    DONE;
  })
3088
;; Masked vector load: operand 2 is the mask, operand 1 the memory source.
;; Implemented as a masked gather from a vector of addresses.
(define_expand "maskload<mode>di"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "memory_operand")
   (match_operand 2 "")]
  ""
  {
    rtx dest = operands[0];
    rtx mem = operands[1];
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
                 (<MODE>mode, exec, mem, gen_rtx_SCRATCH (<VnDI>mode));
    /* The memory reference's address space and volatility are passed on
       to the gather pattern as constants.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (mem));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (mem));

    /* Lanes that are masked off must read as zero, so clear the
       destination first and use it as the gather's merge value.  */
    emit_move_insn (dest, gcn_vec_constant (<MODE>mode, 0));
    emit_insn (gen_gather<mode>_expr_exec (dest, addr, as, v, dest, exec));
    DONE;
  })
3108
;; Masked vector store: operand 2 is the mask, operand 0 the memory
;; destination.  Implemented as a masked scatter to a vector of addresses.
(define_expand "maskstore<mode>di"
  [(match_operand:V_ALL 0 "memory_operand")
   (match_operand:V_ALL 1 "register_operand")
   (match_operand 2 "")]
  ""
  {
    rtx mem = operands[0];
    rtx value = operands[1];
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
                 (<MODE>mode, exec, mem, gen_rtx_SCRATCH (<VnDI>mode));
    /* The memory reference's address space and volatility are passed on
       to the scatter pattern as constants.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (mem));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (mem));

    emit_insn (gen_scatter<mode>_expr_exec (addr, value, as, v, exec));
    DONE;
  })
3123
;; Masked gather load.  Operand 1 is the DImode base, operand 2 the <VnSI>
;; offsets, operand 3 an immediate and operand 4 an SImode value, both
;; forwarded to gcn_expand_scaled_offsets; operand 5 is the mask.
(define_expand "mask_gather_load<mode><vnsi>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx dest = operands[0];
    rtx base = operands[1];
    rtx exec = force_reg (DImode, operands[5]);
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), exec);
    rtx pat;

    /* Lanes that are masked off must read as zero, so clear the
       destination first and use it as the gather's merge value.  */
    emit_move_insn (dest, gcn_vec_constant (<MODE>mode, 0));

    /* The helper returns either a <VnDI> value, used as the address
       directly, or something else that is combined with the base in the
       two-offset form.  */
    if (GET_MODE (addr) == <VnDI>mode)
      pat = gen_gather<mode>_insn_1offset_exec (dest, addr,
                                                const0_rtx, const0_rtx,
                                                const0_rtx, dest,
                                                exec);
    else
      pat = gen_gather<mode>_insn_2offsets_exec (dest, base,
                                                 addr, const0_rtx,
                                                 const0_rtx, const0_rtx,
                                                 dest, exec);
    emit_insn (pat);
    DONE;
  })
3154
;; Masked scatter store.  Operand 0 is the DImode base, operand 1 the
;; <VnSI> offsets, operand 2 an immediate and operand 3 an SImode value,
;; both forwarded to gcn_expand_scaled_offsets; operand 4 is the data and
;; operand 5 the mask.
(define_expand "mask_scatter_store<mode><vnsi>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:<VnSI> 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:V_ALL 4 "register_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx base = operands[0];
    rtx data = operands[4];
    rtx exec = force_reg (DImode, operands[5]);
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), exec);
    rtx pat;

    /* The helper returns either a <VnDI> value, used as the address
       directly, or something else that is combined with the base in the
       two-offset form.  */
    if (GET_MODE (addr) == <VnDI>mode)
      pat = gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
                                                 data, const0_rtx,
                                                 const0_rtx,
                                                 exec);
    else
      pat = gen_scatter<mode>_insn_2offsets_exec (base, addr,
                                                  const0_rtx, data,
                                                  const0_rtx, const0_rtx,
                                                  exec);
    emit_insn (pat);
    DONE;
  })
3182
;; Arithmetic codes that get a conditional (masked) expander.
(define_code_iterator cond_op [plus minus mult])

;; Conditional arithmetic: operand 1 is the DImode mask, operands 2 and 3
;; the inputs, and operand 4 is passed to the masked insn in the position
;; that precedes the mask.
(define_expand "cond_<expander><mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_op:V_ALL
     (match_operand:V_ALL 2 "gcn_alu_operand")
     (match_operand:V_ALL 3 "gcn_alu_operand"))
   (match_operand:V_ALL 4 "register_operand")]
  ""
  {
    /* Both the mask and the first input are forced into registers before
       the masked instruction is emitted.  */
    rtx exec = force_reg (DImode, operands[1]);
    rtx src1 = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], src1,
                                           operands[3], operands[4],
                                           exec));
    DONE;
  })
3202
3203 ;; TODO smin umin smax umax
;; Bitwise codes that get a conditional (masked) expander.
(define_code_iterator cond_bitop [and ior xor])

;; Conditional bitwise operation on integer vectors: operand 1 is the
;; DImode mask, operands 2 and 3 the inputs, and operand 4 is passed to
;; the masked insn in the position that precedes the mask.
(define_expand "cond_<expander><mode>"
  [(match_operand:V_INT 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_bitop:V_INT
     (match_operand:V_INT 2 "gcn_alu_operand")
     (match_operand:V_INT 3 "gcn_alu_operand"))
   (match_operand:V_INT 4 "register_operand")]
  ""
  {
    /* Both the mask and the first input are forced into registers before
       the masked instruction is emitted.  */
    rtx exec = force_reg (DImode, operands[1]);
    rtx src1 = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], src1,
                                           operands[3], operands[4],
                                           exec));
    DONE;
  })
3223
3224 ;; }}}
3225 ;; {{{ Vector reductions
3226
; All reduction unspecs handled by the DPP shift patterns.
(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
                                   UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
                                   UNSPEC_PLUS_DPP_SHR
                                   UNSPEC_AND_DPP_SHR
                                   UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

; The subset of reduction unspecs used by the two-register (V_DI) split
; pattern; the min/max variants are not included.
(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
                                        UNSPEC_AND_DPP_SHR
                                        UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

; Maps each unspec to its own name so that it can be substituted into
; C code as <reduc_unspec>.
; FIXME: Isn't there a better way of doing this?
(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
                               (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
                               (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
                               (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
                               (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
                               (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
                               (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
                               (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])

; Operation name used when forming pattern names such as
; reduc_<reduc_op>_scal_<mode>.
(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
                           (UNSPEC_SMAX_DPP_SHR "smax")
                           (UNSPEC_UMIN_DPP_SHR "umin")
                           (UNSPEC_UMAX_DPP_SHR "umax")
                           (UNSPEC_PLUS_DPP_SHR "plus")
                           (UNSPEC_AND_DPP_SHR "and")
                           (UNSPEC_IOR_DPP_SHR "ior")
                           (UNSPEC_XOR_DPP_SHR "xor")])

; Assembler mnemonic template passed to gcn_expand_dpp_shr_insn for each
; reduction.
(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
                             (UNSPEC_SMAX_DPP_SHR "v_max%i0")
                             (UNSPEC_UMIN_DPP_SHR "v_min%u0")
                             (UNSPEC_UMAX_DPP_SHR "v_max%u0")
                             (UNSPEC_PLUS_DPP_SHR "v_add%U0")
                             (UNSPEC_AND_DPP_SHR "v_and%B0")
                             (UNSPEC_IOR_DPP_SHR "v_or%B0")
                             (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
3264
;; Reduce vector operand 1 to a scalar in operand 0: the reduction itself
;; is built by gcn_expand_reduc_scalar and the scalar is then read out of
;; the resulting vector.
(define_expand "reduc_<reduc_op>_scal_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
        (unspec:<SCALAR_MODE>
          [(match_operand:V_ALL 1 "register_operand")]
          REDUC_UNSPEC))]
  ""
  {
    rtx reduced = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
                                           <reduc_unspec>);

    /* The reduction leaves its result in lane 63 of the vector.  */
    emit_insn (gen_mov_from_lane63_<mode> (operands[0], reduced));

    DONE;
  })
3280
;; Warning: this "-ffast-math" implementation turns an in-order reduction
;; into an associative one.  It is also used where OpenMP or OpenACC
;; parallelization has already broken the in-order semantics.
(define_expand "fold_left_plus_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:V_FP 2 "gcn_alu_operand")]
  "can_create_pseudo_p ()
   && (flag_openacc || flag_openmp
       || flag_associative_math)"
  {
    rtx sum = gen_reg_rtx (<SCALAR_MODE>mode);

    /* Reduce the vector (operand 2) to a scalar sum, then add the scalar
       start value (operand 1) to it.  */
    emit_insn (gen_reduc_plus_scal_<mode> (sum, operands[2]));
    emit_insn (gen_add<scalar_mode>3 (operands[0], operands[1], sum));
    DONE;
  })
3301
;; One reduction step on a one-register vector: operands 1 and 2 are the
;; vector inputs and operand 3 a constant whose value is handed to
;; gcn_expand_dpp_shr_insn, which produces the assembler text.
(define_insn "*<reduc_op>_dpp_shr_<mode>"
  [(set (match_operand:V_1REG 0 "register_operand" "=v")
        (unspec:V_1REG
          [(match_operand:V_1REG 1 "register_operand" "v")
           (match_operand:V_1REG 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          REDUC_UNSPEC))]
  ; GCN3 requires a carry out, GCN5 not
  ; (so integer PLUS on GCN3 is excluded from this pattern).
  "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
     && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
  {
    return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
                                    <reduc_unspec>, INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
3318
;; Two-register (V_DI) reduction step.  Split after reload into two
;; <VnSI> steps with the same unspec: one on the low parts and one on the
;; high parts of the operands.
(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
  [(set (match_operand:V_DI 0 "register_operand" "=v")
        (unspec:V_DI
          [(match_operand:V_DI 1 "register_operand" "v")
           (match_operand:V_DI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          REDUC_2REG_UNSPEC))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4)
        (unspec:<VnSI>
          [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
   (set (match_dup 5)
        (unspec:<VnSI>
          [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
  {
    /* Operands 4/5: low/high part of the destination; 6/7 and 8/9: low/high
       parts of the two inputs.  */
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
    operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
3345
3346 ; Special cases for addition.
3347
;; Addition reduction step for one-register integer vectors that clobbers
;; VCC.  The instruction text is generated for <VnSI> mode with the
;; "v_add%^_u32" mnemonic whatever the element mode.
(define_insn "*plus_carry_dpp_shr_<mode>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
        (unspec:V_INT_1REG
          [(match_operand:V_INT_1REG 1 "register_operand" "v")
           (match_operand:V_INT_1REG 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
                                    UNSPEC_PLUS_CARRY_DPP_SHR,
                                    INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
3364
;; Addition reduction step with an extra DImode input (operand 4,
;; constraint "cV") and a VCC clobber, emitted with the "v_addc%^_u32"
;; mnemonic.  Used for the high half of the V_DI split.
(define_insn "*plus_carry_in_dpp_shr_<mode>"
  [(set (match_operand:V_SI 0 "register_operand" "=v")
        (unspec:V_SI
          [(match_operand:V_SI 1 "register_operand" "v")
           (match_operand:V_SI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")
           (match_operand:DI 4 "register_operand" "cV")]
          UNSPEC_PLUS_CARRY_IN_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
                                    UNSPEC_PLUS_CARRY_IN_DPP_SHR,
                                    INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
3382
;; Two-register (V_DI) addition reduction step.  Split after reload into a
;; carry-producing step on the low parts followed by a carry-consuming
;; step on the high parts that reads VCC; both clobber VCC.
(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
  [(set (match_operand:V_DI 0 "register_operand" "=v")
        (unspec:V_DI
          [(match_operand:V_DI 1 "register_operand" "v")
           (match_operand:V_DI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 4)
                   (unspec:<VnSI>
                     [(match_dup 6) (match_dup 8) (match_dup 3)]
                     UNSPEC_PLUS_CARRY_DPP_SHR))
              (clobber (reg:DI VCC_REG))])
   (parallel [(set (match_dup 5)
                   (unspec:<VnSI>
                     [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
                     UNSPEC_PLUS_CARRY_IN_DPP_SHR))
              (clobber (reg:DI VCC_REG))])]
  {
    /* Operands 4/5: low/high part of the destination; 6/7 and 8/9: low/high
       parts of the two inputs.  */
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
    operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
3414
3415 ; Instructions to move a scalar value from lane 63 of a vector register.
;; One-register version.  Alternative 0 ("Sg" destination) uses
;; v_readlane_b32 with lane index 63; alternative 1 ("v" destination)
;; uses v_mov_b32 with the wave_ror:1 modifier.
(define_insn "mov_from_lane63_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
        (unspec:<SCALAR_MODE>
          [(match_operand:V_1REG 1 "register_operand" " v,v")]
          UNSPEC_MOV_FROM_LANE63))]
  ""
  "@
   v_readlane_b32\t%0, %1, 63
   v_mov_b32\t%0, %1 wave_ror:1"
  [(set_attr "type" "vop3a,vop_dpp")
   (set_attr "exec" "none,*")
   (set_attr "length" "8")])
3428
;; Two-register version: each alternative emits one instruction for the low
;; half (%L) and one for the high half (%H).  In the vector-destination
;; alternative the order of the two moves depends on the register numbers
;; of operands 0 and 1.
;; Both alternatives emit two instructions of the kind that the
;; one-register pattern counts as 8 bytes each, so the length is 16.
(define_insn "mov_from_lane63_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
        (unspec:<SCALAR_MODE>
          [(match_operand:V_2REG 1 "register_operand" " v,v")]
          UNSPEC_MOV_FROM_LANE63))]
  ""
  "@
   v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
   * if (REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
              \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
              \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
  [(set_attr "type" "vop3a,vop_dpp")
   (set_attr "exec" "none,*")
   (set_attr "length" "16")])
3446
3447 ;; }}}
3448 ;; {{{ Miscellaneous
3449
;; Linear series for SImode-element vectors, computed as
;; (VGPR 1 * operand 2) + operand 1.  Presumably operand 1 is the base,
;; operand 2 the step and VGPR 1 holds the lane numbers -- TODO confirm.
(define_expand "vec_series<mode>"
  [(match_operand:V_SI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:SI 2 "gcn_alu_operand")]
  ""
  {
    rtx scaled = gen_reg_rtx (<MODE>mode);
    rtx vgpr1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));

    /* scaled = vgpr1 * operand 2 (scalar broadcast).  */
    emit_insn (gen_mul<mode>3_dup (scaled, vgpr1, operands[2]));
    /* result = scaled + operand 1 (scalar broadcast).  */
    emit_insn (gen_add<mode>3_dup (operands[0], scaled, operands[1]));
    DONE;
  })
3463
;; Linear series for DImode-element vectors, computed as
;; (VGPR 1 * operand 2) + operand 1.  VGPR 1 is read as a <VnSI> vector
;; and passed to the zero-extending multiply pattern; operand 1 is
;; broadcast into a vector before the final add.
(define_expand "vec_series<mode>"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx scaled = gen_reg_rtx (<MODE>mode);
    rtx vgpr1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
    rtx base_vec = gen_reg_rtx (<MODE>mode);

    /* scaled = vgpr1 * operand 2.  */
    emit_insn (gen_mul<mode>3_zext_dup2 (scaled, vgpr1, operands[2]));
    /* base_vec = operand 1 broadcast to every lane.  */
    emit_insn (gen_vec_duplicate<mode> (base_vec, operands[1]));
    /* result = scaled + base_vec.  */
    emit_insn (gen_add<mode>3 (operands[0], scaled, base_vec));
    DONE;
  })
3479
3480 ;; }}}