]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/gcn/gcn-valu.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / gcn / gcn-valu.md
CommitLineData
7adcbafe 1;; Copyright (C) 2016-2022 Free Software Foundation, Inc.
3d6275e3
AS
2
3;; This file is free software; you can redistribute it and/or modify it under
4;; the terms of the GNU General Public License as published by the Free
5;; Software Foundation; either version 3 of the License, or (at your option)
6;; any later version.
7
8;; This file is distributed in the hope that it will be useful, but WITHOUT
9;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11;; for more details.
12
13;; You should have received a copy of the GNU General Public License
14;; along with GCC; see the file COPYING3. If not see
15;; <http://www.gnu.org/licenses/>.
16
17;; {{{ Vector iterators
18
1165109b
AS
19; Vector modes for specific types
20; (This will make more sense when there are multiple vector sizes)
; One iterator per element type.  Each currently lists a single 64-lane
; vector mode, so iterating over any of them yields exactly one pattern.
21(define_mode_iterator V_QI
22 [V64QI])
23(define_mode_iterator V_HI
24 [V64HI])
25(define_mode_iterator V_HF
26 [V64HF])
27(define_mode_iterator V_SI
28 [V64SI])
29(define_mode_iterator V_SF
30 [V64SF])
31(define_mode_iterator V_DI
32 [V64DI])
33(define_mode_iterator V_DF
34 [V64DF])
35
dc941ea9 36; Vector modes for sub-dword modes
03876953 37(define_mode_iterator V_QIHI
dc941ea9
AS
38 [V64QI V64HI])
39
3d6275e3 40; Vector modes for one vector register
03876953 41(define_mode_iterator V_1REG
3d6275e3
AS
42 [V64QI V64HI V64SI V64HF V64SF])
43
03876953 44(define_mode_iterator V_INT_1REG
2b99bed8 45 [V64QI V64HI V64SI])
03876953 46(define_mode_iterator V_INT_1REG_ALT
3d6275e3 47 [V64QI V64HI V64SI])
03876953
AS
48(define_mode_iterator V_FP_1REG
49 [V64HF V64SF])
3d6275e3
AS
50
51; Vector modes for two vector registers
03876953 52(define_mode_iterator V_2REG
3d6275e3
AS
53 [V64DI V64DF])
54
03876953
AS
55; Vector modes with native support
56(define_mode_iterator V_noQI
57 [V64HI V64HF V64SI V64SF V64DI V64DF])
58(define_mode_iterator V_noHI
59 [V64HF V64SI V64SF V64DI V64DF])
60
61(define_mode_iterator V_INT_noQI
62 [V64HI V64SI V64DI])
63
3d6275e3 64; All of the above
03876953
AS
65(define_mode_iterator V_ALL
66 [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
67(define_mode_iterator V_ALL_ALT
68 [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
69
70(define_mode_iterator V_INT
71 [V64QI V64HI V64SI V64DI])
72(define_mode_iterator V_FP
73 [V64HF V64SF V64DF])
3d6275e3
AS
74
; Mode attributes keyed on the vector mode.
;  scalar_mode / SCALAR_MODE: the element mode name in lower / upper case.
;  vnsi / VnSI: every listed vector mode maps to v64si / V64SI.
;  vndi / VnDI: every listed vector mode maps to v64di / V64DI.
;  sdwa: a selector string; only V64QI, V64HI and V64SI have an entry.
75(define_mode_attr scalar_mode
76 [(V64QI "qi") (V64HI "hi") (V64SI "si")
77 (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
78
79(define_mode_attr SCALAR_MODE
80 [(V64QI "QI") (V64HI "HI") (V64SI "SI")
81 (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
82
1165109b
AS
83(define_mode_attr vnsi
84 [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
85 (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
86
87(define_mode_attr VnSI
88 [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
89 (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
90
91(define_mode_attr vndi
92 [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
93 (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
94
95(define_mode_attr VnDI
96 [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
97 (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
98
3d66c777
AS
99(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
100
3d6275e3
AS
101;; }}}
102;; {{{ Substitutions
103
; Name-suffix attributes, one per define_subst below.  Each expands to ""
; in the original pattern name and to "_exec" in the name of the variant
; produced by the associated substitution.
104(define_subst_attr "exec" "vec_merge"
105 "" "_exec")
106(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
107 "" "_exec")
108(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
109 "" "_exec")
110(define_subst_attr "exec_scatter" "scatter_store"
111 "" "_exec")
112
; Substitution for a plain (set op0 op1) over any V_ALL mode.  The variant
; wraps op1 in a vec_merge with two new operands: operand 3 (predicate
; gcn_register_or_unspec_operand, constraint "U0") as the second vec_merge
; input, and operand 4 (DImode, predicate gcn_exec_reg_operand,
; constraint "e") as the merge mask.
113(define_subst "vec_merge"
03876953
AS
114 [(set (match_operand:V_ALL 0)
115 (match_operand:V_ALL 1))]
3d6275e3
AS
116 ""
117 [(set (match_dup 0)
03876953 118 (vec_merge:V_ALL
3d6275e3 119 (match_dup 1)
03876953 120 (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
3d6275e3
AS
121 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
122
; Same rewrite as "vec_merge", for patterns that also carry a clobber of
; operand 2.  The clobber is reproduced unchanged in the variant; the new
; vec_merge inputs are again operands 3 (constraint "U0") and 4
; (DImode mask, constraint "e").
123(define_subst "vec_merge_with_clobber"
03876953
AS
124 [(set (match_operand:V_ALL 0)
125 (match_operand:V_ALL 1))
3d6275e3
AS
126 (clobber (match_operand 2))]
127 ""
128 [(set (match_dup 0)
03876953 129 (vec_merge:V_ALL
3d6275e3 130 (match_dup 1)
03876953 131 (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
3d6275e3
AS
132 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
133 (clobber (match_dup 2))])
134
; Substitution for patterns with two sets: a vector result (operands 0/1)
; and a DImode result (operands 2/3).  In the variant the vector set is
; wrapped in a vec_merge with new operands 4 (constraint "U0") and 5
; (DImode mask, constraint "e"), and the DImode value is ANDed with
; (reg:DI EXEC_REG) before being stored to operand 2.
135(define_subst "vec_merge_with_vcc"
03876953
AS
136 [(set (match_operand:V_ALL 0)
137 (match_operand:V_ALL 1))
3d6275e3
AS
138 (set (match_operand:DI 2)
139 (match_operand:DI 3))]
140 ""
141 [(parallel
142 [(set (match_dup 0)
03876953 143 (vec_merge:V_ALL
3d6275e3 144 (match_dup 1)
03876953 145 (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
3d6275e3
AS
146 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
147 (set (match_dup 2)
148 (and:DI (match_dup 3)
149 (reg:DI EXEC_REG)))])])
150
; Substitution for four-element UNSPEC_SCATTER stores.  The variant keeps
; operands 0-3 and appends a fifth element to the unspec vector: operand 4,
; a DImode value with predicate gcn_exec_reg_operand and constraint "e".
151(define_subst "scatter_store"
152 [(set (mem:BLK (scratch))
153 (unspec:BLK
154 [(match_operand 0)
155 (match_operand 1)
156 (match_operand 2)
157 (match_operand 3)]
158 UNSPEC_SCATTER))]
159 ""
160 [(set (mem:BLK (scratch))
161 (unspec:BLK
162 [(match_dup 0)
163 (match_dup 1)
164 (match_dup 2)
165 (match_dup 3)
166 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
167 UNSPEC_SCATTER))])
168
169;; }}}
170;; {{{ Vector moves
171
172; This is the entry point for all vector register moves. Memory accesses can
173; come this way also, but will more usually use the reload_in/out,
174; gather/scatter, maskload/store, etc.
175
; Expander for whole-vector moves over every V_ALL mode.  The preparation
; code intercepts moves that involve memory; anything else falls through
; to the plain (set op0 op1) template.
;  - Destination is a MEM, and neither lra_in_progress nor reload_completed
;    is set: the source is forced into a register and scatter<mode>_expr is
;    emitted, using an address from gcn_expand_scalar_to_vector_address
;    with a <VnDI> SCRATCH.
;  - Source is a MEM under the same conditions: gather<mode>_expr is
;    emitted in the same way.
;  - Any remaining case with a MEM operand: asserts !reload_completed and
;    emits mov<mode>_sgprbase with a fresh <VnDI> pseudo as its scratch.
; In the scatter/gather cases the MEM's address space and volatile flag are
; passed on as CONST_INT operands.
176(define_expand "mov<mode>"
03876953
AS
177 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
178 (match_operand:V_ALL 1 "general_operand"))]
3d6275e3
AS
179 ""
180 {
181 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
182 {
183 operands[1] = force_reg (<MODE>mode, operands[1]);
1165109b 184 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
3d6275e3
AS
185 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
186 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
187 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
188 operands[0],
189 scratch);
190 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
191 DONE;
192 }
193 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
194 {
1165109b 195 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
3d6275e3
AS
196 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
197 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
198 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
199 operands[1],
200 scratch);
201 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
202 DONE;
203 }
204 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
205 {
206 gcc_assert (!reload_completed);
1165109b 207 rtx scratch = gen_reg_rtx (<VnDI>mode);
3d6275e3
AS
208 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
209 DONE;
210 }
211 })
212
213; A pseudo instruction that helps LRA use the "U0" constraint.
214
; Matches a move whose source satisfies gcn_unspec_operand (constraint
; "U") into a "v" destination.  The output template is empty and the
; declared length is 0, so no assembly is emitted for this insn.
215(define_insn "mov<mode>_unspec"
03876953
AS
216 [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
217 (match_operand:V_ALL 1 "gcn_unspec_operand" " U"))]
3d6275e3
AS
218 ""
219 ""
220 [(set_attr "type" "unknown")
221 (set_attr "length" "0")])
222
; Unconditional move for modes that fit one vector register (V_1REG).
; Both alternatives emit a single v_mov_b32; the "B" source alternative is
; declared as 8 bytes long, the "vA" alternative as 4.
223(define_insn "*mov<mode>"
03876953
AS
224 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
225 (match_operand:V_1REG 1 "general_operand" "vA,B"))]
3d6275e3
AS
226 ""
227 "v_mov_b32\t%0, %1"
228 [(set_attr "type" "vop1,vop1")
229 (set_attr "length" "4,8")])
230
; Masked move for V_1REG modes: operand 1 is merged with operand 2 under
; the DImode mask in operand 3.  Six alternatives:
;  0,1  register/constant source, "U0" merge value, mask in "e": v_mov_b32.
;  2    mask constrained to "cV": v_cndmask_b32 using vcc.
;  3    mask constrained to "Sv": v_cndmask_b32 naming operand 3 explicitly.
;  4,5  memory source / memory destination: template "#", so these rely on
;       a splitter; they are the only alternatives that use the <VnDI>
;       scratch (operand 4).
; The insn condition rejects a MEM destination unless operand 1 is a REG.
231(define_insn "mov<mode>_exec"
03876953
AS
232 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
233 (vec_merge:V_1REG
234 (match_operand:V_1REG 1 "general_operand" "vA, B, v,vA, m, v")
b7886845 235 (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
3d6275e3 236 "U0,U0,vA,vA,U0,U0")
b7886845 237 (match_operand:DI 3 "register_operand" " e, e,cV,Sv, e, e")))
1165109b 238 (clobber (match_scratch:<VnDI> 4 "=X, X, X, X,&v,&v"))]
3d6275e3
AS
239 "!MEM_P (operands[0]) || REG_P (operands[1])"
240 "@
241 v_mov_b32\t%0, %1
242 v_mov_b32\t%0, %1
b7886845
AS
243 v_cndmask_b32\t%0, %2, %1, vcc
244 v_cndmask_b32\t%0, %2, %1, %3
3d6275e3
AS
245 #
246 #"
247 [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
248 (set_attr "length" "4,8,4,8,16,16")])
249
250; This variant does not accept an unspec, but does permit MEM
251; read/modify/write which is necessary for maskstore.
252
253;(define_insn "*mov<mode>_exec_match"
03876953
AS
254; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
255; (vec_merge:V_1REG
256; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
3d6275e3 257; (match_dup 0)
03876953 258; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
1165109b 259; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
3d6275e3
AS
260; "!MEM_P (operands[0]) || REG_P (operands[1])"
261; "@
262; v_mov_b32\t%0, %1
263; v_mov_b32\t%0, %1
264; #
265; #"
266; [(set_attr "type" "vop1,vop1,*,*")
267; (set_attr "length" "4,8,16,16")])
268
; Unconditional move for modes that occupy two vector registers (V_2REG).
; Emits two v_mov_b32 instructions, one using the %L operands and one using
; the %H operands.  The %L move goes first unless operand 1 is a register
; with a lower REGNO than operand 0, in which case the %H move goes first.
; NOTE(review): presumably %L/%H select the low/high halves and the
; ordering protects an overlapping source register pair -- confirm.
269(define_insn "*mov<mode>"
03876953
AS
270 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
271 (match_operand:V_2REG 1 "general_operand" "vDB"))]
3d6275e3
AS
272 ""
273 {
274 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
275 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
276 else
277 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
278 }
279 [(set_attr "type" "vmult")
280 (set_attr "length" "16")])
281
; Masked move for V_2REG modes; the two-register counterpart of the V_1REG
; mov<mode>_exec.  The output code first chooses the order of the two
; halves with the same REGNO test as *mov<mode> (V_2REG), then switches on
; which_alternative:
;  0  two v_mov_b32;
;  1  two v_cndmask_b32 using vcc;
;  2  two v_cndmask_b32 naming operand 3 as the mask.
; Alternatives 3 and 4 (memory source / destination) match no case label
; and fall through to the final return of "#", leaving them to a splitter.
; The insn condition rejects a MEM destination unless operand 1 is a REG.
282(define_insn "mov<mode>_exec"
03876953
AS
283 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
284 (vec_merge:V_2REG
285 (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
b7886845 286 (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
3d6275e3 287 " U0,vDA0,vDA0,U0,U0")
b7886845 288 (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
1165109b 289 (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
3d6275e3
AS
290 "!MEM_P (operands[0]) || REG_P (operands[1])"
291 {
292 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
293 switch (which_alternative)
294 {
295 case 0:
296 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
297 case 1:
b7886845
AS
298 return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
299 "v_cndmask_b32\t%H0, %H2, %H1, vcc";
3d6275e3 300 case 2:
b7886845
AS
301 return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
302 "v_cndmask_b32\t%H0, %H2, %H1, %3";
3d6275e3
AS
303 }
304 else
305 switch (which_alternative)
306 {
307 case 0:
308 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
309 case 1:
b7886845
AS
310 return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
311 "v_cndmask_b32\t%L0, %L2, %L1, vcc";
3d6275e3 312 case 2:
b7886845
AS
313 return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
314 "v_cndmask_b32\t%L0, %L2, %L1, %3";
3d6275e3
AS
315 }
316
317 return "#";
318 }
319 [(set_attr "type" "vmult,vmult,vmult,*,*")
320 (set_attr "length" "16,16,16,16,16")])
321
322; This variant does not accept an unspec, but does permit MEM
323; read/modify/write which is necessary for maskstore.
324
325;(define_insn "*mov<mode>_exec_match"
03876953
AS
326; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
327; (vec_merge:V_2REG
328; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
3d6275e3 329; (match_dup 0)
03876953 330; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
1165109b 331; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
3d6275e3
AS
332; "!MEM_P (operands[0]) || REG_P (operands[1])"
333; "@
334; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
335; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
336; else \
337; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
338; #
339; #"
340; [(set_attr "type" "vmult,*,*")
341; (set_attr "length" "16,16,16")])
342
343; A SGPR-base load looks like:
344; <load> v, Sv
345;
346; There's no hardware instruction that corresponds to this, but vector base
347; addresses are placed in an SGPR because it is easier to add to a vector.
348; We also have a temporary vT, and the vector v1 holding numbered lanes.
349;
350; Rewrite as:
351; vT = v1 << log2(element-size)
352; vT += Sv
353; flat_load v, vT
354
; SGPR-base move for V_1REG modes, wrapped in UNSPEC_SGPRBASE and carrying
; an early-clobbered <VnDI> register (operand 2) in every alternative.
; Only recognised while lra_in_progress or once reload_completed.
; Alternatives 0 and 1 are register/constant moves emitted as v_mov_b32;
; alternatives 2 (load) and 3 (store) use template "#" and so depend on a
; splitter to produce real instructions.
355(define_insn "mov<mode>_sgprbase"
03876953
AS
356 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
357 (unspec:V_1REG
358 [(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")]
3d6275e3 359 UNSPEC_SGPRBASE))
1165109b 360 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
3d6275e3
AS
361 "lra_in_progress || reload_completed"
362 "@
363 v_mov_b32\t%0, %1
364 v_mov_b32\t%0, %1
365 #
366 #"
367 [(set_attr "type" "vop1,vop1,*,*")
368 (set_attr "length" "4,8,12,12")])
369
; SGPR-base move for V_2REG modes, wrapped in UNSPEC_SGPRBASE and carrying
; an early-clobbered <VnDI> register (operand 2) in every alternative.
; Only recognised while lra_in_progress or once reload_completed.
; Alternative 0 emits two v_mov_b32 instructions, ordered by the same REGNO
; test as *mov<mode> (V_2REG); alternatives 1 (load) and 2 (store) use
; template "#" and so depend on a splitter.
;
; The length of alternative 0 is 16, matching *mov<mode> (V_2REG), which
; emits the identical two-instruction template for the same "vDB" source
; constraint.  The V_1REG patterns charge 8 bytes for one v_mov_b32 with a
; "B" source, so two such moves need 16; a smaller value would
; under-estimate the insn size.
370(define_insn "mov<mode>_sgprbase"
03876953
AS
371 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
372 (unspec:V_2REG
373 [(match_operand:V_2REG 1 "general_operand" "vDB, m, v")]
3d6275e3 374 UNSPEC_SGPRBASE))
1165109b 375 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
3d6275e3
AS
376 "lra_in_progress || reload_completed"
377 "@
378 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
379 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
380 else \
381 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
382 #
383 #"
384 [(set_attr "type" "vmult,*,*")
385 (set_attr "length" "16,12,12")])
386
387; reload_in was once a standard name, but here it's only referenced by
388; gcn_secondary_reload. It allows a reload with a scratch register.
389
; Load a vector register (operand 0) from memory (operand 1) with the help
; of an early-clobbered <VnDI> scratch register (operand 2).  The expander
; simply forwards all three operands to mov<mode>_sgprbase.
390(define_expand "reload_in<mode>"
03876953
AS
391 [(set (match_operand:V_ALL 0 "register_operand" "= v")
392 (match_operand:V_ALL 1 "memory_operand" " m"))
1165109b 393 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
3d6275e3
AS
394 ""
395 {
396 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
397 DONE;
398 })
399
400; reload_out is similar to reload_in, above.
401
; Store a vector register (operand 1) to memory (operand 0) with the help
; of an early-clobbered <VnDI> scratch register (operand 2).  The expander
; simply forwards all three operands to mov<mode>_sgprbase.
402(define_expand "reload_out<mode>"
03876953
AS
403 [(set (match_operand:V_ALL 0 "memory_operand" "= m")
404 (match_operand:V_ALL 1 "register_operand" " v"))
1165109b 405 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
3d6275e3
AS
406 ""
407 {
408 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
409 DONE;
410 })
411
412; Expand scalar addresses into gather/scatter patterns
413
; Split an UNSPEC_SGPRBASE store (memory destination) into a four-element
; UNSPEC_SCATTER.  The preparation code sets:
;  operand 5 - the address returned by gcn_expand_scalar_to_vector_address
;              for the MEM in operand 0, using scratch operand 2 and a NULL
;              second argument;
;  operand 6 - the MEM's address space as a CONST_INT;
;  operand 7 - the MEM's volatile flag as a CONST_INT.
414(define_split
03876953
AS
415 [(set (match_operand:V_ALL 0 "memory_operand")
416 (unspec:V_ALL
417 [(match_operand:V_ALL 1 "general_operand")]
3d6275e3 418 UNSPEC_SGPRBASE))
1165109b 419 (clobber (match_scratch:<VnDI> 2))]
3d6275e3
AS
420 ""
421 [(set (mem:BLK (scratch))
422 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
423 UNSPEC_SCATTER))]
424 {
425 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
426 operands[0],
427 operands[2]);
428 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
429 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
430 })
431
; Split a vec_merge store (memory destination, mask in operand 3) into a
; five-element UNSPEC_SCATTER whose last element is that mask.  Operand 2,
; the second vec_merge input, does not appear in the replacement.
; Operands 5-7 are set as in the unmasked store split, except that operand 3
; is also passed to gcn_expand_scalar_to_vector_address and the scratch is
; operand 4.
432(define_split
03876953
AS
433 [(set (match_operand:V_ALL 0 "memory_operand")
434 (vec_merge:V_ALL
435 (match_operand:V_ALL 1 "general_operand")
436 (match_operand:V_ALL 2 "")
3d6275e3 437 (match_operand:DI 3 "gcn_exec_reg_operand")))
1165109b 438 (clobber (match_scratch:<VnDI> 4))]
3d6275e3
AS
439 ""
440 [(set (mem:BLK (scratch))
441 (unspec:BLK [(match_dup 5) (match_dup 1)
442 (match_dup 6) (match_dup 7) (match_dup 3)]
443 UNSPEC_SCATTER))]
444 {
445 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
446 operands[3],
447 operands[0],
448 operands[4]);
449 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
450 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
451 })
452
; Split an UNSPEC_SGPRBASE load (memory source in operand 1) into an
; UNSPEC_GATHER.  The preparation code sets:
;  operand 5 - the address returned by gcn_expand_scalar_to_vector_address
;              for the MEM in operand 1, using scratch operand 2 and a NULL
;              second argument;
;  operand 6 - the MEM's address space as a CONST_INT;
;  operand 7 - the MEM's volatile flag as a CONST_INT.
453(define_split
03876953
AS
454 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
455 (unspec:V_ALL
456 [(match_operand:V_ALL 1 "memory_operand")]
3d6275e3 457 UNSPEC_SGPRBASE))
1165109b 458 (clobber (match_scratch:<VnDI> 2))]
3d6275e3
AS
459 ""
460 [(set (match_dup 0)
03876953
AS
461 (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
462 (mem:BLK (scratch))]
463 UNSPEC_GATHER))]
3d6275e3
AS
464 {
465 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
466 operands[1],
467 operands[2]);
468 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
469 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
470 })
471
; Split a vec_merge load (memory source in operand 1, mask in operand 3)
; into a vec_merge whose first input is an UNSPEC_GATHER; operands 2 and 3
; are kept as the second input and the mask.  Operands 5-7 are the vector
; address (built with operand 3 and scratch operand 4), the MEM's address
; space and its volatile flag.
472(define_split
03876953
AS
473 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
474 (vec_merge:V_ALL
475 (match_operand:V_ALL 1 "memory_operand")
476 (match_operand:V_ALL 2 "")
3d6275e3 477 (match_operand:DI 3 "gcn_exec_reg_operand")))
1165109b 478 (clobber (match_scratch:<VnDI> 4))]
3d6275e3
AS
479 ""
480 [(set (match_dup 0)
03876953
AS
481 (vec_merge:V_ALL
482 (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
483 (mem:BLK (scratch))]
484 UNSPEC_GATHER)
3d6275e3
AS
485 (match_dup 2)
486 (match_dup 3)))]
487 {
488 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
489 operands[3],
490 operands[1],
491 operands[4]);
492 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
493 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
494 })
495
496; TODO: Add zero/sign extending variants.
497
498;; }}}
499;; {{{ Lane moves
500
501; v_writelane and v_readlane work regardless of exec flags.
502; We allow source to be scratch.
503;
504; FIXME these should take A immediates
505
; Write scalar operand 1 into one lane of a V_1REG vector.  The lane is
; given by operand 2 and appears in the RTL as the mask (1 << operand 2);
; the remaining lanes come from operand 3 (constraint "U0").  Emitted as a
; single v_writelane_b32 with the "exec" attribute set to "none".
506(define_insn "*vec_set<mode>"
03876953
AS
507 [(set (match_operand:V_1REG 0 "register_operand" "= v")
508 (vec_merge:V_1REG
509 (vec_duplicate:V_1REG
510 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
511 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
3d6275e3 512 (ashift (const_int 1)
03876953 513 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
3d6275e3
AS
514 ""
515 "v_writelane_b32 %0, %1, %2"
516 [(set_attr "type" "vop3a")
517 (set_attr "length" "8")
518 (set_attr "exec" "none")
519 (set_attr "laneselect" "yes")])
520
521; FIXME: 64-bit operations really should be splitters, but I am not sure how
522; to represent vertical subregs.
; V_2REG form of *vec_set<mode>: the same RTL shape, emitted as two
; v_writelane_b32 instructions, one on the %L operands and one on the %H
; operands, both using the lane number in operand 2.
523(define_insn "*vec_set<mode>"
03876953
AS
524 [(set (match_operand:V_2REG 0 "register_operand" "= v")
525 (vec_merge:V_2REG
526 (vec_duplicate:V_2REG
527 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
528 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
3d6275e3 529 (ashift (const_int 1)
03876953 530 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
3d6275e3
AS
531 ""
532 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
533 [(set_attr "type" "vmult")
534 (set_attr "length" "16")
535 (set_attr "exec" "none")
536 (set_attr "laneselect" "yes")])
537
; Named expander for setting one lane.  It has no preparation code; the
; template merges a broadcast of scalar operand 1 into operand 0 itself
; (match_dup 0) under the mask (1 << operand 2).
538(define_expand "vec_set<mode>"
03876953
AS
539 [(set (match_operand:V_ALL 0 "register_operand")
540 (vec_merge:V_ALL
541 (vec_duplicate:V_ALL
3d6275e3
AS
542 (match_operand:<SCALAR_MODE> 1 "register_operand"))
543 (match_dup 0)
544 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
545 "")
546
; Variant of *vec_set<mode> (V_1REG) in which the vec_merge mask is already
; a constant integer rather than a shift expression.  The insn condition
; accepts the constant only when exact_log2 of it, cast to unsigned, is
; below the number of units in the mode.
; NOTE(review): presumably exact_log2 returns a negative value for a
; non-power-of-two, which the unsigned cast turns into a rejection --
; confirm.
; The output code replaces operand 2 with its log2 before printing
; v_writelane_b32.
547(define_insn "*vec_set<mode>_1"
03876953
AS
548 [(set (match_operand:V_1REG 0 "register_operand" "=v")
549 (vec_merge:V_1REG
550 (vec_duplicate:V_1REG
551 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
552 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
553 (match_operand:SI 2 "const_int_operand" " i")))]
1165109b 554 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
3d6275e3
AS
555 {
556 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
557 return "v_writelane_b32 %0, %1, %2";
558 }
559 [(set_attr "type" "vop3a")
560 (set_attr "length" "8")
561 (set_attr "exec" "none")
562 (set_attr "laneselect" "yes")])
563
; V_2REG form of *vec_set<mode>_1: constant-mask lane write with the same
; exact_log2 range check in the insn condition.  The output code replaces
; operand 2 with its log2 and prints two v_writelane_b32 instructions, one
; on the %L operands and one on the %H operands.
564(define_insn "*vec_set<mode>_1"
03876953
AS
565 [(set (match_operand:V_2REG 0 "register_operand" "=v")
566 (vec_merge:V_2REG
567 (vec_duplicate:V_2REG
568 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
569 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
570 (match_operand:SI 2 "const_int_operand" " i")))]
1165109b 571 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
3d6275e3
AS
572 {
573 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
574 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
575 }
576 [(set_attr "type" "vmult")
577 (set_attr "length" "16")
578 (set_attr "exec" "none")
579 (set_attr "laneselect" "yes")])
580
; Broadcast scalar operand 1 to every lane of a V_1REG vector with a single
; v_mov_b32.  The <exec> suffix means the "vec_merge" substitution also
; generates a masked "_exec" variant of this pattern.
581(define_insn "vec_duplicate<mode><exec>"
03876953
AS
582 [(set (match_operand:V_1REG 0 "register_operand" "=v")
583 (vec_duplicate:V_1REG
584 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
3d6275e3
AS
585 ""
586 "v_mov_b32\t%0, %1"
587 [(set_attr "type" "vop3a")
588 (set_attr "length" "8")])
589
; Broadcast scalar operand 1 to every lane of a V_2REG vector with two
; v_mov_b32 instructions, one on the %L operands and one on the %H
; operands.  The <exec> suffix means the "vec_merge" substitution also
; generates a masked "_exec" variant of this pattern.
590(define_insn "vec_duplicate<mode><exec>"
03876953
AS
591 [(set (match_operand:V_2REG 0 "register_operand" "= v")
592 (vec_duplicate:V_2REG
3d6275e3
AS
593 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
594 ""
595 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
596 [(set_attr "type" "vop3a")
597 (set_attr "length" "16")])
598
; Read the lane selected by operand 2 out of V_1REG vector operand 1 into
; scalar operand 0 (constraint "Sg") with a single v_readlane_b32.  The
; "exec" attribute is set to "none".
599(define_insn "vec_extract<mode><scalar_mode>"
03876953 600 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
3d6275e3 601 (vec_select:<SCALAR_MODE>
03876953
AS
602 (match_operand:V_1REG 1 "register_operand" " v")
603 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
3d6275e3
AS
604 ""
605 "v_readlane_b32 %0, %1, %2"
606 [(set_attr "type" "vop3a")
607 (set_attr "length" "8")
608 (set_attr "exec" "none")
609 (set_attr "laneselect" "yes")])
610
; V_2REG form of vec_extract: two v_readlane_b32 instructions, one on the
; %L operands and one on the %H operands.  The destination is marked
; early-clobber ("=&Sg"); operand 2 is still read by the second
; instruction after the first has written part of operand 0.
611(define_insn "vec_extract<mode><scalar_mode>"
03876953 612 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
3d6275e3 613 (vec_select:<SCALAR_MODE>
03876953
AS
614 (match_operand:V_2REG 1 "register_operand" " v")
615 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
3d6275e3
AS
616 ""
617 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
618 [(set_attr "type" "vmult")
619 (set_attr "length" "16")
620 (set_attr "exec" "none")
621 (set_attr "laneselect" "yes")])
622
b92d1124
AS
; Extract into operand 0 the element of vector operand 2 at the lane given
; by the highest set bit of the DImode mask in operand 1.  The lane index is
; computed in a fresh SImode pseudo as 63 minus the count of leading zeros
; of the mask, then passed to vec_extract.  Only available while new
; pseudos can be created.
; NOTE(review): no special handling is visible here for a zero mask;
; fold_extract_last_<mode> tests for that before calling this expander.
623(define_expand "extract_last_<mode>"
624 [(match_operand:<SCALAR_MODE> 0 "register_operand")
625 (match_operand:DI 1 "gcn_alu_operand")
03876953 626 (match_operand:V_ALL 2 "register_operand")]
b92d1124
AS
627 "can_create_pseudo_p ()"
628 {
629 rtx dst = operands[0];
630 rtx mask = operands[1];
631 rtx vect = operands[2];
632 rtx tmpreg = gen_reg_rtx (SImode);
633
634 emit_insn (gen_clzdi2 (tmpreg, mask));
635 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
636 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
637 DONE;
638 })
639
; Like extract_last_<mode>, but with a fallback value.  Emits a conditional
; branch to else_label when the mask (operand 2) equals zero.  On the
; fall-through path it extracts the last active element of vector operand 3
; into operand 0 and jumps to end_label; at else_label it copies the default
; value (operand 1) into operand 0 instead.  Only available while new
; pseudos can be created.
640(define_expand "fold_extract_last_<mode>"
641 [(match_operand:<SCALAR_MODE> 0 "register_operand")
642 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
643 (match_operand:DI 2 "gcn_alu_operand")
03876953 644 (match_operand:V_ALL 3 "register_operand")]
b92d1124
AS
645 "can_create_pseudo_p ()"
646 {
647 rtx dst = operands[0];
648 rtx default_value = operands[1];
649 rtx mask = operands[2];
650 rtx vect = operands[3];
651 rtx else_label = gen_label_rtx ();
652 rtx end_label = gen_label_rtx ();
653
654 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
655 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
656 emit_insn (gen_extract_last_<mode> (dst, mask, vect));
657 emit_jump_insn (gen_jump (end_label));
658 emit_barrier ();
659 emit_label (else_label);
660 emit_move_insn (dst, default_value);
661 emit_label (end_label);
662 DONE;
663 })
664
; Initialise vector register operand 0 from operand 1.  All of the work is
; delegated to gcn_expand_vector_init; operand 1 has no mode or predicate
; restriction in this pattern.
3d6275e3 665(define_expand "vec_init<mode><scalar_mode>"
03876953 666 [(match_operand:V_ALL 0 "register_operand")
3d6275e3
AS
667 (match_operand 1)]
668 ""
669 {
670 gcn_expand_vector_init (operands[0], operands[1]);
671 DONE;
672 })
673
674;; }}}
675;; {{{ Scatter / Gather
676
677;; GCN does not have an instruction for loading a vector from contiguous
678;; memory so *all* loads and stores are eventually converted to scatter
679;; or gather.
680;;
681;; GCC does not permit MEM to hold vectors of addresses, so we must use an
682;; unspec. The unspec formats are as follows:
683;;
1165109b 684;; (unspec:V??
3d6275e3
AS
685;; [(<address expression>)
686;; (<addr_space_t>)
687;; (<use_glc>)
688;; (mem:BLK (scratch))]
689;; UNSPEC_GATHER)
690;;
691;; (unspec:BLK
692;; [(<address expression>)
693;; (<source register>)
694;; (<addr_space_t>)
695;; (<use_glc>)
696;; (<exec>)]
697;; UNSPEC_SCATTER)
698;;
699;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
700;; - The mem:BLK does not contain any real information, but indicates that an
701;; unknown memory read is taking place. Stores are expected to use a similar
702;; mem:BLK outside the unspec.
703;; - The address space and glc (volatile) fields are there to replace the
704;; fields normally found in a MEM.
705;; - Multiple forms of address expression are supported, below.
aad32a00
AS
706;;
707;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
3d6275e3 708
; Named gather-load expander.  Operands: 0 destination vector, 1 DImode
; base register, 2 <VnSI> offset vector, 3 an immediate passed to
; gcn_expand_scaled_offsets via INTVAL, 4 an SImode value passed to the same
; helper (presumably the scale -- confirm against the helper).
; The helper is called for DEFAULT_ADDR_SPACE.  If the address it returns
; has mode <VnDI>, the single-offset gather insn is used; otherwise the
; two-offset insn is used with operand 1 as the scalar base.  The constant
; offset, address space and glc operands are all passed as const0_rtx.
1165109b 709(define_expand "gather_load<mode><vnsi>"
03876953 710 [(match_operand:V_ALL 0 "register_operand")
3d6275e3 711 (match_operand:DI 1 "register_operand")
1165109b 712 (match_operand:<VnSI> 2 "register_operand")
3d6275e3
AS
713 (match_operand 3 "immediate_operand")
714 (match_operand:SI 4 "gcn_alu_operand")]
715 ""
716 {
717 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
718 operands[2], operands[4],
719 INTVAL (operands[3]), NULL);
720
1165109b 721 if (GET_MODE (addr) == <VnDI>mode)
3d6275e3
AS
722 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
723 const0_rtx, const0_rtx));
724 else
725 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
726 addr, const0_rtx, const0_rtx,
727 const0_rtx));
728 DONE;
729 })
730
3d6275e3
AS
731; Allow any address expression
; Builds an UNSPEC_GATHER load without constraining the form of the address
; expression: operand 1 has an empty predicate.  Operands 2 and 3 are
; immediates; per the unspec layout documented above they are the address
; space and glc fields.  The preparation code is empty.
732(define_expand "gather<mode>_expr<exec>"
03876953
AS
733 [(set (match_operand:V_ALL 0 "register_operand")
734 (unspec:V_ALL
3d6275e3
AS
735 [(match_operand 1 "")
736 (match_operand 2 "immediate_operand")
737 (match_operand 3 "immediate_operand")
738 (mem:BLK (scratch))]
739 UNSPEC_GATHER))]
740 ""
741 {})
742
; Gather load from a <VnDI> vector of addresses (operand 1) plus a constant
; offset (operand 2).  Operand 3 is the address space and operand 4 selects
; the " glc" suffix when non-zero.
;
; Accepted combinations:
;  - flat address space: offset 0 on any target, or an offset below 0x1000
;    only when TARGET_GCN5_PLUS.  The flat_load template used when
;    TARGET_GCN5_PLUS is false prints no "offset:" field, so a non-zero
;    offset must not be accepted there or it would be silently dropped.
;    This mirrors the condition on scatter<mode>_insn_1offset.
;  - global address space: offset in the range [-0x1000, 0x1000).
;
; The output code formats the instruction into a static buffer and appends
; an s_waitcnt after the load.
743(define_insn "gather<mode>_insn_1offset<exec>"
1165109b 744 [(set (match_operand:V_ALL 0 "register_operand" "=v")
03876953 745 (unspec:V_ALL
1165109b
AS
746 [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
747 (vec_duplicate:<VnDI>
748 (match_operand 2 "immediate_operand" " n")))
749 (match_operand 3 "immediate_operand" " n")
750 (match_operand 4 "immediate_operand" " n")
3d6275e3
AS
751 (mem:BLK (scratch))]
752 UNSPEC_GATHER))]
753 "(AS_FLAT_P (INTVAL (operands[3]))
754 && (INTVAL(operands[2]) == 0
755 || (TARGET_GCN5_PLUS && (unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
756 || (AS_GLOBAL_P (INTVAL (operands[3]))
757 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
758 {
759 addr_space_t as = INTVAL (operands[3]);
760 const char *glc = INTVAL (operands[4]) ? " glc" : "";
761
762 static char buf[200];
763 if (AS_FLAT_P (as))
764 {
765 if (TARGET_GCN5_PLUS)
1e8f5d49 766 sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
3d6275e3
AS
767 glc);
768 else
1e8f5d49 769 sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
3d6275e3
AS
770 }
771 else if (AS_GLOBAL_P (as))
28dd61b7 772 sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
3d6275e3
AS
773 "s_waitcnt\tvmcnt(0)", glc);
774 else
775 gcc_unreachable ();
776
777 return buf;
778 }
779 [(set_attr "type" "flat")
780 (set_attr "length" "12")])
781
; Gather load for the DS address spaces: a <VnSI> vector of addresses
; (operand 1) plus a constant offset (operand 2) that must be below
; 0x10000.  Operand 3 is the address space and must satisfy AS_ANY_DS_P; a
; " gds" suffix is added when it also satisfies AS_GDS_P.  Operand 4 is
; matched but not used by the output code.  The ds_read is followed by
; s_waitcnt lgkmcnt(0).
782(define_insn "gather<mode>_insn_1offset_ds<exec>"
1165109b 783 [(set (match_operand:V_ALL 0 "register_operand" "=v")
03876953 784 (unspec:V_ALL
1165109b
AS
785 [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
786 (vec_duplicate:<VnSI>
787 (match_operand 2 "immediate_operand" " n")))
788 (match_operand 3 "immediate_operand" " n")
789 (match_operand 4 "immediate_operand" " n")
3d6275e3
AS
790 (mem:BLK (scratch))]
791 UNSPEC_GATHER))]
792 "(AS_ANY_DS_P (INTVAL (operands[3]))
793 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
794 {
795 addr_space_t as = INTVAL (operands[3]);
796 static char buf[200];
797 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
798 (AS_GDS_P (as) ? " gds" : ""));
799 return buf;
800 }
801 [(set_attr "type" "ds")
802 (set_attr "length" "12")])
803
; Gather load whose address is: broadcast DImode base (operand 1, "Sv")
; plus sign-extended <VnSI> offsets (operand 2) plus a constant (operand 3).
; Only the global address space is accepted (operand 4), with the constant
; in the range [-0x1000, 0x1000).  Operand 5 selects the " glc" suffix.
; The VGPR holding operand 2 is printed by number rather than through an
; operand escape: as "vN" when HAVE_GCN_ASM_GLOBAL_LOAD_FIXED is set, and as
; the pair "v[N:N+1]" otherwise, to satisfy the assembler bug noted in the
; code.
804(define_insn "gather<mode>_insn_2offsets<exec>"
1165109b 805 [(set (match_operand:V_ALL 0 "register_operand" "=v")
03876953 806 (unspec:V_ALL
1165109b
AS
807 [(plus:<VnDI>
808 (plus:<VnDI>
809 (vec_duplicate:<VnDI>
810 (match_operand:DI 1 "register_operand" "Sv"))
811 (sign_extend:<VnDI>
812 (match_operand:<VnSI> 2 "register_operand" " v")))
813 (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
814 (match_operand 4 "immediate_operand" " n")
815 (match_operand 5 "immediate_operand" " n")
3d6275e3
AS
816 (mem:BLK (scratch))]
817 UNSPEC_GATHER))]
818 "(AS_GLOBAL_P (INTVAL (operands[4]))
819 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
820 {
821 addr_space_t as = INTVAL (operands[4]);
822 const char *glc = INTVAL (operands[5]) ? " glc" : "";
823
824 static char buf[200];
825 if (AS_GLOBAL_P (as))
826 {
827 /* Work around assembler bug in which a 64-bit register is expected,
828 but a 32-bit value would be correct. */
829 int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
81c362c7
AS
830 if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED)
831 sprintf (buf, "global_load%%o0\t%%0, v%d, %%1 offset:%%3%s\;"
832 "s_waitcnt\tvmcnt(0)", reg, glc);
833 else
834 sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
835 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
3d6275e3
AS
836 }
837 else
838 gcc_unreachable ();
839
840 return buf;
841 }
842 [(set_attr "type" "flat")
843 (set_attr "length" "12")])
844
; Named scatter-store expander.  Operands: 0 DImode base register, 1 <VnSI>
; offset vector, 2 an immediate passed to gcn_expand_scaled_offsets via
; INTVAL, 3 an SImode value passed to the same helper (presumably the scale
; -- confirm against the helper), 4 the vector of values to store.
; The helper is called for DEFAULT_ADDR_SPACE.  If the address it returns
; has mode <VnDI>, the single-offset scatter insn is used; otherwise the
; two-offset insn is used with operand 0 as the scalar base.  The constant
; offset, address space and glc operands are all passed as const0_rtx.
1165109b 845(define_expand "scatter_store<mode><vnsi>"
3d6275e3 846 [(match_operand:DI 0 "register_operand")
1165109b 847 (match_operand:<VnSI> 1 "register_operand")
3d6275e3
AS
848 (match_operand 2 "immediate_operand")
849 (match_operand:SI 3 "gcn_alu_operand")
03876953 850 (match_operand:V_ALL 4 "register_operand")]
3d6275e3
AS
851 ""
852 {
853 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
854 operands[1], operands[3],
855 INTVAL (operands[2]), NULL);
856
1165109b 857 if (GET_MODE (addr) == <VnDI>mode)
3d6275e3
AS
858 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
859 const0_rtx, const0_rtx));
860 else
861 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
862 const0_rtx, operands[4],
863 const0_rtx, const0_rtx));
864 DONE;
865 })
866
3d6275e3
AS
867; Allow any address expression
; Builds an UNSPEC_SCATTER store without constraining the form of the
; address expression: operand 0 is a <VnDI> value with an empty predicate.
; Operand 1 is the source vector register; operands 2 and 3 are immediates,
; which per the unspec layout documented above are the address space and
; glc fields.  The preparation code is empty.
868(define_expand "scatter<mode>_expr<exec_scatter>"
869 [(set (mem:BLK (scratch))
870 (unspec:BLK
1165109b 871 [(match_operand:<VnDI> 0 "")
03876953 872 (match_operand:V_ALL 1 "register_operand")
3d6275e3
AS
873 (match_operand 2 "immediate_operand")
874 (match_operand 3 "immediate_operand")]
875 UNSPEC_SCATTER))]
876 ""
877 {})
878
; Scatter store of vector operand 2 to a <VnDI> vector of addresses
; (operand 0) plus a constant offset (operand 1).  Operand 3 is the address
; space and operand 4 selects the " glc" suffix when non-zero.
; Accepted combinations:
;  - flat address space: offset 0 on any target, or an offset below 0x1000
;    when TARGET_GCN5_PLUS (the only case in which the flat_store template
;    prints an "offset:" field);
;  - global address space: offset in the range [-0x1000, 0x1000).
; The output code formats the instruction into a static buffer; no
; s_waitcnt is appended.
879(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
880 [(set (mem:BLK (scratch))
881 (unspec:BLK
1165109b
AS
882 [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
883 (vec_duplicate:<VnDI>
884 (match_operand 1 "immediate_operand" "n")))
885 (match_operand:V_ALL 2 "register_operand" "v")
886 (match_operand 3 "immediate_operand" "n")
887 (match_operand 4 "immediate_operand" "n")]
3d6275e3
AS
888 UNSPEC_SCATTER))]
889 "(AS_FLAT_P (INTVAL (operands[3]))
890 && (INTVAL(operands[1]) == 0
891 || (TARGET_GCN5_PLUS
892 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
893 || (AS_GLOBAL_P (INTVAL (operands[3]))
894 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
895 {
896 addr_space_t as = INTVAL (operands[3]);
897 const char *glc = INTVAL (operands[4]) ? " glc" : "";
898
899 static char buf[200];
900 if (AS_FLAT_P (as))
901 {
902 if (TARGET_GCN5_PLUS)
930c5599 903 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
3d6275e3 904 else
930c5599 905 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
3d6275e3
AS
906 }
907 else if (AS_GLOBAL_P (as))
930c5599 908 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
3d6275e3
AS
909 else
910 gcc_unreachable ();
911
912 return buf;
913 }
914 [(set_attr "type" "flat")
915 (set_attr "length" "12")])
916
; Scatter store for the DS address spaces (AS_ANY_DS_P): a <VnSI> address
; vector (operand 0) plus a constant offset below 0x10000 (operand 1).
; Operand 2 is the data and operand 3 the address space.  The output is a
; ds_write followed by "s_waitcnt lgkmcnt(0)"; " gds" is appended when
; AS_GDS_P holds.  Operand 4 is matched but not used by the output code.
917(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
918 [(set (mem:BLK (scratch))
919 (unspec:BLK
1165109b
AS
920 [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
921 (vec_duplicate:<VnSI>
922 (match_operand 1 "immediate_operand" "n")))
923 (match_operand:V_ALL 2 "register_operand" "v")
924 (match_operand 3 "immediate_operand" "n")
925 (match_operand 4 "immediate_operand" "n")]
3d6275e3
AS
926 UNSPEC_SCATTER))]
927 "(AS_ANY_DS_P (INTVAL (operands[3]))
928 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
929 {
930 addr_space_t as = INTVAL (operands[3]);
931 static char buf[200];
e929d65b 932 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
3d6275e3
AS
933 (AS_GDS_P (as) ? " gds" : ""));
934 return buf;
935 }
936 [(set_attr "type" "ds")
937 (set_attr "length" "12")])
938
; Scatter store whose address is a scalar DI base (operand 0, duplicated
; across the vector) + sign-extended <VnSI> offsets (operand 1) + a
; constant offset (operand 2).  Operand 3 is the data, operand 4 the
; address space, and a non-zero operand 5 appends " glc".
; Accepted only for the global address space with the constant offset in
; [-0x1000, 0x1000).  The offset register is printed by hand, as either
; "vN" or "v[N:N+1]" depending on HAVE_GCN_ASM_GLOBAL_LOAD_FIXED.
939(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
940 [(set (mem:BLK (scratch))
941 (unspec:BLK
1165109b
AS
942 [(plus:<VnDI>
943 (plus:<VnDI>
944 (vec_duplicate:<VnDI>
945 (match_operand:DI 0 "register_operand" "Sv"))
946 (sign_extend:<VnDI>
947 (match_operand:<VnSI> 1 "register_operand" " v")))
948 (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
949 (match_operand:V_ALL 3 "register_operand" " v")
950 (match_operand 4 "immediate_operand" " n")
951 (match_operand 5 "immediate_operand" " n")]
3d6275e3
AS
952 UNSPEC_SCATTER))]
953 "(AS_GLOBAL_P (INTVAL (operands[4]))
954 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
955 {
956 addr_space_t as = INTVAL (operands[4]);
957 const char *glc = INTVAL (operands[5]) ? " glc" : "";
958
959 static char buf[200];
960 if (AS_GLOBAL_P (as))
961 {
962 /* Work around assembler bug in which a 64-bit register is expected,
963 but a 32-bit value would be correct. */
964 int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
81c362c7
AS
965 if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED)
966 sprintf (buf, "global_store%%s3\tv%d, %%3, %%0 offset:%%2%s",
967 reg, glc);
968 else
969 sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
970 reg, reg + 1, glc);
3d6275e3
AS
971 }
972 else
973 gcc_unreachable ();
974
975 return buf;
976 }
977 [(set_attr "type" "flat")
978 (set_attr "length" "12")])
979
980;; }}}
981;; {{{ Permutations
982
; UNSPEC_BPERMUTE for the single-register vector modes (V_1REG).
; Operand 2 is the data vector, operand 1 a <VnSI> vector and operand 3
; the exec register.  Emits ds_bpermute_b32 with operand 1 printed before
; operand 2, followed by "s_waitcnt lgkmcnt(0)".
; NOTE(review): the "type" attribute is "vop2" although the mnemonic is a
; ds_ instruction -- confirm this is intentional.
983(define_insn "ds_bpermute<mode>"
03876953
AS
984 [(set (match_operand:V_1REG 0 "register_operand" "=v")
985 (unspec:V_1REG
986 [(match_operand:V_1REG 2 "register_operand" " v")
1165109b 987 (match_operand:<VnSI> 1 "register_operand" " v")
03876953 988 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
3d6275e3
AS
989 UNSPEC_BPERMUTE))]
990 ""
991 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
992 [(set_attr "type" "vop2")
993 (set_attr "length" "12")])
994
; UNSPEC_BPERMUTE for the two-register vector modes (V_2REG).  Output is
; "#"; once reload_completed it splits into two <VnSI> UNSPEC_BPERMUTE
; sets, one per part (0 and 1) of the destination and of the data operand
; as returned by gcn_operand_part.  Both halves reuse operand 1 and the
; exec operand 3.  The destination is earlyclobber ("=&v").
995(define_insn_and_split "ds_bpermute<mode>"
03876953
AS
996 [(set (match_operand:V_2REG 0 "register_operand" "=&v")
997 (unspec:V_2REG
998 [(match_operand:V_2REG 2 "register_operand" " v0")
1165109b 999 (match_operand:<VnSI> 1 "register_operand" " v")
03876953 1000 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
3d6275e3
AS
1001 UNSPEC_BPERMUTE))]
1002 ""
1003 "#"
1004 "reload_completed"
1165109b
AS
1005 [(set (match_dup 4) (unspec:<VnSI>
1006 [(match_dup 6) (match_dup 1) (match_dup 3)]
1007 UNSPEC_BPERMUTE))
1008 (set (match_dup 5) (unspec:<VnSI>
1009 [(match_dup 7) (match_dup 1) (match_dup 3)]
1010 UNSPEC_BPERMUTE))]
3d6275e3
AS
1011 {
1012 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
1013 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
1014 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
1015 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
1016 }
1017 [(set_attr "type" "vmult")
1018 (set_attr "length" "24")])
1019
; UNSPEC_MOV_DPP_SHR move for the V_noHI modes.  Operand 1 is the source
; vector and operand 2 a constant integer; the assembly text is produced
; by gcn_expand_dpp_shr_insn from "v_mov_b32" and the value of operand 2.
a5879399 1020(define_insn "@dpp_move<mode>"
03876953
AS
1021 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1022 (unspec:V_noHI
1023 [(match_operand:V_noHI 1 "register_operand" " v")
1024 (match_operand:SI 2 "const_int_operand" " n")]
a5879399
AS
1025 UNSPEC_MOV_DPP_SHR))]
1026 ""
1027 {
1028 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
1029 UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
1030 }
1031 [(set_attr "type" "vop_dpp")
1032 (set_attr "length" "16")])
1033
3d6275e3
AS
1034;; }}}
1035;; {{{ ALU special case: add/sub
1036
; Vector addition for the single-register integer modes (V_INT_1REG).
; The emitted v_add names vcc as its carry destination, hence the
; VCC_REG clobber.  Operand 1 must be a register; operand 2 is any
; gcn_alu_operand and is printed first in the assembly.
77f7566e 1037(define_insn "add<mode>3<exec_clobber>"
03876953
AS
1038 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1039 (plus:V_INT_1REG
1040 (match_operand:V_INT_1REG 1 "register_operand" "% v")
1041 (match_operand:V_INT_1REG 2 "gcn_alu_operand" "vSvB")))
3d6275e3
AS
1042 (clobber (reg:DI VCC_REG))]
1043 ""
1044 "v_add%^_u32\t%0, vcc, %2, %1"
1045 [(set_attr "type" "vop2")
1046 (set_attr "length" "8")])
1047
; Vector addition (V_INT_1REG) where one addend is a scalar of
; <SCALAR_MODE> (operand 2) duplicated across the vector and the other is
; vector register operand 1.  Emits the same v_add template as the plain
; add pattern and likewise clobbers VCC_REG.
77f7566e 1048(define_insn "add<mode>3_dup<exec_clobber>"
03876953
AS
1049 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1050 (plus:V_INT_1REG
1051 (vec_duplicate:V_INT_1REG
1052 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
1053 (match_operand:V_INT_1REG 1 "register_operand" " v")))
3d6275e3
AS
1054 (clobber (reg:DI VCC_REG))]
1055 ""
1056 "v_add%^_u32\t%0, vcc, %2, %1"
1057 [(set_attr "type" "vop2")
1058 (set_attr "length" "8")])
1059
1165109b
AS
; V_SI addition with an explicit carry output.  Operand 3 (a DI register)
; is set to the unsigned-overflow test (operand 1 + operand 2) <u
; operand 1 and is printed as the instruction's carry destination.
; Alternative 0 is typed vop2, alternative 1 vop3b.
1060(define_insn "add<mode>3_vcc<exec_vcc>"
1061 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1062 (plus:V_SI
1063 (match_operand:V_SI 1 "register_operand" "% v, v")
1064 (match_operand:V_SI 2 "gcn_alu_operand" "vSvB,vSvB")))
1065 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
1066 (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
3d6275e3
AS
1067 (match_dup 1)))]
1068 ""
1069 "v_add%^_u32\t%0, %3, %2, %1"
1070 [(set_attr "type" "vop2,vop3b")
1071 (set_attr "length" "8")])
1072
1073; This pattern only changes the VCC bits when the corresponding lane is
1074; enabled, so the set must be described as an ior.
1075
1165109b
AS
; V_SI addition of a duplicated SI scalar (operand 1) and a vector
; register (operand 2), with the carry written to DI operand 3.
; NOTE(review): in the carry set, vec_duplicate wraps (match_dup 2), the
; operand declared V_SI, while (match_dup 1), the SI scalar, is used
; bare -- the reverse of the first set.  Confirm whether the two
; match_dups are swapped.
; NOTE(review): the comment preceding this pattern says the set must be
; described as an ior, but no ior appears in this pattern -- confirm
; which pattern that comment belongs to.
1076(define_insn "add<mode>3_vcc_dup<exec_vcc>"
1077 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1078 (plus:V_SI
1079 (vec_duplicate:V_SI
1080 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
1081 (match_operand:V_SI 2 "register_operand" " v, v")))
1082 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
1083 (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
1084 (match_dup 1))
1085 (vec_duplicate:V_SI (match_dup 2))))]
3d6275e3
AS
1086 ""
1087 "v_add%^_u32\t%0, %3, %2, %1"
1088 [(set_attr "type" "vop2,vop3b")
1089 (set_attr "length" "8,8")])
1090
66b01cc3
AS
1091; v_addc does not accept an SGPR because the VCC read already counts as an
1092; SGPR use and the number of SGPR operands is limited to 1. It does not
1093; accept "B" immediate constants due to a related bus conflict.
3d6275e3 1094
1165109b
AS
; V_SI add with carry-in.  The carry-in is a per-lane 1 or 0 chosen by
; the DI mask in operand 3 (vec_merge of duplicated 1 and 0); the result
; is carry-in + operand 1 + operand 2.  Operand 4 receives the carry-out,
; described as the ior of two unsigned-overflow tests: one on
; (carry-in + operand 1) and one on that sum plus operand 2.
1095(define_insn "addc<mode>3<exec_vcc>"
1096 [(set (match_operand:V_SI 0 "register_operand" "=v, v")
1097 (plus:V_SI
1098 (plus:V_SI
1099 (vec_merge:V_SI
1100 (vec_duplicate:V_SI (const_int 1))
1101 (vec_duplicate:V_SI (const_int 0))
66b01cc3 1102 (match_operand:DI 3 "register_operand" " cV,cVSv"))
1165109b
AS
1103 (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA"))
1104 (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA")))
66b01cc3 1105 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
1165109b
AS
1106 (ior:DI (ltu:DI (plus:V_SI
1107 (plus:V_SI
1108 (vec_merge:V_SI
1109 (vec_duplicate:V_SI (const_int 1))
1110 (vec_duplicate:V_SI (const_int 0))
3d6275e3
AS
1111 (match_dup 3))
1112 (match_dup 1))
1113 (match_dup 2))
1114 (match_dup 2))
1165109b
AS
1115 (ltu:DI (plus:V_SI
1116 (vec_merge:V_SI
1117 (vec_duplicate:V_SI (const_int 1))
1118 (vec_duplicate:V_SI (const_int 0))
3d6275e3
AS
1119 (match_dup 3))
1120 (match_dup 1))
1121 (match_dup 1))))]
1122 ""
66b01cc3 1123 "v_addc%^_u32\t%0, %4, %2, %1, %3"
3d6275e3
AS
1124 [(set_attr "type" "vop2,vop3b")
1125 (set_attr "length" "4,8")])
1126
; Vector subtraction for the single-register integer modes (V_INT_1REG);
; VCC_REG is clobbered.  Two alternatives:
;  0: operand 2 is "v"  -> v_sub    %1, %2
;  1: operand 1 is "v"  -> v_subrev %2, %1 (operands swapped in the asm)
77f7566e 1127(define_insn "sub<mode>3<exec_clobber>"
03876953
AS
1128 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
1129 (minus:V_INT_1REG
1130 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
1131 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
3d6275e3
AS
1132 (clobber (reg:DI VCC_REG))]
1133 ""
1134 "@
1135 v_sub%^_u32\t%0, vcc, %1, %2
1136 v_subrev%^_u32\t%0, vcc, %2, %1"
1137 [(set_attr "type" "vop2")
1138 (set_attr "length" "8,8")])
1139
1165109b
AS
; V_SI subtraction with an explicit borrow output.  Operand 3 (a DI
; register) is set to (operand 1 - operand 2) >u operand 1.
; Alternatives 0-1 emit v_sub, alternatives 2-3 emit v_subrev with the
; source operands swapped; within each pair the first is typed vop2 and
; the second vop3b.
1140(define_insn "sub<mode>3_vcc<exec_vcc>"
1141 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1142 (minus:V_SI
1143 (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1144 (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1145 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1146 (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
3d6275e3
AS
1147 (match_dup 1)))]
1148 ""
1149 "@
1150 v_sub%^_u32\t%0, %3, %1, %2
1151 v_sub%^_u32\t%0, %3, %1, %2
1152 v_subrev%^_u32\t%0, %3, %2, %1
1153 v_subrev%^_u32\t%0, %3, %2, %1"
1154 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1155 (set_attr "length" "8")])
1156
66b01cc3
AS
1157; v_subb does not accept an SGPR because the VCC read already counts as an
1158; SGPR use and the number of SGPR operands is limited to 1. It does not
1159; accept "B" immediate constants due to a related bus conflict.
3d6275e3 1160
1165109b
AS
; V_SI subtract with borrow-in.  The borrow-in is a per-lane 1 or 0
; chosen by the DI mask in operand 3; operand 4 receives the borrow-out.
; Alternatives 0-1 emit v_subb, alternatives 2-3 emit v_subbrev with the
; two source operands swapped in the assembly.
; NOTE(review): the RTL nests the operands as
; ((borrow-in - operand 1) - operand 2), and the second comparison of the
; ior uses ltu where the first uses gtu -- confirm this is the intended
; description of the instruction.
1161(define_insn "subc<mode>3<exec_vcc>"
1162 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1163 (minus:V_SI
1164 (minus:V_SI
1165 (vec_merge:V_SI
1166 (vec_duplicate:V_SI (const_int 1))
1167 (vec_duplicate:V_SI (const_int 0))
1168 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
1169 (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
1170 (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
1171 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
1172 (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
1173 (vec_merge:V_SI
1174 (vec_duplicate:V_SI (const_int 1))
1175 (vec_duplicate:V_SI (const_int 0))
1176 (match_dup 3))
3d6275e3
AS
1177 (match_dup 1))
1178 (match_dup 2))
1179 (match_dup 2))
1165109b
AS
1180 (ltu:DI (minus:V_SI (vec_merge:V_SI
1181 (vec_duplicate:V_SI (const_int 1))
1182 (vec_duplicate:V_SI (const_int 0))
1183 (match_dup 3))
1184 (match_dup 1))
3d6275e3
AS
1185 (match_dup 1))))]
1186 ""
1187 "@
1188 v_subb%^_u32\t%0, %4, %1, %2, %3
1189 v_subb%^_u32\t%0, %4, %1, %2, %3
1190 v_subbrev%^_u32\t%0, %4, %2, %1, %3
1191 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
1192 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
66b01cc3 1193 (set_attr "length" "4,8,4,8")])
3d6275e3 1194
1165109b
AS
; V_DI addition.  Output is "#"; once operands 0, 1 and 2 all satisfy
; gcn_can_split_p the insn splits into add<vnsi>3_vcc on part 0 of each
; operand followed by addc<vnsi>3 on part 1, with VCC_REG (clobbered by
; this pattern) passing the carry between the two.
1195(define_insn_and_split "add<mode>3"
1196 [(set (match_operand:V_DI 0 "register_operand" "= v")
1197 (plus:V_DI
1198 (match_operand:V_DI 1 "register_operand" "%vDb")
1199 (match_operand:V_DI 2 "gcn_alu_operand" " vDb")))
3d6275e3
AS
1200 (clobber (reg:DI VCC_REG))]
1201 ""
1202 "#"
1165109b
AS
1203 "gcn_can_split_p (<MODE>mode, operands[0])
1204 && gcn_can_split_p (<MODE>mode, operands[1])
1205 && gcn_can_split_p (<MODE>mode, operands[2])"
3d6275e3
AS
1206 [(const_int 0)]
1207 {
1208 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1165109b
AS
1209 emit_insn (gen_add<vnsi>3_vcc
1210 (gcn_operand_part (<MODE>mode, operands[0], 0),
1211 gcn_operand_part (<MODE>mode, operands[1], 0),
1212 gcn_operand_part (<MODE>mode, operands[2], 0),
3d6275e3 1213 vcc));
1165109b
AS
1214 emit_insn (gen_addc<vnsi>3
1215 (gcn_operand_part (<MODE>mode, operands[0], 1),
1216 gcn_operand_part (<MODE>mode, operands[1], 1),
1217 gcn_operand_part (<MODE>mode, operands[2], 1),
3d6275e3
AS
1218 vcc, vcc));
1219 DONE;
1220 }
1221 [(set_attr "type" "vmult")
1222 (set_attr "length" "8")])
1223
1165109b
AS
; V_DI addition under an exec mask: the sum of operands 1 and 2 is merged
; with operand 3 (a register or unspec) under the DI exec mask in
; operand 4.  Output is "#"; the split emits add<vnsi>3_vcc_exec on
; part 0 and addc<vnsi>3_exec on part 1, each with the matching part of
; operand 3 and the whole of operand 4, with VCC_REG carrying between
; them.
; The split condition guards every operand that is passed to
; gcn_operand_part: 0, 1, 2 and the merge source 3.  Operand 4 is the
; exec mask, which is passed through whole and therefore needs no check.
1224(define_insn_and_split "add<mode>3_exec"
1225 [(set (match_operand:V_DI 0 "register_operand" "= v")
1226 (vec_merge:V_DI
1227 (plus:V_DI
1228 (match_operand:V_DI 1 "register_operand" "%vDb")
1229 (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))
1230 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1231 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
3d6275e3
AS
1232 (clobber (reg:DI VCC_REG))]
1233 ""
1234 "#"
1165109b
AS
1235 "gcn_can_split_p (<MODE>mode, operands[0])
1236 && gcn_can_split_p (<MODE>mode, operands[1])
1237 && gcn_can_split_p (<MODE>mode, operands[2])
1238 && gcn_can_split_p (<MODE>mode, operands[3])"
3d6275e3
AS
1239 [(const_int 0)]
1240 {
1241 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1165109b
AS
1242 emit_insn (gen_add<vnsi>3_vcc_exec
1243 (gcn_operand_part (<MODE>mode, operands[0], 0),
1244 gcn_operand_part (<MODE>mode, operands[1], 0),
1245 gcn_operand_part (<MODE>mode, operands[2], 0),
3d6275e3 1246 vcc,
1165109b 1247 gcn_operand_part (<MODE>mode, operands[3], 0),
3d6275e3 1248 operands[4]));
1165109b
AS
1249 emit_insn (gen_addc<vnsi>3_exec
1250 (gcn_operand_part (<MODE>mode, operands[0], 1),
1251 gcn_operand_part (<MODE>mode, operands[1], 1),
1252 gcn_operand_part (<MODE>mode, operands[2], 1),
3d6275e3 1253 vcc, vcc,
1165109b 1254 gcn_operand_part (<MODE>mode, operands[3], 1),
3d6275e3
AS
1255 operands[4]));
1256 DONE;
1257 }
1258 [(set_attr "type" "vmult")
1259 (set_attr "length" "8")])
1260
1165109b
AS
; V_DI subtraction.  Output is "#"; once operands 0, 1 and 2 all satisfy
; gcn_can_split_p the insn splits into sub<vnsi>3_vcc on part 0 of each
; operand followed by subc<vnsi>3 on part 1, with VCC_REG (clobbered by
; this pattern) passing the borrow between the two.
1261(define_insn_and_split "sub<mode>3"
1262 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1263 (minus:V_DI
1264 (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v")
1265 (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb")))
3d6275e3
AS
1266 (clobber (reg:DI VCC_REG))]
1267 ""
1268 "#"
1165109b
AS
1269 "gcn_can_split_p (<MODE>mode, operands[0])
1270 && gcn_can_split_p (<MODE>mode, operands[1])
1271 && gcn_can_split_p (<MODE>mode, operands[2])"
3d6275e3
AS
1272 [(const_int 0)]
1273 {
1274 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1165109b
AS
1275 emit_insn (gen_sub<vnsi>3_vcc
1276 (gcn_operand_part (<MODE>mode, operands[0], 0),
1277 gcn_operand_part (<MODE>mode, operands[1], 0),
1278 gcn_operand_part (<MODE>mode, operands[2], 0),
3d6275e3 1279 vcc));
1165109b
AS
1280 emit_insn (gen_subc<vnsi>3
1281 (gcn_operand_part (<MODE>mode, operands[0], 1),
1282 gcn_operand_part (<MODE>mode, operands[1], 1),
1283 gcn_operand_part (<MODE>mode, operands[2], 1),
3d6275e3
AS
1284 vcc, vcc));
1285 DONE;
1286 }
1287 [(set_attr "type" "vmult")
d54fc770 1288 (set_attr "length" "8")])
3d6275e3 1289
1165109b
AS
; V_DI subtraction under an exec mask: the difference is merged with
; operand 3 under the DI exec mask in operand 4.  The insn condition
; requires at least one of operands 1 and 2 to be a register.  Output is
; "#"; the split (guarded by gcn_can_split_p on operands 0-3) emits
; sub<vnsi>3_vcc_exec on part 0 and subc<vnsi>3_exec on part 1, with
; VCC_REG carrying the borrow.
; NOTE(review): the source constraints here are "vSvB" while the
; unmasked V_DI sub pattern uses "vDb" -- confirm which is intended.
1290(define_insn_and_split "sub<mode>3_exec"
1291 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1292 (vec_merge:V_DI
1293 (minus:V_DI
1294 (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v")
1295 (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB"))
1296 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
3abfd4f3 1297 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
3d6275e3
AS
1298 (clobber (reg:DI VCC_REG))]
1299 "register_operand (operands[1], VOIDmode)
1300 || register_operand (operands[2], VOIDmode)"
1301 "#"
1165109b
AS
1302 "gcn_can_split_p (<MODE>mode, operands[0])
1303 && gcn_can_split_p (<MODE>mode, operands[1])
1304 && gcn_can_split_p (<MODE>mode, operands[2])
1305 && gcn_can_split_p (<MODE>mode, operands[3])"
3d6275e3
AS
1306 [(const_int 0)]
1307 {
1308 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1165109b
AS
1309 emit_insn (gen_sub<vnsi>3_vcc_exec
1310 (gcn_operand_part (<MODE>mode, operands[0], 0),
1311 gcn_operand_part (<MODE>mode, operands[1], 0),
1312 gcn_operand_part (<MODE>mode, operands[2], 0),
3d6275e3 1313 vcc,
1165109b 1314 gcn_operand_part (<MODE>mode, operands[3], 0),
3d6275e3 1315 operands[4]));
1165109b
AS
1316 emit_insn (gen_subc<vnsi>3_exec
1317 (gcn_operand_part (<MODE>mode, operands[0], 1),
1318 gcn_operand_part (<MODE>mode, operands[1], 1),
1319 gcn_operand_part (<MODE>mode, operands[2], 1),
3d6275e3 1320 vcc, vcc,
1165109b 1321 gcn_operand_part (<MODE>mode, operands[3], 1),
3d6275e3
AS
1322 operands[4]));
1323 DONE;
1324 }
1325 [(set_attr "type" "vmult")
d54fc770 1326 (set_attr "length" "8")])
3d6275e3 1327
1165109b
AS
; V_DI addition of a zero-extended <VnSI> vector (operand 1) and a V_DI
; operand 2.  The split adds operand 1 to part 0 of operand 2 with
; add<vnsi>3_vcc, then adds part 1 of operand 2, const0_rtx and the carry
; with addc<vnsi>3.  Operand 1 is used whole, so only operands 0 and 2
; are checked with gcn_can_split_p.
1328(define_insn_and_split "add<mode>3_zext"
1329 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1330 (plus:V_DI
1331 (zero_extend:V_DI
1332 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1333 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")))
3d6275e3
AS
1334 (clobber (reg:DI VCC_REG))]
1335 ""
1336 "#"
1165109b
AS
1337 "gcn_can_split_p (<MODE>mode, operands[0])
1338 && gcn_can_split_p (<MODE>mode, operands[2])"
3d6275e3
AS
1339 [(const_int 0)]
1340 {
1341 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1165109b
AS
1342 emit_insn (gen_add<vnsi>3_vcc
1343 (gcn_operand_part (<MODE>mode, operands[0], 0),
3d6275e3 1344 operands[1],
1165109b 1345 gcn_operand_part (<MODE>mode, operands[2], 0),
3d6275e3 1346 vcc));
1165109b
AS
1347 emit_insn (gen_addc<vnsi>3
1348 (gcn_operand_part (<MODE>mode, operands[0], 1),
1349 gcn_operand_part (<MODE>mode, operands[2], 1),
3d6275e3
AS
1350 const0_rtx, vcc, vcc));
1351 DONE;
1352 }
1353 [(set_attr "type" "vmult")
66b01cc3 1354 (set_attr "length" "8")])
3d6275e3 1355
1165109b
AS
; Exec-masked V_DI addition of a zero-extended <VnSI> vector (operand 1)
; and a V_DI operand 2; the sum is merged with operand 3 under the exec
; mask in operand 4.  The split mirrors the unmasked zext pattern using
; the _exec generators, passing the matching part of operand 3 and the
; whole of operand 4 to each half.
1356(define_insn_and_split "add<mode>3_zext_exec"
1357 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1358 (vec_merge:V_DI
1359 (plus:V_DI
1360 (zero_extend:V_DI
1361 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1362 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
1363 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1364 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
3d6275e3
AS
1365 (clobber (reg:DI VCC_REG))]
1366 ""
1367 "#"
1165109b
AS
1368 "gcn_can_split_p (<MODE>mode, operands[0])
1369 && gcn_can_split_p (<MODE>mode, operands[2])
1370 && gcn_can_split_p (<MODE>mode, operands[3])"
3d6275e3
AS
1371 [(const_int 0)]
1372 {
1373 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1165109b
AS
1374 emit_insn (gen_add<vnsi>3_vcc_exec
1375 (gcn_operand_part (<MODE>mode, operands[0], 0),
3d6275e3 1376 operands[1],
1165109b 1377 gcn_operand_part (<MODE>mode, operands[2], 0),
3d6275e3 1378 vcc,
1165109b 1379 gcn_operand_part (<MODE>mode, operands[3], 0),
3d6275e3 1380 operands[4]));
1165109b
AS
1381 emit_insn (gen_addc<vnsi>3_exec
1382 (gcn_operand_part (<MODE>mode, operands[0], 1),
1383 gcn_operand_part (<MODE>mode, operands[2], 1),
3d6275e3 1384 const0_rtx, vcc, vcc,
1165109b 1385 gcn_operand_part (<MODE>mode, operands[3], 1),
3d6275e3
AS
1386 operands[4]));
1387 DONE;
1388 }
1389 [(set_attr "type" "vmult")
66b01cc3 1390 (set_attr "length" "8")])
3d6275e3 1391
; V_DI addition of a duplicated, zero-extended SI scalar (operand 1) and
; a V_DI operand 2, with the carry register given explicitly as operand 3
; (earlyclobber) rather than fixed to VCC_REG.  The split emits
; add<vnsi>3_vcc_dup on the low parts, then addc<vnsi>3 adding part 1 of
; operand 2, const0_rtx and the carry in operand 3.
; NOTE(review): gcn_operand_part is called with DImode on operand 1,
; which is declared SI -- confirm this is intended.
75d0b3d7 1392(define_insn_and_split "add<mode>3_vcc_zext_dup"
961c2aac 1393 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1165109b
AS
1394 (plus:V_DI
1395 (zero_extend:V_DI
1396 (vec_duplicate:<VnSI>
961c2aac
AS
1397 (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
1398 (match_operand:V_DI 2 "gcn_alu_operand" " vDA, vDb")))
1399 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
75d0b3d7
AS
1400 (ltu:DI (plus:V_DI
1401 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1402 (match_dup 2))
1403 (match_dup 1)))]
3d6275e3
AS
1404 ""
1405 "#"
1165109b
AS
1406 "gcn_can_split_p (<MODE>mode, operands[0])
1407 && gcn_can_split_p (<MODE>mode, operands[2])"
3d6275e3
AS
1408 [(const_int 0)]
1409 {
1165109b
AS
1410 emit_insn (gen_add<vnsi>3_vcc_dup
1411 (gcn_operand_part (<MODE>mode, operands[0], 0),
3d6275e3 1412 gcn_operand_part (DImode, operands[1], 0),
1165109b 1413 gcn_operand_part (<MODE>mode, operands[2], 0),
75d0b3d7 1414 operands[3]));
1165109b
AS
1415 emit_insn (gen_addc<vnsi>3
1416 (gcn_operand_part (<MODE>mode, operands[0], 1),
1417 gcn_operand_part (<MODE>mode, operands[2], 1),
75d0b3d7 1418 const0_rtx, operands[3], operands[3]));
3d6275e3
AS
1419 DONE;
1420 }
1421 [(set_attr "type" "vmult")
1422 (set_attr "length" "8")])
1423
75d0b3d7
AS
; Convenience expander: emits add<mode>3_vcc_zext_dup with the hard
; VCC_REG (in DImode) supplied as the carry operand.
1424(define_expand "add<mode>3_zext_dup"
1425 [(match_operand:V_DI 0 "register_operand")
1426 (match_operand:SI 1 "gcn_alu_operand")
1427 (match_operand:V_DI 2 "gcn_alu_operand")]
1428 ""
1429 {
1430 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1431 emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
1432 operands[2], vcc));
1433 DONE;
1434 })
1435
; Exec-masked form of add<mode>3_vcc_zext_dup.  The sum is merged with
; operand 4 under the exec mask in operand 5, and the carry set for
; operand 3 is and-ed with that mask.  The split emits
; add<vnsi>3_vcc_dup_exec on the low parts and addc<vnsi>3_exec on the
; high parts, each given the matching part of operand 4 and the whole of
; operand 5.
; NOTE(review): gcn_operand_part is called with DImode on operand 1,
; which is declared SI -- confirm this is intended.
1436(define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
961c2aac 1437 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1165109b
AS
1438 (vec_merge:V_DI
1439 (plus:V_DI
1440 (zero_extend:V_DI
1441 (vec_duplicate:<VnSI>
961c2aac
AS
1442 (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv")))
1443 (match_operand:V_DI 2 "gcn_alu_operand" " vDb, vDA"))
1444 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0, U0")
1445 (match_operand:DI 5 "gcn_exec_reg_operand" " e, e")))
1446 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
75d0b3d7
AS
1447 (and:DI
1448 (ltu:DI (plus:V_DI
1449 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1450 (match_dup 2))
1451 (match_dup 1))
1452 (match_dup 5)))]
3d6275e3
AS
1453 ""
1454 "#"
1165109b
AS
1455 "gcn_can_split_p (<MODE>mode, operands[0])
1456 && gcn_can_split_p (<MODE>mode, operands[2])
75d0b3d7 1457 && gcn_can_split_p (<MODE>mode, operands[4])"
3d6275e3
AS
1458 [(const_int 0)]
1459 {
1165109b
AS
1460 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1461 (gcn_operand_part (<MODE>mode, operands[0], 0),
3d6275e3 1462 gcn_operand_part (DImode, operands[1], 0),
1165109b 1463 gcn_operand_part (<MODE>mode, operands[2], 0),
75d0b3d7
AS
1464 operands[3],
1465 gcn_operand_part (<MODE>mode, operands[4], 0),
1466 operands[5]));
1165109b
AS
1467 emit_insn (gen_addc<vnsi>3_exec
1468 (gcn_operand_part (<MODE>mode, operands[0], 1),
1469 gcn_operand_part (<MODE>mode, operands[2], 1),
75d0b3d7
AS
1470 const0_rtx, operands[3], operands[3],
1471 gcn_operand_part (<MODE>mode, operands[4], 1),
1472 operands[5]));
3d6275e3
AS
1473 DONE;
1474 }
1475 [(set_attr "type" "vmult")
1476 (set_attr "length" "8")])
1477
75d0b3d7
AS
; Convenience expander: emits add<mode>3_vcc_zext_dup_exec with the hard
; VCC_REG as the carry operand.  This expander's operands 3 and 4 (merge
; source and exec mask) become operands 4 and 5 of the insn.
1478(define_expand "add<mode>3_zext_dup_exec"
1479 [(match_operand:V_DI 0 "register_operand")
1480 (match_operand:SI 1 "gcn_alu_operand")
1481 (match_operand:V_DI 2 "gcn_alu_operand")
1482 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1483 (match_operand:DI 4 "gcn_exec_reg_operand")]
1484 ""
1485 {
1486 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1487 emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
1488 operands[2], vcc, operands[3],
1489 operands[4]));
1490 DONE;
1491 })
1492
; V_DI addition of a zero-extended <VnSI> vector (operand 1) and a
; duplicated DI scalar (operand 2), with an explicit carry register
; (operand 3, earlyclobber).  The split:
;  1. adds part 0 of the scalar to operand 1 with add<vnsi>3_vcc_dup;
;  2. broadcasts part 1 of the scalar into the high half of operand 0;
;  3. adds const0_rtx plus the carry to that high half with addc<vnsi>3.
1493(define_insn_and_split "add<mode>3_vcc_zext_dup2"
961c2aac 1494 [(set (match_operand:V_DI 0 "register_operand" "= v")
1165109b
AS
1495 (plus:V_DI
1496 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
75d0b3d7 1497 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" " DbSv"))))
961c2aac 1498 (set (match_operand:DI 3 "register_operand" "=&SgcV")
75d0b3d7
AS
1499 (ltu:DI (plus:V_DI
1500 (zero_extend:V_DI (match_dup 1))
1501 (vec_duplicate:V_DI (match_dup 2)))
1502 (match_dup 1)))]
3d6275e3
AS
1503 ""
1504 "#"
1165109b 1505 "gcn_can_split_p (<MODE>mode, operands[0])"
3d6275e3
AS
1506 [(const_int 0)]
1507 {
1165109b
AS
1508 emit_insn (gen_add<vnsi>3_vcc_dup
1509 (gcn_operand_part (<MODE>mode, operands[0], 0),
3d6275e3
AS
1510 gcn_operand_part (DImode, operands[2], 0),
1511 operands[1],
75d0b3d7 1512 operands[3]));
1165109b
AS
1513 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1514 emit_insn (gen_vec_duplicate<vnsi>
3d6275e3 1515 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
75d0b3d7
AS
1516 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
1517 operands[3]));
3d6275e3
AS
1518 DONE;
1519 }
1520 [(set_attr "type" "vmult")
1521 (set_attr "length" "8")])
1522
75d0b3d7
AS
; Convenience expander: emits add<mode>3_vcc_zext_dup2 with the hard
; VCC_REG (in DImode) supplied as the carry operand.
1523(define_expand "add<mode>3_zext_dup2"
1524 [(match_operand:V_DI 0 "register_operand")
1525 (match_operand:<VnSI> 1 "gcn_alu_operand")
1526 (match_operand:DI 2 "gcn_alu_operand")]
1527 ""
1528 {
1529 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1530 emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
1531 operands[2], vcc));
1532 DONE;
1533 })
1534
; Exec-masked form of add<mode>3_vcc_zext_dup2.  The sum is merged with
; operand 4 under the exec mask in operand 5, and the carry set for
; operand 3 is and-ed with that mask.  The split runs the same three
; steps as the unmasked pattern (low add, broadcast of the scalar's high
; part, add-with-carry of const0_rtx) using the _exec generators.
; NOTE(review): operand 2 uses constraint "BSv" here but " DbSv" in the
; unmasked pattern -- confirm which is intended.
1535(define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
961c2aac 1536 [(set (match_operand:V_DI 0 "register_operand" "= v")
1165109b
AS
1537 (vec_merge:V_DI
1538 (plus:V_DI
1539 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1540 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
961c2aac
AS
1541 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0")
1542 (match_operand:DI 5 "gcn_exec_reg_operand" " e")))
1543 (set (match_operand:DI 3 "register_operand" "=&SgcV")
75d0b3d7
AS
1544 (and:DI
1545 (ltu:DI (plus:V_DI
1546 (zero_extend:V_DI (match_dup 1))
1547 (vec_duplicate:V_DI (match_dup 2)))
1548 (match_dup 1))
1549 (match_dup 5)))]
3d6275e3
AS
1550 ""
1551 "#"
1165109b 1552 "gcn_can_split_p (<MODE>mode, operands[0])
75d0b3d7 1553 && gcn_can_split_p (<MODE>mode, operands[4])"
3d6275e3
AS
1554 [(const_int 0)]
1555 {
1165109b
AS
1556 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1557 (gcn_operand_part (<MODE>mode, operands[0], 0),
3d6275e3
AS
1558 gcn_operand_part (DImode, operands[2], 0),
1559 operands[1],
75d0b3d7
AS
1560 operands[3],
1561 gcn_operand_part (<MODE>mode, operands[4], 0),
1562 operands[5]));
1165109b
AS
1563 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1564 emit_insn (gen_vec_duplicate<vnsi>_exec
3d6275e3 1565 (dsthi, gcn_operand_part (DImode, operands[2], 1),
75d0b3d7
AS
1566 gcn_operand_part (<MODE>mode, operands[4], 1),
1567 operands[5]));
1165109b 1568 emit_insn (gen_addc<vnsi>3_exec
75d0b3d7
AS
1569 (dsthi, dsthi, const0_rtx, operands[3], operands[3],
1570 gcn_operand_part (<MODE>mode, operands[4], 1),
1571 operands[5]));
3d6275e3
AS
1572 DONE;
1573 }
1574 [(set_attr "type" "vmult")
1575 (set_attr "length" "8")])
1576
75d0b3d7
AS
; Convenience expander: emits add<mode>3_vcc_zext_dup2_exec with the hard
; VCC_REG as the carry operand.  This expander's operands 3 and 4 (merge
; source and exec mask) become operands 4 and 5 of the insn.
1577(define_expand "add<mode>3_zext_dup2_exec"
1578 [(match_operand:V_DI 0 "register_operand")
1579 (match_operand:<VnSI> 1 "gcn_alu_operand")
1580 (match_operand:DI 2 "gcn_alu_operand")
1581 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1582 (match_operand:DI 4 "gcn_exec_reg_operand")]
1583 ""
1584 {
1585 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1586 emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
1587 operands[2], vcc,
1588 operands[3], operands[4]));
1589 DONE;
1590 })
1591
1165109b
AS
; V_DI addition of a sign-extended <VnSI> vector (operand 1) and a
; duplicated DI scalar (operand 2).  Needs a <VnSI> scratch (operand 3,
; earlyclobber) and clobbers VCC_REG.  The split:
;  1. sets the scratch to operand 1 arithmetically shifted right by 31;
;  2. adds part 0 of the scalar to operand 1 with add<vnsi>3_vcc_dup;
;  3. broadcasts part 1 of the scalar into the high half of operand 0;
;  4. adds the scratch plus the carry to that high half with addc<vnsi>3.
1592(define_insn_and_split "add<mode>3_sext_dup2"
1593 [(set (match_operand:V_DI 0 "register_operand" "= v")
1594 (plus:V_DI
1595 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
1596 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1597 (clobber (match_scratch:<VnSI> 3 "=&v"))
3d6275e3
AS
1598 (clobber (reg:DI VCC_REG))]
1599 ""
1600 "#"
1165109b 1601 "gcn_can_split_p (<MODE>mode, operands[0])"
3d6275e3
AS
1602 [(const_int 0)]
1603 {
1604 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1165109b
AS
1605 emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31)));
1606 emit_insn (gen_add<vnsi>3_vcc_dup
1607 (gcn_operand_part (<MODE>mode, operands[0], 0),
3d6275e3
AS
1608 gcn_operand_part (DImode, operands[2], 0),
1609 operands[1],
1610 vcc));
1165109b
AS
1611 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1612 emit_insn (gen_vec_duplicate<vnsi>
3d6275e3 1613 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1165109b 1614 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc));
3d6275e3
AS
1615 DONE;
1616 }
1617 [(set_attr "type" "vmult")
1618 (set_attr "length" "8")])
1619
1165109b
AS
; Exec-masked form of add<mode>3_sext_dup2.  The sum is merged with
; operand 3 under the exec mask in operand 4; operand 5 is the <VnSI>
; scratch.  The split runs the same four steps as the unmasked pattern
; using the _exec generators.  The shift into the scratch uses
; gcn_gen_undef (<VnSI>mode) as its merge source; the other steps use the
; matching part of operand 3.
1620(define_insn_and_split "add<mode>3_sext_dup2_exec"
1621 [(set (match_operand:V_DI 0 "register_operand" "= v")
1622 (vec_merge:V_DI
1623 (plus:V_DI
1624 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1625 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1626 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
3d6275e3 1627 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1165109b 1628 (clobber (match_scratch:<VnSI> 5 "=&v"))
3d6275e3
AS
1629 (clobber (reg:DI VCC_REG))]
1630 ""
1631 "#"
1165109b
AS
1632 "gcn_can_split_p (<MODE>mode, operands[0])
1633 && gcn_can_split_p (<MODE>mode, operands[3])"
3d6275e3
AS
1634 [(const_int 0)]
1635 {
1636 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1165109b
AS
1637 emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31),
1638 gcn_gen_undef (<VnSI>mode), operands[4]));
1639 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1640 (gcn_operand_part (<MODE>mode, operands[0], 0),
3d6275e3
AS
1641 gcn_operand_part (DImode, operands[2], 0),
1642 operands[1],
1643 vcc,
1165109b 1644 gcn_operand_part (<MODE>mode, operands[3], 0),
3d6275e3 1645 operands[4]));
1165109b
AS
1646 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1647 emit_insn (gen_vec_duplicate<vnsi>_exec
3d6275e3 1648 (dsthi, gcn_operand_part (DImode, operands[2], 1),
28b733ea
AS
1649 gcn_operand_part (<MODE>mode, operands[3], 1),
1650 operands[4]));
1165109b 1651 emit_insn (gen_addc<vnsi>3_exec
3d6275e3 1652 (dsthi, dsthi, operands[5], vcc, vcc,
1165109b 1653 gcn_operand_part (<MODE>mode, operands[3], 1),
3d6275e3
AS
1654 operands[4]));
1655 DONE;
1656 }
1657 [(set_attr "type" "vmult")
1658 (set_attr "length" "8")])
1659
1660;; }}}
1661;; {{{ DS memory ALU: add/sub
1662
; Vector modes and scalar modes iterated over by the DS memory add, sub
; and subr patterns that follow.
1663(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
1664(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1665
1666;; FIXME: the vector patterns probably need RD expanded to a vector of
1667;; addresses. For now, the only way a vector can get into LDS is
1668;; if the user puts it there manually.
1669;;
1670;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1671;; checked to see if anything can ever use them.
1672
; In-place vector add on DS memory: memory operand 1 plus register
; operand 2, stored to memory operand 0.  The insn condition requires
; operands 0 and 1 to be rtx_equal_p, so the emitted ds_add names only
; the one memory operand and the register.
1673(define_insn "add<mode>3_ds<exec>"
1674 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1675 (plus:DS_ARITH_MODE
1676 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1677 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1678 "rtx_equal_p (operands[0], operands[1])"
1679 "ds_add%u0\t%A0, %2%O0"
1680 [(set_attr "type" "ds")
1681 (set_attr "length" "8")])
1682
; Scalar-mode (DS_ARITH_SCALAR_MODE) counterpart of the in-place DS add:
; memory operand 1 plus register operand 2, stored to memory operand 0,
; with operands 0 and 1 required to be rtx_equal_p.
1683(define_insn "add<mode>3_ds_scalar"
1684 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1685 (plus:DS_ARITH_SCALAR_MODE
1686 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1687 "%RD")
1688 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1689 "rtx_equal_p (operands[0], operands[1])"
1690 "ds_add%u0\t%A0, %2%O0"
1691 [(set_attr "type" "ds")
1692 (set_attr "length" "8")])
1693
; In-place vector subtract on DS memory: memory operand 1 minus register
; operand 2, stored to memory operand 0.  Operands 0 and 1 must be
; rtx_equal_p.  Emits ds_sub.
1694(define_insn "sub<mode>3_ds<exec>"
1695 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1696 (minus:DS_ARITH_MODE
1697 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1698 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1699 "rtx_equal_p (operands[0], operands[1])"
1700 "ds_sub%u0\t%A0, %2%O0"
1701 [(set_attr "type" "ds")
1702 (set_attr "length" "8")])
1703
; Scalar-mode counterpart of the in-place DS subtract: memory operand 1
; minus register operand 2, stored to memory operand 0, with operands 0
; and 1 required to be rtx_equal_p.
1704(define_insn "sub<mode>3_ds_scalar"
1705 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1706 (minus:DS_ARITH_SCALAR_MODE
1707 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1708 " RD")
1709 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1710 "rtx_equal_p (operands[0], operands[1])"
1711 "ds_sub%u0\t%A0, %2%O0"
1712 [(set_attr "type" "ds")
1713 (set_attr "length" "8")])
1714
; Reversed in-place vector subtract on DS memory: register operand 2
; minus memory operand 1, stored to memory operand 0.  Operands 0 and 1
; must be rtx_equal_p.  Emits ds_rsub.
1715(define_insn "subr<mode>3_ds<exec>"
1716 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1717 (minus:DS_ARITH_MODE
1718 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1719 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1720 "rtx_equal_p (operands[0], operands[1])"
1721 "ds_rsub%u0\t%A0, %2%O0"
1722 [(set_attr "type" "ds")
1723 (set_attr "length" "8")])
1724
; Scalar-mode counterpart of the reversed in-place DS subtract: register
; operand 2 minus memory operand 1, stored to memory operand 0, with
; operands 0 and 1 required to be rtx_equal_p.
1725(define_insn "subr<mode>3_ds_scalar"
1726 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1727 (minus:DS_ARITH_SCALAR_MODE
1728 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1729 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1730 " RD")))]
1731 "rtx_equal_p (operands[0], operands[1])"
1732 "ds_rsub%u0\t%A0, %2%O0"
1733 [(set_attr "type" "ds")
1734 (set_attr "length" "8")])
1735
1736;; }}}
1737;; {{{ ALU special case: mult
1738
1165109b
AS
;; High part of a widening V_SI multiply: both inputs are extended to <VnDI>
;; (sign or zero, per the any_extend iterator), multiplied, shifted right by
;; 32 and truncated back to V_SI.  Note the template prints the operands in
;; swapped order (%2 before %1); operand 1 is marked commutative.
1739(define_insn "<su>mul<mode>3_highpart<exec>"
1740 [(set (match_operand:V_SI 0 "register_operand" "= v")
1741 (truncate:V_SI
1742 (lshiftrt:<VnDI>
1743 (mult:<VnDI>
1744 (any_extend:<VnDI>
1745 (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
1746 (any_extend:<VnDI>
1747 (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
3d6275e3
AS
1748 (const_int 32))))]
1749 ""
1750 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1751 [(set_attr "type" "vop3a")
1752 (set_attr "length" "8")])
1753
;; Element-wise integer multiply for every V_INT_1REG mode.  All modes share
;; the single v_mul_lo_u32 template.
7b945b19 1754(define_insn "mul<mode>3<exec>"
03876953
AS
1755 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1756 (mult:V_INT_1REG
1757 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1758 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
3d6275e3
AS
1759 ""
1760 "v_mul_lo_u32\t%0, %1, %2"
1761 [(set_attr "type" "vop3a")
1762 (set_attr "length" "8")])
1763
;; Integer multiply of a V_INT_1REG vector by a scalar that is broadcast to
;; all lanes (vec_duplicate of a <SCALAR_MODE> operand).  Same v_mul_lo_u32
;; template as the vector-by-vector form.
7b945b19 1764(define_insn "mul<mode>3_dup<exec>"
03876953
AS
1765 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1766 (mult:V_INT_1REG
1767 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1768 (vec_duplicate:V_INT_1REG
1769 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
3d6275e3
AS
1770 ""
1771 "v_mul_lo_u32\t%0, %1, %2"
1772 [(set_attr "type" "vop3a")
1773 (set_attr "length" "8")])
1774
1165109b
AS
;; 64-bit-element vector multiply (V_DI).  There is no single instruction for
;; this, so the insn is output as "#" and split after reload into 32-bit
;; operations on the low/high halves of each operand.  Operand 3 is a V_SI
;; scratch; both it and the destination are early-clobber because they are
;; written while the inputs are still being read.
;;
;; With L = lh*2^32 + ll and R = rh*2^32 + rl, the product modulo 2^64 is
;;
;;   lo32 (ll*rl)  +  2^32 * (hi32 (ll*rl) + lo32 (lh*rl) + lo32 (ll*rh))
;;
;; The lh*rh term is weighted by 2^64 and therefore contributes nothing to
;; a 64-bit result; it must NOT be added into the high word.
(define_insn_and_split "mul<mode>3"
  [(set (match_operand:V_DI 0 "register_operand"        "=&v")
        (mult:V_DI
          (match_operand:V_DI 1 "gcn_alu_operand"       "% v")
          (match_operand:V_DI 2 "gcn_alu_operand"       "vDA")))
   (clobber (match_scratch:<VnSI> 3                     "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx tmp = operands[3];

    /* Low word, and the carry of ll*rl into the high word.  */
    emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
    emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));
    /* Cross products; only their low 32 bits land in the high word.  */
    emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
    emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
    DONE;
  })
1804
1165109b
AS
;; Lane-masked form of the 64-bit-element vector multiply: lanes enabled in
;; operand 4 receive the product, the others keep operand 3 (which may be an
;; UNSPEC "undefined" value).  Split after reload into masked 32-bit
;; operations; operand 5 is a V_SI scratch.
;;
;; With L = lh*2^32 + ll and R = rh*2^32 + rl, the product modulo 2^64 is
;;
;;   lo32 (ll*rl)  +  2^32 * (hi32 (ll*rl) + lo32 (lh*rl) + lo32 (ll*rh))
;;
;; The lh*rh term is weighted by 2^64 and therefore contributes nothing to
;; a 64-bit result; it must NOT be added into the high word.
(define_insn_and_split "mul<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand"                "=&v")
        (vec_merge:V_DI
          (mult:V_DI
            (match_operand:V_DI 1 "gcn_alu_operand"             "% v")
            (match_operand:V_DI 2 "gcn_alu_operand"             "vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"            "  e")))
   (clobber (match_scratch:<VnSI> 5                             "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* Previous contents for the disabled lanes, per 32-bit half.  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
        old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
      }
    else
      {
        old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }

    /* The scratch has no meaningful previous value to merge with.  */
    rtx undef = gcn_gen_undef (<VnSI>mode);

    /* Low word, and the carry of ll*rl into the high word.  */
    emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
    emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
                                              old_hi, exec));
    /* Cross products; only their low 32 bits land in the high word.  The
       adds merge with out_hi itself so disabled lanes keep old_hi.  */
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
1852
1165109b
AS
;; Multiply a zero-extended 32-bit-element vector (operand 1) by a
;; 64-bit-element vector (operand 2).  Output as "#" and expanded once
;; reload has completed.  Because the left operand has no high half, only
;; one cross product is needed; operand 3 is the scratch that holds it.
(define_insn_and_split "mul<mode>3_zext"
  [(set (match_operand:V_DI 0 "register_operand"        "=&v")
        (mult:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand"   "  v"))
          (match_operand:V_DI 2 "gcn_alu_operand"       "vDA")))
   (clobber (match_scratch:<VnSI> 3                     "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx scratch = operands[3];

    /* Low word of the result.  */
    emit_insn (gen_mul<vnsi>3 (dst_lo, narrow, wide_lo));

    /* High word: upper half of narrow*wide_lo plus narrow*wide_hi.  */
    emit_insn (gen_umul<vnsi>3_highpart (dst_hi, narrow, wide_lo));
    emit_insn (gen_mul<vnsi>3 (scratch, narrow, wide_hi));
    emit_insn (gen_add<vnsi>3 (dst_hi, dst_hi, scratch));

    DONE;
  })
1878
1165109b
AS
;; Lane-masked multiply of a zero-extended 32-bit-element vector (operand 1)
;; by a 64-bit-element vector (operand 2).  Operand 3 supplies the value for
;; lanes not enabled in operand 4 and may be an UNSPEC; operand 5 is a V_SI
;; scratch.  Expanded into masked 32-bit operations after reload.
(define_insn_and_split "mul<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand"                "=&v")
        (vec_merge:V_DI
          (mult:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand"         "  v"))
            (match_operand:V_DI 2 "gcn_alu_operand"             "vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"            "  e")))
   (clobber (match_scratch:<VnSI> 5                             "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx mask = operands[4];
    rtx scratch = operands[5];

    /* Halves of the fall-back value; one shared undef if none was given.  */
    rtx prev_lo, prev_hi;
    if (GET_CODE (operands[3]) != UNSPEC)
      {
        prev_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        prev_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }
    else
      prev_lo = prev_hi = gcn_gen_undef (<VnSI>mode);

    /* Merge value used when writing the scratch.  */
    rtx no_prev = gcn_gen_undef (<VnSI>mode);

    emit_insn (gen_mul<vnsi>3_exec (dst_lo, narrow, wide_lo, prev_lo, mask));
    emit_insn (gen_umul<vnsi>3_highpart_exec (dst_hi, narrow, wide_lo,
                                              prev_hi, mask));
    emit_insn (gen_mul<vnsi>3_exec (scratch, narrow, wide_hi, no_prev, mask));
    /* Merging with dst_hi keeps prev_hi in the disabled lanes.  */
    emit_insn (gen_add<vnsi>3_exec (dst_hi, dst_hi, scratch, dst_hi, mask));

    DONE;
  })
1922
1165109b
AS
;; Multiply a zero-extended 32-bit-element vector (operand 1) by a DImode
;; scalar broadcast to every lane (operand 2).  Expanded after reload into
;; 32-bit multiplies and an add; operand 3 is a V_SI scratch.
;; NOTE(review): the halves of the scalar operand are passed straight to the
;; vector gen_mul<vnsi>3 generators -- confirm those accept a scalar source.
(define_insn_and_split "mul<mode>3_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand"        "= &v")
        (mult:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand"   "   v"))
          (vec_duplicate:V_DI
            (match_operand:DI 2 "gcn_alu_operand"       "SvDA"))))
   (clobber (match_scratch:<VnSI> 3                     "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx scalar_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx scratch = operands[3];

    /* Low word of the result.  */
    emit_insn (gen_mul<vnsi>3 (dst_lo, narrow, scalar_lo));

    /* High word: upper half of narrow*scalar_lo plus narrow*scalar_hi.  */
    emit_insn (gen_umul<vnsi>3_highpart (dst_hi, narrow, scalar_lo));
    emit_insn (gen_mul<vnsi>3 (scratch, narrow, scalar_hi));
    emit_insn (gen_add<vnsi>3 (dst_hi, dst_hi, scratch));

    DONE;
  })
1949
1165109b
AS
;; Lane-masked multiply of a zero-extended 32-bit-element vector (operand 1)
;; by a broadcast DImode scalar (operand 2).  Operand 3 supplies the value
;; for lanes not enabled in operand 4 and may be an UNSPEC; operand 5 is a
;; V_SI scratch.  Expanded into masked 32-bit operations after reload.
;; NOTE(review): the halves of the scalar operand are passed straight to the
;; vector gen_mul<vnsi>3_exec generators -- confirm those accept a scalar
;; source.
(define_insn_and_split "mul<mode>3_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand"                "= &v")
        (vec_merge:V_DI
          (mult:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand"         "   v"))
            (vec_duplicate:V_DI
              (match_operand:DI 2 "gcn_alu_operand"             "SvDA")))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" "  U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"            "   e")))
   (clobber (match_scratch:<VnSI> 5                             "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx scalar_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx mask = operands[4];
    rtx scratch = operands[5];

    /* Halves of the fall-back value; one shared undef if none was given.  */
    rtx prev_lo, prev_hi;
    if (GET_CODE (operands[3]) != UNSPEC)
      {
        prev_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        prev_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }
    else
      prev_lo = prev_hi = gcn_gen_undef (<VnSI>mode);

    /* Merge value used when writing the scratch.  */
    rtx no_prev = gcn_gen_undef (<VnSI>mode);

    emit_insn (gen_mul<vnsi>3_exec (dst_lo, narrow, scalar_lo, prev_lo, mask));
    emit_insn (gen_umul<vnsi>3_highpart_exec (dst_hi, narrow, scalar_lo,
                                              prev_hi, mask));
    emit_insn (gen_mul<vnsi>3_exec (scratch, narrow, scalar_hi, no_prev,
                                    mask));
    /* Merging with dst_hi keeps prev_hi in the disabled lanes.  */
    emit_insn (gen_add<vnsi>3_exec (dst_hi, dst_hi, scratch, dst_hi, mask));

    DONE;
  })
1994
1995;; }}}
1996;; {{{ ALU generic case
1997
3d6275e3
AS
;; RTL code iterators used by the generic integer ALU patterns below:
;; bitwise binary operations, the three shift kinds, and signed/unsigned
;; minimum and maximum.
1998(define_code_iterator bitop [and ior xor])
1999(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
2000(define_code_iterator minmaxop [smin smax umin umax])
2001
;; Unary integer operations on V_INT_1REG vectors, one pattern per code in
;; the bitunop iterator (defined outside this section).
2002(define_insn "<expander><mode>2<exec>"
03876953
AS
2003 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
2004 (bitunop:V_INT_1REG
2005 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
3d6275e3
AS
2006 ""
2007 "v_<mnemonic>0\t%0, %1"
2008 [(set_attr "type" "vop1")
2009 (set_attr "length" "8")])
2010
;; and/ior/xor on V_INT_1REG vectors.  Two alternatives:
;;   1. register destination ("v"): v_<mnemonic>0, printed with the two
;;      sources swapped (%2 before %1);
;;   2. RD memory destination with operand 1 tied to it ("0"):
;;      ds_<mnemonic>0 updating the memory in place from register operand 2.
2011(define_insn "<expander><mode>3<exec>"
03876953
AS
2012 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
2013 (bitop:V_INT_1REG
2014 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
2015 (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
3d6275e3
AS
2016 ""
2017 "@
2018 v_<mnemonic>0\t%0, %2, %1
2019 ds_<mnemonic>0\t%A0, %2%O0"
2020 [(set_attr "type" "vop2,ds")
2021 (set_attr "length" "8,8")])
2022
1165109b
AS
;; and/ior/xor on 64-bit-element vectors (V_DI).
;; The register alternative is output as "#" and, once reload has completed
;; and the destination is not a DS memory operand, is split into two
;; independent <VnSI> operations on the low (part 0) and high (part 1)
;; halves.  The RD memory alternative is emitted directly as ds_<mnemonic>0.
;; NOTE(review): unlike the _exec variant, this pattern has an empty insn
;; condition, and operand 1 in the memory alternative is "RD" rather than
;; tied to operand 0 -- confirm that is intentional.
2023(define_insn_and_split "<expander><mode>3"
2024 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2025 (bitop:V_DI
2026 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2027 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
3d6275e3
AS
2028 ""
2029 "@
2030 #
2031 ds_<mnemonic>0\t%A0, %2%O0"
1165109b 2032 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
3d6275e3 2033 [(set (match_dup 3)
1165109b 2034 (bitop:<VnSI> (match_dup 5) (match_dup 7)))
3d6275e3 2035 (set (match_dup 4)
1165109b
AS
2036 (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
2037 {
2038 operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0);
2039 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1);
2040 operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0);
2041 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1);
2042 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0);
2043 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1);
3d6275e3
AS
2044 }
2045 [(set_attr "type" "vmult,ds")
2046 (set_attr "length" "16,8")])
2047
1165109b
AS
;; Lane-masked and/ior/xor on 64-bit-element vectors (V_DI): the result is
;; vec_merged with operand 3 under the mask in operand 4.
;; The insn condition accepts a memory destination only when operand 1 is the
;; same rtx as the destination and operand 2 is a register.
;; The register alternative is split after reload into two masked <VnSI>
;; operations, each merging with the matching half of operand 3 under the
;; same mask; the RD alternative is emitted directly as ds_<mnemonic>0.
2048(define_insn_and_split "<expander><mode>3_exec"
2049 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2050 (vec_merge:V_DI
2051 (bitop:V_DI
2052 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2053 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2054 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
3d6275e3
AS
2055 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2056 "!memory_operand (operands[0], VOIDmode)
2057 || (rtx_equal_p (operands[0], operands[1])
2058 && register_operand (operands[2], VOIDmode))"
2059 "@
2060 #
2061 ds_<mnemonic>0\t%A0, %2%O0"
1165109b 2062 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
3d6275e3 2063 [(set (match_dup 5)
1165109b
AS
2064 (vec_merge:<VnSI>
2065 (bitop:<VnSI> (match_dup 7) (match_dup 9))
3d6275e3
AS
2066 (match_dup 11)
2067 (match_dup 4)))
2068 (set (match_dup 6)
1165109b
AS
2069 (vec_merge:<VnSI>
2070 (bitop:<VnSI> (match_dup 8) (match_dup 10))
3d6275e3
AS
2071 (match_dup 12)
2072 (match_dup 4)))]
2073 {
1165109b
AS
2074 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0);
2075 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1);
2076 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0);
2077 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1);
2078 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0);
2079 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1);
2080 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0);
2081 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1);
3d6275e3
AS
2082 }
2083 [(set_attr "type" "vmult,ds")
2084 (set_attr "length" "16,8")])
2085
;; Shift a sub-dword vector (V_QIHI) by a scalar SImode amount.
;; The value is widened to <VnSI>, shifted with the 32-bit-element pattern,
;; and the result narrowed back.  Widening/narrowing is unsigned only for
;; logical right shift.
(define_expand "<expander><mode>3"
  [(set (match_operand:V_QIHI 0 "register_operand"  "=  v")
        (shiftop:V_QIHI
          (match_operand:V_QIHI 1 "gcn_alu_operand" "   v")
          (vec_duplicate:V_QIHI
            (match_operand:SI 2 "gcn_alu_operand"   "SvB"))))]
  ""
  {
    /* Local names so that <code> can be compared as a C expression.  */
    enum {ashift, lshiftrt, ashiftrt};
    bool zext_p = (<code> == lshiftrt);

    rtx wide_src = gen_reg_rtx (<VnSI>mode);
    rtx amount = gen_reg_rtx (SImode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_src, operands[1], zext_p);
    convert_move (amount, operands[2], zext_p);

    emit_insn (gen_<expander><vnsi>3 (wide_dst, wide_src, amount));

    convert_move (operands[0], wide_dst, zext_p);
    DONE;
  })
2106
1165109b
AS
;; Shift every element of a V_SI vector by one scalar SImode amount
;; (vec_duplicate of operand 2).  The template uses <revmnemonic> and prints
;; the shift amount (%2) before the value (%1).
2107(define_insn "<expander><mode>3<exec>"
2108 [(set (match_operand:V_SI 0 "register_operand" "= v")
2109 (shiftop:V_SI
2110 (match_operand:V_SI 1 "gcn_alu_operand" " v")
2111 (vec_duplicate:V_SI
3d6275e3
AS
2112 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2113 ""
2114 "v_<revmnemonic>0\t%0, %2, %1"
2115 [(set_attr "type" "vop2")
2116 (set_attr "length" "8")])
2117
;; Shift a sub-dword vector (V_QIHI) by a per-lane vector of amounts.
;; Both the value and the amounts are widened to <VnSI>, shifted with the
;; 32-bit-element pattern, and the result narrowed back.  Widening and
;; narrowing are unsigned only for logical right shift.
(define_expand "v<expander><mode>3"
  [(set (match_operand:V_QIHI 0 "register_operand"  "=v")
        (shiftop:V_QIHI
          (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
          (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
  ""
  {
    /* Local names so that <code> can be compared as a C expression.  */
    enum {ashift, lshiftrt, ashiftrt};
    bool zext_p = (<code> == lshiftrt);

    rtx wide_src = gen_reg_rtx (<VnSI>mode);
    rtx wide_amounts = gen_reg_rtx (<VnSI>mode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_src, operands[1], zext_p);
    convert_move (wide_amounts, operands[2], zext_p);

    emit_insn (gen_v<expander><vnsi>3 (wide_dst, wide_src, wide_amounts));

    convert_move (operands[0], wide_dst, zext_p);
    DONE;
  })
2137
1165109b
AS
;; Shift each element of a V_SI vector by the matching element of a second
;; V_SI vector.  The template uses <revmnemonic> and prints the shift
;; amounts (%2) before the values (%1).
2138(define_insn "v<expander><mode>3<exec>"
2139 [(set (match_operand:V_SI 0 "register_operand" "=v")
2140 (shiftop:V_SI
2141 (match_operand:V_SI 1 "gcn_alu_operand" " v")
2142 (match_operand:V_SI 2 "gcn_alu_operand" "vB")))]
3d6275e3
AS
2143 ""
2144 "v_<revmnemonic>0\t%0, %2, %1"
2145 [(set_attr "type" "vop2")
2146 (set_attr "length" "8")])
2147
;; Min/max on sub-dword vectors (V_QIHI).
;; Both inputs are widened to <VnSI> (zero-extended for umin/umax,
;; sign-extended otherwise), combined with the 32-bit-element pattern, and
;; the result narrowed back.
(define_expand "<expander><mode>3"
  [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
        (minmaxop:V_QIHI
          (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
          (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
  ""
  {
    /* Local names so that <code> can be compared as a C expression.  */
    enum {smin, umin, smax, umax};
    bool zext_p = (<code> == umax || <code> == umin);

    rtx wide_a = gen_reg_rtx (<VnSI>mode);
    rtx wide_b = gen_reg_rtx (<VnSI>mode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_a, operands[1], zext_p);
    convert_move (wide_b, operands[2], zext_p);

    emit_insn (gen_<code><vnsi>3 (wide_dst, wide_a, wide_b));

    convert_move (operands[0], wide_dst, zext_p);
    DONE;
  })
2167
1165109b
AS
;; smin/smax/umin/umax on V_SI vectors.  Two alternatives:
;;   1. register destination: v_<mnemonic>0, printed with sources swapped;
;;   2. RD memory destination with operand 1 tied to it ("0"):
;;      ds_<mnemonic>0 updating the memory in place from register operand 2.
;; NOTE(review): the insn name uses <vnsi> while the operands iterate over
;; V_SI; other patterns here use <mode> -- confirm the naming is intended.
2168(define_insn "<expander><vnsi>3<exec>"
2169 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD")
2170 (minmaxop:V_SI
2171 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0")
2172 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
3d6275e3
AS
2173 ""
2174 "@
2175 v_<mnemonic>0\t%0, %2, %1
2176 ds_<mnemonic>0\t%A0, %2%O0"
2177 [(set_attr "type" "vop2,ds")
2178 (set_attr "length" "8,8")])
2179
2180;; }}}
2181;; {{{ FP binops - special cases
2182
2183; GCN does not directly provide a DFmode subtract instruction, so we do it by
2184; adding the negated second operand to the first.
2185
1165109b
AS
;; V_DF subtract, implemented as v_add_f64 with operand 2 negated.  The two
;; alternatives differ only in which source may be a non-"v" operand, and
;; the template places the operands accordingly.
2186(define_insn "sub<mode>3<exec>"
2187 [(set (match_operand:V_DF 0 "register_operand" "= v, v")
2188 (minus:V_DF
2189 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v")
2190 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))]
3d6275e3
AS
2191 ""
2192 "@
2193 v_add_f64\t%0, %1, -%2
2194 v_add_f64\t%0, -%2, %1"
2195 [(set_attr "type" "vop3a")
2196 (set_attr "length" "8,8")])
2197
;; Scalar DFmode subtract, implemented as v_add_f64 with operand 2 negated;
;; same two alternatives as the vector pattern above.
abb3993e 2198(define_insn "subdf3"
3d6275e3
AS
2199 [(set (match_operand:DF 0 "register_operand" "= v, v")
2200 (minus:DF
2201 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2202 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2203 ""
2204 "@
2205 v_add_f64\t%0, %1, -%2
2206 v_add_f64\t%0, -%2, %1"
2207 [(set_attr "type" "vop3a")
2208 (set_attr "length" "8,8")])
2209
2210;; }}}
2211;; {{{ FP binops - generic
2212
3d6275e3
AS
;; RTL code iterators for the generic floating-point binops: the
;; commutative codes, the non-commutative code (minus), and their union.
2213(define_code_iterator comm_fp [plus mult smin smax])
2214(define_code_iterator nocomm_fp [minus])
2215(define_code_iterator all_fp [plus mult minus smin smax])
2216
;; Commutative FP binops (plus/mult/smin/smax) on V_FP vectors.  The
;; template prints the sources swapped (%2 before %1).
2217(define_insn "<expander><mode>3<exec>"
03876953
AS
2218 [(set (match_operand:V_FP 0 "register_operand" "= v")
2219 (comm_fp:V_FP
2220 (match_operand:V_FP 1 "gcn_alu_operand" "% v")
2221 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
3d6275e3
AS
2222 ""
2223 "v_<mnemonic>0\t%0, %2, %1"
2224 [(set_attr "type" "vop2")
2225 (set_attr "length" "8")])
2226
;; Commutative FP binops (plus/mult/smin/smax) on scalar FP modes.
;; Alternative 1 has a "v" register destination; alternative 2 has an "RL"
;; destination with operand 1 tied to it.
;; NOTE(review): alternative 2 is typed "ds" but its template is a
;; v_<mnemonic>0 with only %0 and %1, and "length" has a single value for
;; two alternatives -- confirm this alternative is correct as written.
2227(define_insn "<expander><mode>3"
03876953
AS
2228 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
2229 (comm_fp:FP
2230 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
2231 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
3d6275e3
AS
2232 ""
2233 "@
2234 v_<mnemonic>0\t%0, %2, %1
2235 v_<mnemonic>0\t%0, %1%O0"
2236 [(set_attr "type" "vop2,ds")
2237 (set_attr "length" "8")])
2238
;; Non-commutative FP binop (minus) on V_FP_1REG vectors.  Alternative 1
;; uses <mnemonic> with operands in order; alternative 2 uses <revmnemonic>
;; with the operands swapped, so either source may be the non-"v" one.
2239(define_insn "<expander><mode>3<exec>"
03876953
AS
2240 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
2241 (nocomm_fp:V_FP_1REG
2242 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2243 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
3d6275e3
AS
2244 ""
2245 "@
2246 v_<mnemonic>0\t%0, %1, %2
2247 v_<revmnemonic>0\t%0, %2, %1"
2248 [(set_attr "type" "vop2")
2249 (set_attr "length" "8,8")])
2250
;; Non-commutative FP binop (minus) on scalar FP_1REG modes; same two
;; alternatives (<mnemonic> in order, <revmnemonic> swapped) as the vector
;; pattern.
2251(define_insn "<expander><mode>3"
03876953
AS
2252 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v")
2253 (nocomm_fp:FP_1REG
2254 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2255 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
3d6275e3
AS
2256 ""
2257 "@
2258 v_<mnemonic>0\t%0, %1, %2
2259 v_<revmnemonic>0\t%0, %2, %1"
2260 [(set_attr "type" "vop2")
2261 (set_attr "length" "8,8")])
2262
2263;; }}}
2264;; {{{ FP unops
2265
;; Scalar FP absolute value, emitted as an add of 0 and |operand 1| using
;; the |...| source modifier.
2266(define_insn "abs<mode>2"
03876953
AS
2267 [(set (match_operand:FP 0 "register_operand" "=v")
2268 (abs:FP (match_operand:FP 1 "register_operand" " v")))]
3d6275e3
AS
2269 ""
2270 "v_add%i0\t%0, 0, |%1|"
2271 [(set_attr "type" "vop3a")
2272 (set_attr "length" "8")])
2273
;; Vector FP absolute value, emitted as an add of 0 and |operand 1| using
;; the |...| source modifier.
2274(define_insn "abs<mode>2<exec>"
03876953
AS
2275 [(set (match_operand:V_FP 0 "register_operand" "=v")
2276 (abs:V_FP
2277 (match_operand:V_FP 1 "register_operand" " v")))]
3d6275e3
AS
2278 ""
2279 "v_add%i0\t%0, 0, |%1|"
2280 [(set_attr "type" "vop3a")
2281 (set_attr "length" "8")])
2282
;; Vector FP negate, emitted as an add of 0 and -operand 1 using the
;; negation source modifier.
;; NOTE(review): 0 + (-x) may not preserve the sign of a zero input --
;; confirm this is acceptable for the target's FP semantics.
2283(define_insn "neg<mode>2<exec>"
03876953
AS
2284 [(set (match_operand:V_FP 0 "register_operand" "=v")
2285 (neg:V_FP
2286 (match_operand:V_FP 1 "register_operand" " v")))]
3d6275e3
AS
2287 ""
2288 "v_add%i0\t%0, 0, -%1"
2289 [(set_attr "type" "vop3a")
2290 (set_attr "length" "8")])
2291
;; Vector FP square root.  Only available when
;; flag_unsafe_math_optimizations is set.
2292(define_insn "sqrt<mode>2<exec>"
03876953
AS
2293 [(set (match_operand:V_FP 0 "register_operand" "= v")
2294 (sqrt:V_FP
2295 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
3d6275e3
AS
2296 "flag_unsafe_math_optimizations"
2297 "v_sqrt%i0\t%0, %1"
2298 [(set_attr "type" "vop1")
2299 (set_attr "length" "8")])
2300
;; Scalar FP square root.  Only available when
;; flag_unsafe_math_optimizations is set.
2301(define_insn "sqrt<mode>2"
03876953
AS
2302 [(set (match_operand:FP 0 "register_operand" "= v")
2303 (sqrt:FP
2304 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
3d6275e3
AS
2305 "flag_unsafe_math_optimizations"
2306 "v_sqrt%i0\t%0, %1"
2307 [(set_attr "type" "vop1")
2308 (set_attr "length" "8")])
2309
2310;; }}}
2311;; {{{ FP fused multiply and add
2312
;; Vector fused multiply-add: op0 = op1 * op2 + op3.  The two alternatives
;; differ in which of operands 2 and 3 takes the wider "vSvA" constraint;
;; both share one template.
2313(define_insn "fma<mode>4<exec>"
03876953
AS
2314 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
2315 (fma:V_FP
2316 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
2317 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
2318 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
3d6275e3
AS
2319 ""
2320 "v_fma%i0\t%0, %1, %2, %3"
2321 [(set_attr "type" "vop3a")
2322 (set_attr "length" "8")])
2323
;; Vector fused multiply-add with operand 2 negated:
;; op0 = op1 * (-op2) + op3.  Operand 1 is not marked commutative here, so
;; three alternatives let any one of the three sources take "vSvA".
2324(define_insn "fma<mode>4_negop2<exec>"
03876953
AS
2325 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
2326 (fma:V_FP
2327 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2328 (neg:V_FP
2329 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2330 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
3d6275e3
AS
2331 ""
2332 "v_fma%i0\t%0, %1, -%2, %3"
2333 [(set_attr "type" "vop3a")
2334 (set_attr "length" "8")])
2335
;; Scalar fused multiply-add: op0 = op1 * op2 + op3.  Same operand
;; alternatives and template as the vector pattern.
2336(define_insn "fma<mode>4"
03876953
AS
2337 [(set (match_operand:FP 0 "register_operand" "= v, v")
2338 (fma:FP
2339 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
2340 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
2341 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
3d6275e3
AS
2342 ""
2343 "v_fma%i0\t%0, %1, %2, %3"
2344 [(set_attr "type" "vop3a")
2345 (set_attr "length" "8")])
2346
;; Scalar fused multiply-add with operand 2 negated:
;; op0 = op1 * (-op2) + op3.  Same three alternatives as the vector pattern.
2347(define_insn "fma<mode>4_negop2"
03876953
AS
2348 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
2349 (fma:FP
2350 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2351 (neg:FP
2352 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2353 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
3d6275e3
AS
2354 ""
2355 "v_fma%i0\t%0, %1, -%2, %3"
2356 [(set_attr "type" "vop3a")
2357 (set_attr "length" "8")])
2358
2359;; }}}
2360;; {{{ FP division
2361
;; Vector reciprocal, represented as UNSPEC_RCP and emitted as v_rcp.  Used
;; by the div<mode>3 expander as the initial estimate.
2362(define_insn "recip<mode>2<exec>"
03876953 2363 [(set (match_operand:V_FP 0 "register_operand" "= v")
c8812bac
JB
2364 (unspec:V_FP
2365 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
2366 UNSPEC_RCP))]
3d6275e3
AS
2367 ""
2368 "v_rcp%i0\t%0, %1"
2369 [(set_attr "type" "vop1")
2370 (set_attr "length" "8")])
2371
;; Scalar reciprocal, represented as UNSPEC_RCP and emitted as v_rcp.  Used
;; by the div<mode>3 expander as the initial estimate.
2372(define_insn "recip<mode>2"
03876953 2373 [(set (match_operand:FP 0 "register_operand" "= v")
c8812bac
JB
2374 (unspec:FP
2375 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
2376 UNSPEC_RCP))]
3d6275e3
AS
2377 ""
2378 "v_rcp%i0\t%0, %1"
2379 [(set_attr "type" "vop1")
2380 (set_attr "length" "8")])
2381
2382;; Do division via a = b * 1/c
2383;; The v_rcp_* instructions are not sufficiently accurate on their own,
2384;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
2385;; which the ISA manual says is enough to improve the reciprocal accuracy.
2386;;
2387;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
2388
;; Vector FP division, operand 0 = operand 1 / operand 2, available only
;; under flag_reciprocal_math.
;;
;; Steps:
;;   1. initrcp = v_rcp (denom)                        initial estimate
;;   2. rcp = initrcp + (1 - initrcp*denom) * initrcp  one refinement step
;;   3. unless the numerator is the constant 1.0, multiply by the numerator
;;      and refine the quotient twice using the residual num - q*denom.
;;
;; When the numerator is 1.0 the refined reciprocal IS the result, so it is
;; written straight to operand 0 and step 3 is skipped.  (The test must be
;; against +1.0: with -1.0 the expander would return +1/x for -1/x.)
(define_expand "div<mode>3"
  [(match_operand:V_FP 0 "gcn_valu_dst_operand")
   (match_operand:V_FP 1 "gcn_valu_src0_operand")
   (match_operand:V_FP 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx one = gcn_vec_constant (<MODE>mode,
                  const_double_from_real_value (dconst1, <SCALAR_MODE>mode));
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;
    rtx num = operands[1], denom = operands[2];

    /* NOTE(review): only element 0 of the constant vector is inspected;
       this presumably relies on constant numerators being uniform --
       confirm.  */
    bool is_rcp = (GET_CODE (num) == CONST_VECTOR
                   && real_identical
                        (CONST_DOUBLE_REAL_VALUE
                          (CONST_VECTOR_ELT (num, 0)), &dconst1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    /* Reciprocal estimate plus one refinement step.  */
    emit_insn (gen_recip<mode>2 (initrcp, denom));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
    emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));

    if (!is_rcp)
      {
        rtx div_est = gen_reg_rtx (<MODE>mode);
        rtx fma2 = gen_reg_rtx (<MODE>mode);
        rtx fma3 = gen_reg_rtx (<MODE>mode);
        rtx fma4 = gen_reg_rtx (<MODE>mode);
        /* Quotient estimate, then two residual-correction steps.  */
        emit_insn (gen_mul<mode>3 (div_est, num, rcp));
        emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
        emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
        emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
        emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
      }

    DONE;
  })
2431
;; Scalar FP division, operand 0 = operand 1 / operand 2, available only
;; under flag_reciprocal_math.
;;
;; Steps:
;;   1. initrcp = v_rcp (denom)                        initial estimate
;;   2. rcp = initrcp + (1 - initrcp*denom) * initrcp  one refinement step
;;   3. unless the numerator is the constant 1.0, multiply by the numerator
;;      and refine the quotient twice using the residual num - q*denom.
;;
;; When the numerator is 1.0 the refined reciprocal IS the result, so it is
;; written straight to operand 0 and step 3 is skipped.  (The test must be
;; against +1.0: with -1.0 the expander would return +1/x for -1/x.)
(define_expand "div<mode>3"
  [(match_operand:FP 0 "gcn_valu_dst_operand")
   (match_operand:FP 1 "gcn_valu_src0_operand")
   (match_operand:FP 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx one = const_double_from_real_value (dconst1, <MODE>mode);
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;
    rtx num = operands[1], denom = operands[2];

    bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
                   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
                                      &dconst1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    /* Reciprocal estimate plus one refinement step.  */
    emit_insn (gen_recip<mode>2 (initrcp, denom));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
    emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));

    if (!is_rcp)
      {
        rtx div_est = gen_reg_rtx (<MODE>mode);
        rtx fma2 = gen_reg_rtx (<MODE>mode);
        rtx fma3 = gen_reg_rtx (<MODE>mode);
        rtx fma4 = gen_reg_rtx (<MODE>mode);
        /* Quotient estimate, then two residual-correction steps.  */
        emit_insn (gen_mul<mode>3 (div_est, num, rcp));
        emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
        emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
        emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
        emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
      }

    DONE;
  })
2472
2473;; }}}
2474;; {{{ Int/FP conversions
2475
;; Iterators and attributes for the int/FP conversion patterns:
;;  - CVT_FROM_MODE / CVT_TO_MODE: scalar source and destination modes;
;;  - VCVT_MODE / VCVT_FMODE / VCVT_IMODE: vector modes (all, FP only,
;;    integer only);
;;  - cvt_op: the conversion RTL codes;
;;  - cvt_name: the pattern-name stem for each code;
;;  - cvt_operands: the operand-type suffix appended to "v_cvt" per code.
;; Every mode pair is generated; each insn's condition calls
;; gcn_valid_cvt_p on the pair.
2476(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2477(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2478
3d66c777
AS
2479(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
2480(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
2481(define_mode_iterator VCVT_IMODE [V64HI V64SI])
3d6275e3
AS
2482
2483(define_code_iterator cvt_op [fix unsigned_fix
2484 float unsigned_float
2485 float_extend float_truncate])
2486(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2487 (float "float") (unsigned_float "floatuns")
2488 (float_extend "extend") (float_truncate "trunc")])
2489(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2490 (float "%i0%i1") (unsigned_float "%i0%u1")
2491 (float_extend "%i0%i1")
2492 (float_truncate "%i0%i1")])
2493
;; Scalar conversions between the CVT_FROM_MODE and CVT_TO_MODE modes, one
;; pattern per (code, from, to) combination; the insn condition calls
;; gcn_valid_cvt_p to enable or disable each combination.
2494(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2495 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2496 (cvt_op:CVT_TO_MODE
2497 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2498 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2499 <cvt_name>_cvt)"
2500 "v_cvt<cvt_operands>\t%0, %1"
2501 [(set_attr "type" "vop1")
2502 (set_attr "length" "8")])
2503
3d66c777
AS
;; Vector conversions whose destination is a floating-point vector mode
;; (source may be integer or FP); the insn condition calls gcn_valid_cvt_p
;; on each combination.
2504(define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
2505 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
2506 (cvt_op:VCVT_FMODE
2507 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
2508 "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
2509 <cvt_name>_cvt)"
2510 "v_cvt<cvt_operands>\t%0, %1"
2511 [(set_attr "type" "vop1")
2512 (set_attr "length" "8")])
2513
;; Vector conversions from a floating-point vector mode to an integer
;; vector mode; the insn condition calls gcn_valid_cvt_p on each
;; combination.
2514(define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
2515 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
2516 (cvt_op:VCVT_IMODE
2517 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
2518 "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
3d6275e3
AS
2519 <cvt_name>_cvt)"
2520 "v_cvt<cvt_operands>\t%0, %1"
2521 [(set_attr "type" "vop1")
2522 (set_attr "length" "8")])
2523
2524;; }}}
2525;; {{{ Int/int conversions
2526
3d66c777
AS
;; zero_convert: the integer conversions that share the same SDWA move
;; template (truncate and zero_extend).
;; convop: pattern-name stem for each integer conversion code.
2527(define_code_iterator zero_convert [truncate zero_extend])
2528(define_code_attr convop [
2529 (sign_extend "extend")
2530 (zero_extend "zero_extend")
2531 (truncate "trunc")])
2532
03876953
AS
;; Truncate or zero-extend between single-register integer vector modes
;; using an SDWA move: the destination and source selectors come from the
;; <sdwa> attribute of the two modes.  V_INT_1REG_ALT iterates over the same
;; modes as V_INT_1REG, so every source/destination pairing is generated.
2533(define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2534 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2535 (zero_convert:V_INT_1REG
2536 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3d66c777 2537 ""
03876953 2538 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
3d66c777
AS
2539 [(set_attr "type" "vop_sdwa")
2540 (set_attr "length" "8")])
2541
03876953
AS
;; Sign-extend between single-register integer vector modes using an SDWA
;; move with the sext() source modifier; only the source selector is given
;; in the template.
2542(define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2543 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2544 (sign_extend:V_INT_1REG
2545 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3d66c777 2546 ""
03876953 2547 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
3d66c777
AS
2548 [(set_attr "type" "vop_sdwa")
2549 (set_attr "length" "8")])
2550
3d6275e3
AS
2551;; GCC can already do these for scalar types, but not for vector types.
2552;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2553;; so there must be a few tricks here.
2554
;; Truncate a 64-bit-element vector (<VnDI>) to a single-register integer
;; vector.  Output as "#" and split after reload: only the low 32-bit half
;; of the source is needed, which is then either truncated further (for
;; sub-dword destinations) or simply copied (for the <VnSI> destination).
;;
;; The replacement pattern is the (const_int 0) placeholder, so the
;; preparation code must finish with DONE; otherwise the placeholder would
;; be emitted as an extra insn after the real ones.
(define_insn_and_split "trunc<vndi><mode>2"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
        (truncate:V_INT_1REG
          (match_operand:<VnDI> 1 "gcn_alu_operand"    " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
    rtx out = operands[0];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_trunc<vnsi><mode>2 (out, inlo));
    else
      emit_move_insn (out, inlo);
    DONE;
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2574
;; Masked form of the two-register to one-register truncate: the truncated
;; value is merged with operand 2 under the exec mask in operand 3.
;; After reload it is split into the matching single-register _exec pattern
;; (or a masked move when the destination already has SImode elements).
(define_insn_and_split "trunc<vndi><mode>2_exec"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
        (vec_merge:V_INT_1REG
          (truncate:V_INT_1REG
            (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
          (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
          (match_operand:DI 3 "gcn_exec_operand" " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out = operands[0];
    rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
    rtx merge = operands[2];
    rtx exec = operands[3];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec));
    else
      emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec));

    /* The replacement pattern above is only a placeholder; finish with DONE
       so that just the insns emitted here form the split sequence and the
       (const_int 0) itself is not emitted as an extra insn.  */
    DONE;
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2599
1165109b
AS
;; Sign- or zero-extend a single-register integer vector to a two-register
;; (DImode element) vector.  After reload the insn is split: part 0 of the
;; destination receives the value extended to SImode elements (or a plain
;; move), and part 1 is either part 0 shifted right arithmetically by 31
;; (signed) or a vector of zeros (unsigned).
(define_insn_and_split "<convop><mode><vndi>2"
  [(set (match_operand:<VnDI> 0 "register_operand" "=v")
        (any_extend:<VnDI>
          (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
    rtx in = operands[1];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_<convop><mode><vnsi>2 (outlo, in));
    else
      emit_move_insn (outlo, in);
    if ('<su>' == 's')
      emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31)));
    else
      emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx));

    /* The replacement pattern above is only a placeholder; finish with DONE
       so that just the insns emitted here form the split sequence and the
       (const_int 0) itself is not emitted as an extra insn.  */
    DONE;
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
2624
1165109b
AS
;; Masked form of the one-register to two-register extension: the extended
;; value is merged with operand 2 under the exec mask in operand 3.
;; After reload each half of the destination is produced by the matching
;; _exec pattern, merged with the corresponding half of operand 2.
(define_insn_and_split "<convop><mode><vndi>2_exec"
  [(set (match_operand:<VnDI> 0 "register_operand" "=v")
        (vec_merge:<VnDI>
          (any_extend:<VnDI>
            (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
          (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
          (match_operand:DI 3 "gcn_exec_operand" " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
    rtx in = operands[1];
    rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0);
    rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1);
    rtx exec = operands[3];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec));
    else
      emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec));
    if ('<su>' == 's')
      emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi,
                                       exec));
    else
      emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi,
                                               exec));

    /* The replacement pattern above is only a placeholder; finish with DONE
       so that just the insns emitted here form the split sequence and the
       (const_int 0) itself is not emitted as an extra insn.  */
    DONE;
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
3d6275e3
AS
2657
2658;; }}}
2659;; {{{ Vector comparison/merge
2660
;; Vector comparison of operands 2 and 3 producing a DImode result in
;; operand 0.  Alternatives 0-1 emit v_cmp writing vcc; alternatives 2-3 emit
;; v_cmpx (destination constraint "e") and clobber a "cV" scratch;
;; alternatives 4-5 emit v_cmp writing operand 0 directly (type vop3a).
;; The comparison suffix is produced from operator 1 by the %E1 directive.
2661(define_insn "vec_cmp<mode>di"
2662 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
dbde9e2d 2663 (match_operator:DI 1 "gcn_fp_compare_operator"
03876953
AS
2664 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
2665 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]))
3d6275e3
AS
2666 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
2667 ""
2668 "@
2669 v_cmp%E1\tvcc, %2, %3
2670 v_cmp%E1\tvcc, %2, %3
2671 v_cmpx%E1\tvcc, %2, %3
2672 v_cmpx%E1\tvcc, %2, %3
2673 v_cmp%E1\t%0, %2, %3
2674 v_cmp%E1\t%0, %2, %3"
2675 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2676 (set_attr "length" "4,8,4,8,8,8")])
2677
;; Unsigned vector comparison.  No separate insn is needed: the expander
;; simply forwards to the vec_cmp<mode>di pattern with the operator unchanged.
(define_expand "vec_cmpu<mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
  ""
  {
    rtx result = operands[0];
    rtx comparison = operands[1];
    rtx lhs = operands[2];
    rtx rhs = operands[3];

    /* Signed and unsigned comparisons share one pattern; the operator code
       itself (e.g. LTU rather than LT) carries the signedness, and the
       '%E1' output directive picks the instruction suffix from it.  */
    emit_insn (gen_vec_cmp<mode>di (result, comparison, lhs, rhs));
    DONE;
  })
2692
0e159efc 2693; There's no instruction for 8-bit vector comparison, so we need to extend.
;; Comparison of QImode-element vectors: both inputs are first widened to
;; SImode-element vectors in fresh pseudos, then compared with the
;; SImode-element vec_cmp pattern.
(define_expand "vec_cmp<u><mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
  "can_create_pseudo_p ()"
  {
    rtx wide_lhs = gen_reg_rtx (<VnSI>mode);
    rtx wide_rhs = gen_reg_rtx (<VnSI>mode);

    /* Widen each input with the extension selected by the iterator.  */
    emit_insn (gen_<expander><mode><vnsi>2 (wide_lhs, operands[2]));
    emit_insn (gen_<expander><mode><vnsi>2 (wide_rhs, operands[3]));

    /* Compare the widened values.  */
    emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1],
                                    wide_lhs, wide_rhs));
    DONE;
  })
2709
3d6275e3
AS
;; Vector comparison whose DImode result is ANDed with the exec operand
;; (operand 4).  The alternatives mirror the unmasked pattern: 0-1 emit v_cmp
;; writing vcc, 2-3 emit v_cmpx and clobber a "cV" scratch, 4-5 emit v_cmp
;; writing operand 0 directly (type vop3a).
2710(define_insn "vec_cmp<mode>di_exec"
2711 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2712 (and:DI
f4d4a406 2713 (match_operator 1 "gcn_fp_compare_operator"
03876953
AS
2714 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
2715 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])
3d6275e3
AS
2716 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
2717 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
2718 ""
2719 "@
2720 v_cmp%E1\tvcc, %2, %3
2721 v_cmp%E1\tvcc, %2, %3
2722 v_cmpx%E1\tvcc, %2, %3
2723 v_cmpx%E1\tvcc, %2, %3
2724 v_cmp%E1\t%0, %2, %3
2725 v_cmp%E1\t%0, %2, %3"
2726 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2727 (set_attr "length" "4,8,4,8,8,8")])
2728
0e159efc
AS
;; Masked unsigned vector comparison.  Operand 4 is the exec mask.
;; The expander forwards to the vec_cmp<mode>di_exec insn pattern, exactly as
;; vec_cmpu<mode>di forwards to vec_cmp<mode>di.
(define_expand "vec_cmpu<mode>di_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    /* Unsigned comparisons use the same patterns as signed comparisons,
       except that they use unsigned operators (e.g. LTU vs LT).
       The '%E1' directive then does the Right Thing.

       This must call the vec_cmp (not vec_cmpu) generator: calling
       gen_vec_cmpu<mode>di_exec here would re-enter this expander and
       recurse without end.  */
    emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
                                         operands[2], operands[3],
                                         operands[4]));
    DONE;
  })
2745
;; Masked comparison of QImode-element vectors: both inputs are widened to
;; SImode-element vectors under the exec mask (operand 4), then compared with
;; the masked SImode-element vec_cmp pattern.
(define_expand "vec_cmp<u><mode>di_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  "can_create_pseudo_p ()"
  {
    rtx exec = operands[4];
    rtx wide_lhs = gen_reg_rtx (<VnSI>mode);
    rtx wide_rhs = gen_reg_rtx (<VnSI>mode);

    /* Each masked extension is given its own input as the merge value.  */
    emit_insn (gen_<expander><mode><vnsi>2_exec (wide_lhs, operands[2],
                                                 operands[2], exec));
    emit_insn (gen_<expander><mode><vnsi>2_exec (wide_rhs, operands[3],
                                                 operands[3], exec));

    /* Compare the widened values under the same mask.  */
    emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1],
                                         wide_lhs, wide_rhs, exec));
    DONE;
  })
2765
3d6275e3
AS
;; Vector comparison in which the first input is a scalar (operand 2)
;; duplicated across the vector.  Alternatives 0-1 emit v_cmp writing vcc,
;; 2-3 emit v_cmpx and clobber a "cV" scratch, 4 emits v_cmp writing
;; operand 0 directly (type vop3a).
2766(define_insn "vec_cmp<mode>di_dup"
2767 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
dbde9e2d 2768 (match_operator:DI 1 "gcn_fp_compare_operator"
03876953 2769 [(vec_duplicate:V_noQI
3d6275e3
AS
2770 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2771 " Sv, B,Sv,B, A"))
03876953 2772 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]))
3d6275e3
AS
2773 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
2774 ""
2775 "@
2776 v_cmp%E1\tvcc, %2, %3
2777 v_cmp%E1\tvcc, %2, %3
2778 v_cmpx%E1\tvcc, %2, %3
2779 v_cmpx%E1\tvcc, %2, %3
2780 v_cmp%E1\t%0, %2, %3"
2781 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2782 (set_attr "length" "4,8,4,8,8")])
2783
;; Masked form of the scalar-duplicate comparison: the DImode comparison
;; result is ANDed with the exec operand (operand 4).  The alternatives are
;; laid out as in the unmasked vec_cmp<mode>di_dup pattern.
2784(define_insn "vec_cmp<mode>di_dup_exec"
2785 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2786 (and:DI
f4d4a406 2787 (match_operator 1 "gcn_fp_compare_operator"
03876953 2788 [(vec_duplicate:V_noQI
3d6275e3
AS
2789 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2790 " Sv, B,Sv,B, A"))
03876953 2791 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
3d6275e3
AS
2792 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
2793 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
2794 ""
2795 "@
2796 v_cmp%E1\tvcc, %2, %3
2797 v_cmp%E1\tvcc, %2, %3
2798 v_cmpx%E1\tvcc, %2, %3
2799 v_cmpx%E1\tvcc, %2, %3
2800 v_cmp%E1\t%0, %2, %3"
2801 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2802 (set_attr "length" "4,8,4,8,8")])
2803
;; Select between operands 1 and 2 under the DImode mask in operand 3.
;; The expander has no preparation code; it just generates the vec_merge
;; together with a <VnDI> scratch clobber.
2804(define_expand "vcond_mask_<mode>di"
2805 [(parallel
03876953
AS
2806 [(set (match_operand:V_ALL 0 "register_operand" "")
2807 (vec_merge:V_ALL
2808 (match_operand:V_ALL 1 "gcn_vop3_operand" "")
2809 (match_operand:V_ALL 2 "gcn_alu_operand" "")
2b99bed8 2810 (match_operand:DI 3 "register_operand" "")))
1165109b 2811 (clobber (scratch:<VnDI>))])]
3d6275e3
AS
2812 ""
2813 "")
2814
03876953
AS
;; Vector conditional select: compare operands 4 and 5 with operator 3, then
;; choose between operands 1 and 2 according to the resulting mask.
(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    /* Evaluate the comparison into a fresh DImode mask register.  */
    rtx lane_mask = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di (lane_mask, operands[3],
                                              operands[4], operands[5]));

    /* Merge the two value operands under that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], lane_mask));
    DONE;
  })
2831
03876953
AS
;; Vector conditional select with an exec mask (operand 6) applied to the
;; comparison.  The final merge uses the unmasked vcond_mask pattern.
(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    /* Evaluate the comparison, masked by operand 6, into a fresh DImode
       mask register.  */
    rtx lane_mask = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec (lane_mask, operands[3],
                                                   operands[4], operands[5],
                                                   operands[6]));

    /* Merge the two value operands under that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], lane_mask));
    DONE;
  })
2849
03876953
AS
;; Vector conditional select driven by an unsigned integer comparison:
;; compare operands 4 and 5 with vec_cmpu, then choose between operands 1
;; and 2 according to the resulting mask.
(define_expand "vcondu<V_ALL:mode><V_INT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])]
  ""
  {
    /* Evaluate the comparison into a fresh DImode mask register.  */
    rtx lane_mask = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmpu<V_INT:mode>di (lane_mask, operands[3],
                                           operands[4], operands[5]));

    /* Merge the two value operands under that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], lane_mask));
    DONE;
  })
2866
03876953
AS
;; Unsigned-comparison conditional select with an exec mask (operand 6)
;; applied to the comparison.  The final merge uses the unmasked vcond_mask
;; pattern.
(define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    /* Evaluate the comparison, masked by operand 6, into a fresh DImode
       mask register.  */
    rtx lane_mask = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmpu<V_INT:mode>di_exec (lane_mask, operands[3],
                                                operands[4], operands[5],
                                                operands[6]));

    /* Merge the two value operands under that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], lane_mask));
    DONE;
  })
2884
2885;; }}}
2886;; {{{ Fully masked loop support
2887
;; Build the DImode loop mask for a fully-masked loop from a start value
;; (operand 1) and a limit (operand 2).
;; - If either is not a CONST_INT, the mask is computed at run time by
;;   comparing the limit against VGPR 1 (named _0_1_2_3 below) plus the start.
;; - If both are constants, the mask is computed at compile time and has
;;   (limit - start) low bits set, saturating at all 64 bits.
(define_expand "while_ultsidi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:SI 1 "")
   (match_operand:SI 2 "")]
  ""
  {
    if (GET_CODE (operands[1]) != CONST_INT
        || GET_CODE (operands[2]) != CONST_INT)
      {
        rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
        rtx tmp = _0_1_2_3;
        /* Adding a constant zero start would be a no-op, so skip it.  */
        if (GET_CODE (operands[1]) != CONST_INT
            || INTVAL (operands[1]) != 0)
          {
            tmp = gen_reg_rtx (V64SImode);
            emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
          }
        emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
                                           gen_rtx_GT (VOIDmode, 0, 0),
                                           operands[2], tmp));
      }
    else
      {
        HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
        HOST_WIDE_INT mask;

        /* A non-positive difference selects no lanes.  Handle it explicitly:
           shifting by a negative count is undefined, and the run-time path
           above (a GT comparison) also yields an empty mask in that case.  */
        if (diff <= 0)
          mask = 0;
        else if (diff >= 64)
          mask = -1;
        else
          mask = ~((unsigned HOST_WIDE_INT)-1 << diff);
        emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
      }
    DONE;
  })
2918
;; Masked vector load: operand 0 is the destination register, operand 1 the
;; memory source and operand 2 the mask.  Implemented as a masked gather
;; from a per-lane address vector derived from the scalar address.
(define_expand "maskload<mode>di"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "memory_operand")
   (match_operand 2 "")]
  ""
  {
    rtx dest = operands[0];
    rtx mem = operands[1];
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr
      = gcn_expand_scalar_to_vector_address (<MODE>mode, exec, mem,
                                             gen_rtx_SCRATCH (<VnDI>mode));
    /* Address space and volatility of the MEM, passed on as constants.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (mem));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (mem));

    /* Masked lanes are required to hold zero, so clear the destination
       first and use it as the merge value of the gather.  */
    emit_move_insn (dest, gcn_vec_constant (<MODE>mode, 0));

    emit_insn (gen_gather<mode>_expr_exec (dest, addr, as, v, dest, exec));
    DONE;
  })
2938
;; Masked vector store: operand 0 is the memory destination, operand 1 the
;; source register and operand 2 the mask.  Implemented as a masked scatter
;; to a per-lane address vector derived from the scalar address.
(define_expand "maskstore<mode>di"
  [(match_operand:V_ALL 0 "memory_operand")
   (match_operand:V_ALL 1 "register_operand")
   (match_operand 2 "")]
  ""
  {
    rtx mem = operands[0];
    rtx src = operands[1];
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr
      = gcn_expand_scalar_to_vector_address (<MODE>mode, exec, mem,
                                             gen_rtx_SCRATCH (<VnDI>mode));
    /* Address space and volatility of the MEM, passed on as constants.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (mem));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (mem));

    emit_insn (gen_scatter<mode>_expr_exec (addr, src, as, v, exec));
    DONE;
  })
2953
;; Masked gather load.  Operand 0 is the destination, operand 1 the DImode
;; base, operand 2 the <VnSI> offset vector, operands 3 and 4 are passed to
;; gcn_expand_scaled_offsets along with the offsets, and operand 5 is the
;; mask.
(define_expand "mask_gather_load<mode><vnsi>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx dest = operands[0];
    rtx base = operands[1];
    rtx exec = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), exec);

    /* Masked lanes are required to hold zero, so clear the destination
       first and use it as the merge value of the gather.  */
    emit_move_insn (dest, gcn_vec_constant (<MODE>mode, 0));

    /* The helper's result is either a <VnDI> address vector (one-offset
       form) or something to be combined with the base (two-offset form).  */
    if (GET_MODE (addr) != <VnDI>mode)
      emit_insn (gen_gather<mode>_insn_2offsets_exec (dest, base, addr,
                                                      const0_rtx, const0_rtx,
                                                      const0_rtx, dest,
                                                      exec));
    else
      emit_insn (gen_gather<mode>_insn_1offset_exec (dest, addr, const0_rtx,
                                                     const0_rtx, const0_rtx,
                                                     dest, exec));
    DONE;
  })
2984
;; Masked scatter store.  Operand 0 is the DImode base, operand 1 the <VnSI>
;; offset vector, operands 2 and 3 are passed to gcn_expand_scaled_offsets
;; along with the offsets, operand 4 is the data to store and operand 5 is
;; the mask.
(define_expand "mask_scatter_store<mode><vnsi>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:<VnSI> 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:V_ALL 4 "register_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx base = operands[0];
    rtx data = operands[4];
    rtx exec = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), exec);

    /* The helper's result is either a <VnDI> address vector (one-offset
       form) or something to be combined with the base (two-offset form).  */
    if (GET_MODE (addr) != <VnDI>mode)
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (base, addr, const0_rtx,
                                                       data, const0_rtx,
                                                       const0_rtx, exec));
    else
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx, data,
                                                      const0_rtx, const0_rtx,
                                                      exec));
    DONE;
  })
3012
;; Arithmetic codes for which a conditional (masked) expander is provided.
5a80a6c3 3013(define_code_iterator cond_op [plus minus mult])
3d6275e3
AS
3014
;; Conditional arithmetic: apply the cond_op operation to operands 2 and 3
;; under the DImode mask in operand 1, with operand 4 passed as the merge
;; value of the underlying <expander><mode>3_exec pattern.
(define_expand "cond_<expander><mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_op:V_ALL
     (match_operand:V_ALL 2 "gcn_alu_operand")
     (match_operand:V_ALL 3 "gcn_alu_operand"))
   (match_operand:V_ALL 4 "register_operand")]
  ""
  {
    /* The mask and the first input must both be in registers.  */
    rtx exec = force_reg (DImode, operands[1]);
    rtx lhs = force_reg (<MODE>mode, operands[2]);
    operands[1] = exec;
    operands[2] = lhs;

    emit_insn (gen_<expander><mode>3_exec (operands[0], lhs, operands[3],
                                           operands[4], exec));
    DONE;
  })
3032
5a80a6c3 3033;; TODO smin umin smax umax
3d6275e3
AS
;; Bitwise codes for which a conditional (masked) expander is provided.
3034(define_code_iterator cond_bitop [and ior xor])
3035
;; Conditional bitwise operation on integer vectors: apply the cond_bitop
;; operation to operands 2 and 3 under the DImode mask in operand 1, with
;; operand 4 passed as the merge value of the <expander><mode>3_exec pattern.
(define_expand "cond_<expander><mode>"
  [(match_operand:V_INT 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_bitop:V_INT
     (match_operand:V_INT 2 "gcn_alu_operand")
     (match_operand:V_INT 3 "gcn_alu_operand"))
   (match_operand:V_INT 4 "register_operand")]
  ""
  {
    /* The mask and the first input must both be in registers.  */
    rtx exec = force_reg (DImode, operands[1]);
    rtx lhs = force_reg (<MODE>mode, operands[2]);
    operands[1] = exec;
    operands[2] = lhs;

    emit_insn (gen_<expander><mode>3_exec (operands[0], lhs, operands[3],
                                           operands[4], exec));
    DONE;
  })
3053
3054;; }}}
3055;; {{{ Vector reductions
3056
;; REDUC_UNSPEC lists every DPP-shift reduction unspec handled below.
3057(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
3058 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
3059 UNSPEC_PLUS_DPP_SHR
3060 UNSPEC_AND_DPP_SHR
3061 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3062
;; REDUC_2REG_UNSPEC is the subset used by the two-register (V_DI) split
;; pattern; it omits the min/max unspecs.
3063(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
3064 UNSPEC_AND_DPP_SHR
3065 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3066
;; reduc_unspec maps each unspec to its own name as a string, so that the
;; constant can be spelled inside C fragments and conditions.
3067; FIXME: Isn't there a better way of doing this?
3068(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
3069 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
3070 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
3071 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
3072 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
3073 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
3074 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
3075 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
3076
;; reduc_op gives the operation name used to build pattern names.
3077(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
3078 (UNSPEC_SMAX_DPP_SHR "smax")
3079 (UNSPEC_UMIN_DPP_SHR "umin")
3080 (UNSPEC_UMAX_DPP_SHR "umax")
3081 (UNSPEC_PLUS_DPP_SHR "plus")
3082 (UNSPEC_AND_DPP_SHR "and")
3083 (UNSPEC_IOR_DPP_SHR "ior")
3084 (UNSPEC_XOR_DPP_SHR "xor")])
3085
;; reduc_insn gives the instruction mnemonic template passed to
;; gcn_expand_dpp_shr_insn for each unspec.
3086(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
3087 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
3088 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
3089 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
a5879399
AS
3090 (UNSPEC_PLUS_DPP_SHR "v_add%U0")
3091 (UNSPEC_AND_DPP_SHR "v_and%B0")
3092 (UNSPEC_IOR_DPP_SHR "v_or%B0")
3093 (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
3d6275e3
AS
3094
;; Reduce a whole vector (operand 1) to a scalar (operand 0) with the
;; operation selected by REDUC_UNSPEC.
(define_expand "reduc_<reduc_op>_scal_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
        (unspec:<SCALAR_MODE>
          [(match_operand:V_ALL 1 "register_operand")]
          REDUC_UNSPEC))]
  ""
  {
    /* Let the helper emit the vector reduction sequence.  */
    rtx reduced = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
                                           <reduc_unspec>);

    /* The final value sits in lane 63 of the helper's result; copy it out
       to the scalar destination.  */
    emit_insn (gen_mov_from_lane63_<mode> (operands[0], reduced));

    DONE;
  })
3110
bf628a97
AS
3111;; Warning: This "-ffast-math" implementation converts in-order reductions
3112;; into associative reductions. It's also used where OpenMP or
3113;; OpenACC parallelization has already broken the in-order semantics.
;; In-order floating-point sum: operand 0 = operand 1 + (sum of the lanes of
;; operand 2).  Only enabled when pseudos can be created and one of
;; flag_openacc, flag_openmp or flag_associative_math is set, because the
;; vector part is computed with the ordinary plus reduction.
(define_expand "fold_left_plus_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:V_FP 2 "gcn_alu_operand")]
  "can_create_pseudo_p ()
   && (flag_openacc || flag_openmp
       || flag_associative_math)"
  {
    /* Reduce the vector into a fresh scalar pseudo.  */
    rtx lane_sum = gen_reg_rtx (<SCALAR_MODE>mode);
    emit_insn (gen_reduc_plus_scal_<mode> (lane_sum, operands[2]));

    /* Then add the incoming scalar to that sum.  */
    emit_insn (gen_add<scalar_mode>3 (operands[0], operands[1], lane_sum));
    DONE;
  })
3d6275e3
AS
3131
;; One DPP-shift reduction step on single-register vectors.  Operands 1 and
;; 2 are the vector inputs and operand 3 is a constant passed on to
;; gcn_expand_dpp_shr_insn, which produces the assembler text.
(define_insn "*<reduc_op>_dpp_shr_<mode>"
  [(set (match_operand:V_1REG 0 "register_operand" "=v")
        (unspec:V_1REG
          [(match_operand:V_1REG 1 "register_operand" "v")
           (match_operand:V_1REG 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          REDUC_UNSPEC))]
  ; GCN3 requires a carry out, GCN5 not
  "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
     && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
  {
    HOST_WIDE_INT amount = INTVAL (operands[3]);

    return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
                                    <reduc_unspec>, amount);
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
3148
1165109b
AS
;; DPP-shift reduction step on two-register (V_DI) vectors.  After reload it
;; is split into two independent <VnSI> steps, one per register half.
(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
  [(set (match_operand:V_DI 0 "register_operand" "=v")
        (unspec:V_DI
          [(match_operand:V_DI 1 "register_operand" "v")
           (match_operand:V_DI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          REDUC_2REG_UNSPEC))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4)
        (unspec:<VnSI>
          [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
   (set (match_dup 5)
        (unspec:<VnSI>
          [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
  {
    /* operands[4..9] receive parts 0 and 1 of operands[0], [1] and [2],
       in that order.  */
    for (int op = 0; op < 3; op++)
      for (int part = 0; part < 2; part++)
        operands[4 + 2 * op + part]
          = gcn_operand_part (<MODE>mode, operands[op], part);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
3175
3176; Special cases for addition.
3177
;; DPP-shift addition step for single-register integer vectors, in the form
;; that clobbers VCC.  The assembler text comes from gcn_expand_dpp_shr_insn
;; using the "v_add%^_u32" mnemonic template.
(define_insn "*plus_carry_dpp_shr_<mode>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
        (unspec:V_INT_1REG
          [(match_operand:V_INT_1REG 1 "register_operand" "v")
           (match_operand:V_INT_1REG 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    HOST_WIDE_INT amount = INTVAL (operands[3]);

    return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
                                    UNSPEC_PLUS_CARRY_DPP_SHR, amount);
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
3194
1165109b
AS
;; DPP-shift addition step with an additional DImode input (operand 4,
;; constraint "cV"); clobbers VCC.  The assembler text comes from
;; gcn_expand_dpp_shr_insn using the "v_addc%^_u32" mnemonic template.
(define_insn "*plus_carry_in_dpp_shr_<mode>"
  [(set (match_operand:V_SI 0 "register_operand" "=v")
        (unspec:V_SI
          [(match_operand:V_SI 1 "register_operand" "v")
           (match_operand:V_SI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")
           (match_operand:DI 4 "register_operand" "cV")]
          UNSPEC_PLUS_CARRY_IN_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    HOST_WIDE_INT amount = INTVAL (operands[3]);

    return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
                                    UNSPEC_PLUS_CARRY_IN_DPP_SHR, amount);
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
3212
1165109b
AS
;; DPP-shift addition step on two-register (V_DI) vectors.  After reload it
;; is split into a plus-carry step on part 0 followed by a plus-carry-in step
;; on part 1 that takes VCC as its extra input; both clobber VCC.
(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
  [(set (match_operand:V_DI 0 "register_operand" "=v")
        (unspec:V_DI
          [(match_operand:V_DI 1 "register_operand" "v")
           (match_operand:V_DI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 4)
                   (unspec:<VnSI>
                     [(match_dup 6) (match_dup 8) (match_dup 3)]
                     UNSPEC_PLUS_CARRY_DPP_SHR))
              (clobber (reg:DI VCC_REG))])
   (parallel [(set (match_dup 5)
                   (unspec:<VnSI>
                     [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
                     UNSPEC_PLUS_CARRY_IN_DPP_SHR))
              (clobber (reg:DI VCC_REG))])]
  {
    /* operands[4..9] receive parts 0 and 1 of operands[0], [1] and [2],
       in that order.  */
    for (int op = 0; op < 3; op++)
      for (int part = 0; part < 2; part++)
        operands[4 + 2 * op + part]
          = gcn_operand_part (<MODE>mode, operands[op], part);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
3244
3245; Instructions to move a scalar value from lane 63 of a vector register.
;; Single-register variant.  Alternative 0 ("Sg" destination) reads lane 63
;; with v_readlane_b32 and has its "exec" attribute set to "none";
;; alternative 1 ("v" destination) uses a v_mov_b32 with wave_ror:1.
3246(define_insn "mov_from_lane63_<mode>"
03876953 3247 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
3d6275e3 3248 (unspec:<SCALAR_MODE>
03876953 3249 [(match_operand:V_1REG 1 "register_operand" " v,v")]
3d6275e3
AS
3250 UNSPEC_MOV_FROM_LANE63))]
3251 ""
3252 "@
3253 v_readlane_b32\t%0, %1, 63
3254 v_mov_b32\t%0, %1 wave_ror:1"
3255 [(set_attr "type" "vop3a,vop_dpp")
3256 (set_attr "exec" "none,*")
3257 (set_attr "length" "8")])
3258
;; Two-register variant of the lane-63 move: each alternative emits one
;; instruction per register half.
;; - Alternative 0 ("Sg" destination): two v_readlane_b32.
;; - Alternative 1 ("v" destination): two v_mov_b32 with wave_ror:1, ordered
;;   according to the relative register numbers of destination and source.
;; The length is 16 because two instructions are emitted, each of the kind
;; that the single-register variant of this pattern counts as 8 bytes.
(define_insn "mov_from_lane63_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
        (unspec:<SCALAR_MODE>
          [(match_operand:V_2REG 1 "register_operand" " v,v")]
          UNSPEC_MOV_FROM_LANE63))]
  ""
  "@
   v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
   * if (REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
              \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
              \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
  [(set_attr "type" "vop3a,vop_dpp")
   (set_attr "exec" "none,*")
   (set_attr "length" "16")])
3276
3277;; }}}
3278;; {{{ Miscellaneous
3279
1165109b
AS
;; Build a V_SI vector as (VGPR 1 * operand 2) + operand 1, with both scalar
;; operands applied through the _dup forms of multiply and add.
(define_expand "vec_series<mode>"
  [(match_operand:V_SI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:SI 2 "gcn_alu_operand")]
  ""
  {
    rtx scaled = gen_reg_rtx (<MODE>mode);
    rtx lane_vec = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));

    /* scaled = VGPR 1 * operand 2.  */
    emit_insn (gen_mul<mode>3_dup (scaled, lane_vec, operands[2]));
    /* result = scaled + operand 1.  */
    emit_insn (gen_add<mode>3_dup (operands[0], scaled, operands[1]));
    DONE;
  })
3293
1165109b
AS
;; Build a V_DI vector as (VGPR 1 * operand 2) + operand 1.  VGPR 1 is read
;; in <VnSI> mode and multiplied through the zext_dup2 pattern; operand 1 is
;; broadcast into a vector before the final add.
(define_expand "vec_series<mode>"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx scaled = gen_reg_rtx (<MODE>mode);
    rtx lane_vec = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
    rtx base_vec = gen_reg_rtx (<MODE>mode);

    /* scaled = VGPR 1 * operand 2.  */
    emit_insn (gen_mul<mode>3_zext_dup2 (scaled, lane_vec, operands[2]));
    /* base_vec = operand 1 in every lane.  */
    emit_insn (gen_vec_duplicate<mode> (base_vec, operands[1]));
    /* result = scaled + base_vec.  */
    emit_insn (gen_add<mode>3 (operands[0], scaled, base_vec));
    DONE;
  })
3309
3310;; }}}