1 ;; Copyright (C) 2016-2022 Free Software Foundation, Inc.
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
17 ;; {{{ Vector iterators
19 ; Vector modes for specific types
; Iterates over the vector modes whose element mode is QI (see the
; SCALAR_MODE attribute): V2QI through V64QI.
20 (define_mode_iterator V_QI
21 [V2QI V4QI V8QI V16QI V32QI V64QI])
; Iterates over the vector modes whose element mode is HI (see the
; SCALAR_MODE attribute): V2HI through V64HI.
22 (define_mode_iterator V_HI
23 [V2HI V4HI V8HI V16HI V32HI V64HI])
; Iterates over the vector modes whose element mode is HF (see the
; SCALAR_MODE attribute): V2HF through V64HF.
24 (define_mode_iterator V_HF
25 [V2HF V4HF V8HF V16HF V32HF V64HF])
; Iterates over the vector modes whose element mode is SI (see the
; SCALAR_MODE attribute): V2SI through V64SI.
26 (define_mode_iterator V_SI
27 [V2SI V4SI V8SI V16SI V32SI V64SI])
; Iterates over the vector modes whose element mode is SF (see the
; SCALAR_MODE attribute): V2SF through V64SF.
28 (define_mode_iterator V_SF
29 [V2SF V4SF V8SF V16SF V32SF V64SF])
; Iterates over the vector modes whose element mode is DI (see the
; SCALAR_MODE attribute): V2DI through V64DI.
30 (define_mode_iterator V_DI
31 [V2DI V4DI V8DI V16DI V32DI V64DI])
; Iterates over the vector modes whose element mode is DF (see the
; SCALAR_MODE attribute): V2DF through V64DF.
32 (define_mode_iterator V_DF
33 [V2DF V4DF V8DF V16DF V32DF V64DF])
35 ; Vector modes for sub-dword modes
36 (define_mode_iterator V_QIHI
44 ; Vector modes for one vector register
; Every vector mode with a QI, HI, SI, HF or SF element; no DI or DF
; element mode is listed.  Patterns in this file that iterate over V_1REG
; emit one instruction per operation (e.g. a single v_writelane_b32 in
; "*vec_set<mode>"), whereas their V_2REG counterparts emit one
; instruction for each of the %L and %H halves.
45 (define_mode_iterator V_1REG
46 [V2QI V2HI V2SI V2HF V2SF
47 V4QI V4HI V4SI V4HF V4SF
48 V8QI V8HI V8SI V8HF V8SF
49 V16QI V16HI V16SI V16HF V16SF
50 V32QI V32HI V32SI V32HF V32SF
51 V64QI V64HI V64SI V64HF V64SF])
53 (define_mode_iterator V_INT_1REG
60 (define_mode_iterator V_INT_1REG_ALT
67 (define_mode_iterator V_FP_1REG
75 ; Vector modes for two vector registers
76 (define_mode_iterator V_2REG
84 ; Vector modes with native support
; Every mode listed in V_ALL except those with a QI element: the HI, HF,
; SI, SF, DI and DF element modes at each of the six vector sizes.
85 (define_mode_iterator V_noQI
86 [V2HI V2HF V2SI V2SF V2DI V2DF
87 V4HI V4HF V4SI V4SF V4DI V4DF
88 V8HI V8HF V8SI V8SF V8DI V8DF
89 V16HI V16HF V16SI V16SF V16DI V16DF
90 V32HI V32HF V32SI V32SF V32DI V32DF
91 V64HI V64HF V64SI V64SF V64DI V64DF])
; Every mode listed in V_ALL except those with a QI or HI element.  Note
; that the HF element modes are still included; only the integer QI and
; HI element modes are left out.
92 (define_mode_iterator V_noHI
93 [V2HF V2SI V2SF V2DI V2DF
94 V4HF V4SI V4SF V4DI V4DF
95 V8HF V8SI V8SF V8DI V8DF
96 V16HF V16SI V16SF V16DI V16DF
97 V32HF V32SI V32SF V32DI V32DF
98 V64HF V64SI V64SF V64DI V64DF])
100 (define_mode_iterator V_INT_noQI
107 (define_mode_iterator V_INT_noHI
; The full set of vector modes handled by this file: seven element modes
; (QI, HI, HF, SI, SF, DI, DF) at each of the six vector sizes (V2 .. V64).
; This is the iterator used by the generic move, reload, gather/scatter
; and vec_init/vec_extract expanders.
116 (define_mode_iterator V_ALL
117 [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
118 V4QI V4HI V4HF V4SI V4SF V4DI V4DF
119 V8QI V8HI V8HF V8SI V8SF V8DI V8DF
120 V16QI V16HI V16HF V16SI V16SF V16DI V16DF
121 V32QI V32HI V32HF V32SI V32SF V32DI V32DF
122 V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
; Exactly the same mode list as V_ALL, under a second iterator name.  It
; lets a single pattern range over two vector modes independently, as in
; "vec_extract<V_ALL:mode><V_ALL_ALT:mode>" and
; "vec_init<V_ALL:mode><V_ALL_ALT:mode>".  Keep the two lists in sync.
123 (define_mode_iterator V_ALL_ALT
124 [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
125 V4QI V4HI V4HF V4SI V4SF V4DI V4DF
126 V8QI V8HI V8HF V8SI V8SF V8DI V8DF
127 V16QI V16HI V16HF V16SI V16SF V16DI V16DF
128 V32QI V32HI V32HF V32SI V32SF V32DI V32DF
129 V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
131 (define_mode_iterator V_INT
135 V16QI V16HI V16SI V16DI
136 V32QI V32HI V32SI V32DI
137 V64QI V64HI V64SI V64DI])
138 (define_mode_iterator V_FP
; Maps each vector mode to the lower-case name of its element mode.  The
; lower-case form is spliced into pattern names, e.g.
; "vec_extract<mode><scalar_mode>" and "vec_init<mode><scalar_mode>".
146 (define_mode_attr scalar_mode
147 [(V2QI "qi") (V2HI "hi") (V2SI "si")
148 (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
149 (V4QI "qi") (V4HI "hi") (V4SI "si")
150 (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df")
151 (V8QI "qi") (V8HI "hi") (V8SI "si")
152 (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df")
153 (V16QI "qi") (V16HI "hi") (V16SI "si")
154 (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
155 (V32QI "qi") (V32HI "hi") (V32SI "si")
156 (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
157 (V64QI "qi") (V64HI "hi") (V64SI "si")
158 (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
; Maps each vector mode to the upper-case name of its element mode.  The
; upper-case form is used where a machine mode is required, e.g.
; (match_operand:<SCALAR_MODE> ...) in RTL templates and
; <V_ALL:SCALAR_MODE>mode in C conditions.
160 (define_mode_attr SCALAR_MODE
161 [(V2QI "QI") (V2HI "HI") (V2SI "SI")
162 (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF")
163 (V4QI "QI") (V4HI "HI") (V4SI "SI")
164 (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF")
165 (V8QI "QI") (V8HI "HI") (V8SI "SI")
166 (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF")
167 (V16QI "QI") (V16HI "HI") (V16SI "SI")
168 (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
169 (V32QI "QI") (V32HI "HI") (V32SI "SI")
170 (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
171 (V64QI "QI") (V64HI "HI") (V64SI "SI")
172 (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
; Maps each vector mode to the lower-case name of the SI-element vector
; mode of the same size prefix (V2xx -> "v2si", ..., V64xx -> "v64si").
; The lower-case form is spliced into names, e.g.
; "gather_load<mode><vnsi>" and gen_vec_series<V_ALL:vnsi>.
174 (define_mode_attr vnsi
175 [(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si")
176 (V2SF "v2si") (V2DI "v2si") (V2DF "v2si")
177 (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si")
178 (V4SF "v4si") (V4DI "v4si") (V4DF "v4si")
179 (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si")
180 (V8SF "v8si") (V8DI "v8si") (V8DF "v8si")
181 (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
182 (V16SF "v16si") (V16DI "v16si") (V16DF "v16si")
183 (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
184 (V32SF "v32si") (V32DI "v32si") (V32DF "v32si")
185 (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
186 (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
; Upper-case counterpart of vnsi: maps each vector mode to the SI-element
; vector mode of the same size prefix.  Used where a machine mode is
; required, e.g. (match_operand:<VnSI> ...) for 32-bit offset vectors and
; gen_reg_rtx (<V_ALL:VnSI>mode).
188 (define_mode_attr VnSI
189 [(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI")
190 (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI")
191 (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI")
192 (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI")
193 (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI")
194 (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI")
195 (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
196 (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI")
197 (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
198 (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI")
199 (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
200 (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
; Maps each vector mode to the lower-case name of the DI-element vector
; mode of the same size prefix (V2xx -> "v2di", ..., V64xx -> "v64di").
; Lower-case twin of VnDI, for use inside pattern and generator names.
202 (define_mode_attr vndi
203 [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di")
204 (V2SF "v2di") (V2DI "v2di") (V2DF "v2di")
205 (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di")
206 (V4SF "v4di") (V4DI "v4di") (V4DF "v4di")
207 (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di")
208 (V8SF "v8di") (V8DI "v8di") (V8DF "v8di")
209 (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
210 (V16SF "v16di") (V16DI "v16di") (V16DF "v16di")
211 (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
212 (V32SF "v32di") (V32DI "v32di") (V32DF "v32di")
213 (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
214 (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
; Maps each vector mode to the DI-element vector mode of the same size
; prefix.  In this file it is the mode of per-lane address vectors and of
; the scratch registers that hold them, e.g. (match_scratch:<VnDI> ...),
; (plus:<VnDI> ...) and gen_rtx_SCRATCH (<VnDI>mode).
216 (define_mode_attr VnDI
217 [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI")
218 (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI")
219 (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI")
220 (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI")
221 (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI")
222 (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI")
223 (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
224 (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI")
225 (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
226 (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI")
227 (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
228 (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
; Maps the integer QI, HI and SI element vector modes to the strings
; "BYTE_0", "WORD_0" and "DWORD" respectively.  There are no entries for
; the HF, SF, DI or DF element modes, so <sdwa> may only be used in
; patterns whose iterator is restricted to the modes listed here.
; NOTE(review): presumably these are SDWA sub-dword selector operands for
; the assembler output -- confirm against the patterns that use <sdwa>.
230 (define_mode_attr sdwa
231 [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD")
232 (V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD")
233 (V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD")
234 (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD")
235 (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD")
236 (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
241 (define_subst_attr "exec" "vec_merge"
243 (define_subst_attr "exec_clobber" "vec_merge_with_clobber"
245 (define_subst_attr "exec_vcc" "vec_merge_with_vcc"
247 (define_subst_attr "exec_scatter" "scatter_store"
250 (define_subst "vec_merge"
251 [(set (match_operand:V_ALL 0)
252 (match_operand:V_ALL 1))]
257 (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
258 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
260 (define_subst "vec_merge_with_clobber"
261 [(set (match_operand:V_ALL 0)
262 (match_operand:V_ALL 1))
263 (clobber (match_operand 2))]
268 (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
269 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
270 (clobber (match_dup 2))])
272 (define_subst "vec_merge_with_vcc"
273 [(set (match_operand:V_ALL 0)
274 (match_operand:V_ALL 1))
275 (set (match_operand:DI 2)
276 (match_operand:DI 3))]
282 (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
283 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
285 (and:DI (match_dup 3)
286 (reg:DI EXEC_REG)))])])
288 (define_subst "scatter_store"
289 [(set (mem:BLK (scratch))
297 [(set (mem:BLK (scratch))
303 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
309 ; This is the entry point for all vector register moves. Memory accesses can
310 ; come this way also, but will more usually use the reload_in/out,
311 ; gather/scatter, maskload/store, etc.
313 (define_expand "mov<mode>"
314 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
315 (match_operand:V_ALL 1 "general_operand"))]
318 /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
319 registers, but we can convert the MEM to a mode that does work. */
320 if (MEM_P (operands[0]) && !SUBREG_P (operands[0])
321 && SUBREG_P (operands[1])
322 && GET_MODE_SIZE (GET_MODE (operands[1]))
323 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))))
325 rtx src = SUBREG_REG (operands[1]);
326 rtx mem = copy_rtx (operands[0]);
327 PUT_MODE_RAW (mem, GET_MODE (src));
328 emit_move_insn (mem, src);
331 if (MEM_P (operands[1]) && !SUBREG_P (operands[1])
332 && SUBREG_P (operands[0])
333 && GET_MODE_SIZE (GET_MODE (operands[0]))
334 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))))
336 rtx dest = SUBREG_REG (operands[0]);
337 rtx mem = copy_rtx (operands[1]);
338 PUT_MODE_RAW (mem, GET_MODE (dest));
339 emit_move_insn (dest, mem);
343 /* SUBREG of MEM is not supported. */
344 gcc_assert ((!SUBREG_P (operands[0])
345 || !MEM_P (SUBREG_REG (operands[0])))
346 && (!SUBREG_P (operands[1])
347 || !MEM_P (SUBREG_REG (operands[1]))));
349 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
351 operands[1] = force_reg (<MODE>mode, operands[1]);
352 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
353 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
354 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
355 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
358 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
361 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
363 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
364 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
365 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
366 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
369 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
372 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
374 gcc_assert (!reload_completed);
375 rtx scratch = gen_reg_rtx (<VnDI>mode);
376 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
381 ; A pseudo instruction that helps LRA use the "U0" constraint.
383 (define_insn "mov<mode>_unspec"
384 [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
385 (match_operand:V_ALL 1 "gcn_unspec_operand" " U"))]
388 [(set_attr "type" "unknown")
389 (set_attr "length" "0")])
391 (define_insn "*mov<mode>"
392 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
393 (match_operand:V_1REG 1 "general_operand" "vA,B"))]
396 [(set_attr "type" "vop1,vop1")
397 (set_attr "length" "4,8")])
399 (define_insn "mov<mode>_exec"
400 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
402 (match_operand:V_1REG 1 "general_operand" "vA, B, v,vA, m, v")
403 (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
405 (match_operand:DI 3 "register_operand" " e, e,cV,Sv, e, e")))
406 (clobber (match_scratch:<VnDI> 4 "=X, X, X, X,&v,&v"))]
407 "!MEM_P (operands[0]) || REG_P (operands[1])"
411 v_cndmask_b32\t%0, %2, %1, vcc
412 v_cndmask_b32\t%0, %2, %1, %3
415 [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
416 (set_attr "length" "4,8,4,8,16,16")])
418 ; This variant does not accept an unspec, but does permit MEM
419 ; read/modify/write which is necessary for maskstore.
421 ;(define_insn "*mov<mode>_exec_match"
422 ; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
424 ; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
426 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
427 ; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
428 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
434 ; [(set_attr "type" "vop1,vop1,*,*")
435 ; (set_attr "length" "4,8,16,16")])
437 (define_insn "*mov<mode>"
438 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
439 (match_operand:V_2REG 1 "general_operand" "vDB"))]
442 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
443 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
445 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
447 [(set_attr "type" "vmult")
448 (set_attr "length" "16")])
450 (define_insn "mov<mode>_exec"
451 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
453 (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
454 (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
455 " U0,vDA0,vDA0,U0,U0")
456 (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
457 (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
458 "!MEM_P (operands[0]) || REG_P (operands[1])"
460 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
461 switch (which_alternative)
464 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
466 return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
467 "v_cndmask_b32\t%H0, %H2, %H1, vcc";
469 return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
470 "v_cndmask_b32\t%H0, %H2, %H1, %3";
473 switch (which_alternative)
476 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
478 return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
479 "v_cndmask_b32\t%L0, %L2, %L1, vcc";
481 return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
482 "v_cndmask_b32\t%L0, %L2, %L1, %3";
487 [(set_attr "type" "vmult,vmult,vmult,*,*")
488 (set_attr "length" "16,16,16,16,16")])
490 ; This variant does not accept an unspec, but does permit MEM
491 ; read/modify/write which is necessary for maskstore.
493 ;(define_insn "*mov<mode>_exec_match"
494 ; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
496 ; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
498 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
499 ; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
500 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
502 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
503 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
505 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
508 ; [(set_attr "type" "vmult,*,*")
509 ; (set_attr "length" "16,16,16")])
511 ; A SGPR-base load looks like:
514 ; There's no hardware instruction that corresponds to this, but vector base
515 ; addresses are placed in an SGPR because it is easier to add to a vector.
516 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
519 ; vT = v1 << log2(element-size)
523 (define_insn "mov<mode>_sgprbase"
524 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
526 [(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")]
528 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
529 "lra_in_progress || reload_completed"
535 [(set_attr "type" "vop1,vop1,*,*")
536 (set_attr "length" "4,8,12,12")])
538 (define_insn "mov<mode>_sgprbase"
539 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
541 [(match_operand:V_2REG 1 "general_operand" "vDB, m, v")]
543 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
544 "lra_in_progress || reload_completed"
546 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
547 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
549 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
552 [(set_attr "type" "vmult,*,*")
553 (set_attr "length" "8,12,12")])
555 ; reload_in was once a standard name, but here it's only referenced by
556 ; gcn_secondary_reload. It allows a reload with a scratch register.
558 (define_expand "reload_in<mode>"
559 [(set (match_operand:V_ALL 0 "register_operand" "= v")
560 (match_operand:V_ALL 1 "memory_operand" " m"))
561 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
564 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
568 ; reload_out is similar to reload_in, above.
570 (define_expand "reload_out<mode>"
571 [(set (match_operand:V_ALL 0 "memory_operand" "= m")
572 (match_operand:V_ALL 1 "register_operand" " v"))
573 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
576 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
580 ; Expand scalar addresses into gather/scatter patterns
583 [(set (match_operand:V_ALL 0 "memory_operand")
585 [(match_operand:V_ALL 1 "general_operand")]
587 (clobber (match_scratch:<VnDI> 2))]
589 [(set (mem:BLK (scratch))
590 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
593 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
596 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
597 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
601 [(set (match_operand:V_ALL 0 "memory_operand")
603 (match_operand:V_ALL 1 "general_operand")
604 (match_operand:V_ALL 2 "")
605 (match_operand:DI 3 "gcn_exec_reg_operand")))
606 (clobber (match_scratch:<VnDI> 4))]
608 [(set (mem:BLK (scratch))
609 (unspec:BLK [(match_dup 5) (match_dup 1)
610 (match_dup 6) (match_dup 7) (match_dup 3)]
613 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
617 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
618 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
622 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
624 [(match_operand:V_ALL 1 "memory_operand")]
626 (clobber (match_scratch:<VnDI> 2))]
629 (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
633 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
636 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
637 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
641 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
643 (match_operand:V_ALL 1 "memory_operand")
644 (match_operand:V_ALL 2 "")
645 (match_operand:DI 3 "gcn_exec_reg_operand")))
646 (clobber (match_scratch:<VnDI> 4))]
650 (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
656 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
660 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
661 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
664 ; TODO: Add zero/sign extending variants.
669 ; v_writelane and v_readlane work regardless of exec flags.
670 ; We allow source to be scratch.
672 ; FIXME these should take A immediates
674 (define_insn "*vec_set<mode>"
675 [(set (match_operand:V_1REG 0 "register_operand" "= v")
677 (vec_duplicate:V_1REG
678 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
679 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
680 (ashift (const_int 1)
681 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
683 "v_writelane_b32 %0, %1, %2"
684 [(set_attr "type" "vop3a")
685 (set_attr "length" "8")
686 (set_attr "exec" "none")
687 (set_attr "laneselect" "yes")])
689 ; FIXME: 64bit operations really should be splitters, but I am not sure how
690 ; to represent vertical subregs.
691 (define_insn "*vec_set<mode>"
692 [(set (match_operand:V_2REG 0 "register_operand" "= v")
694 (vec_duplicate:V_2REG
695 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
696 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
697 (ashift (const_int 1)
698 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
700 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
701 [(set_attr "type" "vmult")
702 (set_attr "length" "16")
703 (set_attr "exec" "none")
704 (set_attr "laneselect" "yes")])
706 (define_expand "vec_set<mode>"
707 [(set (match_operand:V_ALL 0 "register_operand")
710 (match_operand:<SCALAR_MODE> 1 "register_operand"))
712 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
715 (define_insn "*vec_set<mode>_1"
716 [(set (match_operand:V_1REG 0 "register_operand" "=v")
718 (vec_duplicate:V_1REG
719 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
720 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
721 (match_operand:SI 2 "const_int_operand" " i")))]
722 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
724 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
725 return "v_writelane_b32 %0, %1, %2";
727 [(set_attr "type" "vop3a")
728 (set_attr "length" "8")
729 (set_attr "exec" "none")
730 (set_attr "laneselect" "yes")])
732 (define_insn "*vec_set<mode>_1"
733 [(set (match_operand:V_2REG 0 "register_operand" "=v")
735 (vec_duplicate:V_2REG
736 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
737 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
738 (match_operand:SI 2 "const_int_operand" " i")))]
739 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
741 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
742 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
744 [(set_attr "type" "vmult")
745 (set_attr "length" "16")
746 (set_attr "exec" "none")
747 (set_attr "laneselect" "yes")])
749 (define_insn "vec_duplicate<mode><exec>"
750 [(set (match_operand:V_1REG 0 "register_operand" "=v")
751 (vec_duplicate:V_1REG
752 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
755 [(set_attr "type" "vop3a")
756 (set_attr "length" "8")])
758 (define_insn "vec_duplicate<mode><exec>"
759 [(set (match_operand:V_2REG 0 "register_operand" "= v")
760 (vec_duplicate:V_2REG
761 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
763 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
764 [(set_attr "type" "vop3a")
765 (set_attr "length" "16")])
767 (define_insn "vec_extract<mode><scalar_mode>"
768 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
769 (vec_select:<SCALAR_MODE>
770 (match_operand:V_1REG 1 "register_operand" " v")
771 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
773 "v_readlane_b32 %0, %1, %2"
774 [(set_attr "type" "vop3a")
775 (set_attr "length" "8")
776 (set_attr "exec" "none")
777 (set_attr "laneselect" "yes")])
779 (define_insn "vec_extract<mode><scalar_mode>"
780 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
781 (vec_select:<SCALAR_MODE>
782 (match_operand:V_2REG 1 "register_operand" " v")
783 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
785 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
786 [(set_attr "type" "vmult")
787 (set_attr "length" "16")
788 (set_attr "exec" "none")
789 (set_attr "laneselect" "yes")])
791 (define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>"
792 [(set (match_operand:V_ALL_ALT 0 "register_operand")
793 (vec_select:V_ALL_ALT
794 (match_operand:V_ALL 1 "register_operand")
795 (parallel [(match_operand 2 "immediate_operand")])))]
796 "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)
797 && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode"
799 int numlanes = GET_MODE_NUNITS (<V_ALL_ALT:MODE>mode);
800 int firstlane = INTVAL (operands[2]) * numlanes;
805 /* A plain move will do. */
808 /* FIXME: optimize this by using DPP where available. */
810 rtx permutation = gen_reg_rtx (<V_ALL:VnSI>mode);
811 emit_insn (gen_vec_series<V_ALL:vnsi> (permutation,
812 GEN_INT (firstlane*4),
815 tmp = gen_reg_rtx (<V_ALL:MODE>mode);
816 emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1],
817 get_exec (<V_ALL:MODE>mode)));
820 emit_move_insn (operands[0],
821 gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0));
825 (define_expand "extract_last_<mode>"
826 [(match_operand:<SCALAR_MODE> 0 "register_operand")
827 (match_operand:DI 1 "gcn_alu_operand")
828 (match_operand:V_ALL 2 "register_operand")]
829 "can_create_pseudo_p ()"
831 rtx dst = operands[0];
832 rtx mask = operands[1];
833 rtx vect = operands[2];
834 rtx tmpreg = gen_reg_rtx (SImode);
836 emit_insn (gen_clzdi2 (tmpreg, mask));
837 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
838 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
842 (define_expand "fold_extract_last_<mode>"
843 [(match_operand:<SCALAR_MODE> 0 "register_operand")
844 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
845 (match_operand:DI 2 "gcn_alu_operand")
846 (match_operand:V_ALL 3 "register_operand")]
847 "can_create_pseudo_p ()"
849 rtx dst = operands[0];
850 rtx default_value = operands[1];
851 rtx mask = operands[2];
852 rtx vect = operands[3];
853 rtx else_label = gen_label_rtx ();
854 rtx end_label = gen_label_rtx ();
856 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
857 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
858 emit_insn (gen_extract_last_<mode> (dst, mask, vect));
859 emit_jump_insn (gen_jump (end_label));
861 emit_label (else_label);
862 emit_move_insn (dst, default_value);
863 emit_label (end_label);
867 (define_expand "vec_init<mode><scalar_mode>"
868 [(match_operand:V_ALL 0 "register_operand")
872 gcn_expand_vector_init (operands[0], operands[1]);
876 (define_expand "vec_init<V_ALL:mode><V_ALL_ALT:mode>"
877 [(match_operand:V_ALL 0 "register_operand")
878 (match_operand:V_ALL_ALT 1)]
879 "<V_ALL:SCALAR_MODE>mode == <V_ALL_ALT:SCALAR_MODE>mode
880 && MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)"
882 gcn_expand_vector_init (operands[0], operands[1]);
887 ;; {{{ Scatter / Gather
889 ;; GCN does not have an instruction for loading a vector from contiguous
890 ;; memory so *all* loads and stores are eventually converted to scatter or gather operations.
893 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
894 ;; unspec. The unspec formats are as follows:
897 ;; [(<address expression>)
900 ;; (mem:BLK (scratch))]
904 ;; [(<address expression>)
905 ;; (<source register>)
911 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
912 ;; - The mem:BLK does not contain any real information, but indicates that an
913 ;; unknown memory read is taking place. Stores are expected to use a similar
914 ;; mem:BLK outside the unspec.
915 ;; - The address space and glc (volatile) fields are there to replace the
916 ;; fields normally found in a MEM.
917 ;; - Multiple forms of address expression are supported, below.
919 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
921 (define_expand "gather_load<mode><vnsi>"
922 [(match_operand:V_ALL 0 "register_operand")
923 (match_operand:DI 1 "register_operand")
924 (match_operand:<VnSI> 2 "register_operand")
925 (match_operand 3 "immediate_operand")
926 (match_operand:SI 4 "gcn_alu_operand")]
929 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
930 operands[2], operands[4],
931 INTVAL (operands[3]), NULL);
933 if (GET_MODE (addr) == <VnDI>mode)
934 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
935 const0_rtx, const0_rtx));
937 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
938 addr, const0_rtx, const0_rtx,
943 ; Allow any address expression
944 (define_expand "gather<mode>_expr<exec>"
945 [(set (match_operand:V_ALL 0 "register_operand")
947 [(match_operand 1 "")
948 (match_operand 2 "immediate_operand")
949 (match_operand 3 "immediate_operand")
; Gather load through a <VnDI> vector of addresses (operand 1) plus a
; constant offset (operand 2).  Operand 3 is read as the address space and
; operand 4 selects the " glc" suffix on the emitted instruction.  The
; insn condition accepts only the flat and global address spaces, each with
; its own offset range.
; NOTE(review): in the flat arm of the condition the "< 0x1000" offset test
; is not guarded by TARGET_GCN5_PLUS, yet the output code only prints
; "offset:%2" for flat loads under TARGET_GCN5_PLUS.  The condition of
; "scatter<mode>_insn_1offset<exec_scatter>" does apply that guard.  It
; looks like a nonzero flat offset could be accepted and then silently
; dropped when TARGET_GCN5_PLUS is false -- confirm, and if so make this
; condition match the scatter one.
955 (define_insn "gather<mode>_insn_1offset<exec>"
956 [(set (match_operand:V_ALL 0 "register_operand" "=v")
958 [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
959 (vec_duplicate:<VnDI>
960 (match_operand 2 "immediate_operand" " n")))
961 (match_operand 3 "immediate_operand" " n")
962 (match_operand 4 "immediate_operand" " n")
965 "(AS_FLAT_P (INTVAL (operands[3]))
966 && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
967 || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
968 || (AS_GLOBAL_P (INTVAL (operands[3]))
969 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
971 addr_space_t as = INTVAL (operands[3]);
972 const char *glc = INTVAL (operands[4]) ? " glc" : "";
974 static char buf[200];
977 if (TARGET_GCN5_PLUS)
978 sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
981 sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
983 else if (AS_GLOBAL_P (as))
984 sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
985 "s_waitcnt\tvmcnt(0)", glc);
991 [(set_attr "type" "flat")
992 (set_attr "length" "12")])
994 (define_insn "gather<mode>_insn_1offset_ds<exec>"
995 [(set (match_operand:V_ALL 0 "register_operand" "=v")
997 [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
998 (vec_duplicate:<VnSI>
999 (match_operand 2 "immediate_operand" " n")))
1000 (match_operand 3 "immediate_operand" " n")
1001 (match_operand 4 "immediate_operand" " n")
1002 (mem:BLK (scratch))]
1004 "(AS_ANY_DS_P (INTVAL (operands[3]))
1005 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
1007 addr_space_t as = INTVAL (operands[3]);
1008 static char buf[200];
1009 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
1010 (AS_GDS_P (as) ? " gds" : ""));
1013 [(set_attr "type" "ds")
1014 (set_attr "length" "12")])
1016 (define_insn "gather<mode>_insn_2offsets<exec>"
1017 [(set (match_operand:V_ALL 0 "register_operand" "=v")
1021 (vec_duplicate:<VnDI>
1022 (match_operand:DI 1 "register_operand" "Sv"))
1024 (match_operand:<VnSI> 2 "register_operand" " v")))
1025 (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
1026 (match_operand 4 "immediate_operand" " n")
1027 (match_operand 5 "immediate_operand" " n")
1028 (mem:BLK (scratch))]
1030 "(AS_GLOBAL_P (INTVAL (operands[4]))
1031 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
1033 addr_space_t as = INTVAL (operands[4]);
1034 const char *glc = INTVAL (operands[5]) ? " glc" : "";
1036 static char buf[200];
1037 if (AS_GLOBAL_P (as))
1038 sprintf (buf, "global_load%%o0\t%%0, %%2, %%1 offset:%%3%s\;"
1039 "s_waitcnt\tvmcnt(0)", glc);
1045 [(set_attr "type" "flat")
1046 (set_attr "length" "12")])
1048 (define_expand "scatter_store<mode><vnsi>"
1049 [(match_operand:DI 0 "register_operand")
1050 (match_operand:<VnSI> 1 "register_operand")
1051 (match_operand 2 "immediate_operand")
1052 (match_operand:SI 3 "gcn_alu_operand")
1053 (match_operand:V_ALL 4 "register_operand")]
1056 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
1057 operands[1], operands[3],
1058 INTVAL (operands[2]), NULL);
1060 if (GET_MODE (addr) == <VnDI>mode)
1061 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
1062 const0_rtx, const0_rtx));
1064 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
1065 const0_rtx, operands[4],
1066 const0_rtx, const0_rtx));
1070 ; Allow any address expression
1071 (define_expand "scatter<mode>_expr<exec_scatter>"
1072 [(set (mem:BLK (scratch))
1074 [(match_operand:<VnDI> 0 "")
1075 (match_operand:V_ALL 1 "register_operand")
1076 (match_operand 2 "immediate_operand")
1077 (match_operand 3 "immediate_operand")]
;; Scatter store to address = <VnDI> register operand 0 plus the broadcast
;; immediate operand 1.  Operand 2 is the data, operand 3 the address space
;; and a non-zero operand 4 appends " glc" to the instruction.
;; Insn condition:
;;  - flat address space: the offset must be 0, or on TARGET_GCN5_PLUS an
;;    unsigned value below 0x1000;
;;  - global address space: (offset + 0x1000) compared unsigned must be below
;;    0x2000, i.e. the offset lies in [-0x1000, 0xfff].
;; Output: flat_store with an "offset:" field only on TARGET_GCN5_PLUS, or
;; global_store with "off" in place of a scalar base.
1082 (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
1083 [(set (mem:BLK (scratch))
1085 [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
1086 (vec_duplicate:<VnDI>
1087 (match_operand 1 "immediate_operand" "n")))
1088 (match_operand:V_ALL 2 "register_operand" "v")
1089 (match_operand 3 "immediate_operand" "n")
1090 (match_operand 4 "immediate_operand" "n")]
1092 "(AS_FLAT_P (INTVAL (operands[3]))
1093 && (INTVAL(operands[1]) == 0
1094 || (TARGET_GCN5_PLUS
1095 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
1096 || (AS_GLOBAL_P (INTVAL (operands[3]))
1097 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
1099 addr_space_t as = INTVAL (operands[3]);
1100 const char *glc = INTVAL (operands[4]) ? " glc" : "";
1102 static char buf[200];
1105 if (TARGET_GCN5_PLUS)
1106 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
1108 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
1110 else if (AS_GLOBAL_P (as))
1111 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
1117 [(set_attr "type" "flat")
1118 (set_attr "length" "12")])
;; DS-memory variant of the single-offset scatter: the address is a <VnSI>
;; register (operand 0) plus the broadcast immediate operand 1.
;; The insn matches only when operand 3 names a DS address space
;; (AS_ANY_DS_P) and the offset, taken as unsigned, is below 0x10000.
;; Output is ds_write followed by "s_waitcnt lgkmcnt(0)"; " gds" is appended
;; to the ds_write when AS_GDS_P holds for the address space.
;; Operand 4 is matched but not referenced by the visible output code.
1120 (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
1121 [(set (mem:BLK (scratch))
1123 [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
1124 (vec_duplicate:<VnSI>
1125 (match_operand 1 "immediate_operand" "n")))
1126 (match_operand:V_ALL 2 "register_operand" "v")
1127 (match_operand 3 "immediate_operand" "n")
1128 (match_operand 4 "immediate_operand" "n")]
1130 "(AS_ANY_DS_P (INTVAL (operands[3]))
1131 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
1133 addr_space_t as = INTVAL (operands[3]);
1134 static char buf[200];
1135 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
1136 (AS_GDS_P (as) ? " gds" : ""));
1139 [(set_attr "type" "ds")
1140 (set_attr "length" "12")])
;; Scatter store whose address is built from three parts: the DI register
;; operand 0 broadcast to <VnDI>, the <VnSI> register operand 1, and the
;; broadcast immediate operand 2.  Operand 3 is the data, operand 4 the
;; address space and a non-zero operand 5 appends " glc".
;; Only the global address space is accepted, with the immediate offset in
;; [-0x1000, 0xfff] (tested as (offset + 0x1000) < 0x2000, unsigned).
;; Output: global_store with operand 1 as the vector address and operand 0
;; as the scalar base.
1142 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
1143 [(set (mem:BLK (scratch))
1147 (vec_duplicate:<VnDI>
1148 (match_operand:DI 0 "register_operand" "Sv"))
1150 (match_operand:<VnSI> 1 "register_operand" " v")))
1151 (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
1152 (match_operand:V_ALL 3 "register_operand" " v")
1153 (match_operand 4 "immediate_operand" " n")
1154 (match_operand 5 "immediate_operand" " n")]
1156 "(AS_GLOBAL_P (INTVAL (operands[4]))
1157 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
1159 addr_space_t as = INTVAL (operands[4]);
1160 const char *glc = INTVAL (operands[5]) ? " glc" : "";
1162 static char buf[200];
1163 if (AS_GLOBAL_P (as))
1164 sprintf (buf, "global_store%%s3\t%%1, %%3, %%0 offset:%%2%s", glc);
1170 [(set_attr "type" "flat")
1171 (set_attr "length" "12")])
;; ds_bpermute for modes that fit one vector register (V_1REG).
;; Operand 2 is the source vector, operand 1 a <VnSI> vector register and
;; operand 3 the exec register.  Emits ds_bpermute_b32 with operands in the
;; order %0, %1, %2, followed by "s_waitcnt lgkmcnt(0)".
1176 (define_insn "ds_bpermute<mode>"
1177 [(set (match_operand:V_1REG 0 "register_operand" "=v")
1179 [(match_operand:V_1REG 2 "register_operand" " v")
1180 (match_operand:<VnSI> 1 "register_operand" " v")
1181 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1184 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
1185 [(set_attr "type" "vop2")
1186 (set_attr "length" "12")])
;; ds_bpermute for two-register modes (V_2REG).  It is split into two <VnSI>
;; unspecs: one on part 0 and one on part 1 of operands 0 and 2, both sharing
;; operand 1 and the exec operand 3.  The destination is earlyclobber ("=&v")
;; while operand 2 may also be tied to it ("v0").
1188 (define_insn_and_split "ds_bpermute<mode>"
1189 [(set (match_operand:V_2REG 0 "register_operand" "=&v")
1191 [(match_operand:V_2REG 2 "register_operand" " v0")
1192 (match_operand:<VnSI> 1 "register_operand" " v")
1193 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1198 [(set (match_dup 4) (unspec:<VnSI>
1199 [(match_dup 6) (match_dup 1) (match_dup 3)]
1201 (set (match_dup 5) (unspec:<VnSI>
1202 [(match_dup 7) (match_dup 1) (match_dup 3)]
1205 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
1206 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
1207 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
1208 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
1210 [(set_attr "type" "vmult")
1211 (set_attr "length" "24")])
;; DPP move for V_noHI modes, expressed as UNSPEC_MOV_DPP_SHR of register
;; operand 1 and the constant integer operand 2.  The assembly text comes from
;; gcn_expand_dpp_shr_insn, called with the mode, the mnemonic "v_mov_b32",
;; the unspec code and the integer value of operand 2.
1213 (define_insn "@dpp_move<mode>"
1214 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1216 [(match_operand:V_noHI 1 "register_operand" " v")
1217 (match_operand:SI 2 "const_int_operand" " n")]
1218 UNSPEC_MOV_DPP_SHR))]
1221 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
1222 UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
1224 [(set_attr "type" "vop_dpp")
1225 (set_attr "length" "16")])
1228 ;; {{{ ALU special case: add/sub
;; Single-register integer vector add (V_INT_1REG).  Operand 1 is marked
;; commutative ("%") and must be a VGPR; operand 2 may be any "vSvB" operand.
;; The instruction names vcc as its second operand, so the pattern clobbers
;; VCC_REG.  The sources are printed as %2, %1.
1230 (define_insn "add<mode>3<exec_clobber>"
1231 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1233 (match_operand:V_INT_1REG 1 "register_operand" "% v")
1234 (match_operand:V_INT_1REG 2 "gcn_alu_operand" "vSvB")))
1235 (clobber (reg:DI VCC_REG))]
1237 "v_add%^_u32\t%0, vcc, %2, %1"
1238 [(set_attr "type" "vop2")
1239 (set_attr "length" "8")])
;; Integer vector add where one addend is a <SCALAR_MODE> value (operand 2)
;; broadcast with vec_duplicate; the other addend is VGPR operand 1.
;; It emits the same v_add instruction as the plain vector form and likewise
;; clobbers VCC_REG.
1241 (define_insn "add<mode>3_dup<exec_clobber>"
1242 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1244 (vec_duplicate:V_INT_1REG
1245 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
1246 (match_operand:V_INT_1REG 1 "register_operand" " v")))
1247 (clobber (reg:DI VCC_REG))]
1249 "v_add%^_u32\t%0, vcc, %2, %1"
1250 [(set_attr "type" "vop2")
1251 (set_attr "length" "8")])
;; V_SI add that also sets DI operand 3, described in RTL as an ltu
;; comparison involving the sum (presumably the per-lane carry-out).
;; Operand 3 is printed where the plain add prints "vcc".
;; Alternative 0 places operand 3 in "cV" (type vop2); alternative 1 places it
;; in "Sg" (type vop3b).
1253 (define_insn "add<mode>3_vcc<exec_vcc>"
1254 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1256 (match_operand:V_SI 1 "register_operand" "% v, v")
1257 (match_operand:V_SI 2 "gcn_alu_operand" "vSvB,vSvB")))
1258 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
1259 (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
1262 "v_add%^_u32\t%0, %3, %2, %1"
1263 [(set_attr "type" "vop2,vop3b")
1264 (set_attr "length" "8")])
1266 ; This pattern only changes the VCC bits when the corresponding lane is
1267 ; enabled, so the set must be described as an ior.
; NOTE(review): the DI set in the pattern directly below is written as a plain
; ltu; the ior:DI form appears in addc<mode>3<exec_vcc> and
; subc<mode>3<exec_vcc>.  Confirm which pattern the remark above refers to.
;
; V_SI add of VGPR operand 2 and a broadcast SI operand 1, also setting DI
; operand 3.  Alternative 0 uses "cV" for operand 3 (vop2), alternative 1
; uses "Sg" (vop3b).
1269 (define_insn "add<mode>3_vcc_dup<exec_vcc>"
1270 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1273 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
1274 (match_operand:V_SI 2 "register_operand" " v, v")))
1275 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
1276 (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
1278 (vec_duplicate:V_SI (match_dup 2))))]
1280 "v_add%^_u32\t%0, %3, %2, %1"
1281 [(set_attr "type" "vop2,vop3b")
1282 (set_attr "length" "8,8")])
1284 ; v_addc does not accept an SGPR because the VCC read already counts as an
1285 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1286 ; accept "B" immediate constants due to a related bus conflict.
; Add with carry: V_SI operands 1 and 2 are added together with a vector built
; from constants 1 and 0 under control of DI operand 3 (presumably the
; incoming per-lane carry mask).  DI operand 4 is set from an ior of ltu
; comparisons and is printed as the instruction's second operand; operand 3 is
; printed last.  Alternative 0 is vop2 / 4 bytes, alternative 1 vop3b / 8 bytes.
1288 (define_insn "addc<mode>3<exec_vcc>"
1289 [(set (match_operand:V_SI 0 "register_operand" "=v, v")
1293 (vec_duplicate:V_SI (const_int 1))
1294 (vec_duplicate:V_SI (const_int 0))
1295 (match_operand:DI 3 "register_operand" " cV,cVSv"))
1296 (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA"))
1297 (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA")))
1298 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
1299 (ior:DI (ltu:DI (plus:V_SI
1302 (vec_duplicate:V_SI (const_int 1))
1303 (vec_duplicate:V_SI (const_int 0))
1310 (vec_duplicate:V_SI (const_int 1))
1311 (vec_duplicate:V_SI (const_int 0))
1316 "v_addc%^_u32\t%0, %4, %2, %1, %3"
1317 [(set_attr "type" "vop2,vop3b")
1318 (set_attr "length" "4,8")])
;; Single-register integer vector subtract; clobbers VCC_REG.
;; Alternative 0: operand 2 is a VGPR and operand 1 any "vSvB" operand; emits
;; v_sub with the sources printed as %1, %2.
;; Alternative 1: operand 1 is a VGPR and operand 2 any "vSvB" operand; emits
;; v_subrev with the sources printed as %2, %1.
1320 (define_insn "sub<mode>3<exec_clobber>"
1321 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
1323 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
1324 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
1325 (clobber (reg:DI VCC_REG))]
1328 v_sub%^_u32\t%0, vcc, %1, %2
1329 v_subrev%^_u32\t%0, vcc, %2, %1"
1330 [(set_attr "type" "vop2")
1331 (set_attr "length" "8,8")])
;; V_SI subtract that also sets DI operand 3, described with a gtu comparison
;; involving the difference (presumably the per-lane borrow).
;; Alternatives 0-1 emit v_sub (VGPR operand 2), alternatives 2-3 emit
;; v_subrev (VGPR operand 1).  Within each pair operand 3 is "cV" (vop2) in
;; the first alternative and "Sg" (vop3b) in the second.
1333 (define_insn "sub<mode>3_vcc<exec_vcc>"
1334 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1336 (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1337 (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1338 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1339 (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
1343 v_sub%^_u32\t%0, %3, %1, %2
1344 v_sub%^_u32\t%0, %3, %1, %2
1345 v_subrev%^_u32\t%0, %3, %2, %1
1346 v_subrev%^_u32\t%0, %3, %2, %1"
1347 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1348 (set_attr "length" "8")])
1350 ; v_subb does not accept an SGPR because the VCC read already counts as an
1351 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1352 ; accept "B" immediate constants due to a related bus conflict.
; Subtract with borrow: V_SI operands 1 and 2 combined with a vector built
; from constants 1 and 0 under control of DI operand 3 (presumably the
; incoming per-lane borrow mask).  DI operand 4 is set from an ior of gtu/ltu
; comparisons.  Alternatives 0-1 emit v_subb (%1, %2); alternatives 2-3 emit
; v_subbrev (%2, %1).  Lengths are 4 bytes for the vop2 alternatives and
; 8 bytes for the vop3b ones.
1354 (define_insn "subc<mode>3<exec_vcc>"
1355 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1359 (vec_duplicate:V_SI (const_int 1))
1360 (vec_duplicate:V_SI (const_int 0))
1361 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
1362 (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
1363 (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
1364 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
1365 (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
1367 (vec_duplicate:V_SI (const_int 1))
1368 (vec_duplicate:V_SI (const_int 0))
1373 (ltu:DI (minus:V_SI (vec_merge:V_SI
1374 (vec_duplicate:V_SI (const_int 1))
1375 (vec_duplicate:V_SI (const_int 0))
1381 v_subb%^_u32\t%0, %4, %1, %2, %3
1382 v_subb%^_u32\t%0, %4, %1, %2, %3
1383 v_subbrev%^_u32\t%0, %4, %2, %1, %3
1384 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
1385 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1386 (set_attr "length" "4,8,4,8")])
;; 64-bit element vector add (V_DI); clobbers VCC_REG.
;; The split requires gcn_can_split_p to hold for operands 0, 1 and 2.
;; It emits add<vnsi>3_vcc on part 0 of each operand, then addc<vnsi>3 on
;; part 1 of each operand.  A DImode rtx for VCC_REG is created for these
;; two insns (presumably to pass the carry from the low to the high add).
1388 (define_insn_and_split "add<mode>3"
1389 [(set (match_operand:V_DI 0 "register_operand" "= v")
1391 (match_operand:V_DI 1 "register_operand" "%vDb")
1392 (match_operand:V_DI 2 "gcn_alu_operand" " vDb")))
1393 (clobber (reg:DI VCC_REG))]
1396 "gcn_can_split_p (<MODE>mode, operands[0])
1397 && gcn_can_split_p (<MODE>mode, operands[1])
1398 && gcn_can_split_p (<MODE>mode, operands[2])"
1401 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1402 emit_insn (gen_add<vnsi>3_vcc
1403 (gcn_operand_part (<MODE>mode, operands[0], 0),
1404 gcn_operand_part (<MODE>mode, operands[1], 0),
1405 gcn_operand_part (<MODE>mode, operands[2], 0),
1407 emit_insn (gen_addc<vnsi>3
1408 (gcn_operand_part (<MODE>mode, operands[0], 1),
1409 gcn_operand_part (<MODE>mode, operands[1], 1),
1410 gcn_operand_part (<MODE>mode, operands[2], 1),
1414 [(set_attr "type" "vmult")
1415 (set_attr "length" "8")])
;; Exec-masked 64-bit element vector add (V_DI); clobbers VCC_REG.
;; Operand 3 is the previous value for the destination (a register tied to
;; operand 0, or an unspec) and operand 4 is the DImode exec register.
;; The split condition covers every V_DI operand that is taken apart with
;; gcn_operand_part below, i.e. operands 0, 1, 2 and 3; the sub and zext
;; _exec splitters check the same set.
;; The split emits add<vnsi>3_vcc_exec on the part-0 halves, then
;; addc<vnsi>3_exec on the part-1 halves.
1417 (define_insn_and_split "add<mode>3_exec"
1418 [(set (match_operand:V_DI 0 "register_operand" "= v")
1421 (match_operand:V_DI 1 "register_operand" "%vDb")
1422 (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))
1423 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1424 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1425 (clobber (reg:DI VCC_REG))]
1428 "gcn_can_split_p (<MODE>mode, operands[0])
1429 && gcn_can_split_p (<MODE>mode, operands[1])
1430 && gcn_can_split_p (<MODE>mode, operands[2])
1431 && gcn_can_split_p (<MODE>mode, operands[3])"
1434 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1435 emit_insn (gen_add<vnsi>3_vcc_exec
1436 (gcn_operand_part (<MODE>mode, operands[0], 0),
1437 gcn_operand_part (<MODE>mode, operands[1], 0),
1438 gcn_operand_part (<MODE>mode, operands[2], 0),
1440 gcn_operand_part (<MODE>mode, operands[3], 0),
1442 emit_insn (gen_addc<vnsi>3_exec
1443 (gcn_operand_part (<MODE>mode, operands[0], 1),
1444 gcn_operand_part (<MODE>mode, operands[1], 1),
1445 gcn_operand_part (<MODE>mode, operands[2], 1),
1447 gcn_operand_part (<MODE>mode, operands[3], 1),
1451 [(set_attr "type" "vmult")
1452 (set_attr "length" "8")])
;; 64-bit element vector subtract (V_DI); clobbers VCC_REG.
;; In each of the two alternatives one of operands 1 and 2 is a VGPR and the
;; other may be any "vDb" operand.  The split requires gcn_can_split_p for
;; operands 0-2 and emits sub<vnsi>3_vcc on the part-0 halves, then
;; subc<vnsi>3 on the part-1 halves.
1454 (define_insn_and_split "sub<mode>3"
1455 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1457 (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v")
1458 (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb")))
1459 (clobber (reg:DI VCC_REG))]
1462 "gcn_can_split_p (<MODE>mode, operands[0])
1463 && gcn_can_split_p (<MODE>mode, operands[1])
1464 && gcn_can_split_p (<MODE>mode, operands[2])"
1467 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1468 emit_insn (gen_sub<vnsi>3_vcc
1469 (gcn_operand_part (<MODE>mode, operands[0], 0),
1470 gcn_operand_part (<MODE>mode, operands[1], 0),
1471 gcn_operand_part (<MODE>mode, operands[2], 0),
1473 emit_insn (gen_subc<vnsi>3
1474 (gcn_operand_part (<MODE>mode, operands[0], 1),
1475 gcn_operand_part (<MODE>mode, operands[1], 1),
1476 gcn_operand_part (<MODE>mode, operands[2], 1),
1480 [(set_attr "type" "vmult")
1481 (set_attr "length" "8")])
;; Exec-masked 64-bit element vector subtract (V_DI); clobbers VCC_REG.
;; The insn condition requires at least one of operands 1 and 2 to be a
;; register.  Operand 3 is the previous destination value (tied register or
;; unspec) and operand 4 the exec register.
;; The split requires gcn_can_split_p for operands 0-3 and emits
;; sub<vnsi>3_vcc_exec on the part-0 halves, then subc<vnsi>3_exec on the
;; part-1 halves.
1483 (define_insn_and_split "sub<mode>3_exec"
1484 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1487 (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v")
1488 (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB"))
1489 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1490 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1491 (clobber (reg:DI VCC_REG))]
1492 "register_operand (operands[1], VOIDmode)
1493 || register_operand (operands[2], VOIDmode)"
1495 "gcn_can_split_p (<MODE>mode, operands[0])
1496 && gcn_can_split_p (<MODE>mode, operands[1])
1497 && gcn_can_split_p (<MODE>mode, operands[2])
1498 && gcn_can_split_p (<MODE>mode, operands[3])"
1501 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1502 emit_insn (gen_sub<vnsi>3_vcc_exec
1503 (gcn_operand_part (<MODE>mode, operands[0], 0),
1504 gcn_operand_part (<MODE>mode, operands[1], 0),
1505 gcn_operand_part (<MODE>mode, operands[2], 0),
1507 gcn_operand_part (<MODE>mode, operands[3], 0),
1509 emit_insn (gen_subc<vnsi>3_exec
1510 (gcn_operand_part (<MODE>mode, operands[0], 1),
1511 gcn_operand_part (<MODE>mode, operands[1], 1),
1512 gcn_operand_part (<MODE>mode, operands[2], 1),
1514 gcn_operand_part (<MODE>mode, operands[3], 1),
1518 [(set_attr "type" "vmult")
1519 (set_attr "length" "8")])
;; V_DI add where operand 1 is a <VnSI> vector and operand 2 a full V_DI
;; vector; clobbers VCC_REG.  Operand 1 is not split, so the split condition
;; only checks operands 0 and 2.
;; The split emits add<vnsi>3_vcc for the part-0 half, then addc<vnsi>3 of
;; part 1 of operand 2 with const0_rtx, using VCC_REG as both carry
;; arguments.
1521 (define_insn_and_split "add<mode>3_zext"
1522 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1525 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1526 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")))
1527 (clobber (reg:DI VCC_REG))]
1530 "gcn_can_split_p (<MODE>mode, operands[0])
1531 && gcn_can_split_p (<MODE>mode, operands[2])"
1534 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1535 emit_insn (gen_add<vnsi>3_vcc
1536 (gcn_operand_part (<MODE>mode, operands[0], 0),
1538 gcn_operand_part (<MODE>mode, operands[2], 0),
1540 emit_insn (gen_addc<vnsi>3
1541 (gcn_operand_part (<MODE>mode, operands[0], 1),
1542 gcn_operand_part (<MODE>mode, operands[2], 1),
1543 const0_rtx, vcc, vcc));
1546 [(set_attr "type" "vmult")
1547 (set_attr "length" "8")])
;; Exec-masked form of the <VnSI> + V_DI add; clobbers VCC_REG.
;; Operand 3 is the previous destination value (tied register or unspec) and
;; operand 4 the exec register.  The split condition checks operands 0, 2
;; and 3.  The split emits add<vnsi>3_vcc_exec for the part-0 half, then
;; addc<vnsi>3_exec of part 1 of operand 2 with const0_rtx, using VCC_REG as
;; both carry arguments.
1549 (define_insn_and_split "add<mode>3_zext_exec"
1550 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1554 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1555 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
1556 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1557 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1558 (clobber (reg:DI VCC_REG))]
1561 "gcn_can_split_p (<MODE>mode, operands[0])
1562 && gcn_can_split_p (<MODE>mode, operands[2])
1563 && gcn_can_split_p (<MODE>mode, operands[3])"
1566 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1567 emit_insn (gen_add<vnsi>3_vcc_exec
1568 (gcn_operand_part (<MODE>mode, operands[0], 0),
1570 gcn_operand_part (<MODE>mode, operands[2], 0),
1572 gcn_operand_part (<MODE>mode, operands[3], 0),
1574 emit_insn (gen_addc<vnsi>3_exec
1575 (gcn_operand_part (<MODE>mode, operands[0], 1),
1576 gcn_operand_part (<MODE>mode, operands[2], 1),
1577 const0_rtx, vcc, vcc,
1578 gcn_operand_part (<MODE>mode, operands[3], 1),
1582 [(set_attr "type" "vmult")
1583 (set_attr "length" "8")])
;; V_DI add of a broadcast SI scalar (operand 1, wrapped in vec_duplicate
;; to <VnSI>) and V_DI operand 2.  DI operand 3 is an explicit, earlyclobber
;; output ("=&SgcV") rather than a VCC_REG clobber.
;; The split emits add<vnsi>3_vcc_dup for the part-0 half, then addc<vnsi>3
;; of part 1 of operand 2 with const0_rtx, passing operand 3 as both carry
;; arguments.
1585 (define_insn_and_split "add<mode>3_vcc_zext_dup"
1586 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1589 (vec_duplicate:<VnSI>
1590 (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
1591 (match_operand:V_DI 2 "gcn_alu_operand" " vDA, vDb")))
1592 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
1594 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1599 "gcn_can_split_p (<MODE>mode, operands[0])
1600 && gcn_can_split_p (<MODE>mode, operands[2])"
1603 emit_insn (gen_add<vnsi>3_vcc_dup
1604 (gcn_operand_part (<MODE>mode, operands[0], 0),
1605 gcn_operand_part (DImode, operands[1], 0),
1606 gcn_operand_part (<MODE>mode, operands[2], 0),
1608 emit_insn (gen_addc<vnsi>3
1609 (gcn_operand_part (<MODE>mode, operands[0], 1),
1610 gcn_operand_part (<MODE>mode, operands[2], 1),
1611 const0_rtx, operands[3], operands[3]));
1614 [(set_attr "type" "vmult")
1615 (set_attr "length" "8")])
;; Convenience expander taking only the destination (operand 0), the SI
;; scalar (operand 1) and the V_DI addend (operand 2).  It creates a DImode
;; rtx for VCC_REG and emits add<mode>3_vcc_zext_dup (presumably with that
;; rtx as the carry operand).
1617 (define_expand "add<mode>3_zext_dup"
1618 [(match_operand:V_DI 0 "register_operand")
1619 (match_operand:SI 1 "gcn_alu_operand")
1620 (match_operand:V_DI 2 "gcn_alu_operand")]
1623 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1624 emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
;; Exec-masked form of the broadcast-SI + V_DI add.  Operand numbering
;; differs from the other _exec patterns: operand 3 is the DI carry output,
;; operand 4 the previous destination value and operand 5 the exec register.
;; The split condition checks operands 0, 2 and 4.  The split emits
;; add<vnsi>3_vcc_dup_exec for the part-0 half, then addc<vnsi>3_exec of
;; part 1 of operand 2 with const0_rtx, passing operand 3 as both carry
;; arguments.
1629 (define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
1630 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1634 (vec_duplicate:<VnSI>
1635 (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv")))
1636 (match_operand:V_DI 2 "gcn_alu_operand" " vDb, vDA"))
1637 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0, U0")
1638 (match_operand:DI 5 "gcn_exec_reg_operand" " e, e")))
1639 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
1642 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1648 "gcn_can_split_p (<MODE>mode, operands[0])
1649 && gcn_can_split_p (<MODE>mode, operands[2])
1650 && gcn_can_split_p (<MODE>mode, operands[4])"
1653 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1654 (gcn_operand_part (<MODE>mode, operands[0], 0),
1655 gcn_operand_part (DImode, operands[1], 0),
1656 gcn_operand_part (<MODE>mode, operands[2], 0),
1658 gcn_operand_part (<MODE>mode, operands[4], 0),
1660 emit_insn (gen_addc<vnsi>3_exec
1661 (gcn_operand_part (<MODE>mode, operands[0], 1),
1662 gcn_operand_part (<MODE>mode, operands[2], 1),
1663 const0_rtx, operands[3], operands[3],
1664 gcn_operand_part (<MODE>mode, operands[4], 1),
1668 [(set_attr "type" "vmult")
1669 (set_attr "length" "8")])
;; Convenience expander for the exec-masked broadcast-SI + V_DI add.
;; It creates a DImode rtx for VCC_REG and inserts it as the fourth argument
;; of add<mode>3_vcc_zext_dup_exec, after operands 0-2 and before operand 3.
1671 (define_expand "add<mode>3_zext_dup_exec"
1672 [(match_operand:V_DI 0 "register_operand")
1673 (match_operand:SI 1 "gcn_alu_operand")
1674 (match_operand:V_DI 2 "gcn_alu_operand")
1675 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1676 (match_operand:DI 4 "gcn_exec_reg_operand")]
1679 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1680 emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
1681 operands[2], vcc, operands[3],
;; V_DI add of a zero-extended <VnSI> vector (operand 1) and a broadcast DI
;; scalar (operand 2).  DI operand 3 is an explicit, earlyclobber output.
;; The split emits add<vnsi>3_vcc_dup using part 0 of operand 2.  The high
;; half of the destination is then loaded with a vec_duplicate of part 1 of
;; operand 2, and addc<vnsi>3 adds const0_rtx to it in place.
1686 (define_insn_and_split "add<mode>3_vcc_zext_dup2"
1687 [(set (match_operand:V_DI 0 "register_operand" "= v")
1689 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
1690 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" " DbSv"))))
1691 (set (match_operand:DI 3 "register_operand" "=&SgcV")
1693 (zero_extend:V_DI (match_dup 1))
1694 (vec_duplicate:V_DI (match_dup 2)))
1698 "gcn_can_split_p (<MODE>mode, operands[0])"
1701 emit_insn (gen_add<vnsi>3_vcc_dup
1702 (gcn_operand_part (<MODE>mode, operands[0], 0),
1703 gcn_operand_part (DImode, operands[2], 0),
1706 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1707 emit_insn (gen_vec_duplicate<vnsi>
1708 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1709 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
1713 [(set_attr "type" "vmult")
1714 (set_attr "length" "8")])
;; Convenience expander taking the destination (operand 0), the <VnSI>
;; vector (operand 1) and the DI scalar (operand 2).  It creates a DImode rtx
;; for VCC_REG and emits add<mode>3_vcc_zext_dup2 (presumably with that rtx
;; as the carry operand).
1716 (define_expand "add<mode>3_zext_dup2"
1717 [(match_operand:V_DI 0 "register_operand")
1718 (match_operand:<VnSI> 1 "gcn_alu_operand")
1719 (match_operand:DI 2 "gcn_alu_operand")]
1722 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1723 emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
;; Exec-masked add of a zero-extended <VnSI> vector (operand 1) and a
;; broadcast DI scalar (operand 2).  Operand 3 is the DI carry output,
;; operand 4 the previous destination value and operand 5 the exec register.
;; The split condition checks operands 0 and 4.  The split emits
;; add<vnsi>3_vcc_dup_exec using part 0 of operand 2.  The high half of the
;; destination is then loaded with a vec_duplicate of part 1 of operand 2,
;; and addc<vnsi>3_exec adds const0_rtx to it in place, with operand 3 as
;; both carry arguments.
1728 (define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
1729 [(set (match_operand:V_DI 0 "register_operand" "= v")
1732 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1733 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1734 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0")
1735 (match_operand:DI 5 "gcn_exec_reg_operand" " e")))
1736 (set (match_operand:DI 3 "register_operand" "=&SgcV")
1739 (zero_extend:V_DI (match_dup 1))
1740 (vec_duplicate:V_DI (match_dup 2)))
1745 "gcn_can_split_p (<MODE>mode, operands[0])
1746 && gcn_can_split_p (<MODE>mode, operands[4])"
1749 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1750 (gcn_operand_part (<MODE>mode, operands[0], 0),
1751 gcn_operand_part (DImode, operands[2], 0),
1754 gcn_operand_part (<MODE>mode, operands[4], 0),
1756 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1757 emit_insn (gen_vec_duplicate<vnsi>_exec
1758 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1759 gcn_operand_part (<MODE>mode, operands[4], 1),
1761 emit_insn (gen_addc<vnsi>3_exec
1762 (dsthi, dsthi, const0_rtx, operands[3], operands[3],
1763 gcn_operand_part (<MODE>mode, operands[4], 1),
1767 [(set_attr "type" "vmult")
1768 (set_attr "length" "8")])
;; Convenience expander for the exec-masked zero-extended-vector plus
;; broadcast-DI add.  It creates a DImode rtx for VCC_REG and emits
;; add<mode>3_vcc_zext_dup2_exec; operands 3 (previous value) and 4 (exec)
;; are passed as that insn's last two arguments.
1770 (define_expand "add<mode>3_zext_dup2_exec"
1771 [(match_operand:V_DI 0 "register_operand")
1772 (match_operand:<VnSI> 1 "gcn_alu_operand")
1773 (match_operand:DI 2 "gcn_alu_operand")
1774 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1775 (match_operand:DI 4 "gcn_exec_reg_operand")]
1778 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1779 emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
1781 operands[3], operands[4]));
;; V_DI add of a sign-extended <VnSI> vector (operand 1) and a broadcast DI
;; scalar (operand 2).  It clobbers a <VnSI> scratch (operand 3) and VCC_REG.
;; Split sequence:
;;   1. scratch = operand 1 arithmetically shifted right by 31;
;;   2. add<vnsi>3_vcc_dup for the part-0 half, using part 0 of operand 2;
;;   3. the high half of the destination = vec_duplicate of part 1 of
;;      operand 2;
;;   4. addc<vnsi>3 adds the scratch to the high half, with VCC_REG as both
;;      carry arguments.
1785 (define_insn_and_split "add<mode>3_sext_dup2"
1786 [(set (match_operand:V_DI 0 "register_operand" "= v")
1788 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
1789 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1790 (clobber (match_scratch:<VnSI> 3 "=&v"))
1791 (clobber (reg:DI VCC_REG))]
1794 "gcn_can_split_p (<MODE>mode, operands[0])"
1797 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1798 emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31)));
1799 emit_insn (gen_add<vnsi>3_vcc_dup
1800 (gcn_operand_part (<MODE>mode, operands[0], 0),
1801 gcn_operand_part (DImode, operands[2], 0),
1804 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1805 emit_insn (gen_vec_duplicate<vnsi>
1806 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1807 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc));
1810 [(set_attr "type" "vmult")
1811 (set_attr "length" "8")])
;; Exec-masked add of a sign-extended <VnSI> vector (operand 1) and a
;; broadcast DI scalar (operand 2).  Operand 3 is the previous destination
;; value, operand 4 the exec register and operand 5 a <VnSI> scratch; VCC_REG
;; is clobbered.  The split condition checks operands 0 and 3.
;; Split sequence (all insns are the _exec forms):
;;   1. scratch = operand 1 arithmetically shifted right by 31, with an
;;      undefined previous value;
;;   2. add<vnsi>3_vcc_dup_exec for the part-0 half;
;;   3. the high half of the destination = vec_duplicate of part 1 of
;;      operand 2;
;;   4. addc<vnsi>3_exec adds the scratch to the high half, with VCC_REG as
;;      both carry arguments.
1813 (define_insn_and_split "add<mode>3_sext_dup2_exec"
1814 [(set (match_operand:V_DI 0 "register_operand" "= v")
1817 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1818 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1819 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1820 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1821 (clobber (match_scratch:<VnSI> 5 "=&v"))
1822 (clobber (reg:DI VCC_REG))]
1825 "gcn_can_split_p (<MODE>mode, operands[0])
1826 && gcn_can_split_p (<MODE>mode, operands[3])"
1829 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1830 emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31),
1831 gcn_gen_undef (<VnSI>mode), operands[4]));
1832 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1833 (gcn_operand_part (<MODE>mode, operands[0], 0),
1834 gcn_operand_part (DImode, operands[2], 0),
1837 gcn_operand_part (<MODE>mode, operands[3], 0),
1839 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1840 emit_insn (gen_vec_duplicate<vnsi>_exec
1841 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1842 gcn_operand_part (<MODE>mode, operands[3], 1),
1844 emit_insn (gen_addc<vnsi>3_exec
1845 (dsthi, dsthi, operands[5], vcc, vcc,
1846 gcn_operand_part (<MODE>mode, operands[3], 1),
1850 [(set_attr "type" "vmult")
1851 (set_attr "length" "8")])
1854 ;; {{{ DS memory ALU: add/sub
;; Mode iterators used by the *_ds and *_ds_scalar add/sub patterns:
;; DS_ARITH_MODE lists the vector modes V64SI, V64SF and V64DI;
;; DS_ARITH_SCALAR_MODE lists the scalar modes SI, SF and DI.
1856 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
1857 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1859 ;; FIXME: the vector patterns probably need RD expanded to a vector of
1860 ;; addresses. For now, the only way a vector can get into LDS is
1861 ;; if the user puts it there manually.
1863 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1864 ;; checked to see if anything can ever use them.
;; Vector add directly on DS memory: register operand 2 is added to the
;; memory operand.  The insn condition requires operands 0 and 1 to be the
;; same rtx (rtx_equal_p), so the destination is also a source.
;; Emits ds_add.
1866 (define_insn "add<mode>3_ds<exec>"
1867 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1869 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1870 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1871 "rtx_equal_p (operands[0], operands[1])"
1872 "ds_add%u0\t%A0, %2%O0"
1873 [(set_attr "type" "ds")
1874 (set_attr "length" "8")])
;; Scalar-mode (SI/SF/DI) counterpart of the DS-memory add: register
;; operand 2 is added to the memory operand.  The insn condition requires
;; operands 0 and 1 to be the same rtx.  Emits ds_add.
1876 (define_insn "add<mode>3_ds_scalar"
1877 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1878 (plus:DS_ARITH_SCALAR_MODE
1879 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1881 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1882 "rtx_equal_p (operands[0], operands[1])"
1883 "ds_add%u0\t%A0, %2%O0"
1884 [(set_attr "type" "ds")
1885 (set_attr "length" "8")])
;; Vector subtract directly on DS memory: memory operand 1 minus register
;; operand 2, stored to operand 0.  The insn condition requires operands 0
;; and 1 to be the same rtx.  Emits ds_sub.
1887 (define_insn "sub<mode>3_ds<exec>"
1888 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1889 (minus:DS_ARITH_MODE
1890 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1891 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1892 "rtx_equal_p (operands[0], operands[1])"
1893 "ds_sub%u0\t%A0, %2%O0"
1894 [(set_attr "type" "ds")
1895 (set_attr "length" "8")])
;; Scalar-mode (SI/SF/DI) counterpart of the DS-memory subtract: memory
;; operand 1 minus register operand 2.  The insn condition requires
;; operands 0 and 1 to be the same rtx.  Emits ds_sub.
1897 (define_insn "sub<mode>3_ds_scalar"
1898 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1899 (minus:DS_ARITH_SCALAR_MODE
1900 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1902 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1903 "rtx_equal_p (operands[0], operands[1])"
1904 "ds_sub%u0\t%A0, %2%O0"
1905 [(set_attr "type" "ds")
1906 (set_attr "length" "8")])
;; Reversed vector subtract on DS memory: register operand 2 is the minuend
;; and memory operand 1 the subtrahend; the result goes to memory operand 0.
;; The insn condition requires operands 0 and 1 to be the same rtx.
;; Emits ds_rsub.
1908 (define_insn "subr<mode>3_ds<exec>"
1909 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1910 (minus:DS_ARITH_MODE
1911 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1912 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1913 "rtx_equal_p (operands[0], operands[1])"
1914 "ds_rsub%u0\t%A0, %2%O0"
1915 [(set_attr "type" "ds")
1916 (set_attr "length" "8")])
;; Scalar-mode (SI/SF/DI) counterpart of the reversed DS-memory subtract:
;; register operand 2 minus memory operand 1.  The insn condition requires
;; operands 0 and 1 to be the same rtx.  Emits ds_rsub.
1918 (define_insn "subr<mode>3_ds_scalar"
1919 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1920 (minus:DS_ARITH_SCALAR_MODE
1921 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1922 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1924 "rtx_equal_p (operands[0], operands[1])"
1925 "ds_rsub%u0\t%A0, %2%O0"
1926 [(set_attr "type" "ds")
1927 (set_attr "length" "8")])
1930 ;; {{{ ALU special case: mult
;; High-part multiply for V_SI vectors, in signed and unsigned flavours
;; selected by <su> / <sgnsuffix>.  Operand 1 is a commutative VGPR and
;; operand 2 any "vSvA" operand.  Emits v_mul_hi with the sources printed as
;; %2, %1.
1932 (define_insn "<su>mul<mode>3_highpart<exec>"
1933 [(set (match_operand:V_SI 0 "register_operand" "= v")
1938 (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
1940 (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
1943 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1944 [(set_attr "type" "vop3a")
1945 (set_attr "length" "8")])
;; Single-register integer vector multiply (V_INT_1REG).  Both sources accept
;; "vSvA" operands and operand 1 is marked commutative.  Emits v_mul_lo_u32.
1947 (define_insn "mul<mode>3<exec>"
1948 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1950 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1951 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
1953 "v_mul_lo_u32\t%0, %1, %2"
1954 [(set_attr "type" "vop3a")
1955 (set_attr "length" "8")])
;; Integer vector multiply by a broadcast <SCALAR_MODE> value (operand 2,
;; wrapped in vec_duplicate).  Emits the same v_mul_lo_u32 instruction as the
;; plain vector form.
1957 (define_insn "mul<mode>3_dup<exec>"
1958 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1960 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1961 (vec_duplicate:V_INT_1REG
1962 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
1964 "v_mul_lo_u32\t%0, %1, %2"
1965 [(set_attr "type" "vop3a")
1966 (set_attr "length" "8")])
;; 64-bit element vector multiply (V_DI), split into 32-bit operations.
;; Writing
;;   operand 1 = left_hi * 2^32 + left_lo
;;   operand 2 = right_hi * 2^32 + right_lo
;; the product modulo 2^64 is
;;   left_lo * right_lo
;;   + ((left_hi * right_lo + left_lo * right_hi) mod 2^32) * 2^32.
;; The left_hi * right_hi term is scaled by 2^64, so it contributes nothing
;; to a 64-bit result and must not be added into the high half.
;; The destination and the <VnSI> scratch (operand 3) are earlyclobber
;; because the inputs are read again after the outputs have been written.
1968 (define_insn_and_split "mul<mode>3"
1969 [(set (match_operand:V_DI 0 "register_operand" "=&v")
1971 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
1972 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
1973 (clobber (match_scratch:<VnSI> 3 "=&v"))]
1979 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1980 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1981 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
1982 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
1983 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1984 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1985 rtx tmp = operands[3];
1987 emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
1988 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));
1989 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
1990 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1991 emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
1992 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
;; Exec-masked 64-bit element vector multiply (V_DI), split into 32-bit
;; operations.  Writing
;;   operand 1 = left_hi * 2^32 + left_lo
;;   operand 2 = right_hi * 2^32 + right_lo
;; the product modulo 2^64 is
;;   left_lo * right_lo
;;   + ((left_hi * right_lo + left_lo * right_hi) mod 2^32) * 2^32.
;; The left_hi * right_hi term is scaled by 2^64, so it contributes nothing
;; to a 64-bit result and must not be added into the high half.
;; Operand 3 is the previous destination value: when it is an UNSPEC both
;; halves use an undefined value, otherwise its two parts are used.
;; Operand 4 is the exec register and operand 5 a <VnSI> scratch.  The
;; scratch multiplies use an undefined previous value, and the accumulating
;; adds use out_hi as their previous value.
1998 (define_insn_and_split "mul<mode>3_exec"
1999 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2002 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
2003 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
2004 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2005 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2006 (clobber (match_scratch:<VnSI> 5 "=&v"))]
2012 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2013 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2014 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
2015 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
2016 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2017 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2018 rtx exec = operands[4];
2019 rtx tmp = operands[5];
2022 if (GET_CODE (operands[3]) == UNSPEC)
2024 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2028 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2029 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2032 rtx undef = gcn_gen_undef (<VnSI>mode);
2034 emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
2035 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
2037 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
2038 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2039 emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
2040 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
;; V_DI multiply where operand 1 is a <VnSI> vector (used whole as "left")
;; and operand 2 a full V_DI vector.  Split sequence:
;;   out_lo = left * right_lo
;;   out_hi = unsigned high part of (left * right_lo)
;;   tmp    = left * right_hi
;;   out_hi = out_hi + tmp
;; The destination and the <VnSI> scratch (operand 3) are earlyclobber.
2046 (define_insn_and_split "mul<mode>3_zext"
2047 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2050 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2051 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
2052 (clobber (match_scratch:<VnSI> 3 "=&v"))]
2058 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2059 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2060 rtx left = operands[1];
2061 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2062 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2063 rtx tmp = operands[3];
2065 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
2066 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
2067 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
2068 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
;; Exec-masked form of the <VnSI> times V_DI multiply.  Operand 3 is the
;; previous destination value: when it is an UNSPEC both halves use an
;; undefined value, otherwise its two parts are used.  Operand 4 is the exec
;; register and operand 5 a <VnSI> scratch.
;; Split sequence (all insns are the _exec forms):
;;   out_lo = left * right_lo              (previous value old_lo)
;;   out_hi = unsigned high part of (left * right_lo)
;;   tmp    = left * right_hi              (previous value undefined)
;;   out_hi = out_hi + tmp                 (previous value out_hi)
2072 (define_insn_and_split "mul<mode>3_zext_exec"
2073 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2077 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2078 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
2079 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2080 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2081 (clobber (match_scratch:<VnSI> 5 "=&v"))]
2087 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2088 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2089 rtx left = operands[1];
2090 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2091 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2092 rtx exec = operands[4];
2093 rtx tmp = operands[5];
2096 if (GET_CODE (operands[3]) == UNSPEC)
2098 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2102 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2103 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2106 rtx undef = gcn_gen_undef (<VnSI>mode);
2108 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
2109 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
2111 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
2112 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
;; V_DI multiply of a <VnSI> vector (operand 1, used whole as "left") by a
;; DI scalar (operand 2, "SvDA"), whose two parts are taken with
;; gcn_operand_part.  Split sequence:
;;   out_lo = left * right_lo
;;   out_hi = unsigned high part of (left * right_lo)
;;   tmp    = left * right_hi
;;   out_hi = out_hi + tmp
;; The destination and the <VnSI> scratch (operand 3) are earlyclobber.
2116 (define_insn_and_split "mul<mode>3_zext_dup2"
2117 [(set (match_operand:V_DI 0 "register_operand" "= &v")
2120 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2122 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
2123 (clobber (match_scratch:<VnSI> 3 "= &v"))]
2129 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2130 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2131 rtx left = operands[1];
2132 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2133 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2134 rtx tmp = operands[3];
2136 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
2137 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
2138 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
2139 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
;; EXEC-masked form of mul<mode>3_zext_dup2.  Operand 1 is a <VnSI> vector,
;; operand 2 a DImode scalar, operand 3 the previous destination value (or an
;; unspec), operand 4 the EXEC mask and operand 5 an early-clobber <VnSI>
;; scratch.  The split code emits the masked low product, masked unsigned
;; high product, a masked product into the scratch (undef merge value) and a
;; masked add into the high half.
;; NOTE(review): operand 2 is declared DImode, yet its halves are extracted
;; with gcn_operand_part (<MODE>mode, ...) -- confirm the helper copes with a
;; scalar operand when handed the vector mode.
2143 (define_insn_and_split "mul<mode>3_zext_dup2_exec"
2144 [(set (match_operand:V_DI 0 "register_operand" "= &v")
2148 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2150 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
2151 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2152 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2153 (clobber (match_scratch:<VnSI> 5 "= &v"))]
2159 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2160 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2161 rtx left = operands[1];
2162 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2163 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2164 rtx exec = operands[4];
2165 rtx tmp = operands[5];
2168 if (GET_CODE (operands[3]) == UNSPEC)
2170 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2174 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2175 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2178 rtx undef = gcn_gen_undef (<VnSI>mode);
2180 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
2181 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
2183 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
2184 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2189 ;; {{{ ALU generic case
;; Code iterators for the generic integer ALU patterns: bitwise operations,
;; shifts, and signed/unsigned minimum and maximum.
2191 (define_code_iterator bitop [and ior xor])
2192 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
2193 (define_code_iterator minmaxop [smin smax umin umax])
;; One-source integer vector operation on the single-register integer vector
;; modes, printed as v_<mnemonic>0 with destination %0 and source %1.
2195 (define_insn "<expander><mode>2<exec>"
2196 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
2198 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
2200 "v_<mnemonic>0\t%0, %1"
2201 [(set_attr "type" "vop1")
2202 (set_attr "length" "8")])
;; Two-source integer vector operation on the single-register integer vector
;; modes; operand 1 is marked commutative ("%").  The first alternative is
;; the register form (v_<mnemonic>0, sources printed in swapped order); the
;; second ties operand 1 to the destination and prints a ds_<mnemonic>0
;; instruction addressed through operand 0.
2204 (define_insn "<expander><mode>3<exec>"
2205 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
2207 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
2208 (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2211 v_<mnemonic>0\t%0, %2, %1
2212 ds_<mnemonic>0\t%A0, %2%O0"
2213 [(set_attr "type" "vop2,ds")
2214 (set_attr "length" "8,8")])
;; V_DI form of the two-source operation.  Once reload has completed, and
;; unless the destination is a DS memory operand, the insn is split into two
;; <VnSI> bitop operations; operands 3-8 are the low and high parts of
;; operands 0, 1 and 2.  The "RD" alternative prints a ds_<mnemonic>0
;; instruction.
2216 (define_insn_and_split "<expander><mode>3"
2217 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2219 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2220 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2224 ds_<mnemonic>0\t%A0, %2%O0"
2225 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2227 (bitop:<VnSI> (match_dup 5) (match_dup 7)))
2229 (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
2231 operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0);
2232 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1);
2233 operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0);
2234 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1);
2235 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0);
2236 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1);
2238 [(set_attr "type" "vmult,ds")
2239 (set_attr "length" "16,8")])
;; EXEC-masked V_DI form of the two-source operation.  The insn condition
;; accepts either a non-memory destination, or a destination identical to
;; operand 1 together with a register operand 2.  After reload, destinations
;; that are not DS memory are split into <VnSI> bitop operations on the
;; halves; operands 5-12 are the low and high parts of operands 0-3.
2241 (define_insn_and_split "<expander><mode>3_exec"
2242 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2245 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2246 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2247 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
2248 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2249 "!memory_operand (operands[0], VOIDmode)
2250 || (rtx_equal_p (operands[0], operands[1])
2251 && register_operand (operands[2], VOIDmode))"
2254 ds_<mnemonic>0\t%A0, %2%O0"
2255 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2258 (bitop:<VnSI> (match_dup 7) (match_dup 9))
2263 (bitop:<VnSI> (match_dup 8) (match_dup 10))
2267 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0);
2268 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1);
2269 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0);
2270 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1);
2271 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0);
2272 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1);
2273 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0);
2274 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1);
2276 [(set_attr "type" "vmult,ds")
2277 (set_attr "length" "16,8")])
;; Shift of a sub-dword (V_QIHI) vector by a scalar SImode amount.  The input
;; vector is converted to <VnSI>, shifted with the <VnSI> pattern, and the
;; result converted back to the original mode.  The conversions are unsigned
;; only for lshiftrt; the local enum supplies the identifiers that the
;; lower-case <code> substitution is compared against.
2279 (define_expand "<expander><mode>3"
2280 [(set (match_operand:V_QIHI 0 "register_operand" "= v")
2282 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2283 (vec_duplicate:V_QIHI
2284 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2287 enum {ashift, lshiftrt, ashiftrt};
2288 bool unsignedp = (<code> == lshiftrt);
2289 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2290 rtx insi2 = gen_reg_rtx (SImode);
2291 rtx outsi = gen_reg_rtx (<VnSI>mode);
2293 convert_move (insi1, operands[1], unsignedp);
2294 convert_move (insi2, operands[2], unsignedp);
2295 emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2));
2296 convert_move (operands[0], outsi, unsignedp);
;; Shift of a V_INT_noHI vector by a scalar SImode amount duplicated across
;; the lanes.  Printed with the reversed-operand mnemonic, so the shift
;; amount (%2) precedes the shifted value (%1) in the assembly.
2300 (define_insn "<expander><mode>3<exec>"
2301 [(set (match_operand:V_INT_noHI 0 "register_operand" "= v")
2303 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2304 (vec_duplicate:<VnSI>
2305 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2307 "v_<revmnemonic>0\t%0, %2, %1"
2308 [(set_attr "type" "vop2")
2309 (set_attr "length" "8")])
;; Shift of a sub-dword (V_QIHI) vector by a per-lane vector amount.  Both
;; inputs are converted to <VnSI>, shifted with the <VnSI> pattern, and the
;; result converted back.  The conversions are unsigned only for lshiftrt;
;; the local enum supplies the identifiers that the lower-case <code>
;; substitution is compared against.
2311 (define_expand "v<expander><mode>3"
2312 [(set (match_operand:V_QIHI 0 "register_operand" "=v")
2314 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2315 (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
2318 enum {ashift, lshiftrt, ashiftrt};
2319 bool unsignedp = (<code> == lshiftrt);
2320 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2321 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2322 rtx outsi = gen_reg_rtx (<VnSI>mode);
2324 convert_move (insi1, operands[1], unsignedp);
2325 convert_move (insi2, operands[2], unsignedp);
2326 emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2));
2327 convert_move (operands[0], outsi, unsignedp);
;; Shift of a V_INT_noHI vector by a per-lane <VnSI> amount.  Printed with
;; the reversed-operand mnemonic, so the shift amount (%2) precedes the
;; shifted value (%1) in the assembly.
2331 (define_insn "v<expander><mode>3<exec>"
2332 [(set (match_operand:V_INT_noHI 0 "register_operand" "=v")
2334 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2335 (match_operand:<VnSI> 2 "gcn_alu_operand" "vB")))]
2337 "v_<revmnemonic>0\t%0, %2, %1"
2338 [(set_attr "type" "vop2")
2339 (set_attr "length" "8")])
;; Minimum/maximum on sub-dword (V_QIHI) vectors.  Both inputs are converted
;; to <VnSI> (unsigned conversion for umin/umax, signed otherwise), the
;; <VnSI> pattern is applied, and the result is converted back.  The local
;; enum supplies the identifiers that the lower-case <code> substitution is
;; compared against.
2341 (define_expand "<expander><mode>3"
2342 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
2344 (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
2345 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
2348 enum {smin, umin, smax, umax};
2349 bool unsignedp = (<code> == umax || <code> == umin);
2350 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2351 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2352 rtx outsi = gen_reg_rtx (<VnSI>mode);
2354 convert_move (insi1, operands[1], unsignedp);
2355 convert_move (insi2, operands[2], unsignedp);
2356 emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2));
2357 convert_move (operands[0], outsi, unsignedp);
;; Two-source operation on V_SI vectors; operand 1 is marked commutative.
;; The first alternative is the register form (v_<mnemonic>0, sources printed
;; in swapped order); the second ties operand 1 to the destination and prints
;; a ds_<mnemonic>0 instruction addressed through operand 0.
2361 (define_insn "<expander><vnsi>3<exec>"
2362 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD")
2364 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0")
2365 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2368 v_<mnemonic>0\t%0, %2, %1
2369 ds_<mnemonic>0\t%A0, %2%O0"
2370 [(set_attr "type" "vop2,ds")
2371 (set_attr "length" "8,8")])
;; Integer vector negation.  Expands to sub<mode>3 with an all-zero vector
;; constant as the first source operand.
2376 (define_expand "neg<mode>2"
2377 [(match_operand:V_INT 0 "register_operand")
2378 (match_operand:V_INT 1 "register_operand")]
2381 emit_insn (gen_sub<mode>3 (operands[0], gcn_vec_constant (<MODE>mode, 0),
2387 ;; {{{ FP binops - special cases
2389 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2390 ; adding the negated second operand to the first.
;; Vector DF subtract.  Both alternatives print v_add_f64 with operand 2
;; negated; they differ in which source takes the wider "vSvB" constraint and
;; in the order the sources are printed.
2392 (define_insn "sub<mode>3<exec>"
2393 [(set (match_operand:V_DF 0 "register_operand" "= v, v")
2395 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v")
2396 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))]
2399 v_add_f64\t%0, %1, -%2
2400 v_add_f64\t%0, -%2, %1"
2401 [(set_attr "type" "vop3a")
2402 (set_attr "length" "8,8")])
;; Scalar DF subtract.  Both alternatives print v_add_f64 with operand 2
;; negated; they differ in which source takes the wider "vSvB" constraint and
;; in the order the sources are printed.
2404 (define_insn "subdf3"
2405 [(set (match_operand:DF 0 "register_operand" "= v, v")
2407 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2408 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2411 v_add_f64\t%0, %1, -%2
2412 v_add_f64\t%0, -%2, %1"
2413 [(set_attr "type" "vop3a")
2414 (set_attr "length" "8,8")])
2417 ;; {{{ FP binops - generic
;; Code iterators for the generic FP binary patterns: the commutative
;; operations, the non-commutative one (minus), and the union of both.
2419 (define_code_iterator comm_fp [plus mult smin smax])
2420 (define_code_iterator nocomm_fp [minus])
2421 (define_code_iterator all_fp [plus mult minus smin smax])
;; Vector FP binary operation with operand 1 marked commutative ("%").
;; Printed as v_<mnemonic>0 with the sources in swapped order (%2, %1).
2423 (define_insn "<expander><mode>3<exec>"
2424 [(set (match_operand:V_FP 0 "register_operand" "= v")
2426 (match_operand:V_FP 1 "gcn_alu_operand" "% v")
2427 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
2429 "v_<mnemonic>0\t%0, %2, %1"
2430 [(set_attr "type" "vop2")
2431 (set_attr "length" "8")])
;; Scalar FP binary operation with operand 1 marked commutative ("%").  The
;; first alternative is the register form with sources printed in swapped
;; order; the second ties operand 1 to an "RL" destination.
;; NOTE(review): the second alternative has type "ds" but prints a
;; v_<mnemonic>0 template with only %1 and an %O0 suffix -- confirm this is
;; the intended output for that alternative.
2433 (define_insn "<expander><mode>3"
2434 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
2436 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
2437 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2440 v_<mnemonic>0\t%0, %2, %1
2441 v_<mnemonic>0\t%0, %1%O0"
2442 [(set_attr "type" "vop2,ds")
2443 (set_attr "length" "8")])
;; Non-commutative FP binary operation on single-register FP vector modes.
;; The first alternative prints the operands in natural order; the second
;; uses the reversed-operand mnemonic with the sources swapped, so that
;; either source may take the wider "vSvB" constraint.
2445 (define_insn "<expander><mode>3<exec>"
2446 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
2447 (nocomm_fp:V_FP_1REG
2448 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2449 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2452 v_<mnemonic>0\t%0, %1, %2
2453 v_<revmnemonic>0\t%0, %2, %1"
2454 [(set_attr "type" "vop2")
2455 (set_attr "length" "8,8")])
;; Scalar (FP_1REG) FP binary operation with two alternatives: natural
;; operand order with v_<mnemonic>0, or the reversed-operand mnemonic with
;; the sources swapped, so that either source may take the wider "vSvB"
;; constraint.
2457 (define_insn "<expander><mode>3"
2458 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v")
2460 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2461 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2464 v_<mnemonic>0\t%0, %1, %2
2465 v_<revmnemonic>0\t%0, %2, %1"
2466 [(set_attr "type" "vop2")
2467 (set_attr "length" "8,8")])
;; Scalar FP absolute value, printed as an add of 0 and operand 1 with the
;; |...| source modifier applied.
2472 (define_insn "abs<mode>2"
2473 [(set (match_operand:FP 0 "register_operand" "=v")
2474 (abs:FP (match_operand:FP 1 "register_operand" " v")))]
2476 "v_add%i0\t%0, 0, |%1|"
2477 [(set_attr "type" "vop3a")
2478 (set_attr "length" "8")])
;; Vector FP absolute value, printed as an add of 0 and operand 1 with the
;; |...| source modifier applied.
2480 (define_insn "abs<mode>2<exec>"
2481 [(set (match_operand:V_FP 0 "register_operand" "=v")
2483 (match_operand:V_FP 1 "register_operand" " v")))]
2485 "v_add%i0\t%0, 0, |%1|"
2486 [(set_attr "type" "vop3a")
2487 (set_attr "length" "8")])
;; Vector FP negation, printed as an add of 0 and operand 1 with the
;; negation source modifier applied.
2489 (define_insn "neg<mode>2<exec>"
2490 [(set (match_operand:V_FP 0 "register_operand" "=v")
2492 (match_operand:V_FP 1 "register_operand" " v")))]
2494 "v_add%i0\t%0, 0, -%1"
2495 [(set_attr "type" "vop3a")
2496 (set_attr "length" "8")])
;; Vector FP square root (VOP1).  Only enabled when
;; flag_unsafe_math_optimizations is set.
2498 (define_insn "sqrt<mode>2<exec>"
2499 [(set (match_operand:V_FP 0 "register_operand" "= v")
2501 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
2502 "flag_unsafe_math_optimizations"
2504 [(set_attr "type" "vop1")
2505 (set_attr "length" "8")])
;; Scalar FP square root (VOP1).  Only enabled when
;; flag_unsafe_math_optimizations is set.
2507 (define_insn "sqrt<mode>2"
2508 [(set (match_operand:FP 0 "register_operand" "= v")
2510 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
2511 "flag_unsafe_math_optimizations"
2513 [(set_attr "type" "vop1")
2514 (set_attr "length" "8")])
2516 ; These FP unops have f64, f32 and f16 versions.
2517 (define_int_iterator MATH_UNOP_1OR2REG
2518 [UNSPEC_FLOOR UNSPEC_CEIL])
2520 ; These FP unops only have f16/f32 versions.
2521 (define_int_iterator MATH_UNOP_1REG
2522 [UNSPEC_EXP2 UNSPEC_LOG2])
; The trigonometric unops, which are handled by dedicated expanders.
2524 (define_int_iterator MATH_UNOP_TRIG
2525 [UNSPEC_SIN UNSPEC_COS])
; Maps each unop unspec to the name used in pattern names and mnemonics.
2527 (define_int_attr math_unop
2528 [(UNSPEC_FLOOR "floor")
2529 (UNSPEC_CEIL "ceil")
2530 (UNSPEC_EXP2 "exp2")
2531 (UNSPEC_LOG2 "log2")
2533 (UNSPEC_COS "cos")])
;; Scalar floor/ceil (the MATH_UNOP_1OR2REG unspecs) for all FP modes,
;; printed as v_<math_unop> with the type suffix of operand 0.
2535 (define_insn "<math_unop><mode>2"
2536 [(set (match_operand:FP 0 "register_operand" "= v")
2538 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
2539 MATH_UNOP_1OR2REG))]
2541 "v_<math_unop>%i0\t%0, %1"
2542 [(set_attr "type" "vop1")
2543 (set_attr "length" "8")])
;; Vector floor/ceil (the MATH_UNOP_1OR2REG unspecs) for all FP vector modes,
;; printed as v_<math_unop> with the type suffix of operand 0.
2545 (define_insn "<math_unop><mode>2<exec>"
2546 [(set (match_operand:V_FP 0 "register_operand" "= v")
2548 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
2549 MATH_UNOP_1OR2REG))]
2551 "v_<math_unop>%i0\t%0, %1"
2552 [(set_attr "type" "vop1")
2553 (set_attr "length" "8")])
;; Scalar FP unary math operation on the FP_1REG modes, printed as
;; v_<math_unop>.  Only enabled under flag_unsafe_math_optimizations.
2555 (define_insn "<math_unop><mode>2"
2556 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
2558 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
2560 "flag_unsafe_math_optimizations"
2561 "v_<math_unop>%i0\t%0, %1"
2562 [(set_attr "type" "vop1")
2563 (set_attr "length" "8")])
;; Vector FP unary math operation on the V_FP_1REG modes, printed as
;; v_<math_unop>.  Only enabled under flag_unsafe_math_optimizations.
2565 (define_insn "<math_unop><mode>2<exec>"
2566 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
2568 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
2570 "flag_unsafe_math_optimizations"
2571 "v_<math_unop>%i0\t%0, %1"
2572 [(set_attr "type" "vop1")
2573 (set_attr "length" "8")])
;; Anonymous ("*"-prefixed) scalar insn for an FP unary math operation on the
;; FP_1REG modes; being unnamed it is reached by pattern matching rather than
;; through a generator function.  Only enabled under
;; flag_unsafe_math_optimizations.
2575 (define_insn "*<math_unop><mode>2_insn"
2576 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
2578 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
2580 "flag_unsafe_math_optimizations"
2581 "v_<math_unop>%i0\t%0, %1"
2582 [(set_attr "type" "vop1")
2583 (set_attr "length" "8")])
;; Anonymous ("*"-prefixed) vector insn for an FP unary math operation on the
;; V_FP_1REG modes; being unnamed it is reached by pattern matching rather
;; than through a generator function.  Only enabled under
;; flag_unsafe_math_optimizations.
2585 (define_insn "*<math_unop><mode>2<exec>_insn"
2586 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
2588 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
2590 "flag_unsafe_math_optimizations"
2591 "v_<math_unop>%i0\t%0, %1"
2592 [(set_attr "type" "vop1")
2593 (set_attr "length" "8")])
2595 ; Trigonometric functions need their input scaled by 1/(2*PI) first.
;; Scalar expander: operand 2 is a fresh temporary register and operand 3 is
;; the constant 1/(2*PI) obtained from gcn_dconst1over2pi.  Only enabled
;; under flag_unsafe_math_optimizations.
2597 (define_expand "<math_unop><mode>2"
2601 (match_operand:FP_1REG 1 "gcn_alu_operand")))
2602 (set (match_operand:FP_1REG 0 "register_operand")
2606 "flag_unsafe_math_optimizations"
2608 operands[2] = gen_reg_rtx (<MODE>mode);
2609 operands[3] = const_double_from_real_value (gcn_dconst1over2pi (),
;; Vector expander: operand 2 is a fresh temporary register, and a vector
;; constant is built from the scalar 1/(2*PI) value returned by
;; gcn_dconst1over2pi.  Only enabled under flag_unsafe_math_optimizations.
2613 (define_expand "<math_unop><mode>2<exec>"
2617 (match_operand:V_FP_1REG 1 "gcn_alu_operand")))
2618 (set (match_operand:V_FP_1REG 0 "register_operand")
2622 "flag_unsafe_math_optimizations"
2624 operands[2] = gen_reg_rtx (<MODE>mode);
2626 gcn_vec_constant (<MODE>mode,
2627 const_double_from_real_value (gcn_dconst1over2pi (),
2628 <SCALAR_MODE>mode));
2631 ; Implement ldexp pattern
;; Scalar ldexp: FP operand 1 combined with SImode operand 2, printed as
;; v_ldexp with the type suffix of operand 0.
2633 (define_insn "ldexp<mode>3"
2634 [(set (match_operand:FP 0 "register_operand" "=v")
2636 [(match_operand:FP 1 "gcn_alu_operand" "vB")
2637 (match_operand:SI 2 "gcn_alu_operand" "vSvA")]
2640 "v_ldexp%i0\t%0, %1, %2"
2641 [(set_attr "type" "vop3a")
2642 (set_attr "length" "8")])
;; Vector ldexp: FP vector operand 1 combined with <VnSI> operand 2, printed
;; as v_ldexp with the type suffix of operand 0.
2644 (define_insn "ldexp<mode>3<exec>"
2645 [(set (match_operand:V_FP 0 "register_operand" "= v")
2647 [(match_operand:V_FP 1 "gcn_alu_operand" " vB")
2648 (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")]
2651 "v_ldexp%i0\t%0, %1, %2"
2652 [(set_attr "type" "vop3a")
2653 (set_attr "length" "8")])
2655 ; Implement frexp patterns
;; Scalar frexp exponent: SImode result from FP operand 1, printed as
;; v_frexp_exp_i32 with the type suffix of the source operand.
2657 (define_insn "frexp<mode>_exp2"
2658 [(set (match_operand:SI 0 "register_operand" "=v")
2660 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
2663 "v_frexp_exp_i32%i1\t%0, %1"
2664 [(set_attr "type" "vop1")
2665 (set_attr "length" "8")])
;; Scalar frexp mantissa (UNSPEC_FREXP_MANT): FP result from FP operand 1,
;; printed as v_frexp_mant with the type suffix of the source operand.
2667 (define_insn "frexp<mode>_mant2"
2668 [(set (match_operand:FP 0 "register_operand" "=v")
2670 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
2671 UNSPEC_FREXP_MANT))]
2673 "v_frexp_mant%i1\t%0, %1"
2674 [(set_attr "type" "vop1")
2675 (set_attr "length" "8")])
;; Vector frexp exponent: <VnSI> result from FP vector operand 1, printed as
;; v_frexp_exp_i32 with the type suffix of the source operand.
2677 (define_insn "frexp<mode>_exp2<exec>"
2678 [(set (match_operand:<VnSI> 0 "register_operand" "=v")
2680 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
2683 "v_frexp_exp_i32%i1\t%0, %1"
2684 [(set_attr "type" "vop1")
2685 (set_attr "length" "8")])
;; Vector frexp mantissa (UNSPEC_FREXP_MANT): FP vector result from FP vector
;; operand 1, printed as v_frexp_mant with the type suffix of the source.
2687 (define_insn "frexp<mode>_mant2<exec>"
2688 [(set (match_operand:V_FP 0 "register_operand" "=v")
2690 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
2691 UNSPEC_FREXP_MANT))]
2693 "v_frexp_mant%i1\t%0, %1"
2694 [(set_attr "type" "vop1")
2695 (set_attr "length" "8")])
2698 ;; {{{ FP fused multiply and add
;; Vector fused multiply-add of operands 1, 2 and 3, printed as v_fma.
;; Operand 1 is marked commutative; each alternative gives the wider "vSvA"
;; constraint to a single source operand (3 or 2).
2700 (define_insn "fma<mode>4<exec>"
2701 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
2703 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
2704 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
2705 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
2707 "v_fma%i0\t%0, %1, %2, %3"
2708 [(set_attr "type" "vop3a")
2709 (set_attr "length" "8")])
;; Vector fused multiply-add with operand 2 negated, printed as v_fma with a
;; "-" modifier on %2.  Each of the three alternatives gives the wider "vSvA"
;; constraint to a single source operand.
2711 (define_insn "fma<mode>4_negop2<exec>"
2712 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
2714 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2716 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2717 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2719 "v_fma%i0\t%0, %1, -%2, %3"
2720 [(set_attr "type" "vop3a")
2721 (set_attr "length" "8")])
;; Scalar fused multiply-add of operands 1, 2 and 3, printed as v_fma.
;; Operand 1 is marked commutative; each alternative gives the wider "vSvA"
;; constraint to a single source operand (3 or 2).
2723 (define_insn "fma<mode>4"
2724 [(set (match_operand:FP 0 "register_operand" "= v, v")
2726 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
2727 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
2728 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
2730 "v_fma%i0\t%0, %1, %2, %3"
2731 [(set_attr "type" "vop3a")
2732 (set_attr "length" "8")])
;; Scalar fused multiply-add with operand 2 negated, printed as v_fma with a
;; "-" modifier on %2.  Each of the three alternatives gives the wider "vSvA"
;; constraint to a single source operand.
2734 (define_insn "fma<mode>4_negop2"
2735 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
2737 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2739 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2740 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2742 "v_fma%i0\t%0, %1, -%2, %3"
2743 [(set_attr "type" "vop3a")
2744 (set_attr "length" "8")])
;; Vector FP reciprocal estimate of operand 1 (VOP1).  The div<mode>3
;; expander uses this as the starting value that it then refines.
2749 (define_insn "recip<mode>2<exec>"
2750 [(set (match_operand:V_FP 0 "register_operand" "= v")
2752 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
2756 [(set_attr "type" "vop1")
2757 (set_attr "length" "8")])
;; Scalar FP reciprocal estimate of operand 1 (VOP1).  The div<mode>3
;; expander uses this as the starting value that it then refines.
2759 (define_insn "recip<mode>2"
2760 [(set (match_operand:FP 0 "register_operand" "= v")
2762 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
2766 [(set_attr "type" "vop1")
2767 (set_attr "length" "8")])
2769 ;; Do division via a = b * 1/c
2770 ;; The v_rcp_* instructions are not sufficiently accurate on their own,
2771 ;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
2772 ;; which the ISA manual says is enough to improve the reciprocal accuracy.
2774 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
;; Vector FP division, enabled only under flag_reciprocal_math.
;; Operand 0 = operand 1 (num) / operand 2 (denom).
;; A reciprocal estimate (recip<mode>2) is refined with two fma steps:
;;   fma = 1 - initrcp * denom;  rcp = fma * initrcp + initrcp.
;; The quotient estimate num * rcp is then corrected twice, each time by
;; computing the residual num - estimate * denom and folding residual * rcp
;; back into the estimate.
;; is_rcp flags a constant numerator whose first element is exactly +1.0,
;; i.e. a plain reciprocal.  The test must compare against dconst1: matching
;; -1.0 would treat -1/x as 1/x.
;; NOTE(review): only element 0 of the constant vector is inspected; confirm
;; that non-uniform constant numerators cannot reach this expander.
2776 (define_expand "div<mode>3"
2777 [(match_operand:V_FP 0 "gcn_valu_dst_operand")
2778 (match_operand:V_FP 1 "gcn_valu_src0_operand")
2779 (match_operand:V_FP 2 "gcn_valu_src0_operand")]
2780 "flag_reciprocal_math"
2782 rtx one = gcn_vec_constant (<MODE>mode,
2783 const_double_from_real_value (dconst1, <SCALAR_MODE>mode));
2784 rtx initrcp = gen_reg_rtx (<MODE>mode);
2785 rtx fma = gen_reg_rtx (<MODE>mode);
2787 rtx num = operands[1], denom = operands[2];
2789 bool is_rcp = (GET_CODE (num) == CONST_VECTOR
2791 (CONST_DOUBLE_REAL_VALUE
2792 (CONST_VECTOR_ELT (num, 0)), &dconst1));
2797 rcp = gen_reg_rtx (<MODE>mode);
2799 emit_insn (gen_recip<mode>2 (initrcp, denom));
2800 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
2801 emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));
2805 rtx div_est = gen_reg_rtx (<MODE>mode);
2806 rtx fma2 = gen_reg_rtx (<MODE>mode);
2807 rtx fma3 = gen_reg_rtx (<MODE>mode);
2808 rtx fma4 = gen_reg_rtx (<MODE>mode);
2809 emit_insn (gen_mul<mode>3 (div_est, num, rcp));
2810 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
2811 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
2812 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
2813 emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
;; Scalar FP division, enabled only under flag_reciprocal_math.
;; Operand 0 = operand 1 (num) / operand 2 (denom).
;; A reciprocal estimate (recip<mode>2) is refined with two fma steps:
;;   fma = 1 - initrcp * denom;  rcp = fma * initrcp + initrcp.
;; The quotient estimate num * rcp is then corrected twice, each time by
;; computing the residual num - estimate * denom and folding residual * rcp
;; back into the estimate.
;; is_rcp is set from a real_identical test on a CONST_DOUBLE numerator.
2819 (define_expand "div<mode>3"
2820 [(match_operand:FP 0 "gcn_valu_dst_operand")
2821 (match_operand:FP 1 "gcn_valu_src0_operand")
2822 (match_operand:FP 2 "gcn_valu_src0_operand")]
2823 "flag_reciprocal_math"
2825 rtx one = const_double_from_real_value (dconst1, <MODE>mode);
2826 rtx initrcp = gen_reg_rtx (<MODE>mode);
2827 rtx fma = gen_reg_rtx (<MODE>mode);
2829 rtx num = operands[1], denom = operands[2];
2831 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
2832 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
2838 rcp = gen_reg_rtx (<MODE>mode);
2840 emit_insn (gen_recip<mode>2 (initrcp, denom));
2841 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
2842 emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));
2846 rtx div_est = gen_reg_rtx (<MODE>mode);
2847 rtx fma2 = gen_reg_rtx (<MODE>mode);
2848 rtx fma3 = gen_reg_rtx (<MODE>mode);
2849 rtx fma4 = gen_reg_rtx (<MODE>mode);
2850 emit_insn (gen_mul<mode>3 (div_est, num, rcp));
2851 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
2852 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
2853 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
2854 emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
2861 ;; {{{ Int/FP conversions
;; Mode and code iterators for the int/FP conversion patterns.
;; CVT_FROM_MODE and CVT_TO_MODE list the same scalar modes so the patterns
;; can iterate over every source/destination pairing; cvt_name maps each
;; conversion code to its pattern-name prefix, and cvt_operands gives the
;; operand type-suffix directives appended to v_cvt.
2863 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2864 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2866 (define_mode_iterator VCVT_MODE
2867 [V2HI V2SI V2HF V2SF V2DF
2868 V4HI V4SI V4HF V4SF V4DF
2869 V8HI V8SI V8HF V8SF V8DF
2870 V16HI V16SI V16HF V16SF V16DF
2871 V32HI V32SI V32HF V32SF V32DF
2872 V64HI V64SI V64HF V64SF V64DF])
2873 (define_mode_iterator VCVT_FMODE
2880 (define_mode_iterator VCVT_IMODE
2888 (define_code_iterator cvt_op [fix unsigned_fix
2889 float unsigned_float
2890 float_extend float_truncate])
2891 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2892 (float "float") (unsigned_float "floatuns")
2893 (float_extend "extend") (float_truncate "trunc")])
2894 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2895 (float "%i0%i1") (unsigned_float "%i0%u1")
2896 (float_extend "%i0%i1")
2897 (float_truncate "%i0%i1")])
;; Scalar conversion from CVT_FROM_MODE to CVT_TO_MODE.  The insn condition
;; calls gcn_valid_cvt_p on the mode pair; the instruction is printed as
;; v_cvt followed by the suffixes from cvt_operands.
2899 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2900 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2902 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2903 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2905 "v_cvt<cvt_operands>\t%0, %1"
2906 [(set_attr "type" "vop1")
2907 (set_attr "length" "8")])
;; Vector conversion from a VCVT_MODE source to a VCVT_FMODE destination.
;; The insn condition requires MODE_VF to agree for both modes and calls
;; gcn_valid_cvt_p on the mode pair; printed as v_cvt plus the cvt_operands
;; suffixes.
2909 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
2910 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
2912 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
2913 "MODE_VF (<VCVT_MODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
2914 && gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
2916 "v_cvt<cvt_operands>\t%0, %1"
2917 [(set_attr "type" "vop1")
2918 (set_attr "length" "8")])
;; Vector conversion from a VCVT_FMODE source to a VCVT_IMODE destination.
;; The insn condition requires MODE_VF to agree for both modes and calls
;; gcn_valid_cvt_p on the mode pair; printed as v_cvt plus the cvt_operands
;; suffixes.
2920 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
2921 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
2923 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
2924 "MODE_VF (<VCVT_IMODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
2925 && gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
2927 "v_cvt<cvt_operands>\t%0, %1"
2928 [(set_attr "type" "vop1")
2929 (set_attr "length" "8")])
2932 ;; {{{ Int/int conversions
;; zero_convert iterates over the two conversions that share one insn
;; template below; convop maps each conversion code to the prefix used in
;; pattern names.
2934 (define_code_iterator zero_convert [truncate zero_extend])
2935 (define_code_attr convop [
2936 (sign_extend "extend")
2937 (zero_extend "zero_extend")
2938 (truncate "trunc")])
;; Truncate or zero-extend between single-register integer vector modes.
;; Printed as v_mov_b32_sdwa, with dst_sel and src0_sel taken from the <sdwa>
;; attribute of the destination and source modes, and dst_unused set to
;; UNUSED_PAD.
2940 (define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2941 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2942 (zero_convert:V_INT_1REG
2943 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
2945 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
2946 [(set_attr "type" "vop_sdwa")
2947 (set_attr "length" "8")])
;; Sign-extend between single-register integer vector modes.  Printed as
;; v_mov_b32_sdwa with the sext() modifier on the source and src0_sel taken
;; from the <sdwa> attribute of the source mode.
2949 (define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2950 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2951 (sign_extend:V_INT_1REG
2952 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
2954 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
2955 [(set_attr "type" "vop_sdwa")
2956 (set_attr "length" "8")])
2958 ;; GCC can already do these for scalar types, but not for vector types.
2959 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2960 ;; so there must be a few tricks here.
;; Truncate a <VnDI> vector to a single-register integer vector mode.  The
;; split code takes part 0 of the input; when the destination mode is not
;; <VnSI> it narrows that part with trunc<vnsi><mode>2, and the same-mode
;; case is served by a plain move.
2962 (define_insn_and_split "trunc<vndi><mode>2"
2963 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2964 (truncate:V_INT_1REG
2965 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))]
2971 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
2972 rtx out = operands[0];
2974 if (<MODE>mode != <VnSI>mode)
2975 emit_insn (gen_trunc<vnsi><mode>2 (out, inlo));
2977 emit_move_insn (out, inlo);
2979 [(set_attr "type" "vop2")
2980 (set_attr "length" "4")])
;; EXEC-masked truncation of a <VnDI> vector to a single-register integer
;; vector mode.  Operand 2 is the merge value (or an unspec) and operand 3
;; the mask.  The split code takes part 0 of the input; when the destination
;; mode is not <VnSI> it narrows that part with the masked trunc pattern, and
;; the same-mode case is served by a masked move.
2982 (define_insn_and_split "trunc<vndi><mode>2_exec"
2983 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2984 (vec_merge:V_INT_1REG
2985 (truncate:V_INT_1REG
2986 (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
2987 (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
2988 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2994 rtx out = operands[0];
2995 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
2996 rtx merge = operands[2];
2997 rtx exec = operands[3];
2999 if (<MODE>mode != <VnSI>mode)
3000 emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec));
3002 emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec));
3004 [(set_attr "type" "vop2")
3005 (set_attr "length" "4")])
;; Extend a single-register integer vector to <VnDI>.  The split code fills
;; the low half by converting (when the source mode is not <VnSI>) or moving
;; the input, and produces the high half either as an arithmetic right shift
;; of the low half by 31 or as a duplicated zero.
3007 (define_insn_and_split "<convop><mode><vndi>2"
3008 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3010 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
3016 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3017 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3018 rtx in = operands[1];
3020 if (<MODE>mode != <VnSI>mode)
3021 emit_insn (gen_<convop><mode><vnsi>2 (outlo, in));
3023 emit_move_insn (outlo, in);
3025 emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31)));
3027 emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx));
3029 [(set_attr "type" "mult")
3030 (set_attr "length" "12")])
;; EXEC-masked extension of a single-register integer vector to <VnDI>.
;; Operand 2 is the merge value (or an unspec) and operand 3 the mask.  The
;; split code works on the low and high halves separately, each merged with
;; the matching half of operand 2: the low half is a masked convert or move
;; of the input, and the high half is either a masked arithmetic right shift
;; of the low half by 31 or a masked duplicated zero.
3032 (define_insn_and_split "<convop><mode><vndi>2_exec"
3033 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3036 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
3037 (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
3038 (match_operand:DI 3 "gcn_exec_operand" " e")))]
3044 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3045 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3046 rtx in = operands[1];
3047 rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0);
3048 rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1);
3049 rtx exec = operands[3];
3051 if (<MODE>mode != <VnSI>mode)
3052 emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec));
3054 emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec));
3056 emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi,
3059 emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi,
3062 [(set_attr "type" "mult")
3063 (set_attr "length" "12")])
3066 ;; {{{ Vector comparison/merge
;; Vector comparison producing a DImode lane mask.  Operand 1 is the
;; comparison operator, printed via %E1.  The "cV" and "e" destination
;; alternatives print vcc as the destination (the "e" ones use v_cmpx and
;; take a "cV" scratch); the "Sg" alternatives print %0 and use the vop3a
;; type.
3068 (define_insn "vec_cmp<mode>di"
3069 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
3070 (match_operator:DI 1 "gcn_fp_compare_operator"
3071 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
3072 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]))
3073 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
3076 v_cmp%E1\tvcc, %2, %3
3077 v_cmp%E1\tvcc, %2, %3
3078 v_cmpx%E1\tvcc, %2, %3
3079 v_cmpx%E1\tvcc, %2, %3
3080 v_cmp%E1\t%0, %2, %3
3081 v_cmp%E1\t%0, %2, %3"
3082 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
3083 (set_attr "length" "4,8,4,8,8,8")])
;; Unsigned vector comparison for the non-QI integer vector modes.  Forwards
;; directly to vec_cmp<mode>di; the unsignedness is carried by the comparison
;; operator in operand 1.
3085 (define_expand "vec_cmpu<mode>di"
3086 [(match_operand:DI 0 "register_operand")
3087 (match_operator 1 "gcn_compare_operator"
3088 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3089 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
3092 /* Unsigned comparisons use the same patterns as signed comparisons,
3093 except that they use unsigned operators (e.g. LTU vs LT).
3094 The '%E1' directive then does the Right Thing. */
3095 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
3100 ; There's no instruction for 8-bit vector comparison, so we need to extend.
;; Comparison of V_QI vectors.  Both inputs are extended into fresh <VnSI>
;; temporaries (hence the can_create_pseudo_p condition) and then compared
;; with vec_cmp<vnsi>di.
3101 (define_expand "vec_cmp<u><mode>di"
3102 [(match_operand:DI 0 "register_operand")
3103 (match_operator 1 "gcn_compare_operator"
3104 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3105 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
3106 "can_create_pseudo_p ()"
3108 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3109 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
3111 emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2]));
3112 emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3]));
3113 emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2));
;; EXEC-masked vector comparison producing a DImode lane mask; operand 4 is
;; the mask.  The "cV" and "e" destination alternatives print vcc as the
;; destination (the "e" ones use v_cmpx and take a "cV" scratch); the "Sg"
;; alternatives print %0 and use the vop3a type.
3117 (define_insn "vec_cmp<mode>di_exec"
3118 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
3120 (match_operator 1 "gcn_fp_compare_operator"
3121 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
3122 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])
3123 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
3124 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
3127 v_cmp%E1\tvcc, %2, %3
3128 v_cmp%E1\tvcc, %2, %3
3129 v_cmpx%E1\tvcc, %2, %3
3130 v_cmpx%E1\tvcc, %2, %3
3131 v_cmp%E1\t%0, %2, %3
3132 v_cmp%E1\t%0, %2, %3"
3133 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
3134 (set_attr "length" "4,8,4,8,8,8")])
;; EXEC-masked unsigned vector comparison for the non-QI integer vector
;; modes; operand 4 is the mask.  Forwards to the vec_cmp<mode>di_exec insn,
;; just as the unmasked vec_cmpu<mode>di forwards to vec_cmp<mode>di; the
;; unsignedness is carried by the comparison operator in operand 1.  (The
;; expander must not call its own generator, which would recurse forever.)
3136 (define_expand "vec_cmpu<mode>di_exec"
3137 [(match_operand:DI 0 "register_operand")
3138 (match_operator 1 "gcn_compare_operator"
3139 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3140 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
3141 (match_operand:DI 4 "gcn_exec_reg_operand")]
3144 /* Unsigned comparisons use the same patterns as signed comparisons,
3145 except that they use unsigned operators (e.g. LTU vs LT).
3146 The '%E1' directive then does the Right Thing. */
3147 emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
3148 operands[2], operands[3],
;; EXEC-masked comparison of V_QI vectors; operand 4 is the mask.  Both
;; inputs are extended into fresh <VnSI> temporaries (hence the
;; can_create_pseudo_p condition) and then compared with
;; vec_cmp<vnsi>di_exec.  The masked extends take an undef merge value: the
;; merge operand has the <VnSI> destination mode, so the V_QI source cannot
;; be used there, and the masked comparison that follows is given the same
;; mask as the extends.
3153 (define_expand "vec_cmp<u><mode>di_exec"
3154 [(match_operand:DI 0 "register_operand")
3155 (match_operator 1 "gcn_compare_operator"
3156 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3157 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
3158 (match_operand:DI 4 "gcn_exec_reg_operand")]
3159 "can_create_pseudo_p ()"
3161 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3162 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
3164 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
3165 gcn_gen_undef (<VnSI>mode), operands[4]));
3166 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
3167 gcn_gen_undef (<VnSI>mode), operands[4]));
3168 emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
3169 sitmp2, operands[4]));
;; Vector comparison in which the first comparand is a scalar operand
;; duplicated across the lanes.  The "cV" and "e" destination alternatives
;; print vcc as the destination (the "e" ones use v_cmpx and take a "cV"
;; scratch); the "Sg" alternative prints %0 and uses the vop3a type.
3173 (define_insn "vec_cmp<mode>di_dup"
3174 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
3175 (match_operator:DI 1 "gcn_fp_compare_operator"
3176 [(vec_duplicate:V_noQI
3177 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3179 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]))
3180 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
3183 v_cmp%E1\tvcc, %2, %3
3184 v_cmp%E1\tvcc, %2, %3
3185 v_cmpx%E1\tvcc, %2, %3
3186 v_cmpx%E1\tvcc, %2, %3
3187 v_cmp%E1\t%0, %2, %3"
3188 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
3189 (set_attr "length" "4,8,4,8,8")])
;; As vec_cmp<mode>di_dup, but with an additional EXEC mask register in
;; operand 4 (constraint "e" in every alternative); the scratch therefore
;; moves to operand 5.  The assembler templates and the type/length
;; attributes are the same five alternatives as the unmasked pattern.
3191 (define_insn "vec_cmp<mode>di_dup_exec"
3192 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
3194 (match_operator 1 "gcn_fp_compare_operator"
3195 [(vec_duplicate:V_noQI
3196 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3198 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
3199 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
3200 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
3203 v_cmp%E1\tvcc, %2, %3
3204 v_cmp%E1\tvcc, %2, %3
3205 v_cmpx%E1\tvcc, %2, %3
3206 v_cmpx%E1\tvcc, %2, %3
3207 v_cmp%E1\t%0, %2, %3"
3208 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
3209 (set_attr "length" "4,8,4,8,8")])
;; Sets vector operand 0 from vector operands 1 and 2 and the DImode
;; operand 3, clobbering a <VnDI> scratch.  The vcond/vcondu expanders in
;; this file pass a freshly computed comparison result as operand 3.
3211 (define_expand "vcond_mask_<mode>di"
3213 [(set (match_operand:V_ALL 0 "register_operand" "")
3215 (match_operand:V_ALL 1 "gcn_vop3_operand" "")
3216 (match_operand:V_ALL 2 "gcn_alu_operand" "")
3217 (match_operand:DI 3 "register_operand" "")))
3218 (clobber (scratch:<VnDI>))])]
;; Vector conditional select, done in two steps:
;;   1. compare operands 4 and 5 (V_ALL_ALT) with the operator in operand 3
;;      into a new DImode register using vec_cmp<V_ALL_ALT:mode>di;
;;   2. pass that register, with operands 0, 1 and 2 (V_ALL), to
;;      vcond_mask_<V_ALL:mode>di.
3222 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
3223 [(match_operand:V_ALL 0 "register_operand")
3224 (match_operand:V_ALL 1 "gcn_vop3_operand")
3225 (match_operand:V_ALL 2 "gcn_alu_operand")
3226 (match_operator 3 "gcn_fp_compare_operator"
3227 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3228 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
3231 rtx tmp = gen_reg_rtx (DImode);
3232 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di
3233 (tmp, operands[3], operands[4], operands[5]));
3234 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3235 (operands[0], operands[1], operands[2], tmp));
;; Variant of vcond<V_ALL:mode><V_ALL_ALT:mode> that takes an EXEC mask in
;; operand 6.  The mask is given to the comparison
;; (vec_cmp<V_ALL_ALT:mode>di_exec); the following
;; vcond_mask_<V_ALL:mode>di call receives only the comparison result.
3239 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
3240 [(match_operand:V_ALL 0 "register_operand")
3241 (match_operand:V_ALL 1 "gcn_vop3_operand")
3242 (match_operand:V_ALL 2 "gcn_alu_operand")
3243 (match_operator 3 "gcn_fp_compare_operator"
3244 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3245 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
3246 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3249 rtx tmp = gen_reg_rtx (DImode);
3250 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec
3251 (tmp, operands[3], operands[4], operands[5], operands[6]));
3252 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3253 (operands[0], operands[1], operands[2], tmp));
;; Unsigned counterpart of the vcond expander: the compared operands 4 and
;; 5 are V_INT vectors and the comparison is emitted through
;; vec_cmpu<V_INT:mode>di.  The resulting DImode register is then handed to
;; vcond_mask_<V_ALL:mode>di with operands 0, 1 and 2.
3257 (define_expand "vcondu<V_ALL:mode><V_INT:mode>"
3258 [(match_operand:V_ALL 0 "register_operand")
3259 (match_operand:V_ALL 1 "gcn_vop3_operand")
3260 (match_operand:V_ALL 2 "gcn_alu_operand")
3261 (match_operator 3 "gcn_fp_compare_operator"
3262 [(match_operand:V_INT 4 "gcn_alu_operand")
3263 (match_operand:V_INT 5 "gcn_vop3_operand")])]
3266 rtx tmp = gen_reg_rtx (DImode);
3267 emit_insn (gen_vec_cmpu<V_INT:mode>di
3268 (tmp, operands[3], operands[4], operands[5]));
3269 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3270 (operands[0], operands[1], operands[2], tmp));
;; Variant of vcondu<V_ALL:mode><V_INT:mode> with an EXEC mask in operand 6.
;; The mask is forwarded to vec_cmpu<V_INT:mode>di_exec only; the
;; vcond_mask_<V_ALL:mode>di call that follows does not receive it.
3274 (define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
3275 [(match_operand:V_ALL 0 "register_operand")
3276 (match_operand:V_ALL 1 "gcn_vop3_operand")
3277 (match_operand:V_ALL 2 "gcn_alu_operand")
3278 (match_operator 3 "gcn_fp_compare_operator"
3279 [(match_operand:V_INT 4 "gcn_alu_operand")
3280 (match_operand:V_INT 5 "gcn_vop3_operand")])
3281 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3284 rtx tmp = gen_reg_rtx (DImode);
3285 emit_insn (gen_vec_cmpu<V_INT:mode>di_exec
3286 (tmp, operands[3], operands[4], operands[5], operands[6]));
3287 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3288 (operands[0], operands[1], operands[2], tmp));
3293 ;; {{{ Fully masked loop support
;; Compute a DImode lane mask into operand 0 from the SImode operands 1, 2
;; and 3.
;;  * If operand 1 or operand 2 is not a CONST_INT, the mask is produced at
;;    run time by a vec_cmpv64sidi_dup GT comparison.  The vector side is
;;    the V64SI value in VGPR 1 (named _0_1_2_3; presumably the lane
;;    numbers -- confirm), with operand 1 added via addv64si3_dup unless
;;    operand 1 is the constant zero.
;;  * Otherwise the mask is folded at compile time: the low
;;    (operand 2 - operand 1) bits are set, or all bits when that difference
;;    is 64 or more.
;; Finally, when operand 3 (read with INTVAL) is below 64, operand 0 is
;; ANDed with a mask keeping only its low operand-3 bits.
3295 (define_expand "while_ultsidi"
3296 [(match_operand:DI 0 "register_operand")
3297 (match_operand:SI 1 "")
3298 (match_operand:SI 2 "")
3299 (match_operand:SI 3 "")]
3302 if (GET_CODE (operands[1]) != CONST_INT
3303 || GET_CODE (operands[2]) != CONST_INT)
3305 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3307 if (GET_CODE (operands[1]) != CONST_INT
3308 || INTVAL (operands[1]) != 0)
3310 tmp = gen_reg_rtx (V64SImode);
3311 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
3313 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
3314 gen_rtx_GT (VOIDmode, 0, 0),
3319 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
3320 HOST_WIDE_INT mask = (diff >= 64 ? -1
3321 : ~((unsigned HOST_WIDE_INT)-1 << diff));
3322 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
3324 if (INTVAL (operands[3]) < 64)
3325 emit_insn (gen_anddi3 (operands[0], operands[0],
3326 gen_rtx_CONST_INT (VOIDmode,
3327 ~((unsigned HOST_WIDE_INT)-1
3328 << INTVAL (operands[3])))));
;; Masked vector load of memory operand 1 into register operand 0.
;; Operand 2 is forced into a DImode register and used as the execution
;; mask.  The memory reference is converted to a vector address by
;; gcn_expand_scalar_to_vector_address, and its address space and volatile
;; flag are passed along as CONST_INTs.  Operand 0 is zeroed before the
;; gather<mode>_expr_exec is emitted, and is also passed to it as an input.
3332 (define_expand "maskload<mode>di"
3333 [(match_operand:V_ALL 0 "register_operand")
3334 (match_operand:V_ALL 1 "memory_operand")
3335 (match_operand 2 "")]
3338 rtx exec = force_reg (DImode, operands[2]);
3339 rtx addr = gcn_expand_scalar_to_vector_address
3340 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode));
3341 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
3342 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
3344 /* Masked lanes are required to hold zero. */
3345 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
3347 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
3348 operands[0], exec));
;; Masked vector store of register operand 1 to memory operand 0.
;; Operand 2 is forced into a DImode register and used as the execution
;; mask.  The memory reference is converted to a vector address by
;; gcn_expand_scalar_to_vector_address, and the store is emitted as
;; scatter<mode>_expr_exec together with the address space and volatile
;; flag of operand 0.
3352 (define_expand "maskstore<mode>di"
3353 [(match_operand:V_ALL 0 "memory_operand")
3354 (match_operand:V_ALL 1 "register_operand")
3355 (match_operand 2 "")]
3358 rtx exec = force_reg (DImode, operands[2]);
3359 rtx addr = gcn_expand_scalar_to_vector_address
3360 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode));
3361 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
3362 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
3363 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
;; Masked gather load into operand 0.
;;   operand 1: DImode register, operand 2: <VnSI> register,
;;   operand 3: immediate (read with INTVAL), operand 4: SImode value,
;;   operand 5: mask, forced into a DImode register.
;; gcn_expand_scaled_offsets builds the address from operands 1, 2, 4 and
;; 3 in DEFAULT_ADDR_SPACE.  If the result has <VnDI> mode the 1offset
;; gather insn is used, otherwise the 2offsets form (which also takes
;; operand 1).  Operand 0 is zeroed first and is also passed to the gather
;; as an input.
3367 (define_expand "mask_gather_load<mode><vnsi>"
3368 [(match_operand:V_ALL 0 "register_operand")
3369 (match_operand:DI 1 "register_operand")
3370 (match_operand:<VnSI> 2 "register_operand")
3371 (match_operand 3 "immediate_operand")
3372 (match_operand:SI 4 "gcn_alu_operand")
3373 (match_operand:DI 5 "")]
3376 rtx exec = force_reg (DImode, operands[5]);
3378 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
3379 operands[2], operands[4],
3380 INTVAL (operands[3]), exec);
3382 /* Masked lanes are required to hold zero. */
3383 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
3385 if (GET_MODE (addr) == <VnDI>mode)
3386 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
3387 const0_rtx, const0_rtx,
3388 const0_rtx, operands[0],
3391 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
3393 const0_rtx, const0_rtx,
3394 operands[0], exec));
;; Masked scatter store of vector operand 4.
;;   operand 0: DImode register, operand 1: <VnSI> register,
;;   operand 2: immediate (read with INTVAL), operand 3: SImode value,
;;   operand 5: mask, forced into a DImode register.
;; gcn_expand_scaled_offsets builds the address from operands 0, 1, 3 and
;; 2 in DEFAULT_ADDR_SPACE.  If the result has <VnDI> mode the 1offset
;; scatter insn is used, otherwise the 2offsets form (which also takes
;; operand 0).
3398 (define_expand "mask_scatter_store<mode><vnsi>"
3399 [(match_operand:DI 0 "register_operand")
3400 (match_operand:<VnSI> 1 "register_operand")
3401 (match_operand 2 "immediate_operand")
3402 (match_operand:SI 3 "gcn_alu_operand")
3403 (match_operand:V_ALL 4 "register_operand")
3404 (match_operand:DI 5 "")]
3407 rtx exec = force_reg (DImode, operands[5]);
3409 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
3410 operands[1], operands[3],
3411 INTVAL (operands[2]), exec);
3413 if (GET_MODE (addr) == <VnDI>mode)
3414 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
3415 operands[4], const0_rtx,
3419 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
3420 const0_rtx, operands[4],
3421 const0_rtx, const0_rtx,
;; Conditional arithmetic on V_ALL vectors for the codes plus, minus and
;; mult.  Operand 1 is a DImode register, operands 2 and 3 are the inputs
;; of the operation and operand 4 is a further vector register.  Operands
;; 1 and 2 are forced into registers, then <expander><mode>3_exec is
;; emitted with operands 0, 2, 3 and 4 as its first four arguments
;; (operand 4 is presumably the value for inactive lanes -- confirm against
;; the _exec patterns).
3426 (define_code_iterator cond_op [plus minus mult])
3428 (define_expand "cond_<expander><mode>"
3429 [(match_operand:V_ALL 0 "register_operand")
3430 (match_operand:DI 1 "register_operand")
3432 (match_operand:V_ALL 2 "gcn_alu_operand")
3433 (match_operand:V_ALL 3 "gcn_alu_operand"))
3434 (match_operand:V_ALL 4 "register_operand")]
3437 operands[1] = force_reg (DImode, operands[1]);
3438 operands[2] = force_reg (<MODE>mode, operands[2]);
3440 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
3441 operands[3], operands[4],
3446 ;; TODO smin umin smax umax
;; Conditional bitwise operations (and, ior, xor) on V_INT vectors.  The
;; expansion has the same shape as the arithmetic cond_<expander><mode>
;; expander: operands 1 and 2 are forced into registers and
;; <expander><mode>3_exec is emitted with operands 0, 2, 3 and 4 as its
;; first four arguments.
3447 (define_code_iterator cond_bitop [and ior xor])
3449 (define_expand "cond_<expander><mode>"
3450 [(match_operand:V_INT 0 "register_operand")
3451 (match_operand:DI 1 "register_operand")
3453 (match_operand:V_INT 2 "gcn_alu_operand")
3454 (match_operand:V_INT 3 "gcn_alu_operand"))
3455 (match_operand:V_INT 4 "register_operand")]
3458 operands[1] = force_reg (DImode, operands[1]);
3459 operands[2] = force_reg (<MODE>mode, operands[2]);
3461 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
3462 operands[3], operands[4],
3468 ;; {{{ Vector reductions
;; Int iterators over the *_DPP_SHR reduction unspecs.
;; REDUC_2REG_UNSPEC is the iterator used by the V_DI
;; *<reduc_op>_dpp_shr_<mode> split pattern in this section.
3470 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
3471 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
3474 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3476 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
3478 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3480 ; FIXME: Isn't there a better way of doing this?
;; Maps every reduction unspec to its own name, so that the current
;; iterator value can be written inside conditions and C code as
;; <reduc_unspec> (e.g. when calling gcn_expand_dpp_shr_insn).
3481 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
3482 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
3483 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
3484 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
3485 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
3486 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
3487 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
3488 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
;; Maps every reduction unspec to the operation name that is substituted
;; into pattern names such as reduc_<reduc_op>_scal_<mode> and
;; *<reduc_op>_dpp_shr_<mode>.
3490 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
3491 (UNSPEC_SMAX_DPP_SHR "smax")
3492 (UNSPEC_UMIN_DPP_SHR "umin")
3493 (UNSPEC_UMAX_DPP_SHR "umax")
3494 (UNSPEC_PLUS_DPP_SHR "plus")
3495 (UNSPEC_AND_DPP_SHR "and")
3496 (UNSPEC_IOR_DPP_SHR "ior")
3497 (UNSPEC_XOR_DPP_SHR "xor")])
;; Maps every reduction unspec to the mnemonic template string that the
;; *<reduc_op>_dpp_shr_<mode> insn passes to gcn_expand_dpp_shr_insn.
3499 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
3500 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
3501 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
3502 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
3503 (UNSPEC_PLUS_DPP_SHR "v_add%U0")
3504 (UNSPEC_AND_DPP_SHR "v_and%B0")
3505 (UNSPEC_IOR_DPP_SHR "v_or%B0")
3506 (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
;; Reduce vector operand 1 to the scalar operand 0.
;; gcn_expand_reduc_scalar is called on the input vector and its result is
;; fed to vec_extract<mode><scalar_mode>, which reads the last lane
;; (index GET_MODE_NUNITS - 1) into operand 0.
3508 (define_expand "reduc_<reduc_op>_scal_<mode>"
3509 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
3510 (unspec:<SCALAR_MODE>
3511 [(match_operand:V_ALL 1 "register_operand")]
3515 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
3518 rtx last_lane = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
3519 emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], tmp,
3525 ;; Warning: This "-ffast-math" implementation converts in-order reductions
3526 ;; into associative reductions. It's also used where OpenMP or
3527 ;; OpenACC parallelization has already broken the in-order semantics.
;; operand 0 = operand 1 + (sum of the lanes of vector operand 2).
;; The V_FP vector is reduced into a scalar temporary with
;; reduc_plus_scal_<mode>, and the temporary is then added to the scalar
;; operand 1 with add<scalar_mode>3.  The pattern is enabled only when
;; pseudos can be created and at least one of flag_openacc, flag_openmp or
;; flag_associative_math is set.
3528 (define_expand "fold_left_plus_<mode>"
3529 [(match_operand:<SCALAR_MODE> 0 "register_operand")
3530 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
3531 (match_operand:V_FP 2 "gcn_alu_operand")]
3532 "can_create_pseudo_p ()
3533 && (flag_openacc || flag_openmp
3534 || flag_associative_math)"
3536 rtx dest = operands[0];
3537 rtx scalar = operands[1];
3538 rtx vector = operands[2];
3539 rtx tmp = gen_reg_rtx (<SCALAR_MODE>mode);
3541 emit_insn (gen_reduc_plus_scal_<mode> (tmp, vector));
3542 emit_insn (gen_add<scalar_mode>3 (dest, scalar, tmp));
;; DPP shift-right reduction step for vectors that fit one register
;; (V_1REG).  Operands 1 and 2 are the vector inputs and operand 3 is a
;; constant integer.  The assembler text comes from
;; gcn_expand_dpp_shr_insn, which is given the <reduc_insn> template, the
;; unspec and INTVAL of operand 3.  The condition disables the pattern for
;; integer-mode PLUS on TARGET_GCN3.
3546 (define_insn "*<reduc_op>_dpp_shr_<mode>"
3547 [(set (match_operand:V_1REG 0 "register_operand" "=v")
3549 [(match_operand:V_1REG 1 "register_operand" "v")
3550 (match_operand:V_1REG 2 "register_operand" "v")
3551 (match_operand:SI 3 "const_int_operand" "n")]
3553 ; GCN3 requires a carry out, GCN5 not
3554 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
3555 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
3557 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
3558 <reduc_unspec>, INTVAL (operands[3]));
3560 [(set_attr "type" "vop_dpp")
3561 (set_attr "length" "8")])
;; V_DI variant of the DPP shift-right reduction step, iterating over
;; REDUC_2REG_UNSPEC.  It is split into two operations with the same
;; unspec and the same operand 3: one on part 0 and one on part 1 of
;; operands 0, 1 and 2, where the parts are obtained with gcn_operand_part
;; and stored in operands 4-9.
3563 (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
3564 [(set (match_operand:V_DI 0 "register_operand" "=v")
3566 [(match_operand:V_DI 1 "register_operand" "v")
3567 (match_operand:V_DI 2 "register_operand" "v")
3568 (match_operand:SI 3 "const_int_operand" "n")]
3569 REDUC_2REG_UNSPEC))]
3575 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
3578 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
3580 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
3581 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
3582 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
3583 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
3584 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
3585 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3587 [(set_attr "type" "vmult")
3588 (set_attr "length" "16")])
3590 ; Special cases for addition.
;; DPP shift-right addition step for single-register integer vectors
;; (V_INT_1REG) that clobbers VCC_REG.  The assembler text is produced by
;; gcn_expand_dpp_shr_insn with the "v_add%^_u32" template; note that
;; <VnSI>mode, not <MODE>mode, is passed as the mode argument.
3592 (define_insn "*plus_carry_dpp_shr_<mode>"
3593 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3595 [(match_operand:V_INT_1REG 1 "register_operand" "v")
3596 (match_operand:V_INT_1REG 2 "register_operand" "v")
3597 (match_operand:SI 3 "const_int_operand" "n")]
3598 UNSPEC_PLUS_CARRY_DPP_SHR))
3599 (clobber (reg:DI VCC_REG))]
3602 return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
3603 UNSPEC_PLUS_CARRY_DPP_SHR,
3604 INTVAL (operands[3]));
3606 [(set_attr "type" "vop_dpp")
3607 (set_attr "length" "8")])
;; DPP shift-right addition step on V_SI vectors with an extra DImode
;; input, operand 4, constrained to "cV" (the carry input, going by the
;; pattern name).  The pattern clobbers VCC_REG and is printed through
;; gcn_expand_dpp_shr_insn using the "v_addc%^_u32" template.
3609 (define_insn "*plus_carry_in_dpp_shr_<mode>"
3610 [(set (match_operand:V_SI 0 "register_operand" "=v")
3612 [(match_operand:V_SI 1 "register_operand" "v")
3613 (match_operand:V_SI 2 "register_operand" "v")
3614 (match_operand:SI 3 "const_int_operand" "n")
3615 (match_operand:DI 4 "register_operand" "cV")]
3616 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3617 (clobber (reg:DI VCC_REG))]
3620 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
3621 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
3622 INTVAL (operands[3]));
3624 [(set_attr "type" "vop_dpp")
3625 (set_attr "length" "8")])
;; V_DI addition step with carry.  It is split into two parallels that
;; each clobber VCC_REG:
;;   1. UNSPEC_PLUS_CARRY_DPP_SHR on part 0 of the operands;
;;   2. UNSPEC_PLUS_CARRY_IN_DPP_SHR on part 1, which additionally takes
;;      (reg:DI VCC_REG) as an input.
;; The parts are obtained with gcn_operand_part and stored in operands 4-9.
3627 (define_insn_and_split "*plus_carry_dpp_shr_<mode>"
3628 [(set (match_operand:V_DI 0 "register_operand" "=v")
3630 [(match_operand:V_DI 1 "register_operand" "v")
3631 (match_operand:V_DI 2 "register_operand" "v")
3632 (match_operand:SI 3 "const_int_operand" "n")]
3633 UNSPEC_PLUS_CARRY_DPP_SHR))
3634 (clobber (reg:DI VCC_REG))]
3638 [(parallel [(set (match_dup 4)
3640 [(match_dup 6) (match_dup 8) (match_dup 3)]
3641 UNSPEC_PLUS_CARRY_DPP_SHR))
3642 (clobber (reg:DI VCC_REG))])
3643 (parallel [(set (match_dup 5)
3645 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
3646 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3647 (clobber (reg:DI VCC_REG))])]
3649 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
3650 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
3651 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
3652 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
3653 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
3654 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3656 [(set_attr "type" "vmult")
3657 (set_attr "length" "16")])
3660 ;; {{{ Miscellaneous
;; Linear series for V_SI vectors: operand 0 = v1 * operand 2 + operand 1,
;; where v1 is hard register VGPR 1 viewed in <MODE>mode (presumably
;; holding the lane numbers -- confirm).  The scalar operands are applied
;; with the mul<mode>3_dup and add<mode>3_dup patterns.
3662 (define_expand "vec_series<mode>"
3663 [(match_operand:V_SI 0 "register_operand")
3664 (match_operand:SI 1 "gcn_alu_operand")
3665 (match_operand:SI 2 "gcn_alu_operand")]
3668 rtx tmp = gen_reg_rtx (<MODE>mode);
3669 rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
3671 emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
3672 emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
;; Linear series for V_DI vectors.  VGPR 1 is referenced in <VnSI>mode and
;; multiplied by the DImode operand 2 through mul<mode>3_zext_dup2.
;; Operand 1 is broadcast into a new vector register with
;; vec_duplicate<mode>, and the two are summed with add<mode>3 into
;; operand 0.
3676 (define_expand "vec_series<mode>"
3677 [(match_operand:V_DI 0 "register_operand")
3678 (match_operand:DI 1 "gcn_alu_operand")
3679 (match_operand:DI 2 "gcn_alu_operand")]
3682 rtx tmp = gen_reg_rtx (<MODE>mode);
3683 rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
3684 rtx op1vec = gen_reg_rtx (<MODE>mode);
3686 emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2]));
3687 emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1]));
3688 emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec));