1 ;; Copyright (C) 2016-2024 Free Software Foundation, Inc.
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
17 ;; {{{ Vector iterators
18 ; SV iterators include both scalar and vector modes.
20 ; Vector modes for specific types
; QI-element vector modes, one per lane count from 2 to 64.
21 (define_mode_iterator V_QI
22 [V2QI V4QI V8QI V16QI V32QI V64QI])
; HI-element vector modes, one per lane count from 2 to 64.
23 (define_mode_iterator V_HI
24 [V2HI V4HI V8HI V16HI V32HI V64HI])
; HF-element vector modes, one per lane count from 2 to 64.
25 (define_mode_iterator V_HF
26 [V2HF V4HF V8HF V16HF V32HF V64HF])
; SI-element vector modes, one per lane count from 2 to 64.
27 (define_mode_iterator V_SI
28 [V2SI V4SI V8SI V16SI V32SI V64SI])
; SF-element vector modes, one per lane count from 2 to 64.
29 (define_mode_iterator V_SF
30 [V2SF V4SF V8SF V16SF V32SF V64SF])
; DI-element vector modes, one per lane count from 2 to 64.
31 (define_mode_iterator V_DI
32 [V2DI V4DI V8DI V16DI V32DI V64DI])
; DF-element vector modes, one per lane count from 2 to 64.
33 (define_mode_iterator V_DF
34 [V2DF V4DF V8DF V16DF V32DF V64DF])
36 ; Vector modes whose element modes are smaller than a dword
37 (define_mode_iterator V_QIHI
45 ; Vector modes for one vector register
; Lists the QI, HI, SI, HF and SF element vectors at every lane count from
; 2 to 64.  DI, DF and TI element vectors are not listed here.
46 (define_mode_iterator V_1REG
47 [V2QI V2HI V2SI V2HF V2SF
48 V4QI V4HI V4SI V4HF V4SF
49 V8QI V8HI V8SI V8HF V8SF
50 V16QI V16HI V16SI V16HF V16SF
51 V32QI V32HI V32SI V32HF V32SF
52 V64QI V64HI V64SI V64HF V64SF])
; Same mode list as V_1REG.  A second iterator allows a single pattern to
; range over two of these modes independently, as
; "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop" does.  Keep this list in
; step with V_1REG.
53 (define_mode_iterator V_1REG_ALT
54 [V2QI V2HI V2SI V2HF V2SF
55 V4QI V4HI V4SI V4HF V4SF
56 V8QI V8HI V8SI V8HF V8SF
57 V16QI V16HI V16SI V16HF V16SF
58 V32QI V32HI V32SI V32HF V32SF
59 V64QI V64HI V64SI V64HF V64SF])
61 (define_mode_iterator V_INT_1REG
68 (define_mode_iterator V_INT_1REG_ALT
75 (define_mode_iterator V_FP_1REG
83 ; Vector modes for two vector registers
84 (define_mode_iterator V_2REG
91 (define_mode_iterator V_2REG_ALT
99 ; Vector modes for four vector registers
; TI-element vector modes, one per lane count from 2 to 64.
100 (define_mode_iterator V_4REG [V2TI V4TI V8TI V16TI V32TI V64TI])
; Same mode list as V_4REG; paired with it in
; "vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop" so that the source and
; destination modes can vary independently.
101 (define_mode_iterator V_4REG_ALT [V2TI V4TI V8TI V16TI V32TI V64TI])
103 ; Vector modes with native support
; Every mode of V_ALL except the QI-element vectors: HI, HF, SI, SF, DI and
; DF elements at each lane count from 2 to 64.
104 (define_mode_iterator V_noQI
105 [V2HI V2HF V2SI V2SF V2DI V2DF
106 V4HI V4HF V4SI V4SF V4DI V4DF
107 V8HI V8HF V8SI V8SF V8DI V8DF
108 V16HI V16HF V16SI V16SF V16DI V16DF
109 V32HI V32HF V32SI V32SF V32DI V32DF
110 V64HI V64HF V64SI V64SF V64DI V64DF])
; Every mode of V_ALL except the QI- and HI-element vectors: HF, SI, SF, DI
; and DF elements at each lane count from 2 to 64.  Note that HF is kept
; even though HI is dropped.
111 (define_mode_iterator V_noHI
112 [V2HF V2SI V2SF V2DI V2DF
113 V4HF V4SI V4SF V4DI V4DF
114 V8HF V8SI V8SF V8DI V8DF
115 V16HF V16SI V16SF V16DI V16DF
116 V32HF V32SI V32SF V32DI V32DF
117 V64HF V64SI V64SF V64DI V64DF])
119 (define_mode_iterator V_INT_noQI
126 (define_mode_iterator V_INT_noHI
134 (define_mode_iterator SV_SFDF
143 ; All modes in which we want to do more than just moves.
; QI, HI, HF, SI, SF, DI and DF element vectors at each lane count from 2 to
; 64.  The TI-element vectors are not included; they appear only in the
; move-oriented iterators (V_MOV, V_4REG).
144 (define_mode_iterator V_ALL
145 [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
146 V4QI V4HI V4HF V4SI V4SF V4DI V4DF
147 V8QI V8HI V8HF V8SI V8SF V8DI V8DF
148 V16QI V16HI V16HF V16SI V16SF V16DI V16DF
149 V32QI V32HI V32HF V32SI V32SF V32DI V32DF
150 V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
; Same mode list as V_ALL, provided as a separate iterator.
; NOTE(review): presumably for patterns that iterate over two vector modes
; at once (compare V_MOV with V_MOV_ALT); no use is visible in this part of
; the file.  Keep this list in step with V_ALL.
151 (define_mode_iterator V_ALL_ALT
152 [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
153 V4QI V4HI V4HF V4SI V4SF V4DI V4DF
154 V8QI V8HI V8HF V8SI V8SF V8DI V8DF
155 V16QI V16HI V16HF V16SI V16SF V16DI V16DF
156 V32QI V32HI V32HF V32SI V32SF V32DI V32DF
157 V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
159 (define_mode_iterator V_INT
163 V16QI V16HI V16SI V16DI
164 V32QI V32HI V32SI V32DI
165 V64QI V64HI V64SI V64DI])
166 (define_mode_iterator V_FP
173 (define_mode_iterator SV_FP
182 ; All modes that need moves, including those without many insns.
; The V_ALL modes plus the TI-element vectors, at each lane count from 2 to
; 64.  This is the iterator used by the "mov<mode>" expander and by the
; vec_merge substitutions.
183 (define_mode_iterator V_MOV
184 [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
185 V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
186 V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
187 V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
188 V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
189 V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
; Same mode list as V_MOV.  Paired with V_MOV in the
; "vec_extract<V_MOV:mode><V_MOV_ALT:mode>" and
; "vec_init<V_MOV:mode><V_MOV_ALT:mode>" expanders, whose conditions require
; the two modes to share a scalar mode and differ in vectorization factor.
; Keep this list in step with V_MOV.
190 (define_mode_iterator V_MOV_ALT
191 [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
192 V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
193 V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
194 V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
195 V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
196 V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
; Maps a mode to the lower-case name of its element mode.  Scalar modes map
; to themselves; each vector mode maps to the scalar it is built from.
; The lower-case form is the one spliced into pattern names, e.g.
; "vec_extract<mode><scalar_mode>".
198 (define_mode_attr scalar_mode
199 [(QI "qi") (HI "hi") (SI "si") (TI "ti")
200 (HF "hf") (SF "sf") (DI "di") (DF "df")
201 (V2QI "qi") (V2HI "hi") (V2SI "si") (V2TI "ti")
202 (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
203 (V4QI "qi") (V4HI "hi") (V4SI "si") (V4TI "ti")
204 (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df")
205 (V8QI "qi") (V8HI "hi") (V8SI "si") (V8TI "ti")
206 (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df")
207 (V16QI "qi") (V16HI "hi") (V16SI "si") (V16TI "ti")
208 (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
209 (V32QI "qi") (V32HI "hi") (V32SI "si") (V32TI "ti")
210 (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
211 (V64QI "qi") (V64HI "hi") (V64SI "si") (V64TI "ti")
212 (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
; Upper-case counterpart of scalar_mode: maps a mode to the name of its
; element mode (scalars map to themselves).  The upper-case form is the one
; used where a machine mode is required, e.g. "match_operand:<SCALAR_MODE>"
; or "<V_MOV:SCALAR_MODE>mode" in C conditions.
214 (define_mode_attr SCALAR_MODE
215 [(QI "QI") (HI "HI") (SI "SI") (TI "TI")
216 (HF "HF") (SF "SF") (DI "DI") (DF "DF")
217 (V2QI "QI") (V2HI "HI") (V2SI "SI") (V2TI "TI")
218 (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF")
219 (V4QI "QI") (V4HI "HI") (V4SI "SI") (V4TI "TI")
220 (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF")
221 (V8QI "QI") (V8HI "HI") (V8SI "SI") (V8TI "TI")
222 (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF")
223 (V16QI "QI") (V16HI "HI") (V16SI "SI") (V16TI "TI")
224 (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
225 (V32QI "QI") (V32HI "HI") (V32SI "SI") (V32TI "TI")
226 (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
227 (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64TI "TI")
228 (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
; Maps a mode to the lower-case name of the SI-element vector mode with the
; same number of lanes; every scalar mode maps to plain "si".  The
; lower-case form is used to build generator names such as
; "gen_vec_series<V_MOV:vnsi>".
230 (define_mode_attr vnsi
231 [(QI "si") (HI "si") (SI "si") (TI "si")
232 (HF "si") (SF "si") (DI "si") (DF "si")
233 (V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si")
234 (V2SF "v2si") (V2DI "v2si") (V2DF "v2si") (V2TI "v2si")
235 (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si")
236 (V4SF "v4si") (V4DI "v4si") (V4DF "v4si") (V4TI "v4si")
237 (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si")
238 (V8SF "v8si") (V8DI "v8si") (V8DF "v8si") (V8TI "v8si")
239 (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
240 (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") (V16TI "v16si")
241 (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
242 (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") (V32TI "v32si")
243 (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
244 (V64SF "v64si") (V64DI "v64si") (V64DF "v64si") (V64TI "v64si")])
; Upper-case counterpart of vnsi: maps a mode to the SI-element vector mode
; with the same number of lanes (scalars map to "SI").  Used where a machine
; mode is needed, e.g. "gen_reg_rtx (<V_MOV:VnSI>mode)".
246 (define_mode_attr VnSI
247 [(QI "SI") (HI "SI") (SI "SI") (TI "SI")
248 (HF "SI") (SF "SI") (DI "SI") (DF "SI")
249 (V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI")
250 (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") (V2TI "V2SI")
251 (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI")
252 (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") (V4TI "V4SI")
253 (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI")
254 (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI") (V8TI "V8SI")
255 (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
256 (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") (V16TI "V16SI")
257 (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
258 (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") (V32TI "V32SI")
259 (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
260 (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI") (V64TI "V64SI")])
; Maps a vector mode to the lower-case name of the DI-element vector mode
; with the same number of lanes.  Unlike vnsi, this table has no entries for
; scalar modes, so it can only be applied to vector modes.
262 (define_mode_attr vndi
263 [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di")
264 (V2SF "v2di") (V2DI "v2di") (V2DF "v2di") (V2TI "v2di")
265 (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di")
266 (V4SF "v4di") (V4DI "v4di") (V4DF "v4di") (V4TI "v4di")
267 (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di")
268 (V8SF "v8di") (V8DI "v8di") (V8DF "v8di") (V8TI "v8di")
269 (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
270 (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") (V16TI "v16di")
271 (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
272 (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") (V32TI "v32di")
273 (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
274 (V64SF "v64di") (V64DI "v64di") (V64DF "v64di") (V64TI "v64di")])
; Upper-case counterpart of vndi: maps a vector mode to the DI-element
; vector mode with the same number of lanes.  No scalar entries.  This is
; the mode given to the scratch/clobber operands of the move patterns
; ("match_scratch:<VnDI>") and to the scratch created in the "mov<mode>"
; expander.
276 (define_mode_attr VnDI
277 [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI")
278 (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI") (V2TI "V2DI")
279 (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI")
280 (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI") (V4TI "V4DI")
281 (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI")
282 (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI") (V8TI "V8DI")
283 (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
284 (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") (V16TI "V16DI")
285 (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
286 (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") (V32TI "V32DI")
287 (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
288 (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI") (V64TI "V64DI")])
; Maps the integer QI/HI/SI-element vector modes to the strings "BYTE_0",
; "WORD_0" and "DWORD" respectively.  Only these three element types are
; covered; floating-point, DI and TI element vectors have no entry.
; NOTE(review): presumably these are SDWA sub-dword selector names emitted
; into assembler templates; the users are not visible in this part of the
; file.
290 (define_mode_attr sdwa
291 [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD")
292 (V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD")
293 (V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD")
294 (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD")
295 (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD")
296 (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
301 (define_subst_attr "exec" "vec_merge"
303 (define_subst_attr "exec_clobber" "vec_merge_with_clobber"
305 (define_subst_attr "exec_vcc" "vec_merge_with_vcc"
307 (define_subst_attr "exec_scatter" "scatter_store"
310 (define_subst "vec_merge"
311 [(set (match_operand:V_MOV 0)
312 (match_operand:V_MOV 1))]
317 (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
318 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
320 (define_subst "vec_merge_with_clobber"
321 [(set (match_operand:V_MOV 0)
322 (match_operand:V_MOV 1))
323 (clobber (match_operand 2))]
328 (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
329 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
330 (clobber (match_dup 2))])
332 (define_subst "vec_merge_with_vcc"
333 [(set (match_operand:V_MOV 0)
334 (match_operand:V_MOV 1))
335 (set (match_operand:DI 2)
336 (match_operand:DI 3))]
342 (match_operand:V_MOV 4 "gcn_register_or_unspec_operand" "U0")
343 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
345 (and:DI (match_dup 3)
346 (reg:DI EXEC_REG)))])])
348 (define_subst "scatter_store"
349 [(set (mem:BLK (scratch))
357 [(set (mem:BLK (scratch))
363 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
369 ; This is the entry point for all vector register moves. Memory accesses can
370 ; come this way also, but will more usually use the reload_in/out,
371 ; gather/scatter, maskload/store, etc.
373 (define_expand "mov<mode>"
374 [(set (match_operand:V_MOV 0 "nonimmediate_operand")
375 (match_operand:V_MOV 1 "general_operand"))]
378 /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
379 registers, but we can convert the MEM to a mode that does work. */
380 if (MEM_P (operands[0]) && !SUBREG_P (operands[0])
381 && SUBREG_P (operands[1])
382 && GET_MODE_SIZE (GET_MODE (operands[1]))
383 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))))
385 rtx src = SUBREG_REG (operands[1]);
386 rtx mem = copy_rtx (operands[0]);
387 PUT_MODE_RAW (mem, GET_MODE (src));
388 emit_move_insn (mem, src);
391 if (MEM_P (operands[1]) && !SUBREG_P (operands[1])
392 && SUBREG_P (operands[0])
393 && GET_MODE_SIZE (GET_MODE (operands[0]))
394 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))))
396 rtx dest = SUBREG_REG (operands[0]);
397 rtx mem = copy_rtx (operands[1]);
398 PUT_MODE_RAW (mem, GET_MODE (dest));
399 emit_move_insn (dest, mem);
403 /* SUBREG of MEM is not supported. */
404 gcc_assert ((!SUBREG_P (operands[0])
405 || !MEM_P (SUBREG_REG (operands[0])))
406 && (!SUBREG_P (operands[1])
407 || !MEM_P (SUBREG_REG (operands[1]))));
409 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
411 operands[1] = force_reg (<MODE>mode, operands[1]);
412 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
413 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
414 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
415 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
418 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
421 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
423 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
424 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
425 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
426 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
429 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
432 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
434 gcc_assert (!reload_completed);
435 rtx scratch = gen_reg_rtx (<VnDI>mode);
436 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
441 ; A pseudo instruction that helps LRA use the "U0" constraint.
443 (define_insn "mov<mode>_unspec"
444 [(set (match_operand:V_MOV 0 "nonimmediate_operand" "=v")
445 (match_operand:V_MOV 1 "gcn_unspec_operand" " U"))]
448 [(set_attr "type" "unknown")
449 (set_attr "length" "0")])
451 (define_insn "*mov<mode>"
452 [(set (match_operand:V_1REG 0 "nonimmediate_operand")
453 (match_operand:V_1REG 1 "general_operand"))]
455 {@ [cons: =0, 1; attrs: type, length, gcn_version]
456 [v ,vA;vop1 ,4,* ] v_mov_b32\t%0, %1
458 [v ,a ;vop3p_mai,8,* ] v_accvgpr_read_b32\t%0, %1
459 [$a ,v ;vop3p_mai,8,* ] v_accvgpr_write_b32\t%0, %1
460 [a ,a ;vop1 ,4,cdna2] v_accvgpr_mov_b32\t%0, %1
463 (define_insn "mov<mode>_exec"
464 [(set (match_operand:V_1REG 0 "nonimmediate_operand")
466 (match_operand:V_1REG 1 "general_operand")
467 (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand")
468 (match_operand:DI 3 "register_operand")))
469 (clobber (match_scratch:<VnDI> 4))]
470 "!MEM_P (operands[0]) || REG_P (operands[1])"
471 {@ [cons: =0, 1, 2, 3, =4; attrs: type, length]
472 [v,vA,U0,e ,X ;vop1 ,4 ] v_mov_b32\t%0, %1
473 [v,B ,U0,e ,X ;vop1 ,8 ] v_mov_b32\t%0, %1
474 [v,v ,vA,cV,X ;vop2 ,4 ] v_cndmask_b32\t%0, %2, %1, vcc
475 [v,vA,vA,Sv,X ;vop3a,8 ] v_cndmask_b32\t%0, %2, %1, %3
476 [v,m ,U0,e ,&v;* ,16] #
477 [m,v ,U0,e ,&v;* ,16] #
480 ; This variant does not accept an unspec, but does permit MEM
481 ; read/modify/write which is necessary for maskstore.
483 ;(define_insn "*mov<mode>_exec_match"
484 ; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
486 ; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
488 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
489 ; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
490 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
496 ; [(set_attr "type" "vop1,vop1,*,*")
497 ; (set_attr "length" "4,8,16,16")])
499 (define_insn "*mov<mode>"
500 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v,$a,a")
501 (match_operand:V_2REG 1 "general_operand" "vDB,a, v,a"))]
504 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
505 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
507 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
508 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
509 return \"v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1\"; \
511 return \"v_accvgpr_read_b32\t%H0, %H1\;v_accvgpr_read_b32\t%L0, %L1\";
512 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
513 return \"v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\"; \
515 return \"v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%L0, %L1\";
516 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
517 return \"v_accvgpr_mov_b32\t%L0, %L1\;v_accvgpr_mov_b32\t%H0, %H1\"; \
519 return \"v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1\";"
520 [(set_attr "type" "vmult,vmult,vmult,vmult")
521 (set_attr "length" "16,16,16,8")
522 (set_attr "gcn_version" "*,*,*,cdna2")])
524 (define_insn "mov<mode>_exec"
525 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
527 (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
528 (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
529 " U0,vDA0,vDA0,U0,U0")
530 (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
531 (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
532 "!MEM_P (operands[0]) || REG_P (operands[1])"
534 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
535 switch (which_alternative)
538 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
540 return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
541 "v_cndmask_b32\t%H0, %H2, %H1, vcc";
543 return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
544 "v_cndmask_b32\t%H0, %H2, %H1, %3";
547 switch (which_alternative)
550 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
552 return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
553 "v_cndmask_b32\t%L0, %L2, %L1, vcc";
555 return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
556 "v_cndmask_b32\t%L0, %L2, %L1, %3";
561 [(set_attr "type" "vmult,vmult,vmult,*,*")
562 (set_attr "length" "16,16,16,16,16")])
564 (define_insn "*mov<mode>_4reg"
565 [(set (match_operand:V_4REG 0 "nonimmediate_operand")
566 (match_operand:V_4REG 1 "general_operand"))]
568 {@ [cons: =0, 1; attrs: type, length, gcn_version]
569 [v ,vDB;vmult,16,* ] v_mov_b32\t%L0, %L1\; v_mov_b32\t%H0, %H1\; v_mov_b32\t%J0, %J1\; v_mov_b32\t%K0, %K1
570 [v ,a ;vmult,32,* ] v_accvgpr_read_b32\t%L0, %L1\; v_accvgpr_read_b32\t%H0, %H1\; v_accvgpr_read_b32\t%J0, %J1\; v_accvgpr_read_b32\t%K0, %K1
571 [$a,v ;vmult,32,* ] v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%J0, %J1\;v_accvgpr_write_b32\t%K0, %K1
572 [a ,a ;vmult,32,cdna2] v_accvgpr_mov_b32\t%L0, %L1\; v_accvgpr_mov_b32\t%H0, %H1\; v_accvgpr_mov_b32\t%J0, %J1\; v_accvgpr_mov_b32\t%K0, %K1
575 (define_insn "mov<mode>_exec"
576 [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, v, v, m")
578 (match_operand:V_4REG 1 "general_operand" "vDB, v0, v0, m, v")
579 (match_operand:V_4REG 2 "gcn_alu_or_unspec_operand"
580 " U0,vDA0,vDA0,U0,U0")
581 (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
582 (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
583 "!MEM_P (operands[0]) || REG_P (operands[1])"
585 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
586 switch (which_alternative)
589 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
590 "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
592 return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
593 "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
594 "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
595 "v_cndmask_b32\t%K0, %K2, %K1, vcc";
597 return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
598 "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
599 "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
600 "v_cndmask_b32\t%K0, %K2, %K1, %3";
603 switch (which_alternative)
606 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\;"
607 "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
609 return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
610 "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
611 "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
612 "v_cndmask_b32\t%K0, %K2, %K1, vcc";
614 return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
615 "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
616 "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
617 "v_cndmask_b32\t%K0, %K2, %K1, %3";
622 [(set_attr "type" "vmult,vmult,vmult,*,*")
623 (set_attr "length" "32")])
625 ; This variant does not accept an unspec, but does permit MEM
626 ; read/modify/write which is necessary for maskstore.
628 ;(define_insn "*mov<mode>_exec_match"
629 ; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
631 ; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
633 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
634 ; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
635 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
637 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
638 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
640 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
643 ; [(set_attr "type" "vmult,*,*")
644 ; (set_attr "length" "16,16,16")])
646 ; A SGPR-base load looks like:
649 ; There's no hardware instruction that corresponds to this, but vector base
650 ; addresses are placed in an SGPR because it is easier to add to a vector.
651 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
654 ; vT = v1 << log2(element-size)
658 (define_insn "@mov<mode>_sgprbase"
659 [(set (match_operand:V_1REG 0 "nonimmediate_operand")
661 [(match_operand:V_1REG 1 "general_operand")]
663 (clobber (match_operand:<VnDI> 2 "register_operand"))]
664 "lra_in_progress || reload_completed"
665 {@ [cons: =0, 1, =2; attrs: type, length, gcn_version]
666 [v,vA,&v;vop1,4 ,* ] v_mov_b32\t%0, %1
667 [v,vB,&v;vop1,8 ,* ] ^
670 [a,m ,&v;* ,12,cdna2] #
671 [m,a ,&v;* ,12,cdna2] #
674 (define_insn "@mov<mode>_sgprbase"
675 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m, a, m")
677 [(match_operand:V_2REG 1 "general_operand" "vDB, m, v, m, a")]
679 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v,&v"))]
680 "lra_in_progress || reload_completed"
682 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
683 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
685 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
690 [(set_attr "type" "vmult,*,*,*,*")
691 (set_attr "length" "8,12,12,12,12")
692 (set_attr "gcn_version" "*,*,*,cdna2,cdna2")])
694 (define_insn "@mov<mode>_sgprbase"
695 [(set (match_operand:V_4REG 0 "nonimmediate_operand")
697 [(match_operand:V_4REG 1 "general_operand")]
699 (clobber (match_operand:<VnDI> 2 "register_operand"))]
700 "lra_in_progress || reload_completed"
701 {@ [cons: =0, 1, =2; attrs: type, length]
702 [v,vDB,&v;vmult,8 ] v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1
707 ; Expand scalar addresses into gather/scatter patterns
710 [(set (match_operand:V_MOV 0 "memory_operand")
712 [(match_operand:V_MOV 1 "general_operand")]
714 (clobber (match_scratch:<VnDI> 2))]
716 [(set (mem:BLK (scratch))
717 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
720 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
723 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
724 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
728 [(set (match_operand:V_MOV 0 "memory_operand")
730 (match_operand:V_MOV 1 "general_operand")
731 (match_operand:V_MOV 2 "")
732 (match_operand:DI 3 "gcn_exec_reg_operand")))
733 (clobber (match_scratch:<VnDI> 4))]
735 [(set (mem:BLK (scratch))
736 (unspec:BLK [(match_dup 5) (match_dup 1)
737 (match_dup 6) (match_dup 7) (match_dup 3)]
740 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
744 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
745 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
749 [(set (match_operand:V_MOV 0 "nonimmediate_operand")
751 [(match_operand:V_MOV 1 "memory_operand")]
753 (clobber (match_scratch:<VnDI> 2))]
756 (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
760 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
763 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
764 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
768 [(set (match_operand:V_MOV 0 "nonimmediate_operand")
770 (match_operand:V_MOV 1 "memory_operand")
771 (match_operand:V_MOV 2 "")
772 (match_operand:DI 3 "gcn_exec_reg_operand")))
773 (clobber (match_scratch:<VnDI> 4))]
777 (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
783 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
787 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
788 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
791 ; TODO: Add zero/sign extending variants.
796 ; v_writelane and v_readlane work regardless of exec flags.
797 ; We allow source to be scratch.
799 ; FIXME: these patterns should also accept "A"-constraint immediates.
801 (define_insn "*vec_set<mode>"
802 [(set (match_operand:V_1REG 0 "register_operand" "= v")
804 (vec_duplicate:V_1REG
805 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
806 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
807 (ashift (const_int 1)
808 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
810 "v_writelane_b32 %0, %1, %2"
811 [(set_attr "type" "vop3a")
812 (set_attr "length" "8")
813 (set_attr "exec" "none")
814 (set_attr "laneselect" "yes")])
816 ; FIXME: 64bit operations really should be splitters, but I am not sure how
817 ; to represent vertical subregs.
818 (define_insn "*vec_set<mode>"
819 [(set (match_operand:V_2REG 0 "register_operand" "= v")
821 (vec_duplicate:V_2REG
822 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
823 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
824 (ashift (const_int 1)
825 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
827 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
828 [(set_attr "type" "vmult")
829 (set_attr "length" "16")
830 (set_attr "exec" "none")
831 (set_attr "laneselect" "yes")])
833 (define_expand "vec_set<mode>"
834 [(set (match_operand:V_MOV 0 "register_operand")
837 (match_operand:<SCALAR_MODE> 1 "register_operand"))
839 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
842 (define_insn "*vec_set<mode>_1"
843 [(set (match_operand:V_1REG 0 "register_operand" "=v")
845 (vec_duplicate:V_1REG
846 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
847 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
848 (match_operand:SI 2 "const_int_operand" " i")))]
849 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
851 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
852 return "v_writelane_b32 %0, %1, %2";
854 [(set_attr "type" "vop3a")
855 (set_attr "length" "8")
856 (set_attr "exec" "none")
857 (set_attr "laneselect" "yes")])
859 (define_insn "*vec_set<mode>_1"
860 [(set (match_operand:V_2REG 0 "register_operand" "=v")
862 (vec_duplicate:V_2REG
863 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
864 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
865 (match_operand:SI 2 "const_int_operand" " i")))]
866 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
868 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
869 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
871 [(set_attr "type" "vmult")
872 (set_attr "length" "16")
873 (set_attr "exec" "none")
874 (set_attr "laneselect" "yes")])
876 (define_insn "vec_duplicate<mode><exec>"
877 [(set (match_operand:V_1REG 0 "register_operand" "=v")
878 (vec_duplicate:V_1REG
879 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
882 [(set_attr "type" "vop3a")
883 (set_attr "length" "8")])
885 (define_insn "vec_duplicate<mode><exec>"
886 [(set (match_operand:V_2REG 0 "register_operand" "= v")
887 (vec_duplicate:V_2REG
888 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
890 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
891 [(set_attr "type" "vop3a")
892 (set_attr "length" "16")])
894 (define_insn "vec_duplicate<mode><exec>"
895 [(set (match_operand:V_4REG 0 "register_operand" "= v")
896 (vec_duplicate:V_4REG
897 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
899 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
900 [(set_attr "type" "mult")
901 (set_attr "length" "32")])
903 (define_insn "vec_extract<mode><scalar_mode>"
904 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
905 (vec_select:<SCALAR_MODE>
906 (match_operand:V_1REG 1 "register_operand" " v")
907 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
909 "v_readlane_b32 %0, %1, %2"
910 [(set_attr "type" "vop3a")
911 (set_attr "length" "8")
912 (set_attr "exec" "none")
913 (set_attr "laneselect" "yes")])
915 (define_insn "vec_extract<mode><scalar_mode>"
916 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
917 (vec_select:<SCALAR_MODE>
918 (match_operand:V_2REG 1 "register_operand" " v")
919 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
921 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
922 [(set_attr "type" "vmult")
923 (set_attr "length" "16")
924 (set_attr "exec" "none")
925 (set_attr "laneselect" "yes")])
927 (define_insn "vec_extract<mode><scalar_mode>"
928 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
929 (vec_select:<SCALAR_MODE>
930 (match_operand:V_4REG 1 "register_operand" " v")
931 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
933 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2\;v_readlane_b32 %J0, %J1, %2\;v_readlane_b32 %K0, %K1, %2"
934 [(set_attr "type" "vmult")
935 (set_attr "length" "32")
936 (set_attr "exec" "none")
937 (set_attr "laneselect" "yes")])
939 (define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
940 [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
941 (vec_select:V_1REG_ALT
942 (match_operand:V_1REG 1 "register_operand" " 0,v")
943 (match_operand 2 "ascending_zero_int_parallel" "")))]
944 "MODE_VF (<V_1REG_ALT:MODE>mode) < MODE_VF (<V_1REG:MODE>mode)
945 && <V_1REG_ALT:SCALAR_MODE>mode == <V_1REG:SCALAR_MODE>mode
946 /* This comment silences a warning for operands[2]. */"
948 ; in-place extract %0
950 [(set_attr "type" "vmult")
951 (set_attr "length" "0,8")])
953 (define_insn "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop"
954 [(set (match_operand:V_2REG_ALT 0 "register_operand" "=v,v")
955 (vec_select:V_2REG_ALT
956 (match_operand:V_2REG 1 "register_operand" " 0,v")
957 (match_operand 2 "ascending_zero_int_parallel" "")))]
958 "MODE_VF (<V_2REG_ALT:MODE>mode) < MODE_VF (<V_2REG:MODE>mode)
959 && <V_2REG_ALT:SCALAR_MODE>mode == <V_2REG:SCALAR_MODE>mode
960 /* This comment silences a warning for operands[2]. */"
962 ; in-place extract %0
963 v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
964 [(set_attr "type" "vmult")
965 (set_attr "length" "0,8")])
967 (define_insn "vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop"
968 [(set (match_operand:V_4REG_ALT 0 "register_operand" "=v,v")
969 (vec_select:V_4REG_ALT
970 (match_operand:V_4REG 1 "register_operand" " 0,v")
971 (match_operand 2 "ascending_zero_int_parallel" "")))]
972 "MODE_VF (<V_4REG_ALT:MODE>mode) < MODE_VF (<V_4REG:MODE>mode)
973 && <V_4REG_ALT:SCALAR_MODE>mode == <V_4REG:SCALAR_MODE>mode"
975 ; in-place extract %0
976 v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
977 [(set_attr "type" "vmult")
978 (set_attr "length" "0,16")])
;; Expander: extract sub-vector number operands[2] (of V_MOV_ALT width) from
;; the V_MOV vector in operand 1.  The first lane of the sub-vector is
;; operands[2] * NUNITS (V_MOV_ALT).  One path emits the "_nop" pattern with a
;; PARALLEL of lanes 0..numlanes-1; the other builds a byte-offset permutation
;; starting at firstlane*4, runs ds_bpermute, and copies the low part of the
;; result via a SUBREG.
;; NOTE(review): the test selecting between the two paths is not visible in
;; this listing; presumably it is "firstlane == 0" -- confirm.
980 (define_expand "vec_extract<V_MOV:mode><V_MOV_ALT:mode>"
981 [(match_operand:V_MOV_ALT 0 "register_operand")
982 (match_operand:V_MOV 1 "register_operand")
983 (match_operand 2 "immediate_operand")]
984 "MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)
985 && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode"
987 int numlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode);
988 int firstlane = INTVAL (operands[2]) * numlanes;
993 rtx parallel = gen_rtx_PARALLEL (<V_MOV:MODE>mode,
994 rtvec_alloc (numlanes));
995 for (int i = 0; i < numlanes; i++)
996 XVECEXP (parallel, 0, i) = GEN_INT (i);
997 emit_insn (gen_vec_extract<V_MOV:mode><V_MOV_ALT:mode>_nop
998 (operands[0], operands[1], parallel));
1000 /* FIXME: optimize this by using DPP where available. */
1002 rtx permutation = gen_reg_rtx (<V_MOV:VnSI>mode);
1003 emit_insn (gen_vec_series<V_MOV:vnsi> (permutation,
1004 GEN_INT (firstlane*4),
1007 tmp = gen_reg_rtx (<V_MOV:MODE>mode);
1008 emit_insn (gen_ds_bpermute<V_MOV:mode> (tmp, permutation, operands[1],
1009 get_exec (<V_MOV:MODE>mode)));
1011 emit_move_insn (operands[0],
1012 gen_rtx_SUBREG (<V_MOV_ALT:MODE>mode, tmp, 0));
;; Expander: extract into scalar operand 0 the element of vector operand 2
;; selected by the highest set bit of the DImode mask in operand 1.
;; The lane index is computed as 63 - clz (mask) in a fresh SImode pseudo,
;; hence the can_create_pseudo_p condition.
1017 (define_expand "extract_last_<mode>"
1018 [(match_operand:<SCALAR_MODE> 0 "register_operand")
1019 (match_operand:DI 1 "gcn_alu_operand")
1020 (match_operand:V_MOV 2 "register_operand")]
1021 "can_create_pseudo_p ()"
1023 rtx dst = operands[0];
1024 rtx mask = operands[1];
1025 rtx vect = operands[2];
1026 rtx tmpreg = gen_reg_rtx (SImode);
1028 emit_insn (gen_clzdi2 (tmpreg, mask));
1029 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
1030 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
;; Expander: like extract_last_<mode>, but with a fallback value.
;; If the mask (operand 2) equals zero, control branches to else_label and
;; operand 0 receives the default value (operand 1); otherwise
;; extract_last_<mode> is emitted on vector operand 3 and the default is
;; skipped via a jump to end_label.
1034 (define_expand "fold_extract_last_<mode>"
1035 [(match_operand:<SCALAR_MODE> 0 "register_operand")
1036 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
1037 (match_operand:DI 2 "gcn_alu_operand")
1038 (match_operand:V_MOV 3 "register_operand")]
1039 "can_create_pseudo_p ()"
1041 rtx dst = operands[0];
1042 rtx default_value = operands[1];
1043 rtx mask = operands[2];
1044 rtx vect = operands[3];
1045 rtx else_label = gen_label_rtx ();
1046 rtx end_label = gen_label_rtx ();
1048 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
1049 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
1050 emit_insn (gen_extract_last_<mode> (dst, mask, vect));
1051 emit_jump_insn (gen_jump (end_label));
1053 emit_label (else_label);
1054 emit_move_insn (dst, default_value);
1055 emit_label (end_label);
;; Expander: initialise the V_MOV vector in operand 0 from operand 1 by
;; delegating to gcn_expand_vector_init.
;; NOTE(review): the declaration of operand 1 is not visible in this listing.
1059 (define_expand "vec_init<mode><scalar_mode>"
1060 [(match_operand:V_MOV 0 "register_operand")
1064 gcn_expand_vector_init (operands[0], operands[1]);
;; Expander: initialise the V_MOV vector in operand 0 from operand 1, whose
;; mode is a V_MOV_ALT vector with the same scalar mode and a smaller MODE_VF.
;; The work is delegated to gcn_expand_vector_init.
1068 (define_expand "vec_init<V_MOV:mode><V_MOV_ALT:mode>"
1069 [(match_operand:V_MOV 0 "register_operand")
1070 (match_operand:V_MOV_ALT 1)]
1071 "<V_MOV:SCALAR_MODE>mode == <V_MOV_ALT:SCALAR_MODE>mode
1072 && MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)"
1074 gcn_expand_vector_init (operands[0], operands[1]);
1079 ;; {{{ Scatter / Gather
1081 ;; GCN does not have an instruction for loading a vector from contiguous
1082 ;; memory so *all* loads and stores are eventually converted to scatter
1085 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
1086 ;; unspec. The unspec formats are as follows:
1089 ;; [(<address expression>)
1092 ;; (mem:BLK (scratch))]
1096 ;; [(<address expression>)
1097 ;; (<source register>)
1103 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
1104 ;; - The mem:BLK does not contain any real information, but indicates that an
1105 ;; unknown memory read is taking place. Stores are expected to use a similar
1106 ;; mem:BLK outside the unspec.
1107 ;; - The address space and glc (volatile) fields are there to replace the
1108 ;; fields normally found in a MEM.
1109 ;; - Multiple forms of address expression are supported, below.
1111 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
;; Expander for the gather_load optab.  Operand 0 is the destination vector,
;; operand 1 the DImode base, operand 2 the <VnSI> offsets, operand 3 an
;; immediate passed as INTVAL to gcn_expand_scaled_offsets, and operand 4 the
;; SImode scale operand.  gcn_expand_scaled_offsets is called for
;; DEFAULT_ADDR_SPACE; if it returns a <VnDI> address the 1offset insn is
;; used, otherwise the 2offsets insn (base register + returned offsets).
1113 (define_expand "gather_load<mode><vnsi>"
1114 [(match_operand:V_MOV 0 "register_operand")
1115 (match_operand:DI 1 "register_operand")
1116 (match_operand:<VnSI> 2 "register_operand")
1117 (match_operand 3 "immediate_operand")
1118 (match_operand:SI 4 "gcn_alu_operand")]
1121 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
1122 operands[2], operands[4],
1123 INTVAL (operands[3]), NULL);
1125 if (GET_MODE (addr) == <VnDI>mode)
1126 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
1127 const0_rtx, const0_rtx));
1129 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
1130 addr, const0_rtx, const0_rtx,
1135 ; Allow any address expression
;; Generic gather expander: operand 1 has no predicate or mode, so any
;; address expression is accepted; operands 2 and 3 are immediates.
;; NOTE(review): by analogy with the gather insns, operands 2 and 3 are
;; presumably the address space and glc flag -- confirm.
1136 (define_expand "gather<mode>_expr<exec>"
1137 [(set (match_operand:V_MOV 0 "register_operand")
1139 [(match_operand 1 "")
1140 (match_operand 2 "immediate_operand")
1141 (match_operand 3 "immediate_operand")
1142 (mem:BLK (scratch))]
;; Gather load from a vector of 64-bit addresses (operand 1) plus a constant
;; offset (operand 2).  Operand 3 is the address space and operand 4 the glc
;; flag.  Emits flat_load or global_load followed by s_waitcnt.
;;
;; FLAT addresses: the output code only prints "offset:" when
;; TARGET_GCN5_PLUS, so a non-zero offset must be rejected on other targets;
;; otherwise it would be silently dropped from the assembly.  The condition
;; therefore mirrors scatter<mode>_insn_1offset: offset == 0, or a 12-bit
;; unsigned offset on GCN5+.
;; GLOBAL addresses: the offset must lie in [-0x1000, 0x1000).
1147 (define_insn "gather<mode>_insn_1offset<exec>"
1148 [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a")
1150 [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v,v, v, v")
1151 (vec_duplicate:<VnDI>
1152 (match_operand 2 "immediate_operand" " n,n, n, n")))
1153 (match_operand 3 "immediate_operand" " n,n, n, n")
1154 (match_operand 4 "immediate_operand" " n,n, n, n")
1155 (mem:BLK (scratch))]
1157 "(AS_FLAT_P (INTVAL (operands[3]))
1158 && (INTVAL(operands[2]) == 0
1159 || (TARGET_GCN5_PLUS && (unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
1160 || (AS_GLOBAL_P (INTVAL (operands[3]))
1161 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
1163 addr_space_t as = INTVAL (operands[3]);
1164 const char *glc = INTVAL (operands[4]) ? " glc" : "";
1166 static char buf[200];
1169 if (TARGET_GCN5_PLUS)
1170 sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
1173 sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
1175 else if (AS_GLOBAL_P (as))
1176 sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
1177 "s_waitcnt\tvmcnt(0)", glc);
1183 [(set_attr "type" "flat")
1184 (set_attr "length" "12")
1185 (set_attr "gcn_version" "*,cdna2,*,cdna2")
1186 (set_attr "xnack" "off,off,on,on")])
;; Gather load for the DS address spaces (AS_ANY_DS_P on operand 3).
;; Operand 1 is a <VnSI> address vector and operand 2 an unsigned constant
;; offset below 0x10000.  Emits ds_read with the offset, appends " gds" when
;; AS_GDS_P holds for the address space, then waits on lgkmcnt(0).
1188 (define_insn "gather<mode>_insn_1offset_ds<exec>"
1189 [(set (match_operand:V_MOV 0 "register_operand" "=v,a")
1191 [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v,v")
1192 (vec_duplicate:<VnSI>
1193 (match_operand 2 "immediate_operand" " n,n")))
1194 (match_operand 3 "immediate_operand" " n,n")
1195 (match_operand 4 "immediate_operand" " n,n")
1196 (mem:BLK (scratch))]
1198 "(AS_ANY_DS_P (INTVAL (operands[3]))
1199 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
1201 addr_space_t as = INTVAL (operands[3]);
1202 static char buf[200];
1203 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
1204 (AS_GDS_P (as) ? " gds" : ""));
1207 [(set_attr "type" "ds")
1208 (set_attr "length" "12")
1209 (set_attr "gcn_version" "*,cdna2")])
;; Gather load with a scalar DImode base (operand 1), a <VnSI> vector of
;; per-lane offsets (operand 2) and a constant offset (operand 3).
;; Only the global address space is accepted (operand 4), with the constant
;; offset in [-0x1000, 0x1000).  Operand 5 selects the " glc" suffix.
;; Emits global_load followed by s_waitcnt vmcnt(0).
1211 (define_insn "gather<mode>_insn_2offsets<exec>"
1212 [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a")
1216 (vec_duplicate:<VnDI>
1217 (match_operand:DI 1 "register_operand" "Sv,Sv,Sv,Sv"))
1219 (match_operand:<VnSI> 2 "register_operand" " v, v, v, v")))
1220 (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
1222 (match_operand 4 "immediate_operand" " n, n, n, n")
1223 (match_operand 5 "immediate_operand" " n, n, n, n")
1224 (mem:BLK (scratch))]
1226 "(AS_GLOBAL_P (INTVAL (operands[4]))
1227 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
1229 addr_space_t as = INTVAL (operands[4]);
1230 const char *glc = INTVAL (operands[5]) ? " glc" : "";
1232 static char buf[200];
1233 if (AS_GLOBAL_P (as))
1234 sprintf (buf, "global_load%%o0\t%%0, %%2, %%1 offset:%%3%s\;"
1235 "s_waitcnt\tvmcnt(0)", glc);
1241 [(set_attr "type" "flat")
1242 (set_attr "length" "12")
1243 (set_attr "gcn_version" "*,cdna2,*,cdna2")
1244 (set_attr "xnack" "off,off,on,on")])
;; Expander for the scatter_store optab.  Operand 0 is the DImode base,
;; operand 1 the <VnSI> offsets, operand 2 an immediate passed as INTVAL to
;; gcn_expand_scaled_offsets, operand 3 the SImode scale operand and operand 4
;; the vector to store.  As in gather_load, a <VnDI> address selects the
;; 1offset insn; otherwise the 2offsets insn is used.
1246 (define_expand "scatter_store<mode><vnsi>"
1247 [(match_operand:DI 0 "register_operand")
1248 (match_operand:<VnSI> 1 "register_operand")
1249 (match_operand 2 "immediate_operand")
1250 (match_operand:SI 3 "gcn_alu_operand")
1251 (match_operand:V_MOV 4 "register_operand")]
1254 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
1255 operands[1], operands[3],
1256 INTVAL (operands[2]), NULL);
1258 if (GET_MODE (addr) == <VnDI>mode)
1259 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
1260 const0_rtx, const0_rtx));
1262 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
1263 const0_rtx, operands[4],
1264 const0_rtx, const0_rtx));
1268 ; Allow any address expression
;; Generic scatter expander: operand 0 is a <VnDI> address with no predicate,
;; operand 1 the vector register to store, operands 2 and 3 immediates.
;; NOTE(review): by analogy with the scatter insns, operands 2 and 3 are
;; presumably the address space and glc flag -- confirm.
1269 (define_expand "scatter<mode>_expr<exec_scatter>"
1270 [(set (mem:BLK (scratch))
1272 [(match_operand:<VnDI> 0 "")
1273 (match_operand:V_MOV 1 "register_operand")
1274 (match_operand 2 "immediate_operand")
1275 (match_operand 3 "immediate_operand")]
;; Scatter store to a vector of 64-bit addresses (operand 0) plus a constant
;; offset (operand 1); operand 2 is the data, operand 3 the address space and
;; operand 4 the glc flag.
;; FLAT: the offset must be 0, or below 0x1000 when TARGET_GCN5_PLUS (the
;; only case in which "offset:" is printed).
;; GLOBAL: the offset must lie in [-0x1000, 0x1000).
1280 (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
1281 [(set (mem:BLK (scratch))
1283 [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v,v")
1284 (vec_duplicate:<VnDI>
1285 (match_operand 1 "immediate_operand" "n,n")))
1286 (match_operand:V_MOV 2 "register_operand" "v,a")
1287 (match_operand 3 "immediate_operand" "n,n")
1288 (match_operand 4 "immediate_operand" "n,n")]
1290 "(AS_FLAT_P (INTVAL (operands[3]))
1291 && (INTVAL(operands[1]) == 0
1292 || (TARGET_GCN5_PLUS
1293 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
1294 || (AS_GLOBAL_P (INTVAL (operands[3]))
1295 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
1297 addr_space_t as = INTVAL (operands[3]);
1298 const char *glc = INTVAL (operands[4]) ? " glc" : "";
1300 static char buf[200];
1303 if (TARGET_GCN5_PLUS)
1304 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
1306 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
1308 else if (AS_GLOBAL_P (as))
1309 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
1315 [(set_attr "type" "flat")
1316 (set_attr "length" "12")
1317 (set_attr "gcn_version" "*,cdna2")])
;; Scatter store for the DS address spaces (AS_ANY_DS_P on operand 3).
;; Operand 0 is a <VnSI> address vector, operand 1 an unsigned constant offset
;; below 0x10000 and operand 2 the data.  Emits ds_write with the offset,
;; appends " gds" when AS_GDS_P holds, then waits on lgkmcnt(0).
1319 (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
1320 [(set (mem:BLK (scratch))
1322 [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v,v")
1323 (vec_duplicate:<VnSI>
1324 (match_operand 1 "immediate_operand" "n,n")))
1325 (match_operand:V_MOV 2 "register_operand" "v,a")
1326 (match_operand 3 "immediate_operand" "n,n")
1327 (match_operand 4 "immediate_operand" "n,n")]
1329 "(AS_ANY_DS_P (INTVAL (operands[3]))
1330 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
1332 addr_space_t as = INTVAL (operands[3]);
1333 static char buf[200];
1334 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
1335 (AS_GDS_P (as) ? " gds" : ""));
1338 [(set_attr "type" "ds")
1339 (set_attr "length" "12")
1340 (set_attr "gcn_version" "*,cdna2")])
;; Scatter store with a scalar DImode base (operand 0), a <VnSI> vector of
;; per-lane offsets (operand 1) and a constant offset (operand 2) in
;; [-0x1000, 0x1000).  Operand 3 is the data, operand 4 the address space
;; (global only) and operand 5 the glc flag.  Emits global_store.
1342 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
1343 [(set (mem:BLK (scratch))
1347 (vec_duplicate:<VnDI>
1348 (match_operand:DI 0 "register_operand" "Sv,Sv"))
1350 (match_operand:<VnSI> 1 "register_operand" "v,v")))
1351 (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" "n,n")))
1352 (match_operand:V_MOV 3 "register_operand" "v,a")
1353 (match_operand 4 "immediate_operand" "n,n")
1354 (match_operand 5 "immediate_operand" "n,n")]
1356 "(AS_GLOBAL_P (INTVAL (operands[4]))
1357 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
1359 addr_space_t as = INTVAL (operands[4]);
1360 const char *glc = INTVAL (operands[5]) ? " glc" : "";
1362 static char buf[200];
1363 if (AS_GLOBAL_P (as))
1364 sprintf (buf, "global_store%%s3\t%%1, %%3, %%0 offset:%%2%s", glc);
1370 [(set_attr "type" "flat")
1371 (set_attr "length" "12")
1372 (set_attr "gcn_version" "*,cdna2")])
;; ds_bpermute for single-register vectors (V_1REG).  Note the operand
;; numbering: operand 2 is the data vector and operand 1 the <VnSI> index
;; vector, while the assembly prints them as "%0, %1, %2".  Operand 3 is the
;; EXEC register.  The instruction is followed by s_waitcnt lgkmcnt(0).
1377 (define_insn "ds_bpermute<mode>"
1378 [(set (match_operand:V_1REG 0 "register_operand" "=v")
1380 [(match_operand:V_1REG 2 "register_operand" " v")
1381 (match_operand:<VnSI> 1 "register_operand" " v")
1382 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1385 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
1386 [(set_attr "type" "vop2")
1387 (set_attr "length" "12")])
;; ds_bpermute for two-register vectors (V_2REG).  Splits into two <VnSI>
;; permutes, one per register part: operands 4/5 are parts 0/1 of the
;; destination and operands 6/7 are parts 0/1 of the source (operand 2).
;; Both halves reuse the same index vector (operand 1) and EXEC (operand 3).
;; The destination is earlyclobber ("=&v") unless tied to the source ("v0").
1389 (define_insn_and_split "ds_bpermute<mode>"
1390 [(set (match_operand:V_2REG 0 "register_operand" "=&v")
1392 [(match_operand:V_2REG 2 "register_operand" " v0")
1393 (match_operand:<VnSI> 1 "register_operand" " v")
1394 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1399 [(set (match_dup 4) (unspec:<VnSI>
1400 [(match_dup 6) (match_dup 1) (match_dup 3)]
1402 (set (match_dup 5) (unspec:<VnSI>
1403 [(match_dup 7) (match_dup 1) (match_dup 3)]
1406 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
1407 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
1408 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
1409 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
1411 [(set_attr "type" "vmult")
1412 (set_attr "length" "24")])
;; DPP move of operand 1 into operand 0 (UNSPEC_MOV_DPP_SHR).  Operand 2 is a
;; constant integer forwarded to gcn_expand_dpp_shr_insn, which builds the
;; v_mov_b32 assembly string.
1414 (define_insn "@dpp_move<mode>"
1415 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1417 [(match_operand:V_noHI 1 "register_operand" " v")
1418 (match_operand:SI 2 "const_int_operand" " n")]
1419 UNSPEC_MOV_DPP_SHR))]
1422 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
1423 UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
1425 [(set_attr "type" "vop_dpp")
1426 (set_attr "length" "16")])
;; DPP move of operand 1 into operand 0 using UNSPEC_MOV_DPP_SWAP_PAIRS; the
;; v_mov_b32 assembly string is built by gcn_expand_dpp_swap_pairs_insn.
1428 (define_insn "@dpp_swap_pairs<mode>"
1429 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1431 [(match_operand:V_noHI 1 "register_operand" " v")]
1432 UNSPEC_MOV_DPP_SWAP_PAIRS))]
1435 return gcn_expand_dpp_swap_pairs_insn (<MODE>mode, "v_mov_b32",
1436 UNSPEC_MOV_DPP_SWAP_PAIRS);
1438 [(set_attr "type" "vop_dpp")
1439 (set_attr "length" "16")])
;; DPP move of operand 1 into operand 0 using UNSPEC_MOV_DPP_DISTRIBUTE_EVEN;
;; the v_mov_b32 assembly string is built by
;; gcn_expand_dpp_distribute_even_insn.
1441 (define_insn "@dpp_distribute_even<mode>"
1442 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1444 [(match_operand:V_noHI 1 "register_operand" " v")]
1445 UNSPEC_MOV_DPP_DISTRIBUTE_EVEN))]
1448 return gcn_expand_dpp_distribute_even_insn (<MODE>mode, "v_mov_b32",
1449 UNSPEC_MOV_DPP_DISTRIBUTE_EVEN);
1451 [(set_attr "type" "vop_dpp")
1452 (set_attr "length" "16")])
;; DPP move of operand 1 into operand 0 using UNSPEC_MOV_DPP_DISTRIBUTE_ODD;
;; the v_mov_b32 assembly string is built by
;; gcn_expand_dpp_distribute_odd_insn.
;; The RTL template must use the _ODD unspec: with the _EVEN unspec it would
;; be indistinguishable from @dpp_distribute_even<mode>, and the insn would be
;; recognised as (and assembled by) that pattern instead.
1454 (define_insn "@dpp_distribute_odd<mode>"
1455 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1457 [(match_operand:V_noHI 1 "register_operand" " v")]
1458 UNSPEC_MOV_DPP_DISTRIBUTE_ODD))]
1461 return gcn_expand_dpp_distribute_odd_insn (<MODE>mode, "v_mov_b32",
1462 UNSPEC_MOV_DPP_DISTRIBUTE_ODD);
1464 [(set_attr "type" "vop_dpp")
1465 (set_attr "length" "16")])
1468 ;; {{{ ALU special case: add/sub
;; Integer vector add for single-register modes (V_INT_1REG).  Operands 1 and
;; 2 are commutative ("%1").  The instruction writes a carry to VCC, which is
;; therefore declared as clobbered.
1470 (define_insn "add<mode>3<exec_clobber>"
1471 [(set (match_operand:V_INT_1REG 0 "register_operand")
1473 (match_operand:V_INT_1REG 1 "register_operand")
1474 (match_operand:V_INT_1REG 2 "gcn_alu_operand")))
1475 (clobber (reg:DI VCC_REG))]
1477 {@ [cons: =0, %1, 2; attrs: type, length]
1478 [v,v,vSvA;vop2,4] v_add%^_u32\t%0, vcc, %2, %1
;; Integer vector add where one addend is a scalar (operand 2) broadcast with
;; vec_duplicate and the other is a vector register (operand 1).  VCC is
;; clobbered by the carry output.
1482 (define_insn "add<mode>3_dup<exec_clobber>"
1483 [(set (match_operand:V_INT_1REG 0 "register_operand")
1485 (vec_duplicate:V_INT_1REG
1486 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"))
1487 (match_operand:V_INT_1REG 1 "register_operand")))
1488 (clobber (reg:DI VCC_REG))]
1490 {@ [cons: =0, 1, 2; attrs: type, length]
1491 [v,v,SvA;vop2,4] v_add%^_u32\t%0, vcc, %2, %1
;; V_SI add that also sets a DImode carry mask (operand 3), described as an
;; ltu comparison on the sum.  The alternatives differ in where the carry
;; goes ("cV" or "Sg") and in which constants operand 2 accepts ("A" or
;; "B"), which determines the encoding type and length.
1495 (define_insn "add<mode>3_vcc<exec_vcc>"
1496 [(set (match_operand:V_SI 0 "register_operand")
1498 (match_operand:V_SI 1 "register_operand")
1499 (match_operand:V_SI 2 "gcn_alu_operand")))
1500 (set (match_operand:DI 3 "register_operand")
1501 (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
1504 {@ [cons: =0, %1, 2, =3; attrs: type, length]
1505 [v,v,vSvA,cV;vop2 ,4] v_add%^_u32\t%0, %3, %2, %1
1506 [v,v,vSvB,cV;vop2 ,8] ^
1507 [v,v,vSvA,Sg;vop3b,8] ^
1510 ; This pattern only changes the VCC bits when the corresponding lane is
1511 ; enabled, so the set must be described as an ior.
;; V_SI add of a broadcast SImode scalar (operand 1) and a vector register
;; (operand 2), also producing a DImode carry mask in operand 3.
;; NOTE(review): the carry expression applies vec_duplicate to
;; (match_dup 2), but operand 2 is the V_SI vector and operand 1 is the
;; scalar; the operand numbers in the ltu expression look swapped -- confirm.
1513 (define_insn "add<mode>3_vcc_dup<exec_vcc>"
1514 [(set (match_operand:V_SI 0 "register_operand")
1517 (match_operand:SI 1 "gcn_alu_operand"))
1518 (match_operand:V_SI 2 "register_operand")))
1519 (set (match_operand:DI 3 "register_operand")
1520 (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
1522 (vec_duplicate:V_SI (match_dup 2))))]
1524 {@ [cons: =0, 1, 2, =3; attrs: type, length]
1525 [v,SvA,v,cV;vop2 ,4] v_add%^_u32\t%0, %3, %1, %2
1526 [v,SvB,v,cV;vop2 ,8] ^
1527 [v,SvA,v,Sg;vop3b,8] ^
1530 ; v_addc does not accept an SGPR because the VCC read already counts as an
1531 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1532 ; accept "B" immediate constants due to a related bus conflict.
;; V_SI add with carry-in and carry-out.  Operand 3 is the DImode carry-in
;; mask, modelled as a vec_merge of 1/0 vectors added to operands 1 and 2;
;; operand 4 receives the carry-out mask.  The first alternative is the VOP2
;; form with both carries in "cV"; the second is the VOP3b form, which also
;; allows "Sv" carry-in and "Sg" carry-out.
1534 (define_insn "addc<mode>3<exec_vcc>"
1535 [(set (match_operand:V_SI 0 "register_operand" "=v, v")
1539 (vec_duplicate:V_SI (const_int 1))
1540 (vec_duplicate:V_SI (const_int 0))
1541 (match_operand:DI 3 "register_operand" " cV,cVSv"))
1542 (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA"))
1543 (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA")))
1544 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
1545 (ior:DI (ltu:DI (plus:V_SI
1548 (vec_duplicate:V_SI (const_int 1))
1549 (vec_duplicate:V_SI (const_int 0))
1556 (vec_duplicate:V_SI (const_int 1))
1557 (vec_duplicate:V_SI (const_int 0))
1562 "{v_addc%^_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3"
1563 [(set_attr "type" "vop2,vop3b")
1564 (set_attr "length" "4,8")])
;; Integer vector subtract for single-register modes.  Subtraction is not
;; commutative, so two alternatives are provided: v_sub when operand 2 is a
;; VGPR, and v_subrev (operands swapped in the assembly) when operand 1 is a
;; VGPR.  VCC is clobbered by the borrow output.
1566 (define_insn "sub<mode>3<exec_clobber>"
1567 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
1569 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
1570 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
1571 (clobber (reg:DI VCC_REG))]
1574 v_sub%^_u32\t%0, vcc, %1, %2
1575 v_subrev%^_u32\t%0, vcc, %2, %1"
1576 [(set_attr "type" "vop2")
1577 (set_attr "length" "8,8")])
;; V_SI subtract that also sets a DImode borrow mask (operand 3), described as
;; a gtu comparison on the difference.  Alternatives 1-2 use v_sub, 3-4 use
;; v_subrev with swapped assembly operands; within each pair the borrow goes
;; to "cV" (VOP2) or "Sg" (VOP3b).
1579 (define_insn "sub<mode>3_vcc<exec_vcc>"
1580 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1582 (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1583 (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1584 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1585 (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
1589 v_sub%^_u32\t%0, %3, %1, %2
1590 v_sub%^_u32\t%0, %3, %1, %2
1591 v_subrev%^_u32\t%0, %3, %2, %1
1592 v_subrev%^_u32\t%0, %3, %2, %1"
1593 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1594 (set_attr "length" "8")])
1596 ; v_subb does not accept an SGPR because the VCC read already counts as an
1597 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1598 ; accept "B" immediate constants due to a related bus conflict.
;; V_SI subtract with borrow-in (operand 3) and borrow-out (operand 4).
;; The borrow-in is modelled as a vec_merge of 1/0 vectors.  Alternatives 1-2
;; emit v_subb, 3-4 emit v_subbrev with operands 1 and 2 swapped in the
;; assembly; each pair offers a VOP2 and a VOP3b form.
1600 (define_insn "subc<mode>3<exec_vcc>"
1601 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1605 (vec_duplicate:V_SI (const_int 1))
1606 (vec_duplicate:V_SI (const_int 0))
1607 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
1608 (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
1609 (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
1610 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
1611 (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
1613 (vec_duplicate:V_SI (const_int 1))
1614 (vec_duplicate:V_SI (const_int 0))
1619 (ltu:DI (minus:V_SI (vec_merge:V_SI
1620 (vec_duplicate:V_SI (const_int 1))
1621 (vec_duplicate:V_SI (const_int 0))
1627 {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
1628 {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
1629 {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3
1630 {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3"
1631 [(set_attr "type" "vmult")
1632 (set_attr "length" "4,8,4,8")])
;; 64-bit (V_DI) vector add.  Split into an add3_vcc on part 0 of each
;; operand and an addc on part 1, with the carry passed through VCC.
;; The split only applies when gcn_can_split_p accepts operands 0, 1 and 2.
1634 (define_insn_and_split "add<mode>3"
1635 [(set (match_operand:V_DI 0 "register_operand" "= v")
1637 (match_operand:V_DI 1 "register_operand" "%vDb")
1638 (match_operand:V_DI 2 "gcn_alu_operand" " vDb")))
1639 (clobber (reg:DI VCC_REG))]
1642 "gcn_can_split_p (<MODE>mode, operands[0])
1643 && gcn_can_split_p (<MODE>mode, operands[1])
1644 && gcn_can_split_p (<MODE>mode, operands[2])"
1647 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1648 emit_insn (gen_add<vnsi>3_vcc
1649 (gcn_operand_part (<MODE>mode, operands[0], 0),
1650 gcn_operand_part (<MODE>mode, operands[1], 0),
1651 gcn_operand_part (<MODE>mode, operands[2], 0),
1653 emit_insn (gen_addc<vnsi>3
1654 (gcn_operand_part (<MODE>mode, operands[0], 1),
1655 gcn_operand_part (<MODE>mode, operands[1], 1),
1656 gcn_operand_part (<MODE>mode, operands[2], 1),
1660 [(set_attr "type" "vmult")
1661 (set_attr "length" "8")])
;; Masked 64-bit (V_DI) vector add: operand 3 is the merge source for
;; inactive lanes and operand 4 the EXEC mask.  Split into add3_vcc_exec on
;; part 0 and addc_exec on part 1, with the carry passed through VCC.
;; The split condition must test every V_DI operand that is broken into
;; parts: operands 0, 1, 2 and the merge source, operand 3.  Operand 4 is the
;; DImode EXEC mask, which is never split, so testing it checked nothing;
;; this now matches sub<mode>3_exec and the other _exec splitters.
1663 (define_insn_and_split "add<mode>3_exec"
1664 [(set (match_operand:V_DI 0 "register_operand" "= v")
1667 (match_operand:V_DI 1 "register_operand" "%vDb")
1668 (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))
1669 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1670 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1671 (clobber (reg:DI VCC_REG))]
1674 "gcn_can_split_p (<MODE>mode, operands[0])
1675 && gcn_can_split_p (<MODE>mode, operands[1])
1676 && gcn_can_split_p (<MODE>mode, operands[2])
1677 && gcn_can_split_p (<MODE>mode, operands[3])"
1680 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1681 emit_insn (gen_add<vnsi>3_vcc_exec
1682 (gcn_operand_part (<MODE>mode, operands[0], 0),
1683 gcn_operand_part (<MODE>mode, operands[1], 0),
1684 gcn_operand_part (<MODE>mode, operands[2], 0),
1686 gcn_operand_part (<MODE>mode, operands[3], 0),
1688 emit_insn (gen_addc<vnsi>3_exec
1689 (gcn_operand_part (<MODE>mode, operands[0], 1),
1690 gcn_operand_part (<MODE>mode, operands[1], 1),
1691 gcn_operand_part (<MODE>mode, operands[2], 1),
1693 gcn_operand_part (<MODE>mode, operands[3], 1),
1697 [(set_attr "type" "vmult")
1698 (set_attr "length" "8")])
;; 64-bit (V_DI) vector subtract.  Split into a sub3_vcc on part 0 of each
;; operand and a subc on part 1, with the borrow passed through VCC.
;; Two alternatives allow either operand 1 or operand 2 to be a non-VGPR
;; ("vDb") as long as the other is a VGPR.
1700 (define_insn_and_split "sub<mode>3"
1701 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1703 (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v")
1704 (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb")))
1705 (clobber (reg:DI VCC_REG))]
1708 "gcn_can_split_p (<MODE>mode, operands[0])
1709 && gcn_can_split_p (<MODE>mode, operands[1])
1710 && gcn_can_split_p (<MODE>mode, operands[2])"
1713 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1714 emit_insn (gen_sub<vnsi>3_vcc
1715 (gcn_operand_part (<MODE>mode, operands[0], 0),
1716 gcn_operand_part (<MODE>mode, operands[1], 0),
1717 gcn_operand_part (<MODE>mode, operands[2], 0),
1719 emit_insn (gen_subc<vnsi>3
1720 (gcn_operand_part (<MODE>mode, operands[0], 1),
1721 gcn_operand_part (<MODE>mode, operands[1], 1),
1722 gcn_operand_part (<MODE>mode, operands[2], 1),
1726 [(set_attr "type" "vmult")
1727 (set_attr "length" "8")])
;; Masked 64-bit (V_DI) vector subtract: operand 3 is the merge source and
;; operand 4 the EXEC mask.  The insn condition requires at least one of
;; operands 1 and 2 to be a register.  Split into sub3_vcc_exec on part 0 and
;; subc_exec on part 1, with the borrow passed through VCC.
;; NOTE(review): the constraints here use "vSvB" while the unmasked
;; sub<mode>3 uses "vDb" for the same V_DI operands -- confirm intended.
1729 (define_insn_and_split "sub<mode>3_exec"
1730 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1733 (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v")
1734 (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB"))
1735 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1736 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1737 (clobber (reg:DI VCC_REG))]
1738 "register_operand (operands[1], VOIDmode)
1739 || register_operand (operands[2], VOIDmode)"
1741 "gcn_can_split_p (<MODE>mode, operands[0])
1742 && gcn_can_split_p (<MODE>mode, operands[1])
1743 && gcn_can_split_p (<MODE>mode, operands[2])
1744 && gcn_can_split_p (<MODE>mode, operands[3])"
1747 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1748 emit_insn (gen_sub<vnsi>3_vcc_exec
1749 (gcn_operand_part (<MODE>mode, operands[0], 0),
1750 gcn_operand_part (<MODE>mode, operands[1], 0),
1751 gcn_operand_part (<MODE>mode, operands[2], 0),
1753 gcn_operand_part (<MODE>mode, operands[3], 0),
1755 emit_insn (gen_subc<vnsi>3_exec
1756 (gcn_operand_part (<MODE>mode, operands[0], 1),
1757 gcn_operand_part (<MODE>mode, operands[1], 1),
1758 gcn_operand_part (<MODE>mode, operands[2], 1),
1760 gcn_operand_part (<MODE>mode, operands[3], 1),
1764 [(set_attr "type" "vmult")
1765 (set_attr "length" "8")])
;; V_DI add where operand 1 is a <VnSI> vector (the name indicates it is
;; zero-extended) and operand 2 a V_DI vector.  Split into an add3_vcc for
;; part 0 and an addc of part 1 of operand 2 with const0_rtx, so the high
;; half only receives the carry from VCC.
1767 (define_insn_and_split "add<mode>3_zext"
1768 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1771 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1772 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")))
1773 (clobber (reg:DI VCC_REG))]
1776 "gcn_can_split_p (<MODE>mode, operands[0])
1777 && gcn_can_split_p (<MODE>mode, operands[2])"
1780 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1781 emit_insn (gen_add<vnsi>3_vcc
1782 (gcn_operand_part (<MODE>mode, operands[0], 0),
1784 gcn_operand_part (<MODE>mode, operands[2], 0),
1786 emit_insn (gen_addc<vnsi>3
1787 (gcn_operand_part (<MODE>mode, operands[0], 1),
1788 gcn_operand_part (<MODE>mode, operands[2], 1),
1789 const0_rtx, vcc, vcc));
1792 [(set_attr "type" "vmult")
1793 (set_attr "length" "8")])
;; Masked variant of add<mode>3_zext: operand 3 is the merge source and
;; operand 4 the EXEC mask.  Split into add3_vcc_exec on part 0 and addc_exec
;; on part 1 (adding const0_rtx plus the carry in VCC), each merging with the
;; corresponding part of operand 3.
1795 (define_insn_and_split "add<mode>3_zext_exec"
1796 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1800 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1801 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
1802 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1803 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1804 (clobber (reg:DI VCC_REG))]
1807 "gcn_can_split_p (<MODE>mode, operands[0])
1808 && gcn_can_split_p (<MODE>mode, operands[2])
1809 && gcn_can_split_p (<MODE>mode, operands[3])"
1812 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1813 emit_insn (gen_add<vnsi>3_vcc_exec
1814 (gcn_operand_part (<MODE>mode, operands[0], 0),
1816 gcn_operand_part (<MODE>mode, operands[2], 0),
1818 gcn_operand_part (<MODE>mode, operands[3], 0),
1820 emit_insn (gen_addc<vnsi>3_exec
1821 (gcn_operand_part (<MODE>mode, operands[0], 1),
1822 gcn_operand_part (<MODE>mode, operands[2], 1),
1823 const0_rtx, vcc, vcc,
1824 gcn_operand_part (<MODE>mode, operands[3], 1),
1828 [(set_attr "type" "vmult")
1829 (set_attr "length" "8")])
;; V_DI add of a zero-extended broadcast SImode scalar (operand 1) and a V_DI
;; vector (operand 2), with an explicit DImode carry register (operand 3)
;; instead of a fixed VCC clobber.  Split into add3_vcc_dup on the low parts
;; and an addc of the high part of operand 2 with const0_rtx, using operand 3
;; as both carry-in and carry-out.
;; NOTE(review): the constraint alternatives are not visible in this listing.
1831 (define_insn_and_split "add<mode>3_vcc_zext_dup"
1832 [(set (match_operand:V_DI 0 "register_operand")
1835 (vec_duplicate:<VnSI>
1836 (match_operand:SI 1 "gcn_alu_operand")))
1837 (match_operand:V_DI 2 "gcn_alu_operand")))
1838 (set (match_operand:DI 3 "register_operand")
1840 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1844 {@ [cons: =0, 1, 2, =3]
1848 "gcn_can_split_p (<MODE>mode, operands[0])
1849 && gcn_can_split_p (<MODE>mode, operands[2])"
1852 emit_insn (gen_add<vnsi>3_vcc_dup
1853 (gcn_operand_part (<MODE>mode, operands[0], 0),
1854 gcn_operand_part (DImode, operands[1], 0),
1855 gcn_operand_part (<MODE>mode, operands[2], 0),
1857 emit_insn (gen_addc<vnsi>3
1858 (gcn_operand_part (<MODE>mode, operands[0], 1),
1859 gcn_operand_part (<MODE>mode, operands[2], 1),
1860 const0_rtx, operands[3], operands[3]));
1863 [(set_attr "type" "vmult")
1864 (set_attr "length" "8")])
;; Convenience expander: emits add<mode>3_vcc_zext_dup, creating the hard
;; VCC register rtx locally for the carry operand.
1866 (define_expand "add<mode>3_zext_dup"
1867 [(match_operand:V_DI 0 "register_operand")
1868 (match_operand:SI 1 "gcn_alu_operand")
1869 (match_operand:V_DI 2 "gcn_alu_operand")]
1872 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1873 emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
;; Masked variant of add<mode>3_vcc_zext_dup: operand 4 is the merge source
;; and operand 5 the EXEC mask; operand 3 is the carry register, marked
;; earlyclobber in both alternatives ("&Sg" / "&cV").  Split into
;; add3_vcc_dup_exec on the low parts and addc_exec on the high parts.
1878 (define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
1879 [(set (match_operand:V_DI 0 "register_operand")
1883 (vec_duplicate:<VnSI>
1884 (match_operand:SI 1 "gcn_alu_operand")))
1885 (match_operand:V_DI 2 "gcn_alu_operand"))
1886 (match_operand:V_DI 4 "gcn_register_or_unspec_operand")
1887 (match_operand:DI 5 "gcn_exec_reg_operand")))
1888 (set (match_operand:DI 3 "register_operand")
1891 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1896 {@ [cons: =0, 1, 2, =3, 4, 5]
1897 [v,ASv,v,&Sg,U0,e] #
1898 [v,BSv,v,&cV,U0,e] ^
1900 "gcn_can_split_p (<MODE>mode, operands[0])
1901 && gcn_can_split_p (<MODE>mode, operands[2])
1902 && gcn_can_split_p (<MODE>mode, operands[4])"
1905 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1906 (gcn_operand_part (<MODE>mode, operands[0], 0),
1907 gcn_operand_part (DImode, operands[1], 0),
1908 gcn_operand_part (<MODE>mode, operands[2], 0),
1910 gcn_operand_part (<MODE>mode, operands[4], 0),
1912 emit_insn (gen_addc<vnsi>3_exec
1913 (gcn_operand_part (<MODE>mode, operands[0], 1),
1914 gcn_operand_part (<MODE>mode, operands[2], 1),
1915 const0_rtx, operands[3], operands[3],
1916 gcn_operand_part (<MODE>mode, operands[4], 1),
1920 [(set_attr "type" "vmult")
1921 (set_attr "length" "8")])
;; Convenience expander: emits add<mode>3_vcc_zext_dup_exec with the hard VCC
;; register as the carry operand.  Note the renumbering: this expander's
;; operand 3 (merge source) is passed after vcc, in the insn's operand 4
;; position.
1923 (define_expand "add<mode>3_zext_dup_exec"
1924 [(match_operand:V_DI 0 "register_operand")
1925 (match_operand:SI 1 "gcn_alu_operand")
1926 (match_operand:V_DI 2 "gcn_alu_operand")
1927 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1928 (match_operand:DI 4 "gcn_exec_reg_operand")]
1931 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1932 emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
1933 operands[2], vcc, operands[3],
;; V_DI add of a zero-extended <VnSI> vector (operand 1) and a broadcast
;; DImode scalar (operand 2), with an explicit carry register (operand 3).
;; Split: add3_vcc_dup on the low parts; then the high word of the scalar is
;; broadcast into the high part of the destination and addc adds const0_rtx
;; plus the carry to it in place.
;; NOTE(review): the constraint alternatives are not visible in this listing.
1938 (define_insn_and_split "add<mode>3_vcc_zext_dup2"
1939 [(set (match_operand:V_DI 0 "register_operand")
1941 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"))
1942 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"))))
1943 (set (match_operand:DI 3 "register_operand")
1945 (zero_extend:V_DI (match_dup 1))
1946 (vec_duplicate:V_DI (match_dup 2)))
1949 {@ [cons: =0, 1, 2, =3]
1953 "gcn_can_split_p (<MODE>mode, operands[0])"
1956 emit_insn (gen_add<vnsi>3_vcc_dup
1957 (gcn_operand_part (<MODE>mode, operands[0], 0),
1958 gcn_operand_part (DImode, operands[2], 0),
1961 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1962 emit_insn (gen_vec_duplicate<vnsi>
1963 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1964 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
1968 [(set_attr "type" "vmult")
1969 (set_attr "length" "8")])
;; Convenience expander: emits add<mode>3_vcc_zext_dup2, creating the hard
;; VCC register rtx locally for the carry operand.
1971 (define_expand "add<mode>3_zext_dup2"
1972 [(match_operand:V_DI 0 "register_operand")
1973 (match_operand:<VnSI> 1 "gcn_alu_operand")
1974 (match_operand:DI 2 "gcn_alu_operand")]
1977 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1978 emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
;; Masked variant of add<mode>3_vcc_zext_dup2: operand 4 is the merge source
;; and operand 5 the EXEC mask; the carry register (operand 3) is
;; earlyclobber.  Split: add3_vcc_dup_exec on the low parts, a masked
;; broadcast of the scalar's high word into the destination's high part, then
;; addc_exec adding const0_rtx plus the carry in place.
1983 (define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
1984 [(set (match_operand:V_DI 0 "register_operand")
1987 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"))
1988 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand")))
1989 (match_operand:V_DI 4 "gcn_register_or_unspec_operand")
1990 (match_operand:DI 5 "gcn_exec_reg_operand")))
1991 (set (match_operand:DI 3 "register_operand")
1994 (zero_extend:V_DI (match_dup 1))
1995 (vec_duplicate:V_DI (match_dup 2)))
1999 {@ [cons: =0, 1, 2, =3, 4, 5]
2000 [v,v,ASv,&Sg,U0,e] #
2001 [v,v,BSv,&cV,U0,e] ^
2003 "gcn_can_split_p (<MODE>mode, operands[0])
2004 && gcn_can_split_p (<MODE>mode, operands[4])"
2007 emit_insn (gen_add<vnsi>3_vcc_dup_exec
2008 (gcn_operand_part (<MODE>mode, operands[0], 0),
2009 gcn_operand_part (DImode, operands[2], 0),
2012 gcn_operand_part (<MODE>mode, operands[4], 0),
2014 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
2015 emit_insn (gen_vec_duplicate<vnsi>_exec
2016 (dsthi, gcn_operand_part (DImode, operands[2], 1),
2017 gcn_operand_part (<MODE>mode, operands[4], 1),
2019 emit_insn (gen_addc<vnsi>3_exec
2020 (dsthi, dsthi, const0_rtx, operands[3], operands[3],
2021 gcn_operand_part (<MODE>mode, operands[4], 1),
2025 [(set_attr "type" "vmult")
2026 (set_attr "length" "8")])
;; Convenience expander: emits add<mode>3_vcc_zext_dup2_exec with the hard
;; VCC register as the carry operand; this expander's operands 3 and 4 (merge
;; source and EXEC) are passed last.
2028 (define_expand "add<mode>3_zext_dup2_exec"
2029 [(match_operand:V_DI 0 "register_operand")
2030 (match_operand:<VnSI> 1 "gcn_alu_operand")
2031 (match_operand:DI 2 "gcn_alu_operand")
2032 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
2033 (match_operand:DI 4 "gcn_exec_reg_operand")]
2036 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2037 emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
2039 operands[3], operands[4]));
;; V_DI add of a sign-extended <VnSI> vector (operand 1) and a broadcast
;; DImode scalar (operand 2).  Operand 3 is a <VnSI> scratch and VCC is
;; clobbered.  Split: the scratch receives operand 1 shifted right
;; arithmetically by 31 (the sign-extension word); add3_vcc_dup handles the
;; low parts; the scalar's high word is broadcast into the destination's high
;; part and addc adds the scratch plus the carry to it.
2043 (define_insn_and_split "add<mode>3_sext_dup2"
2044 [(set (match_operand:V_DI 0 "register_operand" "= v")
2046 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
2047 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
2048 (clobber (match_scratch:<VnSI> 3 "=&v"))
2049 (clobber (reg:DI VCC_REG))]
2052 "gcn_can_split_p (<MODE>mode, operands[0])"
2055 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2056 emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31)));
2057 emit_insn (gen_add<vnsi>3_vcc_dup
2058 (gcn_operand_part (<MODE>mode, operands[0], 0),
2059 gcn_operand_part (DImode, operands[2], 0),
2062 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
2063 emit_insn (gen_vec_duplicate<vnsi>
2064 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
2065 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc));
2068 [(set_attr "type" "vmult")
2069 (set_attr "length" "8")])
;; Masked variant of add<mode>3_sext_dup2: operand 3 is the merge source,
;; operand 4 the EXEC mask and operand 5 the <VnSI> scratch.  The split
;; follows the same steps using the _exec patterns; the shift into the
;; scratch merges with an undefined value (gcn_gen_undef).
2071 (define_insn_and_split "add<mode>3_sext_dup2_exec"
2072 [(set (match_operand:V_DI 0 "register_operand" "= v")
2075 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
2076 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
2077 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2078 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2079 (clobber (match_scratch:<VnSI> 5 "=&v"))
2080 (clobber (reg:DI VCC_REG))]
2083 "gcn_can_split_p (<MODE>mode, operands[0])
2084 && gcn_can_split_p (<MODE>mode, operands[3])"
2087 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2088 emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31),
2089 gcn_gen_undef (<VnSI>mode), operands[4]));
2090 emit_insn (gen_add<vnsi>3_vcc_dup_exec
2091 (gcn_operand_part (<MODE>mode, operands[0], 0),
2092 gcn_operand_part (DImode, operands[2], 0),
2095 gcn_operand_part (<MODE>mode, operands[3], 0),
2097 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
2098 emit_insn (gen_vec_duplicate<vnsi>_exec
2099 (dsthi, gcn_operand_part (DImode, operands[2], 1),
2100 gcn_operand_part (<MODE>mode, operands[3], 1),
2102 emit_insn (gen_addc<vnsi>3_exec
2103 (dsthi, dsthi, operands[5], vcc, vcc,
2104 gcn_operand_part (<MODE>mode, operands[3], 1),
2108 [(set_attr "type" "vmult")
2109 (set_attr "length" "8")])
2112 ;; {{{ DS memory ALU: add/sub
;; Modes handled by the DS-memory add/sub patterns: the 64-lane SI, SF and
;; DI vector modes, and their scalar counterparts.
2114 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
2115 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
2117 ;; FIXME: the vector patterns probably need RD expanded to a vector of
2118 ;; addresses. For now, the only way a vector can get into LDS is
2119 ;; if the user puts it there manually.
2121 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
2122 ;; checked to see if anything can ever use them.
;; In-place vector add on a DS memory operand.  The insn condition requires
;; operands 0 and 1 to be the same rtx, so the effect is
;; mem = mem + register operand 2, emitted as a single ds_add.
2124 (define_insn "add<mode>3_ds<exec>"
2125 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
2127 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
2128 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
2129 "rtx_equal_p (operands[0], operands[1])"
2130 "ds_add%u0\t%A0, %2%O0"
2131 [(set_attr "type" "ds")
2132 (set_attr "length" "8")])
;; Scalar-mode counterpart of the DS in-place add: operands 0 and 1 must be
;; the same DS memory rtx, and register operand 2 is added to it with
;; ds_add.
2134 (define_insn "add<mode>3_ds_scalar"
2135 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
2136 (plus:DS_ARITH_SCALAR_MODE
2137 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
2139 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
2140 "rtx_equal_p (operands[0], operands[1])"
2141 "ds_add%u0\t%A0, %2%O0"
2142 [(set_attr "type" "ds")
2143 (set_attr "length" "8")])
;; In-place vector subtract on a DS memory operand: with operands 0 and 1
;; required to be the same rtx, this is mem = mem - register operand 2,
;; emitted as ds_sub.
2145 (define_insn "sub<mode>3_ds<exec>"
2146 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
2147 (minus:DS_ARITH_MODE
2148 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
2149 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
2150 "rtx_equal_p (operands[0], operands[1])"
2151 "ds_sub%u0\t%A0, %2%O0"
2152 [(set_attr "type" "ds")
2153 (set_attr "length" "8")])
;; Scalar-mode counterpart of the DS in-place subtract
;; (mem = mem - register operand 2), emitted as ds_sub.
2155 (define_insn "sub<mode>3_ds_scalar"
2156 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
2157 (minus:DS_ARITH_SCALAR_MODE
2158 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
2160 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
2161 "rtx_equal_p (operands[0], operands[1])"
2162 "ds_sub%u0\t%A0, %2%O0"
2163 [(set_attr "type" "ds")
2164 (set_attr "length" "8")])
;; Reversed in-place subtract on a DS memory operand.  Note the operand
;; order inside the minus: register operand 2 is the minuend and the memory
;; operand 1 (which must equal operand 0) is the subtrahend, i.e.
;; mem = register - mem, emitted as ds_rsub.
2166 (define_insn "subr<mode>3_ds<exec>"
2167 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
2168 (minus:DS_ARITH_MODE
2169 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
2170 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
2171 "rtx_equal_p (operands[0], operands[1])"
2172 "ds_rsub%u0\t%A0, %2%O0"
2173 [(set_attr "type" "ds")
2174 (set_attr "length" "8")])
;; Scalar-mode counterpart of the reversed DS subtract: register operand 2
;; minus the memory operand, written back to the same memory location with
;; ds_rsub.
2176 (define_insn "subr<mode>3_ds_scalar"
2177 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
2178 (minus:DS_ARITH_SCALAR_MODE
2179 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
2180 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
2182 "rtx_equal_p (operands[0], operands[1])"
2183 "ds_rsub%u0\t%A0, %2%O0"
2184 [(set_attr "type" "ds")
2185 (set_attr "length" "8")])
2188 ;; {{{ ALU special case: mult
;; High-part multiply of two V_SI operands into a V_SI register.  The
;; signedness is selected through the <su> / <sgnsuffix> attributes, and the
;; assembly template lists the sources in reverse order (%2 then %1).
2190 (define_insn "<su>mul<mode>3_highpart<exec>"
2191 [(set (match_operand:V_SI 0 "register_operand" "= v")
2196 (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
2198 (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
2201 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
2202 [(set_attr "type" "vop3a")
2203 (set_attr "length" "8")])
;; Low-part integer multiply for single-register integer vector modes.
;; Operand 1 carries the % modifier, so the two sources are commutative;
;; emitted as v_mul_lo_u32 in the vop3a encoding.
2205 (define_insn "mul<mode>3<exec>"
2206 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
2208 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
2209 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
2211 "v_mul_lo_u32\t%0, %1, %2"
2212 [(set_attr "type" "vop3a")
2213 (set_attr "length" "8")])
;; Variant of the low-part integer multiply whose second source is a scalar
;; of <SCALAR_MODE> broadcast with vec_duplicate; also emitted as
;; v_mul_lo_u32.
2215 (define_insn "mul<mode>3_dup<exec>"
2216 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
2218 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
2219 (vec_duplicate:V_INT_1REG
2220 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
2222 "v_mul_lo_u32\t%0, %1, %2"
2223 [(set_attr "type" "vop3a")
2224 (set_attr "length" "8")])
;; V_DI multiply built from <VnSI> operations.  The destination is
;; early-clobber and a <VnSI> scratch (operand 3) holds each partial
;; product.  The split emits, in order:
;;   out_lo  = left_lo * right_lo               (low word)
;;   out_hi  = umul_highpart (left_lo, right_lo)
;;   out_hi += left_hi * right_lo
;;   out_hi += left_lo * right_hi
;;   out_hi += left_hi * right_hi
;; NOTE(review): in a product truncated to 64 bits the left_hi * right_hi
;; term would only contribute to bits 64 and above, so adding its low word
;; into out_hi looks suspicious -- confirm whether this is intended.
2226 (define_insn_and_split "mul<mode>3"
2227 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2229 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
2230 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
2231 (clobber (match_scratch:<VnSI> 3 "=&v"))]
2237 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2238 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2239 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
2240 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
2241 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2242 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2243 rtx tmp = operands[3];
2245 emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
2246 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));
2247 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
2248 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
2249 emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
2250 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
2251 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_hi));
2252 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
;; EXEC-masked V_DI multiply built from <VnSI> operations.  Operand 3 is the
;; register-or-unspec merge value, operand 4 the EXEC register and operand 5
;; a <VnSI> scratch.  When operand 3 is an UNSPEC both old_lo and old_hi are
;; undef vectors; otherwise they are its low and high parts.
;; The low product merges with old_lo; the scratch partial products merge
;; with undef; each accumulation into out_hi uses out_hi itself as the merge
;; value.  The sequence of partial products matches the unmasked pattern.
;; NOTE(review): as in the unmasked pattern, the final left_hi * right_hi
;; product is added into out_hi although that term would only affect bits
;; 64 and above of a truncated 64-bit product -- confirm intent.
2256 (define_insn_and_split "mul<mode>3_exec"
2257 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2260 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
2261 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
2262 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2263 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2264 (clobber (match_scratch:<VnSI> 5 "=&v"))]
2270 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2271 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2272 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
2273 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
2274 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2275 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2276 rtx exec = operands[4];
2277 rtx tmp = operands[5];
2280 if (GET_CODE (operands[3]) == UNSPEC)
2282 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2286 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2287 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2290 rtx undef = gcn_gen_undef (<VnSI>mode);
2292 emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
2293 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
2295 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
2296 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2297 emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
2298 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2299 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_hi, undef, exec));
2300 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
;; V_DI multiply whose left source is a <VnSI> vector (operand 1) used
;; directly as a single word.  With no left high word, only three
;; operations feed the result:
;;   out_lo  = left * right_lo
;;   out_hi  = umul_highpart (left, right_lo)
;;   out_hi += left * right_hi   (via the <VnSI> scratch, operand 3)
2304 (define_insn_and_split "mul<mode>3_zext"
2305 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2308 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2309 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
2310 (clobber (match_scratch:<VnSI> 3 "=&v"))]
2316 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2317 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2318 rtx left = operands[1];
2319 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2320 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2321 rtx tmp = operands[3];
2323 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
2324 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
2325 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
2326 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
;; EXEC-masked form of mul<mode>3_zext.  Operand 3 is the register-or-unspec
;; merge value (undef halves are used when it is an UNSPEC), operand 4 the
;; EXEC register and operand 5 a <VnSI> scratch.  The low product merges
;; with old_lo, the scratch product with undef, and the accumulation into
;; out_hi merges with out_hi itself.
2330 (define_insn_and_split "mul<mode>3_zext_exec"
2331 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2335 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2336 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
2337 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2338 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2339 (clobber (match_scratch:<VnSI> 5 "=&v"))]
2345 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2346 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2347 rtx left = operands[1];
2348 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2349 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2350 rtx exec = operands[4];
2351 rtx tmp = operands[5];
2354 if (GET_CODE (operands[3]) == UNSPEC)
2356 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2360 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2361 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2364 rtx undef = gcn_gen_undef (<VnSI>mode);
2366 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
2367 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
2369 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
2370 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
;; Like mul<mode>3_zext, but the right source is a DImode scalar
;; (operand 2) rather than a V_DI vector.  Its low and high parts are taken
;; with gcn_operand_part and multiplied by the <VnSI> left operand:
;;   out_lo  = left * right_lo
;;   out_hi  = umul_highpart (left, right_lo)
;;   out_hi += left * right_hi   (via the <VnSI> scratch, operand 3)
2374 (define_insn_and_split "mul<mode>3_zext_dup2"
2375 [(set (match_operand:V_DI 0 "register_operand" "= &v")
2378 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2380 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
2381 (clobber (match_scratch:<VnSI> 3 "= &v"))]
2387 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2388 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2389 rtx left = operands[1];
2390 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2391 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2392 rtx tmp = operands[3];
2394 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
2395 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
2396 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
2397 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
;; EXEC-masked form of mul<mode>3_zext_dup2.  Operand 3 is the
;; register-or-unspec merge value (undef halves are used when it is an
;; UNSPEC), operand 4 the EXEC register and operand 5 a <VnSI> scratch.
;; The low product merges with old_lo, the scratch product with undef, and
;; the accumulation into out_hi merges with out_hi itself.
2401 (define_insn_and_split "mul<mode>3_zext_dup2_exec"
2402 [(set (match_operand:V_DI 0 "register_operand" "= &v")
2406 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2408 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
2409 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2410 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2411 (clobber (match_scratch:<VnSI> 5 "= &v"))]
2417 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2418 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2419 rtx left = operands[1];
2420 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2421 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2422 rtx exec = operands[4];
2423 rtx tmp = operands[5];
2426 if (GET_CODE (operands[3]) == UNSPEC)
2428 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2432 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2433 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2436 rtx undef = gcn_gen_undef (<VnSI>mode);
2438 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
2439 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
2441 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
2442 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
;; Iterator over the two complex-multiply unspecs, plus its attributes:
;;   conj_op      - pattern-name suffix (empty, or _conj for the conjugate);
;;   cmul_subadd  - operation used in the first (even-mask) step of cmul;
;;   cmul_addsub  - operation used in the second (odd-mask) step of cmul.
;; The conjugate form simply swaps add and sub between the two steps.
2446 (define_int_iterator UNSPEC_CMUL_OP [UNSPEC_CMUL UNSPEC_CMUL_CONJ])
2447 (define_int_attr conj_op [(UNSPEC_CMUL "") (UNSPEC_CMUL_CONJ "_conj")])
2448 (define_int_attr cmul_subadd [(UNSPEC_CMUL "sub") (UNSPEC_CMUL_CONJ "add")])
2449 (define_int_attr cmul_addsub [(UNSPEC_CMUL "add") (UNSPEC_CMUL_CONJ "sub")])
;; Complex multiply expander (plain or conjugate, per <conj_op>).  The
;; trailing lane comments in the body give the contents of each adjacent
;; pair of lanes.  Steps:
;;   t1      = op1 * op2
;;   s2_perm = op2 with lane pairs swapped;  t2 = op1 * s2_perm
;;   t1_perm = t1 with lane pairs swapped
;;   EXEC <- get_exec (0x5555...)  then dest = <cmul_subadd> (t1, t1_perm),
;;           merging with an undef vector
;;   t2_perm = t2 with lane pairs swapped
;;   EXEC <- get_exec (0xaaaa...)  then dest = <cmul_addsub> (t2, t2_perm),
;;           merging with the dest written by the first step
;; The expander writes the hard EXEC register directly via emit_move_insn.
2451 (define_expand "cmul<conj_op><mode>3"
2452 [(set (match_operand:V_noHI 0 "register_operand" "=&v")
2454 [(match_operand:V_noHI 1 "register_operand" "v")
2455 (match_operand:V_noHI 2 "register_operand" "v")]
2461 rtx t1 = gen_reg_rtx (<MODE>mode);
2462 emit_insn (gen_mul<mode>3 (t1, operands[1], operands[2])); // a*c b*d
2464 rtx s2_perm = gen_reg_rtx (<MODE>mode);
2465 emit_insn (gen_dpp_swap_pairs<mode> (s2_perm, operands[2])); // d c
2467 rtx t2 = gen_reg_rtx (<MODE>mode);
2468 emit_insn (gen_mul<mode>3 (t2, operands[1], s2_perm)); // a*d b*c
2470 rtx t1_perm = gen_reg_rtx (<MODE>mode);
2471 emit_insn (gen_dpp_swap_pairs<mode> (t1_perm, t1)); // b*d a*c
2473 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2474 emit_move_insn (even, get_exec (0x5555555555555555UL));
2475 rtx dest = operands[0];
2476 emit_insn (gen_<cmul_subadd><mode>3_exec (dest, t1, t1_perm,
2477 gcn_gen_undef (<MODE>mode),
2478 even)); // a*c-b*d 0
2480 rtx t2_perm = gen_reg_rtx (<MODE>mode);
2481 emit_insn (gen_dpp_swap_pairs<mode> (t2_perm, t2)); // b*c a*d
2483 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2484 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2485 emit_insn (gen_<cmul_addsub><mode>3_exec (dest, t2, t2_perm, dest, odd));
;; Code iterator over plus/minus and the one-letter suffix (a / s) used to
;; form the cmla/cmls and fma/fms pattern names.
2490 (define_code_iterator addsub [plus minus])
2491 (define_code_attr addsub_as [(plus "a") (minus "s")])
;; Complex multiply combined with an add or subtract of operand 3
;; (cmla / cmls, per <addsub_as>).  Steps:
;;   a       = even element of each pair of op1 distributed to both lanes
;;   t1      = fm<a|s> (a, op2, op3)
;;   b       = odd element of each pair of op1 distributed to both lanes
;;   t2      = b * op2;  t2_perm = t2 with lane pairs swapped
;;   EXEC <- get_exec (0x5555...)  then dest = t1 - t2_perm, merging with an
;;           undef vector
;;   EXEC <- get_exec (0xaaaa...)  then dest = t1 + t2_perm, merging with
;;           the dest written by the first step
;; The expander writes the hard EXEC register directly via emit_move_insn.
2493 (define_expand "cml<addsub_as><mode>4"
2494 [(set (match_operand:V_FP 0 "register_operand" "=&v")
2497 [(match_operand:V_FP 1 "register_operand" "v")
2498 (match_operand:V_FP 2 "register_operand" "v")]
2500 (match_operand:V_FP 3 "register_operand" "v")))]
2503 rtx a = gen_reg_rtx (<MODE>mode);
2504 emit_insn (gen_dpp_distribute_even<mode> (a, operands[1])); // a a
2506 rtx t1 = gen_reg_rtx (<MODE>mode);
2507 emit_insn (gen_fm<addsub_as><mode>4 (t1, a, operands[2], operands[3]));
2510 rtx b = gen_reg_rtx (<MODE>mode);
2511 emit_insn (gen_dpp_distribute_odd<mode> (b, operands[1])); // b b
2513 rtx t2 = gen_reg_rtx (<MODE>mode);
2514 emit_insn (gen_mul<mode>3 (t2, b, operands[2])); // b*c b*d
2516 rtx t2_perm = gen_reg_rtx (<MODE>mode);
2517 emit_insn (gen_dpp_swap_pairs<mode> (t2_perm, t2)); // b*d b*c
2519 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2520 emit_move_insn (even, get_exec (0x5555555555555555UL));
2521 rtx dest = operands[0];
2522 emit_insn (gen_sub<mode>3_exec (dest, t1, t2_perm,
2523 gcn_gen_undef (<MODE>mode), even));
2525 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2526 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2527 emit_insn (gen_add<mode>3_exec (dest, t1, t2_perm, dest, odd));
;; Alternating subtract/add of operands 1 and 2.  The selector constant
;; 6148914691236517205 equals 0x5555555555555555, the same value used for
;; the even mask in the body.  Expansion:
;;   EXEC <- get_exec (0x5555...)  then dest = x - y, merging with undef
;;   EXEC <- get_exec (0xaaaa...)  then dest = x + y, merging with the dest
;;           written by the first step
2532 (define_expand "vec_addsub<mode>3"
2533 [(set (match_operand:V_noHI 0 "register_operand" "=&v")
2536 (match_operand:V_noHI 1 "register_operand" "v")
2537 (match_operand:V_noHI 2 "register_operand" "v"))
2538 (plus:V_noHI (match_dup 1) (match_dup 2))
2539 (const_int 6148914691236517205)))]
2542 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2543 emit_move_insn (even, get_exec (0x5555555555555555UL));
2544 rtx dest = operands[0];
2545 rtx x = operands[1];
2546 rtx y = operands[2];
2547 emit_insn (gen_sub<mode>3_exec (dest, x, y, gcn_gen_undef (<MODE>mode),
2549 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2550 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2551 emit_insn (gen_add<mode>3_exec (dest, x, y, dest, odd));
;; Iterator over the two complex-add-with-rotation unspecs, plus its
;; attributes:
;;   rot          - rotation suffix used in the pattern name (90 or 270);
;;   cadd_subadd  - operation used in the first (even-mask) step of cadd;
;;   cadd_addsub  - operation used in the second (odd-mask) step of cadd.
2556 (define_int_iterator CADD [UNSPEC_CADD90 UNSPEC_CADD270])
2557 (define_int_attr rot [(UNSPEC_CADD90 "90") (UNSPEC_CADD270 "270")])
2558 (define_int_attr cadd_subadd [(UNSPEC_CADD90 "sub") (UNSPEC_CADD270 "add")])
2559 (define_int_attr cadd_addsub [(UNSPEC_CADD90 "add") (UNSPEC_CADD270 "sub")])
;; Complex add with rotation (cadd90 / cadd270).  Operand 2 is first
;; permuted with dpp_swap_pairs into y, then:
;;   EXEC <- get_exec (0x5555...)  then dest = <cadd_subadd> (x, y),
;;           merging with an undef vector
;;   EXEC <- get_exec (0xaaaa...)  then dest = <cadd_addsub> (x, y),
;;           merging with the dest written by the first step
;; The expander writes the hard EXEC register directly via emit_move_insn.
2561 (define_expand "cadd<rot><mode>3"
2562 [(set (match_operand:V_noHI 0 "register_operand" "=&v")
2563 (unspec:V_noHI [(match_operand:V_noHI 1 "register_operand" "v")
2564 (match_operand:V_noHI 2 "register_operand" "v")]
2568 rtx dest = operands[0];
2569 rtx x = operands[1];
2570 rtx y = gen_reg_rtx (<MODE>mode);
2571 emit_insn (gen_dpp_swap_pairs<mode> (y, operands[2]));
2573 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2574 emit_move_insn (even, get_exec (0x5555555555555555UL));
2575 emit_insn (gen_<cadd_subadd><mode>3_exec (dest, x, y,
2576 gcn_gen_undef (<MODE>mode),
2578 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2579 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2580 emit_insn (gen_<cadd_addsub><mode>3_exec (dest, x, y, dest, odd));
;; Multiply then alternating subtract/add.  t1 = op1 * op2 is computed with
;; a separate multiply, then:
;;   EXEC <- get_exec (0x5555...)  then dest = t1 - op3, merging with undef
;;   EXEC <- get_exec (0xaaaa...)  then dest = t1 + op3, merging with the
;;           dest written by the first step
2585 (define_expand "vec_fmaddsub<mode>4"
2586 [(match_operand:V_noHI 0 "register_operand" "=&v")
2587 (match_operand:V_noHI 1 "register_operand" "v")
2588 (match_operand:V_noHI 2 "register_operand" "v")
2589 (match_operand:V_noHI 3 "register_operand" "v")]
2592 rtx t1 = gen_reg_rtx (<MODE>mode);
2593 emit_insn (gen_mul<mode>3 (t1, operands[1], operands[2]));
2594 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2595 emit_move_insn (even, get_exec (0x5555555555555555UL));
2596 rtx dest = operands[0];
2597 emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3],
2598 gcn_gen_undef (<MODE>mode), even));
2599 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2600 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2601 emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, odd));
;; Multiply then alternating add/subtract: the mirror image of
;; vec_fmaddsub.  t1 = op1 * op2 is computed with a separate multiply, then:
;;   EXEC <- get_exec (0x5555...)  then dest = t1 + op3, merging with undef
;;   EXEC <- get_exec (0xaaaa...)  then dest = t1 - op3, merging with the
;;           dest written by the first step
2606 (define_expand "vec_fmsubadd<mode>4"
2607 [(match_operand:V_noHI 0 "register_operand" "=&v")
2608 (match_operand:V_noHI 1 "register_operand" "v")
2609 (match_operand:V_noHI 2 "register_operand" "v")
2610 (match_operand:V_noHI 3 "register_operand" "v")]
2613 rtx t1 = gen_reg_rtx (<MODE>mode);
2614 emit_insn (gen_mul<mode>3 (t1, operands[1], operands[2]));
2615 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2616 emit_move_insn (even, get_exec (0x5555555555555555UL));
2617 rtx dest = operands[0];
2618 emit_insn (gen_add<mode>3_exec (dest, t1, operands[3],
2619 gcn_gen_undef (<MODE>mode), even));
2620 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2621 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2622 emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], dest, odd));
2628 ;; {{{ ALU generic case
;; Code iterators for the generic integer ALU patterns: bitwise logic,
;; shifts, and signed/unsigned min/max.
2630 (define_code_iterator bitop [and ior xor])
2631 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
2632 (define_code_iterator minmaxop [smin smax umin umax])
;; Generic unary integer VALU pattern for single-register vector modes;
;; emitted as v_<mnemonic>0 %0, %1 in the vop1 encoding.
2634 (define_insn "<expander><mode>2<exec>"
2635 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
2637 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
2639 "v_<mnemonic>0\t%0, %1"
2640 [(set_attr "type" "vop1")
2641 (set_attr "length" "8")])
;; Generic binary integer pattern for single-register vector modes, with
;; two alternatives:
;;   1. register destination: v_<mnemonic>0 with the sources printed in
;;      reverse order (%2, %1); operand 1 is marked commutative with %;
;;   2. DS memory destination (RD) with operand 1 tied to operand 0:
;;      ds_<mnemonic>0 operating in place on memory.
2643 (define_insn "<expander><mode>3<exec>"
2644 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
2646 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
2647 (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2650 v_<mnemonic>0\t%0, %2, %1
2651 ds_<mnemonic>0\t%A0, %2%O0"
2652 [(set_attr "type" "vop2,ds")
2653 (set_attr "length" "8,8")])
;; V_DI bitwise operation.  The DS-memory alternative is emitted directly as
;; ds_<mnemonic>0.  Otherwise, after reload and when the destination is not
;; DS memory, the insn is split into two <VnSI> bitops: operands 3/5/7 are
;; the low parts of operands 0/1/2 and operands 4/6/8 the high parts.
2655 (define_insn_and_split "<expander><mode>3"
2656 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2658 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2659 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2663 ds_<mnemonic>0\t%A0, %2%O0"
2664 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2666 (bitop:<VnSI> (match_dup 5) (match_dup 7)))
2668 (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
2670 operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0);
2671 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1);
2672 operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0);
2673 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1);
2674 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0);
2675 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1);
2677 [(set_attr "type" "vmult,ds")
2678 (set_attr "length" "16,8")])
;; EXEC-masked V_DI bitwise operation.  The insn condition accepts a
;; non-memory destination, or a memory destination only when it equals
;; operand 1 and operand 2 is a register.  The DS-memory alternative is
;; emitted as ds_<mnemonic>0; otherwise, after reload, the insn is split
;; into two <VnSI> bitops.  Operands 5-10 are the low/high parts of operands
;; 0, 1 and 2, and operands 11/12 are the low/high parts of the merge value
;; (operand 3).
2680 (define_insn_and_split "<expander><mode>3_exec"
2681 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2684 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2685 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2686 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
2687 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2688 "!memory_operand (operands[0], VOIDmode)
2689 || (rtx_equal_p (operands[0], operands[1])
2690 && register_operand (operands[2], VOIDmode))"
2693 ds_<mnemonic>0\t%A0, %2%O0"
2694 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2697 (bitop:<VnSI> (match_dup 7) (match_dup 9))
2702 (bitop:<VnSI> (match_dup 8) (match_dup 10))
2706 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0);
2707 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1);
2708 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0);
2709 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1);
2710 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0);
2711 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1);
2712 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0);
2713 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1);
2715 [(set_attr "type" "vmult,ds")
2716 (set_attr "length" "16,8")])
;; Shift of a QI/HI vector by a scalar SImode amount.  The vector is widened
;; to <VnSI> with convert_move, shifted there, and the result converted back
;; to the narrow mode.  The conversions are unsigned only for lshiftrt.
;; The local enum gives the substituted <code> name an integer value so that
;; it can be compared in C.
2718 (define_expand "<expander><mode>3"
2719 [(set (match_operand:V_QIHI 0 "register_operand" "= v")
2721 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2722 (vec_duplicate:V_QIHI
2723 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2726 enum {ashift, lshiftrt, ashiftrt};
2727 bool unsignedp = (<code> == lshiftrt);
2728 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2729 rtx insi2 = gen_reg_rtx (SImode);
2730 rtx outsi = gen_reg_rtx (<VnSI>mode);
2732 convert_move (insi1, operands[1], unsignedp);
2733 convert_move (insi2, operands[2], unsignedp);
2734 emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2));
2735 convert_move (operands[0], outsi, unsignedp);
;; Shift of an integer vector (V_INT_noHI modes) by a scalar SImode amount
;; broadcast with vec_duplicate.  Emitted with the reversed-operand mnemonic
;; v_<revmnemonic>0, so the shift amount (%2) is printed before the value
;; being shifted (%1).
2739 (define_insn "<expander><mode>3<exec>"
2740 [(set (match_operand:V_INT_noHI 0 "register_operand" "= v")
2742 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2743 (vec_duplicate:<VnSI>
2744 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2746 "v_<revmnemonic>0\t%0, %2, %1"
2747 [(set_attr "type" "vop2")
2748 (set_attr "length" "8")])
;; Shift of a QI/HI vector by a per-lane vector amount.  Both the value and
;; the amount are widened to <VnSI> with convert_move, shifted there, and
;; the result converted back.  The conversions are unsigned only for
;; lshiftrt.  The local enum gives the substituted <code> name an integer
;; value so that it can be compared in C.
2750 (define_expand "v<expander><mode>3"
2751 [(set (match_operand:V_QIHI 0 "register_operand" "=v")
2753 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2754 (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
2757 enum {ashift, lshiftrt, ashiftrt};
2758 bool unsignedp = (<code> == lshiftrt);
2759 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2760 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2761 rtx outsi = gen_reg_rtx (<VnSI>mode);
2763 convert_move (insi1, operands[1], unsignedp);
2764 convert_move (insi2, operands[2], unsignedp);
2765 emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2));
2766 convert_move (operands[0], outsi, unsignedp);
;; Shift of an integer vector (V_INT_noHI modes) by a per-lane <VnSI> amount
;; vector.  Emitted with the reversed-operand mnemonic v_<revmnemonic>0:
;; the amount (%2) is printed before the value being shifted (%1).
2770 (define_insn "v<expander><mode>3<exec>"
2771 [(set (match_operand:V_INT_noHI 0 "register_operand" "=v")
2773 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2774 (match_operand:<VnSI> 2 "gcn_alu_operand" "vB")))]
2776 "v_<revmnemonic>0\t%0, %2, %1"
2777 [(set_attr "type" "vop2")
2778 (set_attr "length" "8")])
;; Min/max on QI/HI vectors.  Both inputs are widened to <VnSI> with
;; convert_move (zero-extending for umin/umax, sign-extending otherwise),
;; the <VnSI> min/max is emitted, and the result is converted back to the
;; narrow mode.  The local enum gives the substituted <code> name an integer
;; value so that it can be compared in C.
2780 (define_expand "<expander><mode>3"
2781 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
2783 (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
2784 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
2787 enum {smin, umin, smax, umax};
2788 bool unsignedp = (<code> == umax || <code> == umin);
2789 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2790 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2791 rtx outsi = gen_reg_rtx (<VnSI>mode);
2793 convert_move (insi1, operands[1], unsignedp);
2794 convert_move (insi2, operands[2], unsignedp);
2795 emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2));
2796 convert_move (operands[0], outsi, unsignedp);
;; EXEC-masked min/max on QI/HI vectors.  The inputs are widened to <VnSI>
;; (unsigned conversion for umin/umax), the masked <VnSI> min/max is emitted
;; with an undef merge value, and the result is narrowed into a fresh
;; temporary.  A final gen_mov<mode>_exec copies the temporary into the
;; destination under the same EXEC mask, using operand 3 as the merge value.
2800 (define_expand "<expander><mode>3_exec"
2801 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
2804 (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
2805 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand"))
2806 (match_operand:V_QIHI 3 "gcn_register_or_unspec_operand" "U0")
2807 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))]
2810 enum {smin, umin, smax, umax};
2811 bool unsignedp = (<code> == umax || <code> == umin);
2812 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2813 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2814 rtx outsi = gen_reg_rtx (<VnSI>mode);
2815 rtx out = operands[0];
2816 rtx exec = operands[4];
2817 rtx tmp = gen_reg_rtx (<MODE>mode);
2819 convert_move (insi1, operands[1], unsignedp);
2820 convert_move (insi2, operands[2], unsignedp);
2821 emit_insn (gen_<code><vnsi>3_exec (outsi, insi1, insi2,
2822 gcn_gen_undef(<VnSI>mode), exec));
2823 convert_move (tmp, outsi, unsignedp);
2824 emit_insn (gen_mov<mode>_exec (out, tmp, operands[3], exec));
;; Binary V_SI pattern (presumably the min/max instruction targeted by the
;; QI/HI expanders above -- confirm against the operator in the RTL), with
;; two alternatives:
;;   1. register destination: v_<mnemonic>0 with sources printed as %2, %1;
;;   2. DS memory destination with operand 1 tied to operand 0:
;;      ds_<mnemonic>0 operating in place on memory.
2828 (define_insn "<expander><vnsi>3<exec>"
2829 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD")
2831 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0")
2832 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2835 v_<mnemonic>0\t%0, %2, %1
2836 ds_<mnemonic>0\t%A0, %2%O0"
2837 [(set_attr "type" "vop2,ds")
2838 (set_attr "length" "8,8")])
;; V_DI min/max implemented as compare-and-select; clobbers VCC_REG.
;; The split emits a vector compare of operands 1 and 2 into VCC, using
;; LT (min) or GT (max) for the signed codes and LTU / GTU for the unsigned
;; ones, then a vcond_mask that picks operand 1 or operand 2 per lane
;; according to VCC.  The local enum gives the substituted <code> name an
;; integer value so that it can be compared in C.
2840 (define_insn_and_split "<expander><mode>3"
2841 [(set (match_operand:V_DI 0 "register_operand" "=v")
2843 (match_operand:V_DI 1 "gcn_alu_operand" " v")
2844 (match_operand:V_DI 2 "gcn_alu_operand" " v")))
2845 (clobber (reg:DI VCC_REG))]
2851 rtx out = operands[0];
2852 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2854 enum {smin, smax, umin, umax};
2855 bool minp = (<code> == smin || <code> == umin);
2856 if (<code> == smin || <code> == smax)
2857 emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LT (VOIDmode, 0, 0) :
2858 gen_rtx_GT (VOIDmode, 0, 0), operands[1],
2861 emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LTU (VOIDmode, 0, 0) :
2862 gen_rtx_GTU (VOIDmode, 0, 0), operands[1],
2864 emit_insn (gen_vcond_mask_<mode>di (out, operands[1], operands[2], vcc));
2866 [(set_attr "type" "mult")])
;; EXEC-masked V_DI min/max implemented as compare-and-select.  Uses a
;; <VnDI> scratch (operand 5) and clobbers VCC_REG.  The split emits:
;;   1. a masked vector compare of operands 1 and 2 into VCC (LT/GT for the
;;      signed codes, LTU/GTU for the unsigned ones);
;;   2. an unmasked vcond_mask selecting operand 1 or 2 into the scratch;
;;   3. a masked move of the scratch into the destination, with operand 3
;;      as the merge value.
;; Note that operand 4 (EXEC) uses the read-write constraint modifier (+e).
2868 (define_insn_and_split "<expander><mode>3_exec"
2869 [(set (match_operand:V_DI 0 "register_operand" "= v")
2872 (match_operand:V_DI 1 "gcn_alu_operand" " v")
2873 (match_operand:V_DI 2 "gcn_alu_operand" " v"))
2874 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2875 (match_operand:DI 4 "gcn_exec_reg_operand" "+e")))
2876 (clobber (match_scratch:<VnDI> 5 "= &v"))
2877 (clobber (reg:DI VCC_REG))]
2883 rtx out = operands[0];
2884 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2885 rtx exec = operands[4];
2886 rtx tmp = operands[5];
2888 enum {smin, smax, umin, umax};
2889 bool minp = (<code> == smin || <code> == umin);
2890 if (<code> == smin || <code> == smax)
2891 emit_insn (gen_vec_cmp<mode>di_exec (vcc,
2892 minp ? gen_rtx_LT (VOIDmode, 0, 0) :
2893 gen_rtx_GT (VOIDmode, 0, 0),
2894 operands[1], operands[2], exec));
2896 emit_insn (gen_vec_cmp<mode>di_exec (vcc,
2897 minp ? gen_rtx_LTU (VOIDmode, 0, 0) :
2898 gen_rtx_GTU (VOIDmode, 0, 0),
2899 operands[1], operands[2], exec));
2900 emit_insn (gen_vcond_mask_<mode>di (tmp, operands[1], operands[2], vcc));
2901 emit_insn (gen_mov<mode>_exec (out, tmp, operands[3], exec));
2903 [(set_attr "type" "mult")])
;; Integer vector negate, expanded as a subtract whose first source is the
;; all-zero vector constant (presumably 0 - operand 1 -- the second source
;; argument should be confirmed against the full call).
2908 (define_expand "neg<mode>2"
2909 [(match_operand:V_INT 0 "register_operand")
2910 (match_operand:V_INT 1 "register_operand")]
2913 emit_insn (gen_sub<mode>3 (operands[0], gcn_vec_constant (<MODE>mode, 0),
;; V_DI bitwise complement, split into two <VnSI> not operations: operands
;; 3/5 are the low parts of the destination/source and operands 4/6 the
;; high parts.
2918 (define_insn_and_split "one_cmpl<mode>2<exec>"
2919 [(set (match_operand:V_DI 0 "register_operand" "= v")
2921 (match_operand:V_DI 1 "gcn_alu_operand" "vSvDB")))]
2925 [(set (match_dup 3) (not:<VnSI> (match_dup 5)))
2926 (set (match_dup 4) (not:<VnSI> (match_dup 6)))]
2928 operands[3] = gcn_operand_part (<VnDI>mode, operands[0], 0);
2929 operands[4] = gcn_operand_part (<VnDI>mode, operands[0], 1);
2930 operands[5] = gcn_operand_part (<VnDI>mode, operands[1], 0);
2931 operands[6] = gcn_operand_part (<VnDI>mode, operands[1], 1);
2933 [(set_attr "type" "mult")])
2936 ;; {{{ FP binops - special cases
2938 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2939 ; adding the negated second operand to the first.
;; V_DF subtract, emitted as v_add_f64 with operand 2 negated.  The two
;; alternatives differ only in which source may be a non-VGPR operand; the
;; second prints the negated operand 2 first.
2941 (define_insn "sub<mode>3<exec>"
2942 [(set (match_operand:V_DF 0 "register_operand" "= v, v")
2944 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v")
2945 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))]
2948 v_add_f64\t%0, %1, -%2
2949 v_add_f64\t%0, -%2, %1"
2950 [(set_attr "type" "vop3a")
2951 (set_attr "length" "8,8")])
;; Scalar DFmode subtract in a vector register, emitted as v_add_f64 with
;; operand 2 negated; same two alternatives as the vector pattern.
2953 (define_insn "subdf3"
2954 [(set (match_operand:DF 0 "register_operand" "= v, v")
2956 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2957 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2960 v_add_f64\t%0, %1, -%2
2961 v_add_f64\t%0, -%2, %1"
2962 [(set_attr "type" "vop3a")
2963 (set_attr "length" "8,8")])
2966 ;; {{{ FP binops - generic
;; Code iterators for floating-point binary operations: the commutative
;; ones, the non-commutative one (minus), and the union of both.
2968 (define_code_iterator comm_fp [plus mult smin smax])
2969 (define_code_iterator nocomm_fp [minus])
2970 (define_code_iterator all_fp [plus mult minus smin smax])
;; Binary floating-point vector pattern whose operand 1 is marked
;; commutative (%).  Emitted as v_<mnemonic>0 with the sources printed in
;; reverse order (%2, %1).
2972 (define_insn "<expander><mode>3<exec>"
2973 [(set (match_operand:V_FP 0 "register_operand" "= v")
2975 (match_operand:V_FP 1 "gcn_alu_operand" "% v")
2976 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
2978 "v_<mnemonic>0\t%0, %2, %1"
2979 [(set_attr "type" "vop2")
2980 (set_attr "length" "8")])
;; Scalar floating-point counterpart of the commutative binary pattern, with
;; a register alternative and an RL-constrained alternative in which
;; operand 1 is tied to operand 0.
;; NOTE(review): the second alternative is typed ds, yet its template starts
;; with v_ rather than ds_ and never prints %2 -- confirm whether this
;; alternative is correct or ever selected.
2982 (define_insn "<expander><mode>3"
2983 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
2985 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
2986 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2989 v_<mnemonic>0\t%0, %2, %1
2990 v_<mnemonic>0\t%0, %1%O0"
2991 [(set_attr "type" "vop2,ds")
2992 (set_attr "length" "8")])
;; Non-commutative floating-point vector operation (nocomm_fp, i.e. minus)
;; for single-register modes.  Because the sources cannot be swapped freely,
;; there are two alternatives: the first prints v_<mnemonic>0 with the
;; sources in order, the second prints the reversed-operand instruction
;; v_<revmnemonic>0 with the sources swapped (%2, %1).
2994 (define_insn "<expander><mode>3<exec>"
2995 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
2996 (nocomm_fp:V_FP_1REG
2997 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2998 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
3001 v_<mnemonic>0\t%0, %1, %2
3002 v_<revmnemonic>0\t%0, %2, %1"
3003 [(set_attr "type" "vop2")
3004 (set_attr "length" "8,8")])
;; Scalar single-register floating-point counterpart of the two-alternative
;; pattern: the first alternative prints v_<mnemonic>0 with the sources in
;; order, the second prints v_<revmnemonic>0 with the sources swapped.
3006 (define_insn "<expander><mode>3"
3007 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v")
3009 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
3010 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
3013 v_<mnemonic>0\t%0, %1, %2
3014 v_<revmnemonic>0\t%0, %2, %1"
3015 [(set_attr "type" "vop2")
3016 (set_attr "length" "8,8")])
;; Iterator over the floating-point min/max codes, and the scalar expander
;; named through <fexpander> for them.
3018 (define_code_iterator fminmaxop [smin smax])
3019 (define_expand "<fexpander><mode>3"
3020 [(set (match_operand:FP 0 "gcn_valu_dst_operand")
3022 (match_operand:FP 1 "gcn_valu_src0_operand")
3023 (match_operand:FP 2 "gcn_valu_src1_operand")))]
;; Vector (V_FP) counterpart of the <fexpander> expander.
3027 (define_expand "<fexpander><mode>3<exec>"
3028 [(set (match_operand:V_FP 0 "gcn_valu_dst_operand")
3030 (match_operand:V_FP 1 "gcn_valu_src0_operand")
3031 (match_operand:V_FP 2 "gcn_valu_src1_operand")))]
;; Scalar floating-point absolute value.  There is no dedicated instruction
;; in the template: it is emitted as an add of 0 and operand 1 wrapped in
;; the |...| source modifier, using the vop3a encoding.
3038 (define_insn "abs<mode>2"
3039 [(set (match_operand:FP 0 "register_operand" "=v")
3040 (abs:FP (match_operand:FP 1 "register_operand" " v")))]
3042 "v_add%i0\t%0, 0, |%1|"
3043 [(set_attr "type" "vop3a")
3044 (set_attr "length" "8")])
;; Vector floating-point absolute value, emitted as an add of 0 and
;; operand 1 wrapped in the |...| source modifier (vop3a encoding).
3046 (define_insn "abs<mode>2<exec>"
3047 [(set (match_operand:V_FP 0 "register_operand" "=v")
3049 (match_operand:V_FP 1 "register_operand" " v")))]
3051 "v_add%i0\t%0, 0, |%1|"
3052 [(set_attr "type" "vop3a")
3053 (set_attr "length" "8")])
;; Vector floating-point negate, emitted as an add of 0 and operand 1 with
;; the negation source modifier (vop3a encoding).
3055 (define_insn "neg<mode>2<exec>"
3056 [(set (match_operand:V_FP 0 "register_operand" "=v")
3058 (match_operand:V_FP 1 "register_operand" " v")))]
3060 "v_add%i0\t%0, 0, -%1"
3061 [(set_attr "type" "vop3a")
3062 (set_attr "length" "8")])
;; Vector floating-point square root (vop1).  Only enabled when
;; flag_unsafe_math_optimizations is set.
3064 (define_insn "sqrt<mode>2<exec>"
3065 [(set (match_operand:V_FP 0 "register_operand" "= v")
3067 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
3068 "flag_unsafe_math_optimizations"
3070 [(set_attr "type" "vop1")
3071 (set_attr "length" "8")])
;; Scalar floating-point square root (vop1).  Only enabled when
;; flag_unsafe_math_optimizations is set.
3073 (define_insn "sqrt<mode>2"
3074 [(set (match_operand:FP 0 "register_operand" "= v")
3076 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
3077 "flag_unsafe_math_optimizations"
3079 [(set_attr "type" "vop1")
3080 (set_attr "length" "8")])
3082 ; These FP unops have f64, f32 and f16 versions.
;; Int iterator over the floor and ceil unspecs.
3083 (define_int_iterator MATH_UNOP_1OR2REG
3084 [UNSPEC_FLOOR UNSPEC_CEIL])
3086 ; These FP unops only have f16/f32 versions.
;; Int iterator over the exp2 and log2 unspecs.
3087 (define_int_iterator MATH_UNOP_1REG
3088 [UNSPEC_EXP2 UNSPEC_LOG2])
;; Int iterator over the sin and cos unspecs.
3090 (define_int_iterator MATH_UNOP_TRIG
3091 [UNSPEC_SIN UNSPEC_COS])
;; Maps each math unspec to the name stem used to build the pattern names
;; "<math_unop><mode>2" below.
3093 (define_int_attr math_unop
3094 [(UNSPEC_FLOOR "floor")
3095 (UNSPEC_CEIL "ceil")
3096 (UNSPEC_EXP2 "exp2")
3097 (UNSPEC_LOG2 "log2")
3099 (UNSPEC_COS "cos")])
;; Maps each math unspec to the mnemonic fragment printed after "v_" in the
;; output templates below.
3101 (define_int_attr math_unop_insn
3102 [(UNSPEC_FLOOR "floor")
3103 (UNSPEC_CEIL "ceil")
3107 (UNSPEC_COS "cos")])
;; floor/ceil (MATH_UNOP_1OR2REG) for the FP modes as a single VOP1
;; instruction, v_<math_unop_insn> with the %i0 suffix.
3109 (define_insn "<math_unop><mode>2"
3110 [(set (match_operand:FP 0 "register_operand" "= v")
3112 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
3113 MATH_UNOP_1OR2REG))]
3115 "v_<math_unop_insn>%i0\t%0, %1"
3116 [(set_attr "type" "vop1")
3117 (set_attr "length" "8")])
;; Vector (V_FP) form of the floor/ceil instruction (MATH_UNOP_1OR2REG).
3119 (define_insn "<math_unop><mode>2<exec>"
3120 [(set (match_operand:V_FP 0 "register_operand" "= v")
3122 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
3123 MATH_UNOP_1OR2REG))]
3125 "v_<math_unop_insn>%i0\t%0, %1"
3126 [(set_attr "type" "vop1")
3127 (set_attr "length" "8")])
;; Math unop on the FP_1REG modes, only enabled when
;; flag_unsafe_math_optimizations is set.
;; NOTE(review): presumably iterates MATH_UNOP_1REG (exp2/log2) -- confirm.
3129 (define_insn "<math_unop><mode>2"
3130 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
3132 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
3134 "flag_unsafe_math_optimizations"
3135 "v_<math_unop_insn>%i0\t%0, %1"
3136 [(set_attr "type" "vop1")
3137 (set_attr "length" "8")])
;; Vector (V_FP_1REG) form of the preceding math unop, likewise only
;; enabled when flag_unsafe_math_optimizations is set.
;; NOTE(review): presumably iterates MATH_UNOP_1REG (exp2/log2) -- confirm.
3139 (define_insn "<math_unop><mode>2<exec>"
3140 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
3142 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
3144 "flag_unsafe_math_optimizations"
3145 "v_<math_unop_insn>%i0\t%0, %1"
3146 [(set_attr "type" "vop1")
3147 (set_attr "length" "8")])
;; Insn with a '*'-prefixed name for the FP_1REG modes, only enabled when
;; flag_unsafe_math_optimizations is set.
;; NOTE(review): presumably the raw sin/cos instruction matched by the
;; trigonometric expanders that follow -- confirm.
3149 (define_insn "*<math_unop><mode>2_insn"
3150 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
3152 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
3154 "flag_unsafe_math_optimizations"
3155 "v_<math_unop_insn>%i0\t%0, %1"
3156 [(set_attr "type" "vop1")
3157 (set_attr "length" "8")])
;; Vector (V_FP_1REG) form of the '*'-prefixed math unop insn above, only
;; enabled when flag_unsafe_math_optimizations is set.
3159 (define_insn "*<math_unop><mode>2<exec>_insn"
3160 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
3162 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
3164 "flag_unsafe_math_optimizations"
3165 "v_<math_unop_insn>%i0\t%0, %1"
3166 [(set_attr "type" "vop1")
3167 (set_attr "length" "8")])
3169 ; Trigonometric functions need their input scaled by 1/(2*PI) first.
;; Two-step expander for the FP_1REG modes: a first set consumes operand 1,
;; a second set produces operand 0.  The preparation code allocates a fresh
;; pseudo as operands[2] and builds operands[3] as the constant returned by
;; gcn_dconst1over2pi ().  Only enabled when flag_unsafe_math_optimizations
;; is set.
3171 (define_expand "<math_unop><mode>2"
3175 (match_operand:FP_1REG 1 "gcn_alu_operand")))
3176 (set (match_operand:FP_1REG 0 "register_operand")
3180 "flag_unsafe_math_optimizations"
3182 operands[2] = gen_reg_rtx (<MODE>mode);
3183 operands[3] = const_double_from_real_value (gcn_dconst1over2pi (),
;; Vector (V_FP_1REG) form of the two-step expander above.  The scaling
;; constant from gcn_dconst1over2pi () is created in <SCALAR_MODE> and
;; turned into a vector constant with gcn_vec_constant.
3187 (define_expand "<math_unop><mode>2<exec>"
3191 (match_operand:V_FP_1REG 1 "gcn_alu_operand")))
3192 (set (match_operand:V_FP_1REG 0 "register_operand")
3196 "flag_unsafe_math_optimizations"
3198 operands[2] = gen_reg_rtx (<MODE>mode);
3200 gcn_vec_constant (<MODE>mode,
3201 const_double_from_real_value (gcn_dconst1over2pi (),
3202 <SCALAR_MODE>mode));
3205 ; Implement ldexp pattern
;; ldexp for the SV_FP modes.  Operand 1 is the FP value and operand 2 has
;; the matching <VnSI> mode; printed as v_ldexp, VOP3A, length 8.
3207 (define_insn "ldexp<mode>3<exec>"
3208 [(set (match_operand:SV_FP 0 "register_operand" "= v")
3210 [(match_operand:SV_FP 1 "gcn_alu_operand" " vA")
3211 (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")]
3214 "v_ldexp%i0\t%0, %1, %2"
3215 [(set_attr "type" "vop3a")
3216 (set_attr "length" "8")])
3218 ; Implement frexp patterns
;; Exponent half of frexp for the FP modes.  The result is an SI register;
;; the mnemonic suffix is taken from the source operand (%i1).
3220 (define_insn "frexp<mode>_exp2"
3221 [(set (match_operand:SI 0 "register_operand" "=v")
3223 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
3226 "v_frexp_exp_i32%i1\t%0, %1"
3227 [(set_attr "type" "vop1")
3228 (set_attr "length" "8")])
;; Mantissa half of frexp for the FP modes (UNSPEC_FREXP_MANT); the result
;; has the same mode as the source.
3230 (define_insn "frexp<mode>_mant2"
3231 [(set (match_operand:FP 0 "register_operand" "=v")
3233 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
3234 UNSPEC_FREXP_MANT))]
3236 "v_frexp_mant%i1\t%0, %1"
3237 [(set_attr "type" "vop1")
3238 (set_attr "length" "8")])
;; Vector form of frexp<mode>_exp2: V_FP source, <VnSI> result.
3240 (define_insn "frexp<mode>_exp2<exec>"
3241 [(set (match_operand:<VnSI> 0 "register_operand" "=v")
3243 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
3246 "v_frexp_exp_i32%i1\t%0, %1"
3247 [(set_attr "type" "vop1")
3248 (set_attr "length" "8")])
;; Vector form of frexp<mode>_mant2 (UNSPEC_FREXP_MANT) on the V_FP modes.
3250 (define_insn "frexp<mode>_mant2<exec>"
3251 [(set (match_operand:V_FP 0 "register_operand" "=v")
3253 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
3254 UNSPEC_FREXP_MANT))]
3256 "v_frexp_mant%i1\t%0, %1"
3257 [(set_attr "type" "vop1")
3258 (set_attr "length" "8")])
3261 ;; {{{ FP fused multiply and add
;; Vector fused multiply-add, printed as "v_fma %0, %1, %2, %3".
;; Operand 1 is marked commutative ('%').  In each alternative only one of
;; operands 2 and 3 carries the "vSvA" constraint; the other is "vA".
3263 (define_insn "fma<mode>4<exec>"
3264 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
3266 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
3267 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
3268 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
3270 "v_fma%i0\t%0, %1, %2, %3"
3271 [(set_attr "type" "vop3a")
3272 (set_attr "length" "8")])
;; Vector fused multiply-add with operand 2 negated, printed as
;; "v_fma %0, %1, -%2, %3".  Each of the three alternatives places the
;; "vSvA" constraint on a different source operand.
3274 (define_insn "fma<mode>4_negop2<exec>"
3275 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
3277 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3279 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3280 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
3282 "v_fma%i0\t%0, %1, -%2, %3"
3283 [(set_attr "type" "vop3a")
3284 (set_attr "length" "8")])
;; Fused multiply-add for the FP modes, printed as "v_fma %0, %1, %2, %3".
;; Same constraint layout as the vector form.
3286 (define_insn "fma<mode>4"
3287 [(set (match_operand:FP 0 "register_operand" "= v, v")
3289 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
3290 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
3291 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
3293 "v_fma%i0\t%0, %1, %2, %3"
3294 [(set_attr "type" "vop3a")
3295 (set_attr "length" "8")])
;; Fused multiply-add for the FP modes with operand 2 negated, printed as
;; "v_fma %0, %1, -%2, %3".
3297 (define_insn "fma<mode>4_negop2"
3298 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
3300 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3302 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3303 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
3305 "v_fma%i0\t%0, %1, -%2, %3"
3306 [(set_attr "type" "vop3a")
3307 (set_attr "length" "8")])
;; Vector fused multiply-subtract: the same v_fma instruction with the
;; addend negated, printed as "v_fma %0, %1, %2, -%3".
3309 (define_insn "fms<mode>4<exec>"
3310 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
3312 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
3313 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
3315 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA"))))]
3317 "v_fma%i0\t%0, %1, %2, -%3"
3318 [(set_attr "type" "vop3a")
3319 (set_attr "length" "8")])
;; Vector fused multiply-subtract with operand 2 negated as well, printed
;; as "v_fma %0, %1, -%2, -%3".
3321 (define_insn "fms<mode>4_negop2<exec>"
3322 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
3324 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3326 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3328 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))]
3330 "v_fma%i0\t%0, %1, -%2, -%3"
3331 [(set_attr "type" "vop3a")
3332 (set_attr "length" "8")])
;; Fused multiply-subtract for the FP modes, printed as
;; "v_fma %0, %1, %2, -%3".
3334 (define_insn "fms<mode>4"
3335 [(set (match_operand:FP 0 "register_operand" "= v, v")
3337 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
3338 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
3340 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA"))))]
3342 "v_fma%i0\t%0, %1, %2, -%3"
3343 [(set_attr "type" "vop3a")
3344 (set_attr "length" "8")])
;; Fused multiply-subtract for the FP modes with operand 2 negated as well,
;; printed as "v_fma %0, %1, -%2, -%3".
3346 (define_insn "fms<mode>4_negop2"
3347 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
3349 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3351 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3353 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))]
3355 "v_fma%i0\t%0, %1, -%2, -%3"
3356 [(set_attr "type" "vop3a")
3357 (set_attr "length" "8")])
;; Reciprocal for the SV_FP modes; VOP1 encoding, length 8.  The div<mode>3
;; expander uses it as the initial estimate for its Newton-Raphson steps.
3362 (define_insn "recip<mode>2<exec>"
3363 [(set (match_operand:SV_FP 0 "register_operand" "= v")
3365 [(match_operand:SV_FP 1 "gcn_alu_operand" "vSvB")]
3369 [(set_attr "type" "vop1")
3370 (set_attr "length" "8")])
3372 ;; v_div_scale takes a numerator (op2) and denominator (op1) and returns the
3373 ;; one that matches op3 adjusted for best results in reciprocal division.
3374 ;; It also emits a VCC mask that is intended for input to v_div_fmas.
3375 ;; The caller is expected to call this twice, once for each input. The output
3376 ;; VCC is the same in both cases, so the caller may discard one.
;; Two outputs are set: operand 0 (the scaled value) and operand 4, a DI
;; register constrained to "=SvcV" that receives the mask.  Note that the
;; assembly operand order is %0, %4, %3, %1, %2, not the RTL operand order.
3377 (define_insn "div_scale<mode><exec_vcc>"
3378 [(set (match_operand:SV_SFDF 0 "register_operand" "=v")
3380 [(match_operand:SV_SFDF 1 "gcn_alu_operand" "v")
3381 (match_operand:SV_SFDF 2 "gcn_alu_operand" "v")
3382 (match_operand:SV_SFDF 3 "gcn_alu_operand" "v")]
3384 (set (match_operand:DI 4 "register_operand" "=SvcV")
3386 [(match_dup 1) (match_dup 2) (match_dup 3)]
3389 "v_div_scale%i0\t%0, %4, %3, %1, %2"
3390 [(set_attr "type" "vop3b")
3391 (set_attr "length" "8")])
3393 ;; v_div_fmas is "FMA and Scale" that uses the VCC output from v_div_scale
3394 ;; to conditionally scale the output of the whole division operation.
3395 ;; This is necessary to counter the adjustments made by v_div_scale and
3396 ;; replaces the last FMA instruction of the Newton Raphson algorithm.
;; Operand 4 is the DI mask input, constrained to "cV".  In the output
;; template it appears only after the ';' that follows the three real
;; assembly operands.  The "vccwait" attribute is set to 5.
3397 (define_insn "div_fmas<mode><exec>"
3398 [(set (match_operand:SV_SFDF 0 "register_operand" "=v")
3402 (match_operand:SV_SFDF 1 "gcn_alu_operand" "v")
3403 (match_operand:SV_SFDF 2 "gcn_alu_operand" "v"))
3404 (match_operand:SV_SFDF 3 "gcn_alu_operand" "v"))
3405 (match_operand:DI 4 "register_operand" "cV")]
3408 "v_div_fmas%i0\t%0, %1, %2, %3; %4"
3409 [(set_attr "type" "vop3a")
3410 (set_attr "length" "8")
3411 (set_attr "vccwait" "5")])
3413 ;; v_div_fixup takes the inputs and outputs of a division operation already
3414 ;; completed and cleans up the floating-point sign bit, infinity, underflow,
3415 ;; overflow, and NaN status. It will also emit any FP exceptions.
3416 ;; op1: quotient, op2: denominator, op3: numerator
;; Defined for all SV_FP modes.  Operand 1 (the quotient) must be a
;; register; operands 2 and 3 use gcn_alu_operand.  VOP3A, length 8.
3417 (define_insn "div_fixup<mode><exec>"
3418 [(set (match_operand:SV_FP 0 "register_operand" "=v")
3420 [(match_operand:SV_FP 1 "register_operand" "v")
3421 (match_operand:SV_FP 2 "gcn_alu_operand" "v")
3422 (match_operand:SV_FP 3 "gcn_alu_operand" "v")]
3425 "v_div_fixup%i0\t%0, %1, %2, %3"
3426 [(set_attr "type" "vop3a")
3427 (set_attr "length" "8")])
;; Floating-point division for the SV_SFDF modes, expanded into a
;; sequence of instructions:
;;   1. div_scale is emitted twice to obtain the scaled denominator and the
;;      scaled numerator; the mask from the first call goes to a throwaway
;;      register.
;;   2. recip gives an initial reciprocal of the scaled denominator, which
;;      two fma steps refine into "rcp".
;;   3. mul gives a first quotient estimate, refined by three more fma
;;      steps.
;;   4. div_fmas combines the last fma results under the mask "vcc".
;;   5. div_fixup writes the final result to operand 0.
;; Operand 1 is the numerator and operand 2 the denominator.
3429 (define_expand "div<mode>3"
3430 [(match_operand:SV_SFDF 0 "register_operand")
3431 (match_operand:SV_SFDF 1 "gcn_alu_operand")
3432 (match_operand:SV_SFDF 2 "gcn_alu_operand")]
3435 rtx numerator = operands[1];
3436 rtx denominator = operands[2];
3438 /* Scale the inputs if they are close to the FP limits.
3439 This will be reversed later. */
3440 rtx vcc = gen_reg_rtx (DImode);
3441 rtx discardedvcc = gen_reg_rtx (DImode);
3442 rtx scaled_numerator = gen_reg_rtx (<MODE>mode);
3443 rtx scaled_denominator = gen_reg_rtx (<MODE>mode);
3444 emit_insn (gen_div_scale<mode> (scaled_denominator,
3445 denominator, numerator,
3446 denominator, discardedvcc));
3447 emit_insn (gen_div_scale<mode> (scaled_numerator,
3448 denominator, numerator,
3451 /* Find the reciprocal of the denominator, and use Newton-Raphson to
3452 improve the accuracy over the basic hardware instruction. */
3453 rtx one = gcn_vec_constant (<MODE>mode,
3454 const_double_from_real_value (dconst1, <SCALAR_MODE>mode));
3455 rtx initrcp = gen_reg_rtx (<MODE>mode);
3456 rtx fma1 = gen_reg_rtx (<MODE>mode);
3457 rtx rcp = gen_reg_rtx (<MODE>mode);
3458 emit_insn (gen_recip<mode>2 (initrcp, scaled_denominator));
3459 emit_insn (gen_fma<mode>4_negop2 (fma1, initrcp, scaled_denominator, one));
3460 emit_insn (gen_fma<mode>4 (rcp, fma1, initrcp, initrcp));
3462 /* Do the division "a/b" via "a*1/b" and use Newton-Raphson to improve
3463 the accuracy. The "div_fmas" instruction reverses any scaling
3464 performed by "div_scale", above. */
3465 rtx div_est = gen_reg_rtx (<MODE>mode);
3466 rtx fma2 = gen_reg_rtx (<MODE>mode);
3467 rtx fma3 = gen_reg_rtx (<MODE>mode);
3468 rtx fma4 = gen_reg_rtx (<MODE>mode);
3469 rtx fmas = gen_reg_rtx (<MODE>mode);
3470 emit_insn (gen_mul<mode>3 (div_est, scaled_numerator, rcp));
3471 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, scaled_denominator,
3473 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
3474 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, scaled_denominator,
3476 emit_insn (gen_div_fmas<mode> (fmas, fma4, rcp, fma3, vcc));
3478 /* Finally, use "div_fixup" to get the details right and find errors. */
3479 emit_insn (gen_div_fixup<mode> (operands[0], fmas, denominator,
3485 ;; {{{ Int/FP conversions
;; Source modes for the scalar conversion pattern below.
3487 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
;; Destination modes for the scalar conversion pattern below; the same list
;; as CVT_FROM_MODE, kept separate so that both can vary independently.
3488 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
;; Vector modes that take part in conversions: HI, SI, HF, SF and DF
;; elements at vector lengths 2, 4, 8, 16, 32 and 64.
3490 (define_mode_iterator VCVT_MODE
3491 [V2HI V2SI V2HF V2SF V2DF
3492 V4HI V4SI V4HF V4SF V4DF
3493 V8HI V8SI V8HF V8SF V8DF
3494 V16HI V16SI V16HF V16SF V16DF
3495 V32HI V32SI V32HF V32SF V32DF
3496 V64HI V64SI V64HF V64SF V64DF])
;; Mode iterator used for the floating-point side of the vector conversion
;; patterns below.
3497 (define_mode_iterator VCVT_FMODE
;; Mode iterator used for the integer destination of the vector
;; FP-to-integer conversion pattern below.
3504 (define_mode_iterator VCVT_IMODE
;; Code iterator over the six conversion RTL codes handled by the v_cvt
;; patterns below.
3512 (define_code_iterator cvt_op [fix unsigned_fix
3513 float unsigned_float
3514 float_extend float_truncate])
;; Maps each conversion code to the name stem that starts the pattern names
;; below ("<cvt_name><from><to>2").
3515 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
3516 (float "float") (unsigned_float "floatuns")
3517 (float_extend "extend") (float_truncate "trunc")])
;; Maps each conversion code to the pair of print directives appended to
;; "v_cvt": one for operand 0, then one for operand 1.  The unsigned codes
;; use %u instead of %i on their integer side (operand 0 for unsigned_fix,
;; operand 1 for unsigned_float).
3518 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
3519 (float "%i0%i1") (unsigned_float "%i0%u1")
3520 (float_extend "%i0%i1")
3521 (float_truncate "%i0%i1")])
;; Scalar conversion for every CVT_FROM_MODE x CVT_TO_MODE pair.
;; gcn_valid_cvt_p in the insn condition filters out the combinations that
;; are not supported.  Printed as v_cvt followed by <cvt_operands>.
3523 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
3524 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
3526 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
3527 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
3529 "v_cvt<cvt_operands>\t%0, %1"
3530 [(set_attr "type" "vop1")
3531 (set_attr "length" "8")])
;; Vector conversion from any VCVT_MODE to a VCVT_FMODE destination.  The
;; condition requires both modes to have the same MODE_VF and the pair to
;; pass gcn_valid_cvt_p.
3533 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
3534 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
3536 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
3537 "MODE_VF (<VCVT_MODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
3538 && gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
3540 "v_cvt<cvt_operands>\t%0, %1"
3541 [(set_attr "type" "vop1")
3542 (set_attr "length" "8")])
;; Vector conversion from a VCVT_FMODE source to a VCVT_IMODE destination.
;; The condition requires both modes to have the same MODE_VF and the pair
;; to pass gcn_valid_cvt_p.
3544 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
3545 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
3547 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
3548 "MODE_VF (<VCVT_IMODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
3549 && gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
3551 "v_cvt<cvt_operands>\t%0, %1"
3552 [(set_attr "type" "vop1")
3553 (set_attr "length" "8")])
3556 ;; {{{ Int/int conversions
;; Code iterator over truncate and zero_extend.  sign_extend is not in the
;; list; it has its own pattern below.
3558 (define_code_iterator zero_convert [truncate zero_extend])
;; Maps an integer conversion code to the name stem used in pattern names.
3559 (define_code_attr convop [
3560 (sign_extend "extend")
3561 (zero_extend "zero_extend")
3562 (truncate "trunc")])
;; Truncation or zero-extension between single-register integer vector
;; modes, done with one v_mov_b32_sdwa.  dst_sel and src0_sel come from the
;; "sdwa" attribute of the destination and source modes, and the unused
;; destination bits are handled with dst_unused:UNUSED_PAD.
3564 (define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
3565 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3566 (zero_convert:V_INT_1REG
3567 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3569 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
3570 [(set_attr "type" "vop_sdwa")
3571 (set_attr "length" "8")])
;; Sign-extension between single-register integer vector modes, done with
;; one v_mov_b32_sdwa whose source is written as sext(%1).  Only src0_sel
;; is given, taken from the source mode's "sdwa" attribute.
3573 (define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
3574 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3575 (sign_extend:V_INT_1REG
3576 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3578 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
3579 [(set_attr "type" "vop_sdwa")
3580 (set_attr "length" "8")])
3582 ;; GCC can already do these for scalar types, but not for vector types.
3583 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
3584 ;; so there must be a few tricks here.
;; Truncation from <VnDI> to a single-register integer vector mode.  The
;; split takes the low part of operand 1 with gcn_operand_part (part 0).
;; It is narrowed further with trunc<vnsi><mode>2 when the destination mode
;; is not <VnSI>, or moved directly to the destination.
3586 (define_insn_and_split "trunc<vndi><mode>2"
3587 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3588 (truncate:V_INT_1REG
3589 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))]
3595 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
3596 rtx out = operands[0];
3598 if (<MODE>mode != <VnSI>mode)
3599 emit_insn (gen_trunc<vnsi><mode>2 (out, inlo));
3601 emit_move_insn (out, inlo);
3603 [(set_attr "type" "vop2")
3604 (set_attr "length" "4")])
;; Masked form of trunc<vndi><mode>2: a vec_merge of the truncated value
;; with operand 2 (constraint "U0") under the EXEC mask in operand 3.  The
;; split passes the merge value and mask on to trunc<vnsi><mode>2_exec or
;; to mov<mode>_exec.
3606 (define_insn_and_split "trunc<vndi><mode>2_exec"
3607 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3608 (vec_merge:V_INT_1REG
3609 (truncate:V_INT_1REG
3610 (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
3611 (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
3612 (match_operand:DI 3 "gcn_exec_operand" " e")))]
3618 rtx out = operands[0];
3619 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
3620 rtx merge = operands[2];
3621 rtx exec = operands[3];
3623 if (<MODE>mode != <VnSI>mode)
3624 emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec));
3626 emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec));
3628 [(set_attr "type" "vop2")
3629 (set_attr "length" "4")])
;; Extension from a single-register integer vector mode to <VnDI>.  The
;; split writes the two halves of the destination separately.  The low
;; half gets the source, widened to <VnSI> first when the source mode is
;; not <VnSI>.  The high half gets either an arithmetic right shift of the
;; low half by 31 or a broadcast zero.
;; NOTE(review): presumably the shift serves sign_extend and the zero
;; broadcast serves zero_extend -- confirm against the full source.
3631 (define_insn_and_split "<convop><mode><vndi>2"
3632 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3634 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
3640 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3641 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3642 rtx in = operands[1];
3644 if (<MODE>mode != <VnSI>mode)
3645 emit_insn (gen_<convop><mode><vnsi>2 (outlo, in));
3647 emit_move_insn (outlo, in);
3649 emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31)));
3651 emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx));
3653 [(set_attr "type" "mult")
3654 (set_attr "length" "12")])
;; Masked form of <convop><mode><vndi>2.  The merge value (operand 2) is
;; split into low and high parts just like the destination, and each half
;; is produced by the _exec variant of the corresponding instruction under
;; the EXEC mask in operand 3.
3656 (define_insn_and_split "<convop><mode><vndi>2_exec"
3657 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3660 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
3661 (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
3662 (match_operand:DI 3 "gcn_exec_operand" " e")))]
3668 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3669 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3670 rtx in = operands[1];
3671 rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0);
3672 rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1);
3673 rtx exec = operands[3];
3675 if (<MODE>mode != <VnSI>mode)
3676 emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec));
3678 emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec));
3680 emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi,
3683 emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi,
3686 [(set_attr "type" "mult")
3687 (set_attr "length" "12")])
3690 ;; {{{ Vector comparison/merge
;; Vector comparison producing a DI lane mask, with eight alternatives.
;; The destination is constrained to "cV", "e" or "Sg".  Alternatives 2
;; and 3 (v_cmpx) also clobber a "cV" scratch and are marked rdna "no";
;; alternatives 6 and 7 are marked rdna "yes".  Alternatives that take a
;; "B" operand are 8 bytes long, as are the two VOP3A ones; the rest are 4.
3692 (define_insn "vec_cmp<mode>di"
3693 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
3694 (match_operator:DI 1 "gcn_fp_compare_operator"
3695 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
3696 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")]))
3697 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X, X, X"))]
3700 v_cmp%E1\tvcc, %2, %3
3701 v_cmp%E1\tvcc, %2, %3
3702 v_cmpx%E1\tvcc, %2, %3
3703 v_cmpx%E1\tvcc, %2, %3
3704 v_cmp%E1\t%0, %2, %3
3705 v_cmp%E1\t%0, %2, %3
3708 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
3709 (set_attr "length" "4,8,4,8,8,8,4,8")
3710 (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
;; Unsigned vector comparison for the V_INT_noQI modes.  It forwards its
;; operands unchanged to vec_cmp<mode>di, relying on the comparison
;; operator in operand 1 to carry the signedness.
3712 (define_expand "vec_cmpu<mode>di"
3713 [(match_operand:DI 0 "register_operand")
3714 (match_operator 1 "gcn_compare_operator"
3715 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3716 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
3719 /* Unsigned comparisons use the same patterns as signed comparisons,
3720 except that they use unsigned operators (e.g. LTU vs LT).
3721 The '%E1' directive then does the Right Thing. */
3722 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
3727 ; There's no instruction for 8-bit vector comparison, so we need to extend.
;; Comparison of V_QI vectors.  Both inputs are first extended into fresh
;; <VnSI> temporaries, then compared with vec_cmp<vnsi>di.  The expander is
;; only available while new pseudos can be created.
3728 (define_expand "vec_cmp<u><mode>di"
3729 [(match_operand:DI 0 "register_operand")
3730 (match_operator 1 "gcn_compare_operator"
3731 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3732 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
3733 "can_create_pseudo_p ()"
3735 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3736 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
3738 emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2]));
3739 emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3]));
3740 emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2));
;; Form of vec_cmp<mode>di that takes an explicit EXEC mask as operand 4,
;; constrained to "e" in every alternative.  The scratch clobber therefore
;; moves to operand 5.  Alternatives, templates and attributes otherwise
;; mirror vec_cmp<mode>di.
3744 (define_insn "vec_cmp<mode>di_exec"
3745 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
3747 (match_operator 1 "gcn_fp_compare_operator"
3748 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
3749 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")])
3750 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e, e, e")))
3751 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X, X, X"))]
3754 v_cmp%E1\tvcc, %2, %3
3755 v_cmp%E1\tvcc, %2, %3
3756 v_cmpx%E1\tvcc, %2, %3
3757 v_cmpx%E1\tvcc, %2, %3
3758 v_cmp%E1\t%0, %2, %3
3759 v_cmp%E1\t%0, %2, %3
3762 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
3763 (set_attr "length" "4,8,4,8,8,8,4,8")
3764 (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
;; Unsigned vector comparison for the V_INT_noQI modes under an explicit
;; EXEC mask (operand 4).  Like vec_cmpu<mode>di, it forwards its operands
;; unchanged to the signed pattern, here vec_cmp<mode>di_exec, relying on
;; the comparison operator in operand 1 to carry the signedness.
;; It must not emit gen_vec_cmpu<mode>di_exec: that is this expander
;; itself, and the expansion would recurse without end.
3766 (define_expand "vec_cmpu<mode>di_exec"
3767 [(match_operand:DI 0 "register_operand")
3768 (match_operator 1 "gcn_compare_operator"
3769 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3770 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
3771 (match_operand:DI 4 "gcn_exec_reg_operand")]
3774 /* Unsigned comparisons use the same patterns as signed comparisons,
3775 except that they use unsigned operators (e.g. LTU vs LT).
3776 The '%E1' directive then does the Right Thing. */
3777 emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
3778 operands[2], operands[3],
;; Masked comparison of V_QI vectors.  Both inputs are extended into fresh
;; <VnSI> temporaries with the _exec extension patterns, then compared with
;; vec_cmp<vnsi>di_exec.  Operand 4 is the EXEC mask for all three
;; instructions.  Each extension is given its own input as the third
;; argument.
3783 (define_expand "vec_cmp<u><mode>di_exec"
3784 [(match_operand:DI 0 "register_operand")
3785 (match_operator 1 "gcn_compare_operator"
3786 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3787 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
3788 (match_operand:DI 4 "gcn_exec_reg_operand")]
3789 "can_create_pseudo_p ()"
3791 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3792 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
3794 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
3795 operands[2], operands[4]));
3796 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
3797 operands[3], operands[4]));
3798 emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
3799 sitmp2, operands[4]));
;; Vector comparison whose first input is a scalar (<SCALAR_MODE>)
;; broadcast with vec_duplicate.  It has seven alternatives.  As in
;; vec_cmp<mode>di, the v_cmpx alternatives (2 and 3) clobber a "cV"
;; scratch and are marked rdna "no", and the last two alternatives are
;; marked rdna "yes".
3803 (define_insn "vec_cmp<mode>di_dup"
3804 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
3805 (match_operator:DI 1 "gcn_fp_compare_operator"
3806 [(vec_duplicate:V_noQI
3807 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3808 " Sv, B,Sv,B, A,Sv,B"))
3809 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")]))
3810 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X, X,X"))]
3813 v_cmp%E1\tvcc, %2, %3
3814 v_cmp%E1\tvcc, %2, %3
3815 v_cmpx%E1\tvcc, %2, %3
3816 v_cmpx%E1\tvcc, %2, %3
3817 v_cmp%E1\t%0, %2, %3
3820 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
3821 (set_attr "length" "4,8,4,8,8,4,8")
3822 (set_attr "rdna" "*,*,no,no,*,yes,yes")])
;; Form of vec_cmp<mode>di_dup that takes an explicit EXEC mask as
;; operand 4 ("e" in every alternative); the scratch clobber becomes
;; operand 5.
3824 (define_insn "vec_cmp<mode>di_dup_exec"
3825 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
3827 (match_operator 1 "gcn_fp_compare_operator"
3828 [(vec_duplicate:V_noQI
3829 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3830 " Sv, B,Sv,B, A,Sv,B"))
3831 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")])
3832 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e, e,e")))
3833 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X, X,X"))]
3836 v_cmp%E1\tvcc, %2, %3
3837 v_cmp%E1\tvcc, %2, %3
3838 v_cmpx%E1\tvcc, %2, %3
3839 v_cmpx%E1\tvcc, %2, %3
3840 v_cmp%E1\t%0, %2, %3
3843 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
3844 (set_attr "length" "4,8,4,8,8,4,8")
3845 (set_attr "rdna" "*,*,no,no,*,yes,yes")])
;; Per-lane select for all V_ALL modes: operand 3 is the DI lane mask that
;; chooses between operands 1 and 2.  The pattern also clobbers a <VnDI>
;; scratch.
3847 (define_expand "vcond_mask_<mode>di"
3849 [(set (match_operand:V_ALL 0 "register_operand" "")
3851 (match_operand:V_ALL 1 "gcn_vop3_operand" "")
3852 (match_operand:V_ALL 2 "gcn_alu_operand" "")
3853 (match_operand:DI 3 "register_operand" "")))
3854 (clobber (scratch:<VnDI>))])]
;; Vector conditional select.  The comparison (operator 3 applied to
;; operands 4 and 5, in a V_ALL_ALT mode) is emitted into a fresh DI mask
;; with vec_cmp<mode>di.  That mask then drives vcond_mask_<mode>di to pick
;; between operands 1 and 2 (in a V_ALL mode).
3858 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
3859 [(match_operand:V_ALL 0 "register_operand")
3860 (match_operand:V_ALL 1 "gcn_vop3_operand")
3861 (match_operand:V_ALL 2 "gcn_alu_operand")
3862 (match_operator 3 "gcn_fp_compare_operator"
3863 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3864 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
3867 rtx tmp = gen_reg_rtx (DImode);
3868 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di
3869 (tmp, operands[3], operands[4], operands[5]));
3870 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3871 (operands[0], operands[1], operands[2], tmp));
;; Form of vcond with an explicit EXEC mask in operand 6.  The mask is
;; applied to the comparison only (vec_cmp<mode>di_exec); the select that
;; follows is the plain vcond_mask_<mode>di driven by the comparison
;; result.
3875 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
3876 [(match_operand:V_ALL 0 "register_operand")
3877 (match_operand:V_ALL 1 "gcn_vop3_operand")
3878 (match_operand:V_ALL 2 "gcn_alu_operand")
3879 (match_operator 3 "gcn_fp_compare_operator"
3880 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3881 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
3882 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3885 rtx tmp = gen_reg_rtx (DImode);
3886 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec
3887 (tmp, operands[3], operands[4], operands[5], operands[6]));
3888 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3889 (operands[0], operands[1], operands[2], tmp));
;; Vector conditional select with an unsigned integer comparison.  The
;; comparison on operands 4 and 5 (a V_INT mode) is emitted into a fresh DI
;; mask with vec_cmpu<mode>di, and vcond_mask_<mode>di then selects between
;; operands 1 and 2.
3893 (define_expand "vcondu<V_ALL:mode><V_INT:mode>"
3894 [(match_operand:V_ALL 0 "register_operand")
3895 (match_operand:V_ALL 1 "gcn_vop3_operand")
3896 (match_operand:V_ALL 2 "gcn_alu_operand")
3897 (match_operator 3 "gcn_fp_compare_operator"
3898 [(match_operand:V_INT 4 "gcn_alu_operand")
3899 (match_operand:V_INT 5 "gcn_vop3_operand")])]
3902 rtx tmp = gen_reg_rtx (DImode);
3903 emit_insn (gen_vec_cmpu<V_INT:mode>di
3904 (tmp, operands[3], operands[4], operands[5]));
3905 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3906 (operands[0], operands[1], operands[2], tmp));
;; Form of vcondu with an explicit EXEC mask in operand 6.  The mask is
;; applied to the comparison only (vec_cmpu<mode>di_exec); the select that
;; follows is the plain vcond_mask_<mode>di.
3910 (define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
3911 [(match_operand:V_ALL 0 "register_operand")
3912 (match_operand:V_ALL 1 "gcn_vop3_operand")
3913 (match_operand:V_ALL 2 "gcn_alu_operand")
3914 (match_operator 3 "gcn_fp_compare_operator"
3915 [(match_operand:V_INT 4 "gcn_alu_operand")
3916 (match_operand:V_INT 5 "gcn_vop3_operand")])
3917 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3920 rtx tmp = gen_reg_rtx (DImode);
3921 emit_insn (gen_vec_cmpu<V_INT:mode>di_exec
3922 (tmp, operands[3], operands[4], operands[5], operands[6]));
3923 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3924 (operands[0], operands[1], operands[2], tmp));
3929 ;; {{{ Fully masked loop support
;; Builds the DI lane mask for a fully masked loop from SI operands 1, 2
;; and 3.
;; If operand 1 or operand 2 is not a CONST_INT, the mask is computed at
;; run time with a V64SI "dup" comparison (code GT).  The vector side is
;; the V64SI value in VGPR 1, to which operand 1 is added first unless it
;; is the constant 0.
;; If both are constants, the mask is computed at compile time from
;; diff = operand 2 - operand 1: all ones when diff >= 64, otherwise the
;; low "diff" bits.
;; Finally, when operand 3 is below 64, the mask is ANDed down to its low
;; operand-3 bits.
;; NOTE(review): VGPR 1 is presumably preloaded with the lane numbers
;; 0, 1, 2, 3, ... (hence the variable name) -- confirm.
3931 (define_expand "while_ultsidi"
3932 [(match_operand:DI 0 "register_operand")
3933 (match_operand:SI 1 "")
3934 (match_operand:SI 2 "")
3935 (match_operand:SI 3 "")]
3938 if (GET_CODE (operands[1]) != CONST_INT
3939 || GET_CODE (operands[2]) != CONST_INT)
3941 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3943 if (GET_CODE (operands[1]) != CONST_INT
3944 || INTVAL (operands[1]) != 0)
3946 tmp = gen_reg_rtx (V64SImode);
3947 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
3949 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
3950 gen_rtx_GT (VOIDmode, 0, 0),
3955 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
3956 HOST_WIDE_INT mask = (diff >= 64 ? -1
3957 : ~((unsigned HOST_WIDE_INT)-1 << diff));
3958 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
3960 if (INTVAL (operands[3]) < 64)
3961 emit_insn (gen_anddi3 (operands[0], operands[0],
3962 gen_rtx_CONST_INT (VOIDmode,
3963 ~((unsigned HOST_WIDE_INT)-1
3964 << INTVAL (operands[3])))));
;; Masked vector load.  The mask (operand 2) is forced into a DI register,
;; and the scalar memory address of operand 1 is expanded into a vector
;; address with gcn_expand_scalar_to_vector_address.  The destination is
;; zeroed first, then a gather is emitted under the mask with the zeroed
;; destination as its fifth argument.  The address space and volatility of
;; the memory operand are passed to the gather as CONST_INTs.
3968 (define_expand "maskload<mode>di"
3969 [(match_operand:V_MOV 0 "register_operand")
3970 (match_operand:V_MOV 1 "memory_operand")
3971 (match_operand 2 "")]
3974 rtx exec = force_reg (DImode, operands[2]);
3975 rtx addr = gcn_expand_scalar_to_vector_address
3976 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode));
3977 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
3978 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
3980 /* Masked lanes are required to hold zero. */
3981 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
3983 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
3984 operands[0], exec));
;; Masked vector store.  The mask (operand 2) is forced into a DI register,
;; and the scalar memory address of operand 0 is expanded into a vector
;; address with gcn_expand_scalar_to_vector_address.  Operand 1 is then
;; written with a scatter under the mask.
3988 (define_expand "maskstore<mode>di"
3989 [(match_operand:V_MOV 0 "memory_operand")
3990 (match_operand:V_MOV 1 "register_operand")
3991 (match_operand 2 "")]
3994 rtx exec = force_reg (DImode, operands[2]);
3995 rtx addr = gcn_expand_scalar_to_vector_address
3996 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode));
3997 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
3998 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
3999 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
;; Masked gather load.  Operand 1 is the DI base, operand 2 the <VnSI>
;; offsets, operand 3 an immediate, operand 4 an SI value and operand 5
;; the mask.  gcn_expand_scaled_offsets combines them into "addr".  The
;; destination is zeroed first.  If "addr" has <VnDI> mode the 1-offset
;; gather is used, otherwise the 2-offsets gather, which also takes the
;; base in operand 1.
;; NOTE(review): operand 3 is presumably the signedness flag and operand 4
;; the scale, as in the standard mask_gather_load pattern -- confirm.
4003 (define_expand "mask_gather_load<mode><vnsi>"
4004 [(match_operand:V_MOV 0 "register_operand")
4005 (match_operand:DI 1 "register_operand")
4006 (match_operand:<VnSI> 2 "register_operand")
4007 (match_operand 3 "immediate_operand")
4008 (match_operand:SI 4 "gcn_alu_operand")
4009 (match_operand:DI 5 "")]
4012 rtx exec = force_reg (DImode, operands[5]);
4014 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
4015 operands[2], operands[4],
4016 INTVAL (operands[3]), exec);
4018 /* Masked lanes are required to hold zero. */
4019 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
4021 if (GET_MODE (addr) == <VnDI>mode)
4022 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
4023 const0_rtx, const0_rtx,
4024 const0_rtx, operands[0],
4027 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
4029 const0_rtx, const0_rtx,
4030 operands[0], exec));
;; Masked scatter store.  Operand 0 is the DI base, operand 1 the <VnSI>
;; offsets, operand 2 an immediate, operand 3 an SI value, operand 4 the
;; data to store and operand 5 the mask.  gcn_expand_scaled_offsets
;; combines base and offsets into "addr".  If "addr" has <VnDI> mode the
;; 1-offset scatter is used, otherwise the 2-offsets scatter, which also
;; takes the base in operand 0.
4034 (define_expand "mask_scatter_store<mode><vnsi>"
4035 [(match_operand:DI 0 "register_operand")
4036 (match_operand:<VnSI> 1 "register_operand")
4037 (match_operand 2 "immediate_operand")
4038 (match_operand:SI 3 "gcn_alu_operand")
4039 (match_operand:V_MOV 4 "register_operand")
4040 (match_operand:DI 5 "")]
4043 rtx exec = force_reg (DImode, operands[5]);
4045 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
4046 operands[1], operands[3],
4047 INTVAL (operands[2]), exec);
4049 if (GET_MODE (addr) == <VnDI>mode)
4050 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
4051 operands[4], const0_rtx,
4055 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
4056 const0_rtx, operands[4],
4057 const0_rtx, const0_rtx,
;; Arithmetic codes that get a conditional (masked) expander.
4062 (define_code_iterator cond_op [plus minus mult])
;; Conditional arithmetic for all V_ALL modes.  Operand 1 is the DI mask;
;; operands 2 and 3 are the sources; operand 4 follows them in the call to
;; the _exec pattern.  The mask and the first source are forced into
;; registers before <expander><mode>3_exec is emitted.
4064 (define_expand "cond_<expander><mode>"
4065 [(match_operand:V_ALL 0 "register_operand")
4066 (match_operand:DI 1 "register_operand")
4068 (match_operand:V_ALL 2 "gcn_alu_operand")
4069 (match_operand:V_ALL 3 "gcn_alu_operand"))
4070 (match_operand:V_ALL 4 "register_operand")]
4073 operands[1] = force_reg (DImode, operands[1]);
4074 operands[2] = force_reg (<MODE>mode, operands[2]);
4076 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
4077 operands[3], operands[4],
;; Min/max codes that get a conditional floating-point expander.
4082 (define_code_iterator cond_fminmaxop [smin smax])
;; Conditional floating-point min/max for the V_FP modes.  Operand 1 is
;; the DI mask; operands 2 and 3 are the sources; operand 4 follows them in
;; the call to the _exec pattern.  The mask and the first source are forced
;; into registers before <fexpander><mode>3_exec is emitted.
4084 (define_expand "cond_<fexpander><mode>"
4085 [(match_operand:V_FP 0 "register_operand")
4086 (match_operand:DI 1 "register_operand")
4087 (cond_fminmaxop:V_FP
4088 (match_operand:V_FP 2 "gcn_alu_operand")
4089 (match_operand:V_FP 3 "gcn_alu_operand"))
4090 (match_operand:V_FP 4 "register_operand")]
4093 operands[1] = force_reg (DImode, operands[1]);
4094 operands[2] = force_reg (<MODE>mode, operands[2]);
4096 emit_insn (gen_<fexpander><mode>3_exec (operands[0], operands[2],
4097 operands[3], operands[4],
;; Signed and unsigned min/max codes that get a conditional integer
;; expander.
4102 (define_code_iterator cond_minmaxop [smin smax umin umax])
;; Conditional integer min/max for the V_INT modes.  Unlike the other
;; cond_ expanders it works in two steps.  The min/max is computed into a
;; fresh temporary by the _exec pattern, with gcn_gen_undef as the argument
;; after the two sources.  vcond_mask_<mode>di then combines that temporary
;; with operand 4.
4104 (define_expand "cond_<expander><mode>"
4105 [(match_operand:V_INT 0 "register_operand")
4106 (match_operand:DI 1 "register_operand")
4107 (cond_minmaxop:V_INT
4108 (match_operand:V_INT 2 "gcn_alu_operand")
4109 (match_operand:V_INT 3 "gcn_alu_operand"))
4110 (match_operand:V_INT 4 "register_operand")]
4113 operands[1] = force_reg (DImode, operands[1]);
4114 operands[2] = force_reg (<MODE>mode, operands[2]);
4115 rtx tmp = gen_reg_rtx (<MODE>mode);
4117 emit_insn (gen_<expander><mode>3_exec (tmp, operands[2], operands[3],
4118 gcn_gen_undef(<MODE>mode),
4120 emit_insn (gen_vcond_mask_<mode>di (operands[0], tmp, operands[4],
;; Code iterator over the bitwise rtx codes and, ior and xor.
4125 (define_code_iterator cond_bitop [and ior xor])
;; Expander "cond_<expander><mode>" for V_INT modes.
;; Operand 0 is the V_INT destination, operand 1 a DImode register,
;; operands 2 and 3 are V_INT gcn_alu_operands and operand 4 a further V_INT
;; register.  The body forces operands 1 and 2 into registers and emits
;; gen_<expander><mode>3_exec with operands 0, 2, 3 and 4.
;; NOTE(review): the rtx wrapping operands 2/3 and the final generator
;; argument are not visible in this listing; presumably the wrapper is
;; cond_bitop and the last argument is operand 1 -- confirm.
4127 (define_expand "cond_<expander><mode>"
4128 [(match_operand:V_INT 0 "register_operand")
4129 (match_operand:DI 1 "register_operand")
4131 (match_operand:V_INT 2 "gcn_alu_operand")
4132 (match_operand:V_INT 3 "gcn_alu_operand"))
4133 (match_operand:V_INT 4 "register_operand")]
4136 operands[1] = force_reg (DImode, operands[1]);
4137 operands[2] = force_reg (<MODE>mode, operands[2]);
4139 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
4140 operands[3], operands[4],
;; Code iterator over the shift rtx codes ashift, lshiftrt and ashiftrt.
4145 (define_code_iterator cond_shiftop [ashift lshiftrt ashiftrt])
;; Expander "cond_<expander><mode>" for V_INT_noHI modes and cond_shiftop
;; codes.  In the pattern the shift amount (operand 3) has the same vector
;; mode as the value being shifted; the body copies it into a fresh
;; <VnSI>mode register with convert_move before emitting
;; gen_v<expander><mode>3_exec with operand 0, operand 2, the converted
;; shift amount and operand 4.
;; NOTE(review): the final generator argument is not visible in this
;; listing; presumably it is operand 1 -- confirm.
4147 (define_expand "cond_<expander><mode>"
4148 [(match_operand:V_INT_noHI 0 "register_operand")
4149 (match_operand:DI 1 "register_operand")
4150 (cond_shiftop:V_INT_noHI
4151 (match_operand:V_INT_noHI 2 "gcn_alu_operand")
4152 (match_operand:V_INT_noHI 3 "gcn_alu_operand"))
4153 (match_operand:V_INT_noHI 4 "register_operand")]
4156 operands[1] = force_reg (DImode, operands[1]);
4157 operands[2] = force_reg (<MODE>mode, operands[2]);
4159 rtx shiftby = gen_reg_rtx (<VnSI>mode);
4160 convert_move (shiftby, operands[3], 0);
4162 emit_insn (gen_v<expander><mode>3_exec (operands[0], operands[2],
4163 shiftby, operands[4],
4169 ;; {{{ Vector reductions
;; Int iterator over the *_DPP_SHR reduction unspecs.
;; NOTE(review): only the SMIN/SMAX/UMIN/UMAX/IOR/XOR entries are visible in
;; this listing; the reduc_* attributes that follow also map PLUS and AND,
;; so those entries are presumably on lines not shown -- confirm.
4171 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
4172 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
4175 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
;; Int iterator over the *_DPP_SHR unspecs handled by the V_DI
;; define_insn_and_split that references REDUC_2REG_UNSPEC.
;; NOTE(review): only PLUS, IOR and XOR are visible in this listing; one
;; line of the list is not shown -- confirm the full membership.
4177 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
4179 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
;; Int attribute mapping each reduction unspec to its own name as text, so
;; that the C fragments of the patterns in this section can write
;; <reduc_unspec> where they need the unspec constant.
4181 ; FIXME: Isn't there a better way of doing this?
4182 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
4183 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
4184 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
4185 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
4186 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
4187 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
4188 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
4189 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
;; Int attribute giving the lower-case operation name for each reduction
;; unspec.  It is substituted into pattern names in this section, e.g.
;; "reduc_<reduc_op>_scal_<mode>" and "*<reduc_op>_dpp_shr_<mode>".
4191 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
4192 (UNSPEC_SMAX_DPP_SHR "smax")
4193 (UNSPEC_UMIN_DPP_SHR "umin")
4194 (UNSPEC_UMAX_DPP_SHR "umax")
4195 (UNSPEC_PLUS_DPP_SHR "plus")
4196 (UNSPEC_AND_DPP_SHR "and")
4197 (UNSPEC_IOR_DPP_SHR "ior")
4198 (UNSPEC_XOR_DPP_SHR "xor")])
;; Int attribute giving the instruction template string for each reduction
;; unspec.  The V_1REG "*<reduc_op>_dpp_shr_<mode>" insn passes
;; "<reduc_insn>" to gcn_expand_dpp_shr_insn.
;; NOTE(review): the %i0/%u0/%U0/%B0 operand modifiers are interpreted
;; outside this section (presumably the target's operand printer selects
;; the type suffix) -- confirm there.
4200 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
4201 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
4202 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
4203 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
4204 (UNSPEC_PLUS_DPP_SHR "v_add%U0")
4205 (UNSPEC_AND_DPP_SHR "v_and%B0")
4206 (UNSPEC_IOR_DPP_SHR "v_or%B0")
4207 (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
;; Expander "reduc_<reduc_op>_scal_<mode>": reduces the V_ALL vector in
;; operand 1 to a <SCALAR_MODE> value in operand 0.
;; The body calls gcn_expand_reduc_scalar on operand 1 and then uses
;; gen_vec_extract<mode><scalar_mode> to read from that result into
;; operand 0.  last_lane is the constant GET_MODE_NUNITS (<MODE>mode) - 1.
;; NOTE(review): the unspec list of the pattern and the trailing arguments
;; of both calls are not visible in this listing; presumably the reduction
;; receives <reduc_unspec> and the extract receives last_lane -- confirm.
4209 (define_expand "reduc_<reduc_op>_scal_<mode>"
4210 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
4211 (unspec:<SCALAR_MODE>
4212 [(match_operand:V_ALL 1 "register_operand")]
4216 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
4219 rtx last_lane = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
4220 emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], tmp,
;; Expander "reduc_<fexpander>_scal_<mode>" for V_FP modes.
;; It adds no logic of its own: the body forwards operands 0 and 1
;; unchanged to gen_reduc_<expander>_scal_<mode>, relying on the
;; equivalence stated in the C comment below.
;; NOTE(review): the rtx code wrapping operand 1 is on a line not visible in
;; this listing -- confirm which code iterator it uses.
4226 (define_expand "reduc_<fexpander>_scal_<mode>"
4227 [(match_operand:<SCALAR_MODE> 0 "register_operand")
4229 (match_operand:V_FP 1 "register_operand"))]
4232 /* fmin/fmax are identical to smin/smax. */
4233 emit_insn (gen_reduc_<expander>_scal_<mode> (operands[0], operands[1]));
4237 ;; Warning: This "-ffast-math" implementation converts in-order reductions
4238 ;; into associative reductions. It's also used where OpenMP or
4239 ;; OpenACC parallelization has already broken the in-order semantics.
;; Expander "fold_left_plus_<mode>" for V_FP modes.
;; Operand 0 is the <SCALAR_MODE> destination register, operand 1 a
;; <SCALAR_MODE> gcn_alu_operand and operand 2 the V_FP gcn_alu_operand.
;; The pattern is enabled only while new pseudos can be created and at
;; least one of flag_openacc, flag_openmp or flag_associative_math is set.
;; The body reduces the vector into a fresh scalar temporary with
;; gen_reduc_plus_scal_<mode> and then adds that temporary to operand 1
;; with gen_add<scalar_mode>3, writing the sum to operand 0.
4240 (define_expand "fold_left_plus_<mode>"
4241 [(match_operand:<SCALAR_MODE> 0 "register_operand")
4242 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
4243 (match_operand:V_FP 2 "gcn_alu_operand")]
4244 "can_create_pseudo_p ()
4245 && (flag_openacc || flag_openmp
4246 || flag_associative_math)"
4248 rtx dest = operands[0];
4249 rtx scalar = operands[1];
4250 rtx vector = operands[2];
4251 rtx tmp = gen_reg_rtx (<SCALAR_MODE>mode);
4253 emit_insn (gen_reduc_plus_scal_<mode> (tmp, vector));
4254 emit_insn (gen_add<scalar_mode>3 (dest, scalar, tmp));
;; Insn "*<reduc_op>_dpp_shr_<mode>" for V_1REG modes.
;; Operands 1 and 2 are vector registers (constraint "v"), operand 3 is an
;; SImode const_int ("n") and operand 0 is the vector register result.
;; The condition excludes the combination TARGET_GCN3 + scalar-integer
;; element mode + UNSPEC_PLUS_DPP_SHR.
;; The output text comes from gcn_expand_dpp_shr_insn, which receives the
;; mode, the "<reduc_insn>" template, the unspec and INTVAL (operand 3).
;; Attributes: type "vop_dpp", length 8.
;; NOTE(review): the unspec wrapper line and the rest of the condition
;; string are not visible in this listing -- confirm against the full file.
4258 (define_insn "*<reduc_op>_dpp_shr_<mode>"
4259 [(set (match_operand:V_1REG 0 "register_operand" "=v")
4261 [(match_operand:V_1REG 1 "register_operand" "v")
4262 (match_operand:V_1REG 2 "register_operand" "v")
4263 (match_operand:SI 3 "const_int_operand" "n")]
4265 ; GCN3 requires a carry out, GCN5 not
4266 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
4267 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)
4270 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
4271 <reduc_unspec>, INTVAL (operands[3]));
4273 [(set_attr "type" "vop_dpp")
4274 (set_attr "length" "8")])
;; Insn-and-split "*<reduc_op>_dpp_shr_<mode>" for V_DI modes, iterating
;; over REDUC_2REG_UNSPEC.
;; The split replacement contains two unspecs of the same REDUC_2REG_UNSPEC
;; kind: one over operands 6 and 8, the other over operands 7 and 9, each
;; also taking operand 3.  The preparation code fills operands 4..9 with
;; gcn_operand_part: parts 0 and 1 of operand 0 (4, 5), of operand 1 (6, 7)
;; and of operand 2 (8, 9).
;; Attributes: type "vmult", length 16.
;; NOTE(review): the insn condition, output template, split condition and
;; the destinations of the two split sets are not visible in this listing;
;; presumably they set operands 4 and 5 respectively -- confirm.
4276 (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
4277 [(set (match_operand:V_DI 0 "register_operand" "=v")
4279 [(match_operand:V_DI 1 "register_operand" "v")
4280 (match_operand:V_DI 2 "register_operand" "v")
4281 (match_operand:SI 3 "const_int_operand" "n")]
4282 REDUC_2REG_UNSPEC))]
4288 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
4291 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
4293 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
4294 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
4295 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
4296 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
4297 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
4298 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
4300 [(set_attr "type" "vmult")
4301 (set_attr "length" "16")])
4303 ; Special cases for addition.
;; Insn "*plus_carry_dpp_shr_<mode>" for V_INT_1REG modes.
;; It uses UNSPEC_PLUS_CARRY_DPP_SHR over two vector registers and an SImode
;; const_int, and the pattern declares a clobber of the DImode VCC_REG.
;; The output text comes from gcn_expand_dpp_shr_insn with the
;; "v_add%^_u32" template and INTVAL (operand 3); note that the call passes
;; <VnSI>mode rather than <MODE>mode.
;; Attributes: type "vop_dpp", length 8.
;; NOTE(review): the unspec wrapper line and the insn condition are not
;; visible in this listing -- confirm against the full file.
4305 (define_insn "*plus_carry_dpp_shr_<mode>"
4306 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
4308 [(match_operand:V_INT_1REG 1 "register_operand" "v")
4309 (match_operand:V_INT_1REG 2 "register_operand" "v")
4310 (match_operand:SI 3 "const_int_operand" "n")]
4311 UNSPEC_PLUS_CARRY_DPP_SHR))
4312 (clobber (reg:DI VCC_REG))]
4315 return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
4316 UNSPEC_PLUS_CARRY_DPP_SHR,
4317 INTVAL (operands[3]));
4319 [(set_attr "type" "vop_dpp")
4320 (set_attr "length" "8")])
;; Insn "*plus_carry_in_dpp_shr_<mode>" for V_SI modes.
;; It uses UNSPEC_PLUS_CARRY_IN_DPP_SHR over two vector registers, an SImode
;; const_int and an extra DImode register operand 4 (constraint "cV"); the
;; pattern also declares a clobber of the DImode VCC_REG.
;; The output text comes from gcn_expand_dpp_shr_insn with the
;; "v_addc%^_u32" template and INTVAL (operand 3).
;; Attributes: type "vop_dpp", length 8.
;; NOTE(review): presumably operand 4 is the incoming carry; the unspec
;; wrapper line and the insn condition are not visible in this listing --
;; confirm against the full file.
4322 (define_insn "*plus_carry_in_dpp_shr_<mode>"
4323 [(set (match_operand:V_SI 0 "register_operand" "=v")
4325 [(match_operand:V_SI 1 "register_operand" "v")
4326 (match_operand:V_SI 2 "register_operand" "v")
4327 (match_operand:SI 3 "const_int_operand" "n")
4328 (match_operand:DI 4 "register_operand" "cV")]
4329 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
4330 (clobber (reg:DI VCC_REG))]
4333 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
4334 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
4335 INTVAL (operands[3]));
4337 [(set_attr "type" "vop_dpp")
4338 (set_attr "length" "8")])
;; Insn-and-split "*plus_carry_dpp_shr_<mode>" for V_DI modes.
;; The replacement is two parallels, each clobbering the DImode VCC_REG:
;;   1. set operand 4 from UNSPEC_PLUS_CARRY_DPP_SHR over operands 6, 8, 3;
;;   2. set operand 5 from UNSPEC_PLUS_CARRY_IN_DPP_SHR over operands 7, 9,
;;      3 and (reg:DI VCC_REG).
;; The preparation code fills operands 4..9 with gcn_operand_part: parts 0
;; and 1 of operand 0 (4, 5), of operand 1 (6, 7) and of operand 2 (8, 9).
;; Attributes: type "vmult", length 16.
;; NOTE(review): presumably part 0 is the low half, so VCC carries from the
;; first add into the second; the insn condition, output template and split
;; condition are not visible in this listing -- confirm.
4340 (define_insn_and_split "*plus_carry_dpp_shr_<mode>"
4341 [(set (match_operand:V_DI 0 "register_operand" "=v")
4343 [(match_operand:V_DI 1 "register_operand" "v")
4344 (match_operand:V_DI 2 "register_operand" "v")
4345 (match_operand:SI 3 "const_int_operand" "n")]
4346 UNSPEC_PLUS_CARRY_DPP_SHR))
4347 (clobber (reg:DI VCC_REG))]
4351 [(parallel [(set (match_dup 4)
4353 [(match_dup 6) (match_dup 8) (match_dup 3)]
4354 UNSPEC_PLUS_CARRY_DPP_SHR))
4355 (clobber (reg:DI VCC_REG))])
4356 (parallel [(set (match_dup 5)
4358 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
4359 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
4360 (clobber (reg:DI VCC_REG))])]
4362 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
4363 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
4364 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
4365 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
4366 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
4367 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
4369 [(set_attr "type" "vmult")
4370 (set_attr "length" "16")])
4373 ;; {{{ Miscellaneous
;; Expander "vec_series<mode>" for V_SI modes.
;; Operand 0 is the V_SI destination; operands 1 and 2 are SImode
;; gcn_alu_operands.  The body computes operand 0 = v1 * operand 2 +
;; operand 1, where v1 is hard register VGPR_REGNO (1) viewed in <MODE>mode:
;; gen_mul<mode>3_dup writes the product to a temporary and
;; gen_add<mode>3_dup adds operand 1 to it.
;; NOTE(review): assumes VGPR 1 holds the per-lane index on entry, and that
;; the "_dup" generators broadcast their scalar argument -- TODO confirm.
4375 (define_expand "vec_series<mode>"
4376 [(match_operand:V_SI 0 "register_operand")
4377 (match_operand:SI 1 "gcn_alu_operand")
4378 (match_operand:SI 2 "gcn_alu_operand")]
4381 rtx tmp = gen_reg_rtx (<MODE>mode);
4382 rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
4384 emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
4385 emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
4389 (define_expand "vec_series<mode>"
4390 [(match_operand:V_DI 0 "register_operand")
4391 (match_operand:DI 1 "gcn_alu_operand")
4392 (match_operand:DI 2 "gcn_alu_operand")]
4395 rtx tmp = gen_reg_rtx (<MODE>mode);
4396 rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
4397 rtx op1vec = gen_reg_rtx (<MODE>mode);
4399 emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2]));
4400 emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1]));
4401 emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec));