gcc/config/gcn/gcn-valu.md
1 ;; Copyright (C) 2016-2022 Free Software Foundation, Inc.
2
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
6 ;; any later version.
7
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 ;; for more details.
12
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
16
17 ;; {{{ Vector iterators
18
19 ; Vector modes for specific types
20 (define_mode_iterator V_QI
21 [V2QI V4QI V8QI V16QI V32QI V64QI])
22 (define_mode_iterator V_HI
23 [V2HI V4HI V8HI V16HI V32HI V64HI])
24 (define_mode_iterator V_HF
25 [V2HF V4HF V8HF V16HF V32HF V64HF])
26 (define_mode_iterator V_SI
27 [V2SI V4SI V8SI V16SI V32SI V64SI])
28 (define_mode_iterator V_SF
29 [V2SF V4SF V8SF V16SF V32SF V64SF])
30 (define_mode_iterator V_DI
31 [V2DI V4DI V8DI V16DI V32DI V64DI])
32 (define_mode_iterator V_DF
33 [V2DF V4DF V8DF V16DF V32DF V64DF])
34
35 ; Vector modes for sub-dword modes
36 (define_mode_iterator V_QIHI
37 [V2QI V2HI
38 V4QI V4HI
39 V8QI V8HI
40 V16QI V16HI
41 V32QI V32HI
42 V64QI V64HI])
43
44 ; Vector modes for one vector register
45 (define_mode_iterator V_1REG
46 [V2QI V2HI V2SI V2HF V2SF
47 V4QI V4HI V4SI V4HF V4SF
48 V8QI V8HI V8SI V8HF V8SF
49 V16QI V16HI V16SI V16HF V16SF
50 V32QI V32HI V32SI V32HF V32SF
51 V64QI V64HI V64SI V64HF V64SF])
52
53 (define_mode_iterator V_INT_1REG
54 [V2QI V2HI V2SI
55 V4QI V4HI V4SI
56 V8QI V8HI V8SI
57 V16QI V16HI V16SI
58 V32QI V32HI V32SI
59 V64QI V64HI V64SI])
60 (define_mode_iterator V_INT_1REG_ALT
61 [V2QI V2HI V2SI
62 V4QI V4HI V4SI
63 V8QI V8HI V8SI
64 V16QI V16HI V16SI
65 V32QI V32HI V32SI
66 V64QI V64HI V64SI])
67 (define_mode_iterator V_FP_1REG
68 [V2HF V2SF
69 V4HF V4SF
70 V8HF V8SF
71 V16HF V16SF
72 V32HF V32SF
73 V64HF V64SF])
74
75 ; Vector modes for two vector registers
76 (define_mode_iterator V_2REG
77 [V2DI V2DF
78 V4DI V4DF
79 V8DI V8DF
80 V16DI V16DF
81 V32DI V32DF
82 V64DI V64DF])
83
84 ; Vector modes with native support
85 (define_mode_iterator V_noQI
86 [V2HI V2HF V2SI V2SF V2DI V2DF
87 V4HI V4HF V4SI V4SF V4DI V4DF
88 V8HI V8HF V8SI V8SF V8DI V8DF
89 V16HI V16HF V16SI V16SF V16DI V16DF
90 V32HI V32HF V32SI V32SF V32DI V32DF
91 V64HI V64HF V64SI V64SF V64DI V64DF])
92 (define_mode_iterator V_noHI
93 [V2HF V2SI V2SF V2DI V2DF
94 V4HF V4SI V4SF V4DI V4DF
95 V8HF V8SI V8SF V8DI V8DF
96 V16HF V16SI V16SF V16DI V16DF
97 V32HF V32SI V32SF V32DI V32DF
98 V64HF V64SI V64SF V64DI V64DF])
99
100 (define_mode_iterator V_INT_noQI
101 [V2HI V2SI V2DI
102 V4HI V4SI V4DI
103 V8HI V8SI V8DI
104 V16HI V16SI V16DI
105 V32HI V32SI V32DI
106 V64HI V64SI V64DI])
107 (define_mode_iterator V_INT_noHI
108 [V2SI V2DI
109 V4SI V4DI
110 V8SI V8DI
111 V16SI V16DI
112 V32SI V32DI
113 V64SI V64DI])
114
115 ; All of the above
116 (define_mode_iterator V_ALL
117 [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
118 V4QI V4HI V4HF V4SI V4SF V4DI V4DF
119 V8QI V8HI V8HF V8SI V8SF V8DI V8DF
120 V16QI V16HI V16HF V16SI V16SF V16DI V16DF
121 V32QI V32HI V32HF V32SI V32SF V32DI V32DF
122 V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
123 (define_mode_iterator V_ALL_ALT
124 [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
125 V4QI V4HI V4HF V4SI V4SF V4DI V4DF
126 V8QI V8HI V8HF V8SI V8SF V8DI V8DF
127 V16QI V16HI V16HF V16SI V16SF V16DI V16DF
128 V32QI V32HI V32HF V32SI V32SF V32DI V32DF
129 V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
130
131 (define_mode_iterator V_INT
132 [V2QI V2HI V2SI V2DI
133 V4QI V4HI V4SI V4DI
134 V8QI V8HI V8SI V8DI
135 V16QI V16HI V16SI V16DI
136 V32QI V32HI V32SI V32DI
137 V64QI V64HI V64SI V64DI])
138 (define_mode_iterator V_FP
139 [V2HF V2SF V2DF
140 V4HF V4SF V4DF
141 V8HF V8SF V8DF
142 V16HF V16SF V16DF
143 V32HF V32SF V32DF
144 V64HF V64SF V64DF])
145
146 (define_mode_attr scalar_mode
147 [(V2QI "qi") (V2HI "hi") (V2SI "si")
148 (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
149 (V4QI "qi") (V4HI "hi") (V4SI "si")
150 (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df")
151 (V8QI "qi") (V8HI "hi") (V8SI "si")
152 (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df")
153 (V16QI "qi") (V16HI "hi") (V16SI "si")
154 (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
155 (V32QI "qi") (V32HI "hi") (V32SI "si")
156 (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
157 (V64QI "qi") (V64HI "hi") (V64SI "si")
158 (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
159
160 (define_mode_attr SCALAR_MODE
161 [(V2QI "QI") (V2HI "HI") (V2SI "SI")
162 (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF")
163 (V4QI "QI") (V4HI "HI") (V4SI "SI")
164 (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF")
165 (V8QI "QI") (V8HI "HI") (V8SI "SI")
166 (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF")
167 (V16QI "QI") (V16HI "HI") (V16SI "SI")
168 (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
169 (V32QI "QI") (V32HI "HI") (V32SI "SI")
170 (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
171 (V64QI "QI") (V64HI "HI") (V64SI "SI")
172 (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
173
174 (define_mode_attr vnsi
175 [(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si")
176 (V2SF "v2si") (V2DI "v2si") (V2DF "v2si")
177 (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si")
178 (V4SF "v4si") (V4DI "v4si") (V4DF "v4si")
179 (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si")
180 (V8SF "v8si") (V8DI "v8si") (V8DF "v8si")
181 (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
182 (V16SF "v16si") (V16DI "v16si") (V16DF "v16si")
183 (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
184 (V32SF "v32si") (V32DI "v32si") (V32DF "v32si")
185 (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
186 (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
187
188 (define_mode_attr VnSI
189 [(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI")
190 (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI")
191 (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI")
192 (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI")
193 (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI")
194 (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI")
195 (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
196 (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI")
197 (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
198 (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI")
199 (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
200 (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
201
202 (define_mode_attr vndi
203 [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di")
204 (V2SF "v2di") (V2DI "v2di") (V2DF "v2di")
205 (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di")
206 (V4SF "v4di") (V4DI "v4di") (V4DF "v4di")
207 (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di")
208 (V8SF "v8di") (V8DI "v8di") (V8DF "v8di")
209 (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
210 (V16SF "v16di") (V16DI "v16di") (V16DF "v16di")
211 (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
212 (V32SF "v32di") (V32DI "v32di") (V32DF "v32di")
213 (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
214 (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
215
216 (define_mode_attr VnDI
217 [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI")
218 (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI")
219 (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI")
220 (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI")
221 (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI")
222 (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI")
223 (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
224 (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI")
225 (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
226 (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI")
227 (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
228 (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
229
230 (define_mode_attr sdwa
231 [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD")
232 (V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD")
233 (V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD")
234 (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD")
235 (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD")
236 (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
237
238 ;; }}}
239 ;; {{{ Substitutions
240
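; The substitutions below let a single pattern definition produce both a plain
; variant and an "_exec" variant in which the operation is predicated on the
; EXEC register.  Each define_subst_attr names a define_subst and gives the
; suffix appended to the pattern name when that subst is applied.
;
; As an illustrative example (not an additional pattern): writing
; "vec_duplicate<mode><exec>" later in this file yields both
; "vec_duplicate<mode>" and "vec_duplicate<mode>_exec", where the latter has
; its source wrapped as
;
;   (vec_merge:V_ALL (<original source>)
;     (match_operand:V_ALL "gcn_register_or_unspec_operand" "U0")
;     (match_operand:DI "gcn_exec_reg_operand" "e"))
;
; exactly as specified by the "vec_merge" define_subst below.
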
241 (define_subst_attr "exec" "vec_merge"
242 "" "_exec")
243 (define_subst_attr "exec_clobber" "vec_merge_with_clobber"
244 "" "_exec")
245 (define_subst_attr "exec_vcc" "vec_merge_with_vcc"
246 "" "_exec")
247 (define_subst_attr "exec_scatter" "scatter_store"
248 "" "_exec")
249
250 (define_subst "vec_merge"
251 [(set (match_operand:V_ALL 0)
252 (match_operand:V_ALL 1))]
253 ""
254 [(set (match_dup 0)
255 (vec_merge:V_ALL
256 (match_dup 1)
257 (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
258 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
259
260 (define_subst "vec_merge_with_clobber"
261 [(set (match_operand:V_ALL 0)
262 (match_operand:V_ALL 1))
263 (clobber (match_operand 2))]
264 ""
265 [(set (match_dup 0)
266 (vec_merge:V_ALL
267 (match_dup 1)
268 (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
269 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
270 (clobber (match_dup 2))])
271
272 (define_subst "vec_merge_with_vcc"
273 [(set (match_operand:V_ALL 0)
274 (match_operand:V_ALL 1))
275 (set (match_operand:DI 2)
276 (match_operand:DI 3))]
277 ""
278 [(parallel
279 [(set (match_dup 0)
280 (vec_merge:V_ALL
281 (match_dup 1)
282 (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
283 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
284 (set (match_dup 2)
285 (and:DI (match_dup 3)
286 (reg:DI EXEC_REG)))])])
287
288 (define_subst "scatter_store"
289 [(set (mem:BLK (scratch))
290 (unspec:BLK
291 [(match_operand 0)
292 (match_operand 1)
293 (match_operand 2)
294 (match_operand 3)]
295 UNSPEC_SCATTER))]
296 ""
297 [(set (mem:BLK (scratch))
298 (unspec:BLK
299 [(match_dup 0)
300 (match_dup 1)
301 (match_dup 2)
302 (match_dup 3)
303 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
304 UNSPEC_SCATTER))])
305
306 ;; }}}
307 ;; {{{ Vector moves
308
309 ; This is the entry point for all vector register moves. Memory accesses can
310 ; also come this way, but will more usually go through the reload_in/out,
311 ; gather/scatter, maskload/store, etc. patterns.
312
313 (define_expand "mov<mode>"
314 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
315 (match_operand:V_ALL 1 "general_operand"))]
316 ""
317 {
318 /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
319 registers, but we can convert the MEM to a mode that does work. */
320 if (MEM_P (operands[0]) && !SUBREG_P (operands[0])
321 && SUBREG_P (operands[1])
322 && GET_MODE_SIZE (GET_MODE (operands[1]))
323 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))))
324 {
325 rtx src = SUBREG_REG (operands[1]);
326 rtx mem = copy_rtx (operands[0]);
327 PUT_MODE_RAW (mem, GET_MODE (src));
328 emit_move_insn (mem, src);
329 DONE;
330 }
331 if (MEM_P (operands[1]) && !SUBREG_P (operands[1])
332 && SUBREG_P (operands[0])
333 && GET_MODE_SIZE (GET_MODE (operands[0]))
334 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))))
335 {
336 rtx dest = SUBREG_REG (operands[0]);
337 rtx mem = copy_rtx (operands[1]);
338 PUT_MODE_RAW (mem, GET_MODE (dest));
339 emit_move_insn (dest, mem);
340 DONE;
341 }
342
343 /* SUBREG of MEM is not supported. */
344 gcc_assert ((!SUBREG_P (operands[0])
345 || !MEM_P (SUBREG_REG (operands[0])))
346 && (!SUBREG_P (operands[1])
347 || !MEM_P (SUBREG_REG (operands[1]))));
348
349 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
350 {
351 operands[1] = force_reg (<MODE>mode, operands[1]);
352 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
353 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
354 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
355 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
356 operands[0],
357 scratch);
358 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
359 DONE;
360 }
361 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
362 {
363 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
364 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
365 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
366 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
367 operands[1],
368 scratch);
369 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
370 DONE;
371 }
372 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
373 {
374 gcc_assert (!reload_completed);
375 rtx scratch = gen_reg_rtx (<VnDI>mode);
376 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
377 DONE;
378 }
379 })
380
381 ; A pseudo instruction that helps LRA use the "U0" constraint.
382
383 (define_insn "mov<mode>_unspec"
384 [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
385 (match_operand:V_ALL 1 "gcn_unspec_operand" " U"))]
386 ""
387 ""
388 [(set_attr "type" "unknown")
389 (set_attr "length" "0")])
390
391 (define_insn "*mov<mode>"
392 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
393 (match_operand:V_1REG 1 "general_operand" "vA,B"))]
394 ""
395 "v_mov_b32\t%0, %1"
396 [(set_attr "type" "vop1,vop1")
397 (set_attr "length" "4,8")])
398
399 (define_insn "mov<mode>_exec"
400 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
401 (vec_merge:V_1REG
402 (match_operand:V_1REG 1 "general_operand" "vA, B, v,vA, m, v")
403 (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
404 "U0,U0,vA,vA,U0,U0")
405 (match_operand:DI 3 "register_operand" " e, e,cV,Sv, e, e")))
406 (clobber (match_scratch:<VnDI> 4 "=X, X, X, X,&v,&v"))]
407 "!MEM_P (operands[0]) || REG_P (operands[1])"
408 "@
409 v_mov_b32\t%0, %1
410 v_mov_b32\t%0, %1
411 v_cndmask_b32\t%0, %2, %1, vcc
412 v_cndmask_b32\t%0, %2, %1, %3
413 #
414 #"
415 [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
416 (set_attr "length" "4,8,4,8,16,16")])
417
418 ; This variant does not accept an unspec, but does permit MEM
419 ; read/modify/write, which is necessary for maskstore.
420
421 ;(define_insn "*mov<mode>_exec_match"
422 ; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
423 ; (vec_merge:V_1REG
424 ; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
425 ; (match_dup 0)
426 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
427 ; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
428 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
429 ; "@
430 ; v_mov_b32\t%0, %1
431 ; v_mov_b32\t%0, %1
432 ; #
433 ; #"
434 ; [(set_attr "type" "vop1,vop1,*,*")
435 ; (set_attr "length" "4,8,16,16")])
436
437 (define_insn "*mov<mode>"
438 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
439 (match_operand:V_2REG 1 "general_operand" "vDB"))]
440 ""
441 {
442 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
443 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
444 else
445 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
446 }
447 [(set_attr "type" "vmult")
448 (set_attr "length" "16")])
449
450 (define_insn "mov<mode>_exec"
451 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
452 (vec_merge:V_2REG
453 (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
454 (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
455 " U0,vDA0,vDA0,U0,U0")
456 (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
457 (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
458 "!MEM_P (operands[0]) || REG_P (operands[1])"
459 {
460 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
461 switch (which_alternative)
462 {
463 case 0:
464 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
465 case 1:
466 return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
467 "v_cndmask_b32\t%H0, %H2, %H1, vcc";
468 case 2:
469 return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
470 "v_cndmask_b32\t%H0, %H2, %H1, %3";
471 }
472 else
473 switch (which_alternative)
474 {
475 case 0:
476 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
477 case 1:
478 return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
479 "v_cndmask_b32\t%L0, %L2, %L1, vcc";
480 case 2:
481 return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
482 "v_cndmask_b32\t%L0, %L2, %L1, %3";
483 }
484
485 return "#";
486 }
487 [(set_attr "type" "vmult,vmult,vmult,*,*")
488 (set_attr "length" "16,16,16,16,16")])
489
490 ; This variant does not accept an unspec, but does permit MEM
491 ; read/modify/write, which is necessary for maskstore.
492
493 ;(define_insn "*mov<mode>_exec_match"
494 ; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
495 ; (vec_merge:V_2REG
496 ; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
497 ; (match_dup 0)
498 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
499 ; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
500 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
501 ; "@
502 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
503 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
504 ; else \
505 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
506 ; #
507 ; #"
508 ; [(set_attr "type" "vmult,*,*")
509 ; (set_attr "length" "16,16,16")])
510
511 ; An SGPR-base load looks like:
512 ; <load> v, Sv
513 ;
514 ; There's no hardware instruction that corresponds to this, but vector base
515 ; addresses are placed in an SGPR because it is easier to add to a vector.
516 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
517 ;
518 ; Rewrite as:
519 ; vT = v1 << log2(element-size)
520 ; vT += Sv
521 ; flat_load v, vT
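;
; As a rough, purely illustrative sketch (the real code is emitted by
; gcn_expand_scalar_to_vector_address and varies with the address space,
; element size and ISA level), the rewrite above corresponds to a sequence
; along the lines of:
;
;   v_lshlrev_b32    vT, 2, v1       ; vT = lane-id * element-size (4 bytes)
;   v_add_co_u32     vT, vcc, Sv, vT ; add the SGPR base, with a carry into
;   ...                              ;   the high half of the 64-bit address
;   flat_load_dword  v, vT           ; per-lane load through the flat aperture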
522
523 (define_insn "mov<mode>_sgprbase"
524 [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
525 (unspec:V_1REG
526 [(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")]
527 UNSPEC_SGPRBASE))
528 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
529 "lra_in_progress || reload_completed"
530 "@
531 v_mov_b32\t%0, %1
532 v_mov_b32\t%0, %1
533 #
534 #"
535 [(set_attr "type" "vop1,vop1,*,*")
536 (set_attr "length" "4,8,12,12")])
537
538 (define_insn "mov<mode>_sgprbase"
539 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
540 (unspec:V_2REG
541 [(match_operand:V_2REG 1 "general_operand" "vDB, m, v")]
542 UNSPEC_SGPRBASE))
543 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
544 "lra_in_progress || reload_completed"
545 "@
546 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
547 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
548 else \
549 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
550 #
551 #"
552 [(set_attr "type" "vmult,*,*")
553 (set_attr "length" "8,12,12")])
554
555 ; reload_in was once a standard name, but here it's only referenced by
556 ; gcn_secondary_reload. It allows a reload with a scratch register.
557
558 (define_expand "reload_in<mode>"
559 [(set (match_operand:V_ALL 0 "register_operand" "= v")
560 (match_operand:V_ALL 1 "memory_operand" " m"))
561 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
562 ""
563 {
564 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
565 DONE;
566 })
567
568 ; reload_out is similar to reload_in, above.
569
570 (define_expand "reload_out<mode>"
571 [(set (match_operand:V_ALL 0 "memory_operand" "= m")
572 (match_operand:V_ALL 1 "register_operand" " v"))
573 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
574 ""
575 {
576 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
577 DONE;
578 })
579
580 ; Expand scalar addresses into gather/scatter patterns
581
582 (define_split
583 [(set (match_operand:V_ALL 0 "memory_operand")
584 (unspec:V_ALL
585 [(match_operand:V_ALL 1 "general_operand")]
586 UNSPEC_SGPRBASE))
587 (clobber (match_scratch:<VnDI> 2))]
588 ""
589 [(set (mem:BLK (scratch))
590 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
591 UNSPEC_SCATTER))]
592 {
593 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
594 operands[0],
595 operands[2]);
596 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
597 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
598 })
599
600 (define_split
601 [(set (match_operand:V_ALL 0 "memory_operand")
602 (vec_merge:V_ALL
603 (match_operand:V_ALL 1 "general_operand")
604 (match_operand:V_ALL 2 "")
605 (match_operand:DI 3 "gcn_exec_reg_operand")))
606 (clobber (match_scratch:<VnDI> 4))]
607 ""
608 [(set (mem:BLK (scratch))
609 (unspec:BLK [(match_dup 5) (match_dup 1)
610 (match_dup 6) (match_dup 7) (match_dup 3)]
611 UNSPEC_SCATTER))]
612 {
613 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
614 operands[3],
615 operands[0],
616 operands[4]);
617 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
618 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
619 })
620
621 (define_split
622 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
623 (unspec:V_ALL
624 [(match_operand:V_ALL 1 "memory_operand")]
625 UNSPEC_SGPRBASE))
626 (clobber (match_scratch:<VnDI> 2))]
627 ""
628 [(set (match_dup 0)
629 (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
630 (mem:BLK (scratch))]
631 UNSPEC_GATHER))]
632 {
633 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
634 operands[1],
635 operands[2]);
636 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
637 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
638 })
639
640 (define_split
641 [(set (match_operand:V_ALL 0 "nonimmediate_operand")
642 (vec_merge:V_ALL
643 (match_operand:V_ALL 1 "memory_operand")
644 (match_operand:V_ALL 2 "")
645 (match_operand:DI 3 "gcn_exec_reg_operand")))
646 (clobber (match_scratch:<VnDI> 4))]
647 ""
648 [(set (match_dup 0)
649 (vec_merge:V_ALL
650 (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
651 (mem:BLK (scratch))]
652 UNSPEC_GATHER)
653 (match_dup 2)
654 (match_dup 3)))]
655 {
656 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
657 operands[3],
658 operands[1],
659 operands[4]);
660 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
661 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
662 })
663
664 ; TODO: Add zero/sign extending variants.
665
666 ;; }}}
667 ;; {{{ Lane moves
668
669 ; v_writelane and v_readlane work regardless of exec flags.
670 ; We allow the source to be scratch.
671 ;
672 ; FIXME these should take A immediates
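;
; Purely for illustration (register numbers are arbitrary): setting lane 5 of
; a 32-bit vector register from an SGPR, and reading it back, map onto
;
;   v_writelane_b32 v4, s10, 5
;   v_readlane_b32  s10, v4, 5
;
; which is the form generated by the vec_set/vec_extract patterns below.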
673
674 (define_insn "*vec_set<mode>"
675 [(set (match_operand:V_1REG 0 "register_operand" "= v")
676 (vec_merge:V_1REG
677 (vec_duplicate:V_1REG
678 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
679 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
680 (ashift (const_int 1)
681 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
682 ""
683 "v_writelane_b32 %0, %1, %2"
684 [(set_attr "type" "vop3a")
685 (set_attr "length" "8")
686 (set_attr "exec" "none")
687 (set_attr "laneselect" "yes")])
688
689 ; FIXME: 64-bit operations really should be splitters, but I am not sure how
690 ; to represent vertical subregs.
691 (define_insn "*vec_set<mode>"
692 [(set (match_operand:V_2REG 0 "register_operand" "= v")
693 (vec_merge:V_2REG
694 (vec_duplicate:V_2REG
695 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
696 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
697 (ashift (const_int 1)
698 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
699 ""
700 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
701 [(set_attr "type" "vmult")
702 (set_attr "length" "16")
703 (set_attr "exec" "none")
704 (set_attr "laneselect" "yes")])
705
706 (define_expand "vec_set<mode>"
707 [(set (match_operand:V_ALL 0 "register_operand")
708 (vec_merge:V_ALL
709 (vec_duplicate:V_ALL
710 (match_operand:<SCALAR_MODE> 1 "register_operand"))
711 (match_dup 0)
712 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
713 "")
714
715 (define_insn "*vec_set<mode>_1"
716 [(set (match_operand:V_1REG 0 "register_operand" "=v")
717 (vec_merge:V_1REG
718 (vec_duplicate:V_1REG
719 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
720 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
721 (match_operand:SI 2 "const_int_operand" " i")))]
722 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
723 {
724 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
725 return "v_writelane_b32 %0, %1, %2";
726 }
727 [(set_attr "type" "vop3a")
728 (set_attr "length" "8")
729 (set_attr "exec" "none")
730 (set_attr "laneselect" "yes")])
731
732 (define_insn "*vec_set<mode>_1"
733 [(set (match_operand:V_2REG 0 "register_operand" "=v")
734 (vec_merge:V_2REG
735 (vec_duplicate:V_2REG
736 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
737 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
738 (match_operand:SI 2 "const_int_operand" " i")))]
739 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
740 {
741 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
742 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
743 }
744 [(set_attr "type" "vmult")
745 (set_attr "length" "16")
746 (set_attr "exec" "none")
747 (set_attr "laneselect" "yes")])
748
749 (define_insn "vec_duplicate<mode><exec>"
750 [(set (match_operand:V_1REG 0 "register_operand" "=v")
751 (vec_duplicate:V_1REG
752 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
753 ""
754 "v_mov_b32\t%0, %1"
755 [(set_attr "type" "vop3a")
756 (set_attr "length" "8")])
757
758 (define_insn "vec_duplicate<mode><exec>"
759 [(set (match_operand:V_2REG 0 "register_operand" "= v")
760 (vec_duplicate:V_2REG
761 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
762 ""
763 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
764 [(set_attr "type" "vop3a")
765 (set_attr "length" "16")])
766
767 (define_insn "vec_extract<mode><scalar_mode>"
768 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
769 (vec_select:<SCALAR_MODE>
770 (match_operand:V_1REG 1 "register_operand" " v")
771 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
772 ""
773 "v_readlane_b32 %0, %1, %2"
774 [(set_attr "type" "vop3a")
775 (set_attr "length" "8")
776 (set_attr "exec" "none")
777 (set_attr "laneselect" "yes")])
778
779 (define_insn "vec_extract<mode><scalar_mode>"
780 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
781 (vec_select:<SCALAR_MODE>
782 (match_operand:V_2REG 1 "register_operand" " v")
783 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
784 ""
785 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
786 [(set_attr "type" "vmult")
787 (set_attr "length" "16")
788 (set_attr "exec" "none")
789 (set_attr "laneselect" "yes")])
790
791 (define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>"
792 [(set (match_operand:V_ALL_ALT 0 "register_operand")
793 (vec_select:V_ALL_ALT
794 (match_operand:V_ALL 1 "register_operand")
795 (parallel [(match_operand 2 "immediate_operand")])))]
796 "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)
797 && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode"
798 {
799 int numlanes = GET_MODE_NUNITS (<V_ALL_ALT:MODE>mode);
800 int firstlane = INTVAL (operands[2]) * numlanes;
801 rtx tmp;
802
803 if (firstlane == 0)
804 {
805 /* A plain move will do. */
806 tmp = operands[1];
807 } else {
808 /* FIXME: optimize this by using DPP where available. */
809
810 rtx permutation = gen_reg_rtx (<V_ALL:VnSI>mode);
811 emit_insn (gen_vec_series<V_ALL:vnsi> (permutation,
812 GEN_INT (firstlane*4),
813 GEN_INT (4)));
814
815 tmp = gen_reg_rtx (<V_ALL:MODE>mode);
816 emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1],
817 get_exec (<V_ALL:MODE>mode)));
818 }
819
820 emit_move_insn (operands[0],
821 gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0));
822 DONE;
823 })
824
825 (define_expand "extract_last_<mode>"
826 [(match_operand:<SCALAR_MODE> 0 "register_operand")
827 (match_operand:DI 1 "gcn_alu_operand")
828 (match_operand:V_ALL 2 "register_operand")]
829 "can_create_pseudo_p ()"
830 {
831 rtx dst = operands[0];
832 rtx mask = operands[1];
833 rtx vect = operands[2];
834 rtx tmpreg = gen_reg_rtx (SImode);
835
836 emit_insn (gen_clzdi2 (tmpreg, mask));
837 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
838 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
839 DONE;
840 })
841
842 (define_expand "fold_extract_last_<mode>"
843 [(match_operand:<SCALAR_MODE> 0 "register_operand")
844 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
845 (match_operand:DI 2 "gcn_alu_operand")
846 (match_operand:V_ALL 3 "register_operand")]
847 "can_create_pseudo_p ()"
848 {
849 rtx dst = operands[0];
850 rtx default_value = operands[1];
851 rtx mask = operands[2];
852 rtx vect = operands[3];
853 rtx else_label = gen_label_rtx ();
854 rtx end_label = gen_label_rtx ();
855
856 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
857 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
858 emit_insn (gen_extract_last_<mode> (dst, mask, vect));
859 emit_jump_insn (gen_jump (end_label));
860 emit_barrier ();
861 emit_label (else_label);
862 emit_move_insn (dst, default_value);
863 emit_label (end_label);
864 DONE;
865 })
866
867 (define_expand "vec_init<mode><scalar_mode>"
868 [(match_operand:V_ALL 0 "register_operand")
869 (match_operand 1)]
870 ""
871 {
872 gcn_expand_vector_init (operands[0], operands[1]);
873 DONE;
874 })
875
876 (define_expand "vec_init<V_ALL:mode><V_ALL_ALT:mode>"
877 [(match_operand:V_ALL 0 "register_operand")
878 (match_operand:V_ALL_ALT 1)]
879 "<V_ALL:SCALAR_MODE>mode == <V_ALL_ALT:SCALAR_MODE>mode
880 && MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)"
881 {
882 gcn_expand_vector_init (operands[0], operands[1]);
883 DONE;
884 })
885
886 ;; }}}
887 ;; {{{ Scatter / Gather
888
889 ;; GCN does not have an instruction for loading a vector from contiguous
890 ;; memory so *all* loads and stores are eventually converted to scatter
891 ;; or gather.
892 ;;
893 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
894 ;; unspec. The unspec formats are as follows:
895 ;;
896 ;; (unspec:V??
897 ;; [(<address expression>)
898 ;; (<addr_space_t>)
899 ;; (<use_glc>)
900 ;; (mem:BLK (scratch))]
901 ;; UNSPEC_GATHER)
902 ;;
903 ;; (unspec:BLK
904 ;; [(<address expression>)
905 ;; (<source register>)
906 ;; (<addr_space_t>)
907 ;; (<use_glc>)
908 ;; (<exec>)]
909 ;; UNSPEC_SCATTER)
910 ;;
911 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
912 ;; - The mem:BLK does not contain any real information, but indicates that an
913 ;; unknown memory read is taking place. Stores are expected to use a similar
914 ;; mem:BLK outside the unspec.
915 ;; - The address space and glc (volatile) fields are there to replace the
916 ;; fields normally found in a MEM.
917 ;; - Multiple forms of address expression are supported, below.
918 ;;
919 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
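;;
;; As a purely illustrative instance of the UNSPEC_GATHER format above (all
;; register and constant values here are placeholders), a V64SImode gather
;; from the global address space using a scalar base plus a vector of 32-bit
;; offsets would look roughly like:
;;
;;   (unspec:V64SI
;;     [(plus:V64DI
;;        (plus:V64DI (vec_duplicate:V64DI (reg:DI <base>))
;;                    (sign_extend:V64DI (reg:V64SI <offsets>)))
;;        (vec_duplicate:V64DI (const_int <immediate-offset>)))
;;      (const_int <addr_space_t for the global space>)
;;      (const_int 0)              ; glc (volatile) flag
;;      (mem:BLK (scratch))]
;;     UNSPEC_GATHER)
;;
;; This corresponds to the gather<mode>_insn_2offsets pattern below.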
920
921 (define_expand "gather_load<mode><vnsi>"
922 [(match_operand:V_ALL 0 "register_operand")
923 (match_operand:DI 1 "register_operand")
924 (match_operand:<VnSI> 2 "register_operand")
925 (match_operand 3 "immediate_operand")
926 (match_operand:SI 4 "gcn_alu_operand")]
927 ""
928 {
929 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
930 operands[2], operands[4],
931 INTVAL (operands[3]), NULL);
932
933 if (GET_MODE (addr) == <VnDI>mode)
934 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
935 const0_rtx, const0_rtx));
936 else
937 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
938 addr, const0_rtx, const0_rtx,
939 const0_rtx));
940 DONE;
941 })
942
943 ; Allow any address expression
944 (define_expand "gather<mode>_expr<exec>"
945 [(set (match_operand:V_ALL 0 "register_operand")
946 (unspec:V_ALL
947 [(match_operand 1 "")
948 (match_operand 2 "immediate_operand")
949 (match_operand 3 "immediate_operand")
950 (mem:BLK (scratch))]
951 UNSPEC_GATHER))]
952 ""
953 {})
954
955 (define_insn "gather<mode>_insn_1offset<exec>"
956 [(set (match_operand:V_ALL 0 "register_operand" "=v")
957 (unspec:V_ALL
958 [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
959 (vec_duplicate:<VnDI>
960 (match_operand 2 "immediate_operand" " n")))
961 (match_operand 3 "immediate_operand" " n")
962 (match_operand 4 "immediate_operand" " n")
963 (mem:BLK (scratch))]
964 UNSPEC_GATHER))]
965 "(AS_FLAT_P (INTVAL (operands[3]))
966 && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
967 || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
968 || (AS_GLOBAL_P (INTVAL (operands[3]))
969 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
970 {
971 addr_space_t as = INTVAL (operands[3]);
972 const char *glc = INTVAL (operands[4]) ? " glc" : "";
973
974 static char buf[200];
975 if (AS_FLAT_P (as))
976 {
977 if (TARGET_GCN5_PLUS)
978 sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
979 glc);
980 else
981 sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
982 }
983 else if (AS_GLOBAL_P (as))
984 sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
985 "s_waitcnt\tvmcnt(0)", glc);
986 else
987 gcc_unreachable ();
988
989 return buf;
990 }
991 [(set_attr "type" "flat")
992 (set_attr "length" "12")])
993
994 (define_insn "gather<mode>_insn_1offset_ds<exec>"
995 [(set (match_operand:V_ALL 0 "register_operand" "=v")
996 (unspec:V_ALL
997 [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
998 (vec_duplicate:<VnSI>
999 (match_operand 2 "immediate_operand" " n")))
1000 (match_operand 3 "immediate_operand" " n")
1001 (match_operand 4 "immediate_operand" " n")
1002 (mem:BLK (scratch))]
1003 UNSPEC_GATHER))]
1004 "(AS_ANY_DS_P (INTVAL (operands[3]))
1005 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
1006 {
1007 addr_space_t as = INTVAL (operands[3]);
1008 static char buf[200];
1009 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
1010 (AS_GDS_P (as) ? " gds" : ""));
1011 return buf;
1012 }
1013 [(set_attr "type" "ds")
1014 (set_attr "length" "12")])
1015
1016 (define_insn "gather<mode>_insn_2offsets<exec>"
1017 [(set (match_operand:V_ALL 0 "register_operand" "=v")
1018 (unspec:V_ALL
1019 [(plus:<VnDI>
1020 (plus:<VnDI>
1021 (vec_duplicate:<VnDI>
1022 (match_operand:DI 1 "register_operand" "Sv"))
1023 (sign_extend:<VnDI>
1024 (match_operand:<VnSI> 2 "register_operand" " v")))
1025 (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
1026 (match_operand 4 "immediate_operand" " n")
1027 (match_operand 5 "immediate_operand" " n")
1028 (mem:BLK (scratch))]
1029 UNSPEC_GATHER))]
1030 "(AS_GLOBAL_P (INTVAL (operands[4]))
1031 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
1032 {
1033 addr_space_t as = INTVAL (operands[4]);
1034 const char *glc = INTVAL (operands[5]) ? " glc" : "";
1035
1036 static char buf[200];
1037 if (AS_GLOBAL_P (as))
1038 sprintf (buf, "global_load%%o0\t%%0, %%2, %%1 offset:%%3%s\;"
1039 "s_waitcnt\tvmcnt(0)", glc);
1040 else
1041 gcc_unreachable ();
1042
1043 return buf;
1044 }
1045 [(set_attr "type" "flat")
1046 (set_attr "length" "12")])
1047
1048 (define_expand "scatter_store<mode><vnsi>"
1049 [(match_operand:DI 0 "register_operand")
1050 (match_operand:<VnSI> 1 "register_operand")
1051 (match_operand 2 "immediate_operand")
1052 (match_operand:SI 3 "gcn_alu_operand")
1053 (match_operand:V_ALL 4 "register_operand")]
1054 ""
1055 {
1056 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
1057 operands[1], operands[3],
1058 INTVAL (operands[2]), NULL);
1059
1060 if (GET_MODE (addr) == <VnDI>mode)
1061 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
1062 const0_rtx, const0_rtx));
1063 else
1064 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
1065 const0_rtx, operands[4],
1066 const0_rtx, const0_rtx));
1067 DONE;
1068 })
1069
1070 ; Allow any address expression
1071 (define_expand "scatter<mode>_expr<exec_scatter>"
1072 [(set (mem:BLK (scratch))
1073 (unspec:BLK
1074 [(match_operand:<VnDI> 0 "")
1075 (match_operand:V_ALL 1 "register_operand")
1076 (match_operand 2 "immediate_operand")
1077 (match_operand 3 "immediate_operand")]
1078 UNSPEC_SCATTER))]
1079 ""
1080 {})
1081
1082 (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
1083 [(set (mem:BLK (scratch))
1084 (unspec:BLK
1085 [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
1086 (vec_duplicate:<VnDI>
1087 (match_operand 1 "immediate_operand" "n")))
1088 (match_operand:V_ALL 2 "register_operand" "v")
1089 (match_operand 3 "immediate_operand" "n")
1090 (match_operand 4 "immediate_operand" "n")]
1091 UNSPEC_SCATTER))]
1092 "(AS_FLAT_P (INTVAL (operands[3]))
1093 && (INTVAL(operands[1]) == 0
1094 || (TARGET_GCN5_PLUS
1095 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
1096 || (AS_GLOBAL_P (INTVAL (operands[3]))
1097 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
1098 {
1099 addr_space_t as = INTVAL (operands[3]);
1100 const char *glc = INTVAL (operands[4]) ? " glc" : "";
1101
1102 static char buf[200];
1103 if (AS_FLAT_P (as))
1104 {
1105 if (TARGET_GCN5_PLUS)
1106 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
1107 else
1108 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
1109 }
1110 else if (AS_GLOBAL_P (as))
1111 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
1112 else
1113 gcc_unreachable ();
1114
1115 return buf;
1116 }
1117 [(set_attr "type" "flat")
1118 (set_attr "length" "12")])
1119
1120 (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
1121 [(set (mem:BLK (scratch))
1122 (unspec:BLK
1123 [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
1124 (vec_duplicate:<VnSI>
1125 (match_operand 1 "immediate_operand" "n")))
1126 (match_operand:V_ALL 2 "register_operand" "v")
1127 (match_operand 3 "immediate_operand" "n")
1128 (match_operand 4 "immediate_operand" "n")]
1129 UNSPEC_SCATTER))]
1130 "(AS_ANY_DS_P (INTVAL (operands[3]))
1131 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
1132 {
1133 addr_space_t as = INTVAL (operands[3]);
1134 static char buf[200];
1135 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
1136 (AS_GDS_P (as) ? " gds" : ""));
1137 return buf;
1138 }
1139 [(set_attr "type" "ds")
1140 (set_attr "length" "12")])
1141
1142 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
1143 [(set (mem:BLK (scratch))
1144 (unspec:BLK
1145 [(plus:<VnDI>
1146 (plus:<VnDI>
1147 (vec_duplicate:<VnDI>
1148 (match_operand:DI 0 "register_operand" "Sv"))
1149 (sign_extend:<VnDI>
1150 (match_operand:<VnSI> 1 "register_operand" " v")))
1151 (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
1152 (match_operand:V_ALL 3 "register_operand" " v")
1153 (match_operand 4 "immediate_operand" " n")
1154 (match_operand 5 "immediate_operand" " n")]
1155 UNSPEC_SCATTER))]
1156 "(AS_GLOBAL_P (INTVAL (operands[4]))
1157 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
1158 {
1159 addr_space_t as = INTVAL (operands[4]);
1160 const char *glc = INTVAL (operands[5]) ? " glc" : "";
1161
1162 static char buf[200];
1163 if (AS_GLOBAL_P (as))
1164 sprintf (buf, "global_store%%s3\t%%1, %%3, %%0 offset:%%2%s", glc);
1165 else
1166 gcc_unreachable ();
1167
1168 return buf;
1169 }
1170 [(set_attr "type" "flat")
1171 (set_attr "length" "12")])
1172
1173 ;; }}}
1174 ;; {{{ Permutations
1175
1176 (define_insn "ds_bpermute<mode>"
1177 [(set (match_operand:V_1REG 0 "register_operand" "=v")
1178 (unspec:V_1REG
1179 [(match_operand:V_1REG 2 "register_operand" " v")
1180 (match_operand:<VnSI> 1 "register_operand" " v")
1181 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1182 UNSPEC_BPERMUTE))]
1183 ""
1184 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
1185 [(set_attr "type" "vop2")
1186 (set_attr "length" "12")])
1187
1188 (define_insn_and_split "ds_bpermute<mode>"
1189 [(set (match_operand:V_2REG 0 "register_operand" "=&v")
1190 (unspec:V_2REG
1191 [(match_operand:V_2REG 2 "register_operand" " v0")
1192 (match_operand:<VnSI> 1 "register_operand" " v")
1193 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1194 UNSPEC_BPERMUTE))]
1195 ""
1196 "#"
1197 "reload_completed"
1198 [(set (match_dup 4) (unspec:<VnSI>
1199 [(match_dup 6) (match_dup 1) (match_dup 3)]
1200 UNSPEC_BPERMUTE))
1201 (set (match_dup 5) (unspec:<VnSI>
1202 [(match_dup 7) (match_dup 1) (match_dup 3)]
1203 UNSPEC_BPERMUTE))]
1204 {
1205 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
1206 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
1207 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
1208 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
1209 }
1210 [(set_attr "type" "vmult")
1211 (set_attr "length" "24")])
1212
1213 (define_insn "@dpp_move<mode>"
1214 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1215 (unspec:V_noHI
1216 [(match_operand:V_noHI 1 "register_operand" " v")
1217 (match_operand:SI 2 "const_int_operand" " n")]
1218 UNSPEC_MOV_DPP_SHR))]
1219 ""
1220 {
1221 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
1222 UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
1223 }
1224 [(set_attr "type" "vop_dpp")
1225 (set_attr "length" "16")])
1226
1227 ;; }}}
1228 ;; {{{ ALU special case: add/sub
1229
1230 (define_insn "add<mode>3<exec_clobber>"
1231 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1232 (plus:V_INT_1REG
1233 (match_operand:V_INT_1REG 1 "register_operand" "% v")
1234 (match_operand:V_INT_1REG 2 "gcn_alu_operand" "vSvB")))
1235 (clobber (reg:DI VCC_REG))]
1236 ""
1237 "v_add%^_u32\t%0, vcc, %2, %1"
1238 [(set_attr "type" "vop2")
1239 (set_attr "length" "8")])
1240
1241 (define_insn "add<mode>3_dup<exec_clobber>"
1242 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1243 (plus:V_INT_1REG
1244 (vec_duplicate:V_INT_1REG
1245 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
1246 (match_operand:V_INT_1REG 1 "register_operand" " v")))
1247 (clobber (reg:DI VCC_REG))]
1248 ""
1249 "v_add%^_u32\t%0, vcc, %2, %1"
1250 [(set_attr "type" "vop2")
1251 (set_attr "length" "8")])
1252
1253 (define_insn "add<mode>3_vcc<exec_vcc>"
1254 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1255 (plus:V_SI
1256 (match_operand:V_SI 1 "register_operand" "% v, v")
1257 (match_operand:V_SI 2 "gcn_alu_operand" "vSvB,vSvB")))
1258 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
1259 (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
1260 (match_dup 1)))]
1261 ""
1262 "v_add%^_u32\t%0, %3, %2, %1"
1263 [(set_attr "type" "vop2,vop3b")
1264 (set_attr "length" "8")])
1265
1266 ; This pattern only changes the VCC bits when the corresponding lane is
1267 ; enabled, so the set must be described as an ior.
1268
1269 (define_insn "add<mode>3_vcc_dup<exec_vcc>"
1270 [(set (match_operand:V_SI 0 "register_operand" "= v, v")
1271 (plus:V_SI
1272 (vec_duplicate:V_SI
1273 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
1274 (match_operand:V_SI 2 "register_operand" " v, v")))
1275 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
1276 (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
1277 (match_dup 1))
1278 (vec_duplicate:V_SI (match_dup 2))))]
1279 ""
1280 "v_add%^_u32\t%0, %3, %2, %1"
1281 [(set_attr "type" "vop2,vop3b")
1282 (set_attr "length" "8,8")])
1283
1284 ; v_addc does not accept an SGPR because the VCC read already counts as an
1285 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1286 ; accept "B" immediate constants due to a related bus conflict.
1287
1288 (define_insn "addc<mode>3<exec_vcc>"
1289 [(set (match_operand:V_SI 0 "register_operand" "=v, v")
1290 (plus:V_SI
1291 (plus:V_SI
1292 (vec_merge:V_SI
1293 (vec_duplicate:V_SI (const_int 1))
1294 (vec_duplicate:V_SI (const_int 0))
1295 (match_operand:DI 3 "register_operand" " cV,cVSv"))
1296 (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA"))
1297 (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA")))
1298 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
1299 (ior:DI (ltu:DI (plus:V_SI
1300 (plus:V_SI
1301 (vec_merge:V_SI
1302 (vec_duplicate:V_SI (const_int 1))
1303 (vec_duplicate:V_SI (const_int 0))
1304 (match_dup 3))
1305 (match_dup 1))
1306 (match_dup 2))
1307 (match_dup 2))
1308 (ltu:DI (plus:V_SI
1309 (vec_merge:V_SI
1310 (vec_duplicate:V_SI (const_int 1))
1311 (vec_duplicate:V_SI (const_int 0))
1312 (match_dup 3))
1313 (match_dup 1))
1314 (match_dup 1))))]
1315 ""
1316 "v_addc%^_u32\t%0, %4, %2, %1, %3"
1317 [(set_attr "type" "vop2,vop3b")
1318 (set_attr "length" "4,8")])
1319
1320 (define_insn "sub<mode>3<exec_clobber>"
1321 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
1322 (minus:V_INT_1REG
1323 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
1324 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
1325 (clobber (reg:DI VCC_REG))]
1326 ""
1327 "@
1328 v_sub%^_u32\t%0, vcc, %1, %2
1329 v_subrev%^_u32\t%0, vcc, %2, %1"
1330 [(set_attr "type" "vop2")
1331 (set_attr "length" "8,8")])
1332
1333 (define_insn "sub<mode>3_vcc<exec_vcc>"
1334 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1335 (minus:V_SI
1336 (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1337 (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1338 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1339 (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
1340 (match_dup 1)))]
1341 ""
1342 "@
1343 v_sub%^_u32\t%0, %3, %1, %2
1344 v_sub%^_u32\t%0, %3, %1, %2
1345 v_subrev%^_u32\t%0, %3, %2, %1
1346 v_subrev%^_u32\t%0, %3, %2, %1"
1347 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1348 (set_attr "length" "8")])
1349
1350 ; v_subb does not accept an SGPR because the VCC read already counts as an
1351 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1352 ; accept "B" immediate constants due to a related bus conflict.
1353
1354 (define_insn "subc<mode>3<exec_vcc>"
1355 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1356 (minus:V_SI
1357 (minus:V_SI
1358 (vec_merge:V_SI
1359 (vec_duplicate:V_SI (const_int 1))
1360 (vec_duplicate:V_SI (const_int 0))
1361 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
1362 (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
1363 (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
1364 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
1365 (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
1366 (vec_merge:V_SI
1367 (vec_duplicate:V_SI (const_int 1))
1368 (vec_duplicate:V_SI (const_int 0))
1369 (match_dup 3))
1370 (match_dup 1))
1371 (match_dup 2))
1372 (match_dup 2))
1373 (ltu:DI (minus:V_SI (vec_merge:V_SI
1374 (vec_duplicate:V_SI (const_int 1))
1375 (vec_duplicate:V_SI (const_int 0))
1376 (match_dup 3))
1377 (match_dup 1))
1378 (match_dup 1))))]
1379 ""
1380 "@
1381 v_subb%^_u32\t%0, %4, %1, %2, %3
1382 v_subb%^_u32\t%0, %4, %1, %2, %3
1383 v_subbrev%^_u32\t%0, %4, %2, %1, %3
1384 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
1385 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1386 (set_attr "length" "4,8,4,8")])
1387
1388 (define_insn_and_split "add<mode>3"
1389 [(set (match_operand:V_DI 0 "register_operand" "= v")
1390 (plus:V_DI
1391 (match_operand:V_DI 1 "register_operand" "%vDb")
1392 (match_operand:V_DI 2 "gcn_alu_operand" " vDb")))
1393 (clobber (reg:DI VCC_REG))]
1394 ""
1395 "#"
1396 "gcn_can_split_p (<MODE>mode, operands[0])
1397 && gcn_can_split_p (<MODE>mode, operands[1])
1398 && gcn_can_split_p (<MODE>mode, operands[2])"
1399 [(const_int 0)]
1400 {
1401 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1402 emit_insn (gen_add<vnsi>3_vcc
1403 (gcn_operand_part (<MODE>mode, operands[0], 0),
1404 gcn_operand_part (<MODE>mode, operands[1], 0),
1405 gcn_operand_part (<MODE>mode, operands[2], 0),
1406 vcc));
1407 emit_insn (gen_addc<vnsi>3
1408 (gcn_operand_part (<MODE>mode, operands[0], 1),
1409 gcn_operand_part (<MODE>mode, operands[1], 1),
1410 gcn_operand_part (<MODE>mode, operands[2], 1),
1411 vcc, vcc));
1412 DONE;
1413 }
1414 [(set_attr "type" "vmult")
1415 (set_attr "length" "8")])
1416
1417 (define_insn_and_split "add<mode>3_exec"
1418 [(set (match_operand:V_DI 0 "register_operand" "= v")
1419 (vec_merge:V_DI
1420 (plus:V_DI
1421 (match_operand:V_DI 1 "register_operand" "%vDb")
1422 (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))
1423 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1424 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1425 (clobber (reg:DI VCC_REG))]
1426 ""
1427 "#"
1428 "gcn_can_split_p (<MODE>mode, operands[0])
1429 && gcn_can_split_p (<MODE>mode, operands[1])
1430 && gcn_can_split_p (<MODE>mode, operands[2])
1431 && gcn_can_split_p (<MODE>mode, operands[4])"
1432 [(const_int 0)]
1433 {
1434 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1435 emit_insn (gen_add<vnsi>3_vcc_exec
1436 (gcn_operand_part (<MODE>mode, operands[0], 0),
1437 gcn_operand_part (<MODE>mode, operands[1], 0),
1438 gcn_operand_part (<MODE>mode, operands[2], 0),
1439 vcc,
1440 gcn_operand_part (<MODE>mode, operands[3], 0),
1441 operands[4]));
1442 emit_insn (gen_addc<vnsi>3_exec
1443 (gcn_operand_part (<MODE>mode, operands[0], 1),
1444 gcn_operand_part (<MODE>mode, operands[1], 1),
1445 gcn_operand_part (<MODE>mode, operands[2], 1),
1446 vcc, vcc,
1447 gcn_operand_part (<MODE>mode, operands[3], 1),
1448 operands[4]));
1449 DONE;
1450 }
1451 [(set_attr "type" "vmult")
1452 (set_attr "length" "8")])
1453
1454 (define_insn_and_split "sub<mode>3"
1455 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1456 (minus:V_DI
1457 (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v")
1458 (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb")))
1459 (clobber (reg:DI VCC_REG))]
1460 ""
1461 "#"
1462 "gcn_can_split_p (<MODE>mode, operands[0])
1463 && gcn_can_split_p (<MODE>mode, operands[1])
1464 && gcn_can_split_p (<MODE>mode, operands[2])"
1465 [(const_int 0)]
1466 {
1467 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1468 emit_insn (gen_sub<vnsi>3_vcc
1469 (gcn_operand_part (<MODE>mode, operands[0], 0),
1470 gcn_operand_part (<MODE>mode, operands[1], 0),
1471 gcn_operand_part (<MODE>mode, operands[2], 0),
1472 vcc));
1473 emit_insn (gen_subc<vnsi>3
1474 (gcn_operand_part (<MODE>mode, operands[0], 1),
1475 gcn_operand_part (<MODE>mode, operands[1], 1),
1476 gcn_operand_part (<MODE>mode, operands[2], 1),
1477 vcc, vcc));
1478 DONE;
1479 }
1480 [(set_attr "type" "vmult")
1481 (set_attr "length" "8")])
1482
1483 (define_insn_and_split "sub<mode>3_exec"
1484 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1485 (vec_merge:V_DI
1486 (minus:V_DI
1487 (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v")
1488 (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB"))
1489 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1490 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1491 (clobber (reg:DI VCC_REG))]
1492 "register_operand (operands[1], VOIDmode)
1493 || register_operand (operands[2], VOIDmode)"
1494 "#"
1495 "gcn_can_split_p (<MODE>mode, operands[0])
1496 && gcn_can_split_p (<MODE>mode, operands[1])
1497 && gcn_can_split_p (<MODE>mode, operands[2])
1498 && gcn_can_split_p (<MODE>mode, operands[3])"
1499 [(const_int 0)]
1500 {
1501 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1502 emit_insn (gen_sub<vnsi>3_vcc_exec
1503 (gcn_operand_part (<MODE>mode, operands[0], 0),
1504 gcn_operand_part (<MODE>mode, operands[1], 0),
1505 gcn_operand_part (<MODE>mode, operands[2], 0),
1506 vcc,
1507 gcn_operand_part (<MODE>mode, operands[3], 0),
1508 operands[4]));
1509 emit_insn (gen_subc<vnsi>3_exec
1510 (gcn_operand_part (<MODE>mode, operands[0], 1),
1511 gcn_operand_part (<MODE>mode, operands[1], 1),
1512 gcn_operand_part (<MODE>mode, operands[2], 1),
1513 vcc, vcc,
1514 gcn_operand_part (<MODE>mode, operands[3], 1),
1515 operands[4]));
1516 DONE;
1517 }
1518 [(set_attr "type" "vmult")
1519 (set_attr "length" "8")])
1520
1521 (define_insn_and_split "add<mode>3_zext"
1522 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1523 (plus:V_DI
1524 (zero_extend:V_DI
1525 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1526 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")))
1527 (clobber (reg:DI VCC_REG))]
1528 ""
1529 "#"
1530 "gcn_can_split_p (<MODE>mode, operands[0])
1531 && gcn_can_split_p (<MODE>mode, operands[2])"
1532 [(const_int 0)]
1533 {
1534 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1535 emit_insn (gen_add<vnsi>3_vcc
1536 (gcn_operand_part (<MODE>mode, operands[0], 0),
1537 operands[1],
1538 gcn_operand_part (<MODE>mode, operands[2], 0),
1539 vcc));
1540 emit_insn (gen_addc<vnsi>3
1541 (gcn_operand_part (<MODE>mode, operands[0], 1),
1542 gcn_operand_part (<MODE>mode, operands[2], 1),
1543 const0_rtx, vcc, vcc));
1544 DONE;
1545 }
1546 [(set_attr "type" "vmult")
1547 (set_attr "length" "8")])
1548
1549 (define_insn_and_split "add<mode>3_zext_exec"
1550 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1551 (vec_merge:V_DI
1552 (plus:V_DI
1553 (zero_extend:V_DI
1554 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1555 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
1556 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1557 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1558 (clobber (reg:DI VCC_REG))]
1559 ""
1560 "#"
1561 "gcn_can_split_p (<MODE>mode, operands[0])
1562 && gcn_can_split_p (<MODE>mode, operands[2])
1563 && gcn_can_split_p (<MODE>mode, operands[3])"
1564 [(const_int 0)]
1565 {
1566 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1567 emit_insn (gen_add<vnsi>3_vcc_exec
1568 (gcn_operand_part (<MODE>mode, operands[0], 0),
1569 operands[1],
1570 gcn_operand_part (<MODE>mode, operands[2], 0),
1571 vcc,
1572 gcn_operand_part (<MODE>mode, operands[3], 0),
1573 operands[4]));
1574 emit_insn (gen_addc<vnsi>3_exec
1575 (gcn_operand_part (<MODE>mode, operands[0], 1),
1576 gcn_operand_part (<MODE>mode, operands[2], 1),
1577 const0_rtx, vcc, vcc,
1578 gcn_operand_part (<MODE>mode, operands[3], 1),
1579 operands[4]));
1580 DONE;
1581 }
1582 [(set_attr "type" "vmult")
1583 (set_attr "length" "8")])
1584
1585 (define_insn_and_split "add<mode>3_vcc_zext_dup"
1586 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1587 (plus:V_DI
1588 (zero_extend:V_DI
1589 (vec_duplicate:<VnSI>
1590 (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
1591 (match_operand:V_DI 2 "gcn_alu_operand" " vDA, vDb")))
1592 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
1593 (ltu:DI (plus:V_DI
1594 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1595 (match_dup 2))
1596 (match_dup 1)))]
1597 ""
1598 "#"
1599 "gcn_can_split_p (<MODE>mode, operands[0])
1600 && gcn_can_split_p (<MODE>mode, operands[2])"
1601 [(const_int 0)]
1602 {
1603 emit_insn (gen_add<vnsi>3_vcc_dup
1604 (gcn_operand_part (<MODE>mode, operands[0], 0),
1605 gcn_operand_part (DImode, operands[1], 0),
1606 gcn_operand_part (<MODE>mode, operands[2], 0),
1607 operands[3]));
1608 emit_insn (gen_addc<vnsi>3
1609 (gcn_operand_part (<MODE>mode, operands[0], 1),
1610 gcn_operand_part (<MODE>mode, operands[2], 1),
1611 const0_rtx, operands[3], operands[3]));
1612 DONE;
1613 }
1614 [(set_attr "type" "vmult")
1615 (set_attr "length" "8")])
1616
1617 (define_expand "add<mode>3_zext_dup"
1618 [(match_operand:V_DI 0 "register_operand")
1619 (match_operand:SI 1 "gcn_alu_operand")
1620 (match_operand:V_DI 2 "gcn_alu_operand")]
1621 ""
1622 {
1623 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1624 emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
1625 operands[2], vcc));
1626 DONE;
1627 })
1628
1629 (define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
1630 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1631 (vec_merge:V_DI
1632 (plus:V_DI
1633 (zero_extend:V_DI
1634 (vec_duplicate:<VnSI>
1635 (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv")))
1636 (match_operand:V_DI 2 "gcn_alu_operand" " vDb, vDA"))
1637 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0, U0")
1638 (match_operand:DI 5 "gcn_exec_reg_operand" " e, e")))
1639 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
1640 (and:DI
1641 (ltu:DI (plus:V_DI
1642 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1643 (match_dup 2))
1644 (match_dup 1))
1645 (match_dup 5)))]
1646 ""
1647 "#"
1648 "gcn_can_split_p (<MODE>mode, operands[0])
1649 && gcn_can_split_p (<MODE>mode, operands[2])
1650 && gcn_can_split_p (<MODE>mode, operands[4])"
1651 [(const_int 0)]
1652 {
1653 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1654 (gcn_operand_part (<MODE>mode, operands[0], 0),
1655 gcn_operand_part (DImode, operands[1], 0),
1656 gcn_operand_part (<MODE>mode, operands[2], 0),
1657 operands[3],
1658 gcn_operand_part (<MODE>mode, operands[4], 0),
1659 operands[5]));
1660 emit_insn (gen_addc<vnsi>3_exec
1661 (gcn_operand_part (<MODE>mode, operands[0], 1),
1662 gcn_operand_part (<MODE>mode, operands[2], 1),
1663 const0_rtx, operands[3], operands[3],
1664 gcn_operand_part (<MODE>mode, operands[4], 1),
1665 operands[5]));
1666 DONE;
1667 }
1668 [(set_attr "type" "vmult")
1669 (set_attr "length" "8")])
1670
1671 (define_expand "add<mode>3_zext_dup_exec"
1672 [(match_operand:V_DI 0 "register_operand")
1673 (match_operand:SI 1 "gcn_alu_operand")
1674 (match_operand:V_DI 2 "gcn_alu_operand")
1675 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1676 (match_operand:DI 4 "gcn_exec_reg_operand")]
1677 ""
1678 {
1679 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1680 emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
1681 operands[2], vcc, operands[3],
1682 operands[4]));
1683 DONE;
1684 })
1685
1686 (define_insn_and_split "add<mode>3_vcc_zext_dup2"
1687 [(set (match_operand:V_DI 0 "register_operand" "= v")
1688 (plus:V_DI
1689 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
1690 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" " DbSv"))))
1691 (set (match_operand:DI 3 "register_operand" "=&SgcV")
1692 (ltu:DI (plus:V_DI
1693 (zero_extend:V_DI (match_dup 1))
1694 (vec_duplicate:V_DI (match_dup 2)))
1695 (match_dup 1)))]
1696 ""
1697 "#"
1698 "gcn_can_split_p (<MODE>mode, operands[0])"
1699 [(const_int 0)]
1700 {
1701 emit_insn (gen_add<vnsi>3_vcc_dup
1702 (gcn_operand_part (<MODE>mode, operands[0], 0),
1703 gcn_operand_part (DImode, operands[2], 0),
1704 operands[1],
1705 operands[3]));
1706 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1707 emit_insn (gen_vec_duplicate<vnsi>
1708 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1709 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
1710 operands[3]));
1711 DONE;
1712 }
1713 [(set_attr "type" "vmult")
1714 (set_attr "length" "8")])
1715
1716 (define_expand "add<mode>3_zext_dup2"
1717 [(match_operand:V_DI 0 "register_operand")
1718 (match_operand:<VnSI> 1 "gcn_alu_operand")
1719 (match_operand:DI 2 "gcn_alu_operand")]
1720 ""
1721 {
1722 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1723 emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
1724 operands[2], vcc));
1725 DONE;
1726 })
1727
1728 (define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
1729 [(set (match_operand:V_DI 0 "register_operand" "= v")
1730 (vec_merge:V_DI
1731 (plus:V_DI
1732 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1733 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1734 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0")
1735 (match_operand:DI 5 "gcn_exec_reg_operand" " e")))
1736 (set (match_operand:DI 3 "register_operand" "=&SgcV")
1737 (and:DI
1738 (ltu:DI (plus:V_DI
1739 (zero_extend:V_DI (match_dup 1))
1740 (vec_duplicate:V_DI (match_dup 2)))
1741 (match_dup 1))
1742 (match_dup 5)))]
1743 ""
1744 "#"
1745 "gcn_can_split_p (<MODE>mode, operands[0])
1746 && gcn_can_split_p (<MODE>mode, operands[4])"
1747 [(const_int 0)]
1748 {
1749 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1750 (gcn_operand_part (<MODE>mode, operands[0], 0),
1751 gcn_operand_part (DImode, operands[2], 0),
1752 operands[1],
1753 operands[3],
1754 gcn_operand_part (<MODE>mode, operands[4], 0),
1755 operands[5]));
1756 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1757 emit_insn (gen_vec_duplicate<vnsi>_exec
1758 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1759 gcn_operand_part (<MODE>mode, operands[4], 1),
1760 operands[5]));
1761 emit_insn (gen_addc<vnsi>3_exec
1762 (dsthi, dsthi, const0_rtx, operands[3], operands[3],
1763 gcn_operand_part (<MODE>mode, operands[4], 1),
1764 operands[5]));
1765 DONE;
1766 }
1767 [(set_attr "type" "vmult")
1768 (set_attr "length" "8")])
1769
1770 (define_expand "add<mode>3_zext_dup2_exec"
1771 [(match_operand:V_DI 0 "register_operand")
1772 (match_operand:<VnSI> 1 "gcn_alu_operand")
1773 (match_operand:DI 2 "gcn_alu_operand")
1774 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1775 (match_operand:DI 4 "gcn_exec_reg_operand")]
1776 ""
1777 {
1778 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1779 emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
1780 operands[2], vcc,
1781 operands[3], operands[4]));
1782 DONE;
1783 })
1784
1785 (define_insn_and_split "add<mode>3_sext_dup2"
1786 [(set (match_operand:V_DI 0 "register_operand" "= v")
1787 (plus:V_DI
1788 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
1789 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1790 (clobber (match_scratch:<VnSI> 3 "=&v"))
1791 (clobber (reg:DI VCC_REG))]
1792 ""
1793 "#"
1794 "gcn_can_split_p (<MODE>mode, operands[0])"
1795 [(const_int 0)]
1796 {
1797 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1798 emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31)));
1799 emit_insn (gen_add<vnsi>3_vcc_dup
1800 (gcn_operand_part (<MODE>mode, operands[0], 0),
1801 gcn_operand_part (DImode, operands[2], 0),
1802 operands[1],
1803 vcc));
1804 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1805 emit_insn (gen_vec_duplicate<vnsi>
1806 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1807 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc));
1808 DONE;
1809 }
1810 [(set_attr "type" "vmult")
1811 (set_attr "length" "8")])
1812
1813 (define_insn_and_split "add<mode>3_sext_dup2_exec"
1814 [(set (match_operand:V_DI 0 "register_operand" "= v")
1815 (vec_merge:V_DI
1816 (plus:V_DI
1817 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
1818 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1819 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1820 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1821 (clobber (match_scratch:<VnSI> 5 "=&v"))
1822 (clobber (reg:DI VCC_REG))]
1823 ""
1824 "#"
1825 "gcn_can_split_p (<MODE>mode, operands[0])
1826 && gcn_can_split_p (<MODE>mode, operands[3])"
1827 [(const_int 0)]
1828 {
1829 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1830 emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31),
1831 gcn_gen_undef (<VnSI>mode), operands[4]));
1832 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1833 (gcn_operand_part (<MODE>mode, operands[0], 0),
1834 gcn_operand_part (DImode, operands[2], 0),
1835 operands[1],
1836 vcc,
1837 gcn_operand_part (<MODE>mode, operands[3], 0),
1838 operands[4]));
1839 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1840 emit_insn (gen_vec_duplicate<vnsi>_exec
1841 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1842 gcn_operand_part (<MODE>mode, operands[3], 1),
1843 operands[4]));
1844 emit_insn (gen_addc<vnsi>3_exec
1845 (dsthi, dsthi, operands[5], vcc, vcc,
1846 gcn_operand_part (<MODE>mode, operands[3], 1),
1847 operands[4]));
1848 DONE;
1849 }
1850 [(set_attr "type" "vmult")
1851 (set_attr "length" "8")])
1852
1853 ;; }}}
1854 ;; {{{ DS memory ALU: add/sub
1855
1856 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
1857 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1858
1859 ;; FIXME: the vector patterns probably need RD expanded to a vector of
1860 ;; addresses. For now, the only way a vector can get into LDS is
1861 ;; if the user puts it there manually.
1862 ;;
1863 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1864 ;; checked to see if anything can ever use them.
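;; Note that these patterns operate directly on LDS memory: the insn
;; condition requires operand 0 and operand 1 to be the same location, so
;; each pattern is a single read-modify-write ds_add/ds_sub/ds_rsub.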
1865
1866 (define_insn "add<mode>3_ds<exec>"
1867 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1868 (plus:DS_ARITH_MODE
1869 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1870 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1871 "rtx_equal_p (operands[0], operands[1])"
1872 "ds_add%u0\t%A0, %2%O0"
1873 [(set_attr "type" "ds")
1874 (set_attr "length" "8")])
1875
1876 (define_insn "add<mode>3_ds_scalar"
1877 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1878 (plus:DS_ARITH_SCALAR_MODE
1879 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1880 "%RD")
1881 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1882 "rtx_equal_p (operands[0], operands[1])"
1883 "ds_add%u0\t%A0, %2%O0"
1884 [(set_attr "type" "ds")
1885 (set_attr "length" "8")])
1886
1887 (define_insn "sub<mode>3_ds<exec>"
1888 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1889 (minus:DS_ARITH_MODE
1890 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1891 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1892 "rtx_equal_p (operands[0], operands[1])"
1893 "ds_sub%u0\t%A0, %2%O0"
1894 [(set_attr "type" "ds")
1895 (set_attr "length" "8")])
1896
1897 (define_insn "sub<mode>3_ds_scalar"
1898 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1899 (minus:DS_ARITH_SCALAR_MODE
1900 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1901 " RD")
1902 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1903 "rtx_equal_p (operands[0], operands[1])"
1904 "ds_sub%u0\t%A0, %2%O0"
1905 [(set_attr "type" "ds")
1906 (set_attr "length" "8")])
1907
1908 (define_insn "subr<mode>3_ds<exec>"
1909 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1910 (minus:DS_ARITH_MODE
1911 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1912 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1913 "rtx_equal_p (operands[0], operands[1])"
1914 "ds_rsub%u0\t%A0, %2%O0"
1915 [(set_attr "type" "ds")
1916 (set_attr "length" "8")])
1917
1918 (define_insn "subr<mode>3_ds_scalar"
1919 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1920 (minus:DS_ARITH_SCALAR_MODE
1921 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1922 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1923 " RD")))]
1924 "rtx_equal_p (operands[0], operands[1])"
1925 "ds_rsub%u0\t%A0, %2%O0"
1926 [(set_attr "type" "ds")
1927 (set_attr "length" "8")])
1928
1929 ;; }}}
1930 ;; {{{ ALU special case: mult
1931
1932 (define_insn "<su>mul<mode>3_highpart<exec>"
1933 [(set (match_operand:V_SI 0 "register_operand" "= v")
1934 (truncate:V_SI
1935 (lshiftrt:<VnDI>
1936 (mult:<VnDI>
1937 (any_extend:<VnDI>
1938 (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
1939 (any_extend:<VnDI>
1940 (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
1941 (const_int 32))))]
1942 ""
1943 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1944 [(set_attr "type" "vop3a")
1945 (set_attr "length" "8")])
1946
1947 (define_insn "mul<mode>3<exec>"
1948 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1949 (mult:V_INT_1REG
1950 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1951 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
1952 ""
1953 "v_mul_lo_u32\t%0, %1, %2"
1954 [(set_attr "type" "vop3a")
1955 (set_attr "length" "8")])
1956
1957 (define_insn "mul<mode>3_dup<exec>"
1958 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
1959 (mult:V_INT_1REG
1960 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
1961 (vec_duplicate:V_INT_1REG
1962 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
1963 ""
1964 "v_mul_lo_u32\t%0, %1, %2"
1965 [(set_attr "type" "vop3a")
1966 (set_attr "length" "8")])
1967
1968 (define_insn_and_split "mul<mode>3"
1969 [(set (match_operand:V_DI 0 "register_operand" "=&v")
1970 (mult:V_DI
1971 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
1972 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
1973 (clobber (match_scratch:<VnSI> 3 "=&v"))]
1974 ""
1975 "#"
1976 "reload_completed"
1977 [(const_int 0)]
1978 {
1979 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
1980 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
1981 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
1982 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
1983 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
1984 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
1985 rtx tmp = operands[3];
1986
1987 emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
1988 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));
1989 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
1990 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1991 emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
1992 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1993 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_hi));
1994 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
1995 DONE;
1996 })
1997
1998 (define_insn_and_split "mul<mode>3_exec"
1999 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2000 (vec_merge:V_DI
2001 (mult:V_DI
2002 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
2003 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
2004 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2005 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2006 (clobber (match_scratch:<VnSI> 5 "=&v"))]
2007 ""
2008 "#"
2009 "reload_completed"
2010 [(const_int 0)]
2011 {
2012 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2013 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2014 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
2015 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
2016 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2017 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2018 rtx exec = operands[4];
2019 rtx tmp = operands[5];
2020
2021 rtx old_lo, old_hi;
2022 if (GET_CODE (operands[3]) == UNSPEC)
2023 {
2024 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2025 }
2026 else
2027 {
2028 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2029 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2030 }
2031
2032 rtx undef = gcn_gen_undef (<VnSI>mode);
2033
2034 emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
2035 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
2036 old_hi, exec));
2037 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
2038 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2039 emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
2040 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2041 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_hi, undef, exec));
2042 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2043 DONE;
2044 })
2045
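;; When one operand is a zero-extended 32-bit value A, only three 32-bit
;; operations are needed: the low part of the result is lo32(A*B_lo) and the
;; high part is hi32(A*B_lo) + lo32(A*B_hi), which is what the splitters
;; below emit.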
2046 (define_insn_and_split "mul<mode>3_zext"
2047 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2048 (mult:V_DI
2049 (zero_extend:V_DI
2050 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2051 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
2052 (clobber (match_scratch:<VnSI> 3 "=&v"))]
2053 ""
2054 "#"
2055 "reload_completed"
2056 [(const_int 0)]
2057 {
2058 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2059 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2060 rtx left = operands[1];
2061 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2062 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2063 rtx tmp = operands[3];
2064
2065 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
2066 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
2067 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
2068 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
2069 DONE;
2070 })
2071
2072 (define_insn_and_split "mul<mode>3_zext_exec"
2073 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2074 (vec_merge:V_DI
2075 (mult:V_DI
2076 (zero_extend:V_DI
2077 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2078 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
2079 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2080 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2081 (clobber (match_scratch:<VnSI> 5 "=&v"))]
2082 ""
2083 "#"
2084 "reload_completed"
2085 [(const_int 0)]
2086 {
2087 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2088 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2089 rtx left = operands[1];
2090 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2091 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2092 rtx exec = operands[4];
2093 rtx tmp = operands[5];
2094
2095 rtx old_lo, old_hi;
2096 if (GET_CODE (operands[3]) == UNSPEC)
2097 {
2098 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2099 }
2100 else
2101 {
2102 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2103 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2104 }
2105
2106 rtx undef = gcn_gen_undef (<VnSI>mode);
2107
2108 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
2109 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
2110 old_hi, exec));
2111 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
2112 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2113 DONE;
2114 })
2115
2116 (define_insn_and_split "mul<mode>3_zext_dup2"
2117 [(set (match_operand:V_DI 0 "register_operand" "= &v")
2118 (mult:V_DI
2119 (zero_extend:V_DI
2120 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2121 (vec_duplicate:V_DI
2122 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
2123 (clobber (match_scratch:<VnSI> 3 "= &v"))]
2124 ""
2125 "#"
2126 "reload_completed"
2127 [(const_int 0)]
2128 {
2129 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2130 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2131 rtx left = operands[1];
2132 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2133 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2134 rtx tmp = operands[3];
2135
2136 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
2137 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
2138 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
2139 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
2140 DONE;
2141 })
2142
2143 (define_insn_and_split "mul<mode>3_zext_dup2_exec"
2144 [(set (match_operand:V_DI 0 "register_operand" "= &v")
2145 (vec_merge:V_DI
2146 (mult:V_DI
2147 (zero_extend:V_DI
2148 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2149 (vec_duplicate:V_DI
2150 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
2151 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2152 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2153 (clobber (match_scratch:<VnSI> 5 "= &v"))]
2154 ""
2155 "#"
2156 "reload_completed"
2157 [(const_int 0)]
2158 {
2159 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2160 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2161 rtx left = operands[1];
2162 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2163 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2164 rtx exec = operands[4];
2165 rtx tmp = operands[5];
2166
2167 rtx old_lo, old_hi;
2168 if (GET_CODE (operands[3]) == UNSPEC)
2169 {
2170 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2171 }
2172 else
2173 {
2174 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2175 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2176 }
2177
2178 rtx undef = gcn_gen_undef (<VnSI>mode);
2179
2180 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
2181 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
2182 old_hi, exec));
2183 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
2184 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2185 DONE;
2186 })
2187
2188 ;; }}}
2189 ;; {{{ ALU generic case
2190
2191 (define_code_iterator bitop [and ior xor])
2192 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
2193 (define_code_iterator minmaxop [smin smax umin umax])
2194
2195 (define_insn "<expander><mode>2<exec>"
2196 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
2197 (bitunop:V_INT_1REG
2198 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
2199 ""
2200 "v_<mnemonic>0\t%0, %1"
2201 [(set_attr "type" "vop1")
2202 (set_attr "length" "8")])
2203
2204 (define_insn "<expander><mode>3<exec>"
2205 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
2206 (bitop:V_INT_1REG
2207 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
2208 (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2209 ""
2210 "@
2211 v_<mnemonic>0\t%0, %2, %1
2212 ds_<mnemonic>0\t%A0, %2%O0"
2213 [(set_attr "type" "vop2,ds")
2214 (set_attr "length" "8,8")])
2215
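;; There is no 64-bit VALU bitwise instruction, so unless the destination is
;; LDS memory (the ds_ alternative) these patterns split into two 32-bit
;; operations on the low and high halves.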
2216 (define_insn_and_split "<expander><mode>3"
2217 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2218 (bitop:V_DI
2219 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2220 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2221 ""
2222 "@
2223 #
2224 ds_<mnemonic>0\t%A0, %2%O0"
2225 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2226 [(set (match_dup 3)
2227 (bitop:<VnSI> (match_dup 5) (match_dup 7)))
2228 (set (match_dup 4)
2229 (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
2230 {
2231 operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0);
2232 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1);
2233 operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0);
2234 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1);
2235 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0);
2236 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1);
2237 }
2238 [(set_attr "type" "vmult,ds")
2239 (set_attr "length" "16,8")])
2240
2241 (define_insn_and_split "<expander><mode>3_exec"
2242 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2243 (vec_merge:V_DI
2244 (bitop:V_DI
2245 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2246 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2247 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
2248 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2249 "!memory_operand (operands[0], VOIDmode)
2250 || (rtx_equal_p (operands[0], operands[1])
2251 && register_operand (operands[2], VOIDmode))"
2252 "@
2253 #
2254 ds_<mnemonic>0\t%A0, %2%O0"
2255 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2256 [(set (match_dup 5)
2257 (vec_merge:<VnSI>
2258 (bitop:<VnSI> (match_dup 7) (match_dup 9))
2259 (match_dup 11)
2260 (match_dup 4)))
2261 (set (match_dup 6)
2262 (vec_merge:<VnSI>
2263 (bitop:<VnSI> (match_dup 8) (match_dup 10))
2264 (match_dup 12)
2265 (match_dup 4)))]
2266 {
2267 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0);
2268 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1);
2269 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0);
2270 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1);
2271 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0);
2272 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1);
2273 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0);
2274 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1);
2275 }
2276 [(set_attr "type" "vmult,ds")
2277 (set_attr "length" "16,8")])
2278
2279 (define_expand "<expander><mode>3"
2280 [(set (match_operand:V_QIHI 0 "register_operand" "= v")
2281 (shiftop:V_QIHI
2282 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2283 (vec_duplicate:V_QIHI
2284 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2285 ""
2286 {
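    /* This dummy enum gives the lower-case rtx code names values in this
       scope, so that the textual substitution of <code> below forms a valid
       C expression and identifies which operation this expansion
       instantiates.  The same trick is used in the expanders below.  */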
2287 enum {ashift, lshiftrt, ashiftrt};
2288 bool unsignedp = (<code> == lshiftrt);
2289 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2290 rtx insi2 = gen_reg_rtx (SImode);
2291 rtx outsi = gen_reg_rtx (<VnSI>mode);
2292
2293 convert_move (insi1, operands[1], unsignedp);
2294 convert_move (insi2, operands[2], unsignedp);
2295 emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2));
2296 convert_move (operands[0], outsi, unsignedp);
2297 DONE;
2298 })
2299
2300 (define_insn "<expander><mode>3<exec>"
2301 [(set (match_operand:V_INT_noHI 0 "register_operand" "= v")
2302 (shiftop:V_INT_noHI
2303 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2304 (vec_duplicate:<VnSI>
2305 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2306 ""
2307 "v_<revmnemonic>0\t%0, %2, %1"
2308 [(set_attr "type" "vop2")
2309 (set_attr "length" "8")])
2310
2311 (define_expand "v<expander><mode>3"
2312 [(set (match_operand:V_QIHI 0 "register_operand" "=v")
2313 (shiftop:V_QIHI
2314 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2315 (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
2316 ""
2317 {
2318 enum {ashift, lshiftrt, ashiftrt};
2319 bool unsignedp = (<code> == lshiftrt);
2320 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2321 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2322 rtx outsi = gen_reg_rtx (<VnSI>mode);
2323
2324 convert_move (insi1, operands[1], unsignedp);
2325 convert_move (insi2, operands[2], unsignedp);
2326 emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2));
2327 convert_move (operands[0], outsi, unsignedp);
2328 DONE;
2329 })
2330
2331 (define_insn "v<expander><mode>3<exec>"
2332 [(set (match_operand:V_INT_noHI 0 "register_operand" "=v")
2333 (shiftop:V_INT_noHI
2334 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2335 (match_operand:<VnSI> 2 "gcn_alu_operand" "vB")))]
2336 ""
2337 "v_<revmnemonic>0\t%0, %2, %1"
2338 [(set_attr "type" "vop2")
2339 (set_attr "length" "8")])
2340
2341 (define_expand "<expander><mode>3"
2342 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
2343 (minmaxop:V_QIHI
2344 (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
2345 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
2346 ""
2347 {
2348 enum {smin, umin, smax, umax};
2349 bool unsignedp = (<code> == umax || <code> == umin);
2350 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2351 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2352 rtx outsi = gen_reg_rtx (<VnSI>mode);
2353
2354 convert_move (insi1, operands[1], unsignedp);
2355 convert_move (insi2, operands[2], unsignedp);
2356 emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2));
2357 convert_move (operands[0], outsi, unsignedp);
2358 DONE;
2359 })
2360
2361 (define_insn "<expander><vnsi>3<exec>"
2362 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD")
2363 (minmaxop:V_SI
2364 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0")
2365 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2366 ""
2367 "@
2368 v_<mnemonic>0\t%0, %2, %1
2369 ds_<mnemonic>0\t%A0, %2%O0"
2370 [(set_attr "type" "vop2,ds")
2371 (set_attr "length" "8,8")])
2372
2373 ;; }}}
2374 ;; {{{ Int unops
2375
2376 (define_expand "neg<mode>2"
2377 [(match_operand:V_INT 0 "register_operand")
2378 (match_operand:V_INT 1 "register_operand")]
2379 ""
2380 {
2381 emit_insn (gen_sub<mode>3 (operands[0], gcn_vec_constant (<MODE>mode, 0),
2382 operands[1]));
2383 DONE;
2384 })
2385
2386 ;; }}}
2387 ;; {{{ FP binops - special cases
2388
2389 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2390 ; adding the negated second operand to the first.
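; (That is, a - b is emitted as "v_add_f64 dst, a, -b", using the source
; negation modifier.)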
2391
2392 (define_insn "sub<mode>3<exec>"
2393 [(set (match_operand:V_DF 0 "register_operand" "= v, v")
2394 (minus:V_DF
2395 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v")
2396 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))]
2397 ""
2398 "@
2399 v_add_f64\t%0, %1, -%2
2400 v_add_f64\t%0, -%2, %1"
2401 [(set_attr "type" "vop3a")
2402 (set_attr "length" "8,8")])
2403
2404 (define_insn "subdf3"
2405 [(set (match_operand:DF 0 "register_operand" "= v, v")
2406 (minus:DF
2407 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2408 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2409 ""
2410 "@
2411 v_add_f64\t%0, %1, -%2
2412 v_add_f64\t%0, -%2, %1"
2413 [(set_attr "type" "vop3a")
2414 (set_attr "length" "8,8")])
2415
2416 ;; }}}
2417 ;; {{{ FP binops - generic
2418
2419 (define_code_iterator comm_fp [plus mult smin smax])
2420 (define_code_iterator nocomm_fp [minus])
2421 (define_code_iterator all_fp [plus mult minus smin smax])
2422
2423 (define_insn "<expander><mode>3<exec>"
2424 [(set (match_operand:V_FP 0 "register_operand" "= v")
2425 (comm_fp:V_FP
2426 (match_operand:V_FP 1 "gcn_alu_operand" "% v")
2427 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
2428 ""
2429 "v_<mnemonic>0\t%0, %2, %1"
2430 [(set_attr "type" "vop2")
2431 (set_attr "length" "8")])
2432
2433 (define_insn "<expander><mode>3"
2434 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
2435 (comm_fp:FP
2436 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
2437 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2438 ""
2439 "@
2440 v_<mnemonic>0\t%0, %2, %1
2441 v_<mnemonic>0\t%0, %1%O0"
2442 [(set_attr "type" "vop2,ds")
2443 (set_attr "length" "8")])
2444
2445 (define_insn "<expander><mode>3<exec>"
2446 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
2447 (nocomm_fp:V_FP_1REG
2448 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2449 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2450 ""
2451 "@
2452 v_<mnemonic>0\t%0, %1, %2
2453 v_<revmnemonic>0\t%0, %2, %1"
2454 [(set_attr "type" "vop2")
2455 (set_attr "length" "8,8")])
2456
2457 (define_insn "<expander><mode>3"
2458 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v")
2459 (nocomm_fp:FP_1REG
2460 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2461 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2462 ""
2463 "@
2464 v_<mnemonic>0\t%0, %1, %2
2465 v_<revmnemonic>0\t%0, %2, %1"
2466 [(set_attr "type" "vop2")
2467 (set_attr "length" "8,8")])
2468
2469 ;; }}}
2470 ;; {{{ FP unops
2471
2472 (define_insn "abs<mode>2"
2473 [(set (match_operand:FP 0 "register_operand" "=v")
2474 (abs:FP (match_operand:FP 1 "register_operand" " v")))]
2475 ""
2476 "v_add%i0\t%0, 0, |%1|"
2477 [(set_attr "type" "vop3a")
2478 (set_attr "length" "8")])
2479
2480 (define_insn "abs<mode>2<exec>"
2481 [(set (match_operand:V_FP 0 "register_operand" "=v")
2482 (abs:V_FP
2483 (match_operand:V_FP 1 "register_operand" " v")))]
2484 ""
2485 "v_add%i0\t%0, 0, |%1|"
2486 [(set_attr "type" "vop3a")
2487 (set_attr "length" "8")])
2488
2489 (define_insn "neg<mode>2<exec>"
2490 [(set (match_operand:V_FP 0 "register_operand" "=v")
2491 (neg:V_FP
2492 (match_operand:V_FP 1 "register_operand" " v")))]
2493 ""
2494 "v_add%i0\t%0, 0, -%1"
2495 [(set_attr "type" "vop3a")
2496 (set_attr "length" "8")])
2497
2498 (define_insn "sqrt<mode>2<exec>"
2499 [(set (match_operand:V_FP 0 "register_operand" "= v")
2500 (sqrt:V_FP
2501 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
2502 "flag_unsafe_math_optimizations"
2503 "v_sqrt%i0\t%0, %1"
2504 [(set_attr "type" "vop1")
2505 (set_attr "length" "8")])
2506
2507 (define_insn "sqrt<mode>2"
2508 [(set (match_operand:FP 0 "register_operand" "= v")
2509 (sqrt:FP
2510 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
2511 "flag_unsafe_math_optimizations"
2512 "v_sqrt%i0\t%0, %1"
2513 [(set_attr "type" "vop1")
2514 (set_attr "length" "8")])
2515
2516 ; These FP unops have f64, f32 and f16 versions.
2517 (define_int_iterator MATH_UNOP_1OR2REG
2518 [UNSPEC_FLOOR UNSPEC_CEIL])
2519
2520 ; These FP unops only have f16/f32 versions.
2521 (define_int_iterator MATH_UNOP_1REG
2522 [UNSPEC_EXP2 UNSPEC_LOG2])
2523
2524 (define_int_iterator MATH_UNOP_TRIG
2525 [UNSPEC_SIN UNSPEC_COS])
2526
2527 (define_int_attr math_unop
2528 [(UNSPEC_FLOOR "floor")
2529 (UNSPEC_CEIL "ceil")
2530 (UNSPEC_EXP2 "exp2")
2531 (UNSPEC_LOG2 "log2")
2532 (UNSPEC_SIN "sin")
2533 (UNSPEC_COS "cos")])
2534
2535 (define_insn "<math_unop><mode>2"
2536 [(set (match_operand:FP 0 "register_operand" "= v")
2537 (unspec:FP
2538 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
2539 MATH_UNOP_1OR2REG))]
2540 ""
2541 "v_<math_unop>%i0\t%0, %1"
2542 [(set_attr "type" "vop1")
2543 (set_attr "length" "8")])
2544
2545 (define_insn "<math_unop><mode>2<exec>"
2546 [(set (match_operand:V_FP 0 "register_operand" "= v")
2547 (unspec:V_FP
2548 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
2549 MATH_UNOP_1OR2REG))]
2550 ""
2551 "v_<math_unop>%i0\t%0, %1"
2552 [(set_attr "type" "vop1")
2553 (set_attr "length" "8")])
2554
2555 (define_insn "<math_unop><mode>2"
2556 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
2557 (unspec:FP_1REG
2558 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
2559 MATH_UNOP_1REG))]
2560 "flag_unsafe_math_optimizations"
2561 "v_<math_unop>%i0\t%0, %1"
2562 [(set_attr "type" "vop1")
2563 (set_attr "length" "8")])
2564
2565 (define_insn "<math_unop><mode>2<exec>"
2566 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
2567 (unspec:V_FP_1REG
2568 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
2569 MATH_UNOP_1REG))]
2570 "flag_unsafe_math_optimizations"
2571 "v_<math_unop>%i0\t%0, %1"
2572 [(set_attr "type" "vop1")
2573 (set_attr "length" "8")])
2574
2575 (define_insn "*<math_unop><mode>2_insn"
2576 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
2577 (unspec:FP_1REG
2578 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
2579 MATH_UNOP_TRIG))]
2580 "flag_unsafe_math_optimizations"
2581 "v_<math_unop>%i0\t%0, %1"
2582 [(set_attr "type" "vop1")
2583 (set_attr "length" "8")])
2584
2585 (define_insn "*<math_unop><mode>2<exec>_insn"
2586 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
2587 (unspec:V_FP_1REG
2588 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
2589 MATH_UNOP_TRIG))]
2590 "flag_unsafe_math_optimizations"
2591 "v_<math_unop>%i0\t%0, %1"
2592 [(set_attr "type" "vop1")
2593 (set_attr "length" "8")])
2594
2595 ; Trigonometric functions need their input scaled by 1/(2*PI) first.
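; The expanders below therefore emit a multiply by 1/(2*PI) into a fresh
; register and then apply the *<math_unop><mode>2_insn patterns above to the
; scaled value.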
2596
2597 (define_expand "<math_unop><mode>2"
2598 [(set (match_dup 2)
2599 (mult:FP_1REG
2600 (match_dup 3)
2601 (match_operand:FP_1REG 1 "gcn_alu_operand")))
2602 (set (match_operand:FP_1REG 0 "register_operand")
2603 (unspec:FP_1REG
2604 [(match_dup 2)]
2605 MATH_UNOP_TRIG))]
2606 "flag_unsafe_math_optimizations"
2607 {
2608 operands[2] = gen_reg_rtx (<MODE>mode);
2609 operands[3] = const_double_from_real_value (gcn_dconst1over2pi (),
2610 <MODE>mode);
2611 })
2612
2613 (define_expand "<math_unop><mode>2<exec>"
2614 [(set (match_dup 2)
2615 (mult:V_FP_1REG
2616 (match_dup 3)
2617 (match_operand:V_FP_1REG 1 "gcn_alu_operand")))
2618 (set (match_operand:V_FP_1REG 0 "register_operand")
2619 (unspec:V_FP_1REG
2620 [(match_dup 2)]
2621 MATH_UNOP_TRIG))]
2622 "flag_unsafe_math_optimizations"
2623 {
2624 operands[2] = gen_reg_rtx (<MODE>mode);
2625 operands[3] =
2626 gcn_vec_constant (<MODE>mode,
2627 const_double_from_real_value (gcn_dconst1over2pi (),
2628 <SCALAR_MODE>mode));
2629 })
2630
2631 ; Implement ldexp pattern
2632
2633 (define_insn "ldexp<mode>3"
2634 [(set (match_operand:FP 0 "register_operand" "=v")
2635 (unspec:FP
2636 [(match_operand:FP 1 "gcn_alu_operand" "vB")
2637 (match_operand:SI 2 "gcn_alu_operand" "vSvA")]
2638 UNSPEC_LDEXP))]
2639 ""
2640 "v_ldexp%i0\t%0, %1, %2"
2641 [(set_attr "type" "vop3a")
2642 (set_attr "length" "8")])
2643
2644 (define_insn "ldexp<mode>3<exec>"
2645 [(set (match_operand:V_FP 0 "register_operand" "= v")
2646 (unspec:V_FP
2647 [(match_operand:V_FP 1 "gcn_alu_operand" " vB")
2648 (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")]
2649 UNSPEC_LDEXP))]
2650 ""
2651 "v_ldexp%i0\t%0, %1, %2"
2652 [(set_attr "type" "vop3a")
2653 (set_attr "length" "8")])
2654
2655 ; Implement frexp patterns
2656
2657 (define_insn "frexp<mode>_exp2"
2658 [(set (match_operand:SI 0 "register_operand" "=v")
2659 (unspec:SI
2660 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
2661 UNSPEC_FREXP_EXP))]
2662 ""
2663 "v_frexp_exp_i32%i1\t%0, %1"
2664 [(set_attr "type" "vop1")
2665 (set_attr "length" "8")])
2666
2667 (define_insn "frexp<mode>_mant2"
2668 [(set (match_operand:FP 0 "register_operand" "=v")
2669 (unspec:FP
2670 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
2671 UNSPEC_FREXP_MANT))]
2672 ""
2673 "v_frexp_mant%i1\t%0, %1"
2674 [(set_attr "type" "vop1")
2675 (set_attr "length" "8")])
2676
2677 (define_insn "frexp<mode>_exp2<exec>"
2678 [(set (match_operand:<VnSI> 0 "register_operand" "=v")
2679 (unspec:<VnSI>
2680 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
2681 UNSPEC_FREXP_EXP))]
2682 ""
2683 "v_frexp_exp_i32%i1\t%0, %1"
2684 [(set_attr "type" "vop1")
2685 (set_attr "length" "8")])
2686
2687 (define_insn "frexp<mode>_mant2<exec>"
2688 [(set (match_operand:V_FP 0 "register_operand" "=v")
2689 (unspec:V_FP
2690 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
2691 UNSPEC_FREXP_MANT))]
2692 ""
2693 "v_frexp_mant%i1\t%0, %1"
2694 [(set_attr "type" "vop1")
2695 (set_attr "length" "8")])
2696
2697 ;; }}}
2698 ;; {{{ FP fused multiply and add
2699
2700 (define_insn "fma<mode>4<exec>"
2701 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
2702 (fma:V_FP
2703 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
2704 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
2705 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
2706 ""
2707 "v_fma%i0\t%0, %1, %2, %3"
2708 [(set_attr "type" "vop3a")
2709 (set_attr "length" "8")])
2710
2711 (define_insn "fma<mode>4_negop2<exec>"
2712 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
2713 (fma:V_FP
2714 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2715 (neg:V_FP
2716 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2717 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2718 ""
2719 "v_fma%i0\t%0, %1, -%2, %3"
2720 [(set_attr "type" "vop3a")
2721 (set_attr "length" "8")])
2722
2723 (define_insn "fma<mode>4"
2724 [(set (match_operand:FP 0 "register_operand" "= v, v")
2725 (fma:FP
2726 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
2727 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
2728 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
2729 ""
2730 "v_fma%i0\t%0, %1, %2, %3"
2731 [(set_attr "type" "vop3a")
2732 (set_attr "length" "8")])
2733
2734 (define_insn "fma<mode>4_negop2"
2735 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
2736 (fma:FP
2737 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
2738 (neg:FP
2739 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
2740 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2741 ""
2742 "v_fma%i0\t%0, %1, -%2, %3"
2743 [(set_attr "type" "vop3a")
2744 (set_attr "length" "8")])
2745
2746 ;; }}}
2747 ;; {{{ FP division
2748
2749 (define_insn "recip<mode>2<exec>"
2750 [(set (match_operand:V_FP 0 "register_operand" "= v")
2751 (unspec:V_FP
2752 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
2753 UNSPEC_RCP))]
2754 ""
2755 "v_rcp%i0\t%0, %1"
2756 [(set_attr "type" "vop1")
2757 (set_attr "length" "8")])
2758
2759 (define_insn "recip<mode>2"
2760 [(set (match_operand:FP 0 "register_operand" "= v")
2761 (unspec:FP
2762 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
2763 UNSPEC_RCP))]
2764 ""
2765 "v_rcp%i0\t%0, %1"
2766 [(set_attr "type" "vop1")
2767 (set_attr "length" "8")])
2768
2769 ;; Do division via a = b * 1/c
2770 ;; The v_rcp_* instructions are not sufficiently accurate on their own,
2771 ;; so we use two v_fma_* instructions to do one round of Newton-Raphson,
2772 ;; which the ISA manual says is enough to improve the reciprocal accuracy.
2773 ;;
2774 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
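;; Roughly, for q = b/c with r0 = v_rcp(c), the sequence emitted below is:
;;   e  = fma(-c, r0, 1)
;;   r  = fma(e, r0, r0)               ; r = r0*(2 - c*r0), one N-R step
;;   q0 = b * r
;;   q1 = fma(fma(-c, q0, b), r, q0)
;;   q  = fma(fma(-c, q1, b), r, q1)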
2775
2776 (define_expand "div<mode>3"
2777 [(match_operand:V_FP 0 "gcn_valu_dst_operand")
2778 (match_operand:V_FP 1 "gcn_valu_src0_operand")
2779 (match_operand:V_FP 2 "gcn_valu_src0_operand")]
2780 "flag_reciprocal_math"
2781 {
2782 rtx one = gcn_vec_constant (<MODE>mode,
2783 const_double_from_real_value (dconst1, <SCALAR_MODE>mode));
2784 rtx initrcp = gen_reg_rtx (<MODE>mode);
2785 rtx fma = gen_reg_rtx (<MODE>mode);
2786 rtx rcp;
2787 rtx num = operands[1], denom = operands[2];
2788
2789 bool is_rcp = (GET_CODE (num) == CONST_VECTOR
2790 && real_identical
2791 (CONST_DOUBLE_REAL_VALUE
2792 (CONST_VECTOR_ELT (num, 0)), &dconstm1));
2793
2794 if (is_rcp)
2795 rcp = operands[0];
2796 else
2797 rcp = gen_reg_rtx (<MODE>mode);
2798
2799 emit_insn (gen_recip<mode>2 (initrcp, denom));
2800 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
2801 emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));
2802
2803 if (!is_rcp)
2804 {
2805 rtx div_est = gen_reg_rtx (<MODE>mode);
2806 rtx fma2 = gen_reg_rtx (<MODE>mode);
2807 rtx fma3 = gen_reg_rtx (<MODE>mode);
2808 rtx fma4 = gen_reg_rtx (<MODE>mode);
2809 emit_insn (gen_mul<mode>3 (div_est, num, rcp));
2810 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
2811 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
2812 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
2813 emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
2814 }
2815
2816 DONE;
2817 })
2818
2819 (define_expand "div<mode>3"
2820 [(match_operand:FP 0 "gcn_valu_dst_operand")
2821 (match_operand:FP 1 "gcn_valu_src0_operand")
2822 (match_operand:FP 2 "gcn_valu_src0_operand")]
2823 "flag_reciprocal_math"
2824 {
2825 rtx one = const_double_from_real_value (dconst1, <MODE>mode);
2826 rtx initrcp = gen_reg_rtx (<MODE>mode);
2827 rtx fma = gen_reg_rtx (<MODE>mode);
2828 rtx rcp;
2829 rtx num = operands[1], denom = operands[2];
2830
2831 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
2832 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
2833 &dconstm1));
2834
2835 if (is_rcp)
2836 rcp = operands[0];
2837 else
2838 rcp = gen_reg_rtx (<MODE>mode);
2839
2840 emit_insn (gen_recip<mode>2 (initrcp, denom));
2841 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
2842 emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));
2843
2844 if (!is_rcp)
2845 {
2846 rtx div_est = gen_reg_rtx (<MODE>mode);
2847 rtx fma2 = gen_reg_rtx (<MODE>mode);
2848 rtx fma3 = gen_reg_rtx (<MODE>mode);
2849 rtx fma4 = gen_reg_rtx (<MODE>mode);
2850 emit_insn (gen_mul<mode>3 (div_est, num, rcp));
2851 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
2852 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
2853 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
2854 emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
2855 }
2856
2857 DONE;
2858 })
2859
2860 ;; }}}
2861 ;; {{{ Int/FP conversions
2862
2863 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2864 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2865
2866 (define_mode_iterator VCVT_MODE
2867 [V2HI V2SI V2HF V2SF V2DF
2868 V4HI V4SI V4HF V4SF V4DF
2869 V8HI V8SI V8HF V8SF V8DF
2870 V16HI V16SI V16HF V16SF V16DF
2871 V32HI V32SI V32HF V32SF V32DF
2872 V64HI V64SI V64HF V64SF V64DF])
2873 (define_mode_iterator VCVT_FMODE
2874 [V2HF V2SF V2DF
2875 V4HF V4SF V4DF
2876 V8HF V8SF V8DF
2877 V16HF V16SF V16DF
2878 V32HF V32SF V32DF
2879 V64HF V64SF V64DF])
2880 (define_mode_iterator VCVT_IMODE
2881 [V2HI V2SI
2882 V4HI V4SI
2883 V8HI V8SI
2884 V16HI V16SI
2885 V32HI V32SI
2886 V64HI V64SI])
2887
2888 (define_code_iterator cvt_op [fix unsigned_fix
2889 float unsigned_float
2890 float_extend float_truncate])
2891 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2892 (float "float") (unsigned_float "floatuns")
2893 (float_extend "extend") (float_truncate "trunc")])
2894 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2895 (float "%i0%i1") (unsigned_float "%i0%u1")
2896 (float_extend "%i0%i1")
2897 (float_truncate "%i0%i1")])
2898
2899 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2900 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2901 (cvt_op:CVT_TO_MODE
2902 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2903 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2904 <cvt_name>_cvt)"
2905 "v_cvt<cvt_operands>\t%0, %1"
2906 [(set_attr "type" "vop1")
2907 (set_attr "length" "8")])
2908
2909 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
2910 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
2911 (cvt_op:VCVT_FMODE
2912 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
2913 "MODE_VF (<VCVT_MODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
2914 && gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
2915 <cvt_name>_cvt)"
2916 "v_cvt<cvt_operands>\t%0, %1"
2917 [(set_attr "type" "vop1")
2918 (set_attr "length" "8")])
2919
2920 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
2921 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
2922 (cvt_op:VCVT_IMODE
2923 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
2924 "MODE_VF (<VCVT_IMODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
2925 && gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
2926 <cvt_name>_cvt)"
2927 "v_cvt<cvt_operands>\t%0, %1"
2928 [(set_attr "type" "vop1")
2929 (set_attr "length" "8")])
2930
2931 ;; }}}
2932 ;; {{{ Int/int conversions
2933
2934 (define_code_iterator zero_convert [truncate zero_extend])
2935 (define_code_attr convop [
2936 (sign_extend "extend")
2937 (zero_extend "zero_extend")
2938 (truncate "trunc")])
2939
2940 (define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2941 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2942 (zero_convert:V_INT_1REG
2943 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
2944 ""
2945 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
2946 [(set_attr "type" "vop_sdwa")
2947 (set_attr "length" "8")])
2948
2949 (define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
2950 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2951 (sign_extend:V_INT_1REG
2952 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
2953 ""
2954 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
2955 [(set_attr "type" "vop_sdwa")
2956 (set_attr "length" "8")])
2957
2958 ;; GCC can already do these for scalar types, but not for vector types.
2959 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2960 ;; so a few tricks are needed here.
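;; For instance, zero-extending a VnSI value X to VnDI below sets the low
;; part to X and the high part to 0, sign-extension sets the high part to
;; X >> 31 (arithmetic), and truncation simply copies the low-part register.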
2961
2962 (define_insn_and_split "trunc<vndi><mode>2"
2963 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2964 (truncate:V_INT_1REG
2965 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))]
2966 ""
2967 "#"
2968 "reload_completed"
2969 [(const_int 0)]
2970 {
2971 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
2972 rtx out = operands[0];
2973
2974 if (<MODE>mode != <VnSI>mode)
2975 emit_insn (gen_trunc<vnsi><mode>2 (out, inlo));
2976 else
2977 emit_move_insn (out, inlo);
2978 }
2979 [(set_attr "type" "vop2")
2980 (set_attr "length" "4")])
2981
2982 (define_insn_and_split "trunc<vndi><mode>2_exec"
2983 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
2984 (vec_merge:V_INT_1REG
2985 (truncate:V_INT_1REG
2986 (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
2987 (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
2988 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2989 ""
2990 "#"
2991 "reload_completed"
2992 [(const_int 0)]
2993 {
2994 rtx out = operands[0];
2995 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
2996 rtx merge = operands[2];
2997 rtx exec = operands[3];
2998
2999 if (<MODE>mode != <VnSI>mode)
3000 emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec));
3001 else
3002 emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec));
3003 }
3004 [(set_attr "type" "vop2")
3005 (set_attr "length" "4")])
3006
3007 (define_insn_and_split "<convop><mode><vndi>2"
3008 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3009 (any_extend:<VnDI>
3010 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
3011 ""
3012 "#"
3013 "reload_completed"
3014 [(const_int 0)]
3015 {
3016 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3017 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3018 rtx in = operands[1];
3019
3020 if (<MODE>mode != <VnSI>mode)
3021 emit_insn (gen_<convop><mode><vnsi>2 (outlo, in));
3022 else
3023 emit_move_insn (outlo, in);
3024 if ('<su>' == 's')
3025 emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31)));
3026 else
3027 emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx));
3028 }
3029 [(set_attr "type" "mult")
3030 (set_attr "length" "12")])
3031
3032 (define_insn_and_split "<convop><mode><vndi>2_exec"
3033 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3034 (vec_merge:<VnDI>
3035 (any_extend:<VnDI>
3036 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
3037 (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
3038 (match_operand:DI 3 "gcn_exec_operand" " e")))]
3039 ""
3040 "#"
3041 "reload_completed"
3042 [(const_int 0)]
3043 {
3044 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3045 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3046 rtx in = operands[1];
3047 rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0);
3048 rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1);
3049 rtx exec = operands[3];
3050
3051 if (<MODE>mode != <VnSI>mode)
3052 emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec));
3053 else
3054 emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec));
3055 if ('<su>' == 's')
3056 emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi,
3057 exec));
3058 else
3059 emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi,
3060 exec));
3061 }
3062 [(set_attr "type" "mult")
3063 (set_attr "length" "12")])
3064
3065 ;; }}}
3066 ;; {{{ Vector comparison/merge
3067
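;; A vector comparison produces a DImode lane mask.  The first two
;; alternatives place it in VCC (v_cmp), the next two produce it directly in
;; EXEC via v_cmpx (which also writes VCC, hence the clobber), and the
;; remaining alternatives use the VOP3 encoding to write an arbitrary SGPR
;; pair.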
3068 (define_insn "vec_cmp<mode>di"
3069 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
3070 (match_operator:DI 1 "gcn_fp_compare_operator"
3071 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
3072 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]))
3073 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
3074 ""
3075 "@
3076 v_cmp%E1\tvcc, %2, %3
3077 v_cmp%E1\tvcc, %2, %3
3078 v_cmpx%E1\tvcc, %2, %3
3079 v_cmpx%E1\tvcc, %2, %3
3080 v_cmp%E1\t%0, %2, %3
3081 v_cmp%E1\t%0, %2, %3"
3082 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
3083 (set_attr "length" "4,8,4,8,8,8")])
3084
3085 (define_expand "vec_cmpu<mode>di"
3086 [(match_operand:DI 0 "register_operand")
3087 (match_operator 1 "gcn_compare_operator"
3088 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3089 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
3090 ""
3091 {
3092 /* Unsigned comparisons use the same patterns as signed comparisons,
3093 except that they use unsigned operators (e.g. LTU vs LT).
3094 The '%E1' directive then does the Right Thing. */
3095 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
3096 operands[3]));
3097 DONE;
3098 })
3099
3100 ; There's no instruction for 8-bit vector comparison, so we need to extend.
3101 (define_expand "vec_cmp<u><mode>di"
3102 [(match_operand:DI 0 "register_operand")
3103 (match_operator 1 "gcn_compare_operator"
3104 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3105 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
3106 "can_create_pseudo_p ()"
3107 {
3108 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3109 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
3110
3111 emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2]));
3112 emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3]));
3113 emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2));
3114 DONE;
3115 })
3116
3117 (define_insn "vec_cmp<mode>di_exec"
3118 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
3119 (and:DI
3120 (match_operator 1 "gcn_fp_compare_operator"
3121 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
3122 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])
3123 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
3124 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
3125 ""
3126 "@
3127 v_cmp%E1\tvcc, %2, %3
3128 v_cmp%E1\tvcc, %2, %3
3129 v_cmpx%E1\tvcc, %2, %3
3130 v_cmpx%E1\tvcc, %2, %3
3131 v_cmp%E1\t%0, %2, %3
3132 v_cmp%E1\t%0, %2, %3"
3133 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
3134 (set_attr "length" "4,8,4,8,8,8")])
3135
3136 (define_expand "vec_cmpu<mode>di_exec"
3137 [(match_operand:DI 0 "register_operand")
3138 (match_operator 1 "gcn_compare_operator"
3139 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3140 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
3141 (match_operand:DI 4 "gcn_exec_reg_operand")]
3142 ""
3143 {
3144 /* Unsigned comparisons use the same patterns as signed comparisons,
3145 except that they use unsigned operators (e.g. LTU vs LT).
3146 The '%E1' directive then does the Right Thing. */
3147 emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
3148 operands[2], operands[3],
3149 operands[4]));
3150 DONE;
3151 })
3152
3153 (define_expand "vec_cmp<u><mode>di_exec"
3154 [(match_operand:DI 0 "register_operand")
3155 (match_operator 1 "gcn_compare_operator"
3156 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3157 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
3158 (match_operand:DI 4 "gcn_exec_reg_operand")]
3159 "can_create_pseudo_p ()"
3160 {
3161 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3162 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
3163
3164 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
3165 operands[2], operands[4]));
3166 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
3167 operands[3], operands[4]));
3168 emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
3169 sitmp2, operands[4]));
3170 DONE;
3171 })
3172
3173 (define_insn "vec_cmp<mode>di_dup"
3174 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
3175 (match_operator:DI 1 "gcn_fp_compare_operator"
3176 [(vec_duplicate:V_noQI
3177 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3178 " Sv, B,Sv,B, A"))
3179 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]))
3180 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
3181 ""
3182 "@
3183 v_cmp%E1\tvcc, %2, %3
3184 v_cmp%E1\tvcc, %2, %3
3185 v_cmpx%E1\tvcc, %2, %3
3186 v_cmpx%E1\tvcc, %2, %3
3187 v_cmp%E1\t%0, %2, %3"
3188 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
3189 (set_attr "length" "4,8,4,8,8")])
3190
3191 (define_insn "vec_cmp<mode>di_dup_exec"
3192 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
3193 (and:DI
3194 (match_operator 1 "gcn_fp_compare_operator"
3195 [(vec_duplicate:V_noQI
3196 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3197 " Sv, B,Sv,B, A"))
3198 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
3199 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
3200 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
3201 ""
3202 "@
3203 v_cmp%E1\tvcc, %2, %3
3204 v_cmp%E1\tvcc, %2, %3
3205 v_cmpx%E1\tvcc, %2, %3
3206 v_cmpx%E1\tvcc, %2, %3
3207 v_cmp%E1\t%0, %2, %3"
3208 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
3209 (set_attr "length" "4,8,4,8,8")])
3210
3211 (define_expand "vcond_mask_<mode>di"
3212 [(parallel
3213 [(set (match_operand:V_ALL 0 "register_operand" "")
3214 (vec_merge:V_ALL
3215 (match_operand:V_ALL 1 "gcn_vop3_operand" "")
3216 (match_operand:V_ALL 2 "gcn_alu_operand" "")
3217 (match_operand:DI 3 "register_operand" "")))
3218 (clobber (scratch:<VnDI>))])]
3219 ""
3220 "")
3221
3222 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
3223 [(match_operand:V_ALL 0 "register_operand")
3224 (match_operand:V_ALL 1 "gcn_vop3_operand")
3225 (match_operand:V_ALL 2 "gcn_alu_operand")
3226 (match_operator 3 "gcn_fp_compare_operator"
3227 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3228 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
3229 ""
3230 {
3231 rtx tmp = gen_reg_rtx (DImode);
3232 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di
3233 (tmp, operands[3], operands[4], operands[5]));
3234 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3235 (operands[0], operands[1], operands[2], tmp));
3236 DONE;
3237 })
3238
3239 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
3240 [(match_operand:V_ALL 0 "register_operand")
3241 (match_operand:V_ALL 1 "gcn_vop3_operand")
3242 (match_operand:V_ALL 2 "gcn_alu_operand")
3243 (match_operator 3 "gcn_fp_compare_operator"
3244 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3245 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
3246 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3247 ""
3248 {
3249 rtx tmp = gen_reg_rtx (DImode);
3250 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec
3251 (tmp, operands[3], operands[4], operands[5], operands[6]));
3252 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3253 (operands[0], operands[1], operands[2], tmp));
3254 DONE;
3255 })
3256
3257 (define_expand "vcondu<V_ALL:mode><V_INT:mode>"
3258 [(match_operand:V_ALL 0 "register_operand")
3259 (match_operand:V_ALL 1 "gcn_vop3_operand")
3260 (match_operand:V_ALL 2 "gcn_alu_operand")
3261 (match_operator 3 "gcn_fp_compare_operator"
3262 [(match_operand:V_INT 4 "gcn_alu_operand")
3263 (match_operand:V_INT 5 "gcn_vop3_operand")])]
3264 ""
3265 {
3266 rtx tmp = gen_reg_rtx (DImode);
3267 emit_insn (gen_vec_cmpu<V_INT:mode>di
3268 (tmp, operands[3], operands[4], operands[5]));
3269 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3270 (operands[0], operands[1], operands[2], tmp));
3271 DONE;
3272 })
3273
3274 (define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
3275 [(match_operand:V_ALL 0 "register_operand")
3276 (match_operand:V_ALL 1 "gcn_vop3_operand")
3277 (match_operand:V_ALL 2 "gcn_alu_operand")
3278 (match_operator 3 "gcn_fp_compare_operator"
3279 [(match_operand:V_INT 4 "gcn_alu_operand")
3280 (match_operand:V_INT 5 "gcn_vop3_operand")])
3281 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3282 ""
3283 {
3284 rtx tmp = gen_reg_rtx (DImode);
3285 emit_insn (gen_vec_cmpu<V_INT:mode>di_exec
3286 (tmp, operands[3], operands[4], operands[5], operands[6]));
3287 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3288 (operands[0], operands[1], operands[2], tmp));
3289 DONE;
3290 })
3291
3292 ;; }}}
3293 ;; {{{ Fully masked loop support
3294
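; Build a DImode mask with bit K set when operands[1] + K < operands[2],
; clipped to the first operands[3] lanes.  With constant bounds this is
; just the low (operands[2] - operands[1]) bits; e.g. bounds 0 and 5 give
; 0x1f.  Otherwise the lane-index register v1 (holding 0, 1, 2, ...) is
; offset by operands[1] and compared against a broadcast of operands[2].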
3295 (define_expand "while_ultsidi"
3296 [(match_operand:DI 0 "register_operand")
3297 (match_operand:SI 1 "")
3298 (match_operand:SI 2 "")
3299 (match_operand:SI 3 "")]
3300 ""
3301 {
3302 if (GET_CODE (operands[1]) != CONST_INT
3303 || GET_CODE (operands[2]) != CONST_INT)
3304 {
3305 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3306 rtx tmp = _0_1_2_3;
3307 if (GET_CODE (operands[1]) != CONST_INT
3308 || INTVAL (operands[1]) != 0)
3309 {
3310 tmp = gen_reg_rtx (V64SImode);
3311 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
3312 }
3313 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
3314 gen_rtx_GT (VOIDmode, 0, 0),
3315 operands[2], tmp));
3316 }
3317 else
3318 {
3319 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
3320 HOST_WIDE_INT mask = (diff >= 64 ? -1
3321 : ~((unsigned HOST_WIDE_INT)-1 << diff));
3322 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
3323 }
3324 if (INTVAL (operands[3]) < 64)
3325 emit_insn (gen_anddi3 (operands[0], operands[0],
3326 gen_rtx_CONST_INT (VOIDmode,
3327 ~((unsigned HOST_WIDE_INT)-1
3328 << INTVAL (operands[3])))));
3329 DONE;
3330 })
3331
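; Masked loads become gathers under the mask: the destination is cleared
; first (masked-off lanes must read as zero), the scalar address is widened
; to a vector of per-lane addresses, and the gather runs with operands[2]
; forced into a DImode register as the EXEC mask.  Masked stores use the
; matching scatter.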
3332 (define_expand "maskload<mode>di"
3333 [(match_operand:V_ALL 0 "register_operand")
3334 (match_operand:V_ALL 1 "memory_operand")
3335 (match_operand 2 "")]
3336 ""
3337 {
3338 rtx exec = force_reg (DImode, operands[2]);
3339 rtx addr = gcn_expand_scalar_to_vector_address
3340 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode));
3341 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
3342 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
3343
3344 /* Masked lanes are required to hold zero. */
3345 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
3346
3347 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
3348 operands[0], exec));
3349 DONE;
3350 })
3351
3352 (define_expand "maskstore<mode>di"
3353 [(match_operand:V_ALL 0 "memory_operand")
3354 (match_operand:V_ALL 1 "register_operand")
3355 (match_operand 2 "")]
3356 ""
3357 {
3358 rtx exec = force_reg (DImode, operands[2]);
3359 rtx addr = gcn_expand_scalar_to_vector_address
3360 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode));
3361 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
3362 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
3363 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
3364 DONE;
3365 })
3366
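; gcn_expand_scaled_offsets either folds the base into full 64-bit per-lane
; addresses (<VnDI>mode) or returns 32-bit offsets; the first case maps to
; the single-offset gather/scatter form, the second to the base-plus-offsets
; (2offsets) form.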
3367 (define_expand "mask_gather_load<mode><vnsi>"
3368 [(match_operand:V_ALL 0 "register_operand")
3369 (match_operand:DI 1 "register_operand")
3370 (match_operand:<VnSI> 2 "register_operand")
3371 (match_operand 3 "immediate_operand")
3372 (match_operand:SI 4 "gcn_alu_operand")
3373 (match_operand:DI 5 "")]
3374 ""
3375 {
3376 rtx exec = force_reg (DImode, operands[5]);
3377
3378 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
3379 operands[2], operands[4],
3380 INTVAL (operands[3]), exec);
3381
3382 /* Masked lanes are required to hold zero. */
3383 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
3384
3385 if (GET_MODE (addr) == <VnDI>mode)
3386 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
3387 const0_rtx, const0_rtx,
3388 const0_rtx, operands[0],
3389 exec));
3390 else
3391 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
3392 addr, const0_rtx,
3393 const0_rtx, const0_rtx,
3394 operands[0], exec));
3395 DONE;
3396 })
3397
3398 (define_expand "mask_scatter_store<mode><vnsi>"
3399 [(match_operand:DI 0 "register_operand")
3400 (match_operand:<VnSI> 1 "register_operand")
3401 (match_operand 2 "immediate_operand")
3402 (match_operand:SI 3 "gcn_alu_operand")
3403 (match_operand:V_ALL 4 "register_operand")
3404 (match_operand:DI 5 "")]
3405 ""
3406 {
3407 rtx exec = force_reg (DImode, operands[5]);
3408
3409 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
3410 operands[1], operands[3],
3411 INTVAL (operands[2]), exec);
3412
3413 if (GET_MODE (addr) == <VnDI>mode)
3414 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
3415 operands[4], const0_rtx,
3416 const0_rtx,
3417 exec));
3418 else
3419 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
3420 const0_rtx, operands[4],
3421 const0_rtx, const0_rtx,
3422 exec));
3423 DONE;
3424 })
3425
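; Conditional (masked) arithmetic, as used by the cond_* optabs: operand 1
; is the lane mask and operand 4 the fallback value.  Each operation is
; simply routed to the corresponding *_exec pattern, passing operands[4] as
; the merge value for inactive lanes and operands[1] as EXEC.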
3426 (define_code_iterator cond_op [plus minus mult])
3427
3428 (define_expand "cond_<expander><mode>"
3429 [(match_operand:V_ALL 0 "register_operand")
3430 (match_operand:DI 1 "register_operand")
3431 (cond_op:V_ALL
3432 (match_operand:V_ALL 2 "gcn_alu_operand")
3433 (match_operand:V_ALL 3 "gcn_alu_operand"))
3434 (match_operand:V_ALL 4 "register_operand")]
3435 ""
3436 {
3437 operands[1] = force_reg (DImode, operands[1]);
3438 operands[2] = force_reg (<MODE>mode, operands[2]);
3439
3440 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
3441 operands[3], operands[4],
3442 operands[1]));
3443 DONE;
3444 })
3445
3446 ;; TODO smin umin smax umax
3447 (define_code_iterator cond_bitop [and ior xor])
3448
3449 (define_expand "cond_<expander><mode>"
3450 [(match_operand:V_INT 0 "register_operand")
3451 (match_operand:DI 1 "register_operand")
3452 (cond_bitop:V_INT
3453 (match_operand:V_INT 2 "gcn_alu_operand")
3454 (match_operand:V_INT 3 "gcn_alu_operand"))
3455 (match_operand:V_INT 4 "register_operand")]
3456 ""
3457 {
3458 operands[1] = force_reg (DImode, operands[1]);
3459 operands[2] = force_reg (<MODE>mode, operands[2]);
3460
3461 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
3462 operands[3], operands[4],
3463 operands[1]));
3464 DONE;
3465 })
3466
3467 ;; }}}
3468 ;; {{{ Vector reductions
3469
3470 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
3471 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
3472 UNSPEC_PLUS_DPP_SHR
3473 UNSPEC_AND_DPP_SHR
3474 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3475
3476 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
3477 UNSPEC_AND_DPP_SHR
3478 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3479
3480 ; FIXME: Isn't there a better way of doing this?
3481 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
3482 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
3483 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
3484 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
3485 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
3486 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
3487 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
3488 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
3489
3490 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
3491 (UNSPEC_SMAX_DPP_SHR "smax")
3492 (UNSPEC_UMIN_DPP_SHR "umin")
3493 (UNSPEC_UMAX_DPP_SHR "umax")
3494 (UNSPEC_PLUS_DPP_SHR "plus")
3495 (UNSPEC_AND_DPP_SHR "and")
3496 (UNSPEC_IOR_DPP_SHR "ior")
3497 (UNSPEC_XOR_DPP_SHR "xor")])
3498
3499 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
3500 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
3501 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
3502 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
3503 (UNSPEC_PLUS_DPP_SHR "v_add%U0")
3504 (UNSPEC_AND_DPP_SHR "v_and%B0")
3505 (UNSPEC_IOR_DPP_SHR "v_or%B0")
3506 (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
3507
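; Reduce a vector to a scalar.  gcn_expand_reduc_scalar emits a sequence of
; the DPP shift/combine steps defined below, accumulating the result into
; the last lane, which is then extracted with vec_extract.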
3508 (define_expand "reduc_<reduc_op>_scal_<mode>"
3509 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
3510 (unspec:<SCALAR_MODE>
3511 [(match_operand:V_ALL 1 "register_operand")]
3512 REDUC_UNSPEC))]
3513 ""
3514 {
3515 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
3516 <reduc_unspec>);
3517
3518 rtx last_lane = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
3519 emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], tmp,
3520 last_lane));
3521
3522 DONE;
3523 })
3524
3525 ;; Warning: This "-ffast-math" implementation converts in-order reductions
3526 ;; into associative reductions. It's also used where OpenMP or
3527 ;; OpenACC parallelization has already broken the in-order semantics.
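;; For example, (((s + v0) + v1) + v2) + v3 is evaluated here as
;; s + (reassociated sum of v0..v3), so floating-point results may round
;; differently from the strict in-order evaluation.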
3528 (define_expand "fold_left_plus_<mode>"
3529 [(match_operand:<SCALAR_MODE> 0 "register_operand")
3530 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
3531 (match_operand:V_FP 2 "gcn_alu_operand")]
3532 "can_create_pseudo_p ()
3533 && (flag_openacc || flag_openmp
3534 || flag_associative_math)"
3535 {
3536 rtx dest = operands[0];
3537 rtx scalar = operands[1];
3538 rtx vector = operands[2];
3539 rtx tmp = gen_reg_rtx (<SCALAR_MODE>mode);
3540
3541 emit_insn (gen_reduc_plus_scal_<mode> (tmp, vector));
3542 emit_insn (gen_add<scalar_mode>3 (dest, scalar, tmp));
3543 DONE;
3544 })
3545
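; One step of the reduction tree: <reduc_insn> combines its two vector
; inputs, reading one through a DPP lane-shift whose distance is given by
; operands[3]; gcn_expand_dpp_shr_insn produces the assembler text.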
3546 (define_insn "*<reduc_op>_dpp_shr_<mode>"
3547 [(set (match_operand:V_1REG 0 "register_operand" "=v")
3548 (unspec:V_1REG
3549 [(match_operand:V_1REG 1 "register_operand" "v")
3550 (match_operand:V_1REG 2 "register_operand" "v")
3551 (match_operand:SI 3 "const_int_operand" "n")]
3552 REDUC_UNSPEC))]
3553 ; GCN3 requires a carry out; GCN5 does not.
3554 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
3555 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
3556 {
3557 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
3558 <reduc_unspec>, INTVAL (operands[3]));
3559 }
3560 [(set_attr "type" "vop_dpp")
3561 (set_attr "length" "8")])
3562
3563 (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
3564 [(set (match_operand:V_DI 0 "register_operand" "=v")
3565 (unspec:V_DI
3566 [(match_operand:V_DI 1 "register_operand" "v")
3567 (match_operand:V_DI 2 "register_operand" "v")
3568 (match_operand:SI 3 "const_int_operand" "n")]
3569 REDUC_2REG_UNSPEC))]
3570 ""
3571 "#"
3572 "reload_completed"
3573 [(set (match_dup 4)
3574 (unspec:<VnSI>
3575 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
3576 (set (match_dup 5)
3577 (unspec:<VnSI>
3578 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
3579 {
3580 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
3581 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
3582 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
3583 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
3584 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
3585 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3586 }
3587 [(set_attr "type" "vmult")
3588 (set_attr "length" "16")])
3589
3590 ; Special cases for addition.
3591
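; 64-bit vector additions must propagate carries between their 32-bit
; halves: plus_carry_dpp_shr produces a carry out in VCC,
; plus_carry_in_dpp_shr consumes it, and the V_DI pattern at the end of
; this group splits into that pair after reload.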
3592 (define_insn "*plus_carry_dpp_shr_<mode>"
3593 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3594 (unspec:V_INT_1REG
3595 [(match_operand:V_INT_1REG 1 "register_operand" "v")
3596 (match_operand:V_INT_1REG 2 "register_operand" "v")
3597 (match_operand:SI 3 "const_int_operand" "n")]
3598 UNSPEC_PLUS_CARRY_DPP_SHR))
3599 (clobber (reg:DI VCC_REG))]
3600 ""
3601 {
3602 return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
3603 UNSPEC_PLUS_CARRY_DPP_SHR,
3604 INTVAL (operands[3]));
3605 }
3606 [(set_attr "type" "vop_dpp")
3607 (set_attr "length" "8")])
3608
3609 (define_insn "*plus_carry_in_dpp_shr_<mode>"
3610 [(set (match_operand:V_SI 0 "register_operand" "=v")
3611 (unspec:V_SI
3612 [(match_operand:V_SI 1 "register_operand" "v")
3613 (match_operand:V_SI 2 "register_operand" "v")
3614 (match_operand:SI 3 "const_int_operand" "n")
3615 (match_operand:DI 4 "register_operand" "cV")]
3616 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3617 (clobber (reg:DI VCC_REG))]
3618 ""
3619 {
3620 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
3621 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
3622 INTVAL (operands[3]));
3623 }
3624 [(set_attr "type" "vop_dpp")
3625 (set_attr "length" "8")])
3626
3627 (define_insn_and_split "*plus_carry_dpp_shr_<mode>"
3628 [(set (match_operand:V_DI 0 "register_operand" "=v")
3629 (unspec:V_DI
3630 [(match_operand:V_DI 1 "register_operand" "v")
3631 (match_operand:V_DI 2 "register_operand" "v")
3632 (match_operand:SI 3 "const_int_operand" "n")]
3633 UNSPEC_PLUS_CARRY_DPP_SHR))
3634 (clobber (reg:DI VCC_REG))]
3635 ""
3636 "#"
3637 "reload_completed"
3638 [(parallel [(set (match_dup 4)
3639 (unspec:<VnSI>
3640 [(match_dup 6) (match_dup 8) (match_dup 3)]
3641 UNSPEC_PLUS_CARRY_DPP_SHR))
3642 (clobber (reg:DI VCC_REG))])
3643 (parallel [(set (match_dup 5)
3644 (unspec:<VnSI>
3645 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
3646 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3647 (clobber (reg:DI VCC_REG))])]
3648 {
3649 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
3650 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
3651 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
3652 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
3653 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
3654 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3655 }
3656 [(set_attr "type" "vmult")
3657 (set_attr "length" "16")])
3658
3659 ;; }}}
3660 ;; {{{ Miscellaneous
3661
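; vec_series: operand 1 plus operand 2 times the lane number.  VGPR 1 is
; expected to hold the lane indices 0, 1, 2, ... (cf. _0_1_2_3 in
; while_ultsidi above); the SImode variant multiplies it by the step and
; adds a broadcast of the base, while the DImode variant zero-extends the
; 32-bit lane index (mul<mode>3_zext_dup2) before the add.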
3662 (define_expand "vec_series<mode>"
3663 [(match_operand:V_SI 0 "register_operand")
3664 (match_operand:SI 1 "gcn_alu_operand")
3665 (match_operand:SI 2 "gcn_alu_operand")]
3666 ""
3667 {
3668 rtx tmp = gen_reg_rtx (<MODE>mode);
3669 rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
3670
3671 emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
3672 emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
3673 DONE;
3674 })
3675
3676 (define_expand "vec_series<mode>"
3677 [(match_operand:V_DI 0 "register_operand")
3678 (match_operand:DI 1 "gcn_alu_operand")
3679 (match_operand:DI 2 "gcn_alu_operand")]
3680 ""
3681 {
3682 rtx tmp = gen_reg_rtx (<MODE>mode);
3683 rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
3684 rtx op1vec = gen_reg_rtx (<MODE>mode);
3685
3686 emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2]));
3687 emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1]));
3688 emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec));
3689 DONE;
3690 })
3691
3692 ;; }}}