]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/gcn/gcn-valu.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / gcn / gcn-valu.md
1 ;; Copyright (C) 2016-2024 Free Software Foundation, Inc.
2
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
6 ;; any later version.
7
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 ;; for more details.
12
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
16
17 ;; {{{ Vector iterators
18 ; SV iterators include both scalar and vector modes.
19
; One iterator per element type.  Each covers that element type at every
; lane count listed here: 2, 4, 8, 16, 32 and 64.
(define_mode_iterator V_QI [V2QI V4QI V8QI V16QI V32QI V64QI])
(define_mode_iterator V_HI [V2HI V4HI V8HI V16HI V32HI V64HI])
(define_mode_iterator V_HF [V2HF V4HF V8HF V16HF V32HF V64HF])
(define_mode_iterator V_SI [V2SI V4SI V8SI V16SI V32SI V64SI])
(define_mode_iterator V_SF [V2SF V4SF V8SF V16SF V32SF V64SF])
(define_mode_iterator V_DI [V2DI V4DI V8DI V16DI V32DI V64DI])
(define_mode_iterator V_DF [V2DF V4DF V8DF V16DF V32DF V64DF])
35
; Vector modes whose elements are smaller than a dword: the QI and HI
; element types at every lane count.
(define_mode_iterator V_QIHI
  [V2QI   V2HI
   V4QI   V4HI
   V8QI   V8HI
   V16QI  V16HI
   V32QI  V32HI
   V64QI  V64HI])
44
; Vector modes that fit in one vector register: QI, HI, SI, HF and SF
; elements at every lane count.
(define_mode_iterator V_1REG
  [V2QI   V2HI   V2SI   V2HF   V2SF
   V4QI   V4HI   V4SI   V4HF   V4SF
   V8QI   V8HI   V8SI   V8HF   V8SF
   V16QI  V16HI  V16SI  V16HF  V16SF
   V32QI  V32HI  V32SI  V32HF  V32SF
   V64QI  V64HI  V64SI  V64HF  V64SF])

; The same list under a second name, so that one pattern can iterate over
; two one-register modes at once (e.g.
; vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop).
(define_mode_iterator V_1REG_ALT
  [V2QI   V2HI   V2SI   V2HF   V2SF
   V4QI   V4HI   V4SI   V4HF   V4SF
   V8QI   V8HI   V8SI   V8HF   V8SF
   V16QI  V16HI  V16SI  V16HF  V16SF
   V32QI  V32HI  V32SI  V32HF  V32SF
   V64QI  V64HI  V64SI  V64HF  V64SF])
60
; The integer subset of the one-register vector modes (QI, HI and SI
; elements).
(define_mode_iterator V_INT_1REG
  [V2QI   V2HI   V2SI
   V4QI   V4HI   V4SI
   V8QI   V8HI   V8SI
   V16QI  V16HI  V16SI
   V32QI  V32HI  V32SI
   V64QI  V64HI  V64SI])

; Identical list under a second name; presumably for patterns that need two
; independent integer one-register modes -- confirm against its users.
(define_mode_iterator V_INT_1REG_ALT
  [V2QI   V2HI   V2SI
   V4QI   V4HI   V4SI
   V8QI   V8HI   V8SI
   V16QI  V16HI  V16SI
   V32QI  V32HI  V32SI
   V64QI  V64HI  V64SI])
; The floating-point subset of the one-register vector modes (HF and SF
; elements).
(define_mode_iterator V_FP_1REG
  [V2HF   V2SF
   V4HF   V4SF
   V8HF   V8SF
   V16HF  V16SF
   V32HF  V32SF
   V64HF  V64SF])
82
; Vector modes that need two vector registers: DI and DF elements at every
; lane count.
(define_mode_iterator V_2REG
  [V2DI   V2DF
   V4DI   V4DF
   V8DI   V8DF
   V16DI  V16DF
   V32DI  V32DF
   V64DI  V64DF])

; Identical list under a second name; presumably for patterns that need two
; independent two-register modes -- confirm against its users.
(define_mode_iterator V_2REG_ALT
  [V2DI   V2DF
   V4DI   V4DF
   V8DI   V8DF
   V16DI  V16DF
   V32DI  V32DF
   V64DI  V64DF])
98
; Vector modes that need four vector registers: TI elements at every lane
; count.  V_4REG_ALT is the same list under a second name.
(define_mode_iterator V_4REG
  [V2TI V4TI V8TI V16TI V32TI V64TI])
(define_mode_iterator V_4REG_ALT
  [V2TI V4TI V8TI V16TI V32TI V64TI])
102
; Vector modes with native support.
; V_noQI leaves out the QI-element modes.
(define_mode_iterator V_noQI
  [V2HI   V2HF   V2SI   V2SF   V2DI   V2DF
   V4HI   V4HF   V4SI   V4SF   V4DI   V4DF
   V8HI   V8HF   V8SI   V8SF   V8DI   V8DF
   V16HI  V16HF  V16SI  V16SF  V16DI  V16DF
   V32HI  V32HF  V32SI  V32SF  V32DI  V32DF
   V64HI  V64HF  V64SI  V64SF  V64DI  V64DF])

; V_noHI leaves out both the QI- and the HI-element modes.
(define_mode_iterator V_noHI
  [V2HF   V2SI   V2SF   V2DI   V2DF
   V4HF   V4SI   V4SF   V4DI   V4DF
   V8HF   V8SI   V8SF   V8DI   V8DF
   V16HF  V16SI  V16SF  V16DI  V16DF
   V32HF  V32SI  V32SF  V32DI  V32DF
   V64HF  V64SI  V64SF  V64DI  V64DF])
118
; Integer vector modes without the QI-element modes.
(define_mode_iterator V_INT_noQI
  [V2HI   V2SI   V2DI
   V4HI   V4SI   V4DI
   V8HI   V8SI   V8DI
   V16HI  V16SI  V16DI
   V32HI  V32SI  V32DI
   V64HI  V64SI  V64DI])

; Integer vector modes without the QI- and HI-element modes.
(define_mode_iterator V_INT_noHI
  [V2SI   V2DI
   V4SI   V4DI
   V8SI   V8DI
   V16SI  V16DI
   V32SI  V32DI
   V64SI  V64DI])
133
; Single- and double-precision floating point, scalar (SF, DF) as well as
; every vector width.
(define_mode_iterator SV_SFDF
  [SF     DF
   V2SF   V2DF
   V4SF   V4DF
   V8SF   V8DF
   V16SF  V16DF
   V32SF  V32DF
   V64SF  V64DF])
142
; All modes in which we want to do more than just moves: every element type
; except TI, at every lane count.
(define_mode_iterator V_ALL
  [V2QI   V2HI   V2HF   V2SI   V2SF   V2DI   V2DF
   V4QI   V4HI   V4HF   V4SI   V4SF   V4DI   V4DF
   V8QI   V8HI   V8HF   V8SI   V8SF   V8DI   V8DF
   V16QI  V16HI  V16HF  V16SI  V16SF  V16DI  V16DF
   V32QI  V32HI  V32HF  V32SI  V32SF  V32DI  V32DF
   V64QI  V64HI  V64HF  V64SI  V64SF  V64DI  V64DF])

; Identical list under a second name; presumably for patterns that need two
; independent modes from this set -- confirm against its users.
(define_mode_iterator V_ALL_ALT
  [V2QI   V2HI   V2HF   V2SI   V2SF   V2DI   V2DF
   V4QI   V4HI   V4HF   V4SI   V4SF   V4DI   V4DF
   V8QI   V8HI   V8HF   V8SI   V8SF   V8DI   V8DF
   V16QI  V16HI  V16HF  V16SI  V16SF  V16DI  V16DF
   V32QI  V32HI  V32HF  V32SI  V32SF  V32DI  V32DF
   V64QI  V64HI  V64HF  V64SI  V64SF  V64DI  V64DF])
158
; Every integer vector mode with QI, HI, SI or DI elements.
(define_mode_iterator V_INT
  [V2QI   V2HI   V2SI   V2DI
   V4QI   V4HI   V4SI   V4DI
   V8QI   V8HI   V8SI   V8DI
   V16QI  V16HI  V16SI  V16DI
   V32QI  V32HI  V32SI  V32DI
   V64QI  V64HI  V64SI  V64DI])
; Every floating-point vector mode (HF, SF and DF elements).
(define_mode_iterator V_FP
  [V2HF   V2SF   V2DF
   V4HF   V4SF   V4DF
   V8HF   V8SF   V8DF
   V16HF  V16SF  V16DF
   V32HF  V32SF  V32DF
   V64HF  V64SF  V64DF])
; Floating-point modes, scalar (HF, SF, DF) as well as every vector width.
(define_mode_iterator SV_FP
  [HF     SF     DF
   V2HF   V2SF   V2DF
   V4HF   V4SF   V4DF
   V8HF   V8SF   V8DF
   V16HF  V16SF  V16DF
   V32HF  V32SF  V32DF
   V64HF  V64SF  V64DF])
181
; All modes that need moves, including those without many insns: the V_ALL
; element types plus TI.
(define_mode_iterator V_MOV
  [V2QI   V2HI   V2HF   V2SI   V2SF   V2DI   V2DF   V2TI
   V4QI   V4HI   V4HF   V4SI   V4SF   V4DI   V4DF   V4TI
   V8QI   V8HI   V8HF   V8SI   V8SF   V8DI   V8DF   V8TI
   V16QI  V16HI  V16HF  V16SI  V16SF  V16DI  V16DF  V16TI
   V32QI  V32HI  V32HF  V32SI  V32SF  V32DI  V32DF  V32TI
   V64QI  V64HI  V64HF  V64SI  V64SF  V64DI  V64DF  V64TI])

; Identical list under a second name; presumably for patterns that need two
; independent movable modes -- confirm against its users.
(define_mode_iterator V_MOV_ALT
  [V2QI   V2HI   V2HF   V2SI   V2SF   V2DI   V2DF   V2TI
   V4QI   V4HI   V4HF   V4SI   V4SF   V4DI   V4DF   V4TI
   V8QI   V8HI   V8HF   V8SI   V8SF   V8DI   V8DF   V8TI
   V16QI  V16HI  V16HF  V16SI  V16SF  V16DI  V16DF  V16TI
   V32QI  V32HI  V32HF  V32SI  V32SF  V32DI  V32DF  V32TI
   V64QI  V64HI  V64HF  V64SI  V64SF  V64DI  V64DF  V64TI])
197
; <scalar_mode>: lower-case name of a mode's element mode.  Scalar modes map
; to themselves.
(define_mode_attr scalar_mode
  [(QI    "qi") (HI    "hi") (SI    "si") (TI    "ti")
   (HF    "hf") (SF    "sf") (DI    "di") (DF    "df")
   (V2QI  "qi") (V2HI  "hi") (V2SI  "si") (V2TI  "ti")
   (V2HF  "hf") (V2SF  "sf") (V2DI  "di") (V2DF  "df")
   (V4QI  "qi") (V4HI  "hi") (V4SI  "si") (V4TI  "ti")
   (V4HF  "hf") (V4SF  "sf") (V4DI  "di") (V4DF  "df")
   (V8QI  "qi") (V8HI  "hi") (V8SI  "si") (V8TI  "ti")
   (V8HF  "hf") (V8SF  "sf") (V8DI  "di") (V8DF  "df")
   (V16QI "qi") (V16HI "hi") (V16SI "si") (V16TI "ti")
   (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
   (V32QI "qi") (V32HI "hi") (V32SI "si") (V32TI "ti")
   (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
   (V64QI "qi") (V64HI "hi") (V64SI "si") (V64TI "ti")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
213
; <SCALAR_MODE>: upper-case name of a mode's element mode.  Scalar modes map
; to themselves.
(define_mode_attr SCALAR_MODE
  [(QI    "QI") (HI    "HI") (SI    "SI") (TI    "TI")
   (HF    "HF") (SF    "SF") (DI    "DI") (DF    "DF")
   (V2QI  "QI") (V2HI  "HI") (V2SI  "SI") (V2TI  "TI")
   (V2HF  "HF") (V2SF  "SF") (V2DI  "DI") (V2DF  "DF")
   (V4QI  "QI") (V4HI  "HI") (V4SI  "SI") (V4TI  "TI")
   (V4HF  "HF") (V4SF  "SF") (V4DI  "DI") (V4DF  "DF")
   (V8QI  "QI") (V8HI  "HI") (V8SI  "SI") (V8TI  "TI")
   (V8HF  "HF") (V8SF  "SF") (V8DI  "DI") (V8DF  "DF")
   (V16QI "QI") (V16HI "HI") (V16SI "SI") (V16TI "TI")
   (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
   (V32QI "QI") (V32HI "HI") (V32SI "SI") (V32TI "TI")
   (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
   (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64TI "TI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
229
; <vnsi>: lower-case name of the SI-element vector mode with the same number
; of lanes.  Scalar modes map to "si".
(define_mode_attr vnsi
  [(QI    "si")    (HI    "si")    (SI    "si")    (TI    "si")
   (HF    "si")    (SF    "si")    (DI    "si")    (DF    "si")
   (V2QI  "v2si")  (V2HI  "v2si")  (V2HF  "v2si")  (V2SI  "v2si")
   (V2SF  "v2si")  (V2DI  "v2si")  (V2DF  "v2si")  (V2TI  "v2si")
   (V4QI  "v4si")  (V4HI  "v4si")  (V4HF  "v4si")  (V4SI  "v4si")
   (V4SF  "v4si")  (V4DI  "v4si")  (V4DF  "v4si")  (V4TI  "v4si")
   (V8QI  "v8si")  (V8HI  "v8si")  (V8HF  "v8si")  (V8SI  "v8si")
   (V8SF  "v8si")  (V8DI  "v8si")  (V8DF  "v8si")  (V8TI  "v8si")
   (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
   (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") (V16TI "v16si")
   (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
   (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") (V32TI "v32si")
   (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si") (V64TI "v64si")])
245
; <VnSI>: upper-case name of the SI-element vector mode with the same number
; of lanes.  Scalar modes map to "SI".
(define_mode_attr VnSI
  [(QI    "SI")    (HI    "SI")    (SI    "SI")    (TI    "SI")
   (HF    "SI")    (SF    "SI")    (DI    "SI")    (DF    "SI")
   (V2QI  "V2SI")  (V2HI  "V2SI")  (V2HF  "V2SI")  (V2SI  "V2SI")
   (V2SF  "V2SI")  (V2DI  "V2SI")  (V2DF  "V2SI")  (V2TI  "V2SI")
   (V4QI  "V4SI")  (V4HI  "V4SI")  (V4HF  "V4SI")  (V4SI  "V4SI")
   (V4SF  "V4SI")  (V4DI  "V4SI")  (V4DF  "V4SI")  (V4TI  "V4SI")
   (V8QI  "V8SI")  (V8HI  "V8SI")  (V8HF  "V8SI")  (V8SI  "V8SI")
   (V8SF  "V8SI")  (V8DI  "V8SI")  (V8DF  "V8SI")  (V8TI  "V8SI")
   (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
   (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") (V16TI "V16SI")
   (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
   (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") (V32TI "V32SI")
   (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI") (V64TI "V64SI")])
261
; <vndi>: lower-case name of the DI-element vector mode with the same number
; of lanes.  Only vector modes have an entry.
(define_mode_attr vndi
  [(V2QI  "v2di")  (V2HI  "v2di")  (V2HF  "v2di")  (V2SI  "v2di")
   (V2SF  "v2di")  (V2DI  "v2di")  (V2DF  "v2di")  (V2TI  "v2di")
   (V4QI  "v4di")  (V4HI  "v4di")  (V4HF  "v4di")  (V4SI  "v4di")
   (V4SF  "v4di")  (V4DI  "v4di")  (V4DF  "v4di")  (V4TI  "v4di")
   (V8QI  "v8di")  (V8HI  "v8di")  (V8HF  "v8di")  (V8SI  "v8di")
   (V8SF  "v8di")  (V8DI  "v8di")  (V8DF  "v8di")  (V8TI  "v8di")
   (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
   (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") (V16TI "v16di")
   (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
   (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") (V32TI "v32di")
   (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di") (V64TI "v64di")])
275
; <VnDI>: upper-case name of the DI-element vector mode with the same number
; of lanes.  Only vector modes have an entry.
(define_mode_attr VnDI
  [(V2QI  "V2DI")  (V2HI  "V2DI")  (V2HF  "V2DI")  (V2SI  "V2DI")
   (V2SF  "V2DI")  (V2DI  "V2DI")  (V2DF  "V2DI")  (V2TI  "V2DI")
   (V4QI  "V4DI")  (V4HI  "V4DI")  (V4HF  "V4DI")  (V4SI  "V4DI")
   (V4SF  "V4DI")  (V4DI  "V4DI")  (V4DF  "V4DI")  (V4TI  "V4DI")
   (V8QI  "V8DI")  (V8HI  "V8DI")  (V8HF  "V8DI")  (V8SI  "V8DI")
   (V8SF  "V8DI")  (V8DI  "V8DI")  (V8DF  "V8DI")  (V8TI  "V8DI")
   (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
   (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") (V16TI "V16DI")
   (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
   (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") (V32TI "V32DI")
   (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI") (V64TI "V64DI")])
289
; <sdwa>: a selector string chosen by integer element width -- "BYTE_0" for
; QI, "WORD_0" for HI and "DWORD" for SI elements.
; NOTE(review): presumably the SDWA operand-select keyword emitted into the
; assembly; confirm against the patterns that use it.
(define_mode_attr sdwa
  [(V2QI  "BYTE_0") (V2HI  "WORD_0") (V2SI  "DWORD")
   (V4QI  "BYTE_0") (V4HI  "WORD_0") (V4SI  "DWORD")
   (V8QI  "BYTE_0") (V8HI  "WORD_0") (V8SI  "DWORD")
   (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD")
   (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD")
   (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
297
298 ;; }}}
299 ;; {{{ Substitutions
300
; Name suffixes for the substitutions below.  Each attribute expands to ""
; in the pattern as written and to "_exec" in the variant generated by the
; named define_subst.
(define_subst_attr "exec"         "vec_merge"              "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber" "" "_exec")
(define_subst_attr "exec_vcc"     "vec_merge_with_vcc"     "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"          "" "_exec")
309
; Turn a plain vector SET into a masked one.  The original source becomes
; the first arm of a vec_merge, operand 3 (a register or an unspec) supplies
; the other arm, and operand 4 is the exec-register mask.
(define_subst "vec_merge"
  [(set (match_operand:V_MOV 0)
        (match_operand:V_MOV 1))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_MOV
          (match_dup 1)
          (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI    4 "gcn_exec_reg_operand"           "e")))])
319
; As "vec_merge", for patterns that also carry a clobber.  The clobber of
; operand 2 is kept unchanged next to the masked SET.
(define_subst "vec_merge_with_clobber"
  [(set (match_operand:V_MOV 0)
        (match_operand:V_MOV 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_MOV
          (match_dup 1)
          (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI    4 "gcn_exec_reg_operand"           "e")))
   (clobber (match_dup 2))])
331
; As "vec_merge", for patterns that also set a DImode result (operand 2).
; The vector SET is masked by operand 5, and the DImode value is ANDed with
; the EXEC register.
(define_subst "vec_merge_with_vcc"
  [(set (match_operand:V_MOV 0)
        (match_operand:V_MOV 1))
   (set (match_operand:DI 2)
        (match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
           (vec_merge:V_MOV
             (match_dup 1)
             (match_operand:V_MOV 4 "gcn_register_or_unspec_operand" "U0")
             (match_operand:DI    5 "gcn_exec_reg_operand"           "e")))
      (set (match_dup 2)
           (and:DI (match_dup 3)
                   (reg:DI EXEC_REG)))])])
347
; Masked form of a scatter store: the four original UNSPEC_SCATTER operands
; are kept and the exec-register mask is appended as a fifth (operand 4).
(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand 0)
           (match_operand 1)
           (match_operand 2)
           (match_operand 3)]
          UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 0)
           (match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
          UNSPEC_SCATTER))])
365
366 ;; }}}
367 ;; {{{ Vector moves
368
; This is the entry point for all vector register moves.  Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.
;
; Moves with no MEM operand fall through to the mov insns below.  Moves
; involving a MEM are emitted here: as a scatter or gather before register
; allocation, or as a mov<mode>_sgprbase insn while LRA is running.

(define_expand "mov<mode>"
  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
        (match_operand:V_MOV 1 "general_operand"))]
  ""
  {
    /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
       registers.  For a same-size SUBREG stored to a MEM, re-type a copy
       of the MEM to the inner mode and store the inner value directly.  */
    if (MEM_P (operands[0]) && !SUBREG_P (operands[0])
        && SUBREG_P (operands[1])
        && (GET_MODE_SIZE (GET_MODE (operands[1]))
            == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1])))))
      {
        rtx inner = SUBREG_REG (operands[1]);
        rtx retyped_mem = copy_rtx (operands[0]);
        PUT_MODE_RAW (retyped_mem, GET_MODE (inner));
        emit_move_insn (retyped_mem, inner);
        DONE;
      }

    /* The mirror image: a MEM loaded into a same-size SUBREG.  */
    if (MEM_P (operands[1]) && !SUBREG_P (operands[1])
        && SUBREG_P (operands[0])
        && (GET_MODE_SIZE (GET_MODE (operands[0]))
            == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0])))))
      {
        rtx inner = SUBREG_REG (operands[0]);
        rtx retyped_mem = copy_rtx (operands[1]);
        PUT_MODE_RAW (retyped_mem, GET_MODE (inner));
        emit_move_insn (inner, retyped_mem);
        DONE;
      }

    /* SUBREG of MEM is not supported.  */
    gcc_assert (!(SUBREG_P (operands[0])
                  && MEM_P (SUBREG_REG (operands[0]))));
    gcc_assert (!(SUBREG_P (operands[1])
                  && MEM_P (SUBREG_REG (operands[1]))));

    if (MEM_P (operands[0]) || MEM_P (operands[1]))
      {
        if (lra_in_progress || reload_completed)
          {
            /* Scratch operands cannot be used here, so hand the sgprbase
               pattern a fresh pseudo for the address.  New pseudos are only
               possible while reload has not yet completed.  */
            gcc_assert (!reload_completed);
            rtx addr_tmp = gen_reg_rtx (<VnDI>mode);
            emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1],
                                               addr_tmp));
          }
        else if (MEM_P (operands[0]))
          {
            /* Store: the value must be in a register; emit a scatter.  */
            operands[1] = force_reg (<MODE>mode, operands[1]);
            rtx addr_tmp = gen_rtx_SCRATCH (<VnDI>mode);
            rtx addr_space = GEN_INT (MEM_ADDR_SPACE (operands[0]));
            rtx is_volatile = GEN_INT (MEM_VOLATILE_P (operands[0]));
            rtx addr_expr
              = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                     operands[0], addr_tmp);
            emit_insn (gen_scatter<mode>_expr (addr_expr, operands[1],
                                               addr_space, is_volatile));
          }
        else
          {
            /* Load: operands[1] is the MEM; emit a gather.  */
            rtx addr_tmp = gen_rtx_SCRATCH (<VnDI>mode);
            rtx addr_space = GEN_INT (MEM_ADDR_SPACE (operands[1]));
            rtx is_volatile = GEN_INT (MEM_VOLATILE_P (operands[1]));
            rtx addr_expr
              = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                     operands[1], addr_tmp);
            emit_insn (gen_gather<mode>_expr (operands[0], addr_expr,
                                              addr_space, is_volatile));
          }
        DONE;
      }
  })
440
; A pseudo instruction that helps LRA use the "U0" constraint.
; It assigns an unspec operand to a vector register and emits no assembly
; (empty template, length 0).

(define_insn "mov<mode>_unspec"
  [(set (match_operand:V_MOV 0 "nonimmediate_operand" "=v")
        (match_operand:V_MOV 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type"   "unknown")
   (set_attr "length" "0")])
450
; Unmasked move of a one-register vector.  The first two alternatives are
; v_mov_b32 from a VGPR/"A" operand or a "B" constant; the rest move between
; the "v" and "a" register classes, with a-to-a limited to cdna2.
(define_insn "*mov<mode>"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand")
        (match_operand:V_1REG 1 "general_operand"))]
  ""
  {@ [cons: =0, 1; attrs: type, length, gcn_version]
     [v  , vA ; vop1      , 4 , *    ] v_mov_b32\t%0, %1
     [v  , B  ; vop1      , 8 , *    ] ^
     [v  , a  ; vop3p_mai , 8 , *    ] v_accvgpr_read_b32\t%0, %1
     [$a , v  ; vop3p_mai , 8 , *    ] v_accvgpr_write_b32\t%0, %1
     [a  , a  ; vop1      , 4 , cdna2] v_accvgpr_mov_b32\t%0, %1
  })
462
; Masked move of a one-register vector: lanes selected by operand 3 take
; operand 1, the others take operand 2.  The first two alternatives are a
; v_mov_b32 under the exec mask.  The next two use v_cndmask_b32 with the
; mask in vcc or in operand 3.  The last two involve memory and output "#".
; Operand 4 is a scratch that only the memory alternatives claim.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand")
        (vec_merge:V_1REG
          (match_operand:V_1REG 1 "general_operand")
          (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand")
          (match_operand:DI 3 "register_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {@ [cons: =0, 1, 2, 3, =4; attrs: type, length]
     [v , vA , U0 , e  , X  ; vop1  , 4 ] v_mov_b32\t%0, %1
     [v , B  , U0 , e  , X  ; vop1  , 8 ] v_mov_b32\t%0, %1
     [v , v  , vA , cV , X  ; vop2  , 4 ] v_cndmask_b32\t%0, %2, %1, vcc
     [v , vA , vA , Sv , X  ; vop3a , 8 ] v_cndmask_b32\t%0, %2, %1, %3
     [v , m  , U0 , e  , &v ; *     , 16] #
     [m , v  , U0 , e  , &v ; *     , 16] #
  })
479
480 ; This variant does not accept an unspec, but does permit MEM
481 ; read/modify/write which is necessary for maskstore.
482
483 ;(define_insn "*mov<mode>_exec_match"
484 ; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
485 ; (vec_merge:V_1REG
486 ; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
487 ; (match_dup 0)
488 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
489 ; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
490 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
491 ; "@
492 ; v_mov_b32\t%0, %1
493 ; v_mov_b32\t%0, %1
494 ; #
495 ; #"
496 ; [(set_attr "type" "vop1,vop1,*,*")
497 ; (set_attr "length" "4,8,16,16")])
498
; Unmasked move of a two-register vector, emitted as two 32-bit moves.
; The low part is copied first unless the destination register number is
; above the source's, in which case the high part goes first so that an
; overlapping source is not overwritten before it has been read.
(define_insn "*mov<mode>"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v,$a,a")
        (match_operand:V_2REG 1 "general_operand"      "vDB,a, v,a"))]
  ""
  {
    switch (which_alternative)
      {
      case 0:
        /* Operand 1 may be a constant here; constants never overlap.  */
        if (!REG_P (operands[1])
            || REGNO (operands[0]) <= REGNO (operands[1]))
          return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
        return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";

      case 1:
        return (REGNO (operands[0]) <= REGNO (operands[1])
                ? "v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1"
                : "v_accvgpr_read_b32\t%H0, %H1\;v_accvgpr_read_b32\t%L0, %L1");

      case 2:
        return (REGNO (operands[0]) <= REGNO (operands[1])
                ? "v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1"
                : "v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%L0, %L1");

      default:
        /* Alternative 3, the only one left.  */
        return (REGNO (operands[0]) <= REGNO (operands[1])
                ? "v_accvgpr_mov_b32\t%L0, %L1\;v_accvgpr_mov_b32\t%H0, %H1"
                : "v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1");
      }
  }
  [(set_attr "type" "vmult,vmult,vmult,vmult")
   (set_attr "length" "16,16,16,8")
   (set_attr "gcn_version" "*,*,*,cdna2")])
523
; Masked move of a two-register vector.  Alternative 0 is a pair of
; v_mov_b32 under the exec mask.  Alternatives 1 and 2 use v_cndmask_b32
; with the mask in vcc or in operand 3.  Alternatives 3 and 4 involve memory
; and output "#".  The two halves are emitted low-first unless the
; destination register number is above the source's.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=  v,   v,   v, v, m")
        (vec_merge:V_2REG
          (match_operand:V_2REG 1 "general_operand"    " vDB,  v0,  v0, m, v")
          (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
                                                       "  U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 3 "register_operand"       "   e,  cV,  Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4                    "=  X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    const bool low_first = (!REG_P (operands[1])
                            || REGNO (operands[0]) <= REGNO (operands[1]));

    switch (which_alternative)
      {
      case 0:
        return (low_first
                ? "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
                : "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1");
      case 1:
        return (low_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, vcc"
                : "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, vcc");
      case 2:
        return (low_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, %3"
                : "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, %3");
      default:
        /* The memory alternatives have no direct output.  */
        return "#";
      }
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])
563
; Unmasked move of a four-register vector, emitted as four 32-bit moves.
;
; The parts are copied in ascending order (%L, %H, %J, %K) unless the
; destination register number is above the source's.  In that case they are
; copied in descending order so that an overlapping source (e.g. v0..v3 into
; v1..v4) is not overwritten before it has been read.  This is the same
; direction rule the two-register "*mov<mode>" uses.
; This assumes %L, %H, %J and %K name the four 32-bit parts from lowest to
; highest register, as their order in the ascending templates suggests.
(define_insn "*mov<mode>_4reg"
  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "=  v,v,$a,a")
        (match_operand:V_4REG 1 "general_operand"      " vDB,a, v,a"))]
  ""
  {
    /* A non-register source (constant) cannot overlap the destination.  */
    const bool low_first = (!REG_P (operands[1])
                            || REGNO (operands[0]) <= REGNO (operands[1]));

    switch (which_alternative)
      {
      case 0:
        return (low_first
                ? "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
                  "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
                : "v_mov_b32\t%K0, %K1\;v_mov_b32\t%J0, %J1\;"
                  "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1");
      case 1:
        return (low_first
                ? "v_accvgpr_read_b32\t%L0, %L1\;"
                  "v_accvgpr_read_b32\t%H0, %H1\;"
                  "v_accvgpr_read_b32\t%J0, %J1\;"
                  "v_accvgpr_read_b32\t%K0, %K1"
                : "v_accvgpr_read_b32\t%K0, %K1\;"
                  "v_accvgpr_read_b32\t%J0, %J1\;"
                  "v_accvgpr_read_b32\t%H0, %H1\;"
                  "v_accvgpr_read_b32\t%L0, %L1");
      case 2:
        return (low_first
                ? "v_accvgpr_write_b32\t%L0, %L1\;"
                  "v_accvgpr_write_b32\t%H0, %H1\;"
                  "v_accvgpr_write_b32\t%J0, %J1\;"
                  "v_accvgpr_write_b32\t%K0, %K1"
                : "v_accvgpr_write_b32\t%K0, %K1\;"
                  "v_accvgpr_write_b32\t%J0, %J1\;"
                  "v_accvgpr_write_b32\t%H0, %H1\;"
                  "v_accvgpr_write_b32\t%L0, %L1");
      default:
        /* Alternative 3, the only one left.  */
        return (low_first
                ? "v_accvgpr_mov_b32\t%L0, %L1\;"
                  "v_accvgpr_mov_b32\t%H0, %H1\;"
                  "v_accvgpr_mov_b32\t%J0, %J1\;"
                  "v_accvgpr_mov_b32\t%K0, %K1"
                : "v_accvgpr_mov_b32\t%K0, %K1\;"
                  "v_accvgpr_mov_b32\t%J0, %J1\;"
                  "v_accvgpr_mov_b32\t%H0, %H1\;"
                  "v_accvgpr_mov_b32\t%L0, %L1");
      }
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16,32,32,32")
   (set_attr "gcn_version" "*,*,*,cdna2")])
574
; Masked move of a four-register vector.  Alternative 0 is four v_mov_b32
; under the exec mask.  Alternatives 1 and 2 use v_cndmask_b32 with the mask
; in vcc or in operand 3.  Alternatives 3 and 4 involve memory and output
; "#".
;
; The four parts are emitted in ascending order (%L, %H, %J, %K) unless the
; destination register number is above the source's.  In that case they are
; emitted in fully descending order (%K, %J, %H, %L), so that an overlapping
; source is never overwritten before it has been read.  Swapping only the
; first two parts is not enough: with v0..v3 copied into v1..v4, writing %H0
; (v2) before %J1 (v2) has been read corrupts the third part.
; This assumes %L, %H, %J and %K name the four 32-bit parts from lowest to
; highest register, as their order in the ascending templates suggests.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "=  v,   v,   v, v, m")
        (vec_merge:V_4REG
          (match_operand:V_4REG 1 "general_operand"    " vDB,  v0,  v0, m, v")
          (match_operand:V_4REG 2 "gcn_alu_or_unspec_operand"
                                                       "  U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 3 "register_operand"       "   e,  cV,  Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4                    "=  X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    /* A non-register source (constant) cannot overlap the destination.  */
    const bool low_first = (!REG_P (operands[1])
                            || REGNO (operands[0]) <= REGNO (operands[1]));

    switch (which_alternative)
      {
      case 0:
        return (low_first
                ? "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
                  "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
                : "v_mov_b32\t%K0, %K1\;v_mov_b32\t%J0, %J1\;"
                  "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1");
      case 1:
        return (low_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                  "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
                  "v_cndmask_b32\t%K0, %K2, %K1, vcc"
                : "v_cndmask_b32\t%K0, %K2, %K1, vcc\;"
                  "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, vcc");
      case 2:
        return (low_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                  "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
                  "v_cndmask_b32\t%K0, %K2, %K1, %3"
                : "v_cndmask_b32\t%K0, %K2, %K1, %3\;"
                  "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, %3");
      default:
        /* The memory alternatives have no direct output.  */
        return "#";
      }
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "32")])
624
625 ; This variant does not accept an unspec, but does permit MEM
626 ; read/modify/write which is necessary for maskstore.
627
628 ;(define_insn "*mov<mode>_exec_match"
629 ; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
630 ; (vec_merge:V_2REG
631 ; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
632 ; (match_dup 0)
633 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
634 ; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
635 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
636 ; "@
637 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
638 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
639 ; else \
640 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
641 ; #
642 ; #"
643 ; [(set_attr "type" "vmult,*,*")
644 ; (set_attr "length" "16,16,16")])
645
646 ; A SGPR-base load looks like:
647 ; <load> v, Sv
648 ;
649 ; There's no hardware instruction that corresponds to this, but vector base
650 ; addresses are placed in an SGPR because it is easier to add to a vector.
651 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
652 ;
653 ; Rewrite as:
654 ; vT = v1 << log2(element-size)
655 ; vT += Sv
656 ; flat_load v, vT
657
; One-register form.  Only valid while LRA is running or after reload.
; Operand 2 is an early-clobbered vector register, the temporary for the
; address.  The register alternatives are a plain v_mov_b32; the memory
; alternatives output "#".
(define_insn "@mov<mode>_sgprbase"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand")
        (unspec:V_1REG
          [(match_operand:V_1REG 1 "general_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand"))]
  "lra_in_progress || reload_completed"
  {@ [cons: =0, 1, =2; attrs: type, length, gcn_version]
     [v , vA , &v ; vop1 , 4  , *    ] v_mov_b32\t%0, %1
     [v , vB , &v ; vop1 , 8  , *    ] ^
     [v , m  , &v ; *    , 12 , *    ] #
     [m , v  , &v ; *    , 12 , *    ] #
     [a , m  , &v ; *    , 12 , cdna2] #
     [m , a  , &v ; *    , 12 , cdna2] #
  })
673
; Two-register form.  Alternative 0 is a register/constant move done as two
; v_mov_b32, low part first unless the destination register number is above
; the source's.  The memory alternatives output "#".
(define_insn "@mov<mode>_sgprbase"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=  v, v, m, a, m")
        (unspec:V_2REG
          [(match_operand:V_2REG 1 "general_operand"   " vDB, m, v, m, a")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "= &v,&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  {
    if (which_alternative != 0)
      return "#";

    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";

    return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult,*,*,*,*")
   (set_attr "length" "8,12,12,12,12")
   (set_attr "gcn_version" "*,*,*,cdna2,cdna2")])
693
; Four-register form.  Alternative 0 is a register/constant move done as
; four v_mov_b32.  The memory alternatives output "#".
;
; The parts are copied in ascending order (%L, %H, %J, %K) unless the
; destination register number is above the source's.  In that case they are
; copied in descending order so that an overlapping source is not
; overwritten before it has been read, as the two-register form does.
; This assumes %L, %H, %J and %K name the four 32-bit parts from lowest to
; highest register.
;
; Alternative 0 has length 16: four 4-byte v_mov_b32, matching the register
; alternative of "*mov<mode>_4reg".
; NOTE(review): a "B" literal source may need a longer encoding per move;
; confirm whether 32 is the safer bound here and in "*mov<mode>_4reg".
(define_insn "@mov<mode>_sgprbase"
  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "=  v, v, m")
        (unspec:V_4REG
          [(match_operand:V_4REG 1 "general_operand"   " vDB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "= &v,&v,&v"))]
  "lra_in_progress || reload_completed"
  {
    if (which_alternative != 0)
      return "#";

    /* A non-register source (constant) cannot overlap the destination.  */
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
             "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";

    return "v_mov_b32\t%K0, %K1\;v_mov_b32\t%J0, %J1\;"
           "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "16,12,12")])
706
; Expand scalar addresses into gather/scatter patterns

; SGPR-base store: rewrite as an UNSPEC_SCATTER of operand 1.  Operand 5 is
; the expanded vector address (built with scratch operand 2), operand 6 the
; MEM's address space and operand 7 its volatile flag.
(define_split
  [(set (match_operand:V_MOV 0 "memory_operand")
        (unspec:V_MOV
          [(match_operand:V_MOV 1 "general_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
          UNSPEC_SCATTER))]
  {
    rtx mem = operands[0];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, mem,
                                                       operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (mem));
  })
726
; Masked store: rewrite as an UNSPEC_SCATTER of operand 1 with the exec mask
; (operand 3) appended.  Operand 5 is the expanded vector address (built
; under that mask with scratch operand 4), operand 6 the MEM's address space
; and operand 7 its volatile flag.  The merge source, operand 2, is dropped.
(define_split
  [(set (match_operand:V_MOV 0 "memory_operand")
        (vec_merge:V_MOV
          (match_operand:V_MOV 1 "general_operand")
          (match_operand:V_MOV 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5) (match_dup 1)
           (match_dup 6) (match_dup 7) (match_dup 3)]
          UNSPEC_SCATTER))]
  {
    rtx mem = operands[0];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3], mem,
                                                       operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (mem));
  })
747
; SGPR-base load: rewrite as an UNSPEC_GATHER into operand 0.  Operand 5 is
; the expanded vector address (built with scratch operand 2), operand 6 the
; MEM's address space and operand 7 its volatile flag.
(define_split
  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
        (unspec:V_MOV
          [(match_operand:V_MOV 1 "memory_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (match_dup 0)
        (unspec:V_MOV
          [(match_dup 5) (match_dup 6) (match_dup 7)
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  {
    rtx mem = operands[1];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, mem,
                                                       operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (mem));
  })
766
; Masked load: rewrite as an UNSPEC_GATHER merged with operand 2 under the
; exec mask (operand 3).  Operand 5 is the expanded vector address (built
; under that mask with scratch operand 4), operand 6 the MEM's address space
; and operand 7 its volatile flag.
(define_split
  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
        (vec_merge:V_MOV
          (match_operand:V_MOV 1 "memory_operand")
          (match_operand:V_MOV 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_MOV
          (unspec:V_MOV
            [(match_dup 5) (match_dup 6) (match_dup 7)
             (mem:BLK (scratch))]
            UNSPEC_GATHER)
          (match_dup 2)
          (match_dup 3)))]
  {
    rtx mem = operands[1];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3], mem,
                                                       operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (mem));
  })
790
791 ; TODO: Add zero/sign extending variants.
792
793 ;; }}}
794 ;; {{{ Lane moves
795
796 ; v_writelane and v_readlane work regardless of exec flags.
797 ; We allow source to be scratch.
798 ;
799 ; FIXME these should take A immediates
800
; Write scalar operand 1 into one lane of a one-register vector.  The lane
; is written as the mask (1 << operand 2), so operand 2 is the lane index.
; The other lanes come from operand 3.
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_1REG 0 "register_operand"                "=  v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"     "  Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "  U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"           " SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type"       "vop3a")
   (set_attr "length"     "8")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
815
; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
;
; Two-register form of the lane write: one v_writelane_b32 for the low half
; and one for the high half, both at lane index operand 2.
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_2REG 0 "register_operand"                "=  v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"     "  Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "  U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"           " SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type"       "vmult")
   (set_attr "length"     "16")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
832
; Named expander for setting one vector lane: operand 1 is the scalar value
; and operand 2 the lane index.  The vector is updated in place, so the
; unselected lanes are operand 0 itself.
(define_expand "vec_set<mode>"
  [(set (match_operand:V_MOV 0 "register_operand")
        (vec_merge:V_MOV
          (vec_duplicate:V_MOV
            (match_operand:<SCALAR_MODE> 1 "register_operand"))
          (match_dup 0)
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"))))]
  "")
841
; Lane write where the lane mask is a constant.  The condition accepts only
; a mask with exactly one bit set, at a position below the number of lanes.
; The output code converts that mask to a lane index for v_writelane_b32.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_1REG 0 "register_operand"                "=v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"     "Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:SI 2 "const_int_operand"                 " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    /* Replace the one-hot mask with the index of its set bit.  */
    int lane = exact_log2 (INTVAL (operands[2]));
    operands[2] = GEN_INT (lane);
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type"       "vop3a")
   (set_attr "length"     "8")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
858
; Two-register lane write where the lane mask is a constant.  The condition
; accepts only a mask with exactly one bit set, at a position below the
; number of lanes.  The output code converts that mask to a lane index and
; writes the low and high halves separately.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_2REG 0 "register_operand"                "=v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"     "Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:SI 2 "const_int_operand"                 " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    /* Replace the one-hot mask with the index of its set bit.  */
    int lane = exact_log2 (INTVAL (operands[2]));
    operands[2] = GEN_INT (lane);
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type"       "vmult")
   (set_attr "length"     "16")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
875
; Broadcast scalar operand 1 to every lane of a one-register vector with a
; single v_mov_b32.  The <exec> suffix also generates the masked "_exec"
; variant through the "vec_merge" substitution.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_1REG 0 "register_operand"         "=  v")
        (vec_duplicate:V_1REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" " SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type"   "vop3a")
   (set_attr "length" "8")])
884
; Broadcast scalar operand 1 to every lane of a two-register vector: one
; v_mov_b32 for the low half and one for the high half.  The <exec> suffix
; also generates the masked "_exec" variant.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_2REG 0 "register_operand"         "=   v")
        (vec_duplicate:V_2REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" " SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type"   "vop3a")
   (set_attr "length" "16")])
893
;; Broadcast a scalar operand across a four-register vector: one v_mov_b32
;; for each of the %L, %H, %J and %K parts.
;;
;; The sequence consists solely of vector moves, so it is typed "vmult" like
;; the other multi-instruction vector patterns in this file (for example the
;; four-register vec_extract), not "mult".
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_4REG 0 "register_operand"          "=v")
        (vec_duplicate:V_4REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand"  "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
  [(set_attr "type" "vmult")
   (set_attr "length" "32")])
902
;; Read one lane (selected by operand 2) of a one-register vector into a
;; scalar register using v_readlane_b32.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"     "=Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:V_1REG 1 "register_operand"          "  v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand"    "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
914
;; Read one lane of a two-register vector into a scalar register pair.
;; Two v_readlane_b32 instructions are emitted and both read operand 2, so
;; the destination is marked earlyclobber.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"     "=&Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:V_2REG 1 "register_operand"          "   v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand"    " SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
926
;; Read one lane of a four-register vector into four scalar registers.
;; Four v_readlane_b32 instructions are emitted and all of them read
;; operand 2, so the destination is marked earlyclobber.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"     "=&Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:V_4REG 1 "register_operand"          "   v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand"    " SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2\;v_readlane_b32 %J0, %J1, %2\;v_readlane_b32 %K0, %K1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "32")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
938
;; Extract the leading lanes of a one-register vector as a narrower vector
;; with the same element type.  Operand 2 must satisfy the
;; ascending_zero_int_parallel predicate.
;;   Alternative 0: source and destination share a register; only an
;;                  assembler comment is emitted (length 0).
;;   Alternative 1: plain register copy.
(define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
  [(set (match_operand:V_1REG_ALT 0 "register_operand"          "=v,v")
        (vec_select:V_1REG_ALT
          (match_operand:V_1REG 1 "register_operand"            " 0,v")
          (match_operand 2 "ascending_zero_int_parallel"        "")))]
  "MODE_VF (<V_1REG_ALT:MODE>mode) < MODE_VF (<V_1REG:MODE>mode)
   && <V_1REG_ALT:SCALAR_MODE>mode == <V_1REG:SCALAR_MODE>mode
   /* This comment silences a warning for operands[2]. */"
  "@
   ; in-place extract %0
   v_mov_b32\t%L0, %L1"
  [(set_attr "type" "vmult")
   (set_attr "length" "0,8")])
952
;; Extract the leading lanes of a two-register vector as a narrower vector
;; with the same element type.  Operand 2 must satisfy the
;; ascending_zero_int_parallel predicate.
;;   Alternative 0: source and destination share registers; only an
;;                  assembler comment is emitted (length 0).
;;   Alternative 1: copy the %L and %H parts.
(define_insn "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop"
  [(set (match_operand:V_2REG_ALT 0 "register_operand"          "=v,v")
        (vec_select:V_2REG_ALT
          (match_operand:V_2REG 1 "register_operand"            " 0,v")
          (match_operand 2 "ascending_zero_int_parallel"        "")))]
  "MODE_VF (<V_2REG_ALT:MODE>mode) < MODE_VF (<V_2REG:MODE>mode)
   && <V_2REG_ALT:SCALAR_MODE>mode == <V_2REG:SCALAR_MODE>mode
   /* This comment silences a warning for operands[2]. */"
  "@
   ; in-place extract %0
   v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vmult")
   (set_attr "length" "0,8")])
966
;; Extract the leading lanes of a four-register vector as a narrower vector
;; with the same element type.  Operand 2 must satisfy the
;; ascending_zero_int_parallel predicate.
;;   Alternative 0: source and destination share registers; only an
;;                  assembler comment is emitted (length 0).
;;   Alternative 1: copy the %L, %H, %J and %K parts.
(define_insn "vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop"
  [(set (match_operand:V_4REG_ALT 0 "register_operand"          "=v,v")
        (vec_select:V_4REG_ALT
          (match_operand:V_4REG 1 "register_operand"            " 0,v")
          (match_operand 2 "ascending_zero_int_parallel"        "")))]
  "MODE_VF (<V_4REG_ALT:MODE>mode) < MODE_VF (<V_4REG:MODE>mode)
   && <V_4REG_ALT:SCALAR_MODE>mode == <V_4REG:SCALAR_MODE>mode"
  "@
   ; in-place extract %0
   v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
  [(set_attr "type" "vmult")
   (set_attr "length" "0,16")])
979
;; Extract sub-vector number operand 2 (counted in units of the narrower
;; mode) from operand 1 into operand 0.
;;  - Sub-vector 0 is handled by the matching "_nop" pattern.
;;  - Any other sub-vector is first permuted down to lane 0 with
;;    ds_bpermute and then read through a SUBREG of the narrower mode.
(define_expand "vec_extract<V_MOV:mode><V_MOV_ALT:mode>"
  [(match_operand:V_MOV_ALT 0 "register_operand")
   (match_operand:V_MOV 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)
   && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode"
  {
    int nlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode);
    int first = INTVAL (operands[2]) * nlanes;

    if (first != 0)
      {
        /* FIXME: optimize this by using DPP where available.  */

        /* Selector series: first*4, first*4 + 4, first*4 + 8, ...  */
        rtx selector = gen_reg_rtx (<V_MOV:VnSI>mode);
        emit_insn (gen_vec_series<V_MOV:vnsi> (selector,
                                               GEN_INT (first*4),
                                               GEN_INT (4)));

        rtx shuffled = gen_reg_rtx (<V_MOV:MODE>mode);
        emit_insn (gen_ds_bpermute<V_MOV:mode> (shuffled, selector,
                                                operands[1],
                                                get_exec (<V_MOV:MODE>mode)));

        emit_move_insn (operands[0],
                        gen_rtx_SUBREG (<V_MOV_ALT:MODE>mode, shuffled, 0));
      }
    else
      {
        /* Lanes 0 .. nlanes-1, in ascending order.  */
        rtx lanes = gen_rtx_PARALLEL (<V_MOV:MODE>mode,
                                      rtvec_alloc (nlanes));
        for (int lane = 0; lane < nlanes; lane++)
          XVECEXP (lanes, 0, lane) = GEN_INT (lane);
        emit_insn (gen_vec_extract<V_MOV:mode><V_MOV_ALT:mode>_nop
                   (operands[0], operands[1], lanes));
      }
    DONE;
  })
1016
;; Extract the vector element selected by the highest set bit of the DImode
;; mask in operand 1.  The lane number is computed as 63 - clz (mask) and
;; handed to the scalar vec_extract pattern.
(define_expand "extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:V_MOV 2 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx lane = gen_reg_rtx (SImode);

    /* lane = 63 - clz (mask).  */
    emit_insn (gen_clzdi2 (lane, operands[1]));
    emit_insn (gen_subsi3 (lane, GEN_INT (63), lane));

    emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], operands[2],
                                                   lane));
    DONE;
  })
1033
;; As extract_last_<mode>, but yields the default value in operand 1 when the
;; mask in operand 2 is zero.  Implemented with a branch around the
;; extraction.
(define_expand "fold_extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_MOV 3 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx mask = operands[2];
    rtx use_default = gen_label_rtx ();
    rtx finished = gen_label_rtx ();

    /* if (mask == 0) goto use_default;  */
    rtx mask_is_zero = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
    emit_jump_insn (gen_cbranchdi4 (mask_is_zero, mask, const0_rtx,
                                    use_default));

    /* Non-zero mask: take the last selected element.  */
    emit_insn (gen_extract_last_<mode> (operands[0], mask, operands[3]));
    emit_jump_insn (gen_jump (finished));
    emit_barrier ();

    /* Zero mask: take the default.  */
    emit_label (use_default);
    emit_move_insn (operands[0], operands[1]);

    emit_label (finished);
    DONE;
  })
1058
;; Build a vector from the initializer in operand 1.  All of the work is
;; delegated to gcn_expand_vector_init.
(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:V_MOV 0 "register_operand")
   (match_operand 1)]
  ""
  {
    rtx target = operands[0];
    rtx init = operands[1];

    gcn_expand_vector_init (target, init);
    DONE;
  })
1067
;; Build a vector from an initializer given in a narrower vector mode that
;; has the same element type.  All of the work is delegated to
;; gcn_expand_vector_init.
(define_expand "vec_init<V_MOV:mode><V_MOV_ALT:mode>"
  [(match_operand:V_MOV 0 "register_operand")
   (match_operand:V_MOV_ALT 1)]
  "<V_MOV:SCALAR_MODE>mode == <V_MOV_ALT:SCALAR_MODE>mode
   && MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)"
  {
    rtx target = operands[0];
    rtx init = operands[1];

    gcn_expand_vector_init (target, init);
    DONE;
  })
1077
1078 ;; }}}
1079 ;; {{{ Scatter / Gather
1080
1081 ;; GCN does not have an instruction for loading a vector from contiguous
1082 ;; memory so *all* loads and stores are eventually converted to scatter
1083 ;; or gather.
1084 ;;
1085 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
1086 ;; unspec. The unspec formats are as follows:
1087 ;;
1088 ;; (unspec:V??
1089 ;; [(<address expression>)
1090 ;; (<addr_space_t>)
1091 ;; (<use_glc>)
1092 ;; (mem:BLK (scratch))]
1093 ;; UNSPEC_GATHER)
1094 ;;
1095 ;; (unspec:BLK
1096 ;; [(<address expression>)
1097 ;; (<source register>)
1098 ;; (<addr_space_t>)
1099 ;; (<use_glc>)
1100 ;; (<exec>)]
1101 ;; UNSPEC_SCATTER)
1102 ;;
1103 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
1104 ;; - The mem:BLK does not contain any real information, but indicates that an
1105 ;; unknown memory read is taking place. Stores are expected to use a similar
1106 ;; mem:BLK outside the unspec.
1107 ;; - The address space and glc (volatile) fields are there to replace the
1108 ;; fields normally found in a MEM.
1109 ;; - Multiple forms of address expression are supported, below.
1110 ;;
1111 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
1112
;; Standard-name gather load.
;;   operand 0: destination vector
;;   operand 1: scalar DImode base
;;   operand 2: vector of SImode offsets
;;   operand 3: immediate, passed to gcn_expand_scaled_offsets
;;   operand 4: scale
;; gcn_expand_scaled_offsets returns either a vector of 64-bit addresses
;; (use the one-offset insn) or something else, which is passed as the
;; vector operand of the two-offset insn alongside the scalar base.
(define_expand "gather_load<mode><vnsi>"
  [(match_operand:V_MOV 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx dest = operands[0];
    rtx base = operands[1];
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), NULL);
    rtx pat;

    if (GET_MODE (addr) == <VnDI>mode)
      pat = gen_gather<mode>_insn_1offset (dest, addr, const0_rtx,
                                           const0_rtx, const0_rtx);
    else
      pat = gen_gather<mode>_insn_2offsets (dest, base, addr, const0_rtx,
                                            const0_rtx, const0_rtx);
    emit_insn (pat);
    DONE;
  })
1134
; Generic gather: operand 1 has no predicate, so any address expression is
; allowed.  Operands 2 and 3 are the address-space and glc immediates in the
; UNSPEC_GATHER layout.
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:V_MOV 0 "register_operand")
        (unspec:V_MOV
          [(match_operand 1 "")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  ""
  {})
1146
;; Gather load from a vector of 64-bit addresses (operand 1) plus a constant
;; offset (operand 2).  Operand 3 is the address space and operand 4 the glc
;; flag, both immediates.
;;
;; For the flat address space the output code only prints "offset:" when
;; TARGET_GCN5_PLUS, so on any other target a non-zero offset must be
;; rejected by the insn condition; otherwise it would be accepted and then
;; silently dropped from the instruction text.  The condition therefore uses
;; the same rule as scatter<mode>_insn_1offset: offset zero, or a 12-bit
;; unsigned offset on TARGET_GCN5_PLUS.  Global addresses accept offsets in
;; the range [-0x1000, 0xfff].
(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:V_MOV 0 "register_operand"              "=v,a,&v,&a")
        (unspec:V_MOV
          [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v,v, v, v")
                        (vec_duplicate:<VnDI>
                          (match_operand 2 "immediate_operand"  " n,n, n, n")))
           (match_operand 3 "immediate_operand"                 " n,n, n, n")
           (match_operand 4 "immediate_operand"                 " n,n, n, n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[2]) == 0
        || (TARGET_GCN5_PLUS
            && (unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
        && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    /* The template is assembled at output time because the glc suffix and
       the presence of the offset field depend on operand values/target.  */
    static char buf[200];
    if (AS_FLAT_P (as))
      {
        if (TARGET_GCN5_PLUS)
          sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
                   glc);
        else
          /* No offset field is printed here; the condition guarantees
             operand 2 is zero.  */
          sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
               "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "*,cdna2,*,cdna2")
   (set_attr "xnack" "off,off,on,on")])
1187
;; Gather load through the DS address spaces: a vector of 32-bit addresses
;; (operand 1) plus a constant offset below 0x10000 (operand 2).  Operand 3
;; is the address space; " gds" is appended when it is the GDS space.
(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:V_MOV 0 "register_operand"                  "=v,a")
        (unspec:V_MOV
          [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v,v")
                        (vec_duplicate:<VnSI>
                          (match_operand 2 "immediate_operand"     " n,n")))
           (match_operand 3 "immediate_operand"                    " n,n")
           (match_operand 4 "immediate_operand"                    " n,n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    static char buf[200];
    addr_space_t as = INTVAL (operands[3]);
    const char *gds = AS_GDS_P (as) ? " gds" : "";

    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
             gds);
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")
   (set_attr "gcn_version" "*,cdna2")])
1210
;; Gather load from "scalar base + sign-extended 32-bit vector offset +
;; constant offset".  Only the global address space is accepted, with the
;; constant offset in the range [-0x1000, 0xfff].
;;   operand 1: scalar DImode base
;;   operand 2: vector of SImode offsets
;;   operand 3: constant offset
;;   operand 4: address space      operand 5: glc flag
(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:V_MOV 0 "register_operand"            "=v,a,&v,&a")
        (unspec:V_MOV
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
                 (match_operand:DI 1 "register_operand"      "Sv,Sv,Sv,Sv"))
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 2 "register_operand"  " v, v, v, v")))
             (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
                                                             " n, n, n, n")))
           (match_operand 4 "immediate_operand"              " n, n, n, n")
           (match_operand 5 "immediate_operand"              " n, n, n, n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    static char buf[200];
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    /* The insn condition only admits the global address space.  */
    if (!AS_GLOBAL_P (as))
      gcc_unreachable ();

    sprintf (buf, "global_load%%o0\t%%0, %%2, %%1 offset:%%3%s\;"
             "s_waitcnt\tvmcnt(0)", glc);
    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "*,cdna2,*,cdna2")
   (set_attr "xnack" "off,off,on,on")])
1245
;; Standard-name scatter store.
;;   operand 0: scalar DImode base
;;   operand 1: vector of SImode offsets
;;   operand 2: immediate, passed to gcn_expand_scaled_offsets
;;   operand 3: scale
;;   operand 4: vector of values to store
;; gcn_expand_scaled_offsets returns either a vector of 64-bit addresses
;; (use the one-offset insn) or something else, which is passed as the
;; vector operand of the two-offset insn alongside the scalar base.
(define_expand "scatter_store<mode><vnsi>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:<VnSI> 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:V_MOV 4 "register_operand")]
  ""
  {
    rtx base = operands[0];
    rtx value = operands[4];
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), NULL);
    rtx pat;

    if (GET_MODE (addr) == <VnDI>mode)
      pat = gen_scatter<mode>_insn_1offset (addr, const0_rtx, value,
                                            const0_rtx, const0_rtx);
    else
      pat = gen_scatter<mode>_insn_2offsets (base, addr, const0_rtx, value,
                                             const0_rtx, const0_rtx);
    emit_insn (pat);
    DONE;
  })
1267
; Generic scatter: operand 0 has no predicate, so any address expression of
; mode <VnDI> is allowed.  Operand 1 is the vector of values to store;
; operands 2 and 3 are the address-space and glc immediates in the
; UNSPEC_SCATTER layout.
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VnDI> 0 "")
           (match_operand:V_MOV 1 "register_operand")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")]
          UNSPEC_SCATTER))]
  ""
  {})
1279
;; Scatter store to a vector of 64-bit addresses (operand 0) plus a constant
;; offset (operand 1).  Operand 2 is the data, operand 3 the address space
;; and operand 4 the glc flag.
;;   Flat:   offset must be zero, or below 0x1000 on TARGET_GCN5_PLUS (the
;;           only case in which the offset is printed).
;;   Global: offset in the range [-0x1000, 0xfff].
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v,v")
                        (vec_duplicate:<VnDI>
                          (match_operand 1 "immediate_operand"     "n,n")))
           (match_operand:V_MOV 2 "register_operand"               "v,a")
           (match_operand 3 "immediate_operand"                    "n,n")
           (match_operand 4 "immediate_operand"                    "n,n")]
          UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
        || (TARGET_GCN5_PLUS
            && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
        && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    static char buf[200];
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
    else if (!AS_FLAT_P (as))
      gcc_unreachable ();
    else if (TARGET_GCN5_PLUS)
      sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
    else
      sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "*,cdna2")])
1318
;; Scatter store through the DS address spaces: a vector of 32-bit addresses
;; (operand 0) plus a constant offset below 0x10000 (operand 1).  Operand 2
;; is the data and operand 3 the address space; " gds" is appended when it
;; is the GDS space.
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v,v")
                        (vec_duplicate:<VnSI>
                          (match_operand 1 "immediate_operand"     "n,n")))
           (match_operand:V_MOV 2 "register_operand"               "v,a")
           (match_operand 3 "immediate_operand"                    "n,n")
           (match_operand 4 "immediate_operand"                    "n,n")]
          UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    static char buf[200];
    addr_space_t as = INTVAL (operands[3]);
    const char *gds = AS_GDS_P (as) ? " gds" : "";

    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
             gds);
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")
   (set_attr "gcn_version" "*,cdna2")])
1341
;; Scatter store to "scalar base + sign-extended 32-bit vector offset +
;; constant offset".  Only the global address space is accepted, with the
;; constant offset in the range [-0x1000, 0xfff].
;;   operand 0: scalar DImode base
;;   operand 1: vector of SImode offsets
;;   operand 2: constant offset
;;   operand 3: data
;;   operand 4: address space      operand 5: glc flag
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
                 (match_operand:DI 0 "register_operand"            "Sv,Sv"))
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 1 "register_operand"        "v,v")))
             (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" "n,n")))
           (match_operand:V_MOV 3 "register_operand"               "v,a")
           (match_operand 4 "immediate_operand"                    "n,n")
           (match_operand 5 "immediate_operand"                    "n,n")]
          UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    static char buf[200];
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    /* The insn condition only admits the global address space.  */
    if (!AS_GLOBAL_P (as))
      gcc_unreachable ();

    sprintf (buf, "global_store%%s3\t%%1, %%3, %%0 offset:%%2%s", glc);
    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "*,cdna2")])
1373
1374 ;; }}}
1375 ;; {{{ Permutations
1376
;; ds_bpermute_b32 on a one-register vector.
;;   operand 0: result
;;   operand 1: <VnSI> selector vector
;;   operand 2: source vector
;;   operand 3: exec register (appears in the RTL, not in the template)
;; Note that operand 2 comes first inside the unspec.
(define_insn "ds_bpermute<mode>"
  [(set (match_operand:V_1REG 0 "register_operand"      "=v")
        (unspec:V_1REG
          [(match_operand:V_1REG 2 "register_operand"   " v")
           (match_operand:<VnSI> 1 "register_operand"   " v")
           (match_operand:DI 3 "gcn_exec_reg_operand"   " e")]
          UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])
1388
;; ds_bpermute on a two-register vector.  After reload this is split into
;; two <VnSI> bpermutes, one for part 0 and one for part 1 of the
;; source/destination, both using the same selector (operand 1) and exec
;; (operand 3).
(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:V_2REG 0 "register_operand"      "=&v")
        (unspec:V_2REG
          [(match_operand:V_2REG 2 "register_operand"   " v0")
           (match_operand:<VnSI> 1 "register_operand"   "  v")
           (match_operand:DI 3 "gcn_exec_reg_operand"   "  e")]
          UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4)
        (unspec:<VnSI> [(match_dup 6) (match_dup 1) (match_dup 3)]
                       UNSPEC_BPERMUTE))
   (set (match_dup 5)
        (unspec:<VnSI> [(match_dup 7) (match_dup 1) (match_dup 3)]
                       UNSPEC_BPERMUTE))]
  {
    /* Operands 4/5: destination parts 0/1.  */
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);

    /* Operands 6/7: source parts 0/1.  */
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])
1413
;; DPP move with a shift amount given by the constant in operand 2.  The
;; assembly text is produced by gcn_expand_dpp_shr_insn.  Not available when
;; TARGET_RDNA2.
(define_insn "@dpp_move<mode>"
  [(set (match_operand:V_noHI 0 "register_operand"      "=v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand"   " v")
           (match_operand:SI 2 "const_int_operand"      " n")]
          UNSPEC_MOV_DPP_SHR))]
  "!TARGET_RDNA2"
  {
    int shift = INTVAL (operands[2]);
    return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
                                    UNSPEC_MOV_DPP_SHR, shift);
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "16")])
1427
;; DPP "swap pairs" move.  The assembly text is produced by
;; gcn_expand_dpp_swap_pairs_insn.
(define_insn "@dpp_swap_pairs<mode>"
  [(set (match_operand:V_noHI 0 "register_operand"      "=v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand"   " v")]
          UNSPEC_MOV_DPP_SWAP_PAIRS))]
  ""
  {
    return gcn_expand_dpp_swap_pairs_insn (<MODE>mode,
                                           "v_mov_b32",
                                           UNSPEC_MOV_DPP_SWAP_PAIRS);
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "16")])
1440
;; DPP "distribute even" move.  The assembly text is produced by
;; gcn_expand_dpp_distribute_even_insn.
(define_insn "@dpp_distribute_even<mode>"
  [(set (match_operand:V_noHI 0 "register_operand"      "=v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand"   " v")]
          UNSPEC_MOV_DPP_DISTRIBUTE_EVEN))]
  ""
  {
    return gcn_expand_dpp_distribute_even_insn (<MODE>mode,
                                                "v_mov_b32",
                                                UNSPEC_MOV_DPP_DISTRIBUTE_EVEN);
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "16")])
1453
;; DPP "distribute odd" move.  The assembly text is produced by
;; gcn_expand_dpp_distribute_odd_insn.
;;
;; The RTL must use UNSPEC_MOV_DPP_DISTRIBUTE_ODD: with the EVEN unspec the
;; pattern would be structurally identical to @dpp_distribute_even<mode>,
;; so an insn generated from here would be recognised as the even variant
;; and print the wrong instruction.
(define_insn "@dpp_distribute_odd<mode>"
  [(set (match_operand:V_noHI 0 "register_operand"      "=v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand"   " v")]
          UNSPEC_MOV_DPP_DISTRIBUTE_ODD))]
  ""
  {
    return gcn_expand_dpp_distribute_odd_insn (<MODE>mode, "v_mov_b32",
                                               UNSPEC_MOV_DPP_DISTRIBUTE_ODD);
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "16")])
1466
1467 ;; }}}
1468 ;; {{{ ALU special case: add/sub
1469
;; Integer vector add for one-register modes.  The instruction writes a
;; carry-out to VCC, which is modelled as a clobber here.  The second
;; alternative differs only in accepting "B" constants and being 8 bytes.
(define_insn "add<mode>3<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand")
        (plus:V_INT_1REG
          (match_operand:V_INT_1REG 1 "register_operand")
          (match_operand:V_INT_1REG 2 "gcn_alu_operand")))
   (clobber (reg:DI VCC_REG))]
  ""
  {@ [cons: =0, %1, 2; attrs: type, length]
     [v, v, vSvA; vop2, 4] v_add%^_u32\t%0, vcc, %2, %1
     [v, v, vSvB; vop2, 8] ^
  })
1481
;; Integer vector add where one addend is a scalar broadcast to all lanes
;; (operand 2) and the other is a vector register (operand 1).  VCC is
;; clobbered by the carry-out.
(define_insn "add<mode>3_dup<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand")
        (plus:V_INT_1REG
          (vec_duplicate:V_INT_1REG
            (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"))
          (match_operand:V_INT_1REG 1 "register_operand")))
   (clobber (reg:DI VCC_REG))]
  ""
  {@ [cons: =0, 1, 2; attrs: type, length]
     [v, v, SvA; vop2, 4] v_add%^_u32\t%0, vcc, %2, %1
     [v, v, SvB; vop2, 8] ^
  })
1494
;; SImode vector add that also exposes the carry-out.  Operand 3 receives
;; the per-lane carry, described as (op1 + op2) <u op1.  It may be VCC
;; (VOP2 encoding) or a general SGPR pair (VOP3b encoding, 8 bytes).
(define_insn "add<mode>3_vcc<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand")
        (plus:V_SI
          (match_operand:V_SI 1 "register_operand")
          (match_operand:V_SI 2 "gcn_alu_operand")))
   (set (match_operand:DI 3 "register_operand")
        (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  {@ [cons: =0, %1, 2, =3; attrs: type, length]
     [v, v, vSvA, cV; vop2 , 4] v_add%^_u32\t%0, %3, %2, %1
     [v, v, vSvB, cV; vop2 , 8] ^
     [v, v, vSvA, Sg; vop3b, 8] ^
  })
1509
1510 ; This pattern only changes the VCC bits when the corresponding lane is
1511 ; enabled, so the set must be described as an ior.
1512
;; SImode vector add of a broadcast scalar (operand 1) and a vector register
;; (operand 2), also exposing the per-lane carry-out in operand 3.
;;
;; The carry is described as (dup (op1) + op2) <u dup (op1).  The carry
;; expression must use the operands with the same shapes as the sum itself:
;; operand 1 is the SImode scalar and needs the vec_duplicate, operand 2 is
;; already a V_SI vector and must not be wrapped in one.
(define_insn "add<mode>3_vcc_dup<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand")
        (plus:V_SI
          (vec_duplicate:V_SI
            (match_operand:SI 1 "gcn_alu_operand"))
          (match_operand:V_SI 2 "register_operand")))
   (set (match_operand:DI 3 "register_operand")
        (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 1))
                           (match_dup 2))
                (vec_duplicate:V_SI (match_dup 1))))]
  ""
  {@ [cons: =0, 1, 2, =3; attrs: type, length]
  [v,SvA,v,cV;vop2 ,4] v_add%^_u32\t%0, %3, %1, %2
  [v,SvB,v,cV;vop2 ,8] ^
  [v,SvA,v,Sg;vop3b,8] ^
  })
1529
; Add with carry-in and carry-out.
;
; v_addc cannot take an SGPR input: reading VCC already counts as the one
; SGPR use an instruction is allowed.  For a related bus-conflict reason it
; cannot take "B" immediate constants either.
;
;   operand 3: carry-in mask; each lane adds 1 if its bit is set, else 0
;   operand 4: carry-out mask, the IOR of the carries of the two partial sums

(define_insn "addc<mode>3<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"            "=v,   v")
        (plus:V_SI
          (plus:V_SI
            (vec_merge:V_SI
              (vec_duplicate:V_SI (const_int 1))
              (vec_duplicate:V_SI (const_int 0))
              (match_operand:DI 3 "register_operand"        " cV,cVSv"))
            (match_operand:V_SI 1 "gcn_alu_operand"         "% v,  vA"))
          (match_operand:V_SI 2 "gcn_alu_operand"           " vA,  vA")))
   (set (match_operand:DI 4 "register_operand"              "=cV,cVSg")
        (ior:DI
          ;; Carry out of ((carry-in + op1) + op2).
          (ltu:DI (plus:V_SI
                    (plus:V_SI
                      (vec_merge:V_SI
                        (vec_duplicate:V_SI (const_int 1))
                        (vec_duplicate:V_SI (const_int 0))
                        (match_dup 3))
                      (match_dup 1))
                    (match_dup 2))
                  (match_dup 2))
          ;; Carry out of (carry-in + op1).
          (ltu:DI (plus:V_SI
                    (vec_merge:V_SI
                      (vec_duplicate:V_SI (const_int 1))
                      (vec_duplicate:V_SI (const_int 0))
                      (match_dup 3))
                    (match_dup 1))
                  (match_dup 1))))]
  ""
  "{v_addc%^_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])
1565
;; Integer vector subtract for one-register modes; VCC is clobbered by the
;; borrow-out.
;;   Alternative 0: operand 2 is a VGPR       -> v_sub     op1, op2
;;   Alternative 1: operand 1 is a VGPR       -> v_subrev  op2, op1
(define_insn "sub<mode>3<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"  "=  v,   v")
        (minus:V_INT_1REG
          (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB,   v")
          (match_operand:V_INT_1REG 2 "gcn_alu_operand" "   v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
1578
;; SImode vector subtract that also exposes the borrow-out in operand 3,
;; described as (op1 - op2) >u op1.
;;   Alternatives 0/1: v_sub,    borrow to VCC / to an SGPR pair (VOP3b)
;;   Alternatives 2/3: v_subrev, borrow to VCC / to an SGPR pair (VOP3b)
(define_insn "sub<mode>3_vcc<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"    "=  v,   v,   v,   v")
        (minus:V_SI
          (match_operand:V_SI 1 "gcn_alu_operand"   "vSvB,vSvB,   v,   v")
          (match_operand:V_SI 2 "gcn_alu_operand"   "   v,   v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"      "= cV,  Sg,  cV,  Sg")
        (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])
1595
; Subtract with borrow-in and borrow-out.
;
; v_subb cannot take an SGPR input: reading VCC already counts as the one
; SGPR use an instruction is allowed.  For a related bus-conflict reason it
; cannot take "B" immediate constants either.
;
;   operand 3: borrow-in mask, modelled as a per-lane 1 or 0
;   operand 4: borrow-out mask
;   Alternatives 0/1 print v_subb, alternatives 2/3 print v_subbrev with the
;   two data operands exchanged.

(define_insn "subc<mode>3<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"            "= v, v, v, v")
        (minus:V_SI
          (minus:V_SI
            (vec_merge:V_SI
              (vec_duplicate:V_SI (const_int 1))
              (vec_duplicate:V_SI (const_int 0))
              (match_operand:DI 3 "gcn_alu_operand"         " cV,cVSv,cV,cVSv"))
            (match_operand:V_SI 1 "gcn_alu_operand"         " vA, vA, v, vA"))
          (match_operand:V_SI 2 "gcn_alu_operand"           " v, vA,vA, vA")))
   (set (match_operand:DI 4 "register_operand"              "=cV,cVSg,cV,cVSg")
        (ior:DI
          (gtu:DI (minus:V_SI
                    (minus:V_SI
                      (vec_merge:V_SI
                        (vec_duplicate:V_SI (const_int 1))
                        (vec_duplicate:V_SI (const_int 0))
                        (match_dup 3))
                      (match_dup 1))
                    (match_dup 2))
                  (match_dup 2))
          (ltu:DI (minus:V_SI
                    (vec_merge:V_SI
                      (vec_duplicate:V_SI (const_int 1))
                      (vec_duplicate:V_SI (const_int 0))
                      (match_dup 3))
                    (match_dup 1))
                  (match_dup 1))))]
  ""
  "@
   {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
   {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
   {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3
   {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "4,8,4,8")])
1633
;; 64-bit vector add.  There is no single instruction for this, so the insn
;; is always output as "#" and split into an add of part 0 that produces a
;; carry in VCC, followed by an add-with-carry of part 1.
(define_insn_and_split "add<mode>3"
  [(set (match_operand:V_DI 0 "register_operand"    "=  v")
        (plus:V_DI
          (match_operand:V_DI 1 "register_operand"  "%vDb")
          (match_operand:V_DI 2 "gcn_alu_operand"   " vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);

    /* Part 0: plain add, carry-out to VCC.  */
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx src1_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx src2_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc (dest_lo, src1_lo, src2_lo, vcc));

    /* Part 1: add with carry-in from VCC; carry-out goes back to VCC.  */
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx src1_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx src2_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    emit_insn (gen_addc<vnsi>3 (dest_hi, src1_hi, src2_hi, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1662
;; 64-bit vector add under an explicit exec mask.  Lanes whose bit in
;; operand 4 is clear take their value from operand 3.  Always output as
;; "#" and split into a masked add of part 0 (carry to VCC) and a masked
;; add-with-carry of part 1.
;;
;; The split takes parts of operands 0, 1, 2 and 3 with gcn_operand_part, so
;; those are the operands that must pass gcn_can_split_p.  Operand 4 is the
;; DImode exec mask and is passed through whole; it is not a split
;; candidate.  This matches the conditions used by sub<mode>3_exec and
;; add<mode>3_zext_exec.
(define_insn_and_split "add<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand"                "=  v")
        (vec_merge:V_DI
          (plus:V_DI
            (match_operand:V_DI 1 "register_operand"            "%vDb")
            (match_operand:V_DI 2 "gcn_alu_operand"             " vDb"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" "  U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"            "   e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);

    /* Part 0: masked add, carry-out to VCC.  */
    emit_insn (gen_add<vnsi>3_vcc_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (<MODE>mode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 0),
                 operands[4]));

    /* Part 1: masked add with carry-in from VCC.  */
    emit_insn (gen_addc<vnsi>3_exec
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[1], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1699
;; 64-bit vector subtract.  Always output as "#" and split into a subtract
;; of part 0 that produces a borrow in VCC, followed by a
;; subtract-with-borrow of part 1.
(define_insn_and_split "sub<mode>3"
  [(set (match_operand:V_DI 0 "register_operand"    "=  v,  v")
        (minus:V_DI
          (match_operand:V_DI 1 "gcn_alu_operand"   " vDb,  v")
          (match_operand:V_DI 2 "gcn_alu_operand"   "   v,vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);

    /* Part 0: plain subtract, borrow-out to VCC.  */
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx src1_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx src2_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    emit_insn (gen_sub<vnsi>3_vcc (dest_lo, src1_lo, src2_lo, vcc));

    /* Part 1: subtract with borrow-in from VCC.  */
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx src1_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx src2_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    emit_insn (gen_subc<vnsi>3 (dest_hi, src1_hi, src2_hi, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1728
;; 64-bit vector subtract under an explicit exec mask.  Lanes whose bit in
;; operand 4 is clear take their value from operand 3.  At least one of the
;; two data operands must be a register.  Always output as "#" and split
;; into a masked subtract of part 0 (borrow to VCC) and a masked
;; subtract-with-borrow of part 1.
(define_insn_and_split "sub<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand"                "=  v,   v")
        (vec_merge:V_DI
          (minus:V_DI
            (match_operand:V_DI 1 "gcn_alu_operand"             "vSvB,   v")
            (match_operand:V_DI 2 "gcn_alu_operand"             "   v,vSvB"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0,  U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"            "   e,   e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    rtx exec = operands[4];

    /* Part 0: masked subtract, borrow-out to VCC.  */
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx src1_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx src2_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx prev_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
    emit_insn (gen_sub<vnsi>3_vcc_exec (dest_lo, src1_lo, src2_lo, vcc,
                                        prev_lo, exec));

    /* Part 1: masked subtract with borrow-in from VCC.  */
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx src1_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx src2_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx prev_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
    emit_insn (gen_subc<vnsi>3_exec (dest_hi, src1_hi, src2_hi, vcc, vcc,
                                     prev_hi, exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1766
;; 64-bit vector add where operand 1 is a zero-extended 32-bit vector.
;; Always output as "#" and split: part 0 adds operand 1 to part 0 of
;; operand 2 (carry to VCC); part 1 adds zero plus the carry to part 1 of
;; operand 2.
(define_insn_and_split "add<mode>3_zext"
  [(set (match_operand:V_DI 0 "register_operand"        "=  v,  v")
        (plus:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand"   "  vA, vB"))
          (match_operand:V_DI 2 "gcn_alu_operand"       " vDb,vDA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);

    /* Part 0: operand 1 is already 32 bits wide, so it is used whole.  */
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx src2_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc (dest_lo, operands[1], src2_lo, vcc));

    /* Part 1: the zero-extended addend contributes 0 here.  */
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx src2_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    emit_insn (gen_addc<vnsi>3 (dest_hi, src2_hi, const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1794
;; Masked (vec_merge) form of the zero-extending V_DI add.  Operand 4 is the
;; EXEC mask and operand 3 supplies the other arm of the vec_merge (a register
;; or an unspec).  Output as "#" and split into two <VnSI>-wide masked
;; instructions; VCC is clobbered.
(define_insn_and_split "add<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
            (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx mask = operands[4];

    /* Low half, merged with the low half of operand 3.  */
    rtx sum_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx addend_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx prev_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
    emit_insn (gen_add<vnsi>3_vcc_exec (sum_lo, operands[1], addend_lo,
                                        carry, prev_lo, mask));

    /* High half: add zero plus the carry register, merged with the high
       half of operand 3.  */
    rtx sum_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx addend_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx prev_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
    emit_insn (gen_addc<vnsi>3_exec (sum_hi, addend_hi, const0_rtx,
                                     carry, carry, prev_hi, mask));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1830
;; V_DI add of operand 2 and a scalar SImode operand 1 that is broadcast
;; (vec_duplicate) and then zero-extended.  Operand 3 is an explicit DImode
;; output set to the ltu (carry) result of the addition.  Both alternatives
;; are split into a low-half add and a high-half add-with-carry of zero.
(define_insn_and_split "add<mode>3_vcc_zext_dup"
  [(set (match_operand:V_DI 0 "register_operand")
        (plus:V_DI
          (zero_extend:V_DI
            (vec_duplicate:<VnSI>
              (match_operand:SI 1 "gcn_alu_operand")))
          (match_operand:V_DI 2 "gcn_alu_operand")))
   (set (match_operand:DI 3 "register_operand")
        (ltu:DI (plus:V_DI
                  (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
                  (match_dup 2))
                (match_dup 1)))]
  ""
  {@ [cons: =0, 1, 2, =3]
  [v,ASv,v,&Sg] #
  [v,BSv,v,&cV] ^
  }
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx carry = operands[3];

    rtx sum_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar = gcn_operand_part (DImode, operands[1], 0);
    rtx vec_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup (sum_lo, scalar, vec_lo, carry));

    /* The extended scalar has no high part: add zero plus the carry.  */
    rtx sum_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx vec_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    emit_insn (gen_addc<vnsi>3 (sum_hi, vec_hi, const0_rtx, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1865
;; Convenience expander: same operation as add<mode>3_vcc_zext_dup, with
;; the hard VCC register supplied as the carry output.
(define_expand "add<mode>3_zext_dup"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:V_DI 2 "gcn_alu_operand")]
  ""
  {
    rtx dest = operands[0];
    rtx scalar = operands[1];
    rtx vec = operands[2];
    rtx carry_out = gen_rtx_REG (DImode, VCC_REG);

    emit_insn (gen_add<mode>3_vcc_zext_dup (dest, scalar, vec, carry_out));
    DONE;
  })
1877
;; Masked form of add<mode>3_vcc_zext_dup.  Operand 5 is the EXEC mask,
;; operand 4 the other arm of the vec_merge, and the carry output (operand 3)
;; is the ltu result ANDed with the mask.  Split into two masked <VnSI>
;; instructions.
(define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
  [(set (match_operand:V_DI 0 "register_operand")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI
              (vec_duplicate:<VnSI>
                (match_operand:SI 1 "gcn_alu_operand")))
            (match_operand:V_DI 2 "gcn_alu_operand"))
          (match_operand:V_DI 4 "gcn_register_or_unspec_operand")
          (match_operand:DI 5 "gcn_exec_reg_operand")))
   (set (match_operand:DI 3 "register_operand")
        (and:DI
          (ltu:DI (plus:V_DI
                    (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
                    (match_dup 2))
                  (match_dup 1))
          (match_dup 5)))]
  ""
  {@ [cons: =0, 1, 2, =3, 4, 5]
  [v,ASv,v,&Sg,U0,e] #
  [v,BSv,v,&cV,U0,e] ^
  }
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    rtx carry = operands[3];
    rtx mask = operands[5];

    /* Low half.  */
    rtx sum_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar = gcn_operand_part (DImode, operands[1], 0);
    rtx vec_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx prev_lo = gcn_operand_part (<MODE>mode, operands[4], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup_exec (sum_lo, scalar, vec_lo, carry,
                                            prev_lo, mask));

    /* High half: zero plus carry.  */
    rtx sum_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx vec_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx prev_hi = gcn_operand_part (<MODE>mode, operands[4], 1);
    emit_insn (gen_addc<vnsi>3_exec (sum_hi, vec_hi, const0_rtx, carry, carry,
                                     prev_hi, mask));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1922
;; Convenience expander for the masked variant: forwards to
;; add<mode>3_vcc_zext_dup_exec with the hard VCC register as carry output.
;; Note the operand renumbering: this expander's merge source (3) and EXEC
;; mask (4) become operands 4 and 5 of the underlying pattern.
(define_expand "add<mode>3_zext_dup_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:V_DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    rtx carry_out = gen_rtx_REG (DImode, VCC_REG);
    rtx merge_src = operands[3];
    rtx mask = operands[4];

    emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
                                                 operands[2], carry_out,
                                                 merge_src, mask));
    DONE;
  })
1937
;; V_DI add of a zero-extended <VnSI> vector (operand 1) and a broadcast
;; DImode scalar (operand 2).  Operand 3 is an explicit DImode output set to
;; the ltu (carry) result.  Split into: low-half add, broadcast of the
;; scalar's high word into the destination's high half, then add-with-carry
;; of zero on that half.
(define_insn_and_split "add<mode>3_vcc_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand")
        (plus:V_DI
          (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"))
          (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"))))
   (set (match_operand:DI 3 "register_operand")
        (ltu:DI (plus:V_DI
                  (zero_extend:V_DI (match_dup 1))
                  (vec_duplicate:V_DI (match_dup 2)))
                (match_dup 1)))]
  ""
  {@ [cons: =0, 1, 2, =3]
  [v,v,DbSv,&cV] #
  [v,v,DASv,&Sg] ^
  }
  "gcn_can_split_p (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    rtx carry = operands[3];

    rtx sum_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup (sum_lo, scalar_lo, operands[1],
                                       carry));

    /* Seed the high half with the scalar's high word, then fold in the
       carry (the zero-extended vector contributes zero).  */
    rtx sum_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);
    emit_insn (gen_vec_duplicate<vnsi> (sum_hi, scalar_hi));
    emit_insn (gen_addc<vnsi>3 (sum_hi, sum_hi, const0_rtx, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1970
;; Convenience expander: same operation as add<mode>3_vcc_zext_dup2, with
;; the hard VCC register supplied as the carry output.
(define_expand "add<mode>3_zext_dup2"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx dest = operands[0];
    rtx narrow_vec = operands[1];
    rtx scalar = operands[2];
    rtx carry_out = gen_rtx_REG (DImode, VCC_REG);

    emit_insn (gen_add<mode>3_vcc_zext_dup2 (dest, narrow_vec, scalar,
                                             carry_out));
    DONE;
  })
1982
;; Masked form of add<mode>3_vcc_zext_dup2.  Operand 5 is the EXEC mask,
;; operand 4 the other arm of the vec_merge, and the carry output (operand 3)
;; is the ltu result ANDed with the mask.  Split into three masked <VnSI>
;; instructions.
(define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"))
            (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand")))
          (match_operand:V_DI 4 "gcn_register_or_unspec_operand")
          (match_operand:DI 5 "gcn_exec_reg_operand")))
   (set (match_operand:DI 3 "register_operand")
        (and:DI
          (ltu:DI (plus:V_DI
                    (zero_extend:V_DI (match_dup 1))
                    (vec_duplicate:V_DI (match_dup 2)))
                  (match_dup 1))
          (match_dup 5)))]
  ""
  {@ [cons: =0, 1, 2, =3, 4, 5]
  [v,v,ASv,&Sg,U0,e] #
  [v,v,BSv,&cV,U0,e] ^
  }
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    rtx carry = operands[3];
    rtx mask = operands[5];

    /* Low half.  */
    rtx sum_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    rtx prev_lo = gcn_operand_part (<MODE>mode, operands[4], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup_exec (sum_lo, scalar_lo, operands[1],
                                            carry, prev_lo, mask));

    /* High half: broadcast the scalar's high word under the mask...  */
    rtx sum_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);
    rtx prev_hi_dup = gcn_operand_part (<MODE>mode, operands[4], 1);
    emit_insn (gen_vec_duplicate<vnsi>_exec (sum_hi, scalar_hi, prev_hi_dup,
                                             mask));

    /* ...then add zero plus the carry on top of it.  */
    rtx prev_hi_add = gcn_operand_part (<MODE>mode, operands[4], 1);
    emit_insn (gen_addc<vnsi>3_exec (sum_hi, sum_hi, const0_rtx, carry, carry,
                                     prev_hi_add, mask));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
2027
;; Convenience expander for the masked variant: forwards to
;; add<mode>3_vcc_zext_dup2_exec with the hard VCC register as carry output.
;; This expander's merge source (3) and EXEC mask (4) become operands 4 and 5
;; of the underlying pattern.
(define_expand "add<mode>3_zext_dup2_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    rtx carry_out = gen_rtx_REG (DImode, VCC_REG);
    rtx merge_src = operands[3];
    rtx mask = operands[4];

    emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
                                                  operands[2], carry_out,
                                                  merge_src, mask));
    DONE;
  })
2042
;; V_DI add of a sign-extended <VnSI> vector (operand 1) and a broadcast
;; DImode scalar (operand 2).  Needs an early-clobber <VnSI> scratch
;; (operand 3) and clobbers VCC.  The split materializes the high half of the
;; sign extension as operand 1 arithmetically shifted right by 31.
(define_insn_and_split "add<mode>3_sext_dup2"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (plus:V_DI
          (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
          (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
   (clobber (match_scratch:<VnSI> 3 "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx sign_words = operands[3];

    /* sign_words = operand 1 >> 31 (arithmetic): the upper word of the
       sign-extended value.  */
    emit_insn (gen_ashr<vnsi>3 (sign_words, operands[1], GEN_INT (31)));

    /* Low half.  */
    rtx sum_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup (sum_lo, scalar_lo, operands[1],
                                       carry));

    /* High half: broadcast the scalar's high word, then add the sign words
       with carry.  */
    rtx sum_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);
    emit_insn (gen_vec_duplicate<vnsi> (sum_hi, scalar_hi));
    emit_insn (gen_addc<vnsi>3 (sum_hi, sum_hi, sign_words, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
2070
;; Masked form of add<mode>3_sext_dup2.  Operand 4 is the EXEC mask, operand
;; 3 the other arm of the vec_merge, operand 5 an early-clobber <VnSI>
;; scratch; VCC is clobbered.  Every instruction produced by the split is the
;; _exec variant using the same mask.
(define_insn_and_split "add<mode>3_sext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (vec_merge:V_DI
          (plus:V_DI
            (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
            (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx mask = operands[4];
    rtx sign_words = operands[5];

    /* sign_words = operand 1 >> 31 (arithmetic); the scratch has no prior
       value worth keeping, so it is merged with an undef.  */
    emit_insn (gen_ashr<vnsi>3_exec (sign_words, operands[1], GEN_INT (31),
                                     gcn_gen_undef (<VnSI>mode), mask));

    /* Low half.  */
    rtx sum_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    rtx prev_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup_exec (sum_lo, scalar_lo, operands[1],
                                            carry, prev_lo, mask));

    /* High half: broadcast the scalar's high word...  */
    rtx sum_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);
    rtx prev_hi_dup = gcn_operand_part (<MODE>mode, operands[3], 1);
    emit_insn (gen_vec_duplicate<vnsi>_exec (sum_hi, scalar_hi, prev_hi_dup,
                                             mask));

    /* ...then add the sign words with carry.  */
    rtx prev_hi_add = gcn_operand_part (<MODE>mode, operands[3], 1);
    emit_insn (gen_addc<vnsi>3_exec (sum_hi, sum_hi, sign_words, carry, carry,
                                     prev_hi_add, mask));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
2110
2111 ;; }}}
2112 ;; {{{ DS memory ALU: add/sub
2113
;; Modes iterated over by the DS add/sub/subr patterns: DS_ARITH_MODE lists
;; the 64-lane vector modes, DS_ARITH_SCALAR_MODE the matching scalar modes.
2114 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
2115 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
2116
2117 ;; FIXME: the vector patterns probably need RD expanded to a vector of
2118 ;; addresses. For now, the only way a vector can get into LDS is
2119 ;; if the user puts it there manually.
2120 ;;
2121 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
2122 ;; checked to see if anything can ever use them.
2123
;; In-place vector add on a DS memory operand: operand 0 = operand 1 +
;; operand 2, where operand 2 is a register ("v").  The insn condition
;; requires operands 0 and 1 to be the same rtx, so the single ds_add
;; instruction only names the memory location once (%A0 ... %O0) plus %2.
2124 (define_insn "add<mode>3_ds<exec>"
2125 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
2126 (plus:DS_ARITH_MODE
2127 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
2128 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
2129 "rtx_equal_p (operands[0], operands[1])"
2130 "ds_add%u0\t%A0, %2%O0"
2131 [(set_attr "type" "ds")
2132 (set_attr "length" "8")])
2133
;; Scalar-mode (DS_ARITH_SCALAR_MODE) counterpart of the in-place DS add:
;; operand 0 = operand 1 + operand 2, with operands 0 and 1 required to be
;; the same rtx by the insn condition.  Emits ds_add.
2134 (define_insn "add<mode>3_ds_scalar"
2135 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
2136 (plus:DS_ARITH_SCALAR_MODE
2137 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
2138 "%RD")
2139 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
2140 "rtx_equal_p (operands[0], operands[1])"
2141 "ds_add%u0\t%A0, %2%O0"
2142 [(set_attr "type" "ds")
2143 (set_attr "length" "8")])
2144
;; In-place vector subtract on a DS memory operand: operand 0 = operand 1 -
;; operand 2 (memory minus register).  Operands 0 and 1 must be the same rtx.
;; Unlike the add pattern, operand 1 carries no "%" commutativity marker.
;; Emits ds_sub.
2145 (define_insn "sub<mode>3_ds<exec>"
2146 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
2147 (minus:DS_ARITH_MODE
2148 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
2149 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
2150 "rtx_equal_p (operands[0], operands[1])"
2151 "ds_sub%u0\t%A0, %2%O0"
2152 [(set_attr "type" "ds")
2153 (set_attr "length" "8")])
2154
;; Scalar-mode counterpart of the in-place DS subtract: operand 0 =
;; operand 1 - operand 2 (memory minus register), with operands 0 and 1
;; required to be the same rtx.  Emits ds_sub.
2155 (define_insn "sub<mode>3_ds_scalar"
2156 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
2157 (minus:DS_ARITH_SCALAR_MODE
2158 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
2159 " RD")
2160 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
2161 "rtx_equal_p (operands[0], operands[1])"
2162 "ds_sub%u0\t%A0, %2%O0"
2163 [(set_attr "type" "ds")
2164 (set_attr "length" "8")])
2165
;; Reversed in-place vector subtract on a DS memory operand: operand 0 =
;; operand 2 - operand 1 (register minus memory).  Note that operand 2
;; appears first in the minus.  Operands 0 and 1 must be the same rtx.
;; Emits ds_rsub.
2166 (define_insn "subr<mode>3_ds<exec>"
2167 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
2168 (minus:DS_ARITH_MODE
2169 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
2170 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
2171 "rtx_equal_p (operands[0], operands[1])"
2172 "ds_rsub%u0\t%A0, %2%O0"
2173 [(set_attr "type" "ds")
2174 (set_attr "length" "8")])
2175
;; Scalar-mode counterpart of the reversed in-place DS subtract: operand 0 =
;; operand 2 - operand 1 (register minus memory), with operands 0 and 1
;; required to be the same rtx.  Emits ds_rsub.
2176 (define_insn "subr<mode>3_ds_scalar"
2177 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
2178 (minus:DS_ARITH_SCALAR_MODE
2179 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
2180 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
2181 " RD")))]
2182 "rtx_equal_p (operands[0], operands[1])"
2183 "ds_rsub%u0\t%A0, %2%O0"
2184 [(set_attr "type" "ds")
2185 (set_attr "length" "8")])
2186
2187 ;; }}}
2188 ;; {{{ ALU special case: mult
2189
;; High-part multiply for V_SI vectors: both inputs are widened to <VnDI>
;; with the same any_extend code, multiplied, shifted right logically by 32
;; and truncated back to V_SI.  The template emits v_mul_hi<sgnsuffix>0 with
;; the two source operands in swapped order (%2 before %1); operand 1 is
;; marked commutative ("%").
2190 (define_insn "<su>mul<mode>3_highpart<exec>"
2191 [(set (match_operand:V_SI 0 "register_operand" "= v")
2192 (truncate:V_SI
2193 (lshiftrt:<VnDI>
2194 (mult:<VnDI>
2195 (any_extend:<VnDI>
2196 (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
2197 (any_extend:<VnDI>
2198 (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
2199 (const_int 32))))]
2200 ""
2201 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
2202 [(set_attr "type" "vop3a")
2203 (set_attr "length" "8")])
2204
;; Element-wise multiply for the V_INT_1REG modes.  Always emits
;; v_mul_lo_u32, whatever the element mode of the iterator instance.
2205 (define_insn "mul<mode>3<exec>"
2206 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
2207 (mult:V_INT_1REG
2208 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
2209 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
2210 ""
2211 "v_mul_lo_u32\t%0, %1, %2"
2212 [(set_attr "type" "vop3a")
2213 (set_attr "length" "8")])
2214
;; Element-wise multiply of a V_INT_1REG vector (operand 1) by a broadcast
;; <SCALAR_MODE> scalar (operand 2, wrapped in vec_duplicate).  Emits the
;; same v_mul_lo_u32 instruction as the vector-by-vector pattern.
2215 (define_insn "mul<mode>3_dup<exec>"
2216 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
2217 (mult:V_INT_1REG
2218 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
2219 (vec_duplicate:V_INT_1REG
2220 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
2221 ""
2222 "v_mul_lo_u32\t%0, %1, %2"
2223 [(set_attr "type" "vop3a")
2224 (set_attr "length" "8")])
2225
;; V_DI (64-bit element) multiply, split after reload into 32-bit operations.
;;
;; With a = a_hi * 2^32 + a_lo and b = b_hi * 2^32 + b_lo, the low 64 bits of
;; a * b are:
;;
;;   lo = lo32 (a_lo * b_lo)
;;   hi = hi32 (a_lo * b_lo) + lo32 (a_hi * b_lo) + lo32 (a_lo * b_hi)
;;
;; The a_hi * b_hi partial product has weight 2^64, so it lies entirely
;; outside the 64-bit result and must NOT be added into the high word.
;; (Adding it would, for example, turn 2^32 * 2^32 into 2^32 instead of 0.)
;;
;; Operand 0 is early-clobber because the output halves are written before
;; all input halves have been read; operand 3 is a <VnSI> scratch for the
;; cross products.
(define_insn_and_split "mul<mode>3"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (mult:V_DI
          (match_operand:V_DI 1 "gcn_alu_operand" "% v")
          (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:<VnSI> 3 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx tmp = operands[3];

    /* lo = lo32 (a_lo * b_lo); hi starts as hi32 (a_lo * b_lo).  */
    emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
    emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));

    /* hi += lo32 (a_hi * b_lo).  */
    emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));

    /* hi += lo32 (a_lo * b_hi).  */
    emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));

    /* a_hi * b_hi only affects bits 64 and up; nothing to add.  */
    DONE;
  })
2255
;; Masked (vec_merge) V_DI multiply, split after reload into masked 32-bit
;; operations.  Operand 4 is the EXEC mask and operand 3 supplies the other
;; arm of the vec_merge (a register or an unspec).
;;
;; With a = a_hi * 2^32 + a_lo and b = b_hi * 2^32 + b_lo, the low 64 bits of
;; a * b are:
;;
;;   lo = lo32 (a_lo * b_lo)
;;   hi = hi32 (a_lo * b_lo) + lo32 (a_hi * b_lo) + lo32 (a_lo * b_hi)
;;
;; The a_hi * b_hi partial product has weight 2^64, so it lies entirely
;; outside the 64-bit result and must NOT be added into the high word.
;;
;; Operand 0 is early-clobber because the output halves are written before
;; all input halves have been read; operand 5 is a <VnSI> scratch for the
;; cross products.
(define_insn_and_split "mul<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (vec_merge:V_DI
          (mult:V_DI
            (match_operand:V_DI 1 "gcn_alu_operand" "% v")
            (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* Previous-value operands for the first write to each output half.  An
       UNSPEC operand 3 is replaced by a <VnSI> undef; otherwise operand 3 is
       split into halves.  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
        old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
      }
    else
      {
        old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }

    /* The scratch has no prior value worth keeping.  */
    rtx undef = gcn_gen_undef (<VnSI>mode);

    /* lo = lo32 (a_lo * b_lo); hi starts as hi32 (a_lo * b_lo).  */
    emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
    emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
                                              old_hi, exec));

    /* hi += lo32 (a_hi * b_lo).  Later updates merge with out_hi itself so
       the lanes outside EXEC keep the value selected above.  */
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));

    /* hi += lo32 (a_lo * b_hi).  */
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));

    /* a_hi * b_hi only affects bits 64 and up; nothing to add.  */
    DONE;
  })
2303
;; V_DI multiply where operand 1 is a <VnSI> vector zero-extended to 64 bits,
;; split after reload.  Because the extended operand has no high word, only
;; three 32-bit operations plus one add are needed:
;;
;;   lo = lo32 (a * b_lo)
;;   hi = hi32 (a * b_lo) + lo32 (a * b_hi)
(define_insn_and_split "mul<mode>3_zext"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (mult:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
          (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:<VnSI> 3 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx narrow = operands[1];
    rtx scratch = operands[3];
    rtx prod_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx prod_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);

    /* Full 64-bit product of the two low words.  */
    emit_insn (gen_mul<vnsi>3 (prod_lo, narrow, wide_lo));
    emit_insn (gen_umul<vnsi>3_highpart (prod_hi, narrow, wide_lo));

    /* Cross product with the wide operand's high word.  */
    emit_insn (gen_mul<vnsi>3 (scratch, narrow, wide_hi));
    emit_insn (gen_add<vnsi>3 (prod_hi, prod_hi, scratch));
    DONE;
  })
2329
;; Masked (vec_merge) form of mul<mode>3_zext, split after reload.  Operand 4
;; is the EXEC mask, operand 3 the other arm of the vec_merge (a register or
;; an unspec), operand 5 a <VnSI> scratch.
;;
;;   lo = lo32 (a * b_lo)
;;   hi = hi32 (a * b_lo) + lo32 (a * b_hi)
(define_insn_and_split "mul<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (vec_merge:V_DI
          (mult:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
            (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx prod_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx prod_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx mask = operands[4];
    rtx scratch = operands[5];

    /* Previous values for the first write to each half: one shared undef
       when operand 3 is an UNSPEC, otherwise the halves of operand 3.  */
    const bool prev_is_unspec = GET_CODE (operands[3]) == UNSPEC;
    rtx prev_lo = (prev_is_unspec
                   ? gcn_gen_undef (<VnSI>mode)
                   : gcn_operand_part (<MODE>mode, operands[3], 0));
    rtx prev_hi = (prev_is_unspec
                   ? prev_lo
                   : gcn_operand_part (<MODE>mode, operands[3], 1));

    /* The scratch is merged with an undef.  */
    rtx scratch_prev = gcn_gen_undef (<VnSI>mode);

    emit_insn (gen_mul<vnsi>3_exec (prod_lo, narrow, wide_lo, prev_lo, mask));
    emit_insn (gen_umul<vnsi>3_highpart_exec (prod_hi, narrow, wide_lo,
                                              prev_hi, mask));
    emit_insn (gen_mul<vnsi>3_exec (scratch, narrow, wide_hi, scratch_prev,
                                    mask));
    /* Merge with prod_hi itself so lanes outside the mask are unchanged.  */
    emit_insn (gen_add<vnsi>3_exec (prod_hi, prod_hi, scratch, prod_hi,
                                    mask));
    DONE;
  })
2373
;; V_DI multiply of a zero-extended <VnSI> vector (operand 1) by a broadcast
;; DImode scalar (operand 2), split after reload.
;;
;;   lo = lo32 (a * b_lo)
;;   hi = hi32 (a * b_lo) + lo32 (a * b_hi)
;;
;; NOTE(review): operand 2 is DImode but its halves are taken with
;; gcn_operand_part (<MODE>mode, ...), whereas the add<mode>3_*_dup2 splits
;; use DImode for their scalar operand -- confirm this is intended.
(define_insn_and_split "mul<mode>3_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand" "= &v")
        (mult:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
          (vec_duplicate:V_DI
            (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
   (clobber (match_scratch:<VnSI> 3 "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx narrow = operands[1];
    rtx scratch = operands[3];
    rtx prod_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx prod_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (<MODE>mode, operands[2], 1);

    /* Full 64-bit product with the scalar's low word.  */
    emit_insn (gen_mul<vnsi>3 (prod_lo, narrow, scalar_lo));
    emit_insn (gen_umul<vnsi>3_highpart (prod_hi, narrow, scalar_lo));

    /* Cross product with the scalar's high word.  */
    emit_insn (gen_mul<vnsi>3 (scratch, narrow, scalar_hi));
    emit_insn (gen_add<vnsi>3 (prod_hi, prod_hi, scratch));
    DONE;
  })
2400
;; Masked (vec_merge) form of mul<mode>3_zext_dup2, split after reload.
;; Operand 4 is the EXEC mask, operand 3 the other arm of the vec_merge (a
;; register or an unspec), operand 5 a <VnSI> scratch.
;;
;;   lo = lo32 (a * b_lo)
;;   hi = hi32 (a * b_lo) + lo32 (a * b_hi)
;;
;; NOTE(review): operand 2 is DImode but its halves are taken with
;; gcn_operand_part (<MODE>mode, ...), whereas the add<mode>3_*_dup2 splits
;; use DImode for their scalar operand -- confirm this is intended.
(define_insn_and_split "mul<mode>3_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= &v")
        (vec_merge:V_DI
          (mult:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
            (vec_duplicate:V_DI
              (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx prod_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx prod_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx scalar_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx mask = operands[4];
    rtx scratch = operands[5];

    /* Previous values for the first write to each half: one shared undef
       when operand 3 is an UNSPEC, otherwise the halves of operand 3.  */
    const bool prev_is_unspec = GET_CODE (operands[3]) == UNSPEC;
    rtx prev_lo = (prev_is_unspec
                   ? gcn_gen_undef (<VnSI>mode)
                   : gcn_operand_part (<MODE>mode, operands[3], 0));
    rtx prev_hi = (prev_is_unspec
                   ? prev_lo
                   : gcn_operand_part (<MODE>mode, operands[3], 1));

    /* The scratch is merged with an undef.  */
    rtx scratch_prev = gcn_gen_undef (<VnSI>mode);

    emit_insn (gen_mul<vnsi>3_exec (prod_lo, narrow, scalar_lo, prev_lo,
                                    mask));
    emit_insn (gen_umul<vnsi>3_highpart_exec (prod_hi, narrow, scalar_lo,
                                              prev_hi, mask));
    emit_insn (gen_mul<vnsi>3_exec (scratch, narrow, scalar_hi, scratch_prev,
                                    mask));
    /* Merge with prod_hi itself so lanes outside the mask are unchanged.  */
    emit_insn (gen_add<vnsi>3_exec (prod_hi, prod_hi, scratch, prod_hi,
                                    mask));
    DONE;
  })
2445
;; Iterator over the two complex-multiply unspecs, plus its attributes:
;;   conj_op      - pattern-name suffix ("" or "_conj");
;;   cmul_subadd  - first of the two masked ops ("sub" for CMUL, "add" for
;;                  CMUL_CONJ);
;;   cmul_addsub  - second of the two masked ops (the opposite choice).
2446 (define_int_iterator UNSPEC_CMUL_OP [UNSPEC_CMUL UNSPEC_CMUL_CONJ])
2447 (define_int_attr conj_op [(UNSPEC_CMUL "") (UNSPEC_CMUL_CONJ "_conj")])
2448 (define_int_attr cmul_subadd [(UNSPEC_CMUL "sub") (UNSPEC_CMUL_CONJ "add")])
2449 (define_int_attr cmul_addsub [(UNSPEC_CMUL "add") (UNSPEC_CMUL_CONJ "sub")])
2450
;; Complex multiply (and conjugate variant) on vectors whose lanes are laid
;; out in pairs: (a, b) in operand 1 and (c, d) in operand 2.  The expansion
;; forms the four products with two multiplies and pair swaps, then writes
;; the 0x5555... lanes and the 0xaaaa... lanes of the destination with two
;; masked add/sub instructions chosen by the cmul_subadd / cmul_addsub
;; attributes.  EXEC is overwritten.
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:V_noHI 0 "register_operand" "=&v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand" "v")
           (match_operand:V_noHI 2 "register_operand" "v")]
          UNSPEC_CMUL_OP))]
  ""
  {
    rtx result = operands[0];
    rtx lhs = operands[1];             /* lanes: a   b    */
    rtx rhs = operands[2];             /* lanes: c   d    */

    rtx straight = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (straight, lhs, rhs));           /* a*c b*d */

    rtx rhs_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (rhs_swapped, rhs));   /* d   c   */

    rtx crossed = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (crossed, lhs, rhs_swapped));    /* a*d b*c */

    rtx straight_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (straight_swapped,
                                         straight));           /* b*d a*c */

    /* First masked op, under the 0x5555... mask; for plain CMUL this gives
       a*c - b*d in those lanes.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    emit_insn (gen_<cmul_subadd><mode>3_exec (result, straight,
                                              straight_swapped,
                                              gcn_gen_undef (<MODE>mode),
                                              exec_even));

    rtx crossed_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (crossed_swapped,
                                         crossed));            /* b*c a*d */

    /* Second masked op, under the 0xaaaa... mask and merged with the lanes
       written above; for plain CMUL this gives a*d + b*c.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_<cmul_addsub><mode>3_exec (result, crossed,
                                              crossed_swapped, result,
                                              exec_odd));
    DONE;
  })
2489
;; Code iterator over plus/minus; addsub_as maps each code to the single
;; letter ("a" or "s") spliced into pattern names built from it.
2490 (define_code_iterator addsub [plus minus])
2491 (define_code_attr addsub_as [(plus "a") (minus "s")])
2492
;; Complex multiply combined (plus or minus, per the addsub iterator) with
;; operand 3, for floating-point vectors with lanes laid out in pairs:
;; (a, b) in operand 1 and (c, d) in operand 2.  The "a" half of each pair is
;; applied through fm<addsub_as><mode>4 together with operand 3; the "b" half
;; is multiplied separately, pair-swapped, and then subtracted under the
;; 0x5555... mask and added under the 0xaaaa... mask.  EXEC is overwritten.
(define_expand "cml<addsub_as><mode>4"
  [(set (match_operand:V_FP 0 "register_operand" "=&v")
        (addsub:V_FP
          (unspec:V_FP
            [(match_operand:V_FP 1 "register_operand" "v")
             (match_operand:V_FP 2 "register_operand" "v")]
            UNSPEC_CMUL)
          (match_operand:V_FP 3 "register_operand" "v")))]
  ""
  {
    rtx result = operands[0];
    rtx lhs = operands[1];
    rtx rhs = operands[2];
    rtx accum = operands[3];

    rtx a_both = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_distribute_even<mode> (a_both, lhs));    /* a   a   */

    /* a*c and a*d, combined with the accumulator operand.  */
    rtx a_terms = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_fm<addsub_as><mode>4 (a_terms, a_both, rhs, accum));

    rtx b_both = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_distribute_odd<mode> (b_both, lhs));     /* b   b   */

    rtx b_terms = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (b_terms, b_both, rhs));          /* b*c b*d */

    rtx b_terms_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (b_terms_swapped,
                                         b_terms));             /* b*d b*c */

    /* Subtract under the 0x5555... mask.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    emit_insn (gen_sub<mode>3_exec (result, a_terms, b_terms_swapped,
                                    gcn_gen_undef (<MODE>mode), exec_even));

    /* Add under the 0xaaaa... mask, keeping the lanes written above.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_add<mode>3_exec (result, a_terms, b_terms_swapped, result,
                                    exec_odd));

    DONE;
  })
2531
;; Alternating subtract/add: lanes selected by the 0x5555... mask receive
;; operand 1 - operand 2, the remaining lanes operand 1 + operand 2.
;; (The const_int in the vec_merge, 6148914691236517205, is 0x5555555555555555.)
;; Expanded as two masked instructions; EXEC is overwritten.
(define_expand "vec_addsub<mode>3"
  [(set (match_operand:V_noHI 0 "register_operand" "=&v")
        (vec_merge:V_noHI
          (minus:V_noHI
            (match_operand:V_noHI 1 "register_operand" "v")
            (match_operand:V_noHI 2 "register_operand" "v"))
          (plus:V_noHI (match_dup 1) (match_dup 2))
          (const_int 6148914691236517205)))]
  ""
  {
    rtx result = operands[0];
    rtx lhs = operands[1];
    rtx rhs = operands[2];

    /* Subtract under the 0x5555... mask; the other lanes start undefined.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    emit_insn (gen_sub<mode>3_exec (result, lhs, rhs,
                                    gcn_gen_undef (<MODE>mode), exec_even));

    /* Add under the 0xaaaa... mask, keeping the lanes written above.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_add<mode>3_exec (result, lhs, rhs, result, exec_odd));

    DONE;
  })
2555
;; Iterator over the two complex-add-with-rotation unspecs, plus its
;; attributes:
;;   rot          - rotation spliced into the pattern name ("90" or "270");
;;   cadd_subadd  - first of the two masked ops ("sub" for CADD90, "add" for
;;                  CADD270);
;;   cadd_addsub  - second of the two masked ops (the opposite choice).
2556 (define_int_iterator CADD [UNSPEC_CADD90 UNSPEC_CADD270])
2557 (define_int_attr rot [(UNSPEC_CADD90 "90") (UNSPEC_CADD270 "270")])
2558 (define_int_attr cadd_subadd [(UNSPEC_CADD90 "sub") (UNSPEC_CADD270 "add")])
2559 (define_int_attr cadd_addsub [(UNSPEC_CADD90 "add") (UNSPEC_CADD270 "sub")])
2560
;; Complex add with rotation (90 or 270, per the CADD iterator).  Operand 2
;; is pair-swapped, then combined with operand 1 using one masked op under
;; the 0x5555... mask and the opposite op under the 0xaaaa... mask, as chosen
;; by the cadd_subadd / cadd_addsub attributes.  EXEC is overwritten.
(define_expand "cadd<rot><mode>3"
  [(set (match_operand:V_noHI 0 "register_operand" "=&v")
        (unspec:V_noHI [(match_operand:V_noHI 1 "register_operand" "v")
                        (match_operand:V_noHI 2 "register_operand" "v")]
                       CADD))]
  ""
  {
    rtx result = operands[0];
    rtx lhs = operands[1];

    /* Swap the two lanes of every pair of the second input.  */
    rtx rhs_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (rhs_swapped, operands[2]));

    /* First op under the 0x5555... mask; other lanes start undefined.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    emit_insn (gen_<cadd_subadd><mode>3_exec (result, lhs, rhs_swapped,
                                              gcn_gen_undef (<MODE>mode),
                                              exec_even));

    /* Opposite op under the 0xaaaa... mask, keeping the lanes above.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_<cadd_addsub><mode>3_exec (result, lhs, rhs_swapped,
                                              result, exec_odd));

    DONE;
  })
2584
;; Alternating multiply-subtract / multiply-add: computes operand 1 *
;; operand 2 with a separate multiply, then subtracts operand 3 under the
;; 0x5555... mask and adds operand 3 under the 0xaaaa... mask.  EXEC is
;; overwritten.
(define_expand "vec_fmaddsub<mode>4"
  [(match_operand:V_noHI 0 "register_operand" "=&v")
   (match_operand:V_noHI 1 "register_operand" "v")
   (match_operand:V_noHI 2 "register_operand" "v")
   (match_operand:V_noHI 3 "register_operand" "v")]
  ""
  {
    rtx result = operands[0];
    rtx addend = operands[3];

    rtx product = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (product, operands[1], operands[2]));

    /* product - addend under the 0x5555... mask.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    emit_insn (gen_sub<mode>3_exec (result, product, addend,
                                    gcn_gen_undef (<MODE>mode), exec_even));

    /* product + addend under the 0xaaaa... mask, keeping the lanes above.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_add<mode>3_exec (result, product, addend, result,
                                    exec_odd));

    DONE;
  })
2605
;; Alternating multiply-add / multiply-subtract (the mirror image of
;; vec_fmaddsub): computes operand 1 * operand 2 with a separate multiply,
;; then adds operand 3 under the 0x5555... mask and subtracts operand 3 under
;; the 0xaaaa... mask.  EXEC is overwritten.
(define_expand "vec_fmsubadd<mode>4"
  [(match_operand:V_noHI 0 "register_operand" "=&v")
   (match_operand:V_noHI 1 "register_operand" "v")
   (match_operand:V_noHI 2 "register_operand" "v")
   (match_operand:V_noHI 3 "register_operand" "v")]
  ""
  {
    rtx result = operands[0];
    rtx addend = operands[3];

    rtx product = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (product, operands[1], operands[2]));

    /* product + addend under the 0x5555... mask.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    emit_insn (gen_add<mode>3_exec (result, product, addend,
                                    gcn_gen_undef (<MODE>mode), exec_even));

    /* product - addend under the 0xaaaa... mask, keeping the lanes above.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_sub<mode>3_exec (result, product, addend, result,
                                    exec_odd));

    DONE;
  })
2626
2627 ;; }}}
2628 ;; {{{ ALU generic case
2629
;; Code iterators for the generic ALU patterns: two-input bitwise
;; operations, shifts, and signed/unsigned min/max.
2630 (define_code_iterator bitop [and ior xor])
2631 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
2632 (define_code_iterator minmaxop [smin smax umin umax])
2633
;; Unary operation from the bitunop code iterator (defined outside this
;; section) on the V_INT_1REG modes.  The mnemonic is built from the
;; <mnemonic> attribute: v_<mnemonic>0.
2634 (define_insn "<expander><mode>2<exec>"
2635 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
2636 (bitunop:V_INT_1REG
2637 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
2638 ""
2639 "v_<mnemonic>0\t%0, %1"
2640 [(set_attr "type" "vop1")
2641 (set_attr "length" "8")])
2642
;; Binary and/ior/xor on the V_INT_1REG modes, with two alternatives:
;;   1. register destination: v_<mnemonic>0, with the source operands emitted
;;      in swapped order (%2 before %1);
;;   2. "RD" destination with operand 1 tied to operand 0 ("0"): the
;;      ds_<mnemonic>0 form, which takes only the register operand 2.
2643 (define_insn "<expander><mode>3<exec>"
2644 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
2645 (bitop:V_INT_1REG
2646 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
2647 (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2648 ""
2649 "@
2650 v_<mnemonic>0\t%0, %2, %1
2651 ds_<mnemonic>0\t%A0, %2%O0"
2652 [(set_attr "type" "vop2,ds")
2653 (set_attr "length" "8,8")])
2654
;; Binary and/ior/xor on V_DI vectors.  The register alternative is output
;; as "#" and, after reload, split into two independent <VnSI> operations on
;; the low and high halves.  The "RD" alternative emits a single
;; ds_<mnemonic>0 instruction and is excluded from the split by the
;; gcn_ds_memory_operand test.
(define_insn_and_split "<expander><mode>3"
  [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
        (bitop:V_DI
          (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
          (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
  [(set (match_dup 3)
        (bitop:<VnSI> (match_dup 5) (match_dup 7)))
   (set (match_dup 4)
        (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
  {
    /* operands[3..8] receive, in order, the low and high halves of
       operands 0, 1 and 2: operands[3 + 2 * n + half] is half 'half' of
       operands[n].  */
    for (int n = 0; n < 3; n++)
      for (int half = 0; half < 2; half++)
        operands[3 + 2 * n + half]
          = gcn_operand_part (<MODE>mode, operands[n], half);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
2679
; Masked form of the 64-bit-element bitwise op: the result is vec_merged
; with operand 3 under the mask in operand 4 (an EXEC register operand).
; The insn condition accepts a memory destination only when it is the same
; rtx as operand 1 and operand 2 is a register.
; After reload the non-DS case is split into two <VnSI> vec_merge
; operations, one per operand part (0 and 1), both using the same mask.
2680 (define_insn_and_split "<expander><mode>3_exec"
2681 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2682 (vec_merge:V_DI
2683 (bitop:V_DI
2684 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2685 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2686 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
2687 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2688 "!memory_operand (operands[0], VOIDmode)
2689 || (rtx_equal_p (operands[0], operands[1])
2690 && register_operand (operands[2], VOIDmode))"
2691 "@
2692 #
2693 ds_<mnemonic>0\t%A0, %2%O0"
2694 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2695 [(set (match_dup 5)
2696 (vec_merge:<VnSI>
2697 (bitop:<VnSI> (match_dup 7) (match_dup 9))
2698 (match_dup 11)
2699 (match_dup 4)))
2700 (set (match_dup 6)
2701 (vec_merge:<VnSI>
2702 (bitop:<VnSI> (match_dup 8) (match_dup 10))
2703 (match_dup 12)
2704 (match_dup 4)))]
2705 {
2706 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0);
2707 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1);
2708 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0);
2709 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1);
2710 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0);
2711 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1);
2712 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0);
2713 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1);
2714 }
2715 [(set_attr "type" "vmult,ds")
2716 (set_attr "length" "16,8")])
2717
; Shift a vector of QI/HI elements by a single SImode amount.
; The expander converts operand 1 into a fresh <VnSI> register, copies the
; shift amount into a fresh SImode register, performs the shift with the
; <VnSI> pattern, and converts the result back into operand 0.
; The conversions are unsigned only for lshiftrt.  The local enum exists so
; that the substituted <code> name can be compared as a C constant.
2718 (define_expand "<expander><mode>3"
2719 [(set (match_operand:V_QIHI 0 "register_operand" "= v")
2720 (shiftop:V_QIHI
2721 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2722 (vec_duplicate:V_QIHI
2723 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2724 ""
2725 {
2726 enum {ashift, lshiftrt, ashiftrt};
2727 bool unsignedp = (<code> == lshiftrt);
2728 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2729 rtx insi2 = gen_reg_rtx (SImode);
2730 rtx outsi = gen_reg_rtx (<VnSI>mode);
2731
2732 convert_move (insi1, operands[1], unsignedp);
2733 convert_move (insi2, operands[2], unsignedp);
2734 emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2));
2735 convert_move (operands[0], outsi, unsignedp);
2736 DONE;
2737 })
2738
; Shift every lane of a V_INT_noHI vector by the same SImode amount
; (operand 2, wrapped in vec_duplicate).  The template uses <revmnemonic>
; and lists the shift amount (%2) before the shifted value (%1).
2739 (define_insn "<expander><mode>3<exec>"
2740 [(set (match_operand:V_INT_noHI 0 "register_operand" "= v")
2741 (shiftop:V_INT_noHI
2742 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2743 (vec_duplicate:<VnSI>
2744 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2745 ""
2746 "v_<revmnemonic>0\t%0, %2, %1"
2747 [(set_attr "type" "vop2")
2748 (set_attr "length" "8")])
2749
; Shift a vector of QI/HI elements by a vector of per-lane amounts.
; Both inputs are converted into fresh <VnSI> registers, the shift is done
; with the <VnSI> vector-shift pattern, and the result is converted back
; into operand 0.  The conversions are unsigned only for lshiftrt.
2750 (define_expand "v<expander><mode>3"
2751 [(set (match_operand:V_QIHI 0 "register_operand" "=v")
2752 (shiftop:V_QIHI
2753 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2754 (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
2755 ""
2756 {
2757 enum {ashift, lshiftrt, ashiftrt};
2758 bool unsignedp = (<code> == lshiftrt);
2759 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2760 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2761 rtx outsi = gen_reg_rtx (<VnSI>mode);
2762
2763 convert_move (insi1, operands[1], unsignedp);
2764 convert_move (insi2, operands[2], unsignedp);
2765 emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2));
2766 convert_move (operands[0], outsi, unsignedp);
2767 DONE;
2768 })
2769
; Shift a V_INT_noHI vector by a <VnSI> vector of per-lane amounts.
; The template uses <revmnemonic> and lists the shift amounts (%2) before
; the shifted value (%1).
2770 (define_insn "v<expander><mode>3<exec>"
2771 [(set (match_operand:V_INT_noHI 0 "register_operand" "=v")
2772 (shiftop:V_INT_noHI
2773 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2774 (match_operand:<VnSI> 2 "gcn_alu_operand" "vB")))]
2775 ""
2776 "v_<revmnemonic>0\t%0, %2, %1"
2777 [(set_attr "type" "vop2")
2778 (set_attr "length" "8")])
2779
; Min/max on vectors of QI/HI elements.
; Both inputs are converted into fresh <VnSI> registers (unsigned
; conversion for umin/umax, signed otherwise), the <VnSI> min/max pattern
; is emitted, and the result is converted back into operand 0.
2780 (define_expand "<expander><mode>3"
2781 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
2782 (minmaxop:V_QIHI
2783 (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
2784 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
2785 ""
2786 {
2787 enum {smin, umin, smax, umax};
2788 bool unsignedp = (<code> == umax || <code> == umin);
2789 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2790 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2791 rtx outsi = gen_reg_rtx (<VnSI>mode);
2792
2793 convert_move (insi1, operands[1], unsignedp);
2794 convert_move (insi2, operands[2], unsignedp);
2795 emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2));
2796 convert_move (operands[0], outsi, unsignedp);
2797 DONE;
2798 })
2799
; Masked min/max on vectors of QI/HI elements.
; The inputs are converted to <VnSI> unmasked; the <VnSI> min/max is
; emitted under the mask in operand 4 with an undefined merge value
; (gcn_gen_undef); the result is converted into a temporary of the
; original mode; finally gen_mov<mode>_exec writes operand 0 from that
; temporary, with operand 3 as the merge value, under the same mask.
2800 (define_expand "<expander><mode>3_exec"
2801 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
2802 (vec_merge:V_QIHI
2803 (minmaxop:V_QIHI
2804 (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
2805 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand"))
2806 (match_operand:V_QIHI 3 "gcn_register_or_unspec_operand" "U0")
2807 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))]
2808 ""
2809 {
2810 enum {smin, umin, smax, umax};
2811 bool unsignedp = (<code> == umax || <code> == umin);
2812 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2813 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2814 rtx outsi = gen_reg_rtx (<VnSI>mode);
2815 rtx out = operands[0];
2816 rtx exec = operands[4];
2817 rtx tmp = gen_reg_rtx (<MODE>mode);
2818
2819 convert_move (insi1, operands[1], unsignedp);
2820 convert_move (insi2, operands[2], unsignedp);
2821 emit_insn (gen_<code><vnsi>3_exec (outsi, insi1, insi2,
2822 gcn_gen_undef(<VnSI>mode), exec));
2823 convert_move (tmp, outsi, unsignedp);
2824 emit_insn (gen_mov<mode>_exec (out, tmp, operands[3], exec));
2825 DONE;
2826 })
2827
; Signed/unsigned min/max on vectors of SI elements.
; Alternative 0 is a VOP2 instruction with the sources listed as %2, %1
; (operands are marked commutative).  Alternative 1 has an RD-constrained
; destination tied to operand 1 and emits a ds_ instruction.
2828 (define_insn "<expander><vnsi>3<exec>"
2829 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD")
2830 (minmaxop:V_SI
2831 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0")
2832 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2833 ""
2834 "@
2835 v_<mnemonic>0\t%0, %2, %1
2836 ds_<mnemonic>0\t%A0, %2%O0"
2837 [(set_attr "type" "vop2,ds")
2838 (set_attr "length" "8,8")])
2839
; Min/max on vectors of 64-bit elements.
; The pattern emits "#" and is split after reload into two insns:
;  1. a vec_cmp of operand 1 against operand 2 that writes VCC, using LT
;     (min) or GT (max) for the signed codes and LTU/GTU for the unsigned
;     codes;
;  2. a vcond_mask that builds operand 0 from operands 1 and 2 under that
;     VCC mask (presumably taking operand 1 where the compare held).
; VCC is clobbered as a result.
2840 (define_insn_and_split "<expander><mode>3"
2841 [(set (match_operand:V_DI 0 "register_operand" "=v")
2842 (minmaxop:V_DI
2843 (match_operand:V_DI 1 "gcn_alu_operand" " v")
2844 (match_operand:V_DI 2 "gcn_alu_operand" " v")))
2845 (clobber (reg:DI VCC_REG))]
2846 ""
2847 "#"
2848 "reload_completed"
2849 [(const_int 0)]
2850 {
2851 rtx out = operands[0];
2852 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2853
2854 enum {smin, smax, umin, umax};
2855 bool minp = (<code> == smin || <code> == umin);
2856 if (<code> == smin || <code> == smax)
2857 emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LT (VOIDmode, 0, 0) :
2858 gen_rtx_GT (VOIDmode, 0, 0), operands[1],
2859 operands[2]));
2860 else
2861 emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LTU (VOIDmode, 0, 0) :
2862 gen_rtx_GTU (VOIDmode, 0, 0), operands[1],
2863 operands[2]));
2864 emit_insn (gen_vcond_mask_<mode>di (out, operands[1], operands[2], vcc));
2865 }
2866 [(set_attr "type" "mult")])
2867
; Masked min/max on vectors of 64-bit elements.
; Split after reload into three insns:
;  1. a vec_cmp under the mask in operand 4 that writes VCC (LT/GT for the
;     signed codes, LTU/GTU for the unsigned codes);
;  2. a vcond_mask of operands 1 and 2 under VCC, written to the
;     early-clobber scratch operand 5;
;  3. gen_mov<mode>_exec, which writes operand 0 from the scratch with
;     operand 3 as the merge value, under the mask in operand 4.
; VCC is clobbered as a result.
2868 (define_insn_and_split "<expander><mode>3_exec"
2869 [(set (match_operand:V_DI 0 "register_operand" "= v")
2870 (vec_merge:V_DI
2871 (minmaxop:V_DI
2872 (match_operand:V_DI 1 "gcn_alu_operand" " v")
2873 (match_operand:V_DI 2 "gcn_alu_operand" " v"))
2874 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2875 (match_operand:DI 4 "gcn_exec_reg_operand" "+e")))
2876 (clobber (match_scratch:<VnDI> 5 "= &v"))
2877 (clobber (reg:DI VCC_REG))]
2878 ""
2879 "#"
2880 "reload_completed"
2881 [(const_int 0)]
2882 {
2883 rtx out = operands[0];
2884 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2885 rtx exec = operands[4];
2886 rtx tmp = operands[5];
2887
2888 enum {smin, smax, umin, umax};
2889 bool minp = (<code> == smin || <code> == umin);
2890 if (<code> == smin || <code> == smax)
2891 emit_insn (gen_vec_cmp<mode>di_exec (vcc,
2892 minp ? gen_rtx_LT (VOIDmode, 0, 0) :
2893 gen_rtx_GT (VOIDmode, 0, 0),
2894 operands[1], operands[2], exec));
2895 else
2896 emit_insn (gen_vec_cmp<mode>di_exec (vcc,
2897 minp ? gen_rtx_LTU (VOIDmode, 0, 0) :
2898 gen_rtx_GTU (VOIDmode, 0, 0),
2899 operands[1], operands[2], exec));
2900 emit_insn (gen_vcond_mask_<mode>di (tmp, operands[1], operands[2], vcc));
2901 emit_insn (gen_mov<mode>_exec (out, tmp, operands[3], exec));
2902 }
2903 [(set_attr "type" "mult")])
2904
2905 ;; }}}
2906 ;; {{{ Int unops
2907
; Integer vector negation, expanded as a subtraction of operand 1 from an
; all-zero vector constant built with gcn_vec_constant.
2908 (define_expand "neg<mode>2"
2909 [(match_operand:V_INT 0 "register_operand")
2910 (match_operand:V_INT 1 "register_operand")]
2911 ""
2912 {
2913 emit_insn (gen_sub<mode>3 (operands[0], gcn_vec_constant (<MODE>mode, 0),
2914 operands[1]));
2915 DONE;
2916 })
2917
; Bitwise NOT on a vector of 64-bit elements.
; Emits "#" and is split after reload into two <VnSI> NOT operations, one
; on part 0 and one on part 1 of the source and destination (parts are
; extracted with gcn_operand_part).
2918 (define_insn_and_split "one_cmpl<mode>2<exec>"
2919 [(set (match_operand:V_DI 0 "register_operand" "= v")
2920 (not:V_DI
2921 (match_operand:V_DI 1 "gcn_alu_operand" "vSvDB")))]
2922 ""
2923 "#"
2924 "reload_completed"
2925 [(set (match_dup 3) (not:<VnSI> (match_dup 5)))
2926 (set (match_dup 4) (not:<VnSI> (match_dup 6)))]
2927 {
2928 operands[3] = gcn_operand_part (<VnDI>mode, operands[0], 0);
2929 operands[4] = gcn_operand_part (<VnDI>mode, operands[0], 1);
2930 operands[5] = gcn_operand_part (<VnDI>mode, operands[1], 0);
2931 operands[6] = gcn_operand_part (<VnDI>mode, operands[1], 1);
2932 }
2933 [(set_attr "type" "mult")])
2934
2935 ;; }}}
2936 ;; {{{ FP binops - special cases
2937
2938 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2939 ; adding the negated second operand to the first.
2940
; Vector DF subtraction, emitted as v_add_f64 with operand 2 negated.
; The two alternatives differ only in which source operand takes the wider
; "vSvB" constraint; the template places that operand first or second
; accordingly.
2941 (define_insn "sub<mode>3<exec>"
2942 [(set (match_operand:V_DF 0 "register_operand" "= v, v")
2943 (minus:V_DF
2944 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v")
2945 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))]
2946 ""
2947 "@
2948 v_add_f64\t%0, %1, -%2
2949 v_add_f64\t%0, -%2, %1"
2950 [(set_attr "type" "vop3a")
2951 (set_attr "length" "8,8")])
2952
; Scalar DF subtraction, emitted as v_add_f64 with operand 2 negated.
; Same two-alternative layout as the vector form: the alternatives differ
; in which source operand takes the "vSvB" constraint.
2953 (define_insn "subdf3"
2954 [(set (match_operand:DF 0 "register_operand" "= v, v")
2955 (minus:DF
2956 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2957 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2958 ""
2959 "@
2960 v_add_f64\t%0, %1, -%2
2961 v_add_f64\t%0, -%2, %1"
2962 [(set_attr "type" "vop3a")
2963 (set_attr "length" "8,8")])
2964
2965 ;; }}}
2966 ;; {{{ FP binops - generic
2967
; Code iterators for the generic FP binary patterns: the commutative
; operations, the non-commutative ones (only minus), and their union.
2968 (define_code_iterator comm_fp [plus mult smin smax])
2969 (define_code_iterator nocomm_fp [minus])
2970 (define_code_iterator all_fp [plus mult minus smin smax])
2971
; Commutative FP binary operation (plus, mult, smin, smax) on FP vectors.
; Emitted as a VOP2 instruction with the sources listed as %2, %1; the
; operands are marked commutative with "%".
2972 (define_insn "<expander><mode>3<exec>"
2973 [(set (match_operand:V_FP 0 "register_operand" "= v")
2974 (comm_fp:V_FP
2975 (match_operand:V_FP 1 "gcn_alu_operand" "% v")
2976 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
2977 ""
2978 "v_<mnemonic>0\t%0, %2, %1"
2979 [(set_attr "type" "vop2")
2980 (set_attr "length" "8")])
2981
; Commutative FP binary operation (plus, mult, smin, smax) on scalar FP
; modes.  Alternative 0 is the VGPR form.  Alternative 1 has an
; RL-constrained destination with operand 1 tied to it and is typed "ds".
; NOTE(review): alternative 1 still emits a v_ mnemonic and prints %1, not
; %2 - confirm this alternative is intended and reachable.
2982 (define_insn "<expander><mode>3"
2983 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
2984 (comm_fp:FP
2985 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
2986 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2987 ""
2988 "@
2989 v_<mnemonic>0\t%0, %2, %1
2990 v_<mnemonic>0\t%0, %1%O0"
2991 [(set_attr "type" "vop2,ds")
2992 (set_attr "length" "8")])
2993
; Non-commutative FP binary operation (minus) on single-register FP
; vectors.  Alternative 0 uses <mnemonic> with the sources in order
; (%1, %2); alternative 1 uses <revmnemonic> with them swapped (%2, %1), so
; that whichever operand takes the "vSvB" constraint is printed first.
2994 (define_insn "<expander><mode>3<exec>"
2995 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
2996 (nocomm_fp:V_FP_1REG
2997 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2998 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2999 ""
3000 "@
3001 v_<mnemonic>0\t%0, %1, %2
3002 v_<revmnemonic>0\t%0, %2, %1"
3003 [(set_attr "type" "vop2")
3004 (set_attr "length" "8,8")])
3005
; Non-commutative FP binary operation (minus) on single-register scalar FP
; modes.  Alternative 0 uses <mnemonic> with the sources in order;
; alternative 1 uses <revmnemonic> with them swapped, so that whichever
; operand takes the "vSvB" constraint is printed first.
3006 (define_insn "<expander><mode>3"
3007 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v")
3008 (nocomm_fp:FP_1REG
3009 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
3010 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
3011 ""
3012 "@
3013 v_<mnemonic>0\t%0, %1, %2
3014 v_<revmnemonic>0\t%0, %2, %1"
3015 [(set_attr "type" "vop2")
3016 (set_attr "length" "8,8")])
3017
; Scalar FP smin/smax under the names produced by <fexpander> (an attribute
; defined outside this section).  The preparation body is empty, so the
; expander simply emits the RTL template.
3018 (define_code_iterator fminmaxop [smin smax])
3019 (define_expand "<fexpander><mode>3"
3020 [(set (match_operand:FP 0 "gcn_valu_dst_operand")
3021 (fminmaxop:FP
3022 (match_operand:FP 1 "gcn_valu_src0_operand")
3023 (match_operand:FP 2 "gcn_valu_src1_operand")))]
3024 ""
3025 {})
3026
; Vector FP smin/smax under the names produced by <fexpander>.  The
; preparation body is empty, so the expander simply emits the RTL template.
3027 (define_expand "<fexpander><mode>3<exec>"
3028 [(set (match_operand:V_FP 0 "gcn_valu_dst_operand")
3029 (fminmaxop:V_FP
3030 (match_operand:V_FP 1 "gcn_valu_src0_operand")
3031 (match_operand:V_FP 2 "gcn_valu_src1_operand")))]
3032 ""
3033 {})
3034
3035 ;; }}}
3036 ;; {{{ FP unops
3037
; Scalar FP absolute value, emitted as an add of 0 and operand 1 with the
; |...| source modifier applied to operand 1.
3038 (define_insn "abs<mode>2"
3039 [(set (match_operand:FP 0 "register_operand" "=v")
3040 (abs:FP (match_operand:FP 1 "register_operand" " v")))]
3041 ""
3042 "v_add%i0\t%0, 0, |%1|"
3043 [(set_attr "type" "vop3a")
3044 (set_attr "length" "8")])
3045
; Vector FP absolute value, emitted as an add of 0 and operand 1 with the
; |...| source modifier applied to operand 1.
3046 (define_insn "abs<mode>2<exec>"
3047 [(set (match_operand:V_FP 0 "register_operand" "=v")
3048 (abs:V_FP
3049 (match_operand:V_FP 1 "register_operand" " v")))]
3050 ""
3051 "v_add%i0\t%0, 0, |%1|"
3052 [(set_attr "type" "vop3a")
3053 (set_attr "length" "8")])
3054
; Vector FP negation, emitted as an add of 0 and operand 1 with the
; negation source modifier applied to operand 1.
3055 (define_insn "neg<mode>2<exec>"
3056 [(set (match_operand:V_FP 0 "register_operand" "=v")
3057 (neg:V_FP
3058 (match_operand:V_FP 1 "register_operand" " v")))]
3059 ""
3060 "v_add%i0\t%0, 0, -%1"
3061 [(set_attr "type" "vop3a")
3062 (set_attr "length" "8")])
3063
; Vector FP square root using v_sqrt.  Only enabled when
; flag_unsafe_math_optimizations is set.
3064 (define_insn "sqrt<mode>2<exec>"
3065 [(set (match_operand:V_FP 0 "register_operand" "= v")
3066 (sqrt:V_FP
3067 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
3068 "flag_unsafe_math_optimizations"
3069 "v_sqrt%i0\t%0, %1"
3070 [(set_attr "type" "vop1")
3071 (set_attr "length" "8")])
3072
; Scalar FP square root using v_sqrt.  Only enabled when
; flag_unsafe_math_optimizations is set.
3073 (define_insn "sqrt<mode>2"
3074 [(set (match_operand:FP 0 "register_operand" "= v")
3075 (sqrt:FP
3076 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
3077 "flag_unsafe_math_optimizations"
3078 "v_sqrt%i0\t%0, %1"
3079 [(set_attr "type" "vop1")
3080 (set_attr "length" "8")])
3081
; Int iterators and attributes for the unspec-based FP unary patterns.
; math_unop supplies the pattern-name fragment; math_unop_insn supplies the
; fragment placed after "v_" in the output template (note exp2 -> "exp" and
; log2 -> "log").
3082 ; These FP unops have f64, f32 and f16 versions.
3083 (define_int_iterator MATH_UNOP_1OR2REG
3084 [UNSPEC_FLOOR UNSPEC_CEIL])
3085
3086 ; These FP unops only have f16/f32 versions.
3087 (define_int_iterator MATH_UNOP_1REG
3088 [UNSPEC_EXP2 UNSPEC_LOG2])
3089
3090 (define_int_iterator MATH_UNOP_TRIG
3091 [UNSPEC_SIN UNSPEC_COS])
3092
3093 (define_int_attr math_unop
3094 [(UNSPEC_FLOOR "floor")
3095 (UNSPEC_CEIL "ceil")
3096 (UNSPEC_EXP2 "exp2")
3097 (UNSPEC_LOG2 "log2")
3098 (UNSPEC_SIN "sin")
3099 (UNSPEC_COS "cos")])
3100
3101 (define_int_attr math_unop_insn
3102 [(UNSPEC_FLOOR "floor")
3103 (UNSPEC_CEIL "ceil")
3104 (UNSPEC_EXP2 "exp")
3105 (UNSPEC_LOG2 "log")
3106 (UNSPEC_SIN "sin")
3107 (UNSPEC_COS "cos")])
3108
; Scalar floor/ceil (MATH_UNOP_1OR2REG) for all FP modes; unconditional.
3109 (define_insn "<math_unop><mode>2"
3110 [(set (match_operand:FP 0 "register_operand" "= v")
3111 (unspec:FP
3112 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
3113 MATH_UNOP_1OR2REG))]
3114 ""
3115 "v_<math_unop_insn>%i0\t%0, %1"
3116 [(set_attr "type" "vop1")
3117 (set_attr "length" "8")])
3118
; Vector floor/ceil (MATH_UNOP_1OR2REG) for all FP vector modes;
; unconditional.
3119 (define_insn "<math_unop><mode>2<exec>"
3120 [(set (match_operand:V_FP 0 "register_operand" "= v")
3121 (unspec:V_FP
3122 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
3123 MATH_UNOP_1OR2REG))]
3124 ""
3125 "v_<math_unop_insn>%i0\t%0, %1"
3126 [(set_attr "type" "vop1")
3127 (set_attr "length" "8")])
3128
; Scalar exp2/log2 (MATH_UNOP_1REG) for single-register FP modes.  Only
; enabled when flag_unsafe_math_optimizations is set.
3129 (define_insn "<math_unop><mode>2"
3130 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
3131 (unspec:FP_1REG
3132 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
3133 MATH_UNOP_1REG))]
3134 "flag_unsafe_math_optimizations"
3135 "v_<math_unop_insn>%i0\t%0, %1"
3136 [(set_attr "type" "vop1")
3137 (set_attr "length" "8")])
3138
; Vector exp2/log2 (MATH_UNOP_1REG) for single-register FP vector modes.
; Only enabled when flag_unsafe_math_optimizations is set.
3139 (define_insn "<math_unop><mode>2<exec>"
3140 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
3141 (unspec:V_FP_1REG
3142 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
3143 MATH_UNOP_1REG))]
3144 "flag_unsafe_math_optimizations"
3145 "v_<math_unop_insn>%i0\t%0, %1"
3146 [(set_attr "type" "vop1")
3147 (set_attr "length" "8")])
3148
; Scalar sin/cos instruction (MATH_UNOP_TRIG) for single-register FP modes.
; The name starts with "*", so no gen_ function is produced for it; it only
; matches RTL that already contains the unspec.  Only enabled when
; flag_unsafe_math_optimizations is set.
3149 (define_insn "*<math_unop><mode>2_insn"
3150 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
3151 (unspec:FP_1REG
3152 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
3153 MATH_UNOP_TRIG))]
3154 "flag_unsafe_math_optimizations"
3155 "v_<math_unop_insn>%i0\t%0, %1"
3156 [(set_attr "type" "vop1")
3157 (set_attr "length" "8")])
3158
; Vector sin/cos instruction (MATH_UNOP_TRIG) for single-register FP vector
; modes.  The name starts with "*", so no gen_ function is produced for it;
; it only matches RTL that already contains the unspec.  Only enabled when
; flag_unsafe_math_optimizations is set.
3159 (define_insn "*<math_unop><mode>2<exec>_insn"
3160 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
3161 (unspec:V_FP_1REG
3162 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
3163 MATH_UNOP_TRIG))]
3164 "flag_unsafe_math_optimizations"
3165 "v_<math_unop_insn>%i0\t%0, %1"
3166 [(set_attr "type" "vop1")
3167 (set_attr "length" "8")])
3168
3169 ; Trigonometric functions need their input scaled by 1/(2*PI) first.
3170
; Scalar sin/cos expander.  Emits two sets: a multiply of operand 1 by the
; constant from gcn_dconst1over2pi into a fresh pseudo (operand 2), then
; the sin/cos unspec applied to that pseudo.  Only enabled when
; flag_unsafe_math_optimizations is set.
3171 (define_expand "<math_unop><mode>2"
3172 [(set (match_dup 2)
3173 (mult:FP_1REG
3174 (match_dup 3)
3175 (match_operand:FP_1REG 1 "gcn_alu_operand")))
3176 (set (match_operand:FP_1REG 0 "register_operand")
3177 (unspec:FP_1REG
3178 [(match_dup 2)]
3179 MATH_UNOP_TRIG))]
3180 "flag_unsafe_math_optimizations"
3181 {
3182 operands[2] = gen_reg_rtx (<MODE>mode);
3183 operands[3] = const_double_from_real_value (gcn_dconst1over2pi (),
3184 <MODE>mode);
3185 })
3186
; Vector sin/cos expander.  Emits two sets: a multiply of operand 1 by a
; vector constant (built with gcn_vec_constant from the scalar-mode value
; of gcn_dconst1over2pi) into a fresh pseudo (operand 2), then the sin/cos
; unspec applied to that pseudo.  Only enabled when
; flag_unsafe_math_optimizations is set.
3187 (define_expand "<math_unop><mode>2<exec>"
3188 [(set (match_dup 2)
3189 (mult:V_FP_1REG
3190 (match_dup 3)
3191 (match_operand:V_FP_1REG 1 "gcn_alu_operand")))
3192 (set (match_operand:V_FP_1REG 0 "register_operand")
3193 (unspec:V_FP_1REG
3194 [(match_dup 2)]
3195 MATH_UNOP_TRIG))]
3196 "flag_unsafe_math_optimizations"
3197 {
3198 operands[2] = gen_reg_rtx (<MODE>mode);
3199 operands[3] =
3200 gcn_vec_constant (<MODE>mode,
3201 const_double_from_real_value (gcn_dconst1over2pi (),
3202 <SCALAR_MODE>mode));
3203 })
3204
3205 ; Implement ldexp pattern
3206
; ldexp for scalar and vector FP modes (SV_FP): operand 1 is the FP value
; and operand 2 is a <VnSI>-mode integer operand.  Modelled as
; UNSPEC_LDEXP and emitted as a single v_ldexp instruction.
3207 (define_insn "ldexp<mode>3<exec>"
3208 [(set (match_operand:SV_FP 0 "register_operand" "= v")
3209 (unspec:SV_FP
3210 [(match_operand:SV_FP 1 "gcn_alu_operand" " vA")
3211 (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")]
3212 UNSPEC_LDEXP))]
3213 ""
3214 "v_ldexp%i0\t%0, %1, %2"
3215 [(set_attr "type" "vop3a")
3216 (set_attr "length" "8")])
3217
3218 ; Implement frexp patterns
3219
; Scalar frexp, exponent part: produces an SImode result from an FP input
; using v_frexp_exp_i32.  The type suffix is taken from operand 1 (%i1),
; i.e. from the FP input rather than the integer output.
3220 (define_insn "frexp<mode>_exp2"
3221 [(set (match_operand:SI 0 "register_operand" "=v")
3222 (unspec:SI
3223 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
3224 UNSPEC_FREXP_EXP))]
3225 ""
3226 "v_frexp_exp_i32%i1\t%0, %1"
3227 [(set_attr "type" "vop1")
3228 (set_attr "length" "8")])
3229
; Scalar frexp, mantissa part: FP input to FP output of the same mode using
; v_frexp_mant.
3230 (define_insn "frexp<mode>_mant2"
3231 [(set (match_operand:FP 0 "register_operand" "=v")
3232 (unspec:FP
3233 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
3234 UNSPEC_FREXP_MANT))]
3235 ""
3236 "v_frexp_mant%i1\t%0, %1"
3237 [(set_attr "type" "vop1")
3238 (set_attr "length" "8")])
3239
; Vector frexp, exponent part: produces a <VnSI> result from an FP vector
; input using v_frexp_exp_i32.  The type suffix is taken from operand 1
; (%i1), i.e. from the FP input.
3240 (define_insn "frexp<mode>_exp2<exec>"
3241 [(set (match_operand:<VnSI> 0 "register_operand" "=v")
3242 (unspec:<VnSI>
3243 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
3244 UNSPEC_FREXP_EXP))]
3245 ""
3246 "v_frexp_exp_i32%i1\t%0, %1"
3247 [(set_attr "type" "vop1")
3248 (set_attr "length" "8")])
3249
; Vector frexp, mantissa part: FP vector input to FP vector output of the
; same mode using v_frexp_mant.
3250 (define_insn "frexp<mode>_mant2<exec>"
3251 [(set (match_operand:V_FP 0 "register_operand" "=v")
3252 (unspec:V_FP
3253 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
3254 UNSPEC_FREXP_MANT))]
3255 ""
3256 "v_frexp_mant%i1\t%0, %1"
3257 [(set_attr "type" "vop3a")
3258 (set_attr "length" "8")])
3259
3260 ;; }}}
3261 ;; {{{ FP fused multiply and add
3262
; Vector fused multiply-add: op0 = op1 * op2 + op3.
; Operands 1 and 2 are marked commutative.  In each alternative exactly one
; of operands 2 and 3 takes the wider "vSvA" constraint; the other sources
; take "vA".
3263 (define_insn "fma<mode>4<exec>"
3264 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
3265 (fma:V_FP
3266 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
3267 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
3268 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
3269 ""
3270 "v_fma%i0\t%0, %1, %2, %3"
3271 [(set_attr "type" "vop3a")
3272 (set_attr "length" "8")])
3273
; Vector fused multiply-add with operand 2 negated:
; op0 = op1 * (-op2) + op3.  The negation is expressed as a source modifier
; in the template.  Three alternatives: in each, exactly one of the three
; sources takes the wider "vSvA" constraint.
3274 (define_insn "fma<mode>4_negop2<exec>"
3275 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
3276 (fma:V_FP
3277 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3278 (neg:V_FP
3279 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3280 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
3281 ""
3282 "v_fma%i0\t%0, %1, -%2, %3"
3283 [(set_attr "type" "vop3a")
3284 (set_attr "length" "8")])
3285
; Scalar fused multiply-add: op0 = op1 * op2 + op3.
; Operands 1 and 2 are marked commutative.  In each alternative exactly one
; of operands 2 and 3 takes the wider "vSvA" constraint.
3286 (define_insn "fma<mode>4"
3287 [(set (match_operand:FP 0 "register_operand" "= v, v")
3288 (fma:FP
3289 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
3290 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
3291 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
3292 ""
3293 "v_fma%i0\t%0, %1, %2, %3"
3294 [(set_attr "type" "vop3a")
3295 (set_attr "length" "8")])
3296
; Scalar fused multiply-add with operand 2 negated:
; op0 = op1 * (-op2) + op3.  Three alternatives: in each, exactly one of
; the three sources takes the wider "vSvA" constraint.
3297 (define_insn "fma<mode>4_negop2"
3298 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
3299 (fma:FP
3300 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3301 (neg:FP
3302 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3303 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
3304 ""
3305 "v_fma%i0\t%0, %1, -%2, %3"
3306 [(set_attr "type" "vop3a")
3307 (set_attr "length" "8")])
3308
; Vector fused multiply-subtract: op0 = op1 * op2 + (-op3), emitted as
; v_fma with the negation modifier on operand 3.
3309 (define_insn "fms<mode>4<exec>"
3310 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
3311 (fma:V_FP
3312 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
3313 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
3314 (neg:V_FP
3315 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA"))))]
3316 ""
3317 "v_fma%i0\t%0, %1, %2, -%3"
3318 [(set_attr "type" "vop3a")
3319 (set_attr "length" "8")])
3320
; Vector fused multiply-subtract with operand 2 also negated:
; op0 = op1 * (-op2) + (-op3), emitted as v_fma with negation modifiers on
; operands 2 and 3.
3321 (define_insn "fms<mode>4_negop2<exec>"
3322 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
3323 (fma:V_FP
3324 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3325 (neg:V_FP
3326 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3327 (neg:V_FP
3328 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))]
3329 ""
3330 "v_fma%i0\t%0, %1, -%2, -%3"
3331 [(set_attr "type" "vop3a")
3332 (set_attr "length" "8")])
3333
; Scalar fused multiply-subtract: op0 = op1 * op2 + (-op3), emitted as
; v_fma with the negation modifier on operand 3.
3334 (define_insn "fms<mode>4"
3335 [(set (match_operand:FP 0 "register_operand" "= v, v")
3336 (fma:FP
3337 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
3338 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
3339 (neg:FP
3340 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA"))))]
3341 ""
3342 "v_fma%i0\t%0, %1, %2, -%3"
3343 [(set_attr "type" "vop3a")
3344 (set_attr "length" "8")])
3345
; Scalar fused multiply-subtract with operand 2 also negated:
; op0 = op1 * (-op2) + (-op3), emitted as v_fma with negation modifiers on
; operands 2 and 3.
3346 (define_insn "fms<mode>4_negop2"
3347 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
3348 (fma:FP
3349 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3350 (neg:FP
3351 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3352 (neg:FP
3353 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))]
3354 ""
3355 "v_fma%i0\t%0, %1, -%2, -%3"
3356 [(set_attr "type" "vop3a")
3357 (set_attr "length" "8")])
3358
3359 ;; }}}
3360 ;; {{{ FP division
3361
; Hardware reciprocal for scalar and vector FP modes, modelled as
; UNSPEC_RCP and emitted as a single v_rcp instruction.
3362 (define_insn "recip<mode>2<exec>"
3363 [(set (match_operand:SV_FP 0 "register_operand" "= v")
3364 (unspec:SV_FP
3365 [(match_operand:SV_FP 1 "gcn_alu_operand" "vSvB")]
3366 UNSPEC_RCP))]
3367 ""
3368 "v_rcp%i0\t%0, %1"
3369 [(set_attr "type" "vop1")
3370 (set_attr "length" "8")])
3371
3372 ;; v_div_scale takes a numerator (op2) and denominator (op1) and returns the
3373 ;; one that matches op3 adjusted for best results in reciprocal division.
3374 ;; It also emits a VCC mask that is intended for input to v_div_fmas.
3375 ;; The caller is expected to call this twice, once for each input. The output
3376 ;; VCC is the same in both cases, so the caller may discard one.
; Pattern shape: two parallel sets, the scaled FP value (operand 0) and a
; DImode mask (operand 4), both expressed as UNSPEC_DIV_SCALE of operands
; 1, 2 and 3.  In the assembler template the operands are printed in the
; order %0, %4, %3, %1, %2.
3377 (define_insn "div_scale<mode><exec_vcc>"
3378 [(set (match_operand:SV_SFDF 0 "register_operand" "=v")
3379 (unspec:SV_SFDF
3380 [(match_operand:SV_SFDF 1 "gcn_alu_operand" "v")
3381 (match_operand:SV_SFDF 2 "gcn_alu_operand" "v")
3382 (match_operand:SV_SFDF 3 "gcn_alu_operand" "v")]
3383 UNSPEC_DIV_SCALE))
3384 (set (match_operand:DI 4 "register_operand" "=SvcV")
3385 (unspec:DI
3386 [(match_dup 1) (match_dup 2) (match_dup 3)]
3387 UNSPEC_DIV_SCALE))]
3388 ""
3389 "v_div_scale%i0\t%0, %4, %3, %1, %2"
3390 [(set_attr "type" "vop3b")
3391 (set_attr "length" "8")])
3392
3393 ;; v_div_fmas is "FMA and Scale" that uses the VCC output from v_div_scale
3394 ;; to conditionally scale the output of the whole division operation.
3395 ;; This is necessary to counter the adjustments made by v_div_scale and
3396 ;; replaces the last FMA instruction of the Newton Raphson algorithm.
; Pattern shape: UNSPEC_DIV_FMAS over (op1 * op2 + op3) and the DImode mask
; in operand 4, which is constrained to "cV".  The mask is not an explicit
; instruction operand: the template prints it only after ";", i.e. as an
; assembler comment.  The "vccwait" attribute is set to 5.
3397 (define_insn "div_fmas<mode><exec>"
3398 [(set (match_operand:SV_SFDF 0 "register_operand" "=v")
3399 (unspec:SV_SFDF
3400 [(plus:SV_SFDF
3401 (mult:SV_SFDF
3402 (match_operand:SV_SFDF 1 "gcn_alu_operand" "v")
3403 (match_operand:SV_SFDF 2 "gcn_alu_operand" "v"))
3404 (match_operand:SV_SFDF 3 "gcn_alu_operand" "v"))
3405 (match_operand:DI 4 "register_operand" "cV")]
3406 UNSPEC_DIV_FMAS))]
3407 ""
3408 "v_div_fmas%i0\t%0, %1, %2, %3; %4"
3409 [(set_attr "type" "vop3a")
3410 (set_attr "length" "8")
3411 (set_attr "vccwait" "5")])
3412
3413 ;; v_div_fixup takes the inputs and outputs of a division operation already
3414 ;; completed and cleans up the floating-point sign bit, infinity, underflow,
3415 ;; overflow, and NaN status. It will also emit any FP exceptions.
3416 ;; op1: quotient, op2: denominator, op3: numerator
; Pattern shape: UNSPEC_DIV_FIXUP over three FP operands, all in VGPRs,
; emitted as a single v_div_fixup instruction with the operands in order.
3417 (define_insn "div_fixup<mode><exec>"
3418 [(set (match_operand:SV_FP 0 "register_operand" "=v")
3419 (unspec:SV_FP
3420 [(match_operand:SV_FP 1 "register_operand" "v")
3421 (match_operand:SV_FP 2 "gcn_alu_operand" "v")
3422 (match_operand:SV_FP 3 "gcn_alu_operand" "v")]
3423 UNSPEC_DIV_FIXUP))]
3424 ""
3425 "v_div_fixup%i0\t%0, %1, %2, %3"
3426 [(set_attr "type" "vop3a")
3427 (set_attr "length" "8")])
3428
; FP division for the SV_SFDF modes: operand 0 = operand 1 / operand 2.
; There is no single divide instruction used here; the expander emits:
;   - div_scale twice (once selecting the denominator, once the numerator),
;     keeping only the mask from the second call;
;   - recip of the scaled denominator, refined by two FMAs;
;   - a quotient estimate (mul) refined by three further FMAs;
;   - div_fmas, consuming the mask from div_scale;
;   - div_fixup with the original, unscaled denominator and numerator.
; Every intermediate value lives in a fresh pseudo; only operand 0 is
; written.
3429 (define_expand "div<mode>3"
3430 [(match_operand:SV_SFDF 0 "register_operand")
3431 (match_operand:SV_SFDF 1 "gcn_alu_operand")
3432 (match_operand:SV_SFDF 2 "gcn_alu_operand")]
3433 ""
3434 {
3435 rtx numerator = operands[1];
3436 rtx denominator = operands[2];
3437
3438 /* Scale the inputs if they are close to the FP limits.
3439 This will be reversed later. */
3440 rtx vcc = gen_reg_rtx (DImode);
3441 rtx discardedvcc = gen_reg_rtx (DImode);
3442 rtx scaled_numerator = gen_reg_rtx (<MODE>mode);
3443 rtx scaled_denominator = gen_reg_rtx (<MODE>mode);
3444 emit_insn (gen_div_scale<mode> (scaled_denominator,
3445 denominator, numerator,
3446 denominator, discardedvcc));
3447 emit_insn (gen_div_scale<mode> (scaled_numerator,
3448 denominator, numerator,
3449 numerator, vcc));
3450
3451 /* Find the reciprocal of the denominator, and use Newton-Raphson to
3452 improve the accuracy over the basic hardware instruction. */
3453 rtx one = gcn_vec_constant (<MODE>mode,
3454 const_double_from_real_value (dconst1, <SCALAR_MODE>mode));
3455 rtx initrcp = gen_reg_rtx (<MODE>mode);
3456 rtx fma1 = gen_reg_rtx (<MODE>mode);
3457 rtx rcp = gen_reg_rtx (<MODE>mode);
3458 emit_insn (gen_recip<mode>2 (initrcp, scaled_denominator));
3459 emit_insn (gen_fma<mode>4_negop2 (fma1, initrcp, scaled_denominator, one));
3460 emit_insn (gen_fma<mode>4 (rcp, fma1, initrcp, initrcp));
3461
3462 /* Do the division "a/b" via "a*1/b" and use Newton-Raphson to improve
3463 the accuracy. The "div_fmas" instruction reverses any scaling
3464 performed by "div_scale", above. */
3465 rtx div_est = gen_reg_rtx (<MODE>mode);
3466 rtx fma2 = gen_reg_rtx (<MODE>mode);
3467 rtx fma3 = gen_reg_rtx (<MODE>mode);
3468 rtx fma4 = gen_reg_rtx (<MODE>mode);
3469 rtx fmas = gen_reg_rtx (<MODE>mode);
3470 emit_insn (gen_mul<mode>3 (div_est, scaled_numerator, rcp));
3471 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, scaled_denominator,
3472 scaled_numerator));
3473 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
3474 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, scaled_denominator,
3475 scaled_numerator));
3476 emit_insn (gen_div_fmas<mode> (fmas, fma4, rcp, fma3, vcc));
3477
3478 /* Finally, use "div_fixup" to get the details right and find errors. */
3479 emit_insn (gen_div_fixup<mode> (operands[0], fmas, denominator,
3480 numerator));
3481 DONE;
3482 })
3483
3484 ;; }}}
3485 ;; {{{ Int/FP conversions
3486
; Iterators and attributes for the int/FP conversion patterns.
;   CVT_FROM_MODE / CVT_TO_MODE  - scalar source and destination modes.
;   VCVT_MODE   - all vector modes taking part in conversions.
;   VCVT_FMODE  - the FP subset; VCVT_IMODE - the integer subset.
;   cvt_op      - the conversion RTL codes.
;   cvt_name    - pattern-name prefix for each code.
;   cvt_operands - the type-suffix print codes appended to "v_cvt"; the
;                  unsigned codes use %u on the integer side.
3487 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
3488 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
3489
3490 (define_mode_iterator VCVT_MODE
3491 [V2HI V2SI V2HF V2SF V2DF
3492 V4HI V4SI V4HF V4SF V4DF
3493 V8HI V8SI V8HF V8SF V8DF
3494 V16HI V16SI V16HF V16SF V16DF
3495 V32HI V32SI V32HF V32SF V32DF
3496 V64HI V64SI V64HF V64SF V64DF])
3497 (define_mode_iterator VCVT_FMODE
3498 [V2HF V2SF V2DF
3499 V4HF V4SF V4DF
3500 V8HF V8SF V8DF
3501 V16HF V16SF V16DF
3502 V32HF V32SF V32DF
3503 V64HF V64SF V64DF])
3504 (define_mode_iterator VCVT_IMODE
3505 [V2HI V2SI
3506 V4HI V4SI
3507 V8HI V8SI
3508 V16HI V16SI
3509 V32HI V32SI
3510 V64HI V64SI])
3511
3512 (define_code_iterator cvt_op [fix unsigned_fix
3513 float unsigned_float
3514 float_extend float_truncate])
3515 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
3516 (float "float") (unsigned_float "floatuns")
3517 (float_extend "extend") (float_truncate "trunc")])
3518 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
3519 (float "%i0%i1") (unsigned_float "%i0%u1")
3520 (float_extend "%i0%i1")
3521 (float_truncate "%i0%i1")])
3522
; Scalar conversion between every CVT_FROM_MODE / CVT_TO_MODE pair and
; every cvt_op code.  The insn condition calls gcn_valid_cvt_p to enable
; only the combinations it accepts.
3523 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
3524 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
3525 (cvt_op:CVT_TO_MODE
3526 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
3527 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
3528 <cvt_name>_cvt)"
3529 "v_cvt<cvt_operands>\t%0, %1"
3530 [(set_attr "type" "vop1")
3531 (set_attr "length" "8")])
3532
; Vector conversion from any VCVT_MODE to an FP vector mode.
; Enabled only when both modes have the same MODE_VF value (presumably the
; same number of lanes) and gcn_valid_cvt_p accepts the pair for this code.
3533 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
3534 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
3535 (cvt_op:VCVT_FMODE
3536 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
3537 "MODE_VF (<VCVT_MODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
3538 && gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
3539 <cvt_name>_cvt)"
3540 "v_cvt<cvt_operands>\t%0, %1"
3541 [(set_attr "type" "vop1")
3542 (set_attr "length" "8")])
3543
; Vector conversion from an FP vector mode to an integer vector mode.
; Enabled only when both modes have the same MODE_VF value (presumably the
; same number of lanes) and gcn_valid_cvt_p accepts the pair for this code.
3544 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
3545 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
3546 (cvt_op:VCVT_IMODE
3547 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
3548 "MODE_VF (<VCVT_IMODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
3549 && gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
3550 <cvt_name>_cvt)"
3551 "v_cvt<cvt_operands>\t%0, %1"
3552 [(set_attr "type" "vop1")
3553 (set_attr "length" "8")])
3554
3555 ;; }}}
3556 ;; {{{ Int/int conversions
3557
; zero_convert iterates over the two conversions that share one pattern
; below (truncate and zero_extend).  convop maps each conversion code to
; the prefix used in pattern names.
3558 (define_code_iterator zero_convert [truncate zero_extend])
3559 (define_code_attr convop [
3560 (sign_extend "extend")
3561 (zero_extend "zero_extend")
3562 (truncate "trunc")])
3563
; Truncate or zero-extend between two single-register integer vector
; modes.  Emitted as v_mov_b32_sdwa with dst_sel taken from the destination
; mode's sdwa attribute, src0_sel from the source mode's, and
; dst_unused:UNUSED_PAD.
3564 (define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
3565 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3566 (zero_convert:V_INT_1REG
3567 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3568 ""
3569 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
3570 [(set_attr "type" "vop_sdwa")
3571 (set_attr "length" "8")])
3572
; Sign-extend between two single-register integer vector modes.
; Emitted as v_mov_b32_sdwa with the sext() modifier on the source and
; src0_sel taken from the source mode's sdwa attribute; no dst_sel is
; given.
3573 (define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
3574 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3575 (sign_extend:V_INT_1REG
3576 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3577 ""
3578 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
3579 [(set_attr "type" "vop_sdwa")
3580 (set_attr "length" "8")])
3581
3582 ;; GCC can already do these for scalar types, but not for vector types.
3583 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
3584 ;; so there must be a few tricks here.
3585
;; Truncate a <VnDI> vector to a single-register integer vector.
;; The insn is emitted as "#" and split after reload: only part 0 of the
;; input is read, and it is narrowed again when the result mode is not
;; <VnSI>.
(define_insn_and_split "trunc<vndi><mode>2"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
        (truncate:V_INT_1REG
          (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src_lo = gcn_operand_part (<VnDI>mode, operands[1], 0);

    if (<MODE>mode == <VnSI>mode)
      /* Result has the same mode as the low part: a move is enough.  */
      emit_move_insn (dest, src_lo);
    else
      emit_insn (gen_trunc<vnsi><mode>2 (dest, src_lo));
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
3605
;; Masked variant of the <VnDI> truncation: operand 2 supplies the merge
;; value and operand 3 the DI mask of the vec_merge.  Split after reload
;; into the matching masked narrow truncation, or a masked move when the
;; result mode is already <VnSI>.
(define_insn_and_split "trunc<vndi><mode>2_exec"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
        (vec_merge:V_INT_1REG
          (truncate:V_INT_1REG
            (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
          (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
          (match_operand:DI 3 "gcn_exec_operand" " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx fallback = operands[2];
    rtx lane_mask = operands[3];
    rtx src_lo = gcn_operand_part (<VnDI>mode, operands[1], 0);

    if (<MODE>mode == <VnSI>mode)
      emit_insn (gen_mov<mode>_exec (dest, src_lo, fallback, lane_mask));
    else
      emit_insn (gen_trunc<vnsi><mode>2_exec (dest, src_lo, fallback,
                                              lane_mask));
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
3630
;; Sign- or zero-extend a single-register integer vector to <VnDI>.
;; Split after reload into two steps: fill part 0 of the result (extending
;; to <VnSI> first if needed), then build part 1 either by an arithmetic
;; right shift of part 0 by 31 (signed) or by duplicating zero (unsigned).
(define_insn_and_split "<convop><mode><vndi>2"
  [(set (match_operand:<VnDI> 0 "register_operand" "=v")
        (any_extend:<VnDI>
          (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx src = operands[1];
    rtx dest_lo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<VnDI>mode, operands[0], 1);

    /* Low half of the result.  */
    if (<MODE>mode == <VnSI>mode)
      emit_move_insn (dest_lo, src);
    else
      emit_insn (gen_<convop><mode><vnsi>2 (dest_lo, src));

    /* High half of the result.  */
    if ('<su>' != 's')
      emit_insn (gen_vec_duplicate<vnsi> (dest_hi, const0_rtx));
    else
      emit_insn (gen_ashr<vnsi>3 (dest_hi, dest_lo, GEN_INT (31)));
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
3655
;; Masked variant of the extension to <VnDI>: operand 2 is the merge value
;; and operand 3 the DI mask.  Split after reload; each half of the result
;; is produced by a masked insn that merges with the corresponding half of
;; operand 2.
(define_insn_and_split "<convop><mode><vndi>2_exec"
  [(set (match_operand:<VnDI> 0 "register_operand" "=v")
        (vec_merge:<VnDI>
          (any_extend:<VnDI>
            (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
          (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
          (match_operand:DI 3 "gcn_exec_operand" " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx src = operands[1];
    rtx lane_mask = operands[3];
    rtx dest_lo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<VnDI>mode, operands[0], 1);
    rtx keep_lo = gcn_operand_part (<VnDI>mode, operands[2], 0);
    rtx keep_hi = gcn_operand_part (<VnDI>mode, operands[2], 1);

    /* Low half of the result.  */
    if (<MODE>mode == <VnSI>mode)
      emit_insn (gen_mov<mode>_exec (dest_lo, src, keep_lo, lane_mask));
    else
      emit_insn (gen_<convop><mode><vnsi>2_exec (dest_lo, src, keep_lo,
                                                 lane_mask));

    /* High half of the result.  */
    if ('<su>' != 's')
      emit_insn (gen_vec_duplicate<vnsi>_exec (dest_hi, const0_rtx, keep_hi,
                                               lane_mask));
    else
      emit_insn (gen_ashr<vnsi>3_exec (dest_hi, dest_lo, GEN_INT (31),
                                       keep_hi, lane_mask));
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
3688
3689 ;; }}}
3690 ;; {{{ Vector comparison/merge
3691
;; Vector comparison producing a DI result in operand 0.
;; Eight alternatives; the "type", "length" and "rdna" attribute lists below
;; are positional and must stay in step with the constraint columns.
;;  - alternatives 0-3 name vcc explicitly in the output (v_cmp / v_cmpx);
;;    alternatives 2-3 additionally use the DI scratch (operand 4) and are
;;    marked rdna "no".
;;  - alternatives 4-5 write operand 0 via the vop3a encoding.
;;  - alternatives 6-7 print v_cmpx with no destination operand and are
;;    marked rdna "yes".
;; The comparison mnemonic suffix comes from the %E1 operand modifier.
3692 (define_insn "vec_cmp<mode>di"
3693 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
3694 (match_operator:DI 1 "gcn_fp_compare_operator"
3695 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
3696 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")]))
3697 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X, X, X"))]
3698 ""
3699 "@
3700 v_cmp%E1\tvcc, %2, %3
3701 v_cmp%E1\tvcc, %2, %3
3702 v_cmpx%E1\tvcc, %2, %3
3703 v_cmpx%E1\tvcc, %2, %3
3704 v_cmp%E1\t%0, %2, %3
3705 v_cmp%E1\t%0, %2, %3
3706 v_cmpx%E1\t%2, %3
3707 v_cmpx%E1\t%2, %3"
3708 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
3709 (set_attr "length" "4,8,4,8,8,8,4,8")
3710 (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
3711
;; Unsigned vector comparison for integer vectors (no QI elements).
;; Forwards to the vec_cmp<mode>di insn unchanged; the operator in
;; operand 1 carries the signedness.
(define_expand "vec_cmpu<mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
  ""
  {
    rtx result = operands[0];
    rtx comparison = operands[1];
    rtx lhs = operands[2];
    rtx rhs = operands[3];

    /* The signed pattern is reused as-is: an unsigned comparison differs
       only in its operator code (e.g. LTU rather than LT), and the '%E1'
       output modifier prints the right mnemonic for it.  */
    emit_insn (gen_vec_cmp<mode>di (result, comparison, lhs, rhs));
    DONE;
  })
3726
; 8-bit vector elements have no comparison instruction of their own, so
; both inputs are widened to <VnSI> first and compared there.
(define_expand "vec_cmp<u><mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
  "can_create_pseudo_p ()"
  {
    rtx wide_lhs = gen_reg_rtx (<VnSI>mode);
    rtx wide_rhs = gen_reg_rtx (<VnSI>mode);

    /* Widen each input with the extension matching this variant.  */
    emit_insn (gen_<expander><mode><vnsi>2 (wide_lhs, operands[2]));
    emit_insn (gen_<expander><mode><vnsi>2 (wide_rhs, operands[3]));

    /* Compare the widened values.  */
    emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1],
                                    wide_lhs, wide_rhs));
    DONE;
  })
3743
;; Masked vector comparison: the DI comparison result is ANDed with the
;; mask in operand 4.  The alternatives, output templates and attribute
;; lists mirror vec_cmp<mode>di column for column; operand 5 is the DI
;; scratch used by alternatives 2-3.
3744 (define_insn "vec_cmp<mode>di_exec"
3745 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
3746 (and:DI
3747 (match_operator 1 "gcn_fp_compare_operator"
3748 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
3749 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")])
3750 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e, e, e")))
3751 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X, X, X"))]
3752 ""
3753 "@
3754 v_cmp%E1\tvcc, %2, %3
3755 v_cmp%E1\tvcc, %2, %3
3756 v_cmpx%E1\tvcc, %2, %3
3757 v_cmpx%E1\tvcc, %2, %3
3758 v_cmp%E1\t%0, %2, %3
3759 v_cmp%E1\t%0, %2, %3
3760 v_cmpx%E1\t%2, %3
3761 v_cmpx%E1\t%2, %3"
3762 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
3763 (set_attr "length" "4,8,4,8,8,8,4,8")
3764 (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
3765
;; Masked unsigned vector comparison for integer vectors (no QI elements).
;; Operand 0 receives the DI result, operand 1 is the comparison operator
;; applied to operands 2 and 3, and operand 4 is the mask.
;; Forwards to the vec_cmp<mode>di_exec insn, exactly as the unmasked
;; vec_cmpu<mode>di expander forwards to vec_cmp<mode>di.
(define_expand "vec_cmpu<mode>di_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    /* Unsigned comparisons use the same patterns as signed comparisons,
       except that they use unsigned operators (e.g. LTU vs LT).
       The '%E1' directive then does the Right Thing.

       This must call the vec_cmp (not vec_cmpu) generator: calling
       gen_vec_cmpu<mode>di_exec here would re-enter this expander and
       recurse without end.  */
    emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
                                         operands[2], operands[3],
                                         operands[4]));
    DONE;
  })
3782
;; Masked comparison of 8-bit element vectors.  As in the unmasked
;; expander, both inputs are widened to <VnSI> and compared there;
;; operand 4 is the mask applied to the extensions and to the comparison.
(define_expand "vec_cmp<u><mode>di_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  "can_create_pseudo_p ()"
  {
    rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
    rtx sitmp2 = gen_reg_rtx (<VnSI>mode);

    /* The merge operand of the masked extension must have the destination
       mode (<VnSI>); the QI-vector source cannot be used for it.  Lanes
       outside the mask are never observed, because the comparison below is
       masked by the same operand, so an undefined merge value is fine.  */
    emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
                                                 gcn_gen_undef (<VnSI>mode),
                                                 operands[4]));
    emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
                                                 gcn_gen_undef (<VnSI>mode),
                                                 operands[4]));
    emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
                                         sitmp2, operands[4]));
    DONE;
  })
3802
;; Vector comparison in which the first comparison input is a scalar
;; (operand 2, <SCALAR_MODE>) duplicated across the vector.
;; Seven alternatives; the attribute lists below are positional and must
;; stay in step with the constraint columns.  Alternatives 0-3 name vcc in
;; the output, alternative 4 writes operand 0 (vop3a), and alternatives 5-6
;; print v_cmpx with no destination operand (rdna "yes").
3803 (define_insn "vec_cmp<mode>di_dup"
3804 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
3805 (match_operator:DI 1 "gcn_fp_compare_operator"
3806 [(vec_duplicate:V_noQI
3807 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3808 " Sv, B,Sv,B, A,Sv,B"))
3809 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")]))
3810 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X, X,X"))]
3811 ""
3812 "@
3813 v_cmp%E1\tvcc, %2, %3
3814 v_cmp%E1\tvcc, %2, %3
3815 v_cmpx%E1\tvcc, %2, %3
3816 v_cmpx%E1\tvcc, %2, %3
3817 v_cmp%E1\t%0, %2, %3
3818 v_cmpx%E1\t%2, %3
3819 v_cmpx%E1\t%2, %3"
3820 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
3821 (set_attr "length" "4,8,4,8,8,4,8")
3822 (set_attr "rdna" "*,*,no,no,*,yes,yes")])
3823
;; Masked form of vec_cmp<mode>di_dup: the comparison of the duplicated
;; scalar (operand 2) with the vector (operand 3) is ANDed with the mask in
;; operand 4.  Alternatives, templates and attribute lists mirror the
;; unmasked pattern column for column; operand 5 is the DI scratch.
3824 (define_insn "vec_cmp<mode>di_dup_exec"
3825 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
3826 (and:DI
3827 (match_operator 1 "gcn_fp_compare_operator"
3828 [(vec_duplicate:V_noQI
3829 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3830 " Sv, B,Sv,B, A,Sv,B"))
3831 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")])
3832 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e, e,e")))
3833 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X, X,X"))]
3834 ""
3835 "@
3836 v_cmp%E1\tvcc, %2, %3
3837 v_cmp%E1\tvcc, %2, %3
3838 v_cmpx%E1\tvcc, %2, %3
3839 v_cmpx%E1\tvcc, %2, %3
3840 v_cmp%E1\t%0, %2, %3
3841 v_cmpx%E1\t%2, %3
3842 v_cmpx%E1\t%2, %3"
3843 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
3844 (set_attr "length" "4,8,4,8,8,4,8")
3845 (set_attr "rdna" "*,*,no,no,*,yes,yes")])
3846
;; Per-lane select: operand 0 is the vec_merge of operands 1 and 2 under
;; the DI mask in operand 3.  The preparation body is empty, so the
;; parallel (including the <VnDI> scratch clobber) is emitted as written.
3847 (define_expand "vcond_mask_<mode>di"
3848 [(parallel
3849 [(set (match_operand:V_ALL 0 "register_operand" "")
3850 (vec_merge:V_ALL
3851 (match_operand:V_ALL 1 "gcn_vop3_operand" "")
3852 (match_operand:V_ALL 2 "gcn_alu_operand" "")
3853 (match_operand:DI 3 "register_operand" "")))
3854 (clobber (scratch:<VnDI>))])]
3855 ""
3856 "")
3857
;; Vector conditional select.  Operand 3 compares operands 4 and 5
;; (V_ALL_ALT); the resulting DI mask then chooses between operands 1 and 2
;; (V_ALL) for the result in operand 0.
(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx lane_mask = gen_reg_rtx (DImode);

    /* Step 1: evaluate the comparison into a DI lane mask.  */
    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di (lane_mask, operands[3],
                                              operands[4], operands[5]));

    /* Step 2: select between the two value operands using that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], lane_mask));
    DONE;
  })
3874
;; Vector conditional select whose comparison is masked by operand 6.
;; Only the comparison takes the mask; the select itself uses the
;; unmasked vcond_mask pattern.
(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx lane_mask = gen_reg_rtx (DImode);

    /* Step 1: masked comparison into a DI lane mask.  */
    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec (lane_mask, operands[3],
                                                   operands[4], operands[5],
                                                   operands[6]));

    /* Step 2: select between the two value operands using that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], lane_mask));
    DONE;
  })
3892
;; Vector conditional select driven by an unsigned integer comparison.
;; Operand 3 compares operands 4 and 5 (V_INT) through the vec_cmpu
;; expander; the DI mask then chooses between operands 1 and 2 (V_ALL).
(define_expand "vcondu<V_ALL:mode><V_INT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx lane_mask = gen_reg_rtx (DImode);

    /* Step 1: evaluate the unsigned comparison into a DI lane mask.  */
    emit_insn (gen_vec_cmpu<V_INT:mode>di (lane_mask, operands[3],
                                           operands[4], operands[5]));

    /* Step 2: select between the two value operands using that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], lane_mask));
    DONE;
  })
3909
;; Unsigned-comparison conditional select whose comparison is masked by
;; operand 6.  Only the comparison takes the mask; the select uses the
;; unmasked vcond_mask pattern.
(define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx lane_mask = gen_reg_rtx (DImode);

    /* Step 1: masked unsigned comparison into a DI lane mask.  */
    emit_insn (gen_vec_cmpu<V_INT:mode>di_exec (lane_mask, operands[3],
                                                operands[4], operands[5],
                                                operands[6]));

    /* Step 2: select between the two value operands using that mask.  */
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
                                              operands[2], lane_mask));
    DONE;
  })
3927
3928 ;; }}}
3929 ;; {{{ Fully masked loop support
3930
;; Build the DI lane mask for a fully-masked loop in operand 0 from the SI
;; bounds in operands 1 and 2.  When both bounds are CONST_INT the mask is
;; computed at compile time; otherwise it is produced by a V64SI compare.
;; Finally the mask is limited to the low INTVAL (operands[3]) bits when
;; that value is below 64 (operand 3 is read with INTVAL unconditionally).
(define_expand "while_ultsidi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:SI 1 "")
   (match_operand:SI 2 "")
   (match_operand:SI 3 "")]
  ""
  {
    bool const_bounds_p = (GET_CODE (operands[1]) == CONST_INT
                           && GET_CODE (operands[2]) == CONST_INT);

    if (const_bounds_p)
      {
        /* Both bounds known: set the low (op2 - op1) bits, all 64 bits if
           the distance is 64 or more.  */
        HOST_WIDE_INT span = INTVAL (operands[2]) - INTVAL (operands[1]);
        HOST_WIDE_INT lanes = -1;
        if (span < 64)
          lanes = ~((unsigned HOST_WIDE_INT)-1 << span);
        emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, lanes));
      }
    else
      {
        /* NOTE(review): VGPR 1 is presumably preloaded with the lane
           numbers 0, 1, 2, 3, ... -- confirm against the prologue code.  */
        rtx lane_ids = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
        rtx counters = lane_ids;
        bool start_is_zero_p = (GET_CODE (operands[1]) == CONST_INT
                                && INTVAL (operands[1]) == 0);

        if (!start_is_zero_p)
          {
            /* Offset every lane value by the lower bound.  */
            counters = gen_reg_rtx (V64SImode);
            emit_insn (gen_addv64si3_dup (counters, lane_ids, operands[1]));
          }

        /* Lane is active where the (duplicated) upper bound compares
           greater than the lane's counter.  */
        emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
                                           gen_rtx_GT (VOIDmode, 0, 0),
                                           operands[2], counters));
      }

    if (INTVAL (operands[3]) < 64)
      {
        unsigned HOST_WIDE_INT low_bits
          = ~((unsigned HOST_WIDE_INT)-1 << INTVAL (operands[3]));
        emit_insn (gen_anddi3 (operands[0], operands[0],
                               gen_rtx_CONST_INT (VOIDmode, low_bits)));
      }
    DONE;
  })
3967
;; Masked vector load: operand 0 is the destination register, operand 1
;; the memory source and operand 2 the mask.  Implemented as a masked
;; gather from a vector of addresses derived from the memory operand.
(define_expand "maskload<mode>di"
  [(match_operand:V_MOV 0 "register_operand")
   (match_operand:V_MOV 1 "memory_operand")
   (match_operand 2 "")]
  ""
  {
    rtx dest = operands[0];
    rtx mem = operands[1];
    rtx lane_mask = force_reg (DImode, operands[2]);

    /* Turn the memory reference into per-lane addresses.  */
    rtx lane_addrs
      = gcn_expand_scalar_to_vector_address (<MODE>mode, lane_mask, mem,
                                             gen_rtx_SCRATCH (<VnDI>mode));

    /* The gather pattern takes the address space and volatility of the
       original reference as constant operands.  */
    rtx addr_space = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (mem));
    rtx volatile_p = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (mem));

    /* Masked lanes are required to hold zero, so clear the destination
       first and let the gather merge into it.  */
    emit_move_insn (dest, gcn_vec_constant (<MODE>mode, 0));

    emit_insn (gen_gather<mode>_expr_exec (dest, lane_addrs, addr_space,
                                           volatile_p, dest, lane_mask));
    DONE;
  })
3987
;; Masked vector store: operand 0 is the memory destination, operand 1 the
;; source register and operand 2 the mask.  Implemented as a masked scatter
;; to a vector of addresses derived from the memory operand.
(define_expand "maskstore<mode>di"
  [(match_operand:V_MOV 0 "memory_operand")
   (match_operand:V_MOV 1 "register_operand")
   (match_operand 2 "")]
  ""
  {
    rtx mem = operands[0];
    rtx value = operands[1];
    rtx lane_mask = force_reg (DImode, operands[2]);

    /* Turn the memory reference into per-lane addresses.  */
    rtx lane_addrs
      = gcn_expand_scalar_to_vector_address (<MODE>mode, lane_mask, mem,
                                             gen_rtx_SCRATCH (<VnDI>mode));

    /* Address space and volatility of the original reference are passed
       to the scatter pattern as constant operands.  */
    rtx addr_space = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (mem));
    rtx volatile_p = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (mem));

    emit_insn (gen_scatter<mode>_expr_exec (lane_addrs, value, addr_space,
                                            volatile_p, lane_mask));
    DONE;
  })
4002
;; Masked gather load.  Operand 0 is the destination, operand 1 the DI
;; base, operand 2 the <VnSI> offsets and operand 5 the mask.  Operands 3
;; (immediate) and 4 (SI) are handed to gcn_expand_scaled_offsets together
;; with the base and offsets to form the address operand.
(define_expand "mask_gather_load<mode><vnsi>"
  [(match_operand:V_MOV 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx dest = operands[0];
    rtx base = operands[1];
    rtx lane_mask = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), lane_mask);

    /* Masked lanes are required to hold zero, so clear the destination
       first and let the gather merge into it.  */
    emit_move_insn (dest, gcn_vec_constant (<MODE>mode, 0));

    if (GET_MODE (addr) != <VnDI>mode)
      /* Address is not a <VnDI> vector: use the base-plus-offsets form.  */
      emit_insn (gen_gather<mode>_insn_2offsets_exec (dest, base, addr,
                                                      const0_rtx, const0_rtx,
                                                      const0_rtx, dest,
                                                      lane_mask));
    else
      /* Address is a <VnDI> vector: use the single-offset form.  */
      emit_insn (gen_gather<mode>_insn_1offset_exec (dest, addr, const0_rtx,
                                                     const0_rtx, const0_rtx,
                                                     dest, lane_mask));
    DONE;
  })
4033
;; Masked scatter store.  Operand 0 is the DI base, operand 1 the <VnSI>
;; offsets, operand 4 the value to store and operand 5 the mask.
;; Operands 2 (immediate) and 3 (SI) are handed to
;; gcn_expand_scaled_offsets together with the base and offsets to form the
;; address operand.
(define_expand "mask_scatter_store<mode><vnsi>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:<VnSI> 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:V_MOV 4 "register_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx base = operands[0];
    rtx value = operands[4];
    rtx lane_mask = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), lane_mask);

    if (GET_MODE (addr) != <VnDI>mode)
      /* Address is not a <VnDI> vector: use the base-plus-offsets form.  */
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (base, addr,
                                                       const0_rtx, value,
                                                       const0_rtx, const0_rtx,
                                                       lane_mask));
    else
      /* Address is a <VnDI> vector: use the single-offset form.  */
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
                                                      value, const0_rtx,
                                                      const0_rtx, lane_mask));
    DONE;
  })
4061
;; Arithmetic operations that get a conditional (masked) expander.
(define_code_iterator cond_op [plus minus mult])

;; Conditional plus/minus/mult on any vector mode.  Operand 1 is the DI
;; mask, operands 2 and 3 the inputs, and operand 4 is passed as the merge
;; value of the masked insn.
(define_expand "cond_<expander><mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_op:V_ALL
     (match_operand:V_ALL 2 "gcn_alu_operand")
     (match_operand:V_ALL 3 "gcn_alu_operand"))
   (match_operand:V_ALL 4 "register_operand")]
  ""
  {
    /* The mask and the first input must be in registers.  */
    rtx lane_mask = force_reg (DImode, operands[1]);
    rtx lhs = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], lhs, operands[3],
                                           operands[4], lane_mask));
    DONE;
  })
4081
;; Min/max codes that get a conditional floating-point expander.
(define_code_iterator cond_fminmaxop [smin smax])

;; Conditional floating-point min/max.  Operand 1 is the DI mask,
;; operands 2 and 3 the inputs, and operand 4 is passed as the merge value
;; of the masked insn.
(define_expand "cond_<fexpander><mode>"
  [(match_operand:V_FP 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_fminmaxop:V_FP
     (match_operand:V_FP 2 "gcn_alu_operand")
     (match_operand:V_FP 3 "gcn_alu_operand"))
   (match_operand:V_FP 4 "register_operand")]
  ""
  {
    /* The mask and the first input must be in registers.  */
    rtx lane_mask = force_reg (DImode, operands[1]);
    rtx lhs = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<fexpander><mode>3_exec (operands[0], lhs, operands[3],
                                            operands[4], lane_mask));
    DONE;
  })
4101
;; Integer min/max codes that get a conditional expander.
(define_code_iterator cond_minmaxop [smin smax umin umax])

;; Conditional integer min/max.  Unlike the other cond_ expanders this one
;; computes into a temporary with an undefined merge value, then selects
;; between the temporary and operand 4 with vcond_mask.
(define_expand "cond_<expander><mode>"
  [(match_operand:V_INT 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_minmaxop:V_INT
     (match_operand:V_INT 2 "gcn_alu_operand")
     (match_operand:V_INT 3 "gcn_alu_operand"))
   (match_operand:V_INT 4 "register_operand")]
  ""
  {
    /* The mask and the first input must be in registers.  */
    rtx lane_mask = force_reg (DImode, operands[1]);
    rtx lhs = force_reg (<MODE>mode, operands[2]);
    rtx computed = gen_reg_rtx (<MODE>mode);

    /* Masked min/max; lanes outside the mask are left undefined.  */
    emit_insn (gen_<expander><mode>3_exec (computed, lhs, operands[3],
                                           gcn_gen_undef(<MODE>mode),
                                           lane_mask));

    /* Take the computed value or the fallback operand per the mask.  */
    emit_insn (gen_vcond_mask_<mode>di (operands[0], computed, operands[4],
                                        lane_mask));
    DONE;
  })
4124
;; Bitwise operations that get a conditional (masked) expander.
(define_code_iterator cond_bitop [and ior xor])

;; Conditional and/ior/xor on integer vectors.  Operand 1 is the DI mask,
;; operands 2 and 3 the inputs, and operand 4 is passed as the merge value
;; of the masked insn.
(define_expand "cond_<expander><mode>"
  [(match_operand:V_INT 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_bitop:V_INT
     (match_operand:V_INT 2 "gcn_alu_operand")
     (match_operand:V_INT 3 "gcn_alu_operand"))
   (match_operand:V_INT 4 "register_operand")]
  ""
  {
    /* The mask and the first input must be in registers.  */
    rtx lane_mask = force_reg (DImode, operands[1]);
    rtx lhs = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], lhs, operands[3],
                                           operands[4], lane_mask));
    DONE;
  })
4144
;; Shift operations that get a conditional (masked) expander.
(define_code_iterator cond_shiftop [ashift lshiftrt ashiftrt])

;; Conditional vector shift.  Operand 1 is the DI mask, operand 2 the value
;; to shift, operand 3 the shift amounts and operand 4 the merge value.
;; The shift amounts are converted to <VnSI> before use.
(define_expand "cond_<expander><mode>"
  [(match_operand:V_INT_noHI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_shiftop:V_INT_noHI
     (match_operand:V_INT_noHI 2 "gcn_alu_operand")
     (match_operand:V_INT_noHI 3 "gcn_alu_operand"))
   (match_operand:V_INT_noHI 4 "register_operand")]
  ""
  {
    /* The mask and the shifted value must be in registers.  */
    rtx lane_mask = force_reg (DImode, operands[1]);
    rtx shifted = force_reg (<MODE>mode, operands[2]);

    /* The vector-shift insn takes its amounts in <VnSI>.  */
    rtx amounts = gen_reg_rtx (<VnSI>mode);
    convert_move (amounts, operands[3], 0);

    emit_insn (gen_v<expander><mode>3_exec (operands[0], shifted, amounts,
                                            operands[4], lane_mask));
    DONE;
  })
4167
4168 ;; }}}
4169 ;; {{{ Vector reductions
4170
;; All reduction step unspecs handled by the DPP shift-right patterns.
4171 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
4172 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
4173 UNSPEC_PLUS_DPP_SHR
4174 UNSPEC_AND_DPP_SHR
4175 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
4176
;; Subset of the above that the two-register (V_DI) split pattern accepts;
;; the min/max unspecs are not included.
4177 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
4178 UNSPEC_AND_DPP_SHR
4179 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
4180
;; Identity mapping: gives access to the unspec's own name as text so it
;; can be passed as a C argument in the patterns below.
4181 ; FIXME: Isn't there a better way of doing this?
4182 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
4183 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
4184 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
4185 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
4186 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
4187 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
4188 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
4189 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
4190
;; Operation name fragment used in the reduction pattern names.
4191 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
4192 (UNSPEC_SMAX_DPP_SHR "smax")
4193 (UNSPEC_UMIN_DPP_SHR "umin")
4194 (UNSPEC_UMAX_DPP_SHR "umax")
4195 (UNSPEC_PLUS_DPP_SHR "plus")
4196 (UNSPEC_AND_DPP_SHR "and")
4197 (UNSPEC_IOR_DPP_SHR "ior")
4198 (UNSPEC_XOR_DPP_SHR "xor")])
4199
;; Instruction mnemonic template for each reduction step; the trailing
;; %-directive is expanded when the instruction is printed.
4200 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
4201 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
4202 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
4203 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
4204 (UNSPEC_PLUS_DPP_SHR "v_add%U0")
4205 (UNSPEC_AND_DPP_SHR "v_and%B0")
4206 (UNSPEC_IOR_DPP_SHR "v_or%B0")
4207 (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
4208
;; Reduce a vector (operand 1) to a scalar (operand 0) with one of the
;; REDUC_UNSPEC operations.  Not available on RDNA2.
;; gcn_expand_reduc_scalar produces a vector from which the final value is
;; read out of the last lane.
(define_expand "reduc_<reduc_op>_scal_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
        (unspec:<SCALAR_MODE>
          [(match_operand:V_ALL 1 "register_operand")]
          REDUC_UNSPEC))]
  "!TARGET_RDNA2"
  {
    rtx reduced = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
                                           <reduc_unspec>);

    /* The result is taken from the highest-numbered lane.  */
    int last = GET_MODE_NUNITS (<MODE>mode) - 1;
    emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], reduced,
                                                   GEN_INT (last)));

    DONE;
  })
4225
;; Floating-point fmin/fmax reduction of a vector (operand 1) to a scalar
;; (operand 0).  This is a thin wrapper around the smin/smax reduction
;; expander, reduc_<expander>_scal_<mode>.  That expander is disabled on
;; RDNA2, so this wrapper carries the same condition; otherwise it would
;; be advertised as available there and emit a sequence that has no
;; matching insns.
(define_expand "reduc_<fexpander>_scal_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (fminmaxop:V_FP
     (match_operand:V_FP 1 "register_operand"))]
  "!TARGET_RDNA2"
  {
    /* fmin/fmax are identical to smin/smax.  */
    emit_insn (gen_reduc_<expander>_scal_<mode> (operands[0], operands[1]));
    DONE;
  })
4236
;; Warning: This "-ffast-math" implementation converts in-order reductions
;;          into associative reductions. It's also used where OpenMP or
;;          OpenACC parallelization has already broken the in-order semantics.
;;
;; Operand 0 receives operand 1 (a scalar) plus the sum of all elements of
;; the vector in operand 2.  The vector sum comes from the
;; reduc_plus_scal_<mode> expander, which is disabled on RDNA2, so this
;; pattern must be disabled there as well.
(define_expand "fold_left_plus_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:V_FP 2 "gcn_alu_operand")]
  "!TARGET_RDNA2
   && can_create_pseudo_p ()
   && (flag_openacc || flag_openmp
       || flag_associative_math)"
  {
    rtx dest = operands[0];
    rtx scalar = operands[1];
    rtx vector = operands[2];
    rtx tmp = gen_reg_rtx (<SCALAR_MODE>mode);

    /* Sum the vector first, then add the incoming scalar.  */
    emit_insn (gen_reduc_plus_scal_<mode> (tmp, vector));
    emit_insn (gen_add<scalar_mode>3 (dest, scalar, tmp));
    DONE;
  })
4257
;; One reduction step on a single-register vector: combines operands 1 and
;; 2 with the REDUC_UNSPEC operation, with operand 3 (a constant) passed to
;; gcn_expand_dpp_shr_insn, which builds the output template.
;; Disabled on RDNA2, and for integer addition on GCN3 (see the comment on
;; the condition).
4258 (define_insn "*<reduc_op>_dpp_shr_<mode>"
4259 [(set (match_operand:V_1REG 0 "register_operand" "=v")
4260 (unspec:V_1REG
4261 [(match_operand:V_1REG 1 "register_operand" "v")
4262 (match_operand:V_1REG 2 "register_operand" "v")
4263 (match_operand:SI 3 "const_int_operand" "n")]
4264 REDUC_UNSPEC))]
4265 ; GCN3 requires a carry out, GCN5 not
4266 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
4267 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)
4268 && !TARGET_RDNA2"
4269 {
4270 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
4271 <reduc_unspec>, INTVAL (operands[3]));
4272 }
4273 [(set_attr "type" "vop_dpp")
4274 (set_attr "length" "8")])
4275
;; Reduction step on a two-register (V_DI) vector for the
;; REDUC_2REG_UNSPEC operations.  Split after reload into two independent
;; <VnSI> steps, one on the low halves and one on the high halves of the
;; operands, both using the same constant operand 3.
(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
  [(set (match_operand:V_DI 0 "register_operand" "=v")
        (unspec:V_DI
          [(match_operand:V_DI 1 "register_operand" "v")
           (match_operand:V_DI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          REDUC_2REG_UNSPEC))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4)
        (unspec:<VnSI>
          [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
   (set (match_dup 5)
        (unspec:<VnSI>
          [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
  {
    /* operands[4..9] are the low (even index) and high (odd index) halves
       of operands[0], operands[1] and operands[2], in that order.  */
    for (int op = 0; op < 3; op++)
      for (int half = 0; half < 2; half++)
        operands[4 + 2 * op + half]
          = gcn_operand_part (<MODE>mode, operands[op], half);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
4302
4303 ; Special cases for addition.
4304
;; Addition reduction step on a single-register integer vector that is
;; declared to clobber VCC.  The output template is built by
;; gcn_expand_dpp_shr_insn from "v_add%^_u32" and the constant in
;; operand 3.  Disabled on RDNA2.
4305 (define_insn "*plus_carry_dpp_shr_<mode>"
4306 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
4307 (unspec:V_INT_1REG
4308 [(match_operand:V_INT_1REG 1 "register_operand" "v")
4309 (match_operand:V_INT_1REG 2 "register_operand" "v")
4310 (match_operand:SI 3 "const_int_operand" "n")]
4311 UNSPEC_PLUS_CARRY_DPP_SHR))
4312 (clobber (reg:DI VCC_REG))]
4313 "!TARGET_RDNA2"
4314 {
4315 return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
4316 UNSPEC_PLUS_CARRY_DPP_SHR,
4317 INTVAL (operands[3]));
4318 }
4319 [(set_attr "type" "vop_dpp")
4320 (set_attr "length" "8")])
4321
;; Addition reduction step on a V_SI vector that takes an extra DI input
;; (operand 4, constraint "cV") and is declared to clobber VCC.  The output
;; template is built by gcn_expand_dpp_shr_insn from "v_addc%^_u32" and the
;; constant in operand 3.  Disabled on RDNA2.
4322 (define_insn "*plus_carry_in_dpp_shr_<mode>"
4323 [(set (match_operand:V_SI 0 "register_operand" "=v")
4324 (unspec:V_SI
4325 [(match_operand:V_SI 1 "register_operand" "v")
4326 (match_operand:V_SI 2 "register_operand" "v")
4327 (match_operand:SI 3 "const_int_operand" "n")
4328 (match_operand:DI 4 "register_operand" "cV")]
4329 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
4330 (clobber (reg:DI VCC_REG))]
4331 "!TARGET_RDNA2"
4332 {
4333 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
4334 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
4335 INTVAL (operands[3]));
4336 }
4337 [(set_attr "type" "vop_dpp")
4338 (set_attr "length" "8")])
4339
;; Addition reduction step on a two-register (V_DI) vector.  Split after
;; reload into a <VnSI> add on the low halves followed by a <VnSI> add on
;; the high halves that also takes VCC as an input; both halves clobber
;; VCC.
(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
  [(set (match_operand:V_DI 0 "register_operand" "=v")
        (unspec:V_DI
          [(match_operand:V_DI 1 "register_operand" "v")
           (match_operand:V_DI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 4)
                   (unspec:<VnSI>
                     [(match_dup 6) (match_dup 8) (match_dup 3)]
                     UNSPEC_PLUS_CARRY_DPP_SHR))
              (clobber (reg:DI VCC_REG))])
   (parallel [(set (match_dup 5)
                   (unspec:<VnSI>
                     [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
                     UNSPEC_PLUS_CARRY_IN_DPP_SHR))
              (clobber (reg:DI VCC_REG))])]
  {
    /* operands[4..9] are the low (even index) and high (odd index) halves
       of operands[0], operands[1] and operands[2], in that order.  */
    for (int op = 0; op < 3; op++)
      for (int half = 0; half < 2; half++)
        operands[4 + 2 * op + half]
          = gcn_operand_part (<MODE>mode, operands[op], half);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
4371
4372 ;; }}}
4373 ;; {{{ Miscellaneous
4374
;; Build a V_SI series in operand 0 from the SI base in operand 1 and the
;; SI step in operand 2: VGPR 1 is multiplied by the (duplicated) step and
;; the (duplicated) base is then added.
;; NOTE(review): this relies on VGPR 1 holding the per-lane index --
;; confirm against the code that initialises it.
(define_expand "vec_series<mode>"
  [(match_operand:V_SI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:SI 2 "gcn_alu_operand")]
  ""
  {
    rtx base = operands[1];
    rtx step = operands[2];
    rtx scaled = gen_reg_rtx (<MODE>mode);
    rtx lane_reg = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));

    /* scaled = VGPR1 * step; result = scaled + base.  */
    emit_insn (gen_mul<mode>3_dup (scaled, lane_reg, step));
    emit_insn (gen_add<mode>3_dup (operands[0], scaled, base));
    DONE;
  })
4388
;; Build a V_DI series in operand 0 from the DI base in operand 1 and the
;; DI step in operand 2: VGPR 1 (read as <VnSI>) is multiplied by the
;; duplicated step using the zero-extending multiply, the base is
;; duplicated into a vector, and the two are added.
;; NOTE(review): this relies on VGPR 1 holding the per-lane index --
;; confirm against the code that initialises it.
(define_expand "vec_series<mode>"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx base = operands[1];
    rtx step = operands[2];
    rtx scaled = gen_reg_rtx (<MODE>mode);
    rtx lane_reg = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
    rtx base_vec = gen_reg_rtx (<MODE>mode);

    /* scaled = zext (VGPR1) * step.  */
    emit_insn (gen_mul<mode>3_zext_dup2 (scaled, lane_reg, step));

    /* result = scaled + broadcast (base).  */
    emit_insn (gen_vec_duplicate<mode> (base_vec, base));
    emit_insn (gen_add<mode>3 (operands[0], scaled, base_vec));
    DONE;
  })
4404
4405 ;; }}}