]>
Commit | Line | Data |
---|---|---|
7adcbafe | 1 | ;; Copyright (C) 2016-2022 Free Software Foundation, Inc. |
3d6275e3 AS |
2 | |
3 | ;; This file is free software; you can redistribute it and/or modify it under | |
4 | ;; the terms of the GNU General Public License as published by the Free | |
5 | ;; Software Foundation; either version 3 of the License, or (at your option) | |
6 | ;; any later version. | |
7 | ||
8 | ;; This file is distributed in the hope that it will be useful, but WITHOUT | |
9 | ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
10 | ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
11 | ;; for more details. | |
12 | ||
13 | ;; You should have received a copy of the GNU General Public License | |
14 | ;; along with GCC; see the file COPYING3. If not see | |
15 | ;; <http://www.gnu.org/licenses/>. | |
16 | ||
17 | ;; {{{ Vector iterators | |
18 | ||
1165109b AS |
19 | ; Vector modes for specific element types. |
20 | ; Each iterator below currently lists a single 64-lane mode. (This will make more sense when there are multiple vector sizes) | |
21 | (define_mode_iterator V_QI | |
22 | [V64QI]) | |
23 | (define_mode_iterator V_HI | |
24 | [V64HI]) | |
25 | (define_mode_iterator V_HF | |
26 | [V64HF]) | |
27 | (define_mode_iterator V_SI | |
28 | [V64SI]) | |
29 | (define_mode_iterator V_SF | |
30 | [V64SF]) | |
31 | (define_mode_iterator V_DI | |
32 | [V64DI]) | |
33 | (define_mode_iterator V_DF | |
34 | [V64DF]) | |
35 | ||
dc941ea9 | 36 | ; Vector modes with sub-dword (QI or HI) elements |
03876953 | 37 | (define_mode_iterator V_QIHI |
dc941ea9 AS |
38 | [V64QI V64HI]) |
39 | ||
3d6275e3 | 40 | ; Vector modes for one vector register: all of them (V_1REG), the integer ones (V_INT_1REG, V_INT_1REG_ALT) and the floating-point ones (V_FP_1REG) |
; V_INT_1REG_ALT lists exactly the same modes as V_INT_1REG; presumably it lets
; a single pattern iterate over two integer modes independently -- confirm
; against its users.
03876953 | 41 | (define_mode_iterator V_1REG |
3d6275e3 AS |
42 | [V64QI V64HI V64SI V64HF V64SF]) |
43 | ||
03876953 | 44 | (define_mode_iterator V_INT_1REG |
2b99bed8 | 45 | [V64QI V64HI V64SI]) |
03876953 | 46 | (define_mode_iterator V_INT_1REG_ALT |
3d6275e3 | 47 | [V64QI V64HI V64SI]) |
03876953 AS |
48 | (define_mode_iterator V_FP_1REG |
49 | [V64HF V64SF]) | |
3d6275e3 AS |
50 | |
51 | ; Vector modes for two vector registers (DI and DF elements) | |
03876953 | 52 | (define_mode_iterator V_2REG |
3d6275e3 AS |
53 | [V64DI V64DF]) |
54 | ||
03876953 AS |
55 | ; Vector modes with native support |
; V_noQI is every vector mode except V64QI; V_noHI additionally omits V64HI;
; V_INT_noQI is the integer-only subset of V_noQI.
56 | (define_mode_iterator V_noQI | |
57 | [V64HI V64HF V64SI V64SF V64DI V64DF]) | |
58 | (define_mode_iterator V_noHI | |
59 | [V64HF V64SI V64SF V64DI V64DF]) | |
60 | ||
61 | (define_mode_iterator V_INT_noQI | |
62 | [V64HI V64SI V64DI]) | |
63 | ||
3d6275e3 | 64 | ; All of the above |
; V_ALL_ALT repeats the V_ALL list verbatim; V_INT and V_FP are the integer and
; floating-point subsets of V_ALL.
03876953 AS |
65 | (define_mode_iterator V_ALL |
66 | [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) | |
67 | (define_mode_iterator V_ALL_ALT | |
68 | [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) | |
69 | ||
70 | (define_mode_iterator V_INT | |
71 | [V64QI V64HI V64SI V64DI]) | |
72 | (define_mode_iterator V_FP | |
73 | [V64HF V64SF V64DF]) | |
3d6275e3 AS |
74 | |
; Map each vector mode to the name of its element mode, in lower case
; (scalar_mode) and in upper case (SCALAR_MODE).
75 | (define_mode_attr scalar_mode | |
76 | [(V64QI "qi") (V64HI "hi") (V64SI "si") | |
77 | (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")]) | |
78 | ||
79 | (define_mode_attr SCALAR_MODE | |
80 | [(V64QI "QI") (V64HI "HI") (V64SI "SI") | |
81 | (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")]) | |
82 | ||
; Map every vector mode to the 64-lane SImode vector (vnsi / VnSI) and the
; 64-lane DImode vector (vndi / VnDI), in lower and upper case.  All entries
; currently map to the same mode because only 64-lane vectors are listed.
1165109b AS |
83 | (define_mode_attr vnsi |
84 | [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si") | |
85 | (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")]) | |
86 | ||
87 | (define_mode_attr VnSI | |
88 | [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI") | |
89 | (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")]) | |
90 | ||
91 | (define_mode_attr vndi | |
92 | [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di") | |
93 | (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")]) | |
94 | ||
95 | (define_mode_attr VnDI | |
96 | [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI") | |
97 | (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")]) | |
98 | ||
; Per-mode selector string: BYTE_0, WORD_0 or DWORD for QI, HI and SI elements.
; NOTE(review): presumably the SDWA operand-select keyword printed in assembly
; output -- confirm against the patterns that use <sdwa>.
3d66c777 AS |
99 | (define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")]) |
100 | ||
3d6275e3 AS |
101 | ;; }}} |
102 | ;; {{{ Substitutions | |
103 | ||
; Name-suffix attributes for the substitutions defined below.  Each expands to
; "" in the original pattern and to "_exec" in the variant generated by the
; named define_subst.
104 | (define_subst_attr "exec" "vec_merge" | |
105 | "" "_exec") | |
106 | (define_subst_attr "exec_clobber" "vec_merge_with_clobber" | |
107 | "" "_exec") | |
108 | (define_subst_attr "exec_vcc" "vec_merge_with_vcc" | |
109 | "" "_exec") | |
110 | (define_subst_attr "exec_scatter" "scatter_store" | |
111 | "" "_exec") | |
112 | ||
; Turn a plain vector set into its masked form: the original source (operand 1)
; is wrapped in a vec_merge with a new V_ALL operand 3 (register or unspec,
; constraint "U0") and a new DImode mask operand 4 that must satisfy
; gcn_exec_reg_operand.
113 | (define_subst "vec_merge" | |
03876953 AS |
114 | [(set (match_operand:V_ALL 0) |
115 | (match_operand:V_ALL 1))] | |
3d6275e3 AS |
116 | "" |
117 | [(set (match_dup 0) | |
03876953 | 118 | (vec_merge:V_ALL |
3d6275e3 | 119 | (match_dup 1) |
03876953 | 120 | (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0") |
3d6275e3 AS |
121 | (match_operand:DI 4 "gcn_exec_reg_operand" "e")))]) |
122 | ||
; Same rewrite as "vec_merge", for patterns that also carry a clobber of
; operand 2; the clobber is reproduced unchanged in the output.
123 | (define_subst "vec_merge_with_clobber" | |
03876953 AS |
124 | [(set (match_operand:V_ALL 0) |
125 | (match_operand:V_ALL 1)) | |
3d6275e3 AS |
126 | (clobber (match_operand 2))] |
127 | "" | |
128 | [(set (match_dup 0) | |
03876953 | 129 | (vec_merge:V_ALL |
3d6275e3 | 130 | (match_dup 1) |
03876953 | 131 | (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0") |
3d6275e3 AS |
132 | (match_operand:DI 4 "gcn_exec_reg_operand" "e"))) |
133 | (clobber (match_dup 2))]) | |
134 | ||
; Masked form for patterns that set both a vector (operand 0) and a DImode
; result (operand 2).  The vector set becomes a vec_merge with new operands 4
; (merge source, "U0") and 5 (mask), and the DImode source (operand 3) is ANDed
; with the EXEC_REG register.
135 | (define_subst "vec_merge_with_vcc" | |
03876953 AS |
136 | [(set (match_operand:V_ALL 0) |
137 | (match_operand:V_ALL 1)) | |
3d6275e3 AS |
138 | (set (match_operand:DI 2) |
139 | (match_operand:DI 3))] | |
140 | "" | |
141 | [(parallel | |
142 | [(set (match_dup 0) | |
03876953 | 143 | (vec_merge:V_ALL |
3d6275e3 | 144 | (match_dup 1) |
03876953 | 145 | (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0") |
3d6275e3 AS |
146 | (match_operand:DI 5 "gcn_exec_reg_operand" "e"))) |
147 | (set (match_dup 2) | |
148 | (and:DI (match_dup 3) | |
149 | (reg:DI EXEC_REG)))])]) | |
150 | ||
; Masked form of a scatter store: the four-element UNSPEC_SCATTER vector gains
; a fifth element, a DImode mask operand 4 that must satisfy
; gcn_exec_reg_operand.
151 | (define_subst "scatter_store" | |
152 | [(set (mem:BLK (scratch)) | |
153 | (unspec:BLK | |
154 | [(match_operand 0) | |
155 | (match_operand 1) | |
156 | (match_operand 2) | |
157 | (match_operand 3)] | |
158 | UNSPEC_SCATTER))] | |
159 | "" | |
160 | [(set (mem:BLK (scratch)) | |
161 | (unspec:BLK | |
162 | [(match_dup 0) | |
163 | (match_dup 1) | |
164 | (match_dup 2) | |
165 | (match_dup 3) | |
166 | (match_operand:DI 4 "gcn_exec_reg_operand" "e")] | |
167 | UNSPEC_SCATTER))]) | |
168 | ||
169 | ;; }}} | |
170 | ;; {{{ Vector moves | |
171 | ||
172 | ; This is the entry point for all vector register moves. Memory accesses can | |
173 | ; come this way also, but will more usually use the reload_in/out, | |
174 | ; gather/scatter, maskload/store, etc. | |
175 | ||
; Expander for all vector moves (V_ALL).
; - Store to MEM while neither lra_in_progress nor reload_completed: force the
;   source into a register and emit gen_scatter<mode>_expr, using an address
;   built by gcn_expand_scalar_to_vector_address with a <VnDI> SCRATCH.
; - Load from MEM under the same condition: emit gen_gather<mode>_expr likewise.
; - Any other move with a MEM operand: assert !reload_completed and emit
;   gen_mov<mode>_sgprbase with a fresh <VnDI> pseudo as the scratch.
; - Moves with no MEM operand fall through to the RTL template.
; The address space and volatile flag of the MEM are passed on as CONST_INTs.
176 | (define_expand "mov<mode>" | |
03876953 AS |
177 | [(set (match_operand:V_ALL 0 "nonimmediate_operand") |
178 | (match_operand:V_ALL 1 "general_operand"))] | |
3d6275e3 AS |
179 | "" |
180 | { | |
181 | if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed) | |
182 | { | |
183 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
1165109b | 184 | rtx scratch = gen_rtx_SCRATCH (<VnDI>mode); |
3d6275e3 AS |
185 | rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); |
186 | rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); | |
187 | rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, | |
188 | operands[0], | |
189 | scratch); | |
190 | emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v)); | |
191 | DONE; | |
192 | } | |
193 | else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed) | |
194 | { | |
1165109b | 195 | rtx scratch = gen_rtx_SCRATCH (<VnDI>mode); |
3d6275e3 AS |
196 | rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); |
197 | rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); | |
198 | rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, | |
199 | operands[1], | |
200 | scratch); | |
201 | emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v)); | |
202 | DONE; | |
203 | } | |
204 | else if ((MEM_P (operands[0]) || MEM_P (operands[1]))) | |
205 | { | |
206 | gcc_assert (!reload_completed); | |
1165109b | 207 | rtx scratch = gen_reg_rtx (<VnDI>mode); |
3d6275e3 AS |
208 | emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch)); |
209 | DONE; | |
210 | } | |
211 | }) | |
212 | ||
213 | ; A pseudo instruction that helps LRA use the "U0" constraint. | |
; It sets a vector register from an operand accepted by gcn_unspec_operand and
; emits no assembly: the output template is empty and the length is 0.
214 | ||
215 | (define_insn "mov<mode>_unspec" | |
03876953 AS |
216 | [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v") |
217 | (match_operand:V_ALL 1 "gcn_unspec_operand" " U"))] | |
3d6275e3 AS |
218 | "" |
219 | "" | |
220 | [(set_attr "type" "unknown") | |
221 | (set_attr "length" "0")]) | |
222 | ||
; Unconditional move for the single-register vector modes: one v_mov_b32.
; The second alternative (source constraint "B") is 8 bytes long instead of 4.
223 | (define_insn "*mov<mode>" | |
03876953 AS |
224 | [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v") |
225 | (match_operand:V_1REG 1 "general_operand" "vA,B"))] | |
3d6275e3 AS |
226 | "" |
227 | "v_mov_b32\t%0, %1" | |
228 | [(set_attr "type" "vop1,vop1") | |
229 | (set_attr "length" "4,8")]) | |
230 | ||
; Masked move for the single-register vector modes: operand 1 is merged with
; operand 2 under the DImode mask in operand 3.
; - Alternatives 0-1 (mask "e", merge source "U0"): plain v_mov_b32.
; - Alternative 2 (mask "cV") and 3 (mask "Sv"): v_cndmask_b32 selecting
;   on vcc or on operand 3 respectively.
; - Alternatives 4-5 (load from / store to MEM): output "#" and claim the
;   <VnDI> scratch in operand 4; presumably they are rewritten by the
;   define_splits further down -- confirm.
; The insn condition rejects a MEM destination unless the source is a REG.
231 | (define_insn "mov<mode>_exec" | |
03876953 AS |
232 | [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m") |
233 | (vec_merge:V_1REG | |
234 | (match_operand:V_1REG 1 "general_operand" "vA, B, v,vA, m, v") | |
b7886845 | 235 | (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand" |
3d6275e3 | 236 | "U0,U0,vA,vA,U0,U0") |
b7886845 | 237 | (match_operand:DI 3 "register_operand" " e, e,cV,Sv, e, e"))) |
1165109b | 238 | (clobber (match_scratch:<VnDI> 4 "=X, X, X, X,&v,&v"))] |
3d6275e3 AS |
239 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
240 | "@ | |
241 | v_mov_b32\t%0, %1 | |
242 | v_mov_b32\t%0, %1 | |
b7886845 AS |
243 | v_cndmask_b32\t%0, %2, %1, vcc |
244 | v_cndmask_b32\t%0, %2, %1, %3 | |
3d6275e3 AS |
245 | # |
246 | #" | |
247 | [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*") | |
248 | (set_attr "length" "4,8,4,8,16,16")]) | |
249 | ||
250 | ; This variant does not accept an unspec, but does permit MEM | |
251 | ; read/modify/write which is necessary for maskstore. | |
252 | ||
253 | ;(define_insn "*mov<mode>_exec_match" | |
03876953 AS |
254 | ; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m") |
255 | ; (vec_merge:V_1REG | |
256 | ; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v") | |
3d6275e3 | 257 | ; (match_dup 0) |
03876953 | 258 | ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e"))) |
1165109b | 259 | ; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))] |
3d6275e3 AS |
260 | ; "!MEM_P (operands[0]) || REG_P (operands[1])" |
261 | ; "@ | |
262 | ; v_mov_b32\t%0, %1 | |
263 | ; v_mov_b32\t%0, %1 | |
264 | ; # | |
265 | ; #" | |
266 | ; [(set_attr "type" "vop1,vop1,*,*") | |
267 | ; (set_attr "length" "4,8,16,16")]) | |
268 | ||
; Unconditional move for the two-register vector modes, emitted as a pair of
; v_mov_b32 (low half %L, high half %H).  The low half is moved first unless
; the source is a register numbered below the destination, in which case the
; high half goes first -- presumably so that overlapping register pairs are not
; overwritten before they are read.
269 | (define_insn "*mov<mode>" | |
03876953 AS |
270 | [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v") |
271 | (match_operand:V_2REG 1 "general_operand" "vDB"))] | |
3d6275e3 AS |
272 | "" |
273 | { | |
274 | if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) | |
275 | return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"; | |
276 | else | |
277 | return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1"; | |
278 | } | |
279 | [(set_attr "type" "vmult") | |
280 | (set_attr "length" "16")]) | |
281 | ||
; Masked move for the two-register vector modes.
; - Alternative 0: a pair of v_mov_b32.
; - Alternatives 1-2: a pair of v_cndmask_b32 selecting on vcc or on
;   operand 3 respectively.
; - Alternatives 3-4 (MEM source / destination): fall out of the switch and
;   return "#".
; The halves are emitted low-first unless the source is a register numbered
; below the destination, in which case the high half is emitted first.
282 | (define_insn "mov<mode>_exec" | |
03876953 AS |
283 | [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m") |
284 | (vec_merge:V_2REG | |
285 | (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v") | |
b7886845 | 286 | (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand" |
3d6275e3 | 287 | " U0,vDA0,vDA0,U0,U0") |
b7886845 | 288 | (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e"))) |
1165109b | 289 | (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))] |
3d6275e3 AS |
290 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
291 | { | |
292 | if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) | |
293 | switch (which_alternative) | |
294 | { | |
295 | case 0: | |
296 | return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"; | |
297 | case 1: | |
b7886845 AS |
298 | return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;" |
299 | "v_cndmask_b32\t%H0, %H2, %H1, vcc"; | |
3d6275e3 | 300 | case 2: |
b7886845 AS |
301 | return "v_cndmask_b32\t%L0, %L2, %L1, %3\;" |
302 | "v_cndmask_b32\t%H0, %H2, %H1, %3"; | |
3d6275e3 AS |
303 | } |
304 | else | |
305 | switch (which_alternative) | |
306 | { | |
307 | case 0: | |
308 | return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1"; | |
309 | case 1: | |
b7886845 AS |
310 | return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;" |
311 | "v_cndmask_b32\t%L0, %L2, %L1, vcc"; | |
3d6275e3 | 312 | case 2: |
b7886845 AS |
313 | return "v_cndmask_b32\t%H0, %H2, %H1, %3\;" |
314 | "v_cndmask_b32\t%L0, %L2, %L1, %3"; | |
3d6275e3 AS |
315 | } |
316 | ||
317 | return "#"; | |
318 | } | |
319 | [(set_attr "type" "vmult,vmult,vmult,*,*") | |
320 | (set_attr "length" "16,16,16,16,16")]) | |
321 | ||
322 | ; This variant does not accept an unspec, but does permit MEM | |
323 | ; read/modify/write which is necessary for maskstore. | |
324 | ||
325 | ;(define_insn "*mov<mode>_exec_match" | |
03876953 AS |
326 | ; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m") |
327 | ; (vec_merge:V_2REG | |
328 | ; (match_operand:V_2REG 1 "general_operand" "vDB, m, v") | |
3d6275e3 | 329 | ; (match_dup 0) |
03876953 | 330 | ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e"))) |
1165109b | 331 | ; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))] |
3d6275e3 AS |
332 | ; "!MEM_P (operands[0]) || REG_P (operands[1])" |
333 | ; "@ | |
334 | ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \ | |
335 | ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \ | |
336 | ; else \ | |
337 | ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\"; | |
338 | ; # | |
339 | ; #" | |
340 | ; [(set_attr "type" "vmult,*,*") | |
341 | ; (set_attr "length" "16,16,16")]) | |
342 | ||
343 | ; A SGPR-base load looks like: | |
344 | ; <load> v, Sv | |
345 | ; | |
346 | ; There's no hardware instruction that corresponds to this, but vector base | |
347 | ; addresses are placed in an SGPR because it is easier to add to a vector. | |
348 | ; We also have a temporary vT, and the vector v1 holding numbered lanes. | |
349 | ; | |
350 | ; Rewrite as: | |
351 | ; vT = v1 << log2(element-size) | |
352 | ; vT += Sv | |
353 | ; flat_load v, vT | |
354 | ||
; SGPR-base move for the single-register vector modes, valid only while
; lra_in_progress or after reload_completed.  Register and constant sources
; become a v_mov_b32; the two MEM alternatives output "#".  Operand 2 is an
; early-clobbered <VnDI> vector register in every alternative.
355 | (define_insn "mov<mode>_sgprbase" | |
03876953 AS |
356 | [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m") |
357 | (unspec:V_1REG | |
358 | [(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")] | |
3d6275e3 | 359 | UNSPEC_SGPRBASE)) |
1165109b | 360 | (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))] |
3d6275e3 AS |
361 | "lra_in_progress || reload_completed" |
362 | "@ | |
363 | v_mov_b32\t%0, %1 | |
364 | v_mov_b32\t%0, %1 | |
365 | # | |
366 | #" | |
367 | [(set_attr "type" "vop1,vop1,*,*") | |
368 | (set_attr "length" "4,8,12,12")]) | |
369 | ||
; SGPR-base move for the two-register vector modes, valid only while
; lra_in_progress or after reload_completed.  Alternative 0 emits a pair of
; v_mov_b32, high half first when the source register is numbered below the
; destination; the two MEM alternatives output "#".
; NOTE(review): alternative 0 accepts "vDB" sources yet declares length 8,
; whereas the equivalent two-instruction "*mov<mode>" for V_2REG declares 16.
; Confirm whether 8 under-estimates the size when the source is a constant.
370 | (define_insn "mov<mode>_sgprbase" | |
03876953 AS |
371 | [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m") |
372 | (unspec:V_2REG | |
373 | [(match_operand:V_2REG 1 "general_operand" "vDB, m, v")] | |
3d6275e3 | 374 | UNSPEC_SGPRBASE)) |
1165109b | 375 | (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))] |
3d6275e3 AS |
376 | "lra_in_progress || reload_completed" |
377 | "@ | |
378 | * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \ | |
379 | return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \ | |
380 | else \ | |
381 | return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\"; | |
382 | # | |
383 | #" | |
384 | [(set_attr "type" "vmult,*,*") | |
385 | (set_attr "length" "8,12,12")]) | |
386 | ||
387 | ; reload_in was once a standard name, but here it's only referenced by | |
388 | ; gcn_secondary_reload. It allows a reload with a scratch register. | |
389 | ||
; Load a vector register (operand 0) from memory (operand 1) by emitting
; mov<mode>_sgprbase with operand 2 as its <VnDI> scratch register.
390 | (define_expand "reload_in<mode>" | |
03876953 AS |
391 | [(set (match_operand:V_ALL 0 "register_operand" "= v") |
392 | (match_operand:V_ALL 1 "memory_operand" " m")) | |
1165109b | 393 | (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))] |
3d6275e3 AS |
394 | "" |
395 | { | |
396 | emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2])); | |
397 | DONE; | |
398 | }) | |
399 | ||
400 | ; reload_out is similar to reload_in, above. | |
401 | ||
; Store a vector register (operand 1) to memory (operand 0) by emitting
; mov<mode>_sgprbase with operand 2 as its <VnDI> scratch register.
402 | (define_expand "reload_out<mode>" | |
03876953 AS |
403 | [(set (match_operand:V_ALL 0 "memory_operand" "= m") |
404 | (match_operand:V_ALL 1 "register_operand" " v")) | |
1165109b | 405 | (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))] |
3d6275e3 AS |
406 | "" |
407 | { | |
408 | emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2])); | |
409 | DONE; | |
410 | }) | |
411 | ||
412 | ; Expand scalar addresses into gather/scatter patterns | |
413 | ||
; Split an UNSPEC_SGPRBASE store to memory into an UNSPEC_SCATTER.  Operand 5
; is the address from gcn_expand_scalar_to_vector_address (no exec argument,
; scratch = operand 2); operands 6 and 7 carry the MEM's address space and
; volatile flag as CONST_INTs.
414 | (define_split | |
03876953 AS |
415 | [(set (match_operand:V_ALL 0 "memory_operand") |
416 | (unspec:V_ALL | |
417 | [(match_operand:V_ALL 1 "general_operand")] | |
3d6275e3 | 418 | UNSPEC_SGPRBASE)) |
1165109b | 419 | (clobber (match_scratch:<VnDI> 2))] |
3d6275e3 AS |
420 | "" |
421 | [(set (mem:BLK (scratch)) | |
422 | (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)] | |
423 | UNSPEC_SCATTER))] | |
424 | { | |
425 | operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, | |
426 | operands[0], | |
427 | operands[2]); | |
428 | operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); | |
429 | operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); | |
430 | }) | |
431 | ||
; Split a masked (vec_merge) store to memory into a five-element
; UNSPEC_SCATTER whose last element is the mask (operand 3).  The mask is also
; passed to gcn_expand_scalar_to_vector_address when building the address
; (operand 5); operands 6 and 7 are the MEM's address space and volatile flag.
; The merge source (operand 2) does not appear in the replacement.
432 | (define_split | |
03876953 AS |
433 | [(set (match_operand:V_ALL 0 "memory_operand") |
434 | (vec_merge:V_ALL | |
435 | (match_operand:V_ALL 1 "general_operand") | |
436 | (match_operand:V_ALL 2 "") | |
3d6275e3 | 437 | (match_operand:DI 3 "gcn_exec_reg_operand"))) |
1165109b | 438 | (clobber (match_scratch:<VnDI> 4))] |
3d6275e3 AS |
439 | "" |
440 | [(set (mem:BLK (scratch)) | |
441 | (unspec:BLK [(match_dup 5) (match_dup 1) | |
442 | (match_dup 6) (match_dup 7) (match_dup 3)] | |
443 | UNSPEC_SCATTER))] | |
444 | { | |
445 | operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, | |
446 | operands[3], | |
447 | operands[0], | |
448 | operands[4]); | |
449 | operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); | |
450 | operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); | |
451 | }) | |
452 | ||
; Split an UNSPEC_SGPRBASE load from memory into an UNSPEC_GATHER.  Operand 5
; is the address from gcn_expand_scalar_to_vector_address (no exec argument,
; scratch = operand 2); operands 6 and 7 carry the MEM's address space and
; volatile flag as CONST_INTs.
453 | (define_split | |
03876953 AS |
454 | [(set (match_operand:V_ALL 0 "nonimmediate_operand") |
455 | (unspec:V_ALL | |
456 | [(match_operand:V_ALL 1 "memory_operand")] | |
3d6275e3 | 457 | UNSPEC_SGPRBASE)) |
1165109b | 458 | (clobber (match_scratch:<VnDI> 2))] |
3d6275e3 AS |
459 | "" |
460 | [(set (match_dup 0) | |
03876953 AS |
461 | (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7) |
462 | (mem:BLK (scratch))] | |
463 | UNSPEC_GATHER))] | |
3d6275e3 AS |
464 | { |
465 | operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, | |
466 | operands[1], | |
467 | operands[2]); | |
468 | operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); | |
469 | operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); | |
470 | }) | |
471 | ||
; Split a masked (vec_merge) load from memory into a vec_merge of an
; UNSPEC_GATHER with the original merge source (operand 2) and mask
; (operand 3).  The mask is also passed to gcn_expand_scalar_to_vector_address
; when building the address (operand 5); operands 6 and 7 are the MEM's
; address space and volatile flag.
472 | (define_split | |
03876953 AS |
473 | [(set (match_operand:V_ALL 0 "nonimmediate_operand") |
474 | (vec_merge:V_ALL | |
475 | (match_operand:V_ALL 1 "memory_operand") | |
476 | (match_operand:V_ALL 2 "") | |
3d6275e3 | 477 | (match_operand:DI 3 "gcn_exec_reg_operand"))) |
1165109b | 478 | (clobber (match_scratch:<VnDI> 4))] |
3d6275e3 AS |
479 | "" |
480 | [(set (match_dup 0) | |
03876953 AS |
481 | (vec_merge:V_ALL |
482 | (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7) | |
483 | (mem:BLK (scratch))] | |
484 | UNSPEC_GATHER) | |
3d6275e3 AS |
485 | (match_dup 2) |
486 | (match_dup 3)))] | |
487 | { | |
488 | operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, | |
489 | operands[3], | |
490 | operands[1], | |
491 | operands[4]); | |
492 | operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); | |
493 | operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); | |
494 | }) | |
495 | ||
496 | ; TODO: Add zero/sign extending variants. | |
497 | ||
498 | ;; }}} | |
499 | ;; {{{ Lane moves | |
500 | ||
501 | ; v_writelane and v_readlane work regardless of exec flags. | |
502 | ; We allow source to be scratch. | |
503 | ; | |
504 | ; FIXME these should take A immediates | |
505 | ||
; Write scalar operand 1 into one lane of a single-register vector with
; v_writelane_b32.  The lane is given by operand 2; the RTL expresses the
; merge mask as (1 << operand 2).  Marked exec "none" and laneselect "yes".
506 | (define_insn "*vec_set<mode>" | |
03876953 AS |
507 | [(set (match_operand:V_1REG 0 "register_operand" "= v") |
508 | (vec_merge:V_1REG | |
509 | (vec_duplicate:V_1REG | |
510 | (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv")) | |
511 | (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0") | |
3d6275e3 | 512 | (ashift (const_int 1) |
03876953 | 513 | (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] |
3d6275e3 AS |
514 | "" |
515 | "v_writelane_b32 %0, %1, %2" | |
516 | [(set_attr "type" "vop3a") | |
517 | (set_attr "length" "8") | |
518 | (set_attr "exec" "none") | |
519 | (set_attr "laneselect" "yes")]) | |
520 | ||
521 | ; FIXME: 64bit operations really should be splitters, but I am not sure how | |
522 | ; to represent vertical subregs. | |
; Two-register variant of the lane write: one v_writelane_b32 for the low half
; and one for the high half, both using the lane number in operand 2.
523 | (define_insn "*vec_set<mode>" | |
03876953 AS |
524 | [(set (match_operand:V_2REG 0 "register_operand" "= v") |
525 | (vec_merge:V_2REG | |
526 | (vec_duplicate:V_2REG | |
527 | (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv")) | |
528 | (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0") | |
3d6275e3 | 529 | (ashift (const_int 1) |
03876953 | 530 | (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] |
3d6275e3 AS |
531 | "" |
532 | "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2" | |
533 | [(set_attr "type" "vmult") | |
534 | (set_attr "length" "16") | |
535 | (set_attr "exec" "none") | |
536 | (set_attr "laneselect" "yes")]) | |
537 | ||
; Named expander for setting one lane: it has no preparation code and simply
; builds the vec_merge form, with the destination itself (match_dup 0) as the
; merge source and (1 << operand 2) as the mask.
538 | (define_expand "vec_set<mode>" | |
03876953 AS |
539 | [(set (match_operand:V_ALL 0 "register_operand") |
540 | (vec_merge:V_ALL | |
541 | (vec_duplicate:V_ALL | |
3d6275e3 AS |
542 | (match_operand:<SCALAR_MODE> 1 "register_operand")) |
543 | (match_dup 0) | |
544 | (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))] | |
545 | "") | |
546 | ||
; Lane write where the merge mask is a constant (operand 2).  The condition
; only matches when exact_log2 of the mask, taken as unsigned, is below the
; number of lanes in the mode.  The output code replaces operand 2 with that
; lane index before printing v_writelane_b32.
547 | (define_insn "*vec_set<mode>_1" | |
03876953 AS |
548 | [(set (match_operand:V_1REG 0 "register_operand" "=v") |
549 | (vec_merge:V_1REG | |
550 | (vec_duplicate:V_1REG | |
551 | (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv")) | |
552 | (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0") | |
553 | (match_operand:SI 2 "const_int_operand" " i")))] | |
1165109b | 554 | "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))" |
3d6275e3 AS |
555 | { |
556 | operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); | |
557 | return "v_writelane_b32 %0, %1, %2"; | |
558 | } | |
559 | [(set_attr "type" "vop3a") | |
560 | (set_attr "length" "8") | |
561 | (set_attr "exec" "none") | |
562 | (set_attr "laneselect" "yes")]) | |
563 | ||
; Two-register variant of the constant-mask lane write: the mask in operand 2
; is converted to a lane index with exact_log2 and a v_writelane_b32 is
; emitted for each half.
564 | (define_insn "*vec_set<mode>_1" | |
03876953 AS |
565 | [(set (match_operand:V_2REG 0 "register_operand" "=v") |
566 | (vec_merge:V_2REG | |
567 | (vec_duplicate:V_2REG | |
568 | (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv")) | |
569 | (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0") | |
570 | (match_operand:SI 2 "const_int_operand" " i")))] | |
1165109b | 571 | "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))" |
3d6275e3 AS |
572 | { |
573 | operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); | |
574 | return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"; | |
575 | } | |
576 | [(set_attr "type" "vmult") | |
577 | (set_attr "length" "16") | |
578 | (set_attr "exec" "none") | |
579 | (set_attr "laneselect" "yes")]) | |
580 | ||
; Broadcast scalar operand 1 to every lane of a single-register vector with
; one v_mov_b32.  The <exec> suffix also yields a "_exec" variant through the
; "vec_merge" substitution.
581 | (define_insn "vec_duplicate<mode><exec>" | |
03876953 AS |
582 | [(set (match_operand:V_1REG 0 "register_operand" "=v") |
583 | (vec_duplicate:V_1REG | |
584 | (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))] | |
3d6275e3 AS |
585 | "" |
586 | "v_mov_b32\t%0, %1" | |
587 | [(set_attr "type" "vop3a") | |
588 | (set_attr "length" "8")]) | |
589 | ||
; Two-register broadcast: one v_mov_b32 for the low half of the scalar and one
; for the high half.  The <exec> suffix also yields a "_exec" variant through
; the "vec_merge" substitution.
590 | (define_insn "vec_duplicate<mode><exec>" | |
03876953 AS |
591 | [(set (match_operand:V_2REG 0 "register_operand" "= v") |
592 | (vec_duplicate:V_2REG | |
3d6275e3 AS |
593 | (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))] |
594 | "" | |
595 | "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1" | |
596 | [(set_attr "type" "vop3a") | |
597 | (set_attr "length" "16")]) | |
598 | ||
; Read the lane selected by operand 2 from a single-register vector into a
; scalar register (constraint "Sg") with v_readlane_b32.  Marked exec "none"
; and laneselect "yes".
599 | (define_insn "vec_extract<mode><scalar_mode>" | |
03876953 | 600 | [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg") |
3d6275e3 | 601 | (vec_select:<SCALAR_MODE> |
03876953 AS |
602 | (match_operand:V_1REG 1 "register_operand" " v") |
603 | (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))] | |
3d6275e3 AS |
604 | "" |
605 | "v_readlane_b32 %0, %1, %2" | |
606 | [(set_attr "type" "vop3a") | |
607 | (set_attr "length" "8") | |
608 | (set_attr "exec" "none") | |
609 | (set_attr "laneselect" "yes")]) | |
610 | ||
; Two-register lane read: a v_readlane_b32 for each half.  The destination is
; early-clobbered ("=&Sg"), so it cannot share a register with the lane
; selector, which is still needed by the second instruction.
611 | (define_insn "vec_extract<mode><scalar_mode>" | |
03876953 | 612 | [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg") |
3d6275e3 | 613 | (vec_select:<SCALAR_MODE> |
03876953 AS |
614 | (match_operand:V_2REG 1 "register_operand" " v") |
615 | (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))] | |
3d6275e3 AS |
616 | "" |
617 | "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2" | |
618 | [(set_attr "type" "vmult") | |
619 | (set_attr "length" "16") | |
620 | (set_attr "exec" "none") | |
621 | (set_attr "laneselect" "yes")]) | |
622 | ||
; Extract the element of vector operand 2 at the highest lane whose bit is set
; in the DImode mask (operand 1).  The lane index is computed in an SImode
; temporary as 63 - clz (mask) and handed to vec_extract<mode><scalar_mode>.
; Requires can_create_pseudo_p ().  There is no special case for a zero mask
; here; fold_extract_last_<mode> branches around this expander in that case.
b92d1124 AS |
623 | (define_expand "extract_last_<mode>" |
624 | [(match_operand:<SCALAR_MODE> 0 "register_operand") | |
625 | (match_operand:DI 1 "gcn_alu_operand") | |
03876953 | 626 | (match_operand:V_ALL 2 "register_operand")] |
b92d1124 AS |
627 | "can_create_pseudo_p ()" |
628 | { | |
629 | rtx dst = operands[0]; | |
630 | rtx mask = operands[1]; | |
631 | rtx vect = operands[2]; | |
632 | rtx tmpreg = gen_reg_rtx (SImode); | |
633 | ||
634 | emit_insn (gen_clzdi2 (tmpreg, mask)); | |
635 | emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg)); | |
636 | emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg)); | |
637 | DONE; | |
638 | }) | |
639 | ||
; Like extract_last_<mode>, with a fallback: when the DImode mask (operand 2)
; equals zero, control branches to a label where operand 0 is set to the
; default value (operand 1); otherwise extract_last_<mode> is emitted for
; vector operand 3 and a jump skips the default assignment.
640 | (define_expand "fold_extract_last_<mode>" | |
641 | [(match_operand:<SCALAR_MODE> 0 "register_operand") | |
642 | (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand") | |
643 | (match_operand:DI 2 "gcn_alu_operand") | |
03876953 | 644 | (match_operand:V_ALL 3 "register_operand")] |
b92d1124 AS |
645 | "can_create_pseudo_p ()" |
646 | { | |
647 | rtx dst = operands[0]; | |
648 | rtx default_value = operands[1]; | |
649 | rtx mask = operands[2]; | |
650 | rtx vect = operands[3]; | |
651 | rtx else_label = gen_label_rtx (); | |
652 | rtx end_label = gen_label_rtx (); | |
653 | ||
654 | rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx); | |
655 | emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label)); | |
656 | emit_insn (gen_extract_last_<mode> (dst, mask, vect)); | |
657 | emit_jump_insn (gen_jump (end_label)); | |
658 | emit_barrier (); | |
659 | emit_label (else_label); | |
660 | emit_move_insn (dst, default_value); | |
661 | emit_label (end_label); | |
662 | DONE; | |
663 | }) | |
664 | ||
; Initialise vector operand 0 from operand 1; all of the work is delegated to
; gcn_expand_vector_init.
3d6275e3 | 665 | (define_expand "vec_init<mode><scalar_mode>" |
03876953 | 666 | [(match_operand:V_ALL 0 "register_operand") |
3d6275e3 AS |
667 | (match_operand 1)] |
668 | "" | |
669 | { | |
670 | gcn_expand_vector_init (operands[0], operands[1]); | |
671 | DONE; | |
672 | }) | |
673 | ||
674 | ;; }}} | |
675 | ;; {{{ Scatter / Gather | |
676 | ||
677 | ;; GCN does not have an instruction for loading a vector from contiguous | |
678 | ;; memory so *all* loads and stores are eventually converted to scatter | |
679 | ;; or gather. | |
680 | ;; | |
681 | ;; GCC does not permit MEM to hold vectors of addresses, so we must use an | |
682 | ;; unspec. The unspec formats are as follows: | |
683 | ;; | |
1165109b | 684 | ;; (unspec:V?? |
3d6275e3 AS |
685 | ;; [(<address expression>) |
686 | ;; (<addr_space_t>) | |
687 | ;; (<use_glc>) | |
688 | ;; (mem:BLK (scratch))] | |
689 | ;; UNSPEC_GATHER) | |
690 | ;; | |
691 | ;; (unspec:BLK | |
692 | ;; [(<address expression>) | |
693 | ;; (<source register>) | |
694 | ;; (<addr_space_t>) | |
695 | ;; (<use_glc>) | |
696 | ;; (<exec>)] | |
697 | ;; UNSPEC_SCATTER) | |
698 | ;; | |
699 | ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>. | |
700 | ;; - The mem:BLK does not contain any real information, but indicates that an | |
701 | ;; unknown memory read is taking place. Stores are expected to use a similar | |
702 | ;; mem:BLK outside the unspec. | |
703 | ;; - The address space and glc (volatile) fields are there to replace the | |
704 | ;; fields normally found in a MEM. | |
705 | ;; - Multiple forms of address expression are supported, below. | |
aad32a00 AS |
706 | ;; |
707 | ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on | |
3d6275e3 | 708 | |
; Gather-load expander.  Operands: 0 destination vector, 1 DImode base,
; 2 <VnSI> offsets, 3 immediate and 4 SImode value, the last two being
; forwarded to gcn_expand_scaled_offsets together with DEFAULT_ADDR_SPACE.
; If the address it returns has mode <VnDI>, gather<mode>_insn_1offset is
; emitted with that address; otherwise gather<mode>_insn_2offsets is emitted
; with operand 1 as the base.  The remaining immediate operands are const0_rtx.
1165109b | 709 | (define_expand "gather_load<mode><vnsi>" |
03876953 | 710 | [(match_operand:V_ALL 0 "register_operand") |
3d6275e3 | 711 | (match_operand:DI 1 "register_operand") |
1165109b | 712 | (match_operand:<VnSI> 2 "register_operand") |
3d6275e3 AS |
713 | (match_operand 3 "immediate_operand") |
714 | (match_operand:SI 4 "gcn_alu_operand")] | |
715 | "" | |
716 | { | |
717 | rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], | |
718 | operands[2], operands[4], | |
719 | INTVAL (operands[3]), NULL); | |
720 | ||
1165109b | 721 | if (GET_MODE (addr) == <VnDI>mode) |
3d6275e3 AS |
722 | emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx, |
723 | const0_rtx, const0_rtx)); | |
724 | else | |
725 | emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1], | |
726 | addr, const0_rtx, const0_rtx, | |
727 | const0_rtx)); | |
728 | DONE; | |
729 | }) | |
730 | ||
3d6275e3 AS |
731 | ; Allow any address expression |
; Builds an UNSPEC_GATHER from an unconstrained address expression
; (operand 1) plus two immediates (operands 2 and 3; per the format description
; above, the address space and the glc flag).  No preparation code.
732 | (define_expand "gather<mode>_expr<exec>" | |
03876953 AS |
733 | [(set (match_operand:V_ALL 0 "register_operand") |
734 | (unspec:V_ALL | |
3d6275e3 AS |
735 | [(match_operand 1 "") |
736 | (match_operand 2 "immediate_operand") | |
737 | (match_operand 3 "immediate_operand") | |
738 | (mem:BLK (scratch))] | |
739 | UNSPEC_GATHER))] | |
740 | "" | |
741 | {}) | |
742 | ||
; Gather from a <VnDI> vector of addresses (operand 1) plus a constant offset
; (operand 2).  Operand 3 is the address space and operand 4 the glc flag.
; - Flat address space: flat_load followed by "s_waitcnt 0".  The offset is
;   only printed when TARGET_GCN5_PLUS, so the insn condition accepts a
;   non-zero offset (below 0x1000) only on those targets; on TARGET_GCN3 the
;   offset must be zero.  Previously the range test was not guarded, so a
;   non-zero offset could match on GCN3 and then be dropped from the output.
; - Global address space: global_load with an offset in [-0x1000, 0x1000),
;   followed by "s_waitcnt vmcnt(0)".
743 | (define_insn "gather<mode>_insn_1offset<exec>" | |
1165109b | 744 | [(set (match_operand:V_ALL 0 "register_operand" "=v") |
03876953 | 745 | (unspec:V_ALL |
1165109b AS |
746 | [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v") |
747 | (vec_duplicate:<VnDI> | |
748 | (match_operand 2 "immediate_operand" " n"))) | |
749 | (match_operand 3 "immediate_operand" " n") | |
750 | (match_operand 4 "immediate_operand" " n") | |
3d6275e3 AS |
751 | (mem:BLK (scratch))] |
752 | UNSPEC_GATHER))] | |
753 | "(AS_FLAT_P (INTVAL (operands[3])) | |
754 | && ((TARGET_GCN3 && INTVAL(operands[2]) == 0) | |
755 | || (TARGET_GCN5_PLUS && (unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000))) | |
756 | || (AS_GLOBAL_P (INTVAL (operands[3])) | |
757 | && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))" | |
758 | { | |
759 | addr_space_t as = INTVAL (operands[3]); | |
760 | const char *glc = INTVAL (operands[4]) ? " glc" : ""; | |
761 | ||
762 | static char buf[200]; | |
763 | if (AS_FLAT_P (as)) | |
764 | { | |
765 | if (TARGET_GCN5_PLUS) | |
1e8f5d49 | 766 | sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0", |
3d6275e3 AS |
767 | glc); |
768 | else | |
1e8f5d49 | 769 | sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc); |
3d6275e3 AS |
770 | } |
771 | else if (AS_GLOBAL_P (as)) | |
28dd61b7 | 772 | sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;" |
3d6275e3 AS |
773 | "s_waitcnt\tvmcnt(0)", glc); |
774 | else | |
775 | gcc_unreachable (); | |
776 | ||
777 | return buf; | |
778 | } | |
779 | [(set_attr "type" "flat") | |
780 | (set_attr "length" "12")]) | |
781 | ||
;; Gather from a DS address space (AS_ANY_DS_P on operand 3): a <VnSI>
;; address vector (operand 1) plus an immediate offset (operand 2) that must
;; be in [0, 0x10000).  Emits ds_read followed by "s_waitcnt lgkmcnt(0)";
;; " gds" is appended when the address space satisfies AS_GDS_P.
;; Operand 4 is matched but not used by the output code.
782 | (define_insn "gather<mode>_insn_1offset_ds<exec>" | |
1165109b | 783 | [(set (match_operand:V_ALL 0 "register_operand" "=v") |
03876953 | 784 | (unspec:V_ALL |
1165109b AS |
785 | [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v") |
786 | (vec_duplicate:<VnSI> | |
787 | (match_operand 2 "immediate_operand" " n"))) | |
788 | (match_operand 3 "immediate_operand" " n") | |
789 | (match_operand 4 "immediate_operand" " n") | |
3d6275e3 AS |
790 | (mem:BLK (scratch))] |
791 | UNSPEC_GATHER))] | |
792 | "(AS_ANY_DS_P (INTVAL (operands[3])) | |
793 | && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))" | |
794 | { | |
795 | addr_space_t as = INTVAL (operands[3]); | |
796 | static char buf[200]; | |
797 | sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)", | |
798 | (AS_GDS_P (as) ? " gds" : "")); | |
799 | return buf; | |
800 | } | |
801 | [(set_attr "type" "ds") | |
802 | (set_attr "length" "12")]) | |
803 | ||
;; Gather from an address built from three parts: a scalar DI base
;; (operand 1, duplicated to all lanes), a sign-extended <VnSI> vector offset
;; (operand 2) and an immediate offset (operand 3) in [-0x1000, 0x1000).
;; Only matches when operand 4 is a global address space; a non-zero
;; operand 5 appends " glc".
;; The vector offset register is printed by hand from its register number,
;; either as "vN" or as the pair "v[N:N+1]", depending on
;; HAVE_GCN_ASM_GLOBAL_LOAD_FIXED (see the assembler-bug note in the body).
804 | (define_insn "gather<mode>_insn_2offsets<exec>" | |
1165109b | 805 | [(set (match_operand:V_ALL 0 "register_operand" "=v") |
03876953 | 806 | (unspec:V_ALL |
1165109b AS |
807 | [(plus:<VnDI> |
808 | (plus:<VnDI> | |
809 | (vec_duplicate:<VnDI> | |
810 | (match_operand:DI 1 "register_operand" "Sv")) | |
811 | (sign_extend:<VnDI> | |
812 | (match_operand:<VnSI> 2 "register_operand" " v"))) | |
813 | (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n"))) | |
814 | (match_operand 4 "immediate_operand" " n") | |
815 | (match_operand 5 "immediate_operand" " n") | |
3d6275e3 AS |
816 | (mem:BLK (scratch))] |
817 | UNSPEC_GATHER))] | |
818 | "(AS_GLOBAL_P (INTVAL (operands[4])) | |
819 | && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))" | |
820 | { | |
821 | addr_space_t as = INTVAL (operands[4]); | |
822 | const char *glc = INTVAL (operands[5]) ? " glc" : ""; | |
823 | ||
824 | static char buf[200]; | |
825 | if (AS_GLOBAL_P (as)) | |
826 | { | |
827 | /* Work around assembler bug in which a 64-bit register is expected, | |
828 | but a 32-bit value would be correct. */ | |
829 | int reg = REGNO (operands[2]) - FIRST_VGPR_REG; | |
81c362c7 AS |
830 | if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED) |
831 | sprintf (buf, "global_load%%o0\t%%0, v%d, %%1 offset:%%3%s\;" | |
832 | "s_waitcnt\tvmcnt(0)", reg, glc); | |
833 | else | |
834 | sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;" | |
835 | "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc); | |
3d6275e3 AS |
836 | } |
837 | else | |
838 | gcc_unreachable (); | |
839 | ||
840 | return buf; | |
841 | } | |
842 | [(set_attr "type" "flat") | |
843 | (set_attr "length" "12")]) | |
844 | ||
;; Scatter-store expander.  Operands: 0 = DI base, 1 = <VnSI> offsets,
;; 2 = immediate (passed as INTVAL to gcn_expand_scaled_offsets),
;; 3 = SI ALU operand (presumably the scale -- confirm against
;; gcn_expand_scaled_offsets), 4 = vector data to store.
;; The address is computed for DEFAULT_ADDR_SPACE.  If the helper returns a
;; <VnDI> value the single-offset insn is used with that full address;
;; otherwise the two-offset insn is used with operand 0 as the base and the
;; returned value as the vector offset.  All remaining immediates are zero.
1165109b | 845 | (define_expand "scatter_store<mode><vnsi>" |
3d6275e3 | 846 | [(match_operand:DI 0 "register_operand") |
1165109b | 847 | (match_operand:<VnSI> 1 "register_operand") |
3d6275e3 AS |
848 | (match_operand 2 "immediate_operand") |
849 | (match_operand:SI 3 "gcn_alu_operand") | |
03876953 | 850 | (match_operand:V_ALL 4 "register_operand")] |
3d6275e3 AS |
851 | "" |
852 | { | |
853 | rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], | |
854 | operands[1], operands[3], | |
855 | INTVAL (operands[2]), NULL); | |
856 | ||
1165109b | 857 | if (GET_MODE (addr) == <VnDI>mode) |
3d6275e3 AS |
858 | emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4], |
859 | const0_rtx, const0_rtx)); | |
860 | else | |
861 | emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr, | |
862 | const0_rtx, operands[4], | |
863 | const0_rtx, const0_rtx)); | |
864 | DONE; | |
865 | }) | |
866 | ||
3d6275e3 AS |
867 | ; Allow any address expression |
;; Generic scatter expander: a BLK memory set from an UNSPEC_SCATTER.
;; Operand 0 is a <VnDI> address with an empty predicate (any expression),
;; operand 1 is the vector data register, operands 2 and 3 are immediates.
;; The preparation block is empty, so the template is emitted as written.
868 | (define_expand "scatter<mode>_expr<exec_scatter>" | |
869 | [(set (mem:BLK (scratch)) | |
870 | (unspec:BLK | |
1165109b | 871 | [(match_operand:<VnDI> 0 "") |
03876953 | 872 | (match_operand:V_ALL 1 "register_operand") |
3d6275e3 AS |
873 | (match_operand 2 "immediate_operand") |
874 | (match_operand 3 "immediate_operand")] | |
875 | UNSPEC_SCATTER))] | |
876 | "" | |
877 | {}) | |
878 | ||
;; Scatter using a vector of full addresses (operand 0, <VnDI>) plus one
;; immediate offset (operand 1).  Operand 2 is the data, operand 3 the
;; address space, and a non-zero operand 4 appends " glc".
;; Insn condition:
;;   flat address space:   offset is 0, or (TARGET_GCN5_PLUS only) in
;;                         [0, 0x1000); the offset is only printed for
;;                         TARGET_GCN5_PLUS.
;;   global address space: offset in [-0x1000, 0x1000).
;; No s_waitcnt is emitted after the store.
879 | (define_insn "scatter<mode>_insn_1offset<exec_scatter>" | |
880 | [(set (mem:BLK (scratch)) | |
881 | (unspec:BLK | |
1165109b AS |
882 | [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v") |
883 | (vec_duplicate:<VnDI> | |
884 | (match_operand 1 "immediate_operand" "n"))) | |
885 | (match_operand:V_ALL 2 "register_operand" "v") | |
886 | (match_operand 3 "immediate_operand" "n") | |
887 | (match_operand 4 "immediate_operand" "n")] | |
3d6275e3 AS |
888 | UNSPEC_SCATTER))] |
889 | "(AS_FLAT_P (INTVAL (operands[3])) | |
890 | && (INTVAL(operands[1]) == 0 | |
891 | || (TARGET_GCN5_PLUS | |
892 | && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000))) | |
893 | || (AS_GLOBAL_P (INTVAL (operands[3])) | |
894 | && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))" | |
895 | { | |
896 | addr_space_t as = INTVAL (operands[3]); | |
897 | const char *glc = INTVAL (operands[4]) ? " glc" : ""; | |
898 | ||
899 | static char buf[200]; | |
900 | if (AS_FLAT_P (as)) | |
901 | { | |
902 | if (TARGET_GCN5_PLUS) | |
930c5599 | 903 | sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc); |
3d6275e3 | 904 | else |
930c5599 | 905 | sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc); |
3d6275e3 AS |
906 | } |
907 | else if (AS_GLOBAL_P (as)) | |
930c5599 | 908 | sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc); |
3d6275e3 AS |
909 | else |
910 | gcc_unreachable (); | |
911 | ||
912 | return buf; | |
913 | } | |
914 | [(set_attr "type" "flat") | |
915 | (set_attr "length" "12")]) | |
916 | ||
;; Scatter to a DS address space (AS_ANY_DS_P on operand 3): a <VnSI>
;; address vector (operand 0) plus an immediate offset (operand 1) that must
;; be in [0, 0x10000); operand 2 is the data.  Emits ds_write followed by
;; "s_waitcnt lgkmcnt(0)"; " gds" is appended when the address space
;; satisfies AS_GDS_P.  Operand 4 is matched but not used by the output code.
917 | (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>" | |
918 | [(set (mem:BLK (scratch)) | |
919 | (unspec:BLK | |
1165109b AS |
920 | [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v") |
921 | (vec_duplicate:<VnSI> | |
922 | (match_operand 1 "immediate_operand" "n"))) | |
923 | (match_operand:V_ALL 2 "register_operand" "v") | |
924 | (match_operand 3 "immediate_operand" "n") | |
925 | (match_operand 4 "immediate_operand" "n")] | |
3d6275e3 AS |
926 | UNSPEC_SCATTER))] |
927 | "(AS_ANY_DS_P (INTVAL (operands[3])) | |
928 | && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))" | |
929 | { | |
930 | addr_space_t as = INTVAL (operands[3]); | |
931 | static char buf[200]; | |
e929d65b | 932 | sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)", |
3d6275e3 AS |
933 | (AS_GDS_P (as) ? " gds" : "")); |
934 | return buf; | |
935 | } | |
936 | [(set_attr "type" "ds") | |
937 | (set_attr "length" "12")]) | |
938 | ||
;; Scatter to an address built from three parts: a scalar DI base
;; (operand 0, duplicated to all lanes), a sign-extended <VnSI> vector offset
;; (operand 1) and an immediate offset (operand 2) in [-0x1000, 0x1000).
;; Operand 3 is the data.  Only matches when operand 4 is a global address
;; space; a non-zero operand 5 appends " glc".
;; The vector offset register is printed by hand from its register number,
;; either as "vN" or as the pair "v[N:N+1]", depending on
;; HAVE_GCN_ASM_GLOBAL_LOAD_FIXED (see the assembler-bug note in the body).
939 | (define_insn "scatter<mode>_insn_2offsets<exec_scatter>" | |
940 | [(set (mem:BLK (scratch)) | |
941 | (unspec:BLK | |
1165109b AS |
942 | [(plus:<VnDI> |
943 | (plus:<VnDI> | |
944 | (vec_duplicate:<VnDI> | |
945 | (match_operand:DI 0 "register_operand" "Sv")) | |
946 | (sign_extend:<VnDI> | |
947 | (match_operand:<VnSI> 1 "register_operand" " v"))) | |
948 | (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n"))) | |
949 | (match_operand:V_ALL 3 "register_operand" " v") | |
950 | (match_operand 4 "immediate_operand" " n") | |
951 | (match_operand 5 "immediate_operand" " n")] | |
3d6275e3 AS |
952 | UNSPEC_SCATTER))] |
953 | "(AS_GLOBAL_P (INTVAL (operands[4])) | |
954 | && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))" | |
955 | { | |
956 | addr_space_t as = INTVAL (operands[4]); | |
957 | const char *glc = INTVAL (operands[5]) ? " glc" : ""; | |
958 | ||
959 | static char buf[200]; | |
960 | if (AS_GLOBAL_P (as)) | |
961 | { | |
962 | /* Work around assembler bug in which a 64-bit register is expected, | |
963 | but a 32-bit value would be correct. */ | |
964 | int reg = REGNO (operands[1]) - FIRST_VGPR_REG; | |
81c362c7 AS |
965 | if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED) |
966 | sprintf (buf, "global_store%%s3\tv%d, %%3, %%0 offset:%%2%s", | |
967 | reg, glc); | |
968 | else | |
969 | sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s", | |
970 | reg, reg + 1, glc); | |
3d6275e3 AS |
971 | } |
972 | else | |
973 | gcc_unreachable (); | |
974 | ||
975 | return buf; | |
976 | } | |
977 | [(set_attr "type" "flat") | |
978 | (set_attr "length" "12")]) | |
979 | ||
980 | ;; }}} | |
981 | ;; {{{ Permutations | |
982 | ||
;; ds_bpermute for modes that fit in one vector register (V_1REG).
;; Operand 2 is the source data and operand 1 a <VnSI> vector; note that
;; the template prints operand 1 before operand 2.  Operand 3 is the EXEC
;; register; it appears in the RTL only and is not printed.
;; The instruction is followed by "s_waitcnt lgkmcnt(0)".
983 | (define_insn "ds_bpermute<mode>" | |
03876953 AS |
984 | [(set (match_operand:V_1REG 0 "register_operand" "=v") |
985 | (unspec:V_1REG | |
986 | [(match_operand:V_1REG 2 "register_operand" " v") | |
1165109b | 987 | (match_operand:<VnSI> 1 "register_operand" " v") |
03876953 | 988 | (match_operand:DI 3 "gcn_exec_reg_operand" " e")] |
3d6275e3 AS |
989 | UNSPEC_BPERMUTE))] |
990 | "" | |
991 | "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)" | |
992 | [(set_attr "type" "vop2") | |
993 | (set_attr "length" "12")]) | |
994 | ||
;; ds_bpermute for two-register modes (V_2REG).  Never output directly
;; ("#"); after reload it is split into two <VnSI> UNSPEC_BPERMUTE sets, one
;; per register part (parts 0 and 1 of operands 0 and 2), both using the
;; same operand 1 and EXEC operand 3.
;; The destination is early-clobber ("=&v") and operand 2 may also be tied
;; to it ("v0").
995 | (define_insn_and_split "ds_bpermute<mode>" | |
03876953 AS |
996 | [(set (match_operand:V_2REG 0 "register_operand" "=&v") |
997 | (unspec:V_2REG | |
998 | [(match_operand:V_2REG 2 "register_operand" " v0") | |
1165109b | 999 | (match_operand:<VnSI> 1 "register_operand" " v") |
03876953 | 1000 | (match_operand:DI 3 "gcn_exec_reg_operand" " e")] |
3d6275e3 AS |
1001 | UNSPEC_BPERMUTE))] |
1002 | "" | |
1003 | "#" | |
1004 | "reload_completed" | |
1165109b AS |
1005 | [(set (match_dup 4) (unspec:<VnSI> |
1006 | [(match_dup 6) (match_dup 1) (match_dup 3)] | |
1007 | UNSPEC_BPERMUTE)) | |
1008 | (set (match_dup 5) (unspec:<VnSI> | |
1009 | [(match_dup 7) (match_dup 1) (match_dup 3)] | |
1010 | UNSPEC_BPERMUTE))] | |
3d6275e3 AS |
1011 | { |
1012 | operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0); | |
1013 | operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1); | |
1014 | operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0); | |
1015 | operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1); | |
1016 | } | |
1017 | [(set_attr "type" "vmult") | |
1018 | (set_attr "length" "24")]) | |
1019 | ||
;; DPP move: UNSPEC_MOV_DPP_SHR of vector operand 1 with constant integer
;; operand 2.  The assembly text is produced by gcn_expand_dpp_shr_insn,
;; which is given the mode, the mnemonic "v_mov_b32", the unspec code and
;; the value of operand 2.
a5879399 | 1020 | (define_insn "@dpp_move<mode>" |
03876953 AS |
1021 | [(set (match_operand:V_noHI 0 "register_operand" "=v") |
1022 | (unspec:V_noHI | |
1023 | [(match_operand:V_noHI 1 "register_operand" " v") | |
1024 | (match_operand:SI 2 "const_int_operand" " n")] | |
a5879399 AS |
1025 | UNSPEC_MOV_DPP_SHR))] |
1026 | "" | |
1027 | { | |
1028 | return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32", | |
1029 | UNSPEC_MOV_DPP_SHR, INTVAL (operands[2])); | |
1030 | } | |
1031 | [(set_attr "type" "vop_dpp") | |
1032 | (set_attr "length" "16")]) | |
1033 | ||
3d6275e3 AS |
1034 | ;; }}} |
1035 | ;; {{{ ALU special case: add/sub | |
1036 | ||
;; Vector integer add for single-register integer modes.  Operand 1 must be
;; a register and is marked commutative ("%"); operand 2 may be any
;; "vSvB" ALU operand and is printed first.  The instruction names vcc as
;; its carry destination, hence the VCC_REG clobber.
77f7566e | 1037 | (define_insn "add<mode>3<exec_clobber>" |
03876953 AS |
1038 | [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") |
1039 | (plus:V_INT_1REG | |
1040 | (match_operand:V_INT_1REG 1 "register_operand" "% v") | |
1041 | (match_operand:V_INT_1REG 2 "gcn_alu_operand" "vSvB"))) | |
3d6275e3 AS |
1042 | (clobber (reg:DI VCC_REG))] |
1043 | "" | |
1044 | "v_add%^_u32\t%0, vcc, %2, %1" | |
1045 | [(set_attr "type" "vop2") | |
1046 | (set_attr "length" "8")]) | |
1047 | ||
;; Vector integer add of a duplicated scalar (operand 2, <SCALAR_MODE>,
;; "SvB") and a vector register (operand 1).  Same assembly template as the
;; plain add: the scalar is printed first, vcc is the carry destination and
;; VCC_REG is clobbered.
77f7566e | 1048 | (define_insn "add<mode>3_dup<exec_clobber>" |
03876953 AS |
1049 | [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") |
1050 | (plus:V_INT_1REG | |
1051 | (vec_duplicate:V_INT_1REG | |
1052 | (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB")) | |
1053 | (match_operand:V_INT_1REG 1 "register_operand" " v"))) | |
3d6275e3 AS |
1054 | (clobber (reg:DI VCC_REG))] |
1055 | "" | |
1056 | "v_add%^_u32\t%0, vcc, %2, %1" | |
1057 | [(set_attr "type" "vop2") | |
1058 | (set_attr "length" "8")]) | |
1059 | ||
1165109b AS |
;; Vector SI add that exposes the carry-out as DI operand 3 instead of
;; clobbering VCC.  The carry is described as (sum <u operand 1).
;; Alternative 0 writes the carry to a "cV" register (type vop2),
;; alternative 1 to an "Sg" register (type vop3b).
1060 | (define_insn "add<mode>3_vcc<exec_vcc>" |
1061 | [(set (match_operand:V_SI 0 "register_operand" "= v, v") | |
1062 | (plus:V_SI | |
1063 | (match_operand:V_SI 1 "register_operand" "% v, v") | |
1064 | (match_operand:V_SI 2 "gcn_alu_operand" "vSvB,vSvB"))) | |
1065 | (set (match_operand:DI 3 "register_operand" "= cV, Sg") | |
1066 | (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2)) | |
3d6275e3 AS |
1067 | (match_dup 1)))] |
1068 | "" | |
1069 | "v_add%^_u32\t%0, %3, %2, %1" | |
1070 | [(set_attr "type" "vop2,vop3b") | |
1071 | (set_attr "length" "8")]) | |
1072 | ||
1073 | ; This pattern only changes the VCC bits when the corresponding lane is | |
1074 | ; enabled, so the set must be described as an ior. | |
1075 | ||
1165109b AS |
;; Vector SI add of a duplicated scalar (operand 1, SI) and a vector
;; register (operand 2), with the carry-out exposed as DI operand 3.
;; NOTE(review): in the carry expression the vec_duplicate is applied to
;; (match_dup 2), which is the vector operand, while (match_dup 1) is the
;; scalar -- this looks swapped relative to the first set; confirm.
;; NOTE(review): the template prints the vector (%2) before the scalar (%1),
;; the opposite order from add<mode>3_dup; confirm this is intended.
1076 | (define_insn "add<mode>3_vcc_dup<exec_vcc>" |
1077 | [(set (match_operand:V_SI 0 "register_operand" "= v, v") | |
1078 | (plus:V_SI | |
1079 | (vec_duplicate:V_SI | |
1080 | (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB")) | |
1081 | (match_operand:V_SI 2 "register_operand" " v, v"))) | |
1082 | (set (match_operand:DI 3 "register_operand" "=cV, Sg") | |
1083 | (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2)) | |
1084 | (match_dup 1)) | |
1085 | (vec_duplicate:V_SI (match_dup 2))))] | |
3d6275e3 AS |
1086 | "" |
1087 | "v_add%^_u32\t%0, %3, %2, %1" | |
1088 | [(set_attr "type" "vop2,vop3b") | |
1089 | (set_attr "length" "8,8")]) | |
1090 | ||
66b01cc3 AS |
1091 | ; v_addc does not accept an SGPR because the VCC read already counts as an |
1092 | ; SGPR use and the number of SGPR operands is limited to 1. It does not | |
1093 | ; accept "B" immediate constants due to a related bus conflict. | |
3d6275e3 | 1094 | |
1165109b AS |
;; Vector SI add with carry-in and carry-out.  DI operand 3 is the carry-in
;; mask: a vec_merge selects 1 or 0 per lane, and that value is added to
;; operands 1 and 2.  DI operand 4 receives the carry-out, described as the
;; ior of two ltu comparisons (one per addition step).
;; Alternative 0 uses "cV" for both carries (vop2, length 4); alternative 1
;; also allows other scalar registers (vop3b, length 8).
1095 | (define_insn "addc<mode>3<exec_vcc>" |
1096 | [(set (match_operand:V_SI 0 "register_operand" "=v, v") | |
1097 | (plus:V_SI | |
1098 | (plus:V_SI | |
1099 | (vec_merge:V_SI | |
1100 | (vec_duplicate:V_SI (const_int 1)) | |
1101 | (vec_duplicate:V_SI (const_int 0)) | |
66b01cc3 | 1102 | (match_operand:DI 3 "register_operand" " cV,cVSv")) |
1165109b AS |
1103 | (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA")) |
1104 | (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA"))) | |
66b01cc3 | 1105 | (set (match_operand:DI 4 "register_operand" "=cV,cVSg") |
1165109b AS |
1106 | (ior:DI (ltu:DI (plus:V_SI |
1107 | (plus:V_SI | |
1108 | (vec_merge:V_SI | |
1109 | (vec_duplicate:V_SI (const_int 1)) | |
1110 | (vec_duplicate:V_SI (const_int 0)) | |
3d6275e3 AS |
1111 | (match_dup 3)) |
1112 | (match_dup 1)) | |
1113 | (match_dup 2)) | |
1114 | (match_dup 2)) | |
1165109b AS |
1115 | (ltu:DI (plus:V_SI |
1116 | (vec_merge:V_SI | |
1117 | (vec_duplicate:V_SI (const_int 1)) | |
1118 | (vec_duplicate:V_SI (const_int 0)) | |
3d6275e3 AS |
1119 | (match_dup 3)) |
1120 | (match_dup 1)) | |
1121 | (match_dup 1))))] | |
1122 | "" | |
66b01cc3 | 1123 | "v_addc%^_u32\t%0, %4, %2, %1, %3" |
3d6275e3 AS |
1124 | [(set_attr "type" "vop2,vop3b") |
1125 | (set_attr "length" "4,8")]) | |
1126 | ||
;; Vector integer subtract (operand 1 - operand 2) for single-register
;; integer modes.  Alternative 0 has operand 2 in a VGPR and emits v_sub;
;; alternative 1 has operand 1 in a VGPR and emits v_subrev with the two
;; operands printed in swapped order.  vcc is the carry destination, hence
;; the VCC_REG clobber.
77f7566e | 1127 | (define_insn "sub<mode>3<exec_clobber>" |
03876953 AS |
1128 | [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v") |
1129 | (minus:V_INT_1REG | |
1130 | (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v") | |
1131 | (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB"))) | |
3d6275e3 AS |
1132 | (clobber (reg:DI VCC_REG))] |
1133 | "" | |
1134 | "@ | |
1135 | v_sub%^_u32\t%0, vcc, %1, %2 | |
1136 | v_subrev%^_u32\t%0, vcc, %2, %1" | |
1137 | [(set_attr "type" "vop2") | |
1138 | (set_attr "length" "8,8")]) | |
1139 | ||
1165109b AS |
;; Vector SI subtract that exposes the borrow as DI operand 3, described as
;; (difference >u operand 1).  Four alternatives: v_sub or v_subrev
;; (depending on which input is the plain VGPR), each with the borrow in
;; either a "cV" register (vop2) or an "Sg" register (vop3b).
1140 | (define_insn "sub<mode>3_vcc<exec_vcc>" |
1141 | [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v") | |
1142 | (minus:V_SI | |
1143 | (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v") | |
1144 | (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB"))) | |
1145 | (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg") | |
1146 | (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2)) | |
3d6275e3 AS |
1147 | (match_dup 1)))] |
1148 | "" | |
1149 | "@ | |
1150 | v_sub%^_u32\t%0, %3, %1, %2 | |
1151 | v_sub%^_u32\t%0, %3, %1, %2 | |
1152 | v_subrev%^_u32\t%0, %3, %2, %1 | |
1153 | v_subrev%^_u32\t%0, %3, %2, %1" | |
1154 | [(set_attr "type" "vop2,vop3b,vop2,vop3b") | |
1155 | (set_attr "length" "8")]) | |
1156 | ||
66b01cc3 AS |
1157 | ; v_subb does not accept an SGPR because the VCC read already counts as an |
1158 | ; SGPR use and the number of SGPR operands is limited to 1. It does not | |
1159 | ; accept "B" immediate constants due to a related bus conflict. | |
3d6275e3 | 1160 | |
1165109b AS |
;; Vector SI subtract with borrow-in (DI operand 3, turned into a per-lane
;; 1 or 0 by vec_merge) and borrow-out (DI operand 4, the ior of a gtu and
;; an ltu comparison).  Alternatives 0-1 emit v_subb, alternatives 2-3 emit
;; v_subbrev with operands 1 and 2 printed in swapped order; in each pair
;; the first is vop2 (length 4) and the second vop3b (length 8).
1161 | (define_insn "subc<mode>3<exec_vcc>" |
1162 | [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v") | |
1163 | (minus:V_SI | |
1164 | (minus:V_SI | |
1165 | (vec_merge:V_SI | |
1166 | (vec_duplicate:V_SI (const_int 1)) | |
1167 | (vec_duplicate:V_SI (const_int 0)) | |
1168 | (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv")) | |
1169 | (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA")) | |
1170 | (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA"))) | |
1171 | (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg") | |
1172 | (ior:DI (gtu:DI (minus:V_SI (minus:V_SI | |
1173 | (vec_merge:V_SI | |
1174 | (vec_duplicate:V_SI (const_int 1)) | |
1175 | (vec_duplicate:V_SI (const_int 0)) | |
1176 | (match_dup 3)) | |
3d6275e3 AS |
1177 | (match_dup 1)) |
1178 | (match_dup 2)) | |
1179 | (match_dup 2)) | |
1165109b AS |
1180 | (ltu:DI (minus:V_SI (vec_merge:V_SI |
1181 | (vec_duplicate:V_SI (const_int 1)) | |
1182 | (vec_duplicate:V_SI (const_int 0)) | |
1183 | (match_dup 3)) | |
1184 | (match_dup 1)) | |
3d6275e3 AS |
1185 | (match_dup 1))))] |
1186 | "" | |
1187 | "@ | |
1188 | v_subb%^_u32\t%0, %4, %1, %2, %3 | |
1189 | v_subb%^_u32\t%0, %4, %1, %2, %3 | |
1190 | v_subbrev%^_u32\t%0, %4, %2, %1, %3 | |
1191 | v_subbrev%^_u32\t%0, %4, %2, %1, %3" | |
1192 | [(set_attr "type" "vop2,vop3b,vop2,vop3b") | |
66b01cc3 | 1193 | (set_attr "length" "4,8,4,8")]) |
3d6275e3 | 1194 | |
1165109b AS |
;; V_DI vector add.  Never output directly ("#"); once gcn_can_split_p
;; holds for operands 0, 1 and 2 it is split into two <vnsi> instructions:
;;   part 0: add<vnsi>3_vcc, writing the carry to VCC;
;;   part 1: addc<vnsi>3, using VCC as both carry-in and carry-out.
;; VCC_REG is therefore clobbered.
1195 | (define_insn_and_split "add<mode>3" |
1196 | [(set (match_operand:V_DI 0 "register_operand" "= v") | |
1197 | (plus:V_DI | |
1198 | (match_operand:V_DI 1 "register_operand" "%vDb") | |
1199 | (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))) | |
3d6275e3 AS |
1200 | (clobber (reg:DI VCC_REG))] |
1201 | "" | |
1202 | "#" | |
1165109b AS |
1203 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1204 | && gcn_can_split_p (<MODE>mode, operands[1]) | |
1205 | && gcn_can_split_p (<MODE>mode, operands[2])" | |
3d6275e3 AS |
1206 | [(const_int 0)] |
1207 | { | |
1208 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1209 | emit_insn (gen_add<vnsi>3_vcc |
1210 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
1211 | gcn_operand_part (<MODE>mode, operands[1], 0), | |
1212 | gcn_operand_part (<MODE>mode, operands[2], 0), | |
3d6275e3 | 1213 | vcc)); |
1165109b AS |
1214 | emit_insn (gen_addc<vnsi>3 |
1215 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1216 | gcn_operand_part (<MODE>mode, operands[1], 1), | |
1217 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 AS |
1218 | vcc, vcc)); |
1219 | DONE; | |
1220 | } | |
1221 | [(set_attr "type" "vmult") | |
1222 | (set_attr "length" "8")]) | |
1223 | ||
1165109b AS |
;; V_DI vector add under an EXEC mask: lanes selected by operand 4 (EXEC)
;; receive operand 1 + operand 2, the others take operand 3 (a register or
;; unspec, constraint "U0").  Never output directly ("#"); it is split into
;;   part 0: add<vnsi>3_vcc_exec, writing the carry to VCC;
;;   part 1: addc<vnsi>3_exec, using VCC as both carry-in and carry-out,
;; each given the matching part of operand 3 and the EXEC operand.
;; The split is only allowed once every V_DI operand that is taken apart
;; with gcn_operand_part (operands 0 to 3) passes gcn_can_split_p.  The
;; merge source is operand 3; operand 4 is the DI EXEC mask and is passed
;; through whole, so it is operand 3 that must be checked, as in the other
;; *_exec splitters in this file.
1224 | (define_insn_and_split "add<mode>3_exec" |
1225 | [(set (match_operand:V_DI 0 "register_operand" "= v") | |
1226 | (vec_merge:V_DI | |
1227 | (plus:V_DI | |
1228 | (match_operand:V_DI 1 "register_operand" "%vDb") | |
1229 | (match_operand:V_DI 2 "gcn_alu_operand" " vDb")) | |
1230 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") | |
1231 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) | |
3d6275e3 AS |
1232 | (clobber (reg:DI VCC_REG))] |
1233 | "" | |
1234 | "#" | |
1165109b AS |
1235 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1236 | && gcn_can_split_p (<MODE>mode, operands[1]) | |
1237 | && gcn_can_split_p (<MODE>mode, operands[2]) | |
1238 | && gcn_can_split_p (<MODE>mode, operands[3])" | |
3d6275e3 AS |
1239 | [(const_int 0)] |
1240 | { | |
1241 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1242 | emit_insn (gen_add<vnsi>3_vcc_exec |
1243 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
1244 | gcn_operand_part (<MODE>mode, operands[1], 0), | |
1245 | gcn_operand_part (<MODE>mode, operands[2], 0), | |
3d6275e3 | 1246 | vcc, |
1165109b | 1247 | gcn_operand_part (<MODE>mode, operands[3], 0), |
3d6275e3 | 1248 | operands[4])); |
1165109b AS |
1249 | emit_insn (gen_addc<vnsi>3_exec |
1250 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1251 | gcn_operand_part (<MODE>mode, operands[1], 1), | |
1252 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 | 1253 | vcc, vcc, |
1165109b | 1254 | gcn_operand_part (<MODE>mode, operands[3], 1), |
3d6275e3 AS |
1255 | operands[4])); |
1256 | DONE; | |
1257 | } | |
1258 | [(set_attr "type" "vmult") | |
1259 | (set_attr "length" "8")]) | |
1260 | ||
1165109b AS |
;; V_DI vector subtract.  Never output directly ("#"); once
;; gcn_can_split_p holds for operands 0, 1 and 2 it is split into
;;   part 0: sub<vnsi>3_vcc, writing the borrow to VCC;
;;   part 1: subc<vnsi>3, using VCC as both borrow-in and borrow-out.
;; VCC_REG is therefore clobbered.
1261 | (define_insn_and_split "sub<mode>3" |
1262 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") | |
1263 | (minus:V_DI | |
1264 | (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v") | |
1265 | (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb"))) | |
3d6275e3 AS |
1266 | (clobber (reg:DI VCC_REG))] |
1267 | "" | |
1268 | "#" | |
1165109b AS |
1269 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1270 | && gcn_can_split_p (<MODE>mode, operands[1]) | |
1271 | && gcn_can_split_p (<MODE>mode, operands[2])" | |
3d6275e3 AS |
1272 | [(const_int 0)] |
1273 | { | |
1274 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1275 | emit_insn (gen_sub<vnsi>3_vcc |
1276 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
1277 | gcn_operand_part (<MODE>mode, operands[1], 0), | |
1278 | gcn_operand_part (<MODE>mode, operands[2], 0), | |
3d6275e3 | 1279 | vcc)); |
1165109b AS |
1280 | emit_insn (gen_subc<vnsi>3 |
1281 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1282 | gcn_operand_part (<MODE>mode, operands[1], 1), | |
1283 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 AS |
1284 | vcc, vcc)); |
1285 | DONE; | |
1286 | } | |
1287 | [(set_attr "type" "vmult") | |
d54fc770 | 1288 | (set_attr "length" "8")]) |
3d6275e3 | 1289 | |
1165109b AS |
;; V_DI vector subtract under an EXEC mask (operand 4), merging with
;; operand 3.  The insn condition requires at least one of operands 1 and 2
;; to be a register.  Never output directly ("#"); it is split, once
;; gcn_can_split_p holds for operands 0 to 3, into sub<vnsi>3_vcc_exec on
;; part 0 and subc<vnsi>3_exec on part 1, with VCC carrying the borrow.
;; NOTE(review): the input constraints here are "vSvB" while the unmasked
;; sub<mode>3 uses "vDb" -- confirm the difference is intended.
1290 | (define_insn_and_split "sub<mode>3_exec" |
1291 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") | |
1292 | (vec_merge:V_DI | |
1293 | (minus:V_DI | |
1294 | (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v") | |
1295 | (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB")) | |
1296 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0") | |
3abfd4f3 | 1297 | (match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) |
3d6275e3 AS |
1298 | (clobber (reg:DI VCC_REG))] |
1299 | "register_operand (operands[1], VOIDmode) | |
1300 | || register_operand (operands[2], VOIDmode)" | |
1301 | "#" | |
1165109b AS |
1302 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1303 | && gcn_can_split_p (<MODE>mode, operands[1]) | |
1304 | && gcn_can_split_p (<MODE>mode, operands[2]) | |
1305 | && gcn_can_split_p (<MODE>mode, operands[3])" | |
3d6275e3 AS |
1306 | [(const_int 0)] |
1307 | { | |
1308 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1309 | emit_insn (gen_sub<vnsi>3_vcc_exec |
1310 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
1311 | gcn_operand_part (<MODE>mode, operands[1], 0), | |
1312 | gcn_operand_part (<MODE>mode, operands[2], 0), | |
3d6275e3 | 1313 | vcc, |
1165109b | 1314 | gcn_operand_part (<MODE>mode, operands[3], 0), |
3d6275e3 | 1315 | operands[4])); |
1165109b AS |
1316 | emit_insn (gen_subc<vnsi>3_exec |
1317 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1318 | gcn_operand_part (<MODE>mode, operands[1], 1), | |
1319 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 | 1320 | vcc, vcc, |
1165109b | 1321 | gcn_operand_part (<MODE>mode, operands[3], 1), |
3d6275e3 AS |
1322 | operands[4])); |
1323 | DONE; | |
1324 | } | |
1325 | [(set_attr "type" "vmult") | |
d54fc770 | 1326 | (set_attr "length" "8")]) |
3d6275e3 | 1327 | |
1165109b AS |
;; V_DI add of a zero-extended <VnSI> vector (operand 1) and a V_DI value
;; (operand 2).  Never output directly ("#"); once gcn_can_split_p holds for
;; operands 0 and 2 it is split into
;;   part 0: add<vnsi>3_vcc of operand 1 and the low part of operand 2;
;;   part 1: addc<vnsi>3 of the high part of operand 2 and constant zero,
;;           which propagates the carry held in VCC.
1328 | (define_insn_and_split "add<mode>3_zext" |
1329 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") | |
1330 | (plus:V_DI | |
1331 | (zero_extend:V_DI | |
1332 | (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB")) | |
1333 | (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))) | |
3d6275e3 AS |
1334 | (clobber (reg:DI VCC_REG))] |
1335 | "" | |
1336 | "#" | |
1165109b AS |
1337 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1338 | && gcn_can_split_p (<MODE>mode, operands[2])" | |
3d6275e3 AS |
1339 | [(const_int 0)] |
1340 | { | |
1341 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1342 | emit_insn (gen_add<vnsi>3_vcc |
1343 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 | 1344 | operands[1], |
1165109b | 1345 | gcn_operand_part (<MODE>mode, operands[2], 0), |
3d6275e3 | 1346 | vcc)); |
1165109b AS |
1347 | emit_insn (gen_addc<vnsi>3 |
1348 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1349 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 AS |
1350 | const0_rtx, vcc, vcc)); |
1351 | DONE; | |
1352 | } | |
1353 | [(set_attr "type" "vmult") | |
66b01cc3 | 1354 | (set_attr "length" "8")]) |
3d6275e3 | 1355 | |
1165109b AS |
;; EXEC-masked form of the zero-extending V_DI add: lanes selected by
;; operand 4 receive zext (operand 1) + operand 2, the others take
;; operand 3.  Never output directly ("#"); once gcn_can_split_p holds for
;; operands 0, 2 and 3 it is split into add<vnsi>3_vcc_exec on the low part
;; and addc<vnsi>3_exec (high part of operand 2 plus constant zero) on the
;; high part, with VCC carrying the carry between them.
1356 | (define_insn_and_split "add<mode>3_zext_exec" |
1357 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") | |
1358 | (vec_merge:V_DI | |
1359 | (plus:V_DI | |
1360 | (zero_extend:V_DI | |
1361 | (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB")) | |
1362 | (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")) | |
1363 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0") | |
1364 | (match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) | |
3d6275e3 AS |
1365 | (clobber (reg:DI VCC_REG))] |
1366 | "" | |
1367 | "#" | |
1165109b AS |
1368 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1369 | && gcn_can_split_p (<MODE>mode, operands[2]) | |
1370 | && gcn_can_split_p (<MODE>mode, operands[3])" | |
3d6275e3 AS |
1371 | [(const_int 0)] |
1372 | { | |
1373 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1374 | emit_insn (gen_add<vnsi>3_vcc_exec |
1375 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 | 1376 | operands[1], |
1165109b | 1377 | gcn_operand_part (<MODE>mode, operands[2], 0), |
3d6275e3 | 1378 | vcc, |
1165109b | 1379 | gcn_operand_part (<MODE>mode, operands[3], 0), |
3d6275e3 | 1380 | operands[4])); |
1165109b AS |
1381 | emit_insn (gen_addc<vnsi>3_exec |
1382 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1383 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 | 1384 | const0_rtx, vcc, vcc, |
1165109b | 1385 | gcn_operand_part (<MODE>mode, operands[3], 1), |
3d6275e3 AS |
1386 | operands[4])); |
1387 | DONE; | |
1388 | } | |
1389 | [(set_attr "type" "vmult") | |
66b01cc3 | 1390 | (set_attr "length" "8")]) |
3d6275e3 | 1391 | |
;; V_DI add of a duplicated, zero-extended scalar SI (operand 1) and a V_DI
;; value (operand 2), with the carry register exposed as early-clobber DI
;; operand 3 rather than a fixed VCC clobber.  Never output directly ("#");
;; once gcn_can_split_p holds for operands 0 and 2 it is split into
;;   part 0: add<vnsi>3_vcc_dup of operand 1 and the low part of operand 2;
;;   part 1: addc<vnsi>3 of the high part of operand 2 and constant zero,
;; both using operand 3 as the carry.
;; NOTE(review): operand 1 is SImode but is passed through
;; gcn_operand_part (DImode, ..., 0) -- confirm this is intended.
75d0b3d7 | 1392 | (define_insn_and_split "add<mode>3_vcc_zext_dup" |
961c2aac | 1393 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") |
1165109b AS |
1394 | (plus:V_DI |
1395 | (zero_extend:V_DI | |
1396 | (vec_duplicate:<VnSI> | |
961c2aac AS |
1397 | (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv"))) |
1398 | (match_operand:V_DI 2 "gcn_alu_operand" " vDA, vDb"))) | |
1399 | (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV") | |
75d0b3d7 AS |
1400 | (ltu:DI (plus:V_DI |
1401 | (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1))) | |
1402 | (match_dup 2)) | |
1403 | (match_dup 1)))] | |
3d6275e3 AS |
1404 | "" |
1405 | "#" | |
1165109b AS |
1406 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1407 | && gcn_can_split_p (<MODE>mode, operands[2])" | |
3d6275e3 AS |
1408 | [(const_int 0)] |
1409 | { | |
1165109b AS |
1410 | emit_insn (gen_add<vnsi>3_vcc_dup |
1411 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 | 1412 | gcn_operand_part (DImode, operands[1], 0), |
1165109b | 1413 | gcn_operand_part (<MODE>mode, operands[2], 0), |
75d0b3d7 | 1414 | operands[3])); |
1165109b AS |
1415 | emit_insn (gen_addc<vnsi>3 |
1416 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1417 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
75d0b3d7 | 1418 | const0_rtx, operands[3], operands[3])); |
3d6275e3 AS |
1419 | DONE; |
1420 | } | |
1421 | [(set_attr "type" "vmult") | |
1422 | (set_attr "length" "8")]) | |
1423 | ||
75d0b3d7 AS |
;; Convenience expander for add<mode>3_vcc_zext_dup: takes the destination,
;; the scalar SI operand and the V_DI operand, and supplies the hard VCC
;; register (DImode) as the carry operand.
1424 | (define_expand "add<mode>3_zext_dup" |
1425 | [(match_operand:V_DI 0 "register_operand") | |
1426 | (match_operand:SI 1 "gcn_alu_operand") | |
1427 | (match_operand:V_DI 2 "gcn_alu_operand")] | |
1428 | "" | |
1429 | { | |
1430 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1431 | emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1], | |
1432 | operands[2], vcc)); | |
1433 | DONE; | |
1434 | }) | |
1435 | ||
;; EXEC-masked form of add<mode>3_vcc_zext_dup.  Note the operand numbering:
;; operand 3 is the early-clobber carry register, operand 4 the V_DI merge
;; source and operand 5 the EXEC mask.  The carry is described as the ltu
;; comparison ANDed with the EXEC mask.  Never output directly ("#"); once
;; gcn_can_split_p holds for operands 0, 2 and 4 it is split into
;; add<vnsi>3_vcc_dup_exec on the low part and addc<vnsi>3_exec (high part
;; of operand 2 plus constant zero) on the high part, both using operand 3
;; as the carry.
1436 | (define_insn_and_split "add<mode>3_vcc_zext_dup_exec" | |
961c2aac | 1437 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") |
1165109b AS |
1438 | (vec_merge:V_DI |
1439 | (plus:V_DI | |
1440 | (zero_extend:V_DI | |
1441 | (vec_duplicate:<VnSI> | |
961c2aac AS |
1442 | (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv"))) |
1443 | (match_operand:V_DI 2 "gcn_alu_operand" " vDb, vDA")) | |
1444 | (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0, U0") | |
1445 | (match_operand:DI 5 "gcn_exec_reg_operand" " e, e"))) | |
1446 | (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV") | |
75d0b3d7 AS |
1447 | (and:DI |
1448 | (ltu:DI (plus:V_DI | |
1449 | (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1))) | |
1450 | (match_dup 2)) | |
1451 | (match_dup 1)) | |
1452 | (match_dup 5)))] | |
3d6275e3 AS |
1453 | "" |
1454 | "#" | |
1165109b AS |
1455 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1456 | && gcn_can_split_p (<MODE>mode, operands[2]) | |
75d0b3d7 | 1457 | && gcn_can_split_p (<MODE>mode, operands[4])" |
3d6275e3 AS |
1458 | [(const_int 0)] |
1459 | { | |
1165109b AS |
1460 | emit_insn (gen_add<vnsi>3_vcc_dup_exec |
1461 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 | 1462 | gcn_operand_part (DImode, operands[1], 0), |
1165109b | 1463 | gcn_operand_part (<MODE>mode, operands[2], 0), |
75d0b3d7 AS |
1464 | operands[3], |
1465 | gcn_operand_part (<MODE>mode, operands[4], 0), | |
1466 | operands[5])); | |
1165109b AS |
1467 | emit_insn (gen_addc<vnsi>3_exec |
1468 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1469 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
75d0b3d7 AS |
1470 | const0_rtx, operands[3], operands[3], |
1471 | gcn_operand_part (<MODE>mode, operands[4], 1), | |
1472 | operands[5])); | |
3d6275e3 AS |
1473 | DONE; |
1474 | } | |
1475 | [(set_attr "type" "vmult") | |
1476 | (set_attr "length" "8")]) | |
1477 | ||
75d0b3d7 AS |
1478 | (define_expand "add<mode>3_zext_dup_exec" |
1479 | [(match_operand:V_DI 0 "register_operand") | |
1480 | (match_operand:SI 1 "gcn_alu_operand") | |
1481 | (match_operand:V_DI 2 "gcn_alu_operand") | |
1482 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand") | |
1483 | (match_operand:DI 4 "gcn_exec_reg_operand")] | |
1484 | "" | |
1485 | { | |
1486 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1487 | emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1], | |
1488 | operands[2], vcc, operands[3], | |
1489 | operands[4])); | |
1490 | DONE; | |
1491 | }) | |
1492 | ||
;; V_DI add of a zero-extended <VnSI> vector (operand 1) and a duplicated
;; scalar DI (operand 2), with the carry register exposed as early-clobber
;; DI operand 3.  Never output directly ("#"); once gcn_can_split_p holds
;; for operand 0 it is split into three instructions:
;;   1. add<vnsi>3_vcc_dup: low result = low part of operand 2 + operand 1;
;;   2. vec_duplicate<vnsi>: broadcast the high part of operand 2 into the
;;      high half of the destination;
;;   3. addc<vnsi>3: add constant zero plus the carry to that high half.
1493 | (define_insn_and_split "add<mode>3_vcc_zext_dup2" | |
961c2aac | 1494 | [(set (match_operand:V_DI 0 "register_operand" "= v") |
1165109b AS |
1495 | (plus:V_DI |
1496 | (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA")) | |
75d0b3d7 | 1497 | (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" " DbSv")))) |
961c2aac | 1498 | (set (match_operand:DI 3 "register_operand" "=&SgcV") |
75d0b3d7 AS |
1499 | (ltu:DI (plus:V_DI |
1500 | (zero_extend:V_DI (match_dup 1)) | |
1501 | (vec_duplicate:V_DI (match_dup 2))) | |
1502 | (match_dup 1)))] | |
3d6275e3 AS |
1503 | "" |
1504 | "#" | |
1165109b | 1505 | "gcn_can_split_p (<MODE>mode, operands[0])" |
3d6275e3 AS |
1506 | [(const_int 0)] |
1507 | { | |
1165109b AS |
1508 | emit_insn (gen_add<vnsi>3_vcc_dup |
1509 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 AS |
1510 | gcn_operand_part (DImode, operands[2], 0), |
1511 | operands[1], | |
75d0b3d7 | 1512 | operands[3])); |
1165109b AS |
1513 | rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1); |
1514 | emit_insn (gen_vec_duplicate<vnsi> | |
3d6275e3 | 1515 | (dsthi, gcn_operand_part (DImode, operands[2], 1))); |
75d0b3d7 AS |
1516 | emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3], |
1517 | operands[3])); | |
3d6275e3 AS |
1518 | DONE; |
1519 | } | |
1520 | [(set_attr "type" "vmult") | |
1521 | (set_attr "length" "8")]) | |
1522 | ||
75d0b3d7 AS |
;; Convenience expander: creates a DImode VCC_REG rtx and forwards to
;; add<mode>3_vcc_zext_dup2, passing it as the carry-out operand.
1523 | (define_expand "add<mode>3_zext_dup2" |
1524 | [(match_operand:V_DI 0 "register_operand") | |
1525 | (match_operand:<VnSI> 1 "gcn_alu_operand") | |
1526 | (match_operand:DI 2 "gcn_alu_operand")] | |
1527 | "" | |
1528 | { | |
1529 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1530 | emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1], | |
1531 | operands[2], vcc)); | |
1532 | DONE; | |
1533 | }) | |
1534 | ||
;; Masked form of add<mode>3_vcc_zext_dup2: the sum is vec_merge'd with
;; operand 4 (previous value or unspec) under the exec mask in operand 5,
;; and the carry-out in operand 3 is ANDed with that mask.
;; Split once gcn_can_split_p accepts both operand 0 and operand 4, into the
;; _exec variants of the same three steps (low add with carry-out, broadcast
;; of the scalar's high part, add-with-carry of const0), each merging with
;; the matching part of operand 4.
1535 | (define_insn_and_split "add<mode>3_vcc_zext_dup2_exec" | |
961c2aac | 1536 | [(set (match_operand:V_DI 0 "register_operand" "= v") |
1165109b AS |
1537 | (vec_merge:V_DI |
1538 | (plus:V_DI | |
1539 | (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA")) | |
1540 | (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))) | |
961c2aac AS |
1541 | (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0") |
1542 | (match_operand:DI 5 "gcn_exec_reg_operand" " e"))) | |
1543 | (set (match_operand:DI 3 "register_operand" "=&SgcV") | |
75d0b3d7 AS |
1544 | (and:DI |
1545 | (ltu:DI (plus:V_DI | |
1546 | (zero_extend:V_DI (match_dup 1)) | |
1547 | (vec_duplicate:V_DI (match_dup 2))) | |
1548 | (match_dup 1)) | |
1549 | (match_dup 5)))] | |
3d6275e3 AS |
1550 | "" |
1551 | "#" | |
1165109b | 1552 | "gcn_can_split_p (<MODE>mode, operands[0]) |
75d0b3d7 | 1553 | && gcn_can_split_p (<MODE>mode, operands[4])" |
3d6275e3 AS |
1554 | [(const_int 0)] |
1555 | { | |
1165109b AS |
1556 | emit_insn (gen_add<vnsi>3_vcc_dup_exec |
1557 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 AS |
1558 | gcn_operand_part (DImode, operands[2], 0), |
1559 | operands[1], | |
75d0b3d7 AS |
1560 | operands[3], |
1561 | gcn_operand_part (<MODE>mode, operands[4], 0), | |
1562 | operands[5])); | |
1165109b AS |
1563 | rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1); |
1564 | emit_insn (gen_vec_duplicate<vnsi>_exec | |
3d6275e3 | 1565 | (dsthi, gcn_operand_part (DImode, operands[2], 1), |
75d0b3d7 AS |
1566 | gcn_operand_part (<MODE>mode, operands[4], 1), |
1567 | operands[5])); | |
1165109b | 1568 | emit_insn (gen_addc<vnsi>3_exec |
75d0b3d7 AS |
1569 | (dsthi, dsthi, const0_rtx, operands[3], operands[3], |
1570 | gcn_operand_part (<MODE>mode, operands[4], 1), | |
1571 | operands[5])); | |
3d6275e3 AS |
1572 | DONE; |
1573 | } | |
1574 | [(set_attr "type" "vmult") | |
1575 | (set_attr "length" "8")]) | |
1576 | ||
75d0b3d7 AS |
;; Convenience expander for the masked form: creates a DImode VCC_REG rtx and
;; forwards to add<mode>3_vcc_zext_dup2_exec with it as the carry operand;
;; operands 3 and 4 here are the previous-value and exec-mask arguments.
1577 | (define_expand "add<mode>3_zext_dup2_exec" |
1578 | [(match_operand:V_DI 0 "register_operand") | |
1579 | (match_operand:<VnSI> 1 "gcn_alu_operand") | |
1580 | (match_operand:DI 2 "gcn_alu_operand") | |
1581 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand") | |
1582 | (match_operand:DI 4 "gcn_exec_reg_operand")] | |
1583 | "" | |
1584 | { | |
1585 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1586 | emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1], | |
1587 | operands[2], vcc, | |
1588 | operands[3], operands[4])); | |
1589 | DONE; | |
1590 | }) | |
1591 | ||
1165109b AS |
;; V_DI result = sign_extend (<VnSI> operand 1) + vec_duplicate (DI operand 2).
;; Clobbers a <VnSI> scratch (operand 3) and VCC_REG.
;; Split once gcn_can_split_p accepts operand 0:
;;   1. scratch = operand 1 arithmetically shifted right by 31 (the sign
;;      word of the extension);
;;   2. low result part = low part of operand 2 + operand 1, carry to VCC;
;;   3. broadcast the high part of operand 2 into the high result part;
;;   4. high result part += scratch + carry (addc).
1592 | (define_insn_and_split "add<mode>3_sext_dup2" |
1593 | [(set (match_operand:V_DI 0 "register_operand" "= v") | |
1594 | (plus:V_DI | |
1595 | (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA")) | |
1596 | (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))) | |
1597 | (clobber (match_scratch:<VnSI> 3 "=&v")) | |
3d6275e3 AS |
1598 | (clobber (reg:DI VCC_REG))] |
1599 | "" | |
1600 | "#" | |
1165109b | 1601 | "gcn_can_split_p (<MODE>mode, operands[0])" |
3d6275e3 AS |
1602 | [(const_int 0)] |
1603 | { | |
1604 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1605 | emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31))); |
1606 | emit_insn (gen_add<vnsi>3_vcc_dup | |
1607 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 AS |
1608 | gcn_operand_part (DImode, operands[2], 0), |
1609 | operands[1], | |
1610 | vcc)); | |
1165109b AS |
1611 | rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1); |
1612 | emit_insn (gen_vec_duplicate<vnsi> | |
3d6275e3 | 1613 | (dsthi, gcn_operand_part (DImode, operands[2], 1))); |
1165109b | 1614 | emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc)); |
3d6275e3 AS |
1615 | DONE; |
1616 | } | |
1617 | [(set_attr "type" "vmult") | |
1618 | (set_attr "length" "8")]) | |
1619 | ||
1165109b AS |
;; Masked form of add<mode>3_sext_dup2: the sum is vec_merge'd with operand 3
;; (previous value or unspec) under the exec mask in operand 4.  Operand 5 is
;; the <VnSI> scratch; VCC_REG is clobbered.
;; Split once gcn_can_split_p accepts both operand 0 and operand 3.  The
;; scratch (operand 1 >> 31) is computed with an undef merge source; the
;; remaining steps merge with the matching part of operand 3.
1620 | (define_insn_and_split "add<mode>3_sext_dup2_exec" |
1621 | [(set (match_operand:V_DI 0 "register_operand" "= v") | |
1622 | (vec_merge:V_DI | |
1623 | (plus:V_DI | |
1624 | (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA")) | |
1625 | (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))) | |
1626 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") | |
3d6275e3 | 1627 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) |
1165109b | 1628 | (clobber (match_scratch:<VnSI> 5 "=&v")) |
3d6275e3 AS |
1629 | (clobber (reg:DI VCC_REG))] |
1630 | "" | |
1631 | "#" | |
1165109b AS |
1632 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1633 | && gcn_can_split_p (<MODE>mode, operands[3])" | |
3d6275e3 AS |
1634 | [(const_int 0)] |
1635 | { | |
1636 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1637 | emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31), |
1638 | gcn_gen_undef (<VnSI>mode), operands[4])); | |
1639 | emit_insn (gen_add<vnsi>3_vcc_dup_exec | |
1640 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 AS |
1641 | gcn_operand_part (DImode, operands[2], 0), |
1642 | operands[1], | |
1643 | vcc, | |
1165109b | 1644 | gcn_operand_part (<MODE>mode, operands[3], 0), |
3d6275e3 | 1645 | operands[4])); |
1165109b AS |
1646 | rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1); |
1647 | emit_insn (gen_vec_duplicate<vnsi>_exec | |
3d6275e3 | 1648 | (dsthi, gcn_operand_part (DImode, operands[2], 1), |
28b733ea AS |
1649 | gcn_operand_part (<MODE>mode, operands[3], 1), |
1650 | operands[4])); | |
1165109b | 1651 | emit_insn (gen_addc<vnsi>3_exec |
3d6275e3 | 1652 | (dsthi, dsthi, operands[5], vcc, vcc, |
1165109b | 1653 | gcn_operand_part (<MODE>mode, operands[3], 1), |
3d6275e3 AS |
1654 | operands[4])); |
1655 | DONE; | |
1656 | } | |
1657 | [(set_attr "type" "vmult") | |
1658 | (set_attr "length" "8")]) | |
1659 | ||
1660 | ;; }}} | |
1661 | ;; {{{ DS memory ALU: add/sub | |
1662 | ||
;; Modes accepted by the DS add/sub/subr patterns below: one iterator for the
;; vector forms and one for the scalar forms.
1663 | (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI]) | |
1664 | (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI]) | |
1665 | ||
1666 | ;; FIXME: the vector patterns probably need RD expanded to a vector of | |
1667 | ;; addresses. For now, the only way a vector can get into LDS is | |
1668 | ;; if the user puts it there manually. | |
1669 | ;; | |
1670 | ;; FIXME: the scalar patterns are probably fine in themselves, but need to be | |
1671 | ;; checked to see if anything can ever use them. | |
1672 | ||
;; Vector DS add: DS memory operand 0 = DS memory operand 1 + register
;; operand 2.  The insn condition requires operands 0 and 1 to be the same
;; rtx, so this only matches an in-place update of one location (ds_add).
1673 | (define_insn "add<mode>3_ds<exec>" | |
1674 | [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") | |
1675 | (plus:DS_ARITH_MODE | |
1676 | (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD") | |
1677 | (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))] | |
1678 | "rtx_equal_p (operands[0], operands[1])" | |
1679 | "ds_add%u0\t%A0, %2%O0" | |
1680 | [(set_attr "type" "ds") | |
1681 | (set_attr "length" "8")]) | |
1682 | ||
;; Scalar-mode counterpart of the DS add: same in-place requirement
;; (operands 0 and 1 must be the same rtx) and same ds_add output template.
1683 | (define_insn "add<mode>3_ds_scalar" | |
1684 | [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD") | |
1685 | (plus:DS_ARITH_SCALAR_MODE | |
1686 | (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" | |
1687 | "%RD") | |
1688 | (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))] | |
1689 | "rtx_equal_p (operands[0], operands[1])" | |
1690 | "ds_add%u0\t%A0, %2%O0" | |
1691 | [(set_attr "type" "ds") | |
1692 | (set_attr "length" "8")]) | |
1693 | ||
;; Vector DS subtract: DS memory operand 0 = DS memory operand 1 - register
;; operand 2, restricted by the insn condition to the in-place case where
;; operands 0 and 1 are the same rtx (ds_sub).
1694 | (define_insn "sub<mode>3_ds<exec>" | |
1695 | [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") | |
1696 | (minus:DS_ARITH_MODE | |
1697 | (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD") | |
1698 | (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))] | |
1699 | "rtx_equal_p (operands[0], operands[1])" | |
1700 | "ds_sub%u0\t%A0, %2%O0" | |
1701 | [(set_attr "type" "ds") | |
1702 | (set_attr "length" "8")]) | |
1703 | ||
;; Scalar-mode counterpart of the DS subtract (memory - register, in place).
1704 | (define_insn "sub<mode>3_ds_scalar" | |
1705 | [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD") | |
1706 | (minus:DS_ARITH_SCALAR_MODE | |
1707 | (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" | |
1708 | " RD") | |
1709 | (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))] | |
1710 | "rtx_equal_p (operands[0], operands[1])" | |
1711 | "ds_sub%u0\t%A0, %2%O0" | |
1712 | [(set_attr "type" "ds") | |
1713 | (set_attr "length" "8")]) | |
1714 | ||
;; Vector DS reverse subtract: DS memory operand 0 = register operand 2 -
;; DS memory operand 1 (note the swapped minus operands), restricted to the
;; in-place case where operands 0 and 1 are the same rtx (ds_rsub).
1715 | (define_insn "subr<mode>3_ds<exec>" | |
1716 | [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") | |
1717 | (minus:DS_ARITH_MODE | |
1718 | (match_operand:DS_ARITH_MODE 2 "register_operand" " v") | |
1719 | (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))] | |
1720 | "rtx_equal_p (operands[0], operands[1])" | |
1721 | "ds_rsub%u0\t%A0, %2%O0" | |
1722 | [(set_attr "type" "ds") | |
1723 | (set_attr "length" "8")]) | |
1724 | ||
;; Scalar-mode counterpart of the DS reverse subtract (register - memory,
;; result stored back to the same memory operand).
1725 | (define_insn "subr<mode>3_ds_scalar" | |
1726 | [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD") | |
1727 | (minus:DS_ARITH_SCALAR_MODE | |
1728 | (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v") | |
1729 | (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" | |
1730 | " RD")))] | |
1731 | "rtx_equal_p (operands[0], operands[1])" | |
1732 | "ds_rsub%u0\t%A0, %2%O0" | |
1733 | [(set_attr "type" "ds") | |
1734 | (set_attr "length" "8")]) | |
1735 | ||
1736 | ;; }}} | |
1737 | ;; {{{ ALU special case: mult | |
1738 | ||
1165109b AS |
;; High-part multiply for V_SI: both operands are extended (any_extend) to
;; <VnDI>, multiplied, shifted right by 32 and truncated back, i.e. the upper
;; 32 bits of the widened product.  Emitted as v_mul_hi<sgnsuffix>0 with the
;; source operands in swapped order (%2, %1).
1739 | (define_insn "<su>mul<mode>3_highpart<exec>" |
1740 | [(set (match_operand:V_SI 0 "register_operand" "= v") | |
1741 | (truncate:V_SI | |
1742 | (lshiftrt:<VnDI> | |
1743 | (mult:<VnDI> | |
1744 | (any_extend:<VnDI> | |
1745 | (match_operand:V_SI 1 "gcn_alu_operand" " %v")) | |
1746 | (any_extend:<VnDI> | |
1747 | (match_operand:V_SI 2 "gcn_alu_operand" "vSvA"))) | |
3d6275e3 AS |
1748 | (const_int 32))))] |
1749 | "" | |
1750 | "v_mul_hi<sgnsuffix>0\t%0, %2, %1" | |
1751 | [(set_attr "type" "vop3a") | |
1752 | (set_attr "length" "8")]) | |
1753 | ||
;; Low-part integer multiply for every single-register integer vector mode
;; in V_INT_1REG; all of them are emitted as v_mul_lo_u32.
7b945b19 | 1754 | (define_insn "mul<mode>3<exec>" |
03876953 AS |
1755 | [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") |
1756 | (mult:V_INT_1REG | |
1757 | (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA") | |
1758 | (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))] | |
3d6275e3 AS |
1759 | "" |
1760 | "v_mul_lo_u32\t%0, %1, %2" | |
1761 | [(set_attr "type" "vop3a") | |
1762 | (set_attr "length" "8")]) | |
1763 | ||
;; Low-part integer multiply of a vector (operand 1) by a scalar
;; <SCALAR_MODE> operand 2 that the RTL wraps in vec_duplicate; emitted as
;; v_mul_lo_u32 with the scalar used directly as the second source.
7b945b19 | 1764 | (define_insn "mul<mode>3_dup<exec>" |
03876953 AS |
1765 | [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") |
1766 | (mult:V_INT_1REG | |
1767 | (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA") | |
1768 | (vec_duplicate:V_INT_1REG | |
1769 | (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))] | |
3d6275e3 AS |
1770 | "" |
1771 | "v_mul_lo_u32\t%0, %1, %2" | |
1772 | [(set_attr "type" "vop3a") | |
1773 | (set_attr "length" "8")]) | |
1774 | ||
1165109b AS |
;; 64-bit-element vector multiply (truncating), split after reload into
;; 32-bit vector operations on the low/high register parts.
;;
;; With L = left_hi:left_lo and R = right_hi:right_lo,
;;   L * R mod 2^64 = left_lo * right_lo
;;                    + ((left_hi * right_lo + left_lo * right_hi) << 32)
;; so:
;;   out_lo = low32 (left_lo * right_lo)
;;   out_hi = high32 (left_lo * right_lo)
;;            + low32 (left_hi * right_lo) + low32 (left_lo * right_hi)
;; The left_hi * right_hi partial product is scaled by 2^64 and must not be
;; added: it cannot affect the low 64 bits of the result.
;;
;; Operand 3 is a <VnSI> scratch for the cross products; the output is
;; early-clobbered because out_lo/out_hi are written while the inputs are
;; still being read.
1775 | (define_insn_and_split "mul<mode>3" |
1776 | [(set (match_operand:V_DI 0 "register_operand" "=&v") | |
1777 | (mult:V_DI | |
1778 | (match_operand:V_DI 1 "gcn_alu_operand" "% v") | |
1779 | (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))) | |
1780 | (clobber (match_scratch:<VnSI> 3 "=&v"))] | |
3d6275e3 AS |
1781 | "" |
1782 | "#" | |
1783 | "reload_completed" | |
1784 | [(const_int 0)] | |
1785 | { | |
1165109b AS |
1786 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
1787 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
1788 | rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0); | |
1789 | rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1); | |
1790 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); | |
1791 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
1792 | rtx tmp = operands[3]; |
1793 | ||
1165109b AS |
1794 | emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo)); |
1795 | emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo)); | |
1796 | emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo)); | |
1797 | emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); | |
1798 | emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi)); | |
1799 | emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); | |
/* No left_hi * right_hi term: it only contributes to bits 64 and up.  */
3d6275e3 AS |
1802 | DONE; |
1803 | }) | |
1804 | ||
1165109b AS |
;; Masked 64-bit-element vector multiply (truncating): the product is
;; vec_merge'd with operand 3 (previous value or unspec) under the exec mask
;; in operand 4.  Split after reload into 32-bit masked operations.
;;
;; With L = left_hi:left_lo and R = right_hi:right_lo,
;;   out_lo = low32 (left_lo * right_lo)
;;   out_hi = high32 (left_lo * right_lo)
;;            + low32 (left_hi * right_lo) + low32 (left_lo * right_hi)
;; The left_hi * right_hi partial product is scaled by 2^64 and must not be
;; added: it cannot affect the low 64 bits of the result.
;;
;; The first two steps merge with the low/high parts of operand 3 (or undef
;; when operand 3 is an UNSPEC); the scratch products use an undef merge
;; source, and the accumulating adds merge with out_hi itself.
1805 | (define_insn_and_split "mul<mode>3_exec" |
1806 | [(set (match_operand:V_DI 0 "register_operand" "=&v") | |
1807 | (vec_merge:V_DI | |
1808 | (mult:V_DI | |
1809 | (match_operand:V_DI 1 "gcn_alu_operand" "% v") | |
1810 | (match_operand:V_DI 2 "gcn_alu_operand" "vDA")) | |
1811 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") | |
1812 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) | |
1813 | (clobber (match_scratch:<VnSI> 5 "=&v"))] | |
3d6275e3 AS |
1814 | "" |
1815 | "#" | |
1816 | "reload_completed" | |
1817 | [(const_int 0)] | |
1818 | { | |
1165109b AS |
1819 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
1820 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
1821 | rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0); | |
1822 | rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1); | |
1823 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); | |
1824 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
1825 | rtx exec = operands[4]; |
1826 | rtx tmp = operands[5]; | |
1827 | ||
1828 | rtx old_lo, old_hi; | |
1829 | if (GET_CODE (operands[3]) == UNSPEC) | |
1830 | { | |
1165109b | 1831 | old_lo = old_hi = gcn_gen_undef (<VnSI>mode); |
3d6275e3 AS |
1832 | } |
1833 | else | |
1834 | { | |
1165109b AS |
1835 | old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); |
1836 | old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); | |
3d6275e3 AS |
1837 | } |
1838 | ||
1165109b AS |
1839 | rtx undef = gcn_gen_undef (<VnSI>mode); |
1840 | ||
1841 | emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec)); | |
1842 | emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo, | |
1843 | old_hi, exec)); | |
1844 | emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec)); | |
1845 | emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); | |
1846 | emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec)); | |
1847 | emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); | |
/* No left_hi * right_hi term: it only contributes to bits 64 and up.  */
3d6275e3 AS |
1850 | DONE; |
1851 | }) | |
1852 | ||
1165109b AS |
;; V_DI multiply where the left operand is a zero-extended <VnSI> vector.
;; Split after reload.  Because the left operand has no high word, only two
;; partial products are needed:
;;   out_lo = low32 (left * right_lo)
;;   out_hi = high32 (left * right_lo) + low32 (left * right_hi)
;; Operand 3 is the <VnSI> scratch holding the second product.
1853 | (define_insn_and_split "mul<mode>3_zext" |
1854 | [(set (match_operand:V_DI 0 "register_operand" "=&v") | |
1855 | (mult:V_DI | |
1856 | (zero_extend:V_DI | |
1857 | (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) | |
1858 | (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))) | |
1859 | (clobber (match_scratch:<VnSI> 3 "=&v"))] | |
3d6275e3 AS |
1860 | "" |
1861 | "#" | |
1862 | "reload_completed" | |
1863 | [(const_int 0)] | |
1864 | { | |
1165109b AS |
1865 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
1866 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3d6275e3 | 1867 | rtx left = operands[1]; |
1165109b AS |
1868 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); |
1869 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
1870 | rtx tmp = operands[3]; |
1871 | ||
1165109b AS |
1872 | emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo)); |
1873 | emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo)); | |
1874 | emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi)); | |
1875 | emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); | |
3d6275e3 AS |
1876 | DONE; |
1877 | }) | |
1878 | ||
1165109b AS |
;; Masked form of mul<mode>3_zext: the product is vec_merge'd with operand 3
;; (previous value or unspec) under the exec mask in operand 4; operand 5 is
;; the <VnSI> scratch.  Split after reload into the _exec variants of the
;; same steps.  The low multiply and the high-part multiply merge with the
;; parts of operand 3 (undef when operand 3 is an UNSPEC); the scratch
;; product merges with undef, and the final add merges with out_hi itself.
1879 | (define_insn_and_split "mul<mode>3_zext_exec" |
1880 | [(set (match_operand:V_DI 0 "register_operand" "=&v") | |
1881 | (vec_merge:V_DI | |
1882 | (mult:V_DI | |
1883 | (zero_extend:V_DI | |
1884 | (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) | |
1885 | (match_operand:V_DI 2 "gcn_alu_operand" "vDA")) | |
1886 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") | |
1887 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) | |
1888 | (clobber (match_scratch:<VnSI> 5 "=&v"))] | |
3d6275e3 AS |
1889 | "" |
1890 | "#" | |
1891 | "reload_completed" | |
1892 | [(const_int 0)] | |
1893 | { | |
1165109b AS |
1894 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
1895 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3d6275e3 | 1896 | rtx left = operands[1]; |
1165109b AS |
1897 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); |
1898 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
1899 | rtx exec = operands[4]; |
1900 | rtx tmp = operands[5]; | |
1901 | ||
1902 | rtx old_lo, old_hi; | |
1903 | if (GET_CODE (operands[3]) == UNSPEC) | |
1904 | { | |
1165109b | 1905 | old_lo = old_hi = gcn_gen_undef (<VnSI>mode); |
3d6275e3 AS |
1906 | } |
1907 | else | |
1908 | { | |
1165109b AS |
1909 | old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); |
1910 | old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); | |
3d6275e3 AS |
1911 | } |
1912 | ||
1165109b | 1913 | rtx undef = gcn_gen_undef (<VnSI>mode); |
3d6275e3 | 1914 | |
1165109b AS |
1915 | emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec)); |
1916 | emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo, | |
1917 | old_hi, exec)); | |
1918 | emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec)); | |
1919 | emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); | |
3d6275e3 AS |
1920 | DONE; |
1921 | }) | |
1922 | ||
1165109b AS |
;; V_DI multiply of a zero-extended <VnSI> vector (operand 1) by a scalar DI
;; (operand 2) wrapped in vec_duplicate.  Split after reload into:
;;   out_lo = low32 (left * right_lo)
;;   out_hi = high32 (left * right_lo) + low32 (left * right_hi)
;; with operand 3 as the <VnSI> scratch.
;; NOTE(review): operand 2 is a scalar DI, yet its parts are taken with
;; <MODE>mode and passed to the non-dup vector multiply generators, whereas
;; the add*_dup2 splitters in this file take the parts of their scalar
;; operand 2 with DImode.  Confirm this is intended.
1923 | (define_insn_and_split "mul<mode>3_zext_dup2" |
1924 | [(set (match_operand:V_DI 0 "register_operand" "= &v") | |
1925 | (mult:V_DI | |
1926 | (zero_extend:V_DI | |
1927 | (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) | |
1928 | (vec_duplicate:V_DI | |
1929 | (match_operand:DI 2 "gcn_alu_operand" "SvDA")))) | |
1930 | (clobber (match_scratch:<VnSI> 3 "= &v"))] | |
3d6275e3 AS |
1931 | "" |
1932 | "#" | |
1933 | "reload_completed" | |
1934 | [(const_int 0)] | |
1935 | { | |
1165109b AS |
1936 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
1937 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3d6275e3 | 1938 | rtx left = operands[1]; |
1165109b AS |
1939 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); |
1940 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
1941 | rtx tmp = operands[3]; |
1942 | ||
1165109b AS |
1943 | emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo)); |
1944 | emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo)); | |
1945 | emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi)); | |
1946 | emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); | |
3d6275e3 AS |
1947 | DONE; |
1948 | }) | |
1949 | ||
1165109b AS |
;; Masked form of mul<mode>3_zext_dup2: the product is vec_merge'd with
;; operand 3 (previous value or unspec) under the exec mask in operand 4;
;; operand 5 is the <VnSI> scratch.  Split after reload into the _exec
;; variants of the same two-partial-product sequence.
;; NOTE(review): as in the unmasked pattern, the scalar DI operand 2 is
;; split with <MODE>mode rather than DImode.  Confirm this is intended.
1950 | (define_insn_and_split "mul<mode>3_zext_dup2_exec" |
1951 | [(set (match_operand:V_DI 0 "register_operand" "= &v") | |
1952 | (vec_merge:V_DI | |
1953 | (mult:V_DI | |
1954 | (zero_extend:V_DI | |
1955 | (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) | |
1956 | (vec_duplicate:V_DI | |
1957 | (match_operand:DI 2 "gcn_alu_operand" "SvDA"))) | |
1958 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") | |
1959 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) | |
1960 | (clobber (match_scratch:<VnSI> 5 "= &v"))] | |
3d6275e3 AS |
1961 | "" |
1962 | "#" | |
1963 | "reload_completed" | |
1964 | [(const_int 0)] | |
1965 | { | |
1165109b AS |
1966 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
1967 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3d6275e3 | 1968 | rtx left = operands[1]; |
1165109b AS |
1969 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); |
1970 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
1971 | rtx exec = operands[4]; |
1972 | rtx tmp = operands[5]; | |
1973 | ||
1974 | rtx old_lo, old_hi; | |
1975 | if (GET_CODE (operands[3]) == UNSPEC) | |
1976 | { | |
1165109b | 1977 | old_lo = old_hi = gcn_gen_undef (<VnSI>mode); |
3d6275e3 AS |
1978 | } |
1979 | else | |
1980 | { | |
1165109b AS |
1981 | old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); |
1982 | old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); | |
3d6275e3 AS |
1983 | } |
1984 | ||
1165109b | 1985 | rtx undef = gcn_gen_undef (<VnSI>mode); |
3d6275e3 | 1986 | |
1165109b AS |
1987 | emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec)); |
1988 | emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo, | |
1989 | old_hi, exec)); | |
1990 | emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec)); | |
1991 | emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); | |
3d6275e3 AS |
1992 | DONE; |
1993 | }) | |
1994 | ||
1995 | ;; }}} | |
1996 | ;; {{{ ALU generic case | |
1997 | ||
3d6275e3 AS |
;; RTL code iterators used by the generic integer ALU patterns below:
;; bitwise binary ops, shifts, and signed/unsigned min/max.
1998 | (define_code_iterator bitop [and ior xor]) |
1999 | (define_code_iterator shiftop [ashift lshiftrt ashiftrt]) | |
2000 | (define_code_iterator minmaxop [smin smax umin umax]) | |
2001 | ||
;; Unary bit operations (bitunop) on single-register integer vectors,
;; emitted as a single v_<mnemonic>0 instruction.
2002 | (define_insn "<expander><mode>2<exec>" | |
03876953 AS |
2003 | [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v") |
2004 | (bitunop:V_INT_1REG | |
2005 | (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))] | |
3d6275e3 AS |
2006 | "" |
2007 | "v_<mnemonic>0\t%0, %1" | |
2008 | [(set_attr "type" "vop1") | |
2009 | (set_attr "length" "8")]) | |
2010 | ||
;; Bitwise and/ior/xor on single-register integer vectors.  Two alternatives:
;;   0: register destination, emitted as v_<mnemonic>0 with the sources in
;;      swapped order (%2, %1);
;;   1: RD (DS memory) destination tied to operand 1 ("0"), emitted as the
;;      in-place ds_<mnemonic>0.
2011 | (define_insn "<expander><mode>3<exec>" | |
03876953 AS |
2012 | [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD") |
2013 | (bitop:V_INT_1REG | |
2014 | (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0") | |
2015 | (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))] | |
3d6275e3 AS |
2016 | "" |
2017 | "@ | |
2018 | v_<mnemonic>0\t%0, %2, %1 | |
2019 | ds_<mnemonic>0\t%A0, %2%O0" | |
2020 | [(set_attr "type" "vop2,ds") | |
2021 | (set_attr "length" "8,8")]) | |
2022 | ||
1165109b AS |
;; Bitwise and/ior/xor on V_DI.  Alternative 0 (register destination) is
;; emitted as "#" and split after reload, provided operand 0 is not a DS
;; memory operand, into two independent <VnSI> bitops on parts 0 and 1 of
;; each operand (operands 3..8 are those parts).  Alternative 1 (RD
;; destination) is emitted directly as ds_<mnemonic>0.
2023 | (define_insn_and_split "<expander><mode>3" |
2024 | [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD") | |
2025 | (bitop:V_DI | |
2026 | (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD") | |
2027 | (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))] | |
3d6275e3 AS |
2028 | "" |
2029 | "@ | |
2030 | # | |
2031 | ds_<mnemonic>0\t%A0, %2%O0" | |
1165109b | 2032 | "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))" |
3d6275e3 | 2033 | [(set (match_dup 3) |
1165109b | 2034 | (bitop:<VnSI> (match_dup 5) (match_dup 7))) |
3d6275e3 | 2035 | (set (match_dup 4) |
1165109b AS |
2036 | (bitop:<VnSI> (match_dup 6) (match_dup 8)))] |
2037 | { | |
2038 | operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0); | |
2039 | operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1); | |
2040 | operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0); | |
2041 | operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1); | |
2042 | operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0); | |
2043 | operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
2044 | } |
2045 | [(set_attr "type" "vmult,ds") | |
2046 | (set_attr "length" "16,8")]) | |
2047 | ||
1165109b AS |
;; Masked bitwise and/ior/xor on V_DI: the result is vec_merge'd with
;; operand 3 under the exec mask in operand 4.
;; Insn condition: either the destination is not memory, or operands 0 and 1
;; are the same rtx and operand 2 is a register (the in-place DS form).
;; Alternative 0 is split after reload (when operand 0 is not DS memory) into
;; two masked <VnSI> bitops on parts 0 and 1; operands 5..12 are the parts of
;; operands 0..3.  Alternative 1 is emitted directly as ds_<mnemonic>0.
2048 | (define_insn_and_split "<expander><mode>3_exec" |
2049 | [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD") | |
2050 | (vec_merge:V_DI | |
2051 | (bitop:V_DI | |
2052 | (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD") | |
2053 | (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")) | |
2054 | (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0") | |
3d6275e3 AS |
2055 | (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))] |
2056 | "!memory_operand (operands[0], VOIDmode) | |
2057 | || (rtx_equal_p (operands[0], operands[1]) | |
2058 | && register_operand (operands[2], VOIDmode))" | |
2059 | "@ | |
2060 | # | |
2061 | ds_<mnemonic>0\t%A0, %2%O0" | |
1165109b | 2062 | "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))" |
3d6275e3 | 2063 | [(set (match_dup 5) |
1165109b AS |
2064 | (vec_merge:<VnSI> |
2065 | (bitop:<VnSI> (match_dup 7) (match_dup 9)) | |
3d6275e3 AS |
2066 | (match_dup 11) |
2067 | (match_dup 4))) | |
2068 | (set (match_dup 6) | |
1165109b AS |
2069 | (vec_merge:<VnSI> |
2070 | (bitop:<VnSI> (match_dup 8) (match_dup 10)) | |
3d6275e3 AS |
2071 | (match_dup 12) |
2072 | (match_dup 4)))] | |
2073 | { | |
1165109b AS |
2074 | operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0); |
2075 | operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1); | |
2076 | operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0); | |
2077 | operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1); | |
2078 | operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0); | |
2079 | operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1); | |
2080 | operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0); | |
2081 | operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1); | |
3d6275e3 AS |
2082 | } |
2083 | [(set_attr "type" "vmult,ds") | |
2084 | (set_attr "length" "16,8")]) | |
2085 | ||
;; Shift of a sub-dword vector (V_QIHI) by a scalar SI amount.  The value is
;; widened to <VnSI> with convert_move, shifted with the <VnSI> pattern, and
;; converted back.  The conversions are unsigned only for lshiftrt.
;; The local enum declares the three shift code names as C constants so that
;; the substituted <code> can be compared against lshiftrt.
dc941ea9 | 2086 | (define_expand "<expander><mode>3" |
03876953 AS |
2087 | [(set (match_operand:V_QIHI 0 "register_operand" "= v") |
2088 | (shiftop:V_QIHI | |
2089 | (match_operand:V_QIHI 1 "gcn_alu_operand" " v") | |
2090 | (vec_duplicate:V_QIHI | |
2091 | (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] | |
dc941ea9 AS |
2092 | "" |
2093 | { | |
2094 | enum {ashift, lshiftrt, ashiftrt}; | |
2095 | bool unsignedp = (<code> == lshiftrt); | |
1165109b | 2096 | rtx insi1 = gen_reg_rtx (<VnSI>mode); |
dc941ea9 | 2097 | rtx insi2 = gen_reg_rtx (SImode); |
1165109b | 2098 | rtx outsi = gen_reg_rtx (<VnSI>mode); |
dc941ea9 AS |
2099 | |
2100 | convert_move (insi1, operands[1], unsignedp); | |
2101 | convert_move (insi2, operands[2], unsignedp); | |
1165109b | 2102 | emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2)); |
dc941ea9 AS |
2103 | convert_move (operands[0], outsi, unsignedp); |
2104 | DONE; | |
2105 | }) | |
2106 | ||
1165109b AS |
;; V_SI shift by a scalar SI amount (wrapped in vec_duplicate in the RTL).
;; Emitted as v_<revmnemonic>0, which takes the shift amount (%2) before the
;; value being shifted (%1).
2107 | (define_insn "<expander><mode>3<exec>" |
2108 | [(set (match_operand:V_SI 0 "register_operand" "= v") | |
2109 | (shiftop:V_SI | |
2110 | (match_operand:V_SI 1 "gcn_alu_operand" " v") | |
2111 | (vec_duplicate:V_SI | |
3d6275e3 AS |
2112 | (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] |
2113 | "" | |
2114 | "v_<revmnemonic>0\t%0, %2, %1" | |
2115 | [(set_attr "type" "vop2") | |
2116 | (set_attr "length" "8")]) | |
2117 | ||
;; Shift of a sub-dword vector (V_QIHI) by a per-lane vector amount.  Both
;; the value and the shift amounts are widened to <VnSI> with convert_move,
;; shifted with the <VnSI> vector-shift pattern, and converted back.  The
;; conversions are unsigned only for lshiftrt.
;; The local enum declares the three shift code names as C constants so that
;; the substituted <code> can be compared against lshiftrt.
dc941ea9 | 2118 | (define_expand "v<expander><mode>3" |
03876953 AS |
2119 | [(set (match_operand:V_QIHI 0 "register_operand" "=v") |
2120 | (shiftop:V_QIHI | |
2121 | (match_operand:V_QIHI 1 "gcn_alu_operand" " v") | |
2122 | (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))] | |
dc941ea9 AS |
2123 | "" |
2124 | { | |
2125 | enum {ashift, lshiftrt, ashiftrt}; | |
b8db70e1 | 2126 | bool unsignedp = (<code> == lshiftrt); |
1165109b AS |
2127 | rtx insi1 = gen_reg_rtx (<VnSI>mode); |
2128 | rtx insi2 = gen_reg_rtx (<VnSI>mode); | |
2129 | rtx outsi = gen_reg_rtx (<VnSI>mode); | |
dc941ea9 AS |
2130 | |
2131 | convert_move (insi1, operands[1], unsignedp); | |
2132 | convert_move (insi2, operands[2], unsignedp); | |
1165109b | 2133 | emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2)); |
dc941ea9 AS |
2134 | convert_move (operands[0], outsi, unsignedp); |
2135 | DONE; | |
2136 | }) | |
2137 | ||
1165109b AS |
;; V_SI shift by a per-lane V_SI amount.  Emitted as v_<revmnemonic>0, which
;; takes the shift amounts (%2) before the value being shifted (%1).
2138 | (define_insn "v<expander><mode>3<exec>" |
2139 | [(set (match_operand:V_SI 0 "register_operand" "=v") | |
2140 | (shiftop:V_SI | |
2141 | (match_operand:V_SI 1 "gcn_alu_operand" " v") | |
2142 | (match_operand:V_SI 2 "gcn_alu_operand" "vB")))] | |
3d6275e3 AS |
2143 | "" |
2144 | "v_<revmnemonic>0\t%0, %2, %1" | |
2145 | [(set_attr "type" "vop2") | |
2146 | (set_attr "length" "8")]) | |
2147 | ||
;; Min/max on sub-dword vectors (V_QIHI).  Both inputs are widened to <VnSI>
;; with convert_move, the <VnSI> min/max pattern named after <code> is
;; applied, and the result is converted back.  The conversions are unsigned
;; for umin/umax and signed otherwise.
;; The local enum declares the four code names as C constants so that the
;; substituted <code> can be compared against umax/umin.
dc941ea9 | 2148 | (define_expand "<expander><mode>3" |
03876953 AS |
2149 | [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand") |
2150 | (minmaxop:V_QIHI | |
2151 | (match_operand:V_QIHI 1 "gcn_valu_src0_operand") | |
2152 | (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))] | |
dc941ea9 AS |
2153 | "" |
2154 | { | |
2155 | enum {smin, umin, smax, umax}; | |
2156 | bool unsignedp = (<code> == umax || <code> == umin); | |
1165109b AS |
2157 | rtx insi1 = gen_reg_rtx (<VnSI>mode); |
2158 | rtx insi2 = gen_reg_rtx (<VnSI>mode); | |
2159 | rtx outsi = gen_reg_rtx (<VnSI>mode); | |
dc941ea9 AS |
2160 | |
2161 | convert_move (insi1, operands[1], unsignedp); | |
2162 | convert_move (insi2, operands[2], unsignedp); | |
1165109b | 2163 | emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2)); |
dc941ea9 AS |
2164 | convert_move (operands[0], outsi, unsignedp); |
2165 | DONE; | |
2166 | }) | |
2167 | ||
1165109b AS |
;; Signed/unsigned min/max on V_SI.  Two alternatives:
;;   0: register destination, emitted as v_<mnemonic>0 with the sources in
;;      swapped order (%2, %1);
;;   1: RD (DS memory) destination tied to operand 1 ("0"), emitted as the
;;      in-place ds_<mnemonic>0.
2168 | (define_insn "<expander><vnsi>3<exec>" |
2169 | [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD") | |
2170 | (minmaxop:V_SI | |
2171 | (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0") | |
2172 | (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))] | |
3d6275e3 AS |
2173 | "" |
2174 | "@ | |
2175 | v_<mnemonic>0\t%0, %2, %1 | |
2176 | ds_<mnemonic>0\t%A0, %2%O0" | |
2177 | [(set_attr "type" "vop2,ds") | |
2178 | (set_attr "length" "8,8")]) | |
2179 | ||
2180 | ;; }}} | |
2181 | ;; {{{ FP binops - special cases | |
2182 | ||
2183 | ; GCN does not directly provide a DFmode subtract instruction, so we do it by | |
2184 | ; adding the negated second operand to the first. | |
2185 | ||
1165109b AS |
;; V_DF subtraction implemented as v_add_f64 with operand 2 negated.  The two
;; alternatives differ only in which source may be a non-VGPR operand; the
;; second emits the negated operand first so that operand ends up in the
;; first source slot.
2186 | (define_insn "sub<mode>3<exec>" |
2187 | [(set (match_operand:V_DF 0 "register_operand" "= v, v") | |
2188 | (minus:V_DF | |
2189 | (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v") | |
2190 | (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))] | |
3d6275e3 AS |
2191 | "" |
2192 | "@ | |
2193 | v_add_f64\t%0, %1, -%2 | |
2194 | v_add_f64\t%0, -%2, %1" | |
2195 | [(set_attr "type" "vop3a") | |
2196 | (set_attr "length" "8,8")]) | |
2197 | ||
;; Scalar DF subtraction, same scheme as the vector pattern: v_add_f64 with
;; operand 2 negated, with two alternatives for operand placement.
abb3993e | 2198 | (define_insn "subdf3" |
3d6275e3 AS |
2199 | [(set (match_operand:DF 0 "register_operand" "= v, v") |
2200 | (minus:DF | |
2201 | (match_operand:DF 1 "gcn_alu_operand" "vSvB, v") | |
2202 | (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))] | |
2203 | "" | |
2204 | "@ | |
2205 | v_add_f64\t%0, %1, -%2 | |
2206 | v_add_f64\t%0, -%2, %1" | |
2207 | [(set_attr "type" "vop3a") | |
2208 | (set_attr "length" "8,8")]) | |
2209 | ||
2210 | ;; }}} | |
2211 | ;; {{{ FP binops - generic | |
2212 | ||
3d6275e3 AS |
; RTL codes for the generic FP binary patterns: the commutative ones,
; the non-commutative ones, and the union of both.
(define_code_iterator comm_fp
  [plus mult smin smax])
(define_code_iterator nocomm_fp
  [minus])
(define_code_iterator all_fp
  [plus mult minus smin smax])
2216 | ||
; Commutative FP binops (comm_fp) on vectors; operands 1 and 2 are
; printed in swapped order.
(define_insn "<expander><mode>3<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v")
     (comm_fp:V_FP
       (match_operand:V_FP 1 "gcn_alu_operand" "% v")
       (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2226 | ||
; Commutative FP binops (comm_fp) on scalars held in vector registers.
; NOTE(review): alternative 1 is typed "ds" but prints a v_ mnemonic and
; does not use the %A0 address form seen in the integer min/max DS
; alternative -- confirm this template is intended.
(define_insn "<expander><mode>3"
  [(set
     (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
     (comm_fp:FP
       (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
       (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   v_<mnemonic>0\t%0, %1%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8")])
2238 | ||
; Non-commutative FP binops (nocomm_fp) on single-register vectors.
; Alternative 1 uses the reversed mnemonic with the operands swapped.
(define_insn "<expander><mode>3<exec>"
  [(set
     (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
     (nocomm_fp:V_FP_1REG
       (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
       (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
2250 | ||
; Non-commutative FP binops (nocomm_fp) on single-register scalars.
; Alternative 1 uses the reversed mnemonic with the operands swapped.
(define_insn "<expander><mode>3"
  [(set
     (match_operand:FP_1REG 0 "register_operand" "= v, v")
     (nocomm_fp:FP_1REG
       (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
       (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
2262 | ||
2263 | ;; }}} | |
2264 | ;; {{{ FP unops | |
2265 | ||
; Scalar FP absolute value: add zero to |operand 1| using the VOP3
; absolute-value source modifier.
(define_insn "abs<mode>2"
  [(set
     (match_operand:FP 0 "register_operand" "=v")
     (abs:FP
       (match_operand:FP 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2273 | ||
; Vector FP absolute value: add zero to |operand 1| using the VOP3
; absolute-value source modifier.
(define_insn "abs<mode>2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "=v")
     (abs:V_FP (match_operand:V_FP 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2282 | ||
; Vector FP negation: add zero to the negated operand 1.
(define_insn "neg<mode>2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "=v")
     (neg:V_FP (match_operand:V_FP 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, -%1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2291 | ||
; Vector FP square root; only enabled under
; flag_unsafe_math_optimizations.
(define_insn "sqrt<mode>2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v")
     (sqrt:V_FP (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2300 | ||
; Scalar FP square root; only enabled under
; flag_unsafe_math_optimizations.
(define_insn "sqrt<mode>2"
  [(set
     (match_operand:FP 0 "register_operand" "= v")
     (sqrt:FP (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2309 | ||
2310 | ;; }}} | |
2311 | ;; {{{ FP fused multiply and add | |
2312 | ||
; Vector fused multiply-add: op0 = op1 * op2 + op3.
(define_insn "fma<mode>4<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v, v")
     (fma:V_FP
       (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
       (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
       (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2323 | ||
; Vector fused multiply-add with operand 2 negated:
; op0 = op1 * -op2 + op3.
(define_insn "fma<mode>4_negop2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v, v, v")
     (fma:V_FP
       (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
       (neg:V_FP
         (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
       (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2335 | ||
; Scalar fused multiply-add: op0 = op1 * op2 + op3.
(define_insn "fma<mode>4"
  [(set
     (match_operand:FP 0 "register_operand" "= v, v")
     (fma:FP
       (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
       (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
       (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2346 | ||
; Scalar fused multiply-add with operand 2 negated:
; op0 = op1 * -op2 + op3.
(define_insn "fma<mode>4_negop2"
  [(set
     (match_operand:FP 0 "register_operand" "= v, v, v")
     (fma:FP
       (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
       (neg:FP
         (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
       (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2358 | ||
2359 | ;; }}} | |
2360 | ;; {{{ FP division | |
2361 | ||
; Vector reciprocal estimate (v_rcp), represented as UNSPEC_RCP.
(define_insn "recip<mode>2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v")
     (unspec:V_FP
       [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
       UNSPEC_RCP))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2371 | ||
; Scalar reciprocal estimate (v_rcp), represented as UNSPEC_RCP.
(define_insn "recip<mode>2"
  [(set
     (match_operand:FP 0 "register_operand" "= v")
     (unspec:FP
       [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
       UNSPEC_RCP))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2381 | ||
2382 | ;; Do division via a = b * 1/c | |
2383 | ;; The v_rcp_* instructions are not sufficiently accurate on their own, | |
2384 | ;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson | |
2385 | ;; which the ISA manual says is enough to improve the reciprocal accuracy. | |
2386 | ;; | |
2387 | ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc. | |
2388 | ||
;; Vector FP division (only with flag_reciprocal_math).
;; Operand 0 = operand 1 (numerator) / operand 2 (denominator).
(define_expand "div<mode>3"
  [(match_operand:V_FP 0 "gcn_valu_dst_operand")
   (match_operand:V_FP 1 "gcn_valu_src0_operand")
   (match_operand:V_FP 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx one = gcn_vec_constant (<MODE>mode,
		  const_double_from_real_value (dconst1, <SCALAR_MODE>mode));
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;
    rtx num = operands[1], denom = operands[2];

    /* A plain reciprocal is a division whose numerator is +1.0 in every
       lane; only then may the refined reciprocal be used as the result.
       (Testing against -1.0, or only lane 0, would give a wrong answer
       for -1/x or for a non-uniform constant numerator.)  */
    rtx num_elt;
    bool is_rcp = (const_vec_duplicate_p (num, &num_elt)
		   && CONST_DOUBLE_P (num_elt)
		   && real_identical (CONST_DOUBLE_REAL_VALUE (num_elt),
				      &dconst1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    /* rcp = initrcp + (1 - initrcp * denom) * initrcp.  */
    emit_insn (gen_recip<mode>2 (initrcp, denom));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
    emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));

    if (!is_rcp)
      {
	rtx div_est = gen_reg_rtx (<MODE>mode);
	rtx fma2 = gen_reg_rtx (<MODE>mode);
	rtx fma3 = gen_reg_rtx (<MODE>mode);
	rtx fma4 = gen_reg_rtx (<MODE>mode);

	/* Initial quotient estimate, then two residual-correction steps:
	   q' = q + (num - q * denom) * rcp.  */
	emit_insn (gen_mul<mode>3 (div_est, num, rcp));
	emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
	emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
	emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
	emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
      }

    DONE;
  })
2431 | ||
;; Scalar FP division (only with flag_reciprocal_math).
;; Operand 0 = operand 1 (numerator) / operand 2 (denominator).
(define_expand "div<mode>3"
  [(match_operand:FP 0 "gcn_valu_dst_operand")
   (match_operand:FP 1 "gcn_valu_src0_operand")
   (match_operand:FP 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx one = const_double_from_real_value (dconst1, <MODE>mode);
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;
    rtx num = operands[1], denom = operands[2];

    /* A plain reciprocal is a division whose numerator is +1.0; only
       then may the refined reciprocal be used directly as the result.
       (Testing against -1.0 would return +1/x for -1/x.)  */
    bool is_rcp = (GET_CODE (num) == CONST_DOUBLE
		   && real_identical (CONST_DOUBLE_REAL_VALUE (num),
				      &dconst1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    /* rcp = initrcp + (1 - initrcp * denom) * initrcp.  */
    emit_insn (gen_recip<mode>2 (initrcp, denom));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one));
    emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp));

    if (!is_rcp)
      {
	rtx div_est = gen_reg_rtx (<MODE>mode);
	rtx fma2 = gen_reg_rtx (<MODE>mode);
	rtx fma3 = gen_reg_rtx (<MODE>mode);
	rtx fma4 = gen_reg_rtx (<MODE>mode);

	/* Initial quotient estimate, then two residual-correction steps:
	   q' = q + (num - q * denom) * rcp.  */
	emit_insn (gen_mul<mode>3 (div_est, num, rcp));
	emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num));
	emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
	emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num));
	emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3));
      }

    DONE;
  })
2472 | ||
2473 | ;; }}} | |
2474 | ;; {{{ Int/FP conversions | |
2475 | ||
; Scalar source and destination modes for the conversion patterns.
(define_mode_iterator CVT_FROM_MODE
  [HI SI HF SF DF])
(define_mode_iterator CVT_TO_MODE
  [HI SI HF SF DF])

; Vector modes for the conversion patterns: all, FP-only, integer-only.
(define_mode_iterator VCVT_MODE
  [V64HI V64SI V64HF V64SF V64DF])
(define_mode_iterator VCVT_FMODE
  [V64HF V64SF V64DF])
(define_mode_iterator VCVT_IMODE
  [V64HI V64SI])

; Conversion RTL codes, with the pattern-name stem and the v_cvt operand
; type suffixes that go with each of them.
(define_code_iterator cvt_op
  [fix
   unsigned_fix
   float
   unsigned_float
   float_extend
   float_truncate])
(define_code_attr cvt_name
  [(fix "fix_trunc")
   (unsigned_fix "fixuns_trunc")
   (float "float")
   (unsigned_float "floatuns")
   (float_extend "extend")
   (float_truncate "trunc")])
(define_code_attr cvt_operands
  [(fix "%i0%i1")
   (unsigned_fix "%u0%i1")
   (float "%i0%i1")
   (unsigned_float "%i0%u1")
   (float_extend "%i0%i1")
   (float_truncate "%i0%i1")])
2493 | ||
; Scalar conversions.  Every from/to/code combination is generated;
; gcn_valid_cvt_p in the condition selects the ones that are enabled.
(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
  [(set
     (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
     (cvt_op:CVT_TO_MODE
       (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2503 | ||
3d66c777 AS |
; Vector conversions producing an FP vector.  gcn_valid_cvt_p in the
; condition selects the combinations that are enabled.
(define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
  [(set
     (match_operand:VCVT_FMODE 0 "register_operand" "= v")
     (cvt_op:VCVT_FMODE
       (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2513 | ||
; Vector conversions from an FP vector to an integer vector.
; gcn_valid_cvt_p in the condition selects the enabled combinations.
(define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
  [(set
     (match_operand:VCVT_IMODE 0 "register_operand" "= v")
     (cvt_op:VCVT_IMODE
       (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2523 | ||
2524 | ;; }}} | |
2525 | ;; {{{ Int/int conversions | |
2526 | ||
3d66c777 AS |
; Integer conversions that share one SDWA move pattern, and the
; pattern-name stem used for each integer conversion code.
(define_code_iterator zero_convert
  [truncate zero_extend])
(define_code_attr convop
  [(sign_extend "extend")
   (zero_extend "zero_extend")
   (truncate "trunc")])
2532 | ||
03876953 AS |
; Truncate or zero-extend between single-register integer vector modes
; with an SDWA move; the sub-dword selectors come from the <sdwa>
; attribute of the destination and source modes.
(define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
  [(set
     (match_operand:V_INT_1REG 0 "register_operand" "=v")
     (zero_convert:V_INT_1REG
       (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
  ""
  "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])
2541 | ||
03876953 AS |
; Sign-extend between single-register integer vector modes with an SDWA
; move that applies sext() to the selected source sub-dword.
(define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
  [(set
     (match_operand:V_INT_1REG 0 "register_operand" "=v")
     (sign_extend:V_INT_1REG
       (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
  ""
  "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])
2550 | ||
3d6275e3 AS |
2551 | ;; GCC can already do these for scalar types, but not for vector types. |
2552 | ;; Unfortunately you can't just do SUBREG on a vector to select the low part, | |
2553 | ;; so there must be a few tricks here. | |
2554 | ||
; Truncate a DI-element vector to a single-register integer vector.
; Split after reload: take part 0 of the source, then either move it
; (SI-element result) or truncate it further.
(define_insn_and_split "trunc<vndi><mode>2"
  [(set
     (match_operand:V_INT_1REG 0 "register_operand" "=v")
     (truncate:V_INT_1REG
       (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src_lo = gcn_operand_part (<VnDI>mode, operands[1], 0);

    /* NOTE(review): there is no DONE here, so the (const_int 0)
       replacement pattern is presumably emitted as well -- confirm.  */
    if (<MODE>mode == <VnSI>mode)
      emit_move_insn (dest, src_lo);
    else
      emit_insn (gen_trunc<vnsi><mode>2 (dest, src_lo));
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2574 | ||
; Masked form of the DI-element vector truncation: operand 2 is the
; merge value and operand 3 the EXEC mask.
(define_insn_and_split "trunc<vndi><mode>2_exec"
  [(set
     (match_operand:V_INT_1REG 0 "register_operand" "=v")
     (vec_merge:V_INT_1REG
       (truncate:V_INT_1REG
         (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
       (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
       (match_operand:DI 3 "gcn_exec_operand" " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src_lo = gcn_operand_part (<VnDI>mode, operands[1], 0);
    rtx prev = operands[2];
    rtx mask = operands[3];

    /* NOTE(review): there is no DONE here, so the (const_int 0)
       replacement pattern is presumably emitted as well -- confirm.  */
    if (<MODE>mode == <VnSI>mode)
      emit_insn (gen_mov<mode>_exec (dest, src_lo, prev, mask));
    else
      emit_insn (gen_trunc<vnsi><mode>2_exec (dest, src_lo, prev, mask));
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2599 | ||
1165109b AS |
; Sign- or zero-extend a single-register integer vector to a DI-element
; vector.  Split after reload: part 0 receives the value extended to
; SI elements, part 1 receives either the sign bits or zero.
(define_insn_and_split "<convop><mode><vndi>2"
  [(set
     (match_operand:<VnDI> 0 "register_operand" "=v")
     (any_extend:<VnDI>
       (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx src = operands[1];
    rtx dest_lo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<VnDI>mode, operands[0], 1);

    /* Low part.  */
    if (<MODE>mode == <VnSI>mode)
      emit_move_insn (dest_lo, src);
    else
      emit_insn (gen_<convop><mode><vnsi>2 (dest_lo, src));

    /* High part: arithmetic shift of the low part by 31 for a signed
       extension, otherwise all zeros.
       NOTE(review): there is no DONE here, so the (const_int 0)
       replacement pattern is presumably emitted as well -- confirm.  */
    if ('<su>' == 's')
      emit_insn (gen_ashr<vnsi>3 (dest_hi, dest_lo, GEN_INT (31)));
    else
      emit_insn (gen_vec_duplicate<vnsi> (dest_hi, const0_rtx));
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
2624 | ||
1165109b AS |
; Masked form of the extension to a DI-element vector: operand 2 is the
; merge value (split into its two parts) and operand 3 the EXEC mask.
(define_insn_and_split "<convop><mode><vndi>2_exec"
  [(set
     (match_operand:<VnDI> 0 "register_operand" "=v")
     (vec_merge:<VnDI>
       (any_extend:<VnDI>
         (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
       (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
       (match_operand:DI 3 "gcn_exec_operand" " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx src = operands[1];
    rtx mask = operands[3];
    rtx dest_lo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<VnDI>mode, operands[0], 1);
    rtx prev_lo = gcn_operand_part (<VnDI>mode, operands[2], 0);
    rtx prev_hi = gcn_operand_part (<VnDI>mode, operands[2], 1);

    /* Low part.  */
    if (<MODE>mode == <VnSI>mode)
      emit_insn (gen_mov<mode>_exec (dest_lo, src, prev_lo, mask));
    else
      emit_insn (gen_<convop><mode><vnsi>2_exec (dest_lo, src, prev_lo,
						 mask));

    /* High part: arithmetic shift of the low part by 31 for a signed
       extension, otherwise all zeros.
       NOTE(review): there is no DONE here, so the (const_int 0)
       replacement pattern is presumably emitted as well -- confirm.  */
    if ('<su>' == 's')
      emit_insn (gen_ashr<vnsi>3_exec (dest_hi, dest_lo, GEN_INT (31),
				       prev_hi, mask));
    else
      emit_insn (gen_vec_duplicate<vnsi>_exec (dest_hi, const0_rtx, prev_hi,
					       mask));
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
3d6275e3 AS |
2657 | |
2658 | ;; }}} | |
2659 | ;; {{{ Vector comparison/merge | |
2660 | ||
; Vector comparison producing a DI lane mask.  Alternatives 0-3 print
; "vcc" as the destination (2 and 3 use v_cmpx and need a cV scratch);
; alternatives 4 and 5 print operand 0 and are VOP3A encoded.
(define_insn "vec_cmp<mode>di"
  [(set
     (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
     (match_operator:DI 1 "gcn_fp_compare_operator"
       [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
        (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]))
   (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])
2677 | ||
; Unsigned vector comparison; forwards to the vec_cmp<mode>di pattern.
(define_expand "vec_cmpu<mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
  ""
  {
    /* The signed pattern is reused as-is: the operator in operands[1]
       is already the unsigned code (e.g. LTU rather than LT), and the
       '%E1' output directive prints the matching mnemonic.  */
    rtx cmp = gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
				   operands[3]);
    emit_insn (cmp);
    DONE;
  })
2692 | ||
0e159efc | 2693 | ; There's no instruction for 8-bit vector comparison, so we need to extend. |
; QI-element vector comparison: both inputs are widened to SI-element
; vectors in fresh pseudos and compared with the SI-element pattern.
(define_expand "vec_cmp<u><mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
  "can_create_pseudo_p ()"
  {
    rtx wide_lhs = gen_reg_rtx (<VnSI>mode);
    rtx wide_rhs = gen_reg_rtx (<VnSI>mode);

    emit_insn (gen_<expander><mode><vnsi>2 (wide_lhs, operands[2]));
    emit_insn (gen_<expander><mode><vnsi>2 (wide_rhs, operands[3]));
    emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], wide_lhs,
				    wide_rhs));
    DONE;
  })
2709 | ||
3d6275e3 AS |
; Vector comparison whose result is ANDed with the EXEC mask in
; operand 4.  Alternatives 0-3 print "vcc" as the destination (2 and 3
; use v_cmpx and need a cV scratch); alternatives 4 and 5 print
; operand 0 and are VOP3A encoded.
(define_insn "vec_cmp<mode>di_exec"
  [(set
     (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
     (and:DI
       (match_operator 1 "gcn_fp_compare_operator"
         [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
          (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])
       (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
   (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])
2728 | ||
0e159efc AS |
; Unsigned vector comparison under an EXEC mask (operand 4); forwards to
; the vec_cmp<mode>di_exec insn.
(define_expand "vec_cmpu<mode>di_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    /* Unsigned comparisons use the same patterns as signed comparisons,
       except that they use unsigned operators (e.g. LTU vs LT).
       The '%E1' directive then does the Right Thing.

       This must call the vec_cmp (not vec_cmpu) generator: calling
       gen_vec_cmpu<mode>di_exec here would re-enter this expander and
       never terminate.  */
    emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
					 operands[2], operands[3],
					 operands[4]));
    DONE;
  })
2745 | ||
; QI-element vector comparison under an EXEC mask (operand 4): both
; inputs are widened to SI-element vectors in fresh pseudos and compared
; with the masked SI-element pattern.
(define_expand "vec_cmp<u><mode>di_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "gcn_compare_operator"
     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  "can_create_pseudo_p ()"
  {
    rtx mask = operands[4];
    rtx wide_lhs = gen_reg_rtx (<VnSI>mode);
    rtx wide_rhs = gen_reg_rtx (<VnSI>mode);

    /* NOTE(review): the third argument of each extension below is the
       QI-element input itself; if that argument is the merge value it
       would presumably need to be an SI-element vector -- confirm.  */
    emit_insn (gen_<expander><mode><vnsi>2_exec (wide_lhs, operands[2],
						 operands[2], mask));
    emit_insn (gen_<expander><mode><vnsi>2_exec (wide_rhs, operands[3],
						 operands[3], mask));
    emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], wide_lhs,
					 wide_rhs, mask));
    DONE;
  })
2765 | ||
3d6275e3 AS |
; Vector comparison whose first input is a scalar broadcast with
; vec_duplicate.  Alternatives 0-3 print "vcc" as the destination (2 and
; 3 use v_cmpx and need a cV scratch); alternative 4 prints operand 0
; and is VOP3A encoded.
(define_insn "vec_cmp<mode>di_dup"
  [(set
     (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
     (match_operator:DI 1 "gcn_fp_compare_operator"
       [(vec_duplicate:V_noQI
          (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
                                       " Sv, B,Sv,B, A"))
        (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]))
   (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])
2783 | ||
; Broadcast-scalar vector comparison whose result is ANDed with the EXEC
; mask in operand 4.  Alternatives 0-3 print "vcc" as the destination
; (2 and 3 use v_cmpx and need a cV scratch); alternative 4 prints
; operand 0 and is VOP3A encoded.
(define_insn "vec_cmp<mode>di_dup_exec"
  [(set
     (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
     (and:DI
       (match_operator 1 "gcn_fp_compare_operator"
         [(vec_duplicate:V_noQI
            (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
                                         " Sv, B,Sv,B, A"))
          (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
       (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
   (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])
2803 | ||
; Lane select under a DI mask (operand 3), expressed as a vec_merge of
; operands 1 and 2 plus a <VnDI> scratch clobber.  There is no
; preparation code; the RTL template is emitted as written.
(define_expand "vcond_mask_<mode>di"
  [(parallel
     [(set
        (match_operand:V_ALL 0 "register_operand" "")
        (vec_merge:V_ALL
          (match_operand:V_ALL 1 "gcn_vop3_operand" "")
          (match_operand:V_ALL 2 "gcn_alu_operand" "")
          (match_operand:DI 3 "register_operand" "")))
      (clobber (scratch:<VnDI>))])]
  ""
  "")
2814 | ||
03876953 AS |
; Vector conditional select: compare operands 4 and 5 with operator 3
; into a DI mask, then select between operands 1 and 2 with it.
(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx lane_mask = gen_reg_rtx (DImode);

    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di (lane_mask, operands[3],
					      operands[4], operands[5]));
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
					      operands[2], lane_mask));
    DONE;
  })
2831 | ||
03876953 AS |
; Vector conditional select where the comparison is performed under the
; EXEC mask in operand 6; the select itself uses the unmasked
; vcond_mask pattern.
(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx lane_mask = gen_reg_rtx (DImode);

    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec (lane_mask, operands[3],
						   operands[4], operands[5],
						   operands[6]));
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
					      operands[2], lane_mask));
    DONE;
  })
2849 | ||
03876953 AS |
; Vector conditional select with an unsigned integer comparison: compare
; operands 4 and 5 via vec_cmpu into a DI mask, then select between
; operands 1 and 2 with it.
(define_expand "vcondu<V_ALL:mode><V_INT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx lane_mask = gen_reg_rtx (DImode);

    emit_insn (gen_vec_cmpu<V_INT:mode>di (lane_mask, operands[3],
					   operands[4], operands[5]));
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
					      operands[2], lane_mask));
    DONE;
  })
2866 | ||
03876953 AS |
; Vector conditional select with an unsigned integer comparison
; performed under the EXEC mask in operand 6; the select itself uses
; the unmasked vcond_mask pattern.
(define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx lane_mask = gen_reg_rtx (DImode);

    emit_insn (gen_vec_cmpu<V_INT:mode>di_exec (lane_mask, operands[3],
						operands[4], operands[5],
						operands[6]));
    emit_insn (gen_vcond_mask_<V_ALL:mode>di (operands[0], operands[1],
					      operands[2], lane_mask));
    DONE;
  })
2884 | ||
2885 | ;; }}} | |
2886 | ;; {{{ Fully masked loop support | |
2887 | ||
; Build the DI lane mask for a fully-masked loop from a start value
; (operand 1) and a limit (operand 2).
(define_expand "while_ultsidi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:SI 1 "")
   (match_operand:SI 2 "")]
  ""
  {
    if (GET_CODE (operands[1]) != CONST_INT
	|| GET_CODE (operands[2]) != CONST_INT)
      {
	/* Presumably VGPR 1 holds the per-lane indices 0, 1, 2, 3, ...
	   (as the variable name suggests) -- confirm against the
	   prologue code.  */
	rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
	rtx tmp = _0_1_2_3;

	/* Add the start value to each lane unless it is a literal 0.  */
	if (GET_CODE (operands[1]) != CONST_INT
	    || INTVAL (operands[1]) != 0)
	  {
	    tmp = gen_reg_rtx (V64SImode);
	    emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
	  }

	/* Lane is active where limit > start + lane.
	   NOTE(review): this is a signed GT although the pattern name
	   says "ult" -- confirm.  */
	emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
					   gen_rtx_GT (VOIDmode, 0, 0),
					   operands[2], tmp));
      }
    else
      {
	/* Both bounds are constants, so compute the mask directly: the
	   low DIFF bits are set.  A non-positive DIFF yields an empty
	   mask; shifting by a negative count would be undefined.  */
	HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
	HOST_WIDE_INT mask;

	if (diff <= 0)
	  mask = 0;
	else if (diff >= 64)
	  mask = -1;
	else
	  mask = ~((unsigned HOST_WIDE_INT)-1 << diff);

	emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
      }
    DONE;
  })
2918 | ||
; Masked vector load: operand 0 = destination register, operand 1 =
; memory, operand 2 = lane mask.  Implemented as a masked gather.
(define_expand "maskload<mode>di"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "memory_operand")
   (match_operand 2 "")]
  ""
  {
    rtx dest = operands[0];
    rtx mem = operands[1];

    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
		 (<MODE>mode, exec, mem, gen_rtx_SCRATCH (<VnDI>mode));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (mem));
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (mem));

    /* Masked lanes are required to hold zero, so clear the destination
       first and use it as the merge value of the gather.  */
    emit_move_insn (dest, gcn_vec_constant (<MODE>mode, 0));

    emit_insn (gen_gather<mode>_expr_exec (dest, addr, as, v, dest, exec));
    DONE;
  })
2938 | ||
2939 | (define_expand "maskstore<mode>di" | |
03876953 AS |
2940 | [(match_operand:V_ALL 0 "memory_operand") |
2941 | (match_operand:V_ALL 1 "register_operand") | |
3d6275e3 AS |
2942 | (match_operand 2 "")] |
2943 | "" | |
2944 | { | |
2945 | rtx exec = force_reg (DImode, operands[2]); | |
2946 | rtx addr = gcn_expand_scalar_to_vector_address | |
1165109b | 2947 | (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode)); |
3d6275e3 AS |
2948 | rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); |
2949 | rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); | |
2950 | emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec)); | |
2951 | DONE; | |
2952 | }) | |
2953 | ||
1165109b | 2954 | (define_expand "mask_gather_load<mode><vnsi>" |
03876953 | 2955 | [(match_operand:V_ALL 0 "register_operand") |
3d6275e3 | 2956 | (match_operand:DI 1 "register_operand") |
1165109b | 2957 | (match_operand:<VnSI> 2 "register_operand") |
3d6275e3 AS |
2958 | (match_operand 3 "immediate_operand") |
2959 | (match_operand:SI 4 "gcn_alu_operand") | |
2960 | (match_operand:DI 5 "")] | |
2961 | "" | |
2962 | { | |
2963 | rtx exec = force_reg (DImode, operands[5]); | |
2964 | ||
95607c12 AS |
2965 | rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], |
2966 | operands[2], operands[4], | |
2967 | INTVAL (operands[3]), exec); | |
2968 | ||
2969 | /* Masked lanes are required to hold zero. */ | |
2970 | emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0)); | |
2971 | ||
1165109b | 2972 | if (GET_MODE (addr) == <VnDI>mode) |
95607c12 AS |
2973 | emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr, |
2974 | const0_rtx, const0_rtx, | |
2975 | const0_rtx, operands[0], | |
2976 | exec)); | |
2977 | else | |
2978 | emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1], | |
2979 | addr, const0_rtx, | |
2980 | const0_rtx, const0_rtx, | |
2981 | operands[0], exec)); | |
3d6275e3 AS |
2982 | DONE; |
2983 | }) | |
2984 | ||
1165109b | 2985 | (define_expand "mask_scatter_store<mode><vnsi>" |
3d6275e3 | 2986 | [(match_operand:DI 0 "register_operand") |
1165109b | 2987 | (match_operand:<VnSI> 1 "register_operand") |
3d6275e3 AS |
2988 | (match_operand 2 "immediate_operand") |
2989 | (match_operand:SI 3 "gcn_alu_operand") | |
03876953 | 2990 | (match_operand:V_ALL 4 "register_operand") |
3d6275e3 AS |
2991 | (match_operand:DI 5 "")] |
2992 | "" | |
2993 | { | |
2994 | rtx exec = force_reg (DImode, operands[5]); | |
2995 | ||
b5fb73b6 AS |
2996 | rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], |
2997 | operands[1], operands[3], | |
2998 | INTVAL (operands[2]), exec); | |
3d6275e3 | 2999 | |
1165109b | 3000 | if (GET_MODE (addr) == <VnDI>mode) |
b5fb73b6 AS |
3001 | emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx, |
3002 | operands[4], const0_rtx, | |
3003 | const0_rtx, | |
3004 | exec)); | |
3005 | else | |
3006 | emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr, | |
3007 | const0_rtx, operands[4], | |
3008 | const0_rtx, const0_rtx, | |
3009 | exec)); | |
3d6275e3 AS |
3010 | DONE; |
3011 | }) | |
3012 | ||
5a80a6c3 | 3013 | (define_code_iterator cond_op [plus minus mult]) |
3d6275e3 AS |
3014 | |
3015 | (define_expand "cond_<expander><mode>" | |
03876953 | 3016 | [(match_operand:V_ALL 0 "register_operand") |
3d6275e3 | 3017 | (match_operand:DI 1 "register_operand") |
03876953 AS |
3018 | (cond_op:V_ALL |
3019 | (match_operand:V_ALL 2 "gcn_alu_operand") | |
3020 | (match_operand:V_ALL 3 "gcn_alu_operand")) | |
3021 | (match_operand:V_ALL 4 "register_operand")] | |
3d6275e3 AS |
3022 | "" |
3023 | { | |
3024 | operands[1] = force_reg (DImode, operands[1]); | |
3025 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
3026 | ||
3027 | emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2], | |
3028 | operands[3], operands[4], | |
3029 | operands[1])); | |
3030 | DONE; | |
3031 | }) | |
3032 | ||
5a80a6c3 | 3033 | ;; TODO smin umin smax umax |
3d6275e3 AS |
3034 | (define_code_iterator cond_bitop [and ior xor]) |
3035 | ||
3036 | (define_expand "cond_<expander><mode>" | |
03876953 | 3037 | [(match_operand:V_INT 0 "register_operand") |
3d6275e3 | 3038 | (match_operand:DI 1 "register_operand") |
03876953 AS |
3039 | (cond_bitop:V_INT |
3040 | (match_operand:V_INT 2 "gcn_alu_operand") | |
3041 | (match_operand:V_INT 3 "gcn_alu_operand")) | |
3042 | (match_operand:V_INT 4 "register_operand")] | |
3d6275e3 AS |
3043 | "" |
3044 | { | |
3045 | operands[1] = force_reg (DImode, operands[1]); | |
3046 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
3047 | ||
3048 | emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2], | |
3049 | operands[3], operands[4], | |
3050 | operands[1])); | |
3051 | DONE; | |
3052 | }) | |
3053 | ||
3054 | ;; }}} | |
3055 | ;; {{{ Vector reductions | |
3056 | ||
3057 | (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR | |
3058 | UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR | |
3059 | UNSPEC_PLUS_DPP_SHR | |
3060 | UNSPEC_AND_DPP_SHR | |
3061 | UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR]) | |
3062 | ||
3063 | (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR | |
3064 | UNSPEC_AND_DPP_SHR | |
3065 | UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR]) | |
3066 | ||
3067 | ; FIXME: Isn't there a better way of doing this? | |
3068 | (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR") | |
3069 | (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR") | |
3070 | (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR") | |
3071 | (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR") | |
3072 | (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR") | |
3073 | (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR") | |
3074 | (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR") | |
3075 | (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")]) | |
3076 | ||
3077 | (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin") | |
3078 | (UNSPEC_SMAX_DPP_SHR "smax") | |
3079 | (UNSPEC_UMIN_DPP_SHR "umin") | |
3080 | (UNSPEC_UMAX_DPP_SHR "umax") | |
3081 | (UNSPEC_PLUS_DPP_SHR "plus") | |
3082 | (UNSPEC_AND_DPP_SHR "and") | |
3083 | (UNSPEC_IOR_DPP_SHR "ior") | |
3084 | (UNSPEC_XOR_DPP_SHR "xor")]) | |
3085 | ||
3086 | (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0") | |
3087 | (UNSPEC_SMAX_DPP_SHR "v_max%i0") | |
3088 | (UNSPEC_UMIN_DPP_SHR "v_min%u0") | |
3089 | (UNSPEC_UMAX_DPP_SHR "v_max%u0") | |
a5879399 AS |
3090 | (UNSPEC_PLUS_DPP_SHR "v_add%U0") |
3091 | (UNSPEC_AND_DPP_SHR "v_and%B0") | |
3092 | (UNSPEC_IOR_DPP_SHR "v_or%B0") | |
3093 | (UNSPEC_XOR_DPP_SHR "v_xor%B0")]) | |
3d6275e3 AS |
3094 | |
3095 | (define_expand "reduc_<reduc_op>_scal_<mode>" | |
3096 | [(set (match_operand:<SCALAR_MODE> 0 "register_operand") | |
3097 | (unspec:<SCALAR_MODE> | |
03876953 | 3098 | [(match_operand:V_ALL 1 "register_operand")] |
3d6275e3 AS |
3099 | REDUC_UNSPEC))] |
3100 | "" | |
3101 | { | |
3102 | rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1], | |
3103 | <reduc_unspec>); | |
3104 | ||
3105 | /* The result of the reduction is in lane 63 of tmp. */ | |
3106 | emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp)); | |
3107 | ||
3108 | DONE; | |
3109 | }) | |
3110 | ||
bf628a97 AS |
3111 | ;; Warning: This "-ffast-math" implementation converts in-order reductions |
3112 | ;; into associative reductions. It's also used where OpenMP or | |
3113 | ;; OpenACC parallelization has already broken the in-order semantics. | |
3114 | (define_expand "fold_left_plus_<mode>" | |
3115 | [(match_operand:<SCALAR_MODE> 0 "register_operand") | |
3116 | (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand") | |
3117 | (match_operand:V_FP 2 "gcn_alu_operand")] | |
3118 | "can_create_pseudo_p () | |
3119 | && (flag_openacc || flag_openmp | |
3120 | || flag_associative_math)" | |
3121 | { | |
3122 | rtx dest = operands[0]; | |
3123 | rtx scalar = operands[1]; | |
3124 | rtx vector = operands[2]; | |
3125 | rtx tmp = gen_reg_rtx (<SCALAR_MODE>mode); | |
3126 | ||
3127 | emit_insn (gen_reduc_plus_scal_<mode> (tmp, vector)); | |
3128 | emit_insn (gen_add<scalar_mode>3 (dest, scalar, tmp)); | |
3129 | DONE; | |
3130 | }) | |
3d6275e3 AS |
3131 | |
3132 | (define_insn "*<reduc_op>_dpp_shr_<mode>" | |
03876953 AS |
3133 | [(set (match_operand:V_1REG 0 "register_operand" "=v") |
3134 | (unspec:V_1REG | |
3135 | [(match_operand:V_1REG 1 "register_operand" "v") | |
3136 | (match_operand:V_1REG 2 "register_operand" "v") | |
3137 | (match_operand:SI 3 "const_int_operand" "n")] | |
3d6275e3 | 3138 | REDUC_UNSPEC))] |
a5879399 | 3139 | ; GCN3 requires a carry out; GCN5 does not. |
3d6275e3 AS |
3140 | "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode) |
3141 | && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)" | |
3142 | { | |
3143 | return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>", | |
3144 | <reduc_unspec>, INTVAL (operands[3])); | |
3145 | } | |
3146 | [(set_attr "type" "vop_dpp") | |
3147 | (set_attr "length" "8")]) | |
3148 | ||
1165109b AS |
3149 | (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>" |
3150 | [(set (match_operand:V_DI 0 "register_operand" "=v") | |
3151 | (unspec:V_DI | |
3152 | [(match_operand:V_DI 1 "register_operand" "v") | |
3153 | (match_operand:V_DI 2 "register_operand" "v") | |
3154 | (match_operand:SI 3 "const_int_operand" "n")] | |
3d6275e3 AS |
3155 | REDUC_2REG_UNSPEC))] |
3156 | "" | |
3157 | "#" | |
3158 | "reload_completed" | |
3159 | [(set (match_dup 4) | |
1165109b | 3160 | (unspec:<VnSI> |
3d6275e3 AS |
3161 | [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC)) |
3162 | (set (match_dup 5) | |
1165109b | 3163 | (unspec:<VnSI> |
3d6275e3 AS |
3164 | [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))] |
3165 | { | |
1165109b AS |
3166 | operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0); |
3167 | operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3168 | operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0); | |
3169 | operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1); | |
3170 | operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0); | |
3171 | operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
3172 | } |
3173 | [(set_attr "type" "vmult") | |
3174 | (set_attr "length" "16")]) | |
3175 | ||
3176 | ; Special cases for addition. | |
3177 | ||
a5879399 | 3178 | (define_insn "*plus_carry_dpp_shr_<mode>" |
03876953 AS |
3179 | [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") |
3180 | (unspec:V_INT_1REG | |
3181 | [(match_operand:V_INT_1REG 1 "register_operand" "v") | |
3182 | (match_operand:V_INT_1REG 2 "register_operand" "v") | |
3183 | (match_operand:SI 3 "const_int_operand" "n")] | |
3d6275e3 AS |
3184 | UNSPEC_PLUS_CARRY_DPP_SHR)) |
3185 | (clobber (reg:DI VCC_REG))] | |
3186 | "" | |
3187 | { | |
1165109b | 3188 | return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32", |
3d6275e3 AS |
3189 | UNSPEC_PLUS_CARRY_DPP_SHR, |
3190 | INTVAL (operands[3])); | |
3191 | } | |
3192 | [(set_attr "type" "vop_dpp") | |
3193 | (set_attr "length" "8")]) | |
3194 | ||
1165109b AS |
3195 | (define_insn "*plus_carry_in_dpp_shr_<mode>" |
3196 | [(set (match_operand:V_SI 0 "register_operand" "=v") | |
3197 | (unspec:V_SI | |
3198 | [(match_operand:V_SI 1 "register_operand" "v") | |
3199 | (match_operand:V_SI 2 "register_operand" "v") | |
3200 | (match_operand:SI 3 "const_int_operand" "n") | |
3d6275e3 AS |
3201 | (match_operand:DI 4 "register_operand" "cV")] |
3202 | UNSPEC_PLUS_CARRY_IN_DPP_SHR)) | |
3203 | (clobber (reg:DI VCC_REG))] | |
3204 | "" | |
3205 | { | |
1165109b | 3206 | return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32", |
3d6275e3 AS |
3207 | UNSPEC_PLUS_CARRY_IN_DPP_SHR, |
3208 | INTVAL (operands[3])); | |
3209 | } | |
3210 | [(set_attr "type" "vop_dpp") | |
3211 | (set_attr "length" "8")]) | |
3212 | ||
1165109b AS |
3213 | (define_insn_and_split "*plus_carry_dpp_shr_<mode>" |
3214 | [(set (match_operand:V_DI 0 "register_operand" "=v") | |
3215 | (unspec:V_DI | |
3216 | [(match_operand:V_DI 1 "register_operand" "v") | |
3217 | (match_operand:V_DI 2 "register_operand" "v") | |
3218 | (match_operand:SI 3 "const_int_operand" "n")] | |
3d6275e3 AS |
3219 | UNSPEC_PLUS_CARRY_DPP_SHR)) |
3220 | (clobber (reg:DI VCC_REG))] | |
3221 | "" | |
3222 | "#" | |
3223 | "reload_completed" | |
3224 | [(parallel [(set (match_dup 4) | |
1165109b | 3225 | (unspec:<VnSI> |
3d6275e3 AS |
3226 | [(match_dup 6) (match_dup 8) (match_dup 3)] |
3227 | UNSPEC_PLUS_CARRY_DPP_SHR)) | |
3228 | (clobber (reg:DI VCC_REG))]) | |
3229 | (parallel [(set (match_dup 5) | |
1165109b | 3230 | (unspec:<VnSI> |
3d6275e3 AS |
3231 | [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)] |
3232 | UNSPEC_PLUS_CARRY_IN_DPP_SHR)) | |
3233 | (clobber (reg:DI VCC_REG))])] | |
3234 | { | |
1165109b AS |
3235 | operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0); |
3236 | operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3237 | operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0); | |
3238 | operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1); | |
3239 | operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0); | |
3240 | operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
3241 | } |
3242 | [(set_attr "type" "vmult") | |
3243 | (set_attr "length" "16")]) | |
3244 | ||
3245 | ; Instructions to move a scalar value from lane 63 of a vector register. | |
3246 | (define_insn "mov_from_lane63_<mode>" | |
03876953 | 3247 | [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v") |
3d6275e3 | 3248 | (unspec:<SCALAR_MODE> |
03876953 | 3249 | [(match_operand:V_1REG 1 "register_operand" " v,v")] |
3d6275e3 AS |
3250 | UNSPEC_MOV_FROM_LANE63))] |
3251 | "" | |
3252 | "@ | |
3253 | v_readlane_b32\t%0, %1, 63 | |
3254 | v_mov_b32\t%0, %1 wave_ror:1" | |
3255 | [(set_attr "type" "vop3a,vop_dpp") | |
3256 | (set_attr "exec" "none,*") | |
3257 | (set_attr "length" "8")]) | |
3258 | ||
a5879399 | 3259 | (define_insn "mov_from_lane63_<mode>" |
03876953 | 3260 | [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v") |
a5879399 | 3261 | (unspec:<SCALAR_MODE> |
03876953 | 3262 | [(match_operand:V_2REG 1 "register_operand" " v,v")] |
3d6275e3 AS |
3263 | UNSPEC_MOV_FROM_LANE63))] |
3264 | "" | |
3265 | "@ | |
3266 | v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63 | |
3267 | * if (REGNO (operands[0]) <= REGNO (operands[1])) \ | |
3268 | return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \ | |
3269 | \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \ | |
3270 | else \ | |
3271 | return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \ | |
3272 | \"v_mov_b32\t%L0, %L1 wave_ror:1\";" | |
3273 | [(set_attr "type" "vop3a,vop_dpp") | |
3274 | (set_attr "exec" "none,*") | |
3275 | (set_attr "length" "8")]) | |
3276 | ||
3277 | ;; }}} | |
3278 | ;; {{{ Miscellaneous | |
3279 | ||
1165109b AS |
3280 | (define_expand "vec_series<mode>" |
3281 | [(match_operand:V_SI 0 "register_operand") | |
3d6275e3 AS |
3282 | (match_operand:SI 1 "gcn_alu_operand") |
3283 | (match_operand:SI 2 "gcn_alu_operand")] | |
3284 | "" | |
3285 | { | |
1165109b AS |
3286 | rtx tmp = gen_reg_rtx (<MODE>mode); |
3287 | rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1)); | |
3d6275e3 | 3288 | |
1165109b AS |
3289 | emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2])); |
3290 | emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1])); | |
3d6275e3 AS |
3291 | DONE; |
3292 | }) | |
3293 | ||
1165109b AS |
3294 | (define_expand "vec_series<mode>" |
3295 | [(match_operand:V_DI 0 "register_operand") | |
3d6275e3 AS |
3296 | (match_operand:DI 1 "gcn_alu_operand") |
3297 | (match_operand:DI 2 "gcn_alu_operand")] | |
3298 | "" | |
3299 | { | |
1165109b AS |
3300 | rtx tmp = gen_reg_rtx (<MODE>mode); |
3301 | rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1)); | |
3302 | rtx op1vec = gen_reg_rtx (<MODE>mode); | |
3d6275e3 | 3303 | |
1165109b AS |
3304 | emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2])); |
3305 | emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1])); | |
3306 | emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec)); | |
3d6275e3 AS |
3307 | DONE; |
3308 | }) | |
3309 | ||
3310 | ;; }}} |