]>
Commit | Line | Data |
---|---|---|
7adcbafe | 1 | ;; Copyright (C) 2016-2022 Free Software Foundation, Inc. |
3d6275e3 AS |
2 | |
3 | ;; This file is free software; you can redistribute it and/or modify it under | |
4 | ;; the terms of the GNU General Public License as published by the Free | |
5 | ;; Software Foundation; either version 3 of the License, or (at your option) | |
6 | ;; any later version. | |
7 | ||
8 | ;; This file is distributed in the hope that it will be useful, but WITHOUT | |
9 | ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
10 | ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
11 | ;; for more details. | |
12 | ||
13 | ;; You should have received a copy of the GNU General Public License | |
14 | ;; along with GCC; see the file COPYING3. If not see | |
15 | ;; <http://www.gnu.org/licenses/>. | |
16 | ||
17 | ;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. | |
18 | ||
19 | (include "predicates.md") | |
20 | (include "constraints.md") | |
21 | ||
22 | ;; {{{ Constants and enums | |
23 | ||
24 | ; Named registers | |
25 | (define_constants | |
26 | [(FIRST_SGPR_REG 0) | |
c2709ec4 | 27 | (CC_SAVE_REG 22) |
3d6275e3 AS |
28 | (LAST_SGPR_REG 101) |
29 | (FLAT_SCRATCH_REG 102) | |
30 | (FLAT_SCRATCH_LO_REG 102) | |
31 | (FLAT_SCRATCH_HI_REG 103) | |
32 | (XNACK_MASK_REG 104) | |
33 | (XNACK_MASK_LO_REG 104) | |
34 | (XNACK_MASK_HI_REG 105) | |
35 | (VCC_REG 106) | |
36 | (VCC_LO_REG 106) | |
37 | (VCC_HI_REG 107) | |
38 | (VCCZ_REG 108) | |
39 | (TBA_REG 109) | |
40 | (TBA_LO_REG 109) | |
41 | (TBA_HI_REG 110) | |
42 | (TMA_REG 111) | |
43 | (TMA_LO_REG 111) | |
44 | (TMA_HI_REG 112) | |
45 | (TTMP0_REG 113) | |
46 | (TTMP11_REG 124) | |
47 | (M0_REG 125) | |
48 | (EXEC_REG 126) | |
49 | (EXEC_LO_REG 126) | |
50 | (EXEC_HI_REG 127) | |
51 | (EXECZ_REG 128) | |
52 | (SCC_REG 129) | |
53 | (FIRST_VGPR_REG 160) | |
54 | (LAST_VGPR_REG 415)]) | |
55 | ||
56 | (define_constants | |
57 | [(SP_REGNUM 16) | |
58 | (LR_REGNUM 18) | |
59 | (AP_REGNUM 416) | |
60 | (FP_REGNUM 418)]) | |
61 | ||
62 | (define_c_enum "unspecv" [ | |
63 | UNSPECV_PROLOGUE_USE | |
64 | UNSPECV_KERNEL_RETURN | |
65 | UNSPECV_BARRIER | |
66 | UNSPECV_ATOMIC | |
67 | UNSPECV_ICACHE_INV]) | |
68 | ||
69 | (define_c_enum "unspec" [ | |
76136f7f | 70 | UNSPEC_ADDPTR |
3d6275e3 AS |
71 | UNSPEC_VECTOR |
72 | UNSPEC_BPERMUTE | |
73 | UNSPEC_SGPRBASE | |
74 | UNSPEC_MEMORY_BARRIER | |
75 | UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR | |
76 | UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR | |
77 | UNSPEC_PLUS_DPP_SHR | |
78 | UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR | |
79 | UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR | |
a5879399 | 80 | UNSPEC_MOV_DPP_SHR |
3d6275e3 AS |
81 | UNSPEC_MOV_FROM_LANE63 |
82 | UNSPEC_GATHER | |
c8812bac | 83 | UNSPEC_SCATTER |
0c06e46a JB |
84 | UNSPEC_RCP |
85 | UNSPEC_FLBIT_INT]) | |
3d6275e3 AS |
86 | |
87 | ;; }}} | |
88 | ;; {{{ Attributes | |
89 | ||
90 | ; Instruction type (encoding) as described in the ISA specification. | |
91 | ; The following table summarizes possible operands of individual instruction | |
92 | ; types and corresponding constraints. | |
93 | ; | |
94 | ; sop2 - scalar, two inputs, one output | |
95 | ; ssrc0/ssrc1: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec | |
96 | ; vccz,execz,scc,inline immediate,fp inline immediate | |
97 | ; sdst: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec | |
98 | ; | |
99 | ; Constraints "=SD, SD", "SSA,SSB","SSB,SSA" | |
100 | ; | |
101 | ; sopk - scalar, inline constant input, one output | |
102 | ; simm16: 16bit inline constant | |
103 | ; sdst: same as sop2/ssrc0 | |
104 | ; | |
105 | ; Constraints "=SD", "J" | |
106 | ; | |
107 | ; sop1 - scalar, one input, one output | |
108 | ; ssrc0: same as sop2/ssrc0. FIXME: manual omit VCCZ | |
109 | ; sdst: same as sop2/sdst | |
110 | ; | |
111 | ; Constraints "=SD", "SSA" | |
112 | ; | |
113 | ; sopc - scalar, two inputs, one comparison | |
114 | ; ssrc0: same as sop2/ssrc0. | |
115 | ; | |
116 | ; Constraints "SSI,SSA","SSA,SSI" | |
117 | ; | |
118 | ; sopp - scalar, one constant input, one special | |
119 | ; simm16 | |
120 | ; | |
121 | ; smem - scalar memory | |
122 | ; sbase: aligned pair of sgprs. Specify {size[15:0], base[47:0]} in | |
123 | ; dwords | |
124 | ; sdata: sgpr0-102, flat_scratch, xnack, vcc, tba, tma | |
125 | ; offset: sgpr or 20bit unsigned byte offset | |
126 | ; | |
127 | ; vop2 - vector, two inputs, one output | |
128 | ; vsrc0: sgpr0-102,flat_scratch,xnack,vcc,tba,ttmp0-11,m0,exec, | |
129 | ; inline constant -16 to -64, fp inline immediate, vccz, execz, | |
130 | ; scc, lds, literal constant, vgpr0-255 | |
131 | ; vsrc1: vgpr0-255 | |
132 | ; vdst: vgpr0-255 | |
133 | ; Limitations: At most one SGPR, at most one constant | |
134 | ; if constant is used, SGPR must be M0 | |
135 | ; Only SRC0 can be LDS_DIRECT | |
136 | ; | |
137 | ; constraints: "=v", "vBSv", "v" | |
138 | ; | |
139 | ; vop1 - vector, one input, one output | |
140 | ; vsrc0: same as vop2/src0 | |
141 | ; vdst: vgpr0-255 | |
142 | ; | |
143 | ; constraints: "=v", "vBSv" | |
144 | ; | |
145 | ; vopc - vector, two inputs, one comparison output; | |
146 | ; vsrc0: same as vop2/src0 | |
147 | ; vsrc1: vgpr0-255 | |
148 | ; vdst: | |
149 | ; | |
150 | ; constraints: "vASv", "v" | |
151 | ; | |
152 | ; vop3a - vector, three inputs, one output | |
153 | ; vdst: vgpr0-255, for v_cmp sgpr or vcc | |
154 | ; abs,clamp | |
155 | ; vsrc0: sgpr0-102,vcc,tba,ttmp0-11,m0,exec, | |
156 | ; inline constant -16 to -64, fp inline immediate, vccz, execz, | |
157 | ; scc, lds_direct | |
158 | ; FIXME: really missing 1/pi? really 104 SGPRs | |
159 | ; | |
160 | ; vop3b - vector, three inputs, one vector output, one scalar output | |
161 | ; vsrc0,vsrc1,vsrc2: same as vop3a vsrc0 | |
162 | ; vdst: vgpr0-255 | |
163 | ; sdst: sgpr0-103/vcc/tba/tma/ttmp0-11 | |
164 | ; | |
165 | ; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address | |
166 | ; src0: vgpr0-255 | |
167 | ; dst_sel: BYTE_0-3, WORD_0-1, DWORD | |
168 | ; dst_unused: UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE | |
169 | ; clamp: true/false | |
170 | ; src0_sel: BYTE_0-3, WORD_0-1, DWORD | |
171 | ; flags: src0_sext, src0_neg, src0_abs, src1_sel, src1_sext, src1_neg, | |
172 | ; src1_abs | |
173 | ; | |
174 | ; vop_dpp - second dword for vop1/vop2/vopc for specifying data-parallel ops | |
175 | ; src0: vgpr0-255 | |
176 | ; dpp_ctrl: quad_perm, row_sl0-15, row_sr0-15, row_rr0-15, wf_sl1, | |
177 | ; wf_rl1, wf_sr1, wf_rr1, row_mirror, row_half_mirror, | |
178 | ; bcast15, bcast31 | |
179 | ; flags: src0_neg, src0_abs, src1_neg, src1_abs | |
180 | ; bank_mask: 4-bit mask | |
181 | ; row_mask: 4-bit mask | |
182 | ; | |
183 | ; ds - Local and global data share instructions. | |
184 | ; offset0: 8-bit constant | |
185 | ; offset1: 8-bit constant | |
186 | ; flag: gds | |
187 | ; addr: vgpr0-255 | |
188 | ; data0: vgpr0-255 | |
189 | ; data1: vgpr0-255 | |
190 | ; vdst: vgpr0-255 | |
191 | ; | |
192 | ; mubuf - Untyped memory buffer operation. First word with LDS, second word | |
193 | ; non-LDS. | |
194 | ; offset: 12-bit constant | |
195 | ; vaddr: vgpr0-255 | |
196 | ; vdata: vgpr0-255 | |
197 | ; srsrc: sgpr0-102 | |
198 | ; soffset: sgpr0-102 | |
199 | ; flags: offen, idxen, glc, lds, slc, tfe | |
200 | ; | |
201 | ; mtbuf - Typed memory buffer operation. Two words | |
202 | ; offset: 12-bit constant | |
203 | ; dfmt: 4-bit constant | |
204 | ; nfmt: 3-bit constant | |
205 | ; vaddr: vgpr0-255 | |
206 | ; vdata: vgpr0-255 | |
207 | ; srsrc: sgpr0-102 | |
208 | ; soffset: sgpr0-102 | |
209 | ; flags: offen, idxen, glc, lds, slc, tfe | |
210 | ; | |
211 | ; flat - flat or global memory operations | |
212 | ; flags: glc, slc | |
213 | ; addr: vgpr0-255 | |
214 | ; data: vgpr0-255 | |
215 | ; vdst: vgpr0-255 | |
216 | ; | |
217 | ; mult - expands to multiple instructions (pseudo encoding) | |
218 | ; | |
219 | ; vmult - as mult, when a vector instruction is used. | |
220 | ||
221 | (define_attr "type" | |
222 | "unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc, | |
223 | vop3a,vop3b,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,vmult" | |
224 | (const_string "unknown")) | |
225 | ||
226 | ; Set if instruction is executed in scalar or vector unit | |
227 | ||
228 | (define_attr "unit" "unknown,scalar,vector" | |
229 | (cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult") | |
230 | (const_string "scalar") | |
231 | (eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,ds, | |
232 | vop_sdwa,vop_dpp,flat,vmult") | |
233 | (const_string "vector")] | |
234 | (const_string "unknown"))) | |
235 | ||
236 | ; All vector instructions run as 64 threads as predicated by the EXEC | |
237 | ; register. Scalar operations in vector register require a single lane | |
238 | ; enabled, vector moves require a full set of lanes enabled, and most vector | |
239 | ; operations handle the lane masking themselves. | |
240 | ; The md_reorg pass is responsible for ensuring that EXEC is set appropriately | |
241 | ; according to the following settings: | |
242 | ; auto - md_reorg will inspect def/use to determine what to do. | |
243 | ; none - exec is not needed. | |
244 | ; single - disable all but lane zero. | |
245 | ; full - enable all lanes. | |
246 | ||
247 | (define_attr "exec" "auto,none,single,full" | |
248 | (const_string "auto")) | |
249 | ||
250 | ; Infer the (worst-case) length from the instruction type by default. Many | |
251 | ; types can have an optional immediate word following, which we include here. | |
252 | ; "Multiple" types are counted as two 64-bit instructions. This is just a | |
253 | ; default fallback: it can be overridden per-alternative in insn patterns for | |
254 | ; greater accuracy. | |
255 | ||
256 | (define_attr "length" "" | |
257 | (cond [(eq_attr "type" "sop1") (const_int 8) | |
258 | (eq_attr "type" "sop2") (const_int 8) | |
259 | (eq_attr "type" "sopk") (const_int 8) | |
260 | (eq_attr "type" "sopc") (const_int 8) | |
261 | (eq_attr "type" "sopp") (const_int 4) | |
262 | (eq_attr "type" "smem") (const_int 8) | |
263 | (eq_attr "type" "ds") (const_int 8) | |
264 | (eq_attr "type" "vop1") (const_int 8) | |
265 | (eq_attr "type" "vop2") (const_int 8) | |
266 | (eq_attr "type" "vopc") (const_int 8) | |
267 | (eq_attr "type" "vop3a") (const_int 8) | |
268 | (eq_attr "type" "vop3b") (const_int 8) | |
269 | (eq_attr "type" "vop_sdwa") (const_int 8) | |
270 | (eq_attr "type" "vop_dpp") (const_int 8) | |
271 | (eq_attr "type" "flat") (const_int 8) | |
272 | (eq_attr "type" "mult") (const_int 16) | |
273 | (eq_attr "type" "vmult") (const_int 16)] | |
274 | (const_int 4))) | |
275 | ||
276 | ; Disable alternatives that only apply to specific ISA variants. | |
277 | ||
278 | (define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3")) | |
279 | ||
280 | (define_attr "enabled" "" | |
281 | (cond [(eq_attr "gcn_version" "gcn3") (const_int 1) | |
282 | (and (eq_attr "gcn_version" "gcn5") | |
283 | (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0))) | |
284 | (const_int 1)] | |
285 | (const_int 0))) | |
286 | ||
287 | ; We need to be able to identify v_readlane and v_writelane with | |
288 | ; SGPR lane selection in order to handle "Manually Inserted Wait States". | |
289 | ||
290 | (define_attr "laneselect" "yes,no" (const_string "no")) | |
291 | ||
930c5599 AS |
292 | ; Identify instructions that require a "Manually Inserted Wait State" if |
293 | ; their inputs are overwritten by subsequent instructions. | |
294 | ||
295 | (define_attr "delayeduse" "yes,no" (const_string "no")) | |
296 | ||
3d6275e3 AS |
297 | ;; }}} |
298 | ;; {{{ Iterators useful across the whole machine description | |
299 | ||
300 | (define_mode_iterator SIDI [SI DI]) | |
301 | (define_mode_iterator SFDF [SF DF]) | |
302 | (define_mode_iterator SISF [SI SF]) | |
303 | (define_mode_iterator QIHI [QI HI]) | |
304 | (define_mode_iterator DIDF [DI DF]) | |
03876953 AS |
305 | (define_mode_iterator FP [HF SF DF]) |
306 | (define_mode_iterator FP_1REG [HF SF]) | |
3d6275e3 AS |
307 | |
308 | ;; }}} | |
309 | ;; {{{ Attributes. | |
310 | ||
311 | ; Translate RTX code into GCN instruction mnemonics with and without | |
312 | ; suffixes such as _b32, etc. | |
313 | ||
314 | (define_code_attr mnemonic | |
315 | [(minus "sub%i") | |
316 | (plus "add%i") | |
317 | (ashift "lshl%b") | |
318 | (lshiftrt "lshr%b") | |
319 | (ashiftrt "ashr%i") | |
320 | (and "and%B") | |
321 | (ior "or%B") | |
322 | (xor "xor%B") | |
323 | (mult "mul%i") | |
324 | (smin "min%i") | |
325 | (smax "max%i") | |
326 | (umin "min%u") | |
327 | (umax "max%u") | |
dc941ea9 | 328 | (not "not%B") |
3d6275e3 AS |
329 | (popcount "bcnt_u32%b")]) |
330 | ||
331 | (define_code_attr bare_mnemonic | |
332 | [(plus "add") | |
333 | (minus "sub") | |
334 | (and "and") | |
335 | (ior "or") | |
336 | (xor "xor")]) | |
337 | ||
338 | (define_code_attr s_mnemonic | |
339 | [(not "not%b") | |
34bac264 AS |
340 | (popcount "bcnt1_i32%b") |
341 | (clz "flbit_i32%b") | |
0c06e46a JB |
342 | (ctz "ff1_i32%b") |
343 | (clrsb "flbit_i32%i")]) | |
3d6275e3 AS |
344 | |
345 | (define_code_attr revmnemonic | |
346 | [(minus "subrev%i") | |
347 | (ashift "lshlrev%b") | |
348 | (lshiftrt "lshrrev%b") | |
349 | (ashiftrt "ashrrev%i")]) | |
350 | ||
351 | ; Translate RTX code into corresponding expander name. | |
352 | ||
353 | (define_code_attr expander | |
354 | [(and "and") | |
355 | (ior "ior") | |
356 | (xor "xor") | |
357 | (plus "add") | |
358 | (minus "sub") | |
359 | (ashift "ashl") | |
360 | (lshiftrt "lshr") | |
361 | (ashiftrt "ashr") | |
362 | (mult "mul") | |
363 | (smin "smin") | |
364 | (smax "smax") | |
365 | (umin "umin") | |
366 | (umax "umax") | |
367 | (not "one_cmpl") | |
34bac264 AS |
368 | (popcount "popcount") |
369 | (clz "clz") | |
0e159efc AS |
370 | (ctz "ctz") |
371 | (sign_extend "extend") | |
372 | (zero_extend "zero_extend")]) | |
3d6275e3 AS |
373 | |
374 | ;; }}} | |
375 | ;; {{{ Miscellaneous instructions | |
376 | ||
377 | (define_insn "nop" | |
378 | [(const_int 0)] | |
379 | "" | |
380 | "s_nop\t0x0" | |
381 | [(set_attr "type" "sopp")]) | |
382 | ||
383 | ; FIXME: What should the value of the immediate be? Zero is disallowed, so | |
384 | ; pick 1 for now. | |
385 | (define_insn "trap" | |
386 | [(trap_if (const_int 1) (const_int 0))] | |
387 | "" | |
388 | "s_trap\t1" | |
389 | [(set_attr "type" "sopp")]) | |
390 | ||
391 | ;; }}} | |
392 | ;; {{{ Moves | |
393 | ||
394 | ;; All scalar modes we support moves in. | |
395 | (define_mode_iterator MOV_MODE [BI QI HI SI DI TI SF DF]) | |
396 | ||
397 | ; This is the entry point for creating all kinds of scalar moves, | |
398 | ; including reloads and symbols. | |
399 | ||
400 | (define_expand "mov<mode>" | |
401 | [(set (match_operand:MOV_MODE 0 "nonimmediate_operand") | |
402 | (match_operand:MOV_MODE 1 "general_operand"))] | |
403 | "" | |
404 | { | |
82863a5c AS |
405 | if (SUBREG_P (operands[1]) |
406 | && GET_MODE (operands[1]) == SImode | |
407 | && GET_MODE (SUBREG_REG (operands[1])) == BImode) | |
408 | { | |
409 | /* (reg:BI VCC) has nregs==2 to ensure it gets clobbered as a whole, | |
410 | but (subreg:SI (reg:BI VCC)) doesn't, which causes the LRA liveness | |
411 | checks to assert. Transform this: | |
412 | (set (reg:SI) (subreg:SI (reg:BI))) | |
413 | to this: | |
414 | (set (subreg:BI (reg:SI)) (reg:BI)) */ | |
415 | operands[0] = gen_rtx_SUBREG (BImode, operands[0], 0); | |
416 | operands[1] = SUBREG_REG (operands[1]); | |
417 | } | |
418 | if (SUBREG_P (operands[0]) | |
419 | && GET_MODE (operands[0]) == SImode | |
420 | && GET_MODE (SUBREG_REG (operands[0])) == BImode) | |
421 | { | |
422 | /* Likewise, transform this: | |
423 | (set (subreg:SI (reg:BI)) (reg:SI)) | |
424 | to this: | |
425 | (set (reg:BI) (subreg:BI (reg:SI))) */ | |
426 | operands[0] = SUBREG_REG (operands[0]); | |
427 | operands[1] = gen_rtx_SUBREG (BImode, operands[1], 0); | |
428 | } | |
429 | ||
3d6275e3 AS |
430 | if (MEM_P (operands[0])) |
431 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
432 | ||
433 | if (!lra_in_progress && !reload_completed | |
434 | && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1])) | |
435 | { | |
436 | /* Something is probably trying to generate a move | |
437 | which can only work indirectly. | |
438 | E.g. Move from LDS memory to SGPR hardreg | |
439 | or MEM:QI to SGPR. */ | |
440 | rtx tmpreg = gen_reg_rtx (<MODE>mode); | |
441 | emit_insn (gen_mov<mode> (tmpreg, operands[1])); | |
442 | emit_insn (gen_mov<mode> (operands[0], tmpreg)); | |
443 | DONE; | |
444 | } | |
445 | ||
446 | if (<MODE>mode == DImode | |
447 | && (GET_CODE (operands[1]) == SYMBOL_REF | |
448 | || GET_CODE (operands[1]) == LABEL_REF)) | |
449 | { | |
c2709ec4 AS |
450 | if (lra_in_progress) |
451 | emit_insn (gen_movdi_symbol_save_scc (operands[0], operands[1])); | |
452 | else | |
453 | emit_insn (gen_movdi_symbol (operands[0], operands[1])); | |
3d6275e3 AS |
454 | DONE; |
455 | } | |
456 | }) | |
457 | ||
458 | ; Split invalid moves into two valid moves | |
459 | ||
460 | (define_split | |
461 | [(set (match_operand:MOV_MODE 0 "nonimmediate_operand") | |
462 | (match_operand:MOV_MODE 1 "general_operand"))] | |
463 | "!reload_completed && !lra_in_progress | |
464 | && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1])" | |
465 | [(set (match_dup 2) (match_dup 1)) | |
466 | (set (match_dup 0) (match_dup 2))] | |
467 | { | |
468 | operands[2] = gen_reg_rtx(<MODE>mode); | |
469 | }) | |
470 | ||
471 | ; We need BImode move so we can reload flags registers. | |
472 | ||
473 | (define_insn "*movbi" | |
474 | [(set (match_operand:BI 0 "nonimmediate_operand" | |
475 | "=Sg, v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM") | |
476 | (match_operand:BI 1 "gcn_load_operand" | |
477 | "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))] | |
478 | "" | |
479 | { | |
480 | /* SCC as an operand is currently not accepted by the LLVM assembler, so | |
481 | we emit bytes directly as a workaround. */ | |
482 | switch (which_alternative) { | |
483 | case 0: | |
484 | if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG) | |
485 | return "; s_mov_b32\t%0,%1 is not supported by the assembler.\;" | |
486 | ".byte\t0xfd\;" | |
487 | ".byte\t0x0\;" | |
488 | ".byte\t0x80|%R0\;" | |
489 | ".byte\t0xbe"; | |
490 | else | |
491 | return "s_mov_b32\t%0, %1"; | |
492 | case 1: | |
493 | if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG) | |
494 | return "; v_mov_b32\t%0, %1\;" | |
495 | ".byte\t0xfd\;" | |
496 | ".byte\t0x2\;" | |
497 | ".byte\t((%V0<<1)&0xff)\;" | |
498 | ".byte\t0x7e|(%V0>>7)"; | |
499 | else | |
500 | return "v_mov_b32\t%0, %1"; | |
501 | case 2: | |
502 | return "v_readlane_b32\t%0, %1, 0"; | |
503 | case 3: | |
504 | return "s_cmpk_lg_u32\t%1, 0"; | |
505 | case 4: | |
506 | return "v_cmp_ne_u32\tvcc, 0, %1"; | |
507 | case 5: | |
508 | if (REGNO (operands[1]) == SCC_REG) | |
509 | return "; s_mov_b32\t%0, %1 is not supported by the assembler.\;" | |
510 | ".byte\t0xfd\;" | |
511 | ".byte\t0x0\;" | |
512 | ".byte\t0xea\;" | |
513 | ".byte\t0xbe\;" | |
514 | "s_mov_b32\tvcc_hi, 0"; | |
515 | else | |
516 | return "s_mov_b32\tvcc_lo, %1\;" | |
517 | "s_mov_b32\tvcc_hi, 0"; | |
518 | case 6: | |
519 | return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)"; | |
520 | case 7: | |
930c5599 | 521 | return "s_store_dword\t%1, %A0"; |
3d6275e3 AS |
522 | case 8: |
523 | return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0"; | |
524 | case 9: | |
930c5599 | 525 | return "flat_store_dword\t%A0, %1%O0%g0"; |
3d6275e3 AS |
526 | case 10: |
527 | return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)"; | |
528 | case 11: | |
930c5599 | 529 | return "global_store_dword\t%A0, %1%O0%g0"; |
3d6275e3 AS |
530 | default: |
531 | gcc_unreachable (); | |
532 | } | |
533 | } | |
534 | [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat, | |
535 | flat,flat") | |
536 | (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*") | |
537 | (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")]) | |
538 | ||
539 | ; 32bit move pattern | |
540 | ||
541 | (define_insn "*mov<mode>_insn" | |
542 | [(set (match_operand:SISF 0 "nonimmediate_operand" | |
543 | "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG, v,SD, v,RM") | |
544 | (match_operand:SISF 1 "gcn_load_operand" | |
545 | "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B, v,RLRG, Y,RM, v"))] | |
546 | "" | |
547 | "@ | |
548 | s_mov_b32\t%0, %1 | |
549 | s_movk_i32\t%0, %1 | |
550 | s_mov_b32\t%0, %1 | |
551 | s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0) | |
930c5599 | 552 | s_buffer_store%s1\t%1, s[0:3], %0 |
3d6275e3 | 553 | s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0) |
930c5599 | 554 | s_store_dword\t%1, %A0 |
3d6275e3 AS |
555 | v_mov_b32\t%0, %1 |
556 | v_readlane_b32\t%0, %1, 0 | |
557 | v_writelane_b32\t%0, %1, 0 | |
558 | flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0 | |
930c5599 | 559 | flat_store_dword\t%A0, %1%O0%g0 |
3d6275e3 | 560 | v_mov_b32\t%0, %1 |
e929d65b | 561 | ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) |
3d6275e3 AS |
562 | ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) |
563 | s_mov_b32\t%0, %1 | |
564 | global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) | |
930c5599 | 565 | global_store_dword\t%A0, %1%O0%g0" |
3d6275e3 AS |
566 | [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat, |
567 | flat,vop1,ds,ds,sop1,flat,flat") | |
568 | (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*") | |
569 | (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")]) | |
570 | ||
571 | ; 8/16bit move pattern | |
aad32a00 | 572 | ; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on |
3d6275e3 AS |
573 | |
574 | (define_insn "*mov<mode>_insn" | |
575 | [(set (match_operand:QIHI 0 "nonimmediate_operand" | |
576 | "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM") | |
577 | (match_operand:QIHI 1 "gcn_load_operand" | |
578 | "SSA, J, B,v, v,Sv,RF, v,B, v,RLRG,RM, v"))] | |
579 | "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])" | |
580 | "@ | |
581 | s_mov_b32\t%0, %1 | |
582 | s_movk_i32\t%0, %1 | |
583 | s_mov_b32\t%0, %1 | |
584 | v_mov_b32\t%0, %1 | |
585 | v_readlane_b32\t%0, %1, 0 | |
586 | v_writelane_b32\t%0, %1, 0 | |
587 | flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0 | |
930c5599 | 588 | flat_store%s0\t%A0, %1%O0%g0 |
3d6275e3 | 589 | v_mov_b32\t%0, %1 |
e929d65b | 590 | ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) |
3d6275e3 AS |
591 | ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) |
592 | global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) | |
930c5599 | 593 | global_store%s0\t%A0, %1%O0%g0" |
3d6275e3 AS |
594 | [(set_attr "type" |
595 | "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat") | |
596 | (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*") | |
597 | (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")]) | |
598 | ||
599 | ; 64bit move pattern | |
600 | ||
601 | (define_insn_and_split "*mov<mode>_insn" | |
602 | [(set (match_operand:DIDF 0 "nonimmediate_operand" | |
603 | "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM") | |
604 | (match_operand:DIDF 1 "general_operand" | |
605 | "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v, v,RLRG,RM, v"))] | |
606 | "GET_CODE(operands[1]) != SYMBOL_REF" | |
607 | "@ | |
608 | s_mov_b64\t%0, %1 | |
609 | s_mov_b64\t%0, %1 | |
610 | # | |
930c5599 | 611 | s_store_dwordx2\t%1, %A0 |
3d6275e3 AS |
612 | s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0) |
613 | # | |
614 | # | |
615 | # | |
616 | # | |
617 | flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0 | |
930c5599 | 618 | flat_store_dwordx2\t%A0, %1%O0%g0 |
e929d65b | 619 | ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) |
3d6275e3 AS |
620 | ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) |
621 | global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) | |
930c5599 | 622 | global_store_dwordx2\t%A0, %1%O0%g0" |
01f5d5e8 AS |
623 | "reload_completed |
624 | && ((!MEM_P (operands[0]) && !MEM_P (operands[1]) | |
625 | && !gcn_sgpr_move_p (operands[0], operands[1])) | |
626 | || (GET_CODE (operands[1]) == CONST_INT | |
627 | && !gcn_constant64_p (operands[1])))" | |
3d6275e3 AS |
628 | [(set (match_dup 0) (match_dup 1)) |
629 | (set (match_dup 2) (match_dup 3))] | |
630 | { | |
631 | rtx inlo = gen_lowpart (SImode, operands[1]); | |
632 | rtx inhi = gen_highpart_mode (SImode, <MODE>mode, operands[1]); | |
633 | rtx outlo = gen_lowpart (SImode, operands[0]); | |
634 | rtx outhi = gen_highpart_mode (SImode, <MODE>mode, operands[0]); | |
635 | ||
636 | /* Ensure that overlapping registers aren't corrupted. */ | |
ccf93cd0 | 637 | if (reg_overlap_mentioned_p (outlo, inhi)) |
3d6275e3 AS |
638 | { |
639 | operands[0] = outhi; | |
640 | operands[1] = inhi; | |
641 | operands[2] = outlo; | |
642 | operands[3] = inlo; | |
643 | } | |
644 | else | |
645 | { | |
646 | operands[0] = outlo; | |
647 | operands[1] = inlo; | |
648 | operands[2] = outhi; | |
649 | operands[3] = inhi; | |
650 | } | |
651 | } | |
652 | [(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat, | |
653 | flat,ds,ds,flat,flat") | |
654 | (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")]) | |
655 | ||
656 | ; 128-bit move. | |
657 | ||
658 | (define_insn_and_split "*movti_insn" | |
659 | [(set (match_operand:TI 0 "nonimmediate_operand" | |
660 | "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v") | |
661 | (match_operand:TI 1 "general_operand" | |
662 | "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))] | |
663 | "" | |
664 | "@ | |
665 | # | |
930c5599 | 666 | s_store_dwordx4\t%1, %A0 |
3d6275e3 | 667 | s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0) |
930c5599 | 668 | flat_store_dwordx4\t%A0, %1%O0%g0 |
3d6275e3 AS |
669 | flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0 |
670 | # | |
671 | # | |
672 | # | |
930c5599 | 673 | global_store_dwordx4\t%A0, %1%O0%g0 |
3d6275e3 | 674 | global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) |
e929d65b | 675 | ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) |
3d6275e3 AS |
676 | ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)" |
677 | "reload_completed | |
678 | && REG_P (operands[0]) | |
679 | && (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)" | |
680 | [(set (match_dup 0) (match_dup 1)) | |
681 | (set (match_dup 2) (match_dup 3)) | |
682 | (set (match_dup 4) (match_dup 5)) | |
683 | (set (match_dup 6) (match_dup 7))] | |
684 | { | |
8ae0de56 AS |
685 | gcc_assert (rtx_equal_p (operands[0], operands[1]) |
686 | || !reg_overlap_mentioned_p (operands[0], operands[1])); | |
3d6275e3 AS |
687 | operands[6] = gcn_operand_part (TImode, operands[0], 3); |
688 | operands[7] = gcn_operand_part (TImode, operands[1], 3); | |
689 | operands[4] = gcn_operand_part (TImode, operands[0], 2); | |
690 | operands[5] = gcn_operand_part (TImode, operands[1], 2); | |
691 | operands[2] = gcn_operand_part (TImode, operands[0], 1); | |
692 | operands[3] = gcn_operand_part (TImode, operands[1], 1); | |
693 | operands[0] = gcn_operand_part (TImode, operands[0], 0); | |
694 | operands[1] = gcn_operand_part (TImode, operands[1], 0); | |
695 | } | |
696 | [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\ | |
697 | ds,ds") | |
b2c113ae | 698 | (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*") |
3d6275e3 AS |
699 | (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")]) |
700 | ||
701 | ;; }}} | |
702 | ;; {{{ Prologue/Epilogue | |
703 | ||
704 | (define_insn "prologue_use" | |
705 | [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)] | |
706 | "" | |
707 | "" | |
708 | [(set_attr "length" "0")]) | |
709 | ||
710 | (define_expand "prologue" | |
711 | [(const_int 0)] | |
712 | "" | |
713 | { | |
714 | gcn_expand_prologue (); | |
715 | DONE; | |
716 | }) | |
717 | ||
718 | (define_expand "epilogue" | |
719 | [(const_int 0)] | |
720 | "" | |
721 | { | |
722 | gcn_expand_epilogue (); | |
723 | DONE; | |
724 | }) | |
725 | ||
726 | ;; }}} | |
727 | ;; {{{ Control flow | |
728 | ||
729 | ; This pattern must satisfy simplejump_p, which means it cannot be a parallel | |
730 | ; that clobbers SCC. Thus, we must preserve SCC if we're generating a long | |
731 | ; branch sequence. | |
732 | ||
733 | (define_insn "jump" | |
734 | [(set (pc) | |
735 | (label_ref (match_operand 0)))] | |
736 | "" | |
737 | { | |
738 | if (get_attr_length (insn) == 4) | |
739 | return "s_branch\t%0"; | |
740 | else | |
741 | /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG. */ | |
742 | return "; s_mov_b32\ts22, scc is not supported by the assembler.\;" | |
743 | ".long\t0xbe9600fd\;" | |
744 | "s_getpc_b64\ts[20:21]\;" | |
745 | "s_add_u32\ts20, s20, %0@rel32@lo+4\;" | |
746 | "s_addc_u32\ts21, s21, %0@rel32@hi+4\;" | |
747 | "s_cmpk_lg_u32\ts22, 0\;" | |
748 | "s_setpc_b64\ts[20:21]"; | |
749 | } | |
750 | [(set_attr "type" "sopp") | |
751 | (set (attr "length") | |
752 | (if_then_else (and (ge (minus (match_dup 0) (pc)) | |
753 | (const_int -131072)) | |
754 | (lt (minus (match_dup 0) (pc)) | |
755 | (const_int 131072))) | |
756 | (const_int 4) | |
757 | (const_int 32)))]) | |
758 | ||
759 | (define_insn "indirect_jump" | |
760 | [(set (pc) | |
761 | (match_operand:DI 0 "register_operand" "Sg"))] | |
762 | "" | |
763 | "s_setpc_b64\t%0" | |
764 | [(set_attr "type" "sop1") | |
765 | (set_attr "length" "4")]) | |
766 | ||
767 | (define_insn "cjump" | |
768 | [(set (pc) | |
769 | (if_then_else | |
770 | (match_operator:BI 1 "gcn_conditional_operator" | |
771 | [(match_operand:BI 2 "gcn_conditional_register_operand" "ca,cV") | |
772 | (const_int 0)]) | |
773 | (label_ref (match_operand 0)) | |
774 | (pc)))] | |
775 | "" | |
776 | { | |
777 | if (get_attr_length (insn) == 4) | |
778 | return "s_cbranch%C1\t%0"; | |
779 | else | |
780 | { | |
781 | /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG but | |
782 | restores SCC. */ | |
783 | if (REGNO (operands[2]) == SCC_REG) | |
784 | { | |
785 | if (GET_CODE (operands[1]) == EQ) | |
786 | return "s_cbranch%c1\t.Lskip%=\;" | |
787 | "s_getpc_b64\ts[20:21]\;" | |
788 | "s_add_u32\ts20, s20, %0@rel32@lo+4\;" | |
789 | "s_addc_u32\ts21, s21, %0@rel32@hi+4\;" | |
790 | "s_cmp_lg_u32\t0, 0\;" | |
791 | "s_setpc_b64\ts[20:21]\n" | |
792 | ".Lskip%=:"; | |
793 | else | |
794 | return "s_cbranch%c1\t.Lskip%=\;" | |
795 | "s_getpc_b64\ts[20:21]\;" | |
796 | "s_add_u32\ts20, s20, %0@rel32@lo+4\;" | |
797 | "s_addc_u32\ts21, s21, %0@rel32@hi+4\;" | |
798 | "s_cmp_eq_u32\t0, 0\;" | |
799 | "s_setpc_b64\ts[20:21]\n" | |
800 | ".Lskip%=:"; | |
801 | } | |
802 | else | |
803 | return "s_cbranch%c1\t.Lskip%=\;" | |
804 | "; s_mov_b32\ts22, scc is not supported by the assembler.\;" | |
805 | ".byte\t0xfd\;" | |
806 | ".byte\t0x0\;" | |
807 | ".byte\t0x80|22\;" | |
808 | ".byte\t0xbe\;" | |
809 | "s_getpc_b64\ts[20:21]\;" | |
810 | "s_add_u32\ts20, s20, %0@rel32@lo+4\;" | |
811 | "s_addc_u32\ts21, s21, %0@rel32@hi+4\;" | |
812 | "s_cmpk_lg_u32\ts22, 0\;" | |
813 | "s_setpc_b64\ts[20:21]\n" | |
814 | ".Lskip%=:"; | |
815 | } | |
816 | } | |
817 | [(set_attr "type" "sopp") | |
818 | (set (attr "length") | |
819 | (if_then_else (and (ge (minus (match_dup 0) (pc)) | |
820 | (const_int -131072)) | |
821 | (lt (minus (match_dup 0) (pc)) | |
822 | (const_int 131072))) | |
823 | (const_int 4) | |
824 | (const_int 36)))]) | |
825 | ||
826 | ; Returning from a normal function is different to returning from a | |
827 | ; kernel function. | |
828 | ||
829 | (define_insn "gcn_return" | |
830 | [(return)] | |
831 | "" | |
832 | { | |
833 | if (cfun && cfun->machine && cfun->machine->normal_function) | |
834 | return "s_setpc_b64\ts[18:19]"; | |
835 | else | |
e8daba7e | 836 | return "s_waitcnt\tlgkmcnt(0)\;s_dcache_wb\;s_endpgm"; |
3d6275e3 AS |
837 | } |
838 | [(set_attr "type" "sop1") | |
e8daba7e | 839 | (set_attr "length" "12")]) |
3d6275e3 AS |
840 | |
; Standard named expander for calls with no return value.  The parallel
; clobbers the link register and provides a DI scratch for patterns that
; need to materialise the callee address.
(define_expand "call"
  [(parallel [(call (match_operand 0 "")
		    (match_operand 1 ""))
	      (clobber (reg:DI LR_REGNUM))
	      (clobber (match_scratch:DI 2))])]
  ""
  {})
848 | ||
; Direct call to a symbolic address.  Alternative 0 builds a PC-relative
; 64-bit address in the scratch register (getpc/add/addc) then swaps the PC;
; alternative 1 uses an inline-constant address directly.
(define_insn "gcn_simple_call"
  [(call (mem (match_operand 0 "immediate_operand" "Y,B"))
	 (match_operand 1 "const_int_operand"))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 2 "=&Sg,X"))]
  ""
  "@
  s_getpc_b64\t%2\;s_add_u32\t%L2, %L2, %0@rel32@lo+4\;s_addc_u32\t%H2, %H2, %0@rel32@hi+4\;s_swappc_b64\ts[18:19], %2
  s_swappc_b64\ts[18:19], %0"
  [(set_attr "type" "mult,sop1")
   (set_attr "length" "24,4")])
860 | ||
; Load the 64-bit address of a symbol or label into an SGPR pair, using
; PC-relative relocations.  Weak symbols are loaded indirectly via the GOT
; (@gotpcrel32 plus s_load_dwordx2); others use a direct @rel32 offset.
; The address arithmetic clobbers SCC, as declared.
(define_insn "movdi_symbol"
 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
       (match_operand:DI 1 "general_operand" "Y"))
  (clobber (reg:BI SCC_REG))]
 "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
 {
  if (SYMBOL_REF_P (operands[1])
      && SYMBOL_REF_WEAK (operands[1]))
    return "s_getpc_b64\t%0\;"
	   "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
	   "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
	   "s_load_dwordx2\t%0, %0\;"
	   "s_waitcnt\tlgkmcnt(0)";

  return "s_getpc_b64\t%0\;"
	 "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
	 "s_addc_u32\t%H0, %H0, %1@rel32@hi+4";
 }
 [(set_attr "type" "mult")
  (set_attr "length" "32")])
881 | ||
; As movdi_symbol, but for use during/after register allocation when SCC
; may be live: SCC is saved into CC_SAVE_REG (s22) before the address
; arithmetic and re-materialised with s_cmpk_lg_u32 afterwards.  The
; "s_mov_b32 s22, scc" is emitted as a raw .long encoding because the
; assembler does not accept scc as a source operand.
(define_insn "movdi_symbol_save_scc"
 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
       (match_operand:DI 1 "general_operand" "Y"))
  (clobber (reg:BI CC_SAVE_REG))]
 "(GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF)
  && (lra_in_progress || reload_completed)"
 {
  /* !!! These sequences clobber CC_SAVE_REG.  */

  if (SYMBOL_REF_P (operands[1])
      && SYMBOL_REF_WEAK (operands[1]))
    /* Weak symbols: fetch the address indirectly through the GOT.  */
    return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
	   ".long\t0xbe9600fd\;"
	   "s_getpc_b64\t%0\;"
	   "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
	   "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
	   "s_load_dwordx2\t%0, %0\;"
	   "s_cmpk_lg_u32\ts22, 0\;"
	   "s_waitcnt\tlgkmcnt(0)";

  return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
	 ".long\t0xbe9600fd\;"
	 "s_getpc_b64\t%0\;"
	 "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
	 "s_addc_u32\t%H0, %H0, %1@rel32@hi+4\;"
	 "s_cmpk_lg_u32\ts22, 0";
 }
 [(set_attr "type" "mult")
  (set_attr "length" "40")])
911 | ||
c2709ec4 | 912 | |
3d6275e3 AS |
; Indirect call through an SGPR pair holding the callee address; the return
; address lands in the link register s[18:19].
(define_insn "gcn_indirect_call"
  [(call (mem (match_operand:DI 0 "register_operand" "Sg"))
	 (match_operand 1 "" ""))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 2 "=X"))]
  ""
  "s_swappc_b64\ts[18:19], %0"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])
922 | ||
; Standard named expander for calls that produce a value; mirrors the
; "call" expander with an added set of the result register.
(define_expand "call_value"
  [(parallel [(set (match_operand 0 "")
		   (call (match_operand 1 "")
			 (match_operand 2 "")))
	      (clobber (reg:DI LR_REGNUM))
	      (clobber (match_scratch:DI 3))])]
  ""
  {})
931 | ||
; Direct call producing a value.  Alternative 0 materialises a PC-relative
; address in the scratch pair (getpc/add/addc) before swapping the PC;
; alternative 1 jumps via an inline-constant address.
; The attributes now match the sibling "gcn_simple_call" pattern: the
; 4-instruction sequence is type "mult"/24 bytes, the single s_swappc_b64
; is type "sop1"/4 bytes (previously both alternatives claimed sop1/24).
(define_insn "gcn_call_value"
  [(set (match_operand 0 "register_operand" "=Sg,Sg")
	(call (mem (match_operand 1 "immediate_operand" "Y,B"))
	      (match_operand 2 "const_int_operand")))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 3 "=&Sg,X"))]
  ""
  "@
  s_getpc_b64\t%3\;s_add_u32\t%L3, %L3, %1@rel32@lo+4\;s_addc_u32\t%H3, %H3, %1@rel32@hi+4\;s_swappc_b64\ts[18:19], %3
  s_swappc_b64\ts[18:19], %1"
  [(set_attr "type" "mult,sop1")
   (set_attr "length" "24,4")])
944 | ||
; Indirect call producing a value; the callee address is in an SGPR pair.
(define_insn "gcn_call_value_indirect"
  [(set (match_operand 0 "register_operand" "=Sg")
	(call (mem (match_operand:DI 1 "register_operand" "Sg"))
	      (match_operand 2 "" "")))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 3 "=X"))]
  ""
  "s_swappc_b64\ts[18:19], %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])
955 | ||
956 | ; GCN does not have an instruction to clear only part of the instruction | |
957 | ; cache, so the operands are ignored. | |
958 | ||
; Invalidate the whole instruction cache.  GCN cannot invalidate a
; sub-range, so the begin/end operands are accepted but ignored.
(define_insn "clear_icache"
  [(unspec_volatile
    [(match_operand 0 "") (match_operand 1 "")]
    UNSPECV_ICACHE_INV)]
  ""
  "s_icache_inv"
  [(set_attr "type" "sopp")
   (set_attr "length" "4")])
967 | ||
968 | ;; }}} | |
969 | ;; {{{ Conditionals | |
970 | ||
971 | ; 32-bit compare, scalar unit only | |
972 | ||
; 32-bit scalar compare, setting a scalar condition register.  Alternative 1
; uses the 16-bit-immediate SOPK form; alternatives 2/3 take a 32-bit
; literal on one side (hence the 8-byte lengths).
(define_insn "cstoresi4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand"
							 "=cs, cs, cs, cs")
	(match_operator:BI 1 "gcn_compare_operator"
	  [(match_operand:SI 2 "gcn_alu_operand" "SSA,SSA,SSB, SS")
	   (match_operand:SI 3 "gcn_alu_operand" "SSA,SSL, SS,SSB")]))]
  ""
  "@
   s_cmp%D1\t%2, %3
   s_cmpk%D1\t%2, %3
   s_cmp%D1\t%2, %3
   s_cmp%D1\t%2, %3"
  [(set_attr "type" "sopc,sopk,sopk,sopk")
   (set_attr "length" "4,4,8,8")])
987 | ||
; 32-bit conditional branch: materialise the comparison result in a BImode
; condition register with cstoresi4, then branch on it being non-zero.
(define_expand "cbranchsi4"
  [(match_operator 0 "gcn_compare_operator"
     [(match_operand:SI 1 "gcn_alu_operand")
      (match_operand:SI 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstoresi4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
			       gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })
1001 | ||
1002 | ; 64-bit compare; either unit, but scalar allows limited operators | |
1003 | ||
; 64-bit compare expander; the actual insn chosen (scalar or vector) is
; selected by the patterns below via their predicates/constraints.
(define_expand "cstoredi4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand")
	(match_operator:BI 1 "gcn_compare_operator"
	  [(match_operand:DI 2 "gcn_alu_operand")
	   (match_operand:DI 3 "gcn_alu_operand")]))]
  ""
  {})
1011 | ||
; 64-bit compare on either unit; the scalar unit only supports the limited
; operator set accepted by gcn_compare_64bit_operator.
(define_insn "cstoredi4_vec_and_scalar"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cs, cV")
	(match_operator:BI 1 "gcn_compare_64bit_operator"
	  [(match_operand:DI 2 "gcn_alu_operand"	       "%SSA,vSvC")
	   (match_operand:DI 3 "gcn_alu_operand"	       " SSC,   v")]))]
  ""
  "@
   s_cmp%D1\t%2, %3
   v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "sopc,vopc")
   (set_attr "length" "8")])
1023 | ||
; 64-bit compare on the vector unit only; supports the full operator set.
(define_insn "cstoredi4_vector"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cV")
	(match_operator:BI 1 "gcn_compare_operator"
	  [(match_operand:DI 2 "gcn_alu_operand"	       "vSvB")
	   (match_operand:DI 3 "gcn_alu_operand"	       "   v")]))]
  ""
  "v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "vopc")
   (set_attr "length" "8")])
1033 | ||
; 64-bit conditional branch, via cstoredi4 then a branch on the BImode
; condition register (same shape as cbranchsi4).
(define_expand "cbranchdi4"
  [(match_operator 0 "gcn_compare_operator"
     [(match_operand:DI 1 "gcn_alu_operand")
      (match_operand:DI 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstoredi4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
			       gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })
1047 | ||
1048 | ; FP compare; vector unit only | |
1049 | ||
; Floating-point compare (SFmode/DFmode); vector unit only, result in VCC.
(define_insn "cstore<mode>4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cV")
	(match_operator:BI 1 "gcn_fp_compare_operator"
	  [(match_operand:SFDF 2 "gcn_alu_operand" "vB")
	   (match_operand:SFDF 3 "gcn_alu_operand" "v")]))]
  ""
  "v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "vopc")
   (set_attr "length" "8")])
1059 | ||
; Floating-point conditional branch: compare into a BImode register, then
; branch when the result is non-zero.
(define_expand "cbranch<mode>4"
  [(match_operator 0 "gcn_fp_compare_operator"
     [(match_operand:SFDF 1 "gcn_alu_operand")
      (match_operand:SFDF 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstore<mode>4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
			       gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })
1073 | ||
1074 | ;; }}} | |
1075 | ;; {{{ ALU special cases: Plus | |
1076 | ||
; 32-bit add on either unit.  The scalar forms clobber SCC (carry out);
; the vector form clobbers VCC.  Alternative 1 is the 16-bit-immediate
; SOPK form; alternative 2 takes a 32-bit literal.
(define_insn "addsi3"
  [(set (match_operand:SI 0 "register_operand"	       "= Sg, Sg, Sg,   v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA,  0,SgA,   v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgA,SgJ,  B,vBSv")))
   (clobber (match_scratch:BI 3			       "= cs, cs, cs,   X"))
   (clobber (match_scratch:DI 4			       "=  X,  X,  X,  cV"))]
  ""
  "@
   s_add_i32\t%0, %1, %2
   s_addk_i32\t%0, %2
   s_add_i32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,sopk,sop2,vop2")
   (set_attr "length" "4,4,8,8")])
1091 | ||
; 32-bit add that explicitly clobbers SCC (rather than a scratch BI reg);
; convenience expander matching the addsi3 parallel shape.
(define_expand "addsi3_scc"
  [(parallel [(set (match_operand:SI 0 "register_operand")
		   (plus:SI (match_operand:SI 1 "gcn_alu_operand")
			    (match_operand:SI 2 "gcn_alu_operand")))
	      (clobber (reg:BI SCC_REG))
	      (clobber (scratch:DI))])]
  ""
  {})
1100 | ||
1101 | ; Having this as an insn_and_split allows us to keep together DImode adds | |
1102 | ; through some RTL optimisation passes, and means the CC reg we set isn't | |
1103 | ; dependent on the constraint alternative (which doesn't seem to work well). | |
1104 | ||
3d6275e3 AS |
1105 | ; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be |
1106 | ; used as an operand due to the read of VCC, so we restrict constants to the | |
1107 | ; inlinable range for that alternative. | |
1108 | ||
; 64-bit add, kept whole until after reload then split into a low-part
; add-with-carry-out plus a high-part add-with-carry-in.  The carry
; register depends on which unit was allocated: VCC for VGPRs, SCC for
; SGPRs.
(define_insn_and_split "adddi3"
  [(set (match_operand:DI 0 "register_operand"		 "=Sg, v")
	(plus:DI (match_operand:DI 1 "register_operand"	 " Sg, v")
		 (match_operand:DI 2 "nonmemory_operand" "SgB,vA")))
   (clobber (match_scratch:BI 3				 "=cs, X"))
   (clobber (match_scratch:DI 4				 "= X,cV"))]
  ""
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    /* Pick the carry register to match the destination's register file.  */
    rtx cc = gen_rtx_REG (BImode, gcn_vgpr_register_operand (operands[1],
							     DImode)
			  ? VCC_REG : SCC_REG);

    emit_insn (gen_addsi3_scalar_carry
	       (gcn_operand_part (DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[1], 0),
		gcn_operand_part (DImode, operands[2], 0),
		cc));
    rtx val = gcn_operand_part (DImode, operands[2], 1);
    /* Use the cheaper carry-plus-zero form when the high word is 0.  */
    if (val != const0_rtx)
      emit_insn (gen_addcsi3_scalar
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  gcn_operand_part (DImode, operands[2], 1),
		  cc, cc));
    else
      emit_insn (gen_addcsi3_scalar_zero
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  cc));
    DONE;
  }
  [(set_attr "type" "mult,vmult")
   (set_attr "length" "8")])
1145 | ||
; 64-bit add that explicitly clobbers SCC; convenience expander matching
; the adddi3 parallel shape.
(define_expand "adddi3_scc"
  [(parallel [(set (match_operand:DI 0 "register_operand")
		   (plus:DI (match_operand:DI 1 "register_operand")
			    (match_operand:DI 2 "nonmemory_operand")))
	      (clobber (reg:BI SCC_REG))
	      (clobber (scratch:DI))])]
  ""
  {})
1154 | ||
1155 | ;; Add with carry. | |
1156 | ||
; 32-bit add producing a carry-out: operand 3 is set to (a + b) <u a,
; which is exactly the unsigned-overflow condition of the add.
(define_insn "addsi3_scalar_carry"
  [(set (match_operand:SI 0 "register_operand"	       "= Sg, v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgB,vB")))
   (set (match_operand:BI 3 "register_operand"	       "= cs,cV")
	(ltu:BI (plus:SI (match_dup 1)
			 (match_dup 2))
		(match_dup 1)))]
  ""
  "@
   s_add_u32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8,8")])
1171 | ||
; Variant of addsi3_scalar_carry for constant addends, where combine has
; rewritten the overflow test as (a + n) >=u -n; the insn condition
; enforces operand 3 == -operand 2 so this is the same carry-out.
(define_insn "addsi3_scalar_carry_cst"
  [(set (match_operand:SI 0 "register_operand"	         "=Sg, v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand"   "SgA, v")
		 (match_operand:SI 2 "const_int_operand" " n, n")))
   (set (match_operand:BI 4 "register_operand"	         "=cs,cV")
	(geu:BI (plus:SI (match_dup 1)
			 (match_dup 2))
		(match_operand:SI 3 "const_int_operand"  " n, n")))]
  "INTVAL (operands[2]) == -INTVAL (operands[3])"
  "@
   s_add_u32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "4")])
1186 | ||
; Add-with-carry-in: dest = src1 + src2 + carry (operand 3), with the new
; carry written back to the same register (operand 4 is tied to 3).  The
; carry-out RTL is the exact unsigned-overflow expression of the
; three-way sum.
(define_insn "addcsi3_scalar"
  [(set (match_operand:SI 0 "register_operand"		       "= Sg, v")
	(plus:SI (plus:SI (zero_extend:SI
			    (match_operand:BI 3 "register_operand" "= cs,cV"))
			  (match_operand:SI 1 "gcn_alu_operand" "%SgA, v"))
		 (match_operand:SI 2 "gcn_alu_operand"		" SgB,vA")))
   (set (match_operand:BI 4 "register_operand"			"= 3, 3")
	(ior:BI (ltu:BI (plus:SI
			  (plus:SI
			    (zero_extend:SI (match_dup 3))
			    (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:BI (plus:SI (zero_extend:SI (match_dup 3)) (match_dup 1))
			(match_dup 1))))]
  ""
  "@
   s_addc_u32\t%0, %1, %2
   v_addc%^_u32\t%0, vcc, %2, %1, vcc"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8,4")])
1208 | ||
; Add-with-carry-in of zero: dest = src + carry, updating the carry
; register (operand 2) in place.  Used by the adddi3 split when the high
; word of the addend is zero.
(define_insn "addcsi3_scalar_zero"
  [(set (match_operand:SI 0 "register_operand"		  "=Sg, v")
	(plus:SI (zero_extend:SI
		   (match_operand:BI 2 "register_operand" "=cs,cV"))
		 (match_operand:SI 1 "gcn_alu_operand"	  "SgA, v")))
   (set (match_dup 2)
	(ltu:BI (plus:SI (zero_extend:SI (match_dup 2))
			 (match_dup 1))
		(match_dup 1)))]
  ""
  "@
   s_addc_u32\t%0, %1, 0
   v_addc%^_u32\t%0, vcc, 0, %1, vcc"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "4")])
1224 | ||
1225 | ; "addptr" is the same as "add" except that it must not write to VCC or SCC | |
1226 | ; as a side-effect. Unfortunately GCN does not have a suitable instruction | |
76136f7f AS |
1227 | ; for this, so we use CC_SAVE_REG as a temp. |
1228 | ; Note that it is not safe to save/clobber/restore as separate insns because | |
1229 | ; doing so will break data-flow analysis, so this must use multiple | |
1230 | ; instructions in one insn. | |
3abfd4f3 AS |
1231 | ; |
1232 | ; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever | |
1233 | ; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway. | |
76136f7f AS |
1234 | ; |
1235 | ; The SGPR alternative is preferred as it is typically used with mov_sgprbase. | |
3d6275e3 AS |
1236 | |
; 64-bit pointer add that must not modify VCC or SCC.  The VGPR variant
; routes the carry through CC_SAVE_REG instead of VCC; the SGPR variant
; saves SCC into CC_SAVE_REG, performs the add, then restores SCC via
; s_cmpk_lg_u32.  All steps are emitted inside one insn so data-flow
; analysis never sees the temporarily-clobbered flags.
(define_insn "addptrdi3"
  [(set (match_operand:DI 0 "register_operand"		 "= v, Sg")
	(unspec:DI [
	    (plus:DI (match_operand:DI 1 "register_operand" "^v0,Sg0")
		     (match_operand:DI 2 "nonmemory_operand" "vDA,SgDB"))]
	    UNSPEC_ADDPTR))]
  ""
  {
    if (which_alternative == 0)
      {
	/* Vector add: use CC_SAVE_REG as the carry instead of VCC.  */
	rtx new_operands[4] = { operands[0], operands[1], operands[2],
				gen_rtx_REG (DImode, CC_SAVE_REG) };

	output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands);
	output_asm_insn ("v_addc%^_u32\t%H0, %3, %H2, %H1, %3", new_operands);
      }
    else
      {
	/* Scalar add: save SCC, add (clobbering SCC), then restore it.  */
	rtx new_operands[4] = { operands[0], operands[1], operands[2],
				gen_rtx_REG (BImode, CC_SAVE_REG) };

	output_asm_insn ("s_mov_b32\t%3, scc", new_operands);
	output_asm_insn ("s_add_u32\t%L0, %L1, %L2", new_operands);
	output_asm_insn ("s_addc_u32\t%H0, %H1, %H2", new_operands);
	output_asm_insn ("s_cmpk_lg_u32\t%3, 0", new_operands);
      }

    return "";
  }
  [(set_attr "type" "vmult,mult")
   (set_attr "length" "16,24")])
3d6275e3 AS |
1268 | |
1269 | ;; }}} | |
1270 | ;; {{{ ALU special cases: Minus | |
1271 | ||
; 32-bit subtract.  Scalar alternatives clobber SCC; vector alternatives
; clobber VCC.  v_subrev is used when the constant/scalar source is the
; minuend's partner so the operand order can be reversed.
(define_insn "subsi3"
  [(set (match_operand:SI 0 "register_operand"	        "=Sg, Sg,    v,   v")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA,    v,vBSv")
		  (match_operand:SI 2 "gcn_alu_operand" "SgA,  B, vBSv,   v")))
   (clobber (match_scratch:BI 3			        "=cs, cs,    X,   X"))
   (clobber (match_scratch:DI 4			        "= X,  X,   cV,  cV"))]
  ""
  "@
   s_sub_i32\t%0, %1, %2
   s_sub_i32\t%0, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1
   v_sub%^_u32\t%0, vcc, %1, %2"
  [(set_attr "type" "sop2,sop2,vop2,vop2")
   (set_attr "length" "4,8,8,8")])
1286 | ||
; 64-bit scalar subtract, split after reload into a low-part subtract with
; borrow-out (SCC) plus a high-part subtract with borrow-in.
(define_insn_and_split "subdi3"
  [(set (match_operand:DI 0 "register_operand"	      "=Sg, Sg")
	(minus:DI
	  (match_operand:DI 1 "gcn_alu_operand"	      "SgA,SgB")
	  (match_operand:DI 2 "gcn_alu_operand"	      "SgB,SgA")))
   (clobber (reg:BI SCC_REG))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    emit_insn (gen_subsi3_scalar_carry
	       (gcn_operand_part (DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[1], 0),
		gcn_operand_part (DImode, operands[2], 0)));
    rtx val = gcn_operand_part (DImode, operands[2], 1);
    /* Use the borrow-minus-zero form when the high word is 0.  */
    if (val != const0_rtx)
      emit_insn (gen_subcsi3_scalar
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  gcn_operand_part (DImode, operands[2], 1)));
    else
      emit_insn (gen_subcsi3_scalar_zero
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1)));
    DONE;
  }
  [(set_attr "length" "8")])
1315 | ||
; 32-bit subtract producing a borrow-out in SCC: (a - b) >u a is exactly
; the unsigned-underflow condition.
(define_insn "subsi3_scalar_carry"
  [(set (match_operand:SI 0 "register_operand"          "=Sg, Sg")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB")
		  (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
   (set (reg:BI SCC_REG)
	(gtu:BI (minus:SI (match_dup 1)
			  (match_dup 2))
		(match_dup 1)))]
  ""
  "s_sub_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "8")])
1328 | ||
; Constant-subtrahend variant: combine rewrites the borrow test as
; (a - n) <=u -n; the insn condition requires operand 3 == -operand 2 so
; this expresses the same borrow-out.
(define_insn "subsi3_scalar_carry_cst"
  [(set (match_operand:SI 0 "register_operand"           "=Sg")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand"  "SgA")
		  (match_operand:SI 2 "const_int_operand" " n")))
   (set (reg:BI SCC_REG)
	(leu:BI (minus:SI (match_dup 1)
			  (match_dup 2))
		(match_operand:SI 3 "const_int_operand"  " n")))]
  "INTVAL (operands[2]) == -INTVAL (operands[3])"
  "s_sub_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "4")])
1341 | ||
; Subtract-with-borrow-in: dest = src1 - src2 - SCC, with the new borrow
; written back to SCC as the underflow condition of the three-way
; difference.
(define_insn "subcsi3_scalar"
  [(set (match_operand:SI 0 "register_operand"                    "=Sg, Sg")
	(minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
			    (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB"))
		  (match_operand:SI 2 "gcn_alu_operand"           "SgB,SgA")))
   (set (reg:BI SCC_REG)
	(ior:BI (gtu:BI (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
					    (match_dup 1))
				  (match_dup 2))
			(match_dup 1))
		(gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
				  (match_dup 1))
			(match_dup 1))))]
  ""
  "s_subb_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "8")])
1359 | ||
; Subtract-with-borrow-in of zero: dest = src - SCC, updating SCC with the
; new borrow.  Used by the subdi3 split when the high subtrahend word is 0.
(define_insn "subcsi3_scalar_zero"
  [(set (match_operand:SI 0 "register_operand"		"=Sg")
	(minus:SI (zero_extend:SI (reg:BI SCC_REG))
		  (match_operand:SI 1 "gcn_alu_operand" "SgA")))
   (set (reg:BI SCC_REG)
	(gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
		(match_dup 1)))]
  ""
  "s_subb_u32\t%0, %1, 0"
  [(set_attr "type" "sop2")
   (set_attr "length" "4")])
1371 | ||
1372 | ;; }}} | |
1373 | ;; {{{ ALU: mult | |
1374 | ||
1375 | ; Vector multiply has vop3a encoding, but no corresponding vop2a, so no long | |
1376 | ; immediate. | |
5c127c4c JB |
1377 | ; The "s_mulk_i32" variant sets SCC to indicate overflow (which we don't care |
1378 | ; about here, but we need to indicate the clobbering). | |
3d6275e3 AS |
; 32-bit multiply on either unit.  Only the s_mulk_i32 alternative writes
; SCC (overflow indication), hence the per-alternative BI clobber.
(define_insn "mulsi3"
  [(set (match_operand:SI 0 "register_operand"	       "= Sg,Sg, Sg,   v")
	(mult:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA,   v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgA, J,  B,vASv")))
   (clobber (match_scratch:BI 3			       "=X,cs, X, X"))]
  ""
  "@
   s_mul_i32\t%0, %1, %2
   s_mulk_i32\t%0, %2
   s_mul_i32\t%0, %1, %2
   v_mul_lo_i32\t%0, %1, %2"
  [(set_attr "type" "sop2,sopk,sop2,vop3a")
   (set_attr "length" "4,4,8,4")])
1392 | ||
; Iterator over both extension kinds, plus attributes mapping each kind to
; the corresponding mnemonic fragments used in the widening-multiply
; patterns below.
(define_code_iterator any_extend [sign_extend zero_extend])
(define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")])
(define_code_attr su [(sign_extend "s") (zero_extend "u")])
(define_code_attr u [(sign_extend "") (zero_extend "u")])
(define_code_attr iu [(sign_extend "i") (zero_extend "u")])
(define_code_attr e [(sign_extend "e") (zero_extend "")])
1399 | ||
53b1d169 JB |
; High 32 bits of a widening 32x32 multiply (signed or unsigned).
; Pre-GCN5 only inline-immediate constants are accepted by the insn, so
; other constants are forced into a register; the register and immediate
; cases then map onto separate insn patterns.
(define_expand "<su>mulsi3_highpart"
  [(set (match_operand:SI 0 "register_operand" "")
	(truncate:SI
	  (lshiftrt:DI
	    (mult:DI
	      (any_extend:DI
		(match_operand:SI 1 "register_operand" ""))
	      (any_extend:DI
		(match_operand:SI 2 "gcn_alu_operand" "")))
	    (const_int 32))))]
  ""
{
  if (can_create_pseudo_p ()
      && !TARGET_GCN5
      && !gcn_inline_immediate_operand (operands[2], SImode))
    operands[2] = force_reg (SImode, operands[2]);

  if (REG_P (operands[2]))
    emit_insn (gen_<su>mulsi3_highpart_reg (operands[0], operands[1],
					    operands[2]));
  else
    emit_insn (gen_<su>mulsi3_highpart_imm (operands[0], operands[1],
					    operands[2]));

  DONE;
})
1426 | ||
; Register form of the high-part multiply; the scalar alternative is
; GCN5-only (see the gcn_version attribute).
(define_insn "<su>mulsi3_highpart_reg"
  [(set (match_operand:SI 0 "register_operand"	       "=Sg,  v")
	(truncate:SI
	  (lshiftrt:DI
	    (mult:DI
	      (any_extend:DI
		(match_operand:SI 1 "register_operand" "%Sg,  v"))
	      (any_extend:DI
		(match_operand:SI 2 "register_operand" "Sg,vSv")))
	    (const_int 32))))]
  ""
  "@
   s_mul_hi<sgnsuffix>0\t%0, %1, %2
   v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "sop2,vop3a")
   (set_attr "length" "4,8")
   (set_attr "gcn_version" "gcn5,*")])
1444 | ||
; Immediate form of the high-part multiply.  The constant is matched in
; DImode so its treatment is mode-independent; non-inline immediates
; require GCN5 (enforced by the insn condition).
(define_insn "<su>mulsi3_highpart_imm"
  [(set (match_operand:SI 0 "register_operand"	       "=Sg,Sg,v")
	(truncate:SI
	  (lshiftrt:DI
	    (mult:DI
	      (any_extend:DI
		(match_operand:SI 1 "register_operand" "Sg,Sg,v"))
	      (match_operand:DI 2 "gcn_32bit_immediate_operand" "A, B,A"))
	    (const_int 32))))]
  "TARGET_GCN5 || gcn_inline_immediate_operand (operands[2], SImode)"
  "@
   s_mul_hi<sgnsuffix>0\t%0, %1, %2
   s_mul_hi<sgnsuffix>0\t%0, %1, %2
   v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "sop2,sop2,vop3a")
   (set_attr "length" "4,8,8")
   (set_attr "gcn_version" "gcn5,gcn5,*")])
3d6275e3 | 1462 | |
8f332122 JB |
; Full 32x32->64 widening multiply (signed or unsigned).  Dispatch between
; the register and immediate insn patterns, forcing non-inline constants
; into a register on pre-GCN5 (same scheme as <su>mulsi3_highpart).
(define_expand "<su>mulsidi3"
  [(set (match_operand:DI 0 "register_operand" "")
	(mult:DI (any_extend:DI
		   (match_operand:SI 1 "register_operand" ""))
		 (any_extend:DI
		   (match_operand:SI 2 "nonmemory_operand" ""))))]
  ""
{
  if (can_create_pseudo_p ()
      && !TARGET_GCN5
      && !gcn_inline_immediate_operand (operands[2], SImode))
    operands[2] = force_reg (SImode, operands[2]);

  if (REG_P (operands[2]))
    emit_insn (gen_<su>mulsidi3_reg (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_<su>mulsidi3_imm (operands[0], operands[1], operands[2]));

  DONE;
})
1483 | ||
; Register form of the widening multiply: split after reload into a
; low-part mulsi3 plus a high-part <su>mulsi3_highpart.  The destination
; is earlyclobber because it is written before both inputs are fully read.
(define_insn_and_split "<su>mulsidi3_reg"
  [(set (match_operand:DI 0 "register_operand"	        "=&Sg, &v")
	(mult:DI (any_extend:DI
		   (match_operand:SI 1 "register_operand" "%Sg,  v"))
		 (any_extend:DI
		   (match_operand:SI 2 "register_operand" "Sg,vSv"))))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dstlo = gen_lowpart (SImode, operands[0]);
    rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
    emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
    emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
    DONE;
  }
  [(set_attr "gcn_version" "gcn5,*")])
1502 | ||
; Immediate form of the widening multiply, split the same way as the
; register form; non-inline immediates require GCN5.
(define_insn_and_split "<su>mulsidi3_imm"
  [(set (match_operand:DI 0 "register_operand"		"=&Sg,&Sg,&v")
	(mult:DI (any_extend:DI
		   (match_operand:SI 1 "register_operand" "Sg, Sg, v"))
		 (match_operand:DI 2 "gcn_32bit_immediate_operand"
								 "A,  B, A")))]
  "TARGET_GCN5 || gcn_inline_immediate_operand (operands[2], SImode)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    rtx dstlo = gen_lowpart (SImode, operands[0]);
    rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
    emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
    emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
    DONE;
  }
  [(set_attr "gcn_version" "gcn5,gcn5,*")])
1521 | ||
; 64-bit multiply via the schoolbook decomposition:
;   dst = (lo1 * lo2) widened, then dst.hi += lo1*hi2 + hi1*lo2.
; The two high-part accumulations are emitted as addsi3-shaped parallels
; reusing the BI/DI clobbers declared here.
(define_insn_and_split "muldi3"
  [(set (match_operand:DI 0 "register_operand"	       "=&Sg,&Sg, &v,&v")
	(mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg,  v, v")
		 (match_operand:DI 2 "nonmemory_operand" "Sg,  i,vSv, A")))
   (clobber (match_scratch:SI 3			       "=&Sg,&Sg,&v,&v"))
   (clobber (match_scratch:BI 4			       "=cs, cs, X, X"))
   (clobber (match_scratch:DI 5			       "=X,   X,cV,cV"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx tmp = operands[3];
    rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
    rtx op1lo = gcn_operand_part (DImode, operands[1], 0);
    rtx op1hi = gcn_operand_part (DImode, operands[1], 1);
    rtx op2lo = gcn_operand_part (DImode, operands[2], 0);
    rtx op2hi = gcn_operand_part (DImode, operands[2], 1);
    emit_insn (gen_umulsidi3 (operands[0], op1lo, op2lo));
    emit_insn (gen_mulsi3 (tmp, op1lo, op2hi));
    rtx add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
    rtx clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
    rtx clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
    add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
    emit_insn (add);
    emit_insn (gen_mulsi3 (tmp, op1hi, op2lo));
    add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
    clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
    clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
    add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
    emit_insn (add);
    DONE;
  }
  [(set_attr "gcn_version" "gcn5,gcn5,*,*")])
1556 | ||
3d6275e3 AS |
; Widening 16x16->32 multiply using the 24-bit multiplier with SDWA
; operand selection restricted to the low word of each source.
(define_insn "<u>mulhisi3"
  [(set (match_operand:SI 0 "register_operand"	        "=v")
	(mult:SI
	  (any_extend:SI (match_operand:HI 1 "register_operand" "%v"))
	  (any_extend:SI (match_operand:HI 2 "register_operand" " v"))))]
  ""
  "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:WORD_0 src1_sel:WORD_0"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])
1566 | ||
; Widening 8x8->16 multiply, as <u>mulhisi3 but selecting only the low
; byte of each source operand.
(define_insn "<u>mulqihi3_scalar"
  [(set (match_operand:HI 0 "register_operand"	        "=v")
	(mult:HI
	  (any_extend:HI (match_operand:QI 1 "register_operand" "%v"))
	  (any_extend:HI (match_operand:QI 2 "register_operand" " v"))))]
  ""
  "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:BYTE_0 src1_sel:BYTE_0"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])
1576 | ||
1577 | ;; }}} | |
1578 | ;; {{{ ALU: generic 32-bit unop | |
1579 | ||
; Bitwise unary operations on either unit.  The vector popcount mnemonic
; takes an extra ", 0" accumulator operand, supplied via the code attr.
(define_code_iterator bitunop [not popcount])
(define_code_attr popcount_extra_op [(not "") (popcount ", 0")])

(define_insn "<expander>si2"
  [(set (match_operand:SI 0 "register_operand"  "=Sg,   v")
	(bitunop:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgB,vSvB")))
   (clobber (match_scratch:BI 2		        "=cs,   X"))]
  ""
  "@
   s_<s_mnemonic>0\t%0, %1
   v_<mnemonic>0\t%0, %1<popcount_extra_op>"
  [(set_attr "type" "sop1,vop1")
   (set_attr "length" "8")])
1594 | ||
34bac264 AS |
; Count leading/trailing zeros (clzsi2/ctzsi2); scalar unit only.
(define_code_iterator countzeros [clz ctz])

(define_insn "<expander>si2"
  [(set (match_operand:SI 0 "register_operand"  "=Sg,Sg")
	(countzeros:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgA, B")))]
  ""
  "s_<s_mnemonic>1\t%0, %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4,8")])
1605 | ||
; 64-bit count leading/trailing zeros; the SImode result always fits.
; The truncate ensures that a constant passed to operand 1 is treated as
; DImode.
(define_insn "<expander>di2"
  [(set (match_operand:SI 0 "register_operand"    "=Sg,Sg")
	(truncate:SI
	  (countzeros:DI
	    (match_operand:DI 1 "gcn_alu_operand" "SgA, B"))))]
  ""
  "s_<s_mnemonic>1\t%0, %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4,8")])
1616 | ||
0c06e46a JB |
; Find-last-bit: wraps s_flbit_i32 (SImode) / s_flbit_i32_i64 (DImode).
; Kept as an unspec because the instruction's 0/-1 behaviour does not
; match any standard RTL operation; see clrsb<mode>2 below for its use.
(define_insn "gcn_flbit<mode>_int"
  [(set (match_operand:SI 0 "register_operand"	       "=Sg,Sg")
	(unspec:SI [(match_operand:SIDI 1 "gcn_alu_operand" "SgA, B")]
		   UNSPEC_FLBIT_INT))]
  ""
  {
    if (<MODE>mode == SImode)
      return "s_flbit_i32\t%0, %1";
    else
      return "s_flbit_i32_i64\t%0, %1";
  }
  [(set_attr "type" "sop1")
   (set_attr "length" "4,8")])
1630 | ||
;; Count leading redundant sign bits, synthesized from s_flbit_i32[_i64].
(define_expand "clrsb<mode>2"
  [(set (match_operand:SI 0 "register_operand" "")
	(clrsb:SI (match_operand:SIDI 1 "gcn_alu_operand" "")))]
  ""
  {
    rtx tmp = gen_reg_rtx (SImode);
    /* FLBIT_I* counts sign or zero bits at the most-significant end of the
       input register (and returns -1 for 0/-1 inputs).  We want the number of
       *redundant* bits (i.e. that value minus one), and an answer of 31/63 for
       0/-1 inputs.  We can do that in three instructions...  */
    emit_insn (gen_gcn_flbit<mode>_int (tmp, operands[1]));
    /* The -1 result reads as unsigned maximum, so umin clamps it to the
       full bit width for the 0/-1 inputs.  */
    emit_insn (gen_uminsi3 (tmp, tmp,
			    gen_int_mode (GET_MODE_BITSIZE (<MODE>mode),
					  SImode)));
    /* If we put this last, it can potentially be folded into a subsequent
       arithmetic operation.  */
    emit_insn (gen_subsi3 (operands[0], tmp, const1_rtx));
    DONE;
  })
1650 | ||
3d6275e3 AS |
1651 | ;; }}} |
1652 | ;; {{{ ALU: generic 32-bit binop | |
1653 | ||
; No plus and mult - they have variant with 16bit immediate
; and thus are defined later.
;; "com" = commutative (scalar/vector/LDS patterns), "nocom" = the shifts,
;; which instead need a reversed-operand VALU mnemonic (see below).
(define_code_iterator binop [and ior xor smin smax umin umax
			     ashift lshiftrt ashiftrt])
(define_code_iterator vec_and_scalar_com [and ior xor smin smax umin umax])
(define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt])
1660 | ||
;; Commutative 32-bit binops: scalar ALU (clobbers SCC), vector ALU, or an
;; LDS read-modify-write (ds_*) where operand 0 is also the memory input.
(define_insn "<expander>si3"
  [(set (match_operand:SI 0 "gcn_valu_dst_operand"    "= Sg,   v,RD")
	(vec_and_scalar_com:SI
	  (match_operand:SI 1 "gcn_valu_src0_operand" "%SgA,vSvB, 0")
	  (match_operand:SI 2 "gcn_alu_operand"       " SgB,   v, v")))
   (clobber (match_scratch:BI 3                       "= cs,   X, X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   v_<mnemonic>0\t%0, %1, %2
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "sop2,vop2,ds")
   (set_attr "length" "8")])
1674 | ||
;; Non-commutative 32-bit binops (shifts).  The VALU form only allows an
;; SGPR/constant in src0, so the operands are emitted reversed via the
;; "rev" mnemonic (e.g. v_lshlrev_b32) for the vector alternative.
(define_insn "<expander>si3"
  [(set (match_operand:SI 0 "register_operand"  "=Sg, Sg,   v")
	(vec_and_scalar_nocom:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgB,SgA,   v")
	  (match_operand:SI 2 "gcn_alu_operand" "SgA,SgB,vSvB")))
   (clobber (match_scratch:BI 3                 "=cs, cs,   X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   s_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "sop2,sop2,vop2")
   (set_attr "length" "8")])
1688 | ||
;; Convenience expander: emit any 32-bit binop with its SCC clobber
;; already attached, for use by other expanders in this file.
(define_expand "<expander>si3_scc"
  [(parallel [(set (match_operand:SI 0 "gcn_valu_dst_operand")
		   (binop:SI
		     (match_operand:SI 1 "gcn_valu_src0_operand")
		     (match_operand:SI 2 "gcn_alu_operand")))
	      (clobber (reg:BI SCC_REG))])]
  ""
  {})
1697 | ||
1698 | ;; }}} | |
1699 | ;; {{{ ALU: generic 64-bit | |
1700 | ||
;; 64-bit commutative bitwise ops.  The scalar unit has real 64-bit
;; and/or/xor; the vector alternative is split after reload into two
;; 32-bit VALU operations on the low and high halves.
(define_code_iterator vec_and_scalar64_com [and ior xor])

(define_insn_and_split "<expander>di3"
  [(set (match_operand:DI 0 "register_operand"  "= Sg,    v")
	(vec_and_scalar64_com:DI
	  (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB")
	  (match_operand:DI 2 "gcn_alu_operand" " SgC,    v")))
   (clobber (match_scratch:BI 3                 "= cs,    X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   #"
  "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
  [(parallel [(set (match_dup 4)
		   (vec_and_scalar64_com:SI (match_dup 5) (match_dup 6)))
	      (clobber (match_dup 3))])
   (parallel [(set (match_dup 7)
		   (vec_and_scalar64_com:SI (match_dup 8) (match_dup 9)))
	      (clobber (match_dup 3))])]
  {
    /* Operands 4-6 are the low parts, 7-9 the high parts.  */
    operands[4] = gcn_operand_part (DImode, operands[0], 0);
    operands[5] = gcn_operand_part (DImode, operands[1], 0);
    operands[6] = gcn_operand_part (DImode, operands[2], 0);
    operands[7] = gcn_operand_part (DImode, operands[0], 1);
    operands[8] = gcn_operand_part (DImode, operands[1], 1);
    operands[9] = gcn_operand_part (DImode, operands[2], 1);
  }
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8")])
1730 | ||
;; 64-bit shifts; the shift count (operand 2) is always SImode.  As with
;; the 32-bit shifts, the VALU alternative uses the reversed mnemonic.
(define_insn "<expander>di3"
  [(set (match_operand:DI 0 "register_operand"  "=Sg, Sg,   v")
	(vec_and_scalar_nocom:DI
	  (match_operand:DI 1 "gcn_alu_operand" "SgC,SgA,   v")
	  (match_operand:SI 2 "gcn_alu_operand" "SgA,SgC,vSvC")))
   (clobber (match_scratch:BI 3                 "=cs, cs,   X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   s_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "sop2,sop2,vop2")
   (set_attr "length" "8")])
1744 | ||
19fad467 AS |
1745 | ;; }}} |
1746 | ;; {{{ ALU: generic 128-bit binop | |
1747 | ||
; TImode shifts can't be synthesized by the middle-end
;; Expand a 128-bit shift as DImode operations on the two 64-bit halves.
;; Constant shift counts pick one of the ZERO/SMALL/LARGE sequences at
;; expand time; a variable count emits all three behind runtime branches.
(define_expand "<expander>ti3"
  [(set (match_operand:TI 0 "register_operand")
	(vec_and_scalar_nocom:TI
	  (match_operand:TI 1 "gcn_alu_operand")
	  (match_operand:SI 2 "gcn_alu_operand")))]
  ""
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    rtx shift = operands[2];

    enum {ashr, lshr, ashl} shiftop = <expander>;
    /* The "inverse" shift recovers the bits that cross the 64-bit
       boundary; the "logical" shift moves the receiving half.  */
    rtx (*inverse_shift_fn) (rtx, rtx, rtx)
      = (shiftop == ashl ? gen_lshrdi3 : gen_ashldi3);
    rtx (*logical_shift_fn) (rtx, rtx, rtx)
      = (shiftop == ashl ? gen_ashldi3 : gen_lshrdi3);

    /* We shift "from" one subreg "to" the other, according to shiftop.  */
    int from = (shiftop == ashl ? 0 : 8);
    int to = (shiftop == ashl ? 8 : 0);
    rtx destfrom = simplify_gen_subreg (DImode, dest, TImode, from);
    rtx destto = simplify_gen_subreg (DImode, dest, TImode, to);
    rtx srcfrom = simplify_gen_subreg (DImode, src, TImode, from);
    rtx srcto = simplify_gen_subreg (DImode, src, TImode, to);

    int shiftval = (CONST_INT_P (shift) ? INTVAL (shift) : -1);
    enum {RUNTIME, ZERO, SMALL, LARGE} shiftcomparison
      = (!CONST_INT_P (shift) ? RUNTIME
	 : shiftval == 0 ? ZERO
	 : shiftval < 64 ? SMALL
	 : LARGE);

    rtx large_label, zero_label, exit_label;

    if (shiftcomparison == RUNTIME)
      {
	zero_label = gen_label_rtx ();
	large_label = gen_label_rtx ();
	exit_label = gen_label_rtx ();

	rtx cond = gen_rtx_EQ (VOIDmode, shift, const0_rtx);
	emit_insn (gen_cbranchsi4 (cond, shift, const0_rtx, zero_label));

	rtx sixtyfour = GEN_INT (64);
	cond = gen_rtx_GE (VOIDmode, shift, sixtyfour);
	emit_insn (gen_cbranchsi4 (cond, shift, sixtyfour, large_label));
      }

    if (shiftcomparison == SMALL || shiftcomparison == RUNTIME)
      {
	/* Shift both parts by the same amount, then patch in the bits that
	   cross the boundary.
	   This does *not* work for zero-length shifts.  */
	rtx tmpto1 = gen_reg_rtx (DImode);
	rtx tmpto2 = gen_reg_rtx (DImode);
	emit_insn (gen_<expander>di3 (destfrom, srcfrom, shift));
	emit_insn (logical_shift_fn (tmpto1, srcto, shift));
	rtx lessershiftval = gen_reg_rtx (SImode);
	emit_insn (gen_subsi3 (lessershiftval, GEN_INT (64), shift));
	emit_insn (inverse_shift_fn (tmpto2, srcfrom, lessershiftval));
	emit_insn (gen_iordi3 (destto, tmpto1, tmpto2));
      }

    if (shiftcomparison == RUNTIME)
      {
	emit_jump_insn (gen_jump (exit_label));
	emit_barrier ();

	emit_label (zero_label);
      }

    if (shiftcomparison == ZERO || shiftcomparison == RUNTIME)
      emit_move_insn (dest, src);

    if (shiftcomparison == RUNTIME)
      {
	emit_jump_insn (gen_jump (exit_label));
	emit_barrier ();

	emit_label (large_label);
      }

    if (shiftcomparison == LARGE || shiftcomparison == RUNTIME)
      {
	/* Do the shift within one part, and set the other part appropriately.
	   Shifts of 128+ bits are an error.  */
	rtx lessershiftval = gen_reg_rtx (SImode);
	emit_insn (gen_subsi3 (lessershiftval, shift, GEN_INT (64)));
	emit_insn (gen_<expander>di3 (destto, srcfrom, lessershiftval));
	if (shiftop == ashr)
	  emit_insn (gen_ashrdi3 (destfrom, srcfrom, GEN_INT (63)));
	else
	  emit_move_insn (destfrom, const0_rtx);
      }

    if (shiftcomparison == RUNTIME)
      emit_label (exit_label);

    DONE;
  })
1849 | ||
3d6275e3 AS |
1850 | ;; }}} |
1851 | ;; {{{ Atomics | |
1852 | ||
1853 | ; Each compute unit has its own L1 cache. The L2 cache is shared between | |
1854 | ; all the compute units. Any load or store instruction can skip L1 and | |
1855 | ; access L2 directly using the "glc" flag. Atomic instructions also skip | |
1856 | ; L1. The L1 cache can be flushed and invalidated using instructions. | |
1857 | ; | |
1858 | ; Therefore, in order for "acquire" and "release" atomic modes to work | |
1859 | ; correctly across compute units we must flush before each "release" | |
1860 | ; and invalidate the cache after each "acquire". It might seem like | |
1861 | ; invalidation could be safely done before an "acquire", but since each | |
1862 | ; compute unit can run up to 40 threads simultaneously, all reading values | |
1863 | ; into the L1 cache, this is not actually safe. | |
1864 | ; | |
1865 | ; Additionally, scalar flat instructions access L2 via a different cache | |
1866 | ; (the "constant cache"), so they have separate control instructions. We | |
1867 | ; do not attempt to invalidate both caches at once; instead, atomics | |
1868 | ; operating on scalar flat pointers will flush the constant cache, and | |
1869 | ; atomics operating on flat or global pointers will flush L1. It is up to | |
1870 | ; the programmer to get this right. | |
1871 | ||
;; RMW operations supported by the atomic patterns below; <X> selects the
;; "_X2" 64-bit instruction suffix for DImode.
(define_code_iterator atomicops [plus minus and ior xor])
(define_mode_attr X [(SI "") (DI "_X2")])

;; TODO compare_and_swap test_and_set inc dec
;; Hardware also supports min and max, but GCC does not.
1877 | ||
;; Full memory barrier, modelled as a volatile BLK-mode unspec on a
;; scratch memory so that nothing is moved across it.
(define_expand "memory_barrier"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
  ""
  {
    operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
    MEM_VOLATILE_P (operands[0]) = 1;
  })
1886 | ||
;; The barrier itself writes back and invalidates the volatile L1 lines.
(define_insn "*memory_barrier"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
  ""
  "buffer_wbinvl1_vol"
  [(set_attr "type" "mubuf")
   (set_attr "length" "4")])
1894 | ||
; FIXME: These patterns have been disabled as they do not seem to work
; reliably - they can cause hangs or incorrect results.
; TODO: flush caches according to memory model
;; Fetch-and-op returning the old value; the "0 /* Disabled.  */" insn
;; condition keeps the pattern out of use (see FIXME above).
(define_insn "atomic_fetch_<bare_mnemonic><mode>"
  [(set (match_operand:SIDI 0 "register_operand"   "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand"     "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(atomicops:SIDI
	     (match_dup 1)
	     (match_operand:SIDI 2 "register_operand" " Sm, v, v"))]
	  UNSPECV_ATOMIC))
   (use (match_operand 3 "const_int_operand"))]
  "0 /* Disabled.  */"
  "@
   s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
   global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")])
1916 | ||
; FIXME: These patterns are disabled because the instructions don't
; seem to work as advertised.  Specifically, OMP "team distribute"
; reductions apparently "lose" some of the writes, similar to what
; you might expect from a concurrent non-atomic read-modify-write.
; TODO: flush caches according to memory model
;; Atomic op with no return value; disabled via the insn condition.
(define_insn "atomic_<bare_mnemonic><mode>"
  [(set (match_operand:SIDI 0 "memory_operand" "+RS,RF,RM")
	(unspec_volatile:SIDI
	  [(atomicops:SIDI
	     (match_dup 0)
	     (match_operand:SIDI 1 "register_operand" " Sm, v, v"))]
	  UNSPECV_ATOMIC))
   (use (match_operand 2 "const_int_operand"))]
  "0 /* Disabled.  */"
  "@
   s_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
   global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")])
1938 | ||
;; Helper attributes: the double-width mode (for cmpswap's combined
;; source/compare pair), the size in bytes, and the size in bits.
(define_mode_attr x2 [(SI "DI") (DI "TI")])
(define_mode_attr size [(SI "4") (DI "8")])
(define_mode_attr bitsize [(SI "32") (DI "64")])
1942 | ||
;; Compare-and-swap.  LDS memory uses the ds_cmpst form; otherwise the
;; hardware cmpswap wants the new value and the expected value packed
;; into one double-width register pair (new in the low half).
(define_expand "sync_compare_and_swap<mode>"
  [(match_operand:SIDI 0 "register_operand")
   (match_operand:SIDI 1 "memory_operand")
   (match_operand:SIDI 2 "register_operand")
   (match_operand:SIDI 3 "register_operand")]
  ""
  {
    if (MEM_ADDR_SPACE (operands[1]) == ADDR_SPACE_LDS)
      {
	emit_insn (gen_sync_compare_and_swap<mode>_lds_insn (operands[0],
							     operands[1],
							     operands[2],
							     operands[3]));
	DONE;
      }

    /* Operands 2 and 3 must be placed in consecutive registers, and passed
       as a combined value.  */
    rtx src_cmp = gen_reg_rtx (<x2>mode);
    emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, 0), operands[3]);
    emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, <size>), operands[2]);
    emit_insn (gen_sync_compare_and_swap<mode>_insn (operands[0],
						     operands[1],
						     src_cmp));
    DONE;
  })
1969 | ||
;; The cmpswap instruction proper; operand 2 is the double-width
;; source/compare pair built by the expander above.
(define_insn "sync_compare_and_swap<mode>_insn"
  [(set (match_operand:SIDI 0 "register_operand"    "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand"      "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:<x2> 2 "register_operand" " Sm, v, v")]
	  UNSPECV_ATOMIC))]
  ""
  "@
   s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\t0
   global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")
   (set_attr "delayeduse" "*,yes,yes")])
3d6275e3 AS |
1986 | |
;; LDS (local data share) compare-and-swap: ds_cmpst takes the compare
;; and source values as separate operands, so no packing is needed.
(define_insn "sync_compare_and_swap<mode>_lds_insn"
  [(set (match_operand:SIDI 0 "register_operand"    "= v")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "memory_operand"   "+RL")]
	  UNSPECV_ATOMIC))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 2 "register_operand" " v")
	   (match_operand:SIDI 3 "register_operand" " v")]
	  UNSPECV_ATOMIC))]
  ""
  "ds_cmpst_rtn_b<bitsize> %0, %1, %2, %3\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "ds")
   (set_attr "length" "12")])
2001 | ||
;; Atomic load.  Operand 2 is the memory model; acquire and seq-cst
;; variants add the cache writeback/invalidate instructions described
;; at the top of the Atomics section.  Alternatives: scalar (smem),
;; flat, and global addressing.
(define_insn "atomic_load<mode>"
  [(set (match_operand:SIDI 0 "register_operand"   "=Sm, v, v")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "memory_operand"  " RS,RF,RM")]
	  UNSPECV_ATOMIC))
   (use (match_operand:SIDI 2 "immediate_operand" "  i, i, i"))]
  ""
  {
    switch (INTVAL (operands[2]))
      {
      case MEMMODEL_RELAXED:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_CONSUME:
      case MEMMODEL_ACQUIRE:
      case MEMMODEL_SYNC_ACQUIRE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
		   "s_dcache_wb_vol";
	  case 1:
	    return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
		   "buffer_wbinvl1_vol";
	  case 2:
	    return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
		   "buffer_wbinvl1_vol";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])
2061 | ||
;; Atomic store.  Operand 2 is the memory model; release and seq-cst
;; variants flush the relevant cache before the store (and seq-cst also
;; invalidates afterwards).  Alternatives: scalar, flat, global.
(define_insn "atomic_store<mode>"
  [(set (match_operand:SIDI 0 "memory_operand"      "=RS,RF,RM")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "register_operand" " Sm, v, v")]
	  UNSPECV_ATOMIC))
   (use (match_operand:SIDI 2 "immediate_operand"  "  i, i, i"))]
  ""
  {
    switch (INTVAL (operands[2]))
      {
      case MEMMODEL_RELAXED:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_store%o1\t%A0, %1%O0 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_store%o1\t%A0, %1%O0 glc\;s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_RELEASE:
      case MEMMODEL_SYNC_RELEASE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])
2117 | ||
;; Atomic exchange (swap), returning the old value in operand 0.
;; Operand 3 is the memory model; cache flush/invalidate instructions
;; are emitted around the swap per the acquire/release semantics.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SIDI 0 "register_operand"    "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand"      "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 2 "register_operand" " Sm, v, v")]
	  UNSPECV_ATOMIC))
   (use (match_operand 3 "immediate_operand"))]
  ""
  {
    switch (INTVAL (operands[3]))
      {
      case MEMMODEL_RELAXED:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_CONSUME:
      case MEMMODEL_ACQUIRE:
      case MEMMODEL_SYNC_ACQUIRE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
		   "s_dcache_wb_vol\;s_dcache_inv_vol";
	  case 1:
	    return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
		   "buffer_wbinvl1_vol";
	  case 2:
	    return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      case MEMMODEL_RELEASE:
      case MEMMODEL_SYNC_RELEASE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\t0";
	  case 2:
	    return "buffer_wbinvl1_vol\;"
		   "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;"
		   "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])
2197 | ||
2198 | ;; }}} | |
2199 | ;; {{{ OpenACC / OpenMP | |
2200 | ||
;; OpenACC dimension size; delegated to the gcn_oacc_dim_size helper,
;; taking only the low 32 bits of its result.
(define_expand "oacc_dim_size"
  [(match_operand:SI 0 "register_operand")
   (match_operand:SI 1 "const_int_operand")]
  ""
  {
    rtx tmp = gcn_oacc_dim_size (INTVAL (operands[1]));
    emit_move_insn (operands[0], gen_lowpart (SImode, tmp));
    DONE;
  })
2210 | ||
;; OpenACC dimension position; delegated to the gcn_oacc_dim_pos helper.
(define_expand "oacc_dim_pos"
  [(match_operand:SI 0 "register_operand")
   (match_operand:SI 1 "const_int_operand")]
  ""
  {
    emit_move_insn (operands[0], gcn_oacc_dim_pos (INTVAL (operands[1])));
    DONE;
  })
2219 | ||
;; Wavefront barrier, modelled (like memory_barrier above) as a volatile
;; BLK-mode unspec on scratch memory so it cannot be moved or deleted.
(define_expand "gcn_wavefront_barrier"
  [(set (match_dup 0)
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
  ""
  {
    operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
    MEM_VOLATILE_P (operands[0]) = 1;
  })
2228 | ||
;; Emits the s_barrier instruction itself.
(define_insn "*gcn_wavefront_barrier"
  [(set (match_operand:BLK 0 "")
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
  ""
  "s_barrier"
  [(set_attr "type" "sopp")])
2235 | ||
;; oacc_fork is required to exist as the pair of oacc_join but must
;; never be expanded; see the comment in the body.
(define_expand "oacc_fork"
  [(set (match_operand:SI 0 "")
	(match_operand:SI 1 ""))
   (use (match_operand:SI 2 ""))]
  ""
  {
    /* We need to have oacc_fork/oacc_join named patterns as a pair,
       but the fork isn't actually used.  */
    gcc_unreachable ();
  })
2246 | ||
;; oacc_join expands to a wavefront barrier; the set/use template
;; operands are only there to satisfy the named-pattern interface.
(define_expand "oacc_join"
  [(set (match_operand:SI 0 "")
	(match_operand:SI 1 ""))
   (use (match_operand:SI 2 ""))]
  ""
  {
    emit_insn (gen_gcn_wavefront_barrier ());
    DONE;
  })
2256 | ||
2257 | ;; }}} | |
2258 | ||
2259 | (include "gcn-valu.md") |