1 ;; Copyright (C) 2016-2019 Free Software Foundation, Inc.
2
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
6 ;; any later version.
7
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 ;; for more details.
12
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
16
17 ;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
18
19 (include "predicates.md")
20 (include "constraints.md")
21
22 ;; {{{ Constants and enums
23
24 ; Named registers
25 (define_constants
26 [(FIRST_SGPR_REG 0)
27 (CC_SAVE_REG 22)
28 (LAST_SGPR_REG 101)
29 (FLAT_SCRATCH_REG 102)
30 (FLAT_SCRATCH_LO_REG 102)
31 (FLAT_SCRATCH_HI_REG 103)
32 (XNACK_MASK_REG 104)
33 (XNACK_MASK_LO_REG 104)
34 (XNACK_MASK_HI_REG 105)
35 (VCC_REG 106)
36 (VCC_LO_REG 106)
37 (VCC_HI_REG 107)
38 (VCCZ_REG 108)
39 (TBA_REG 109)
40 (TBA_LO_REG 109)
41 (TBA_HI_REG 110)
42 (TMA_REG 111)
43 (TMA_LO_REG 111)
44 (TMA_HI_REG 112)
45 (TTMP0_REG 113)
46 (TTMP11_REG 124)
47 (M0_REG 125)
48 (EXEC_REG 126)
49 (EXEC_LO_REG 126)
50 (EXEC_HI_REG 127)
51 (EXECZ_REG 128)
52 (SCC_REG 129)
53 (FIRST_VGPR_REG 160)
54 (LAST_VGPR_REG 415)])
55
56 (define_constants
57 [(SP_REGNUM 16)
58 (LR_REGNUM 18)
59 (AP_REGNUM 416)
60 (FP_REGNUM 418)])
61
62 (define_c_enum "unspecv" [
63 UNSPECV_PROLOGUE_USE
64 UNSPECV_KERNEL_RETURN
65 UNSPECV_BARRIER
66 UNSPECV_ATOMIC
67 UNSPECV_ICACHE_INV])
68
69 (define_c_enum "unspec" [
70 UNSPEC_VECTOR
71 UNSPEC_BPERMUTE
72 UNSPEC_SGPRBASE
73 UNSPEC_MEMORY_BARRIER
74 UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
75 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
76 UNSPEC_PLUS_DPP_SHR
77 UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
78 UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
79 UNSPEC_MOV_FROM_LANE63
80 UNSPEC_GATHER
81 UNSPEC_SCATTER])
82
83 ;; }}}
84 ;; {{{ Attributes
85
86 ; Instruction type (encoding) as described in the ISA specification.
87 ; The following table summarizes possible operands of individual instruction
88 ; types and corresponding constraints.
89 ;
90 ; sop2 - scalar, two inputs, one output
91 ; ssrc0/ssrc1: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
92 ; vccz,execz,scc,inline immediate,fp inline immediate
93 ; sdst: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
94 ;
95 ; Constraints "=SD, SD", "SSA,SSB","SSB,SSA"
96 ;
97 ; sopk - scalar, inline constant input, one output
98 ; simm16: 16bit inline constant
99 ; sdst: same as sop2/ssrc0
100 ;
101 ; Constraints "=SD", "J"
102 ;
103 ; sop1 - scalar, one input, one output
104 ; ssrc0: same as sop2/ssrc0. FIXME: manual omits VCCZ
105 ; sdst: same as sop2/sdst
106 ;
107 ; Constraints "=SD", "SSA"
108 ;
109 ; sopc - scalar, two inputs, one comparison
110 ; ssrc0: same as sop2/ssrc0.
111 ;
112 ; Constraints "SSI,SSA","SSA,SSI"
113 ;
114 ; sopp - scalar, one constant input, one special
115 ; simm16
116 ;
117 ; smem - scalar memory
118 ; sbase: aligned pair of sgprs. Specify {size[15:0], base[47:0]} in
119 ; dwords
120 ; sdata: sgpr0-102, flat_scratch, xnack, vcc, tba, tma
121 ; offset: sgpr or 20bit unsigned byte offset
122 ;
123 ; vop2 - vector, two inputs, one output
124 ; vsrc0: sgpr0-102,flat_scratch,xnack,vcc,tba,ttmp0-11,m0,exec,
125 ; inline constant -16 to -64, fp inline immediate, vccz, execz,
126 ; scc, lds, literal constant, vgpr0-255
127 ; vsrc1: vgpr0-255
128 ; vdst: vgpr0-255
129 ; Limitations: At most one SGPR, at most one constant
130 ; if constant is used, SGPR must be M0
131 ; Only SRC0 can be LDS_DIRECT
132 ;
133 ; constraints: "=v", "vBSv", "v"
134 ;
135 ; vop1 - vector, one input, one output
136 ; vsrc0: same as vop2/src0
137 ; vdst: vgpr0-255
138 ;
139 ; constraints: "=v", "vBSv"
140 ;
141 ; vopc - vector, two inputs, one comparison output;
142 ; vsrc0: same as vop2/src0
143 ; vsrc1: vgpr0-255
144 ; vdst:
145 ;
146 ; constraints: "vASv", "v"
147 ;
148 ; vop3a - vector, three inputs, one output
149 ; vdst: vgpr0-255, for v_cmp sgpr or vcc
150 ; abs,clamp
151 ; vsrc0: sgpr0-102,vcc,tba,ttmp0-11,m0,exec,
152 ; inline constant -16 to -64, fp inline immediate, vccz, execz,
153 ; scc, lds_direct
154 ; FIXME: is 1/pi really missing? really 104 SGPRs?
155 ;
156 ; vop3b - vector, three inputs, one vector output, one scalar output
157 ; vsrc0,vsrc1,vsrc2: same as vop3a vsrc0
158 ; vdst: vgpr0-255
159 ; sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
160 ;
161 ; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address
162 ; src0: vgpr0-255
163 ; dst_sel: BYTE_0-3, WORD_0-1, DWORD
164 ; dst_unused: UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE
165 ; clamp: true/false
166 ; src0_sel: BYTE_0-3, WORD_0-1, DWORD
167 ; flags: src0_sext, src0_neg, src0_abs, src1_sel, src1_sext, src1_neg,
168 ; src1_abs
169 ;
170 ; vop_dpp - second dword for vop1/vop2/vopc for specifying data-parallel ops
171 ; src0: vgpr0-255
172 ; dpp_ctrl: quad_perm, row_sl0-15, row_sr0-15, row_rr0-15, wf_sl1,
173 ; wf_rl1, wf_sr1, wf_rr1, row_mirror, row_half_mirror,
174 ; bcast15, bcast31
175 ; flags: src0_neg, src0_abs, src1_neg, src1_abs
176 ; bank_mask: 4-bit mask
177 ; row_mask: 4-bit mask
178 ;
179 ; ds - Local and global data share instructions.
180 ; offset0: 8-bit constant
181 ; offset1: 8-bit constant
182 ; flag: gds
183 ; addr: vgpr0-255
184 ; data0: vgpr0-255
185 ; data1: vgpr0-255
186 ; vdst: vgpr0-255
187 ;
188 ; mubuf - Untyped memory buffer operation. First word with LDS, second word
189 ; non-LDS.
190 ; offset: 12-bit constant
191 ; vaddr: vgpr0-255
192 ; vdata: vgpr0-255
193 ; srsrc: sgpr0-102
194 ; soffset: sgpr0-102
195 ; flags: offen, idxen, glc, lds, slc, tfe
196 ;
197 ; mtbuf - Typed memory buffer operation. Two words
198 ; offset: 12-bit constant
199 ; dfmt: 4-bit constant
200 ; nfmt: 3-bit constant
201 ; vaddr: vgpr0-255
202 ; vdata: vgpr0-255
203 ; srsrc: sgpr0-102
204 ; soffset: sgpr0-102
205 ; flags: offen, idxen, glc, lds, slc, tfe
206 ;
207 ; flat - flat or global memory operations
208 ; flags: glc, slc
209 ; addr: vgpr0-255
210 ; data: vgpr0-255
211 ; vdst: vgpr0-255
212 ;
213 ; mult - expands to multiple instructions (pseudo encoding)
214 ;
215 ; vmult - as mult, when a vector instruction is used.
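;
; For example, in the patterns below s_add_i32 is emitted from a "sop2"
; alternative, v_add_u32 from a "vop2" alternative, and flat_load_dword
; from a "flat" alternative.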
216
217 (define_attr "type"
218 "unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc,
219 vop3a,vop3b,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,vmult"
220 (const_string "unknown"))
221
222 ; Set if instruction is executed in scalar or vector unit
223
224 (define_attr "unit" "unknown,scalar,vector"
225 (cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult")
226 (const_string "scalar")
227 (eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,ds,
228 vop_sdwa,vop_dpp,flat,vmult")
229 (const_string "vector")]
230 (const_string "unknown")))
231
232 ; All vector instructions run as 64 threads, predicated by the EXEC
233 ; register. Scalar operations in vector registers require a single lane
234 ; enabled, vector moves require a full set of lanes enabled, and most vector
235 ; operations handle the lane masking themselves.
236 ; The md_reorg pass is responsible for ensuring that EXEC is set appropriately
237 ; according to the following settings:
238 ; auto - md_reorg will inspect def/use to determine what to do.
239 ; none - exec is not needed.
240 ; single - disable all but lane zero.
241 ; full - enable all lanes.
242
243 (define_attr "exec" "auto,none,single,full"
244 (const_string "auto"))
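; For example, the "*movbi" pattern below sets (set_attr "exec" "none") for
; its v_readlane_b32 alternative, since reading a single lane does not
; depend on the EXEC mask, and leaves the remaining alternatives as "auto".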
245
246 ; Infer the (worst-case) length from the instruction type by default. Many
247 ; types can have an optional immediate word following, which we include here.
248 ; "Multiple" types are counted as two 64-bit instructions. This is just a
249 ; default fallback: it can be overridden per-alternative in insn patterns for
250 ; greater accuracy.
251
252 (define_attr "length" ""
253 (cond [(eq_attr "type" "sop1") (const_int 8)
254 (eq_attr "type" "sop2") (const_int 8)
255 (eq_attr "type" "sopk") (const_int 8)
256 (eq_attr "type" "sopc") (const_int 8)
257 (eq_attr "type" "sopp") (const_int 4)
258 (eq_attr "type" "smem") (const_int 8)
259 (eq_attr "type" "ds") (const_int 8)
260 (eq_attr "type" "vop1") (const_int 8)
261 (eq_attr "type" "vop2") (const_int 8)
262 (eq_attr "type" "vopc") (const_int 8)
263 (eq_attr "type" "vop3a") (const_int 8)
264 (eq_attr "type" "vop3b") (const_int 8)
265 (eq_attr "type" "vop_sdwa") (const_int 8)
266 (eq_attr "type" "vop_dpp") (const_int 8)
267 (eq_attr "type" "flat") (const_int 8)
268 (eq_attr "type" "mult") (const_int 16)
269 (eq_attr "type" "vmult") (const_int 16)]
270 (const_int 4)))
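; For example, the move patterns below override this with explicit
; per-alternative values such as (set_attr "length" "4,4,8,12,...") where a
; literal constant or a trailing s_waitcnt changes the encoded size.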
271
272 ; Disable alternatives that only apply to specific ISA variants.
273
274 (define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))
275
276 (define_attr "enabled" ""
277 (cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
278 (and (eq_attr "gcn_version" "gcn5")
279 (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
280 (const_int 1)]
281 (const_int 0)))
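; For example, the atomic patterns below use
; (set_attr "gcn_version" "gcn5,*,gcn5") so that their scalar and global
; alternatives are only enabled when TARGET_GCN5_PLUS is set.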
282
283 ; We need to be able to identify v_readlane and v_writelane with
284 ; SGPR lane selection in order to handle "Manually Inserted Wait States".
285
286 (define_attr "laneselect" "yes,no" (const_string "no"))
287
288 ; Identify instructions that require a "Manually Inserted Wait State" if
289 ; their inputs are overwritten by subsequent instructions.
290
291 (define_attr "delayeduse" "yes,no" (const_string "no"))
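; For example, "sync_compare_and_swap<mode>_insn" below marks its flat and
; global alternatives with (set_attr "delayeduse" "yes").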
292
293 ;; }}}
294 ;; {{{ Iterators useful across the whole machine description
295
296 (define_mode_iterator SIDI [SI DI])
297 (define_mode_iterator SFDF [SF DF])
298 (define_mode_iterator SISF [SI SF])
299 (define_mode_iterator QIHI [QI HI])
300 (define_mode_iterator DIDF [DI DF])
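; Each iterator expands a pattern into one variant per listed mode; for
; example, patterns written with SIDI below, such as "atomic_load<mode>",
; produce both an SImode and a DImode insn (atomic_loadsi/atomic_loaddi).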
301
302 ;; }}}
303 ;; {{{ Attributes.
304
305 ; Translate RTX code into GCN instruction mnemonics with and without
306 ; suffixes such as _b32, etc.
307
308 (define_code_attr mnemonic
309 [(minus "sub%i")
310 (plus "add%i")
311 (ashift "lshl%b")
312 (lshiftrt "lshr%b")
313 (ashiftrt "ashr%i")
314 (and "and%B")
315 (ior "or%B")
316 (xor "xor%B")
317 (mult "mul%i")
318 (smin "min%i")
319 (smax "max%i")
320 (umin "min%u")
321 (umax "max%u")
322 (not "not%b")
323 (popcount "bcnt_u32%b")])
324
325 (define_code_attr bare_mnemonic
326 [(plus "add")
327 (minus "sub")
328 (and "and")
329 (ior "or")
330 (xor "xor")])
331
332 (define_code_attr s_mnemonic
333 [(not "not%b")
334 (popcount "bcnt1_i32%b")
335 (clz "flbit_i32%b")
336 (ctz "ff1_i32%b")])
337
338 (define_code_attr revmnemonic
339 [(minus "subrev%i")
340 (ashift "lshlrev%b")
341 (lshiftrt "lshrrev%b")
342 (ashiftrt "ashrrev%i")])
343
344 ; Translate RTX code into corresponding expander name.
345
346 (define_code_attr expander
347 [(and "and")
348 (ior "ior")
349 (xor "xor")
350 (plus "add")
351 (minus "sub")
352 (ashift "ashl")
353 (lshiftrt "lshr")
354 (ashiftrt "ashr")
355 (mult "mul")
356 (smin "smin")
357 (smax "smax")
358 (umin "umin")
359 (umax "umax")
360 (not "one_cmpl")
361 (popcount "popcount")
362 (clz "clz")
363 (ctz "ctz")])
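; Taking AND in SImode as an example: <expander> yields the pattern name
; "andsi3", while "s_<mnemonic>0" and "v_<mnemonic>0" in the templates below
; expand to "s_and%B0" and "v_and%B0"; the trailing %B0/%b0/%i0/%u0 escapes
; are expanded by the gcn.c operand-printing code into a size/signedness
; suffix (such as _b32 or _b64) chosen from the operand's mode.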
364
365 ;; }}}
366 ;; {{{ Miscellaneous instructions
367
368 (define_insn "nop"
369 [(const_int 0)]
370 ""
371 "s_nop\t0x0"
372 [(set_attr "type" "sopp")])
373
374 ; FIXME: What should the value of the immediate be? Zero is disallowed, so
375 ; pick 1 for now.
376 (define_insn "trap"
377 [(trap_if (const_int 1) (const_int 0))]
378 ""
379 "s_trap\t1"
380 [(set_attr "type" "sopp")])
381
382 ;; }}}
383 ;; {{{ Moves
384
385 ;; All scalar modes we support moves in.
386 (define_mode_iterator MOV_MODE [BI QI HI SI DI TI SF DF])
387
388 ; This is the entry point for creating all kinds of scalar moves,
389 ; including reloads and symbols.
390
391 (define_expand "mov<mode>"
392 [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
393 (match_operand:MOV_MODE 1 "general_operand"))]
394 ""
395 {
396 if (MEM_P (operands[0]))
397 operands[1] = force_reg (<MODE>mode, operands[1]);
398
399 if (!lra_in_progress && !reload_completed
400 && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1]))
401 {
402 /* Something is probably trying to generate a move
403 which can only work indirectly.
404 E.g. Move from LDS memory to SGPR hardreg
405 or MEM:QI to SGPR. */
406 rtx tmpreg = gen_reg_rtx (<MODE>mode);
407 emit_insn (gen_mov<mode> (tmpreg, operands[1]));
408 emit_insn (gen_mov<mode> (operands[0], tmpreg));
409 DONE;
410 }
411
412 if (<MODE>mode == DImode
413 && (GET_CODE (operands[1]) == SYMBOL_REF
414 || GET_CODE (operands[1]) == LABEL_REF))
415 {
416 if (lra_in_progress)
417 emit_insn (gen_movdi_symbol_save_scc (operands[0], operands[1]));
418 else
419 emit_insn (gen_movdi_symbol (operands[0], operands[1]));
420 DONE;
421 }
422 })
423
424 ; Split invalid moves into two valid moves
425
426 (define_split
427 [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
428 (match_operand:MOV_MODE 1 "general_operand"))]
429 "!reload_completed && !lra_in_progress
430 && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
431 [(set (match_dup 2) (match_dup 1))
432 (set (match_dup 0) (match_dup 2))]
433 {
434 operands[2] = gen_reg_rtx(<MODE>mode);
435 })
436
437 ; We need BImode move so we can reload flags registers.
438
439 (define_insn "*movbi"
440 [(set (match_operand:BI 0 "nonimmediate_operand"
441 "=Sg, v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
442 (match_operand:BI 1 "gcn_load_operand"
443 "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
444 ""
445 {
446 /* SCC as an operand is currently not accepted by the LLVM assembler, so
447 we emit bytes directly as a workaround. */
448 switch (which_alternative) {
449 case 0:
450 if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
451 return "; s_mov_b32\t%0,%1 is not supported by the assembler.\;"
452 ".byte\t0xfd\;"
453 ".byte\t0x0\;"
454 ".byte\t0x80|%R0\;"
455 ".byte\t0xbe";
456 else
457 return "s_mov_b32\t%0, %1";
458 case 1:
459 if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
460 return "; v_mov_b32\t%0, %1\;"
461 ".byte\t0xfd\;"
462 ".byte\t0x2\;"
463 ".byte\t((%V0<<1)&0xff)\;"
464 ".byte\t0x7e|(%V0>>7)";
465 else
466 return "v_mov_b32\t%0, %1";
467 case 2:
468 return "v_readlane_b32\t%0, %1, 0";
469 case 3:
470 return "s_cmpk_lg_u32\t%1, 0";
471 case 4:
472 return "v_cmp_ne_u32\tvcc, 0, %1";
473 case 5:
474 if (REGNO (operands[1]) == SCC_REG)
475 return "; s_mov_b32\t%0, %1 is not supported by the assembler.\;"
476 ".byte\t0xfd\;"
477 ".byte\t0x0\;"
478 ".byte\t0xea\;"
479 ".byte\t0xbe\;"
480 "s_mov_b32\tvcc_hi, 0";
481 else
482 return "s_mov_b32\tvcc_lo, %1\;"
483 "s_mov_b32\tvcc_hi, 0";
484 case 6:
485 return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
486 case 7:
487 return "s_store_dword\t%1, %A0";
488 case 8:
489 return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
490 case 9:
491 return "flat_store_dword\t%A0, %1%O0%g0";
492 case 10:
493 return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
494 case 11:
495 return "global_store_dword\t%A0, %1%O0%g0";
496 default:
497 gcc_unreachable ();
498 }
499 }
500 [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
501 flat,flat")
502 (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
503 (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])
504
505 ; 32bit move pattern
506
507 (define_insn "*mov<mode>_insn"
508 [(set (match_operand:SISF 0 "nonimmediate_operand"
509 "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG, v,SD, v,RM")
510 (match_operand:SISF 1 "gcn_load_operand"
511 "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B, v,RLRG, Y,RM, v"))]
512 ""
513 "@
514 s_mov_b32\t%0, %1
515 s_movk_i32\t%0, %1
516 s_mov_b32\t%0, %1
517 s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
518 s_buffer_store%s1\t%1, s[0:3], %0
519 s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
520 s_store_dword\t%1, %A0
521 v_mov_b32\t%0, %1
522 v_readlane_b32\t%0, %1, 0
523 v_writelane_b32\t%0, %1, 0
524 flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
525 flat_store_dword\t%A0, %1%O0%g0
526 v_mov_b32\t%0, %1
527 ds_write_b32\t%A0, %1%O0
528 ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
529 s_mov_b32\t%0, %1
530 global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
531 global_store_dword\t%A0, %1%O0%g0"
532 [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
533 flat,vop1,ds,ds,sop1,flat,flat")
534 (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
535 (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
536
537 ; 8/16bit move pattern
538
539 (define_insn "*mov<mode>_insn"
540 [(set (match_operand:QIHI 0 "nonimmediate_operand"
541 "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM")
542 (match_operand:QIHI 1 "gcn_load_operand"
543 "SSA, J, B,v, v,Sv,RF, v,B, v,RLRG,RM, v"))]
544 "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
545 "@
546 s_mov_b32\t%0, %1
547 s_movk_i32\t%0, %1
548 s_mov_b32\t%0, %1
549 v_mov_b32\t%0, %1
550 v_readlane_b32\t%0, %1, 0
551 v_writelane_b32\t%0, %1, 0
552 flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
553 flat_store%s0\t%A0, %1%O0%g0
554 v_mov_b32\t%0, %1
555 ds_write%b0\t%A0, %1%O0
556 ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
557 global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
558 global_store%s0\t%A0, %1%O0%g0"
559 [(set_attr "type"
560 "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
561 (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
562 (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
563
564 ; 64bit move pattern
565
566 (define_insn_and_split "*mov<mode>_insn"
567 [(set (match_operand:DIDF 0 "nonimmediate_operand"
568 "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM")
569 (match_operand:DIDF 1 "general_operand"
570 "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v, v,RLRG,RM, v"))]
571 "GET_CODE(operands[1]) != SYMBOL_REF"
572 "@
573 s_mov_b64\t%0, %1
574 s_mov_b64\t%0, %1
575 #
576 s_store_dwordx2\t%1, %A0
577 s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
578 #
579 #
580 #
581 #
582 flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
583 flat_store_dwordx2\t%A0, %1%O0%g0
584 ds_write_b64\t%A0, %1%O0
585 ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
586 global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
587 global_store_dwordx2\t%A0, %1%O0%g0"
588 "(reload_completed && !MEM_P (operands[0]) && !MEM_P (operands[1])
589 && !gcn_sgpr_move_p (operands[0], operands[1]))
590 || (GET_CODE (operands[1]) == CONST_INT && !gcn_constant64_p (operands[1]))"
591 [(set (match_dup 0) (match_dup 1))
592 (set (match_dup 2) (match_dup 3))]
593 {
594 rtx inlo = gen_lowpart (SImode, operands[1]);
595 rtx inhi = gen_highpart_mode (SImode, <MODE>mode, operands[1]);
596 rtx outlo = gen_lowpart (SImode, operands[0]);
597 rtx outhi = gen_highpart_mode (SImode, <MODE>mode, operands[0]);
598
599 /* Ensure that overlapping registers aren't corrupted. */
600 if (REGNO (outlo) == REGNO (inhi))
601 {
602 operands[0] = outhi;
603 operands[1] = inhi;
604 operands[2] = outlo;
605 operands[3] = inlo;
606 }
607 else
608 {
609 operands[0] = outlo;
610 operands[1] = inlo;
611 operands[2] = outhi;
612 operands[3] = inhi;
613 }
614 }
615 [(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
616 flat,ds,ds,flat,flat")
617 (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
618
619 ; 128-bit move.
620
621 (define_insn_and_split "*movti_insn"
622 [(set (match_operand:TI 0 "nonimmediate_operand"
623 "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
624 (match_operand:TI 1 "general_operand"
625 "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
626 ""
627 "@
628 #
629 s_store_dwordx4\t%1, %A0
630 s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
631 flat_store_dwordx4\t%A0, %1%O0%g0
632 flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
633 #
634 #
635 #
636 global_store_dwordx4\t%A0, %1%O0%g0
637 global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
638 ds_write_b128\t%A0, %1%O0
639 ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
640 "reload_completed
641 && REG_P (operands[0])
642 && (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)"
643 [(set (match_dup 0) (match_dup 1))
644 (set (match_dup 2) (match_dup 3))
645 (set (match_dup 4) (match_dup 5))
646 (set (match_dup 6) (match_dup 7))]
647 {
648 operands[6] = gcn_operand_part (TImode, operands[0], 3);
649 operands[7] = gcn_operand_part (TImode, operands[1], 3);
650 operands[4] = gcn_operand_part (TImode, operands[0], 2);
651 operands[5] = gcn_operand_part (TImode, operands[1], 2);
652 operands[2] = gcn_operand_part (TImode, operands[0], 1);
653 operands[3] = gcn_operand_part (TImode, operands[1], 1);
654 operands[0] = gcn_operand_part (TImode, operands[0], 0);
655 operands[1] = gcn_operand_part (TImode, operands[1], 0);
656 }
657 [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
658 ds,ds")
659 (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
660 (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
661
662 ;; }}}
663 ;; {{{ Prologue/Epilogue
664
665 (define_insn "prologue_use"
666 [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
667 ""
668 ""
669 [(set_attr "length" "0")])
670
671 (define_expand "prologue"
672 [(const_int 0)]
673 ""
674 {
675 gcn_expand_prologue ();
676 DONE;
677 })
678
679 (define_expand "epilogue"
680 [(const_int 0)]
681 ""
682 {
683 gcn_expand_epilogue ();
684 DONE;
685 })
686
687 ;; }}}
688 ;; {{{ Control flow
689
690 ; This pattern must satisfy simplejump_p, which means it cannot be a parallel
691 ; that clobbers SCC. Thus, we must preserve SCC if we're generating a long
692 ; branch sequence.
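; The length attribute below selects the short 4-byte s_branch form only when
; the displacement fits s_branch's signed 16-bit word offset (tested here as
; -131072 <= offset < 131072 bytes); otherwise the long get-PC/set-PC
; sequence is emitted.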
693
694 (define_insn "jump"
695 [(set (pc)
696 (label_ref (match_operand 0)))]
697 ""
698 {
699 if (get_attr_length (insn) == 4)
700 return "s_branch\t%0";
701 else
702 /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG. */
703 return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
704 ".long\t0xbe9600fd\;"
705 "s_getpc_b64\ts[20:21]\;"
706 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
707 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
708 "s_cmpk_lg_u32\ts22, 0\;"
709 "s_setpc_b64\ts[20:21]";
710 }
711 [(set_attr "type" "sopp")
712 (set (attr "length")
713 (if_then_else (and (ge (minus (match_dup 0) (pc))
714 (const_int -131072))
715 (lt (minus (match_dup 0) (pc))
716 (const_int 131072)))
717 (const_int 4)
718 (const_int 32)))])
719
720 (define_insn "indirect_jump"
721 [(set (pc)
722 (match_operand:DI 0 "register_operand" "Sg"))]
723 ""
724 "s_setpc_b64\t%0"
725 [(set_attr "type" "sop1")
726 (set_attr "length" "4")])
727
728 (define_insn "cjump"
729 [(set (pc)
730 (if_then_else
731 (match_operator:BI 1 "gcn_conditional_operator"
732 [(match_operand:BI 2 "gcn_conditional_register_operand" "ca,cV")
733 (const_int 0)])
734 (label_ref (match_operand 0))
735 (pc)))]
736 ""
737 {
738 if (get_attr_length (insn) == 4)
739 return "s_cbranch%C1\t%0";
740 else
741 {
742 /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG but
743 restores SCC. */
744 if (REGNO (operands[2]) == SCC_REG)
745 {
746 if (GET_CODE (operands[1]) == EQ)
747 return "s_cbranch%c1\t.Lskip%=\;"
748 "s_getpc_b64\ts[20:21]\;"
749 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
750 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
751 "s_cmp_lg_u32\t0, 0\;"
752 "s_setpc_b64\ts[20:21]\n"
753 ".Lskip%=:";
754 else
755 return "s_cbranch%c1\t.Lskip%=\;"
756 "s_getpc_b64\ts[20:21]\;"
757 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
758 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
759 "s_cmp_eq_u32\t0, 0\;"
760 "s_setpc_b64\ts[20:21]\n"
761 ".Lskip%=:";
762 }
763 else
764 return "s_cbranch%c1\t.Lskip%=\;"
765 "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
766 ".byte\t0xfd\;"
767 ".byte\t0x0\;"
768 ".byte\t0x80|22\;"
769 ".byte\t0xbe\;"
770 "s_getpc_b64\ts[20:21]\;"
771 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
772 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
773 "s_cmpk_lg_u32\ts22, 0\;"
774 "s_setpc_b64\ts[20:21]\n"
775 ".Lskip%=:";
776 }
777 }
778 [(set_attr "type" "sopp")
779 (set (attr "length")
780 (if_then_else (and (ge (minus (match_dup 0) (pc))
781 (const_int -131072))
782 (lt (minus (match_dup 0) (pc))
783 (const_int 131072)))
784 (const_int 4)
785 (const_int 36)))])
786
787 ; Returning from a normal function is different to returning from a
788 ; kernel function.
789
790 (define_insn "gcn_return"
791 [(return)]
792 ""
793 {
794 if (cfun && cfun->machine && cfun->machine->normal_function)
795 return "s_setpc_b64\ts[18:19]";
796 else
797 return "s_waitcnt\tlgkmcnt(0)\;s_dcache_wb\;s_endpgm";
798 }
799 [(set_attr "type" "sop1")
800 (set_attr "length" "12")])
801
802 (define_expand "call"
803 [(parallel [(call (match_operand 0 "")
804 (match_operand 1 ""))
805 (clobber (reg:DI LR_REGNUM))
806 (clobber (match_scratch:DI 2))])]
807 ""
808 {})
809
810 (define_insn "gcn_simple_call"
811 [(call (mem (match_operand 0 "immediate_operand" "Y,B"))
812 (match_operand 1 "const_int_operand"))
813 (clobber (reg:DI LR_REGNUM))
814 (clobber (match_scratch:DI 2 "=&Sg,X"))]
815 ""
816 "@
817 s_getpc_b64\t%2\;s_add_u32\t%L2, %L2, %0@rel32@lo+4\;s_addc_u32\t%H2, %H2, %0@rel32@hi+4\;s_swappc_b64\ts[18:19], %2
818 s_swappc_b64\ts[18:19], %0"
819 [(set_attr "type" "mult,sop1")
820 (set_attr "length" "24,4")])
821
822 (define_insn "movdi_symbol"
823 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
824 (match_operand:DI 1 "general_operand" "Y"))
825 (clobber (reg:BI SCC_REG))]
826 "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
827 {
828 if (SYMBOL_REF_P (operands[1])
829 && SYMBOL_REF_WEAK (operands[1]))
830 return "s_getpc_b64\t%0\;"
831 "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
832 "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
833 "s_load_dwordx2\t%0, %0\;"
834 "s_waitcnt\tlgkmcnt(0)";
835
836 return "s_getpc_b64\t%0\;"
837 "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
838 "s_addc_u32\t%H0, %H0, %1@rel32@hi+4";
839 }
840 [(set_attr "type" "mult")
841 (set_attr "length" "32")])
842
843 (define_insn "movdi_symbol_save_scc"
844 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
845 (match_operand:DI 1 "general_operand" "Y"))
846 (clobber (reg:BI CC_SAVE_REG))]
847 "(GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF)
848 && (lra_in_progress || reload_completed)"
849 {
850 /* !!! These sequences clobber CC_SAVE_REG. */
851
852 if (SYMBOL_REF_P (operands[1])
853 && SYMBOL_REF_WEAK (operands[1]))
854 return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
855 ".long\t0xbe9600fd\;"
856 "s_getpc_b64\t%0\;"
857 "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
858 "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
859 "s_load_dwordx2\t%0, %0\;"
860 "s_cmpk_lg_u32\ts22, 0\;"
861 "s_waitcnt\tlgkmcnt(0)";
862
863 return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
864 ".long\t0xbe9600fd\;"
865 "s_getpc_b64\t%0\;"
866 "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
867 "s_addc_u32\t%H0, %H0, %1@rel32@hi+4\;"
868 "s_cmpk_lg_u32\ts22, 0";
869 }
870 [(set_attr "type" "mult")
871 (set_attr "length" "40")])
872
873
874 (define_insn "gcn_indirect_call"
875 [(call (mem (match_operand:DI 0 "register_operand" "Sg"))
876 (match_operand 1 "" ""))
877 (clobber (reg:DI LR_REGNUM))
878 (clobber (match_scratch:DI 2 "=X"))]
879 ""
880 "s_swappc_b64\ts[18:19], %0"
881 [(set_attr "type" "sop1")
882 (set_attr "length" "4")])
883
884 (define_expand "call_value"
885 [(parallel [(set (match_operand 0 "")
886 (call (match_operand 1 "")
887 (match_operand 2 "")))
888 (clobber (reg:DI LR_REGNUM))
889 (clobber (match_scratch:DI 3))])]
890 ""
891 {})
892
893 (define_insn "gcn_call_value"
894 [(set (match_operand 0 "register_operand" "=Sg,Sg")
895 (call (mem (match_operand 1 "immediate_operand" "Y,B"))
896 (match_operand 2 "const_int_operand")))
897 (clobber (reg:DI LR_REGNUM))
898 (clobber (match_scratch:DI 3 "=&Sg,X"))]
899 ""
900 "@
901 s_getpc_b64\t%3\;s_add_u32\t%L3, %L3, %1@rel32@lo+4\;s_addc_u32\t%H3, %H3, %1@rel32@hi+4\;s_swappc_b64\ts[18:19], %3
902 s_swappc_b64\ts[18:19], %1"
903 [(set_attr "type" "sop1")
904 (set_attr "length" "24")])
905
906 (define_insn "gcn_call_value_indirect"
907 [(set (match_operand 0 "register_operand" "=Sg")
908 (call (mem (match_operand:DI 1 "register_operand" "Sg"))
909 (match_operand 2 "" "")))
910 (clobber (reg:DI LR_REGNUM))
911 (clobber (match_scratch:DI 3 "=X"))]
912 ""
913 "s_swappc_b64\ts[18:19], %1"
914 [(set_attr "type" "sop1")
915 (set_attr "length" "4")])
916
917 ; GCN does not have an instruction to clear only part of the instruction
918 ; cache, so the operands are ignored.
919
920 (define_insn "clear_icache"
921 [(unspec_volatile
922 [(match_operand 0 "") (match_operand 1 "")]
923 UNSPECV_ICACHE_INV)]
924 ""
925 "s_icache_inv"
926 [(set_attr "type" "sopp")
927 (set_attr "length" "4")])
928
929 ;; }}}
930 ;; {{{ Conditionals
931
932 ; 32-bit compare, scalar unit only
933
934 (define_insn "cstoresi4"
935 [(set (match_operand:BI 0 "gcn_conditional_register_operand"
936 "=cs, cs, cs, cs")
937 (match_operator:BI 1 "gcn_compare_operator"
938 [(match_operand:SI 2 "gcn_alu_operand" "SSA,SSA,SSB, SS")
939 (match_operand:SI 3 "gcn_alu_operand" "SSA,SSL, SS,SSB")]))]
940 ""
941 "@
942 s_cmp%D1\t%2, %3
943 s_cmpk%D1\t%2, %3
944 s_cmp%D1\t%2, %3
945 s_cmp%D1\t%2, %3"
946 [(set_attr "type" "sopc,sopk,sopk,sopk")
947 (set_attr "length" "4,4,8,8")])
948
949 (define_expand "cbranchsi4"
950 [(match_operator 0 "gcn_compare_operator"
951 [(match_operand:SI 1 "gcn_alu_operand")
952 (match_operand:SI 2 "gcn_alu_operand")])
953 (match_operand 3)]
954 ""
955 {
956 rtx cc = gen_reg_rtx (BImode);
957 emit_insn (gen_cstoresi4 (cc, operands[0], operands[1], operands[2]));
958 emit_jump_insn (gen_cjump (operands[3],
959 gen_rtx_NE (BImode, cc, const0_rtx), cc));
960 DONE;
961 })
962
963 ; 64-bit compare; either unit, but scalar allows limited operators
964
965 (define_expand "cstoredi4"
966 [(set (match_operand:BI 0 "gcn_conditional_register_operand")
967 (match_operator:BI 1 "gcn_compare_operator"
968 [(match_operand:DI 2 "gcn_alu_operand")
969 (match_operand:DI 3 "gcn_alu_operand")]))]
970 ""
971 {})
972
973 (define_insn "cstoredi4_vec_and_scalar"
974 [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cs, cV")
975 (match_operator:BI 1 "gcn_compare_64bit_operator"
976 [(match_operand:DI 2 "gcn_alu_operand" "%SSA,vSvC")
977 (match_operand:DI 3 "gcn_alu_operand" " SSC, v")]))]
978 ""
979 "@
980 s_cmp%D1\t%2, %3
981 v_cmp%E1\tvcc, %2, %3"
982 [(set_attr "type" "sopc,vopc")
983 (set_attr "length" "8")])
984
985 (define_insn "cstoredi4_vector"
986 [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cV")
987 (match_operator:BI 1 "gcn_compare_operator"
988 [(match_operand:DI 2 "gcn_alu_operand" "vSvB")
989 (match_operand:DI 3 "gcn_alu_operand" " v")]))]
990 ""
991 "v_cmp%E1\tvcc, %2, %3"
992 [(set_attr "type" "vopc")
993 (set_attr "length" "8")])
994
995 (define_expand "cbranchdi4"
996 [(match_operator 0 "gcn_compare_operator"
997 [(match_operand:DI 1 "gcn_alu_operand")
998 (match_operand:DI 2 "gcn_alu_operand")])
999 (match_operand 3)]
1000 ""
1001 {
1002 rtx cc = gen_reg_rtx (BImode);
1003 emit_insn (gen_cstoredi4 (cc, operands[0], operands[1], operands[2]));
1004 emit_jump_insn (gen_cjump (operands[3],
1005 gen_rtx_NE (BImode, cc, const0_rtx), cc));
1006 DONE;
1007 })
1008
1009 ; FP compare; vector unit only
1010
1011 (define_insn "cstore<mode>4"
1012 [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cV")
1013 (match_operator:BI 1 "gcn_fp_compare_operator"
1014 [(match_operand:SFDF 2 "gcn_alu_operand" "vB")
1015 (match_operand:SFDF 3 "gcn_alu_operand" "v")]))]
1016 ""
1017 "v_cmp%E1\tvcc, %2, %3"
1018 [(set_attr "type" "vopc")
1019 (set_attr "length" "8")])
1020
1021 (define_expand "cbranch<mode>4"
1022 [(match_operator 0 "gcn_fp_compare_operator"
1023 [(match_operand:SFDF 1 "gcn_alu_operand")
1024 (match_operand:SFDF 2 "gcn_alu_operand")])
1025 (match_operand 3)]
1026 ""
1027 {
1028 rtx cc = gen_reg_rtx (BImode);
1029 emit_insn (gen_cstore<mode>4 (cc, operands[0], operands[1], operands[2]));
1030 emit_jump_insn (gen_cjump (operands[3],
1031 gen_rtx_NE (BImode, cc, const0_rtx), cc));
1032 DONE;
1033 })
1034
1035 ;; }}}
1036 ;; {{{ ALU special cases: Plus
1037
1038 (define_insn "addsi3"
1039 [(set (match_operand:SI 0 "register_operand" "= Sg, Sg, Sg, v")
1040 (plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v")
1041 (match_operand:SI 2 "gcn_alu_operand" " SgA,SgJ, B,vBSv")))
1042 (clobber (match_scratch:BI 3 "= cs, cs, cs, X"))
1043 (clobber (match_scratch:DI 4 "= X, X, X, cV"))]
1044 ""
1045 "@
1046 s_add_i32\t%0, %1, %2
1047 s_addk_i32\t%0, %2
1048 s_add_i32\t%0, %1, %2
1049 v_add%^_u32\t%0, vcc, %2, %1"
1050 [(set_attr "type" "sop2,sopk,sop2,vop2")
1051 (set_attr "length" "4,4,8,8")])
1052
1053 (define_expand "addsi3_scc"
1054 [(parallel [(set (match_operand:SI 0 "register_operand")
1055 (plus:SI (match_operand:SI 1 "gcn_alu_operand")
1056 (match_operand:SI 2 "gcn_alu_operand")))
1057 (clobber (reg:BI SCC_REG))
1058 (clobber (scratch:DI))])]
1059 ""
1060 {})
1061
1062 ; Having this as an insn_and_split allows us to keep DImode adds together
1063 ; through some RTL optimisation passes, and means the CC reg we set isn't
1064 ; dependent on the constraint alternative (which doesn't seem to work well).
1065
1066 ; There's an early clobber in the case where "v[0:1]=v[1:2]+?" but
1067 ; "v[0:1]=v[0:1]+?" is fine (as is "v[1:2]=v[0:1]+?", but that's trickier).
1068
1069 ; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
1070 ; used as an operand due to the read of VCC, so we restrict constants to the
1071 ; inlinable range for that alternative.
1072
1073 (define_insn_and_split "adddi3"
1074 [(set (match_operand:DI 0 "register_operand"
1075 "=&Sg,&Sg,&Sg,&Sg,&v,&v,&v,&v")
1076 (plus:DI (match_operand:DI 1 "register_operand"
1077 " Sg, 0, 0, Sg, v, 0, 0, v")
1078 (match_operand:DI 2 "nonmemory_operand"
1079 " 0,SgB, 0,SgB, 0,vA, 0,vA")))
1080 (clobber (match_scratch:BI 3 "= cs, cs, cs, cs, X, X, X, X"))
1081 (clobber (match_scratch:DI 4 "= X, X, X, X,cV,cV,cV,cV"))]
1082 ""
1083 "#"
1084 "&& reload_completed"
1085 [(const_int 0)]
1086 {
1087 rtx cc = gen_rtx_REG (BImode, gcn_vgpr_register_operand (operands[1],
1088 DImode)
1089 ? VCC_REG : SCC_REG);
1090
1091 emit_insn (gen_addsi3_scalar_carry
1092 (gcn_operand_part (DImode, operands[0], 0),
1093 gcn_operand_part (DImode, operands[1], 0),
1094 gcn_operand_part (DImode, operands[2], 0),
1095 cc));
1096 rtx val = gcn_operand_part (DImode, operands[2], 1);
1097 if (val != const0_rtx)
1098 emit_insn (gen_addcsi3_scalar
1099 (gcn_operand_part (DImode, operands[0], 1),
1100 gcn_operand_part (DImode, operands[1], 1),
1101 gcn_operand_part (DImode, operands[2], 1),
1102 cc, cc));
1103 else
1104 emit_insn (gen_addcsi3_scalar_zero
1105 (gcn_operand_part (DImode, operands[0], 1),
1106 gcn_operand_part (DImode, operands[1], 1),
1107 cc));
1108 DONE;
1109 }
1110 [(set_attr "type" "mult,mult,mult,mult,vmult,vmult,vmult,vmult")
1111 (set_attr "length" "8")])
1112
1113 (define_expand "adddi3_scc"
1114 [(parallel [(set (match_operand:DI 0 "register_operand")
1115 (plus:DI (match_operand:DI 1 "register_operand")
1116 (match_operand:DI 2 "nonmemory_operand")))
1117 (clobber (reg:BI SCC_REG))
1118 (clobber (scratch:DI))])]
1119 ""
1120 {})
1121
1122 ;; Add with carry.
1123
1124 (define_insn "addsi3_scalar_carry"
1125 [(set (match_operand:SI 0 "register_operand" "= Sg, v")
1126 (plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, v")
1127 (match_operand:SI 2 "gcn_alu_operand" " SgB,vB")))
1128 (set (match_operand:BI 3 "register_operand" "= cs,cV")
1129 (ltu:BI (plus:SI (match_dup 1)
1130 (match_dup 2))
1131 (match_dup 1)))]
1132 ""
1133 "@
1134 s_add_u32\t%0, %1, %2
1135 v_add%^_u32\t%0, vcc, %2, %1"
1136 [(set_attr "type" "sop2,vop2")
1137 (set_attr "length" "8,8")])
1138
1139 (define_insn "addsi3_scalar_carry_cst"
1140 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1141 (plus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA, v")
1142 (match_operand:SI 2 "const_int_operand" " n, n")))
1143 (set (match_operand:BI 4 "register_operand" "=cs,cV")
1144 (geu:BI (plus:SI (match_dup 1)
1145 (match_dup 2))
1146 (match_operand:SI 3 "const_int_operand" " n, n")))]
1147 "INTVAL (operands[2]) == -INTVAL (operands[3])"
1148 "@
1149 s_add_u32\t%0, %1, %2
1150 v_add%^_u32\t%0, vcc, %2, %1"
1151 [(set_attr "type" "sop2,vop2")
1152 (set_attr "length" "4")])
1153
1154 (define_insn "addcsi3_scalar"
1155 [(set (match_operand:SI 0 "register_operand" "= Sg, v")
1156 (plus:SI (plus:SI (zero_extend:SI
1157 (match_operand:BI 3 "register_operand" "= cs,cV"))
1158 (match_operand:SI 1 "gcn_alu_operand" "%SgA, v"))
1159 (match_operand:SI 2 "gcn_alu_operand" " SgB,vA")))
1160 (set (match_operand:BI 4 "register_operand" "= 3, 3")
1161 (ior:BI (ltu:BI (plus:SI
1162 (plus:SI
1163 (zero_extend:SI (match_dup 3))
1164 (match_dup 1))
1165 (match_dup 2))
1166 (match_dup 2))
1167 (ltu:BI (plus:SI (zero_extend:SI (match_dup 3)) (match_dup 1))
1168 (match_dup 1))))]
1169 ""
1170 "@
1171 s_addc_u32\t%0, %1, %2
1172 v_addc%^_u32\t%0, vcc, %2, %1, vcc"
1173 [(set_attr "type" "sop2,vop2")
1174 (set_attr "length" "8,4")])
1175
1176 (define_insn "addcsi3_scalar_zero"
1177 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1178 (plus:SI (zero_extend:SI
1179 (match_operand:BI 2 "register_operand" "=cs,cV"))
1180 (match_operand:SI 1 "gcn_alu_operand" "SgA, v")))
1181 (set (match_dup 2)
1182 (ltu:BI (plus:SI (zero_extend:SI (match_dup 2))
1183 (match_dup 1))
1184 (match_dup 1)))]
1185 ""
1186 "@
1187 s_addc_u32\t%0, %1, 0
1188 v_addc%^_u32\t%0, vcc, 0, %1, vcc"
1189 [(set_attr "type" "sop2,vop2")
1190 (set_attr "length" "4")])
1191
1192 ; "addptr" is the same as "add" except that it must not write to VCC or SCC
1193 ; as a side-effect. Unfortunately GCN does not have a suitable instruction
1194 ; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp.
1195 ; Note that it is not safe to save/clobber/restore SCC because doing so will
1196 ; break data-flow analysis, so this must use vector registers.
1197
1198 (define_insn "addptrdi3"
1199 [(set (match_operand:DI 0 "register_operand" "= &v")
1200 (plus:DI (match_operand:DI 1 "register_operand" " v0")
1201 (match_operand:DI 2 "nonmemory_operand" "vDA0")))]
1202 ""
1203 {
1204 rtx new_operands[4] = { operands[0], operands[1], operands[2],
1205 gen_rtx_REG (DImode, CC_SAVE_REG) };
1206
1207 output_asm_insn ("v_add%^_u32 %L0, %3, %L2, %L1", new_operands);
1208 output_asm_insn ("v_addc%^_u32 %H0, %3, %H2, %H1, %3", new_operands);
1209
1210 return "";
1211 }
1212 [(set_attr "type" "vmult")
1213 (set_attr "length" "16")])
1214
1215 ;; }}}
1216 ;; {{{ ALU special cases: Minus
1217
1218 (define_insn "subsi3"
1219 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v, v")
1220 (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA, v,vBSv")
1221 (match_operand:SI 2 "gcn_alu_operand" "SgA, B, vBSv, v")))
1222 (clobber (match_scratch:BI 3 "=cs, cs, X, X"))
1223 (clobber (match_scratch:DI 4 "= X, X, cV, cV"))]
1224 ""
1225 "@
1226 s_sub_i32\t%0, %1, %2
1227 s_sub_i32\t%0, %1, %2
1228 v_subrev%^_u32\t%0, vcc, %2, %1
1229 v_sub%^_u32\t%0, vcc, %1, %2"
1230 [(set_attr "type" "sop2,sop2,vop2,vop2")
1231 (set_attr "length" "4,8,8,8")])
1232
1233 (define_insn_and_split "subdi3"
1234 [(set (match_operand:DI 0 "register_operand" "=Sg, Sg")
1235 (minus:DI
1236 (match_operand:DI 1 "gcn_alu_operand" "SgA,SgB")
1237 (match_operand:DI 2 "gcn_alu_operand" "SgB,SgA")))
1238 (clobber (reg:BI SCC_REG))]
1239 ""
1240 "#"
1241 "reload_completed"
1242 [(const_int 0)]
1243 {
1244 emit_insn (gen_subsi3_scalar_carry
1245 (gcn_operand_part (DImode, operands[0], 0),
1246 gcn_operand_part (DImode, operands[1], 0),
1247 gcn_operand_part (DImode, operands[2], 0)));
1248 rtx val = gcn_operand_part (DImode, operands[2], 1);
1249 if (val != const0_rtx)
1250 emit_insn (gen_subcsi3_scalar
1251 (gcn_operand_part (DImode, operands[0], 1),
1252 gcn_operand_part (DImode, operands[1], 1),
1253 gcn_operand_part (DImode, operands[2], 1)));
1254 else
1255 emit_insn (gen_subcsi3_scalar_zero
1256 (gcn_operand_part (DImode, operands[0], 1),
1257 gcn_operand_part (DImode, operands[1], 1)));
1258 DONE;
1259 }
1260 [(set_attr "length" "8")])
1261
1262 (define_insn "subsi3_scalar_carry"
1263 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg")
1264 (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB")
1265 (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
1266 (set (reg:BI SCC_REG)
1267 (gtu:BI (minus:SI (match_dup 1)
1268 (match_dup 2))
1269 (match_dup 1)))]
1270 ""
1271 "s_sub_u32\t%0, %1, %2"
1272 [(set_attr "type" "sop2")
1273 (set_attr "length" "8")])
1274
1275 (define_insn "subsi3_scalar_carry_cst"
1276 [(set (match_operand:SI 0 "register_operand" "=Sg")
1277 (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA")
1278 (match_operand:SI 2 "const_int_operand" " n")))
1279 (set (reg:BI SCC_REG)
1280 (leu:BI (minus:SI (match_dup 1)
1281 (match_dup 2))
1282 (match_operand:SI 3 "const_int_operand" " n")))]
1283 "INTVAL (operands[2]) == -INTVAL (operands[3])"
1284 "s_sub_u32\t%0, %1, %2"
1285 [(set_attr "type" "sop2")
1286 (set_attr "length" "4")])
1287
1288 (define_insn "subcsi3_scalar"
1289 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg")
1290 (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1291 (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB"))
1292 (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
1293 (set (reg:BI SCC_REG)
1294 (ior:BI (gtu:BI (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1295 (match_dup 1))
1296 (match_dup 2))
1297 (match_dup 1))
1298 (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1299 (match_dup 1))
1300 (match_dup 1))))]
1301 ""
1302 "s_subb_u32\t%0, %1, %2"
1303 [(set_attr "type" "sop2")
1304 (set_attr "length" "8")])
1305
1306 (define_insn "subcsi3_scalar_zero"
1307 [(set (match_operand:SI 0 "register_operand" "=Sg")
1308 (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1309 (match_operand:SI 1 "gcn_alu_operand" "SgA")))
1310 (set (reg:BI SCC_REG)
1311 (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
1312 (match_dup 1)))]
1313 ""
1314 "s_subb_u32\t%0, %1, 0"
1315 [(set_attr "type" "sop2")
1316 (set_attr "length" "4")])
1317
1318 ;; }}}
1319 ;; {{{ ALU: mult
1320
1321 ; Vector multiply has vop3a encoding, but no corresponding vop2a, so no long
1322 ; immediate.
1323 (define_insn "mulsi3"
1324 [(set (match_operand:SI 0 "register_operand" "= Sg,Sg, Sg, v")
1325 (mult:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v")
1326 (match_operand:SI 2 "gcn_alu_operand" " SgA, J, B,vASv")))]
1327 ""
1328 "@
1329 s_mul_i32\t%0, %1, %2
1330 s_mulk_i32\t%0, %2
1331 s_mul_i32\t%0, %1, %2
1332 v_mul_lo_i32\t%0, %1, %2"
1333 [(set_attr "type" "sop2,sopk,sop2,vop3a")
1334 (set_attr "length" "4,4,8,4")])
1335
1336 (define_code_iterator any_extend [sign_extend zero_extend])
1337 (define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")])
1338 (define_code_attr su [(sign_extend "s") (zero_extend "u")])
1339 (define_code_attr u [(sign_extend "") (zero_extend "u")])
1340 (define_code_attr iu [(sign_extend "i") (zero_extend "u")])
1341 (define_code_attr e [(sign_extend "e") (zero_extend "")])
1342
1343 (define_insn "<su>mulsi3_highpart"
1344 [(set (match_operand:SI 0 "register_operand" "= v")
1345 (truncate:SI
1346 (lshiftrt:DI
1347 (mult:DI
1348 (any_extend:DI
1349 (match_operand:SI 1 "register_operand" "% v"))
1350 (any_extend:DI
1351 (match_operand:SI 2 "register_operand" "vSv")))
1352 (const_int 32))))]
1353 ""
1354 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1355 [(set_attr "type" "vop3a")
1356 (set_attr "length" "8")])
1357
1358 (define_insn "<u>mulhisi3"
1359 [(set (match_operand:SI 0 "register_operand" "=v")
1360 (mult:SI
1361 (any_extend:SI (match_operand:HI 1 "register_operand" "%v"))
1362 (any_extend:SI (match_operand:HI 2 "register_operand" " v"))))]
1363 ""
1364 "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:WORD_0 src1_sel:WORD_0"
1365 [(set_attr "type" "vop_sdwa")
1366 (set_attr "length" "8")])
1367
1368 (define_insn "<u>mulqihi3_scalar"
1369 [(set (match_operand:HI 0 "register_operand" "=v")
1370 (mult:HI
1371 (any_extend:HI (match_operand:QI 1 "register_operand" "%v"))
1372 (any_extend:HI (match_operand:QI 2 "register_operand" " v"))))]
1373 ""
1374 "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:BYTE_0 src1_sel:BYTE_0"
1375 [(set_attr "type" "vop_sdwa")
1376 (set_attr "length" "8")])
1377
1378 ;; }}}
1379 ;; {{{ ALU: generic 32-bit unop
1380
1381 (define_code_iterator bitunop [not popcount])
1382 (define_code_attr popcount_extra_op [(not "") (popcount ", 0")])
1383
1384 (define_insn "<expander>si2"
1385 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1386 (bitunop:SI
1387 (match_operand:SI 1 "gcn_alu_operand" "SgB,vSvB")))
1388 (clobber (match_scratch:BI 2 "=cs, X"))]
1389 ""
1390 "@
1391 s_<s_mnemonic>0\t%0, %1
1392 v_<mnemonic>0\t%0, %1<popcount_extra_op>"
1393 [(set_attr "type" "sop1,vop1")
1394 (set_attr "length" "8")])
1395
1396 (define_code_iterator countzeros [clz ctz])
1397
1398 (define_insn "<expander>si2"
1399 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
1400 (countzeros:SI
1401 (match_operand:SI 1 "gcn_alu_operand" "SgA, B")))]
1402 ""
1403 "s_<s_mnemonic>1\t%0, %1"
1404 [(set_attr "type" "sop1")
1405 (set_attr "length" "4,8")])
1406
1407 ; The truncate ensures that a constant passed to operand 1 is treated as DImode
1408 (define_insn "<expander>di2"
1409 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
1410 (truncate:SI
1411 (countzeros:DI
1412 (match_operand:DI 1 "gcn_alu_operand" "SgA, B"))))]
1413 ""
1414 "s_<s_mnemonic>1\t%0, %1"
1415 [(set_attr "type" "sop1")
1416 (set_attr "length" "4,8")])
1417
1418 ;; }}}
1419 ;; {{{ ALU: generic 32-bit binop
1420
1421 ; No plus or mult here - they have variants with a 16-bit immediate
1422 ; and so are defined separately (see the Plus and mult sections above).
1423 (define_code_iterator binop [and ior xor smin smax umin umax
1424 ashift lshiftrt ashiftrt])
1425 (define_code_iterator vec_and_scalar_com [and ior xor smin smax umin umax])
1426 (define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt])
1427
1428 (define_insn "<expander>si3"
1429 [(set (match_operand:SI 0 "gcn_valu_dst_operand" "= Sg, v,RD")
1430 (vec_and_scalar_com:SI
1431 (match_operand:SI 1 "gcn_valu_src0_operand" "%SgA,vSvB, 0")
1432 (match_operand:SI 2 "gcn_alu_operand" " SgB, v, v")))
1433 (clobber (match_scratch:BI 3 "= cs, X, X"))]
1434 ""
1435 "@
1436 s_<mnemonic>0\t%0, %1, %2
1437 v_<mnemonic>0\t%0, %1, %2
1438 ds_<mnemonic>0\t%A0, %2%O0"
1439 [(set_attr "type" "sop2,vop2,ds")
1440 (set_attr "length" "8")])
1441
1442 (define_insn "<expander>si3"
1443 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v")
1444 (vec_and_scalar_nocom:SI
1445 (match_operand:SI 1 "gcn_alu_operand" "SgB,SgA, v")
1446 (match_operand:SI 2 "gcn_alu_operand" "SgA,SgB,vSvB")))
1447 (clobber (match_scratch:BI 3 "=cs, cs, X"))]
1448 ""
1449 "@
1450 s_<mnemonic>0\t%0, %1, %2
1451 s_<mnemonic>0\t%0, %1, %2
1452 v_<revmnemonic>0\t%0, %2, %1"
1453 [(set_attr "type" "sop2,sop2,vop2")
1454 (set_attr "length" "8")])
1455
1456 (define_expand "<expander>si3_scc"
1457 [(parallel [(set (match_operand:SI 0 "gcn_valu_dst_operand")
1458 (binop:SI
1459 (match_operand:SI 1 "gcn_valu_src0_operand")
1460 (match_operand:SI 2 "gcn_alu_operand")))
1461 (clobber (reg:BI SCC_REG))])]
1462 ""
1463 {})
1464
1465 ;; }}}
1466 ;; {{{ ALU: generic 64-bit
1467
1468 (define_code_iterator vec_and_scalar64_com [and ior xor])
1469
1470 (define_insn_and_split "<expander>di3"
1471 [(set (match_operand:DI 0 "register_operand" "= Sg, &v, &v")
1472 (vec_and_scalar64_com:DI
1473 (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB,vSvDB")
1474 (match_operand:DI 2 "gcn_alu_operand" " SgC, v, 0")))
1475 (clobber (match_scratch:BI 3 "= cs, X, X"))]
1476 ""
1477 "@
1478 s_<mnemonic>0\t%0, %1, %2
1479 #
1480 #"
1481 "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
1482 [(parallel [(set (match_dup 4)
1483 (vec_and_scalar64_com:SI (match_dup 5) (match_dup 6)))
1484 (clobber (match_dup 3))])
1485 (parallel [(set (match_dup 7)
1486 (vec_and_scalar64_com:SI (match_dup 8) (match_dup 9)))
1487 (clobber (match_dup 3))])]
1488 {
1489 operands[4] = gcn_operand_part (DImode, operands[0], 0);
1490 operands[5] = gcn_operand_part (DImode, operands[1], 0);
1491 operands[6] = gcn_operand_part (DImode, operands[2], 0);
1492 operands[7] = gcn_operand_part (DImode, operands[0], 1);
1493 operands[8] = gcn_operand_part (DImode, operands[1], 1);
1494 operands[9] = gcn_operand_part (DImode, operands[2], 1);
1495 }
1496 [(set_attr "type" "sop2,vop2,vop2")
1497 (set_attr "length" "8")])
1498
1499 (define_insn "<expander>di3"
1500 [(set (match_operand:DI 0 "register_operand" "=Sg, Sg, v")
1501 (vec_and_scalar_nocom:DI
1502 (match_operand:DI 1 "gcn_alu_operand" "SgC,SgA, v")
1503 (match_operand:SI 2 "gcn_alu_operand" "SgA,SgC,vSvC")))
1504 (clobber (match_scratch:BI 3 "=cs, cs, X"))]
1505 ""
1506 "@
1507 s_<mnemonic>0\t%0, %1, %2
1508 s_<mnemonic>0\t%0, %1, %2
1509 v_<revmnemonic>0\t%0, %2, %1"
1510 [(set_attr "type" "sop2,sop2,vop2")
1511 (set_attr "length" "8")])
1512
1513 ;; }}}
1514 ;; {{{ Atomics
1515
1516 ; Each compute unit has its own L1 cache. The L2 cache is shared between
1517 ; all the compute units. Any load or store instruction can skip L1 and
1518 ; access L2 directly using the "glc" flag. Atomic instructions also skip
1519 ; L1. The L1 cache can be flushed and invalidated using instructions.
1520 ;
1521 ; Therefore, in order for "acquire" and "release" atomic modes to work
1522 ; correctly across compute units we must flush before each "release"
1523 ; and invalidate the cache after each "acquire". It might seem like
1524 ; invalidation could be safely done before an "acquire", but since each
1525 ; compute unit can run up to 40 threads simultaneously, all reading values
1526 ; into the L1 cache, this is not actually safe.
1527 ;
1528 ; Additionally, scalar flat instructions access L2 via a different cache
1529 ; (the "constant cache"), so they have separate constrol instructions. We
1530 ; do not attempt to invalidate both caches at once; instead, atomics
1531 ; operating on scalar flat pointers will flush the constant cache, and
1532 ; atomics operating on flat or global pointers will flush L1. It is up to
1533 ; the programmer to get this right.
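;
; For example, the SImode "acquire" flat alternative of atomic_load below
; emits "flat_load_dword ... glc; s_waitcnt 0; buffer_wbinvl1_vol", while the
; "release" flat alternative of atomic_store issues buffer_wbinvl1_vol
; before the store (see atomic_load<mode> and atomic_store<mode>).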
1534
1535 (define_code_iterator atomicops [plus minus and ior xor])
1536 (define_mode_attr X [(SI "") (DI "_X2")])
1537
1538 ;; TODO compare_and_swap test_and_set inc dec
1539 ;; Hardware also supports min and max, but GCC does not.
1540
1541 (define_expand "memory_barrier"
1542 [(set (match_dup 0)
1543 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
1544 ""
1545 {
1546 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1547 MEM_VOLATILE_P (operands[0]) = 1;
1548 })
1549
1550 (define_insn "*memory_barrier"
1551 [(set (match_operand:BLK 0)
1552 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
1553 ""
1554 "buffer_wbinvl1_vol"
1555 [(set_attr "type" "mubuf")
1556 (set_attr "length" "4")])
1557
1558 ; FIXME: These patterns have been disabled as they do not seem to work
1559 ; reliably - they can cause hangs or incorrect results.
1560 ; TODO: flush caches according to memory model
1561 (define_insn "atomic_fetch_<bare_mnemonic><mode>"
1562 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
1563 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
1564 (set (match_dup 1)
1565 (unspec_volatile:SIDI
1566 [(atomicops:SIDI
1567 (match_dup 1)
1568 (match_operand:SIDI 2 "register_operand" " Sm, v, v"))]
1569 UNSPECV_ATOMIC))
1570 (use (match_operand 3 "const_int_operand"))]
1571 "0 /* Disabled. */"
1572 "@
1573 s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
1574 flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
1575 global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
1576 [(set_attr "type" "smem,flat,flat")
1577 (set_attr "length" "12")
1578 (set_attr "gcn_version" "gcn5,*,gcn5")])
1579
1580 ; FIXME: These patterns are disabled because the instructions don't
1581 ; seem to work as advertised. Specifically, OMP "team distribute"
1582 ; reductions apparently "lose" some of the writes, similar to what
1583 ; you might expect from a concurrent non-atomic read-modify-write.
1584 ; TODO: flush caches according to memory model
1585 (define_insn "atomic_<bare_mnemonic><mode>"
1586 [(set (match_operand:SIDI 0 "memory_operand" "+RS,RF,RM")
1587 (unspec_volatile:SIDI
1588 [(atomicops:SIDI
1589 (match_dup 0)
1590 (match_operand:SIDI 1 "register_operand" " Sm, v, v"))]
1591 UNSPECV_ATOMIC))
1592 (use (match_operand 2 "const_int_operand"))]
1593 "0 /* Disabled. */"
1594 "@
1595 s_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\tlgkmcnt(0)
1596 flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
1597 global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
1598 [(set_attr "type" "smem,flat,flat")
1599 (set_attr "length" "12")
1600 (set_attr "gcn_version" "gcn5,*,gcn5")])
1601
1602 (define_mode_attr x2 [(SI "DI") (DI "TI")])
1603 (define_mode_attr size [(SI "4") (DI "8")])
1604 (define_mode_attr bitsize [(SI "32") (DI "64")])
1605
1606 (define_expand "sync_compare_and_swap<mode>"
1607 [(match_operand:SIDI 0 "register_operand")
1608 (match_operand:SIDI 1 "memory_operand")
1609 (match_operand:SIDI 2 "register_operand")
1610 (match_operand:SIDI 3 "register_operand")]
1611 ""
1612 {
1613 if (MEM_ADDR_SPACE (operands[1]) == ADDR_SPACE_LDS)
1614 {
1615 emit_insn (gen_sync_compare_and_swap<mode>_lds_insn (operands[0],
1616 operands[1],
1617 operands[2],
1618 operands[3]));
1619 DONE;
1620 }
1621
1622 /* Operands 2 and 3 must be placed in consecutive registers, and passed
1623 as a combined value. */
1624 rtx src_cmp = gen_reg_rtx (<x2>mode);
1625 emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, 0), operands[3]);
1626 emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, <size>), operands[2]);
1627 emit_insn (gen_sync_compare_and_swap<mode>_insn (operands[0],
1628 operands[1],
1629 src_cmp));
1630 DONE;
1631 })
1632
1633 (define_insn "sync_compare_and_swap<mode>_insn"
1634 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
1635 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
1636 (set (match_dup 1)
1637 (unspec_volatile:SIDI
1638 [(match_operand:<x2> 2 "register_operand" " Sm, v, v")]
1639 UNSPECV_ATOMIC))]
1640 ""
1641 "@
1642 s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
1643 flat_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\t0
1644 global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
1645 [(set_attr "type" "smem,flat,flat")
1646 (set_attr "length" "12")
1647 (set_attr "gcn_version" "gcn5,*,gcn5")
1648 (set_attr "delayeduse" "*,yes,yes")])
1649
1650 (define_insn "sync_compare_and_swap<mode>_lds_insn"
1651 [(set (match_operand:SIDI 0 "register_operand" "= v")
1652 (unspec_volatile:SIDI
1653 [(match_operand:SIDI 1 "memory_operand" "+RL")]
1654 UNSPECV_ATOMIC))
1655 (set (match_dup 1)
1656 (unspec_volatile:SIDI
1657 [(match_operand:SIDI 2 "register_operand" " v")
1658 (match_operand:SIDI 3 "register_operand" " v")]
1659 UNSPECV_ATOMIC))]
1660 ""
1661 "ds_cmpst_rtn_b<bitsize> %0, %1, %2, %3\;s_waitcnt\tlgkmcnt(0)"
1662 [(set_attr "type" "ds")
1663 (set_attr "length" "12")])
1664
1665 (define_insn "atomic_load<mode>"
1666 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
1667 (unspec_volatile:SIDI
1668 [(match_operand:SIDI 1 "memory_operand" " RS,RF,RM")]
1669 UNSPECV_ATOMIC))
1670 (use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
1671 ""
1672 {
1673 switch (INTVAL (operands[2]))
1674 {
1675 case MEMMODEL_RELAXED:
1676 switch (which_alternative)
1677 {
1678 case 0:
1679 return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
1680 case 1:
1681 return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0";
1682 case 2:
1683 return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)";
1684 }
1685 break;
1686 case MEMMODEL_CONSUME:
1687 case MEMMODEL_ACQUIRE:
1688 case MEMMODEL_SYNC_ACQUIRE:
1689 switch (which_alternative)
1690 {
1691 case 0:
1692 return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
1693 "s_dcache_wb_vol";
1694 case 1:
1695 return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
1696 "buffer_wbinvl1_vol";
1697 case 2:
1698 return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
1699 "buffer_wbinvl1_vol";
1700 }
1701 break;
1702 case MEMMODEL_ACQ_REL:
1703 case MEMMODEL_SEQ_CST:
1704 case MEMMODEL_SYNC_SEQ_CST:
1705 switch (which_alternative)
1706 {
1707 case 0:
1708 return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
1709 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
1710 case 1:
1711 return "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
1712 "s_waitcnt\t0\;buffer_wbinvl1_vol";
1713 case 2:
1714 return "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
1715 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
1716 }
1717 break;
1718 }
1719 gcc_unreachable ();
1720 }
1721 [(set_attr "type" "smem,flat,flat")
1722 (set_attr "length" "20")
1723 (set_attr "gcn_version" "gcn5,*,gcn5")])
1724
1725 (define_insn "atomic_store<mode>"
1726 [(set (match_operand:SIDI 0 "memory_operand" "=RS,RF,RM")
1727 (unspec_volatile:SIDI
1728 [(match_operand:SIDI 1 "register_operand" " Sm, v, v")]
1729 UNSPECV_ATOMIC))
1730 (use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
1731 ""
1732 {
1733 switch (INTVAL (operands[2]))
1734 {
1735 case MEMMODEL_RELAXED:
1736 switch (which_alternative)
1737 {
1738 case 0:
1739 return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
1740 case 1:
1741 return "flat_store%o1\t%A0, %1%O0 glc\;s_waitcnt\t0";
1742 case 2:
1743 return "global_store%o1\t%A0, %1%O0 glc\;s_waitcnt\tvmcnt(0)";
1744 }
1745 break;
1746 case MEMMODEL_RELEASE:
1747 case MEMMODEL_SYNC_RELEASE:
1748 switch (which_alternative)
1749 {
1750 case 0:
1751 return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
1752 case 1:
1753 return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc";
1754 case 2:
1755 return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc";
1756 }
1757 break;
1758 case MEMMODEL_ACQ_REL:
1759 case MEMMODEL_SEQ_CST:
1760 case MEMMODEL_SYNC_SEQ_CST:
1761 switch (which_alternative)
1762 {
1763 case 0:
1764 return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
1765 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
1766 case 1:
1767 return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
1768 "s_waitcnt\t0\;buffer_wbinvl1_vol";
1769 case 2:
1770 return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
1771 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
1772 }
1773 break;
1774 }
1775 gcc_unreachable ();
1776 }
1777 [(set_attr "type" "smem,flat,flat")
1778 (set_attr "length" "20")
1779 (set_attr "gcn_version" "gcn5,*,gcn5")])
1780
1781 (define_insn "atomic_exchange<mode>"
1782 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
1783 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
1784 (set (match_dup 1)
1785 (unspec_volatile:SIDI
1786 [(match_operand:SIDI 2 "register_operand" " Sm, v, v")]
1787 UNSPECV_ATOMIC))
1788 (use (match_operand 3 "immediate_operand"))]
1789 ""
1790 {
1791 switch (INTVAL (operands[3]))
1792 {
1793 case MEMMODEL_RELAXED:
1794 switch (which_alternative)
1795 {
1796 case 0:
1797 return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
1798 case 1:
1799 return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0";
1800 case 2:
1801 return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
1802 "s_waitcnt\tvmcnt(0)";
1803 }
1804 break;
1805 case MEMMODEL_CONSUME:
1806 case MEMMODEL_ACQUIRE:
1807 case MEMMODEL_SYNC_ACQUIRE:
1808 switch (which_alternative)
1809 {
1810 case 0:
1811 return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
1812 "s_dcache_wb_vol\;s_dcache_inv_vol";
1813 case 1:
1814 return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
1815 "buffer_wbinvl1_vol";
1816 case 2:
1817 return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
1818 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
1819 }
1820 break;
1821 case MEMMODEL_RELEASE:
1822 case MEMMODEL_SYNC_RELEASE:
1823 switch (which_alternative)
1824 {
1825 case 0:
1826 return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
1827 "s_waitcnt\tlgkmcnt(0)";
1828 case 1:
1829 return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
1830 "s_waitcnt\t0";
1831 case 2:
1832 return "buffer_wbinvl1_vol\;"
1833 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
1834 "s_waitcnt\tvmcnt(0)";
1835 }
1836 break;
1837 case MEMMODEL_ACQ_REL:
1838 case MEMMODEL_SEQ_CST:
1839 case MEMMODEL_SYNC_SEQ_CST:
1840 switch (which_alternative)
1841 {
1842 case 0:
1843 return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
1844 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
1845 case 1:
1846 return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
1847 "s_waitcnt\t0\;buffer_wbinvl1_vol";
1848 case 2:
1849 return "buffer_wbinvl1_vol\;"
1850 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
1851 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
1852 }
1853 break;
1854 }
1855 gcc_unreachable ();
1856 }
1857 [(set_attr "type" "smem,flat,flat")
1858 (set_attr "length" "20")
1859 (set_attr "gcn_version" "gcn5,*,gcn5")])
1860
1861 ;; }}}
1862 ;; {{{ OpenACC / OpenMP
1863
1864 (define_expand "oacc_dim_size"
1865 [(match_operand:SI 0 "register_operand")
1866 (match_operand:SI 1 "const_int_operand")]
1867 ""
1868 {
1869 rtx tmp = gcn_oacc_dim_size (INTVAL (operands[1]));
1870 emit_move_insn (operands[0], gen_lowpart (SImode, tmp));
1871 DONE;
1872 })
1873
1874 (define_expand "oacc_dim_pos"
1875 [(match_operand:SI 0 "register_operand")
1876 (match_operand:SI 1 "const_int_operand")]
1877 ""
1878 {
1879 emit_move_insn (operands[0], gcn_oacc_dim_pos (INTVAL (operands[1])));
1880 DONE;
1881 })
1882
1883 (define_expand "gcn_wavefront_barrier"
1884 [(set (match_dup 0)
1885 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
1886 ""
1887 {
1888 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1889 MEM_VOLATILE_P (operands[0]) = 1;
1890 })
1891
1892 (define_insn "*gcn_wavefront_barrier"
1893 [(set (match_operand:BLK 0 "")
1894 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
1895 ""
1896 "s_barrier"
1897 [(set_attr "type" "sopp")])
1898
1899 (define_expand "oacc_fork"
1900 [(set (match_operand:SI 0 "")
1901 (match_operand:SI 1 ""))
1902 (use (match_operand:SI 2 ""))]
1903 ""
1904 {
1905 /* We need to have oacc_fork/oacc_join named patterns as a pair,
1906 but the fork isn't actually used. */
1907 gcc_unreachable ();
1908 })
1909
1910 (define_expand "oacc_join"
1911 [(set (match_operand:SI 0 "")
1912 (match_operand:SI 1 ""))
1913 (use (match_operand:SI 2 ""))]
1914 ""
1915 {
1916 emit_insn (gen_gcn_wavefront_barrier ());
1917 DONE;
1918 })
1919
1920 ;; }}}
1921
1922 (include "gcn-valu.md")