1;; Copyright (C) 2016-2022 Free Software Foundation, Inc.
2
3;; This file is free software; you can redistribute it and/or modify it under
4;; the terms of the GNU General Public License as published by the Free
5;; Software Foundation; either version 3 of the License, or (at your option)
6;; any later version.
7
8;; This file is distributed in the hope that it will be useful, but WITHOUT
9;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11;; for more details.
12
13;; You should have received a copy of the GNU General Public License
14;; along with GCC; see the file COPYING3. If not see
15;; <http://www.gnu.org/licenses/>.
16
17;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
18
19(include "predicates.md")
20(include "constraints.md")
21
22;; {{{ Constants and enums
23
24; Named registers
25(define_constants
26 [(FIRST_SGPR_REG 0)
27 (CC_SAVE_REG 22)
28 (LAST_SGPR_REG 101)
29 (FLAT_SCRATCH_REG 102)
30 (FLAT_SCRATCH_LO_REG 102)
31 (FLAT_SCRATCH_HI_REG 103)
32 (XNACK_MASK_REG 104)
33 (XNACK_MASK_LO_REG 104)
34 (XNACK_MASK_HI_REG 105)
35 (VCC_REG 106)
36 (VCC_LO_REG 106)
37 (VCC_HI_REG 107)
38 (VCCZ_REG 108)
39 (TBA_REG 109)
40 (TBA_LO_REG 109)
41 (TBA_HI_REG 110)
42 (TMA_REG 111)
43 (TMA_LO_REG 111)
44 (TMA_HI_REG 112)
45 (TTMP0_REG 113)
46 (TTMP11_REG 124)
47 (M0_REG 125)
48 (EXEC_REG 126)
49 (EXEC_LO_REG 126)
50 (EXEC_HI_REG 127)
51 (EXECZ_REG 128)
52 (SCC_REG 129)
53 (FIRST_VGPR_REG 160)
54 (LAST_VGPR_REG 415)])
55
56(define_constants
57 [(SP_REGNUM 16)
58 (LR_REGNUM 18)
59 (AP_REGNUM 416)
60 (FP_REGNUM 418)])
61
62(define_c_enum "unspecv" [
63 UNSPECV_PROLOGUE_USE
64 UNSPECV_KERNEL_RETURN
65 UNSPECV_BARRIER
66 UNSPECV_ATOMIC
67 UNSPECV_ICACHE_INV])
68
69(define_c_enum "unspec" [
70 UNSPEC_ADDPTR
71 UNSPEC_VECTOR
72 UNSPEC_BPERMUTE
73 UNSPEC_SGPRBASE
74 UNSPEC_MEMORY_BARRIER
75 UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
76 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
77 UNSPEC_PLUS_DPP_SHR
78 UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
79 UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
80 UNSPEC_MOV_DPP_SHR
81 UNSPEC_MOV_FROM_LANE63
82 UNSPEC_GATHER
83 UNSPEC_SCATTER
84 UNSPEC_RCP
85 UNSPEC_FLBIT_INT])
86
87;; }}}
88;; {{{ Attributes
89
90; Instruction type (encoding) as described in the ISA specification.
91; The following table summarizes possible operands of individual instruction
92; types and corresponding constraints.
93;
94; sop2 - scalar, two inputs, one output
95; ssrc0/ssrc1: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
96; vccz,execz,scc,inline immediate,fp inline immediate
97; sdst: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
98;
99; Constraints "=SD, SD", "SSA,SSB","SSB,SSA"
100;
101; sopk - scalar, inline constant input, one output
102; simm16: 16bit inline constant
103; sdst: same as sop2/ssrc0
104;
105; Constraints "=SD", "J"
106;
107; sop1 - scalar, one input, one output
108; ssrc0: same as sop2/ssrc0. FIXME: the manual omits VCCZ
109; sdst: same as sop2/sdst
110;
111; Constraints "=SD", "SSA"
112;
113; sopc - scalar, two inputs, one comparison
114; ssrc0: same as sop2/ssrc0.
115;
116; Constraints "SSI,SSA","SSA,SSI"
117;
118; sopp - scalar, one constant input, one special
119; simm16
120;
121; smem - scalar memory
122; sbase: aligned pair of sgprs. Specify {size[15:0], base[47:0]} in
123; dwords
124; sdata: sgpr0-102, flat_scratch, xnack, vcc, tba, tma
125; offset: sgpr or 20bit unsigned byte offset
126;
127; vop2 - vector, two inputs, one output
128; vsrc0: sgpr0-102,flat_scratch,xnack,vcc,tba,ttmp0-11,m0,exec,
129; inline constant -16 to -64, fp inline immediate, vccz, execz,
130; scc, lds, literal constant, vgpr0-255
131; vsrc1: vgpr0-255
132; vdst: vgpr0-255
133; Limitations: At most one SGPR, at most one constant
134; if constant is used, SGPR must be M0
135; Only SRC0 can be LDS_DIRECT
136;
137; constraints: "=v", "vBSv", "v"
138;
139; vop1 - vector, one input, one output
140; vsrc0: same as vop2/src0
141; vdst: vgpr0-255
142;
143; constraints: "=v", "vBSv"
144;
145; vopc - vector, two inputs, one comparison output
146; vsrc0: same as vop2/src0
147; vsrc1: vgpr0-255
148; vdst:
149;
150; constraints: "vASv", "v"
151;
152; vop3a - vector, three inputs, one output
153; vdst: vgpr0-255, for v_cmp sgpr or vcc
154; abs,clamp
155; vsrc0: sgpr0-102,vcc,tba,ttmp0-11,m0,exec,
156; inline constant -16 to -64, fp inline immediate, vccz, execz,
157; scc, lds_direct
158; FIXME: really missing 1/pi? really 104 SGPRs
159;
160; vop3b - vector, three inputs, one vector output, one scalar output
161; vsrc0,vsrc1,vsrc2: same as vop3a vsrc0
162; vdst: vgpr0-255
163; sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
164;
165; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address
166; src0: vgpr0-255
167; dst_sel: BYTE_0-3, WORD_0-1, DWORD
168; dst_unused: UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE
169; clamp: true/false
170; src0_sel: BYTE_0-3, WORD_0-1, DWORD
171; flags: src0_sext, src0_neg, src0_abs, src1_sel, src1_sext, src1_neg,
172 ; src1_abs
173;
174; vop_dpp - second dword for vop1/vop2/vopc for specifying data-parallel ops
175; src0: vgpr0-255
176; dpp_ctrl: quad_perm, row_sl0-15, row_sr0-15, row_rr0-15, wf_sl1,
177; wf_rl1, wf_sr1, wf_rr1, row_mirror, row_half_mirror,
178; bcast15, bcast31
179; flags: src0_neg, src0_abs, src1_neg, src1_abs
180; bank_mask: 4-bit mask
181; row_mask: 4-bit mask
182;
183; ds - Local and global data share instructions.
184; offset0: 8-bit constant
185; offset1: 8-bit constant
186; flag: gds
187; addr: vgpr0-255
188; data0: vgpr0-255
189; data1: vgpr0-255
190; vdst: vgpr0-255
191;
192; mubuf - Untyped memory buffer operation. First word with LDS, second word
193; non-LDS.
194; offset: 12-bit constant
195; vaddr: vgpr0-255
196; vdata: vgpr0-255
197; srsrc: sgpr0-102
198; soffset: sgpr0-102
199; flags: offen, idxen, glc, lds, slc, tfe
200;
201; mtbuf - Typed memory buffer operation. Two words
202; offset: 12-bit constant
203; dfmt: 4-bit constant
204; nfmt: 3-bit constant
205; vaddr: vgpr0-255
206; vdata: vgpr0-255
207; srsrc: sgpr0-102
208; soffset: sgpr0-102
209; flags: offen, idxen, glc, lds, slc, tfe
210;
211; flat - flat or global memory operations
212; flags: glc, slc
213; addr: vgpr0-255
214; data: vgpr0-255
215; vdst: vgpr0-255
216;
217; mult - expands to multiple instructions (pseudo encoding)
218;
219; vmult - as mult, when a vector instruction is used.
220
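; For instance, a minimal vop2-style pattern (an illustrative sketch only --
; "*sketch_iorsi3_vop2" is not one of the patterns defined below) would pair
; the operand kinds above with the constraints listed for vop2:
;
;   (define_insn "*sketch_iorsi3_vop2"
;     [(set (match_operand:SI 0 "register_operand"         "=   v")
;           (ior:SI (match_operand:SI 1 "gcn_alu_operand"   " vBSv")
;                   (match_operand:SI 2 "register_operand"  "    v")))]
;     ""
;     "v_or_b32\t%0, %1, %2"
;     [(set_attr "type" "vop2")])
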
221(define_attr "type"
222 "unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc,
223 vop3a,vop3b,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,vmult"
224 (const_string "unknown"))
225
226; Set if instruction is executed in scalar or vector unit
227
228(define_attr "unit" "unknown,scalar,vector"
229 (cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult")
230 (const_string "scalar")
231 (eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,ds,
232 vop_sdwa,vop_dpp,flat,vmult")
233 (const_string "vector")]
234 (const_string "unknown")))
235
236; All vector instructions run as 64 threads as predicated by the EXEC
237; register. Scalar operations in vector register require a single lane
238; enabled, vector moves require a full set of lanes enabled, and most vector
239; operations handle the lane masking themselves.
240; The md_reorg pass is responsible for ensuring that EXEC is set appropriately
241; according to the following settings:
242; auto - md_reorg will inspect def/use to determine what to do.
243; none - exec is not needed.
244; single - disable all but lane zero.
245; full - enable all lanes.
246
247(define_attr "exec" "auto,none,single,full"
248 (const_string "auto"))
249
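; For example, the v_readlane_b32 alternative of "*movbi" below reads a single
; lane regardless of the EXEC mask, so that alternative is marked "none" in
; its per-alternative exec attribute, while ordinary vector ALU alternatives
; keep the default "auto" and leave the EXEC handling to md_reorg.
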
250; Infer the (worst-case) length from the instruction type by default. Many
251; types can have an optional immediate word following, which we include here.
252; "Multiple" types are counted as two 64-bit instructions. This is just a
253; default fallback: it can be overridden per-alternative in insn patterns for
254; greater accuracy.
255
256(define_attr "length" ""
257 (cond [(eq_attr "type" "sop1") (const_int 8)
258 (eq_attr "type" "sop2") (const_int 8)
259 (eq_attr "type" "sopk") (const_int 8)
260 (eq_attr "type" "sopc") (const_int 8)
261 (eq_attr "type" "sopp") (const_int 4)
262 (eq_attr "type" "smem") (const_int 8)
263 (eq_attr "type" "ds") (const_int 8)
264 (eq_attr "type" "vop1") (const_int 8)
265 (eq_attr "type" "vop2") (const_int 8)
266 (eq_attr "type" "vopc") (const_int 8)
267 (eq_attr "type" "vop3a") (const_int 8)
268 (eq_attr "type" "vop3b") (const_int 8)
269 (eq_attr "type" "vop_sdwa") (const_int 8)
270 (eq_attr "type" "vop_dpp") (const_int 8)
271 (eq_attr "type" "flat") (const_int 8)
272 (eq_attr "type" "mult") (const_int 16)
273 (eq_attr "type" "vmult") (const_int 16)]
274 (const_int 4)))
275
276; Disable alternatives that only apply to specific ISA variants.
277
278(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))
279
280(define_attr "enabled" ""
281 (cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
282 (and (eq_attr "gcn_version" "gcn5")
283 (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
284 (const_int 1)]
285 (const_int 0)))
286
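; For example, the scalar s_mul_hi_i32/u32 alternatives further down are only
; available from GCN5 onwards, so those patterns mark them per-alternative
; with (set_attr "gcn_version" "gcn5,*"); the "enabled" attribute above then
; disables them unless TARGET_GCN5_PLUS is set.
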
287; We need to be able to identify v_readlane and v_writelane with
288; SGPR lane selection in order to handle "Manually Inserted Wait States".
289
290(define_attr "laneselect" "yes,no" (const_string "no"))
291
292; Identify instructions that require a "Manually Inserted Wait State" if
293; their inputs are overwritten by subsequent instructions.
294
295(define_attr "delayeduse" "yes,no" (const_string "no"))
296
297;; }}}
298;; {{{ Iterators useful across the whole machine description
299
300(define_mode_iterator SIDI [SI DI])
301(define_mode_iterator SFDF [SF DF])
302(define_mode_iterator SISF [SI SF])
303(define_mode_iterator QIHI [QI HI])
304(define_mode_iterator DIDF [DI DF])
305(define_mode_iterator FP [HF SF DF])
306(define_mode_iterator FP_1REG [HF SF])
307
308;; }}}
309;; {{{ Attributes.
310
311; Translate RTX code into GCN instruction mnemonics with and without
312; suffixes such as _b32, etc.
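; For instance, the "%b"/"%B"/"%i"/"%u" print modifiers append a width/type
; suffix derived from the referenced operand's mode, so (and "and%B") used in
; an "s_<mnemonic>0" template typically renders as "s_and_b32" for SImode
; operands and "s_and_b64" for DImode operands.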
313
314(define_code_attr mnemonic
315 [(minus "sub%i")
316 (plus "add%i")
317 (ashift "lshl%b")
318 (lshiftrt "lshr%b")
319 (ashiftrt "ashr%i")
320 (and "and%B")
321 (ior "or%B")
322 (xor "xor%B")
323 (mult "mul%i")
324 (smin "min%i")
325 (smax "max%i")
326 (umin "min%u")
327 (umax "max%u")
328 (not "not%B")
329 (popcount "bcnt_u32%b")])
330
331(define_code_attr bare_mnemonic
332 [(plus "add")
333 (minus "sub")
334 (and "and")
335 (ior "or")
336 (xor "xor")])
337
338(define_code_attr s_mnemonic
339 [(not "not%b")
340 (popcount "bcnt1_i32%b")
341 (clz "flbit_i32%b")
342 (ctz "ff1_i32%b")
343 (clrsb "flbit_i32%i")])
344
345(define_code_attr revmnemonic
346 [(minus "subrev%i")
347 (ashift "lshlrev%b")
348 (lshiftrt "lshrrev%b")
349 (ashiftrt "ashrrev%i")])
350
351; Translate RTX code into corresponding expander name.
352
353(define_code_attr expander
354 [(and "and")
355 (ior "ior")
356 (xor "xor")
357 (plus "add")
358 (minus "sub")
359 (ashift "ashl")
360 (lshiftrt "lshr")
361 (ashiftrt "ashr")
362 (mult "mul")
363 (smin "smin")
364 (smax "smax")
365 (umin "umin")
366 (umax "umax")
367 (not "one_cmpl")
368 (popcount "popcount")
369 (clz "clz")
370 (ctz "ctz")
371 (sign_extend "extend")
372 (zero_extend "zero_extend")])
373
374;; }}}
375;; {{{ Miscellaneous instructions
376
377(define_insn "nop"
378 [(const_int 0)]
379 ""
380 "s_nop\t0x0"
381 [(set_attr "type" "sopp")])
382
383; FIXME: What should the value of the immediate be? Zero is disallowed, so
384; pick 1 for now.
385(define_insn "trap"
386 [(trap_if (const_int 1) (const_int 0))]
387 ""
388 "s_trap\t1"
389 [(set_attr "type" "sopp")])
390
391;; }}}
392;; {{{ Moves
393
394;; All scalar modes we support moves in.
395(define_mode_iterator MOV_MODE [BI QI HI SI DI TI SF DF])
396
397; This is the entry point for creating all kinds of scalar moves,
398; including reloads and symbols.
399
400(define_expand "mov<mode>"
401 [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
402 (match_operand:MOV_MODE 1 "general_operand"))]
403 ""
404 {
405 if (SUBREG_P (operands[1])
406 && GET_MODE (operands[1]) == SImode
407 && GET_MODE (SUBREG_REG (operands[1])) == BImode)
408 {
409 /* (reg:BI VCC) has nregs==2 to ensure it gets clobbered as a whole,
410 but (subreg:SI (reg:BI VCC)) doesn't, which causes the LRA liveness
411 checks to assert. Transform this:
412 (set (reg:SI) (subreg:SI (reg:BI)))
413 to this:
414 (set (subreg:BI (reg:SI)) (reg:BI)) */
415 operands[0] = gen_rtx_SUBREG (BImode, operands[0], 0);
416 operands[1] = SUBREG_REG (operands[1]);
417 }
418 if (SUBREG_P (operands[0])
419 && GET_MODE (operands[0]) == SImode
420 && GET_MODE (SUBREG_REG (operands[0])) == BImode)
421 {
422 /* Likewise, transform this:
423 (set (subreg:SI (reg:BI)) (reg:SI))
424 to this:
425 (set (reg:BI) (subreg:BI (reg:SI))) */
426 operands[0] = SUBREG_REG (operands[0]);
427 operands[1] = gen_rtx_SUBREG (BImode, operands[1], 0);
428 }
429
430 if (MEM_P (operands[0]))
431 operands[1] = force_reg (<MODE>mode, operands[1]);
432
433 if (!lra_in_progress && !reload_completed
434 && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1]))
435 {
436 /* Something is probably trying to generate a move
437 which can only work indirectly.
438 E.g. Move from LDS memory to SGPR hardreg
439 or MEM:QI to SGPR. */
440 rtx tmpreg = gen_reg_rtx (<MODE>mode);
441 emit_insn (gen_mov<mode> (tmpreg, operands[1]));
442 emit_insn (gen_mov<mode> (operands[0], tmpreg));
443 DONE;
444 }
445
446 if (<MODE>mode == DImode
447 && (GET_CODE (operands[1]) == SYMBOL_REF
448 || GET_CODE (operands[1]) == LABEL_REF))
449 {
450 if (lra_in_progress)
451 emit_insn (gen_movdi_symbol_save_scc (operands[0], operands[1]));
452 else
453 emit_insn (gen_movdi_symbol (operands[0], operands[1]));
454 DONE;
455 }
456 })
457
458; Split invalid moves into two valid moves
459
460(define_split
461 [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
462 (match_operand:MOV_MODE 1 "general_operand"))]
463 "!reload_completed && !lra_in_progress
464 && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
465 [(set (match_dup 2) (match_dup 1))
466 (set (match_dup 0) (match_dup 2))]
467 {
468 operands[2] = gen_reg_rtx(<MODE>mode);
469 })
470
471; We need BImode move so we can reload flags registers.
472
473(define_insn "*movbi"
474 [(set (match_operand:BI 0 "nonimmediate_operand"
475 "=Sg, v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
476 (match_operand:BI 1 "gcn_load_operand"
477 "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
478 ""
479 {
480 /* SCC as an operand is currently not accepted by the LLVM assembler, so
481 we emit bytes directly as a workaround. */
482 switch (which_alternative) {
483 case 0:
484 if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
485 return "; s_mov_b32\t%0,%1 is not supported by the assembler.\;"
486 ".byte\t0xfd\;"
487 ".byte\t0x0\;"
488 ".byte\t0x80|%R0\;"
489 ".byte\t0xbe";
490 else
491 return "s_mov_b32\t%0, %1";
492 case 1:
493 if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
494 return "; v_mov_b32\t%0, %1\;"
495 ".byte\t0xfd\;"
496 ".byte\t0x2\;"
497 ".byte\t((%V0<<1)&0xff)\;"
498 ".byte\t0x7e|(%V0>>7)";
499 else
500 return "v_mov_b32\t%0, %1";
501 case 2:
502 return "v_readlane_b32\t%0, %1, 0";
503 case 3:
504 return "s_cmpk_lg_u32\t%1, 0";
505 case 4:
506 return "v_cmp_ne_u32\tvcc, 0, %1";
507 case 5:
508 if (REGNO (operands[1]) == SCC_REG)
509 return "; s_mov_b32\t%0, %1 is not supported by the assembler.\;"
510 ".byte\t0xfd\;"
511 ".byte\t0x0\;"
512 ".byte\t0xea\;"
513 ".byte\t0xbe\;"
514 "s_mov_b32\tvcc_hi, 0";
515 else
516 return "s_mov_b32\tvcc_lo, %1\;"
517 "s_mov_b32\tvcc_hi, 0";
518 case 6:
519 return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
520 case 7:
521 return "s_store_dword\t%1, %A0";
522 case 8:
523 return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
524 case 9:
525 return "flat_store_dword\t%A0, %1%O0%g0";
526 case 10:
527 return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
528 case 11:
529 return "global_store_dword\t%A0, %1%O0%g0";
530 default:
531 gcc_unreachable ();
532 }
533 }
534 [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
535 flat,flat")
536 (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
537 (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])
538
539; 32bit move pattern
540
541(define_insn "*mov<mode>_insn"
542 [(set (match_operand:SISF 0 "nonimmediate_operand"
543 "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG, v,SD, v,RM")
544 (match_operand:SISF 1 "gcn_load_operand"
545 "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B, v,RLRG, Y,RM, v"))]
546 ""
547 "@
548 s_mov_b32\t%0, %1
549 s_movk_i32\t%0, %1
550 s_mov_b32\t%0, %1
551 s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
552 s_buffer_store%s1\t%1, s[0:3], %0
553 s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
554 s_store_dword\t%1, %A0
555 v_mov_b32\t%0, %1
556 v_readlane_b32\t%0, %1, 0
557 v_writelane_b32\t%0, %1, 0
558 flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
559 flat_store_dword\t%A0, %1%O0%g0
560 v_mov_b32\t%0, %1
561 ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
562 ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
563 s_mov_b32\t%0, %1
564 global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
565 global_store_dword\t%A0, %1%O0%g0"
566 [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
567 flat,vop1,ds,ds,sop1,flat,flat")
568 (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
569 (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
570
571; 8/16bit move pattern
572; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
573
574(define_insn "*mov<mode>_insn"
575 [(set (match_operand:QIHI 0 "nonimmediate_operand"
576 "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM")
577 (match_operand:QIHI 1 "gcn_load_operand"
578 "SSA, J, B,v, v,Sv,RF, v,B, v,RLRG,RM, v"))]
579 "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
580 "@
581 s_mov_b32\t%0, %1
582 s_movk_i32\t%0, %1
583 s_mov_b32\t%0, %1
584 v_mov_b32\t%0, %1
585 v_readlane_b32\t%0, %1, 0
586 v_writelane_b32\t%0, %1, 0
587 flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
588 flat_store%s0\t%A0, %1%O0%g0
589 v_mov_b32\t%0, %1
590 ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
591 ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
592 global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
593 global_store%s0\t%A0, %1%O0%g0"
594 [(set_attr "type"
595 "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
596 (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
597 (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
598
599; 64bit move pattern
600
601(define_insn_and_split "*mov<mode>_insn"
602 [(set (match_operand:DIDF 0 "nonimmediate_operand"
603 "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM")
604 (match_operand:DIDF 1 "general_operand"
605 "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v, v,RLRG,RM, v"))]
606 "GET_CODE(operands[1]) != SYMBOL_REF"
607 "@
608 s_mov_b64\t%0, %1
609 s_mov_b64\t%0, %1
610 #
611 s_store_dwordx2\t%1, %A0
612 s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
613 #
614 #
615 #
616 #
617 flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
618 flat_store_dwordx2\t%A0, %1%O0%g0
619 ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
620 ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
621 global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
622 global_store_dwordx2\t%A0, %1%O0%g0
623 "reload_completed
624 && ((!MEM_P (operands[0]) && !MEM_P (operands[1])
625 && !gcn_sgpr_move_p (operands[0], operands[1]))
626 || (GET_CODE (operands[1]) == CONST_INT
627 && !gcn_constant64_p (operands[1])))"
628 [(set (match_dup 0) (match_dup 1))
629 (set (match_dup 2) (match_dup 3))]
630 {
631 rtx inlo = gen_lowpart (SImode, operands[1]);
632 rtx inhi = gen_highpart_mode (SImode, <MODE>mode, operands[1]);
633 rtx outlo = gen_lowpart (SImode, operands[0]);
634 rtx outhi = gen_highpart_mode (SImode, <MODE>mode, operands[0]);
635
636 /* Ensure that overlapping registers aren't corrupted. */
637 if (reg_overlap_mentioned_p (outlo, inhi))
638 {
639 operands[0] = outhi;
640 operands[1] = inhi;
641 operands[2] = outlo;
642 operands[3] = inlo;
643 }
644 else
645 {
646 operands[0] = outlo;
647 operands[1] = inlo;
648 operands[2] = outhi;
649 operands[3] = inhi;
650 }
651 }
652 [(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
653 flat,ds,ds,flat,flat")
654 (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
655
656; 128-bit move.
657
658(define_insn_and_split "*movti_insn"
659 [(set (match_operand:TI 0 "nonimmediate_operand"
660 "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
661 (match_operand:TI 1 "general_operand"
662 "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
663 ""
664 "@
665 #
666 s_store_dwordx4\t%1, %A0
667 s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
668 flat_store_dwordx4\t%A0, %1%O0%g0
669 flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
670 #
671 #
672 #
673 global_store_dwordx4\t%A0, %1%O0%g0
674 global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
675 ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
676 ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
677 "reload_completed
678 && REG_P (operands[0])
679 && (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)"
680 [(set (match_dup 0) (match_dup 1))
681 (set (match_dup 2) (match_dup 3))
682 (set (match_dup 4) (match_dup 5))
683 (set (match_dup 6) (match_dup 7))]
684 {
685 gcc_assert (rtx_equal_p (operands[0], operands[1])
686 || !reg_overlap_mentioned_p (operands[0], operands[1]));
687 operands[6] = gcn_operand_part (TImode, operands[0], 3);
688 operands[7] = gcn_operand_part (TImode, operands[1], 3);
689 operands[4] = gcn_operand_part (TImode, operands[0], 2);
690 operands[5] = gcn_operand_part (TImode, operands[1], 2);
691 operands[2] = gcn_operand_part (TImode, operands[0], 1);
692 operands[3] = gcn_operand_part (TImode, operands[1], 1);
693 operands[0] = gcn_operand_part (TImode, operands[0], 0);
694 operands[1] = gcn_operand_part (TImode, operands[1], 0);
695 }
696 [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
697 ds,ds")
698 (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
699 (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
700
701;; }}}
702;; {{{ Prologue/Epilogue
703
704(define_insn "prologue_use"
705 [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
706 ""
707 ""
708 [(set_attr "length" "0")])
709
710(define_expand "prologue"
711 [(const_int 0)]
712 ""
713 {
714 gcn_expand_prologue ();
715 DONE;
716 })
717
718(define_expand "epilogue"
719 [(const_int 0)]
720 ""
721 {
722 gcn_expand_epilogue ();
723 DONE;
724 })
725
726;; }}}
727;; {{{ Control flow
728
729; This pattern must satisfy simplejump_p, which means it cannot be a parallel
730; that clobbers SCC. Thus, we must preserve SCC if we're generating a long
731; branch sequence.
732
733(define_insn "jump"
734 [(set (pc)
735 (label_ref (match_operand 0)))]
736 ""
737 {
738 if (get_attr_length (insn) == 4)
739 return "s_branch\t%0";
740 else
741 /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG. */
742 return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
743 ".long\t0xbe9600fd\;"
744 "s_getpc_b64\ts[20:21]\;"
745 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
746 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
747 "s_cmpk_lg_u32\ts22, 0\;"
748 "s_setpc_b64\ts[20:21]";
749 }
750 [(set_attr "type" "sopp")
751 (set (attr "length")
752 (if_then_else (and (ge (minus (match_dup 0) (pc))
753 (const_int -131072))
754 (lt (minus (match_dup 0) (pc))
755 (const_int 131072)))
756 (const_int 4)
757 (const_int 32)))])
758
759(define_insn "indirect_jump"
760 [(set (pc)
761 (match_operand:DI 0 "register_operand" "Sg"))]
762 ""
763 "s_setpc_b64\t%0"
764 [(set_attr "type" "sop1")
765 (set_attr "length" "4")])
766
767(define_insn "cjump"
768 [(set (pc)
769 (if_then_else
770 (match_operator:BI 1 "gcn_conditional_operator"
771 [(match_operand:BI 2 "gcn_conditional_register_operand" "ca,cV")
772 (const_int 0)])
773 (label_ref (match_operand 0))
774 (pc)))]
775 ""
776 {
777 if (get_attr_length (insn) == 4)
778 return "s_cbranch%C1\t%0";
779 else
780 {
781 /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG but
782 restores SCC. */
783 if (REGNO (operands[2]) == SCC_REG)
784 {
785 if (GET_CODE (operands[1]) == EQ)
786 return "s_cbranch%c1\t.Lskip%=\;"
787 "s_getpc_b64\ts[20:21]\;"
788 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
789 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
790 "s_cmp_lg_u32\t0, 0\;"
791 "s_setpc_b64\ts[20:21]\n"
792 ".Lskip%=:";
793 else
794 return "s_cbranch%c1\t.Lskip%=\;"
795 "s_getpc_b64\ts[20:21]\;"
796 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
797 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
798 "s_cmp_eq_u32\t0, 0\;"
799 "s_setpc_b64\ts[20:21]\n"
800 ".Lskip%=:";
801 }
802 else
803 return "s_cbranch%c1\t.Lskip%=\;"
804 "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
805 ".byte\t0xfd\;"
806 ".byte\t0x0\;"
807 ".byte\t0x80|22\;"
808 ".byte\t0xbe\;"
809 "s_getpc_b64\ts[20:21]\;"
810 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
811 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
812 "s_cmpk_lg_u32\ts22, 0\;"
813 "s_setpc_b64\ts[20:21]\n"
814 ".Lskip%=:";
815 }
816 }
817 [(set_attr "type" "sopp")
818 (set (attr "length")
819 (if_then_else (and (ge (minus (match_dup 0) (pc))
820 (const_int -131072))
821 (lt (minus (match_dup 0) (pc))
822 (const_int 131072)))
823 (const_int 4)
824 (const_int 36)))])
825
826; Returning from a normal function is different to returning from a
827; kernel function.
828
829(define_insn "gcn_return"
830 [(return)]
831 ""
832 {
833 if (cfun && cfun->machine && cfun->machine->normal_function)
834 return "s_setpc_b64\ts[18:19]";
835 else
836 return "s_waitcnt\tlgkmcnt(0)\;s_dcache_wb\;s_endpgm";
837 }
838 [(set_attr "type" "sop1")
839 (set_attr "length" "12")])
840
841(define_expand "call"
842 [(parallel [(call (match_operand 0 "")
843 (match_operand 1 ""))
844 (clobber (reg:DI LR_REGNUM))
845 (clobber (match_scratch:DI 2))])]
846 ""
847 {})
848
849(define_insn "gcn_simple_call"
850 [(call (mem (match_operand 0 "immediate_operand" "Y,B"))
851 (match_operand 1 "const_int_operand"))
852 (clobber (reg:DI LR_REGNUM))
853 (clobber (match_scratch:DI 2 "=&Sg,X"))]
854 ""
855 "@
856 s_getpc_b64\t%2\;s_add_u32\t%L2, %L2, %0@rel32@lo+4\;s_addc_u32\t%H2, %H2, %0@rel32@hi+4\;s_swappc_b64\ts[18:19], %2
857 s_swappc_b64\ts[18:19], %0"
858 [(set_attr "type" "mult,sop1")
859 (set_attr "length" "24,4")])
860
861(define_insn "movdi_symbol"
862 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
863 (match_operand:DI 1 "general_operand" "Y"))
864 (clobber (reg:BI SCC_REG))]
865 "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
866 {
867 if (SYMBOL_REF_P (operands[1])
868 && SYMBOL_REF_WEAK (operands[1]))
869 return "s_getpc_b64\t%0\;"
870 "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
871 "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
872 "s_load_dwordx2\t%0, %0\;"
873 "s_waitcnt\tlgkmcnt(0)";
874
875 return "s_getpc_b64\t%0\;"
876 "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
877 "s_addc_u32\t%H0, %H0, %1@rel32@hi+4";
878 }
879 [(set_attr "type" "mult")
880 (set_attr "length" "32")])
881
882(define_insn "movdi_symbol_save_scc"
883 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
884 (match_operand:DI 1 "general_operand" "Y"))
885 (clobber (reg:BI CC_SAVE_REG))]
8108da8a 886 "(GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF)
c2709ec4 887 && (lra_in_progress || reload_completed)"
8108da8a
AS
888 {
889 /* !!! These sequences clobber CC_SAVE_REG. */
890
891 if (SYMBOL_REF_P (operands[1])
892 && SYMBOL_REF_WEAK (operands[1]))
893 return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
894 ".long\t0xbe9600fd\;"
895 "s_getpc_b64\t%0\;"
896 "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
897 "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
898 "s_load_dwordx2\t%0, %0\;"
899 "s_cmpk_lg_u32\ts22, 0\;"
900 "s_waitcnt\tlgkmcnt(0)";
901
902 return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
903 ".long\t0xbe9600fd\;"
904 "s_getpc_b64\t%0\;"
905 "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
906 "s_addc_u32\t%H0, %H0, %1@rel32@hi+4\;"
907 "s_cmpk_lg_u32\ts22, 0";
908 }
909 [(set_attr "type" "mult")
910 (set_attr "length" "40")])
911
912
913(define_insn "gcn_indirect_call"
914 [(call (mem (match_operand:DI 0 "register_operand" "Sg"))
915 (match_operand 1 "" ""))
916 (clobber (reg:DI LR_REGNUM))
917 (clobber (match_scratch:DI 2 "=X"))]
918 ""
919 "s_swappc_b64\ts[18:19], %0"
920 [(set_attr "type" "sop1")
921 (set_attr "length" "4")])
922
923(define_expand "call_value"
924 [(parallel [(set (match_operand 0 "")
925 (call (match_operand 1 "")
926 (match_operand 2 "")))
927 (clobber (reg:DI LR_REGNUM))
928 (clobber (match_scratch:DI 3))])]
929 ""
930 {})
931
932(define_insn "gcn_call_value"
933 [(set (match_operand 0 "register_operand" "=Sg,Sg")
934 (call (mem (match_operand 1 "immediate_operand" "Y,B"))
935 (match_operand 2 "const_int_operand")))
936 (clobber (reg:DI LR_REGNUM))
937 (clobber (match_scratch:DI 3 "=&Sg,X"))]
938 ""
939 "@
940 s_getpc_b64\t%3\;s_add_u32\t%L3, %L3, %1@rel32@lo+4\;s_addc_u32\t%H3, %H3, %1@rel32@hi+4\;s_swappc_b64\ts[18:19], %3
941 s_swappc_b64\ts[18:19], %1"
942 [(set_attr "type" "sop1")
943 (set_attr "length" "24")])
944
945(define_insn "gcn_call_value_indirect"
946 [(set (match_operand 0 "register_operand" "=Sg")
947 (call (mem (match_operand:DI 1 "register_operand" "Sg"))
948 (match_operand 2 "" "")))
949 (clobber (reg:DI LR_REGNUM))
950 (clobber (match_scratch:DI 3 "=X"))]
951 ""
952 "s_swappc_b64\ts[18:19], %1"
953 [(set_attr "type" "sop1")
954 (set_attr "length" "4")])
955
956; GCN does not have an instruction to clear only part of the instruction
957; cache, so the operands are ignored.
958
959(define_insn "clear_icache"
960 [(unspec_volatile
961 [(match_operand 0 "") (match_operand 1 "")]
962 UNSPECV_ICACHE_INV)]
963 ""
964 "s_icache_inv"
965 [(set_attr "type" "sopp")
966 (set_attr "length" "4")])
967
968;; }}}
969;; {{{ Conditionals
970
971; 32-bit compare, scalar unit only
972
973(define_insn "cstoresi4"
974 [(set (match_operand:BI 0 "gcn_conditional_register_operand"
975 "=cs, cs, cs, cs")
976 (match_operator:BI 1 "gcn_compare_operator"
977 [(match_operand:SI 2 "gcn_alu_operand" "SSA,SSA,SSB, SS")
978 (match_operand:SI 3 "gcn_alu_operand" "SSA,SSL, SS,SSB")]))]
979 ""
980 "@
981 s_cmp%D1\t%2, %3
982 s_cmpk%D1\t%2, %3
983 s_cmp%D1\t%2, %3
984 s_cmp%D1\t%2, %3"
985 [(set_attr "type" "sopc,sopk,sopk,sopk")
986 (set_attr "length" "4,4,8,8")])
987
988(define_expand "cbranchsi4"
989 [(match_operator 0 "gcn_compare_operator"
990 [(match_operand:SI 1 "gcn_alu_operand")
991 (match_operand:SI 2 "gcn_alu_operand")])
992 (match_operand 3)]
993 ""
994 {
995 rtx cc = gen_reg_rtx (BImode);
996 emit_insn (gen_cstoresi4 (cc, operands[0], operands[1], operands[2]));
997 emit_jump_insn (gen_cjump (operands[3],
998 gen_rtx_NE (BImode, cc, const0_rtx), cc));
999 DONE;
1000 })
1001
1002; 64-bit compare; either unit, but scalar allows limited operators
1003
1004(define_expand "cstoredi4"
1005 [(set (match_operand:BI 0 "gcn_conditional_register_operand")
1006 (match_operator:BI 1 "gcn_compare_operator"
1007 [(match_operand:DI 2 "gcn_alu_operand")
1008 (match_operand:DI 3 "gcn_alu_operand")]))]
1009 ""
1010 {})
1011
1012(define_insn "cstoredi4_vec_and_scalar"
1013 [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cs, cV")
1014 (match_operator:BI 1 "gcn_compare_64bit_operator"
1015 [(match_operand:DI 2 "gcn_alu_operand" "%SSA,vSvC")
1016 (match_operand:DI 3 "gcn_alu_operand" " SSC, v")]))]
1017 ""
1018 "@
1019 s_cmp%D1\t%2, %3
1020 v_cmp%E1\tvcc, %2, %3"
1021 [(set_attr "type" "sopc,vopc")
1022 (set_attr "length" "8")])
1023
1024(define_insn "cstoredi4_vector"
1025 [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cV")
1026 (match_operator:BI 1 "gcn_compare_operator"
1027 [(match_operand:DI 2 "gcn_alu_operand" "vSvB")
1028 (match_operand:DI 3 "gcn_alu_operand" " v")]))]
1029 ""
1030 "v_cmp%E1\tvcc, %2, %3"
1031 [(set_attr "type" "vopc")
1032 (set_attr "length" "8")])
1033
1034(define_expand "cbranchdi4"
1035 [(match_operator 0 "gcn_compare_operator"
1036 [(match_operand:DI 1 "gcn_alu_operand")
1037 (match_operand:DI 2 "gcn_alu_operand")])
1038 (match_operand 3)]
1039 ""
1040 {
1041 rtx cc = gen_reg_rtx (BImode);
1042 emit_insn (gen_cstoredi4 (cc, operands[0], operands[1], operands[2]));
1043 emit_jump_insn (gen_cjump (operands[3],
1044 gen_rtx_NE (BImode, cc, const0_rtx), cc));
1045 DONE;
1046 })
1047
1048; FP compare; vector unit only
1049
1050(define_insn "cstore<mode>4"
1051 [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cV")
1052 (match_operator:BI 1 "gcn_fp_compare_operator"
1053 [(match_operand:SFDF 2 "gcn_alu_operand" "vB")
1054 (match_operand:SFDF 3 "gcn_alu_operand" "v")]))]
1055 ""
1056 "v_cmp%E1\tvcc, %2, %3"
1057 [(set_attr "type" "vopc")
1058 (set_attr "length" "8")])
1059
1060(define_expand "cbranch<mode>4"
1061 [(match_operator 0 "gcn_fp_compare_operator"
1062 [(match_operand:SFDF 1 "gcn_alu_operand")
1063 (match_operand:SFDF 2 "gcn_alu_operand")])
1064 (match_operand 3)]
1065 ""
1066 {
1067 rtx cc = gen_reg_rtx (BImode);
1068 emit_insn (gen_cstore<mode>4 (cc, operands[0], operands[1], operands[2]));
1069 emit_jump_insn (gen_cjump (operands[3],
1070 gen_rtx_NE (BImode, cc, const0_rtx), cc));
1071 DONE;
1072 })
1073
1074;; }}}
1075;; {{{ ALU special cases: Plus
1076
1077(define_insn "addsi3"
1078 [(set (match_operand:SI 0 "register_operand" "= Sg, Sg, Sg, v")
1079 (plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v")
1080 (match_operand:SI 2 "gcn_alu_operand" " SgA,SgJ, B,vBSv")))
1081 (clobber (match_scratch:BI 3 "= cs, cs, cs, X"))
1082 (clobber (match_scratch:DI 4 "= X, X, X, cV"))]
1083 ""
1084 "@
1085 s_add_i32\t%0, %1, %2
1086 s_addk_i32\t%0, %2
1087 s_add_i32\t%0, %1, %2
1088 v_add%^_u32\t%0, vcc, %2, %1"
1089 [(set_attr "type" "sop2,sopk,sop2,vop2")
1090 (set_attr "length" "4,4,8,8")])
1091
1092(define_expand "addsi3_scc"
1093 [(parallel [(set (match_operand:SI 0 "register_operand")
1094 (plus:SI (match_operand:SI 1 "gcn_alu_operand")
1095 (match_operand:SI 2 "gcn_alu_operand")))
1096 (clobber (reg:BI SCC_REG))
1097 (clobber (scratch:DI))])]
1098 ""
1099 {})
1100
1101; Having this as an insn_and_split allows us to keep together DImode adds
1102; through some RTL optimisation passes, and means the CC reg we set isn't
1103; dependent on the constraint alternative (which doesn't seem to work well).
1104
1105; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
1106; used as an operand due to the read of VCC, so we restrict constants to the
1107; inlinable range for that alternative.
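; (This is why the vector alternative of "adddi3" below only accepts "vA",
; i.e. inline constants, for operand 2, while the scalar alternative can take
; a full 32-bit literal via "SgB".)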
1108
1109(define_insn_and_split "adddi3"
1110 [(set (match_operand:DI 0 "register_operand" "=Sg, v")
1111 (plus:DI (match_operand:DI 1 "register_operand" " Sg, v")
1112 (match_operand:DI 2 "nonmemory_operand" "SgB,vA")))
1113 (clobber (match_scratch:BI 3 "=cs, X"))
1114 (clobber (match_scratch:DI 4 "= X,cV"))]
1115 ""
1116 "#"
1117 "&& reload_completed"
1118 [(const_int 0)]
1119 {
1120 rtx cc = gen_rtx_REG (BImode, gcn_vgpr_register_operand (operands[1],
1121 DImode)
1122 ? VCC_REG : SCC_REG);
1123
1124 emit_insn (gen_addsi3_scalar_carry
1125 (gcn_operand_part (DImode, operands[0], 0),
1126 gcn_operand_part (DImode, operands[1], 0),
1127 gcn_operand_part (DImode, operands[2], 0),
1128 cc));
1129 rtx val = gcn_operand_part (DImode, operands[2], 1);
1130 if (val != const0_rtx)
1131 emit_insn (gen_addcsi3_scalar
1132 (gcn_operand_part (DImode, operands[0], 1),
1133 gcn_operand_part (DImode, operands[1], 1),
1134 gcn_operand_part (DImode, operands[2], 1),
1135 cc, cc));
1136 else
1137 emit_insn (gen_addcsi3_scalar_zero
1138 (gcn_operand_part (DImode, operands[0], 1),
1139 gcn_operand_part (DImode, operands[1], 1),
1140 cc));
1141 DONE;
1142 }
1143 [(set_attr "type" "mult,vmult")
1144 (set_attr "length" "8")])
1145
1146(define_expand "adddi3_scc"
1147 [(parallel [(set (match_operand:DI 0 "register_operand")
1148 (plus:DI (match_operand:DI 1 "register_operand")
1149 (match_operand:DI 2 "nonmemory_operand")))
1150 (clobber (reg:BI SCC_REG))
1151 (clobber (scratch:DI))])]
1152 ""
1153 {})
1154
1155;; Add with carry.
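; A carry out of the low-word addition is recovered with the usual unsigned
; identity: the addition wrapped iff the truncated sum compares
; unsigned-less-than one of the addends, which is what the ltu side-effect of
; "addsi3_scalar_carry" below expresses.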
1156
1157(define_insn "addsi3_scalar_carry"
1158 [(set (match_operand:SI 0 "register_operand" "= Sg, v")
1159 (plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, v")
1160 (match_operand:SI 2 "gcn_alu_operand" " SgB,vB")))
1161 (set (match_operand:BI 3 "register_operand" "= cs,cV")
1162 (ltu:BI (plus:SI (match_dup 1)
1163 (match_dup 2))
1164 (match_dup 1)))]
1165 ""
1166 "@
1167 s_add_u32\t%0, %1, %2
1168 v_add%^_u32\t%0, vcc, %2, %1"
1169 [(set_attr "type" "sop2,vop2")
1170 (set_attr "length" "8,8")])
1171
1172(define_insn "addsi3_scalar_carry_cst"
1173 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1174 (plus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA, v")
1175 (match_operand:SI 2 "const_int_operand" " n, n")))
1176 (set (match_operand:BI 4 "register_operand" "=cs,cV")
1177 (geu:BI (plus:SI (match_dup 1)
1178 (match_dup 2))
1179 (match_operand:SI 3 "const_int_operand" " n, n")))]
1180 "INTVAL (operands[2]) == -INTVAL (operands[3])"
1181 "@
1182 s_add_u32\t%0, %1, %2
1183 v_add%^_u32\t%0, vcc, %2, %1"
1184 [(set_attr "type" "sop2,vop2")
1185 (set_attr "length" "4")])
1186
1187(define_insn "addcsi3_scalar"
1188 [(set (match_operand:SI 0 "register_operand" "= Sg, v")
1189 (plus:SI (plus:SI (zero_extend:SI
1190 (match_operand:BI 3 "register_operand" "= cs,cV"))
1191 (match_operand:SI 1 "gcn_alu_operand" "%SgA, v"))
1192 (match_operand:SI 2 "gcn_alu_operand" " SgB,vA")))
1193 (set (match_operand:BI 4 "register_operand" "= 3, 3")
1194 (ior:BI (ltu:BI (plus:SI
1195 (plus:SI
1196 (zero_extend:SI (match_dup 3))
1197 (match_dup 1))
1198 (match_dup 2))
1199 (match_dup 2))
1200 (ltu:BI (plus:SI (zero_extend:SI (match_dup 3)) (match_dup 1))
1201 (match_dup 1))))]
1202 ""
1203 "@
1204 s_addc_u32\t%0, %1, %2
1205 v_addc%^_u32\t%0, vcc, %2, %1, vcc"
1206 [(set_attr "type" "sop2,vop2")
1207 (set_attr "length" "8,4")])
1208
1209(define_insn "addcsi3_scalar_zero"
1210 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1211 (plus:SI (zero_extend:SI
1212 (match_operand:BI 2 "register_operand" "=cs,cV"))
1213 (match_operand:SI 1 "gcn_alu_operand" "SgA, v")))
1214 (set (match_dup 2)
1215 (ltu:BI (plus:SI (zero_extend:SI (match_dup 2))
1216 (match_dup 1))
1217 (match_dup 1)))]
1218 ""
1219 "@
1220 s_addc_u32\t%0, %1, 0
1221 v_addc%^_u32\t%0, vcc, 0, %1, vcc"
1222 [(set_attr "type" "sop2,vop2")
1223 (set_attr "length" "4")])
1224
1225; "addptr" is the same as "add" except that it must not write to VCC or SCC
1226; as a side-effect. Unfortunately GCN does not have a suitable instruction
1227; for this, so we use CC_SAVE_REG as a temp.
1228; Note that it is not safe to save/clobber/restore as separate insns because
1229; doing so will break data-flow analysis, so this must use multiple
1230; instructions in one insn.
1231;
1232; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
1233; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway.
1234;
1235; The SGPR alternative is preferred as it is typically used with mov_sgprbase.
1236
1237(define_insn "addptrdi3"
1238 [(set (match_operand:DI 0 "register_operand" "= v, Sg")
1239 (unspec:DI [
1240 (plus:DI (match_operand:DI 1 "register_operand" "^v0,Sg0")
1241 (match_operand:DI 2 "nonmemory_operand" "vDA,SgDB"))]
1242 UNSPEC_ADDPTR))]
1243 ""
1244 {
1245 if (which_alternative == 0)
1246 {
1247 rtx new_operands[4] = { operands[0], operands[1], operands[2],
1248 gen_rtx_REG (DImode, CC_SAVE_REG) };
1249
1250 output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands);
1251 output_asm_insn ("v_addc%^_u32\t%H0, %3, %H2, %H1, %3", new_operands);
1252 }
1253 else
1254 {
1255 rtx new_operands[4] = { operands[0], operands[1], operands[2],
1256 gen_rtx_REG (BImode, CC_SAVE_REG) };
1257
1258 output_asm_insn ("s_mov_b32\t%3, scc", new_operands);
1259 output_asm_insn ("s_add_u32\t%L0, %L1, %L2", new_operands);
1260 output_asm_insn ("s_addc_u32\t%H0, %H1, %H2", new_operands);
1261 output_asm_insn ("s_cmpk_lg_u32\t%3, 0", new_operands);
1262 }
1263
1264 return "";
1265 }
1266 [(set_attr "type" "vmult,mult")
1267 (set_attr "length" "16,24")])
1268
1269;; }}}
1270;; {{{ ALU special cases: Minus
1271
1272(define_insn "subsi3"
1273 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v, v")
1274 (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA, v,vBSv")
1275 (match_operand:SI 2 "gcn_alu_operand" "SgA, B, vBSv, v")))
1276 (clobber (match_scratch:BI 3 "=cs, cs, X, X"))
1277 (clobber (match_scratch:DI 4 "= X, X, cV, cV"))]
1278 ""
1279 "@
1280 s_sub_i32\t%0, %1, %2
1281 s_sub_i32\t%0, %1, %2
1282 v_subrev%^_u32\t%0, vcc, %2, %1
1283 v_sub%^_u32\t%0, vcc, %1, %2"
1284 [(set_attr "type" "sop2,sop2,vop2,vop2")
1285 (set_attr "length" "4,8,8,8")])
1286
1287(define_insn_and_split "subdi3"
1288 [(set (match_operand:DI 0 "register_operand" "=Sg, Sg")
1289 (minus:DI
1290 (match_operand:DI 1 "gcn_alu_operand" "SgA,SgB")
1291 (match_operand:DI 2 "gcn_alu_operand" "SgB,SgA")))
1292 (clobber (reg:BI SCC_REG))]
1293 ""
1294 "#"
1295 "reload_completed"
1296 [(const_int 0)]
1297 {
1298 emit_insn (gen_subsi3_scalar_carry
1299 (gcn_operand_part (DImode, operands[0], 0),
1300 gcn_operand_part (DImode, operands[1], 0),
1301 gcn_operand_part (DImode, operands[2], 0)));
1302 rtx val = gcn_operand_part (DImode, operands[2], 1);
1303 if (val != const0_rtx)
1304 emit_insn (gen_subcsi3_scalar
1305 (gcn_operand_part (DImode, operands[0], 1),
1306 gcn_operand_part (DImode, operands[1], 1),
1307 gcn_operand_part (DImode, operands[2], 1)));
1308 else
1309 emit_insn (gen_subcsi3_scalar_zero
1310 (gcn_operand_part (DImode, operands[0], 1),
1311 gcn_operand_part (DImode, operands[1], 1)));
1312 DONE;
1313 }
1314 [(set_attr "length" "8")])
1315
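; A borrow out of the low-word subtraction is detected analogously: the
; subtraction wrapped iff the truncated difference compares
; unsigned-greater-than the minuend, which is the gtu side-effect of
; "subsi3_scalar_carry" below.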
1316(define_insn "subsi3_scalar_carry"
1317 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg")
1318 (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB")
1319 (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
1320 (set (reg:BI SCC_REG)
1321 (gtu:BI (minus:SI (match_dup 1)
1322 (match_dup 2))
1323 (match_dup 1)))]
1324 ""
1325 "s_sub_u32\t%0, %1, %2"
1326 [(set_attr "type" "sop2")
1327 (set_attr "length" "8")])
1328
1329(define_insn "subsi3_scalar_carry_cst"
1330 [(set (match_operand:SI 0 "register_operand" "=Sg")
1331 (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA")
1332 (match_operand:SI 2 "const_int_operand" " n")))
1333 (set (reg:BI SCC_REG)
1334 (leu:BI (minus:SI (match_dup 1)
1335 (match_dup 2))
1336 (match_operand:SI 3 "const_int_operand" " n")))]
1337 "INTVAL (operands[2]) == -INTVAL (operands[3])"
1338 "s_sub_u32\t%0, %1, %2"
1339 [(set_attr "type" "sop2")
1340 (set_attr "length" "4")])
1341
1342(define_insn "subcsi3_scalar"
1343 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg")
1344 (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1345 (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB"))
1346 (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
1347 (set (reg:BI SCC_REG)
1348 (ior:BI (gtu:BI (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1349 (match_dup 1))
1350 (match_dup 2))
1351 (match_dup 1))
1352 (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1353 (match_dup 1))
1354 (match_dup 1))))]
1355 ""
1356 "s_subb_u32\t%0, %1, %2"
1357 [(set_attr "type" "sop2")
1358 (set_attr "length" "8")])
1359
1360(define_insn "subcsi3_scalar_zero"
1361 [(set (match_operand:SI 0 "register_operand" "=Sg")
1362 (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1363 (match_operand:SI 1 "gcn_alu_operand" "SgA")))
1364 (set (reg:BI SCC_REG)
1365 (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
1366 (match_dup 1)))]
1367 ""
1368 "s_subb_u32\t%0, %1, 0"
1369 [(set_attr "type" "sop2")
1370 (set_attr "length" "4")])
1371
1372;; }}}
1373;; {{{ ALU: mult
1374
1375; Vector multiply has vop3a encoding, but no corresponding vop2a, so no long
1376; immediate.
1377; The "s_mulk_i32" variant sets SCC to indicate overflow (which we don't care
1378; about here, but we need to indicate the clobbering).
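; (Hence the "cs" clobber on the s_mulk_i32 alternative only, "=X,cs, X, X",
; in the pattern below.)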
1379(define_insn "mulsi3"
1380 [(set (match_operand:SI 0 "register_operand" "= Sg,Sg, Sg, v")
1381 (mult:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v")
1382 (match_operand:SI 2 "gcn_alu_operand" " SgA, J, B,vASv")))
1383 (clobber (match_scratch:BI 3 "=X,cs, X, X"))]
1384 ""
1385 "@
1386 s_mul_i32\t%0, %1, %2
1387 s_mulk_i32\t%0, %2
1388 s_mul_i32\t%0, %1, %2
1389 v_mul_lo_i32\t%0, %1, %2"
1390 [(set_attr "type" "sop2,sopk,sop2,vop3a")
1391 (set_attr "length" "4,4,8,4")])
1392
1393(define_code_iterator any_extend [sign_extend zero_extend])
1394(define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")])
1395(define_code_attr su [(sign_extend "s") (zero_extend "u")])
1396(define_code_attr u [(sign_extend "") (zero_extend "u")])
1397(define_code_attr iu [(sign_extend "i") (zero_extend "u")])
1398(define_code_attr e [(sign_extend "e") (zero_extend "")])
1399
1400(define_expand "<su>mulsi3_highpart"
1401 [(set (match_operand:SI 0 "register_operand" "")
1402 (truncate:SI
1403 (lshiftrt:DI
1404 (mult:DI
1405 (any_extend:DI
1406 (match_operand:SI 1 "register_operand" ""))
1407 (any_extend:DI
1408 (match_operand:SI 2 "gcn_alu_operand" "")))
1409 (const_int 32))))]
1410 ""
1411{
1412 if (can_create_pseudo_p ()
1413 && !TARGET_GCN5
1414 && !gcn_inline_immediate_operand (operands[2], SImode))
1415 operands[2] = force_reg (SImode, operands[2]);
1416
1417 if (REG_P (operands[2]))
1418 emit_insn (gen_<su>mulsi3_highpart_reg (operands[0], operands[1],
1419 operands[2]));
1420 else
1421 emit_insn (gen_<su>mulsi3_highpart_imm (operands[0], operands[1],
1422 operands[2]));
1423
1424 DONE;
1425})
1426
1427(define_insn "<su>mulsi3_highpart_reg"
1428 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1429 (truncate:SI
1430 (lshiftrt:DI
1431 (mult:DI
1432 (any_extend:DI
1433 (match_operand:SI 1 "register_operand" "%Sg, v"))
1434 (any_extend:DI
1435 (match_operand:SI 2 "register_operand" "Sg,vSv")))
1436 (const_int 32))))]
1437 ""
1438 "@
1439 s_mul_hi<sgnsuffix>0\t%0, %1, %2
1440 v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1441 [(set_attr "type" "sop2,vop3a")
1442 (set_attr "length" "4,8")
1443 (set_attr "gcn_version" "gcn5,*")])
1444
1445(define_insn "<su>mulsi3_highpart_imm"
1446 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg,v")
1447 (truncate:SI
1448 (lshiftrt:DI
1449 (mult:DI
1450 (any_extend:DI
1451 (match_operand:SI 1 "register_operand" "Sg,Sg,v"))
1452 (match_operand:DI 2 "gcn_32bit_immediate_operand" "A, B,A"))
1453 (const_int 32))))]
1454 "TARGET_GCN5 || gcn_inline_immediate_operand (operands[2], SImode)"
1455 "@
1456 s_mul_hi<sgnsuffix>0\t%0, %1, %2
1457 s_mul_hi<sgnsuffix>0\t%0, %1, %2
1458 v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1459 [(set_attr "type" "sop2,sop2,vop3a")
1460 (set_attr "length" "4,8,8")
1461 (set_attr "gcn_version" "gcn5,gcn5,*")])
1462
1463(define_expand "<su>mulsidi3"
1464 [(set (match_operand:DI 0 "register_operand" "")
1465 (mult:DI (any_extend:DI
1466 (match_operand:SI 1 "register_operand" ""))
1467 (any_extend:DI
1468 (match_operand:SI 2 "nonmemory_operand" ""))))]
1469 ""
1470{
1471 if (can_create_pseudo_p ()
1472 && !TARGET_GCN5
1473 && !gcn_inline_immediate_operand (operands[2], SImode))
1474 operands[2] = force_reg (SImode, operands[2]);
1475
1476 if (REG_P (operands[2]))
1477 emit_insn (gen_<su>mulsidi3_reg (operands[0], operands[1], operands[2]));
1478 else
1479 emit_insn (gen_<su>mulsidi3_imm (operands[0], operands[1], operands[2]));
1480
1481 DONE;
1482})
1483
1484(define_insn_and_split "<su>mulsidi3_reg"
1485 [(set (match_operand:DI 0 "register_operand" "=&Sg, &v")
1486 (mult:DI (any_extend:DI
1487 (match_operand:SI 1 "register_operand" "%Sg, v"))
1488 (any_extend:DI
1489 (match_operand:SI 2 "register_operand" "Sg,vSv"))))]
1490 ""
1491 "#"
1492 "reload_completed"
1493 [(const_int 0)]
1494 {
1495 rtx dstlo = gen_lowpart (SImode, operands[0]);
1496 rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
1497 emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
1498 emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
1499 DONE;
1500 }
1501 [(set_attr "gcn_version" "gcn5,*")])
1502
1503(define_insn_and_split "<su>mulsidi3_imm"
1504 [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg,&v")
1505 (mult:DI (any_extend:DI
1506 (match_operand:SI 1 "register_operand" "Sg, Sg, v"))
1507 (match_operand:DI 2 "gcn_32bit_immediate_operand"
1508 "A, B, A")))]
1509 "TARGET_GCN5 || gcn_inline_immediate_operand (operands[2], SImode)"
1510 "#"
1511 "&& reload_completed"
1512 [(const_int 0)]
1513 {
1514 rtx dstlo = gen_lowpart (SImode, operands[0]);
1515 rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
1516 emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
1517 emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
1518 DONE;
1519 }
1520 [(set_attr "gcn_version" "gcn5,gcn5,*")])
1521
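; Full DImode lowpart multiply, synthesized from 32-bit multiplies using
;   (a_lo + a_hi*2^32) * (b_lo + b_hi*2^32)
;     == a_lo*b_lo + (a_lo*b_hi + a_hi*b_lo)*2^32     (mod 2^64)
; i.e. one umulsidi3 for the full low-by-low product plus two mulsi3 results
; accumulated into the high word.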
1522(define_insn_and_split "muldi3"
1523 [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg, &v,&v")
1524 (mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg, v, v")
1525 (match_operand:DI 2 "nonmemory_operand" "Sg, i,vSv, A")))
1526 (clobber (match_scratch:SI 3 "=&Sg,&Sg,&v,&v"))
1527 (clobber (match_scratch:BI 4 "=cs, cs, X, X"))
1528 (clobber (match_scratch:DI 5 "=X, X,cV,cV"))]
1529 ""
1530 "#"
1531 "reload_completed"
1532 [(const_int 0)]
1533 {
1534 rtx tmp = operands[3];
1535 rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
1536 rtx op1lo = gcn_operand_part (DImode, operands[1], 0);
1537 rtx op1hi = gcn_operand_part (DImode, operands[1], 1);
1538 rtx op2lo = gcn_operand_part (DImode, operands[2], 0);
1539 rtx op2hi = gcn_operand_part (DImode, operands[2], 1);
1540 emit_insn (gen_umulsidi3 (operands[0], op1lo, op2lo));
1541 emit_insn (gen_mulsi3 (tmp, op1lo, op2hi));
1542 rtx add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
1543 rtx clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
1544 rtx clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
1545 add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
1546 emit_insn (add);
1547 emit_insn (gen_mulsi3 (tmp, op1hi, op2lo));
1548 add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
1549 clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
1550 clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
1551 add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
1552 emit_insn (add);
1553 DONE;
1554 }
1555 [(set_attr "gcn_version" "gcn5,gcn5,*,*")])
1556
1557(define_insn "<u>mulhisi3"
1558 [(set (match_operand:SI 0 "register_operand" "=v")
1559 (mult:SI
1560 (any_extend:SI (match_operand:HI 1 "register_operand" "%v"))
1561 (any_extend:SI (match_operand:HI 2 "register_operand" " v"))))]
1562 ""
1563 "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:WORD_0 src1_sel:WORD_0"
1564 [(set_attr "type" "vop_sdwa")
1565 (set_attr "length" "8")])
1566
1567(define_insn "<u>mulqihi3_scalar"
1568 [(set (match_operand:HI 0 "register_operand" "=v")
1569 (mult:HI
1570 (any_extend:HI (match_operand:QI 1 "register_operand" "%v"))
1571 (any_extend:HI (match_operand:QI 2 "register_operand" " v"))))]
1572 ""
1573 "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:BYTE_0 src1_sel:BYTE_0"
1574 [(set_attr "type" "vop_sdwa")
1575 (set_attr "length" "8")])
1576
1577;; }}}
1578;; {{{ ALU: generic 32-bit unop
1579
1580(define_code_iterator bitunop [not popcount])
1581(define_code_attr popcount_extra_op [(not "") (popcount ", 0")])
1582
1583(define_insn "<expander>si2"
1584 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1585 (bitunop:SI
1586 (match_operand:SI 1 "gcn_alu_operand" "SgB,vSvB")))
1587 (clobber (match_scratch:BI 2 "=cs, X"))]
1588 ""
1589 "@
1590 s_<s_mnemonic>0\t%0, %1
1591 v_<mnemonic>0\t%0, %1<popcount_extra_op>"
1592 [(set_attr "type" "sop1,vop1")
1593 (set_attr "length" "8")])
1594
1595(define_code_iterator countzeros [clz ctz])
1596
1597(define_insn "<expander>si2"
1598 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
1599 (countzeros:SI
1600 (match_operand:SI 1 "gcn_alu_operand" "SgA, B")))]
1601 ""
1602 "s_<s_mnemonic>1\t%0, %1"
1603 [(set_attr "type" "sop1")
1604 (set_attr "length" "4,8")])
1605
1606; The truncate ensures that a constant passed to operand 1 is treated as DImode
1607(define_insn "<expander>di2"
1608 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
1609 (truncate:SI
1610 (countzeros:DI
1611 (match_operand:DI 1 "gcn_alu_operand" "SgA, B"))))]
1612 ""
1613 "s_<s_mnemonic>1\t%0, %1"
1614 [(set_attr "type" "sop1")
1615 (set_attr "length" "4,8")])
1616
1617(define_insn "gcn_flbit<mode>_int"
1618 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
1619 (unspec:SI [(match_operand:SIDI 1 "gcn_alu_operand" "SgA, B")]
1620 UNSPEC_FLBIT_INT))]
1621 ""
1622 {
1623 if (<MODE>mode == SImode)
1624 return "s_flbit_i32\t%0, %1";
1625 else
1626 return "s_flbit_i32_i64\t%0, %1";
1627 }
1628 [(set_attr "type" "sop1")
1629 (set_attr "length" "4,8")])
1630
1631(define_expand "clrsb<mode>2"
1632 [(set (match_operand:SI 0 "register_operand" "")
1633 (clrsb:SI (match_operand:SIDI 1 "gcn_alu_operand" "")))]
1634 ""
1635 {
1636 rtx tmp = gen_reg_rtx (SImode);
1637 /* FLBIT_I* counts sign or zero bits at the most-significant end of the
1638 input register (and returns -1 for 0/-1 inputs). We want the number of
1639 *redundant* bits (i.e. that value minus one), and an answer of 31/63 for
1640 0/-1 inputs. We can do that in three instructions... */
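  /* For SImode, for example: x == 1 gives FLBIT == 31, umin (31, 32) == 31,
     and 31 - 1 == 30 == clrsb (1); x == 0 gives FLBIT == -1, which the
     unsigned umin clamps to 32, giving 32 - 1 == 31, as required.  */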
1641 emit_insn (gen_gcn_flbit<mode>_int (tmp, operands[1]));
1642 emit_insn (gen_uminsi3 (tmp, tmp,
1643 gen_int_mode (GET_MODE_BITSIZE (<MODE>mode),
1644 SImode)));
1645 /* If we put this last, it can potentially be folded into a subsequent
1646 arithmetic operation. */
1647 emit_insn (gen_subsi3 (operands[0], tmp, const1_rtx));
1648 DONE;
1649 })
1650
1651;; }}}
1652;; {{{ ALU: generic 32-bit binop
1653
1654; No plus and mult - they have variants with 16bit immediate
1655; and thus are defined later.
1656(define_code_iterator binop [and ior xor smin smax umin umax
1657 ashift lshiftrt ashiftrt])
1658(define_code_iterator vec_and_scalar_com [and ior xor smin smax umin umax])
1659(define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt])
1660
1661(define_insn "<expander>si3"
1662 [(set (match_operand:SI 0 "gcn_valu_dst_operand" "= Sg, v,RD")
1663 (vec_and_scalar_com:SI
1664 (match_operand:SI 1 "gcn_valu_src0_operand" "%SgA,vSvB, 0")
1665 (match_operand:SI 2 "gcn_alu_operand" " SgB, v, v")))
1666 (clobber (match_scratch:BI 3 "= cs, X, X"))]
1667 ""
1668 "@
1669 s_<mnemonic>0\t%0, %1, %2
1670 v_<mnemonic>0\t%0, %1, %2
1671 ds_<mnemonic>0\t%A0, %2%O0"
1672 [(set_attr "type" "sop2,vop2,ds")
1673 (set_attr "length" "8")])
1674
1675(define_insn "<expander>si3"
1676 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v")
1677 (vec_and_scalar_nocom:SI
1678 (match_operand:SI 1 "gcn_alu_operand" "SgB,SgA, v")
1679 (match_operand:SI 2 "gcn_alu_operand" "SgA,SgB,vSvB")))
1680 (clobber (match_scratch:BI 3 "=cs, cs, X"))]
1681 ""
1682 "@
1683 s_<mnemonic>0\t%0, %1, %2
1684 s_<mnemonic>0\t%0, %1, %2
1685 v_<revmnemonic>0\t%0, %2, %1"
1686 [(set_attr "type" "sop2,sop2,vop2")
1687 (set_attr "length" "8")])
1688
1689(define_expand "<expander>si3_scc"
1690 [(parallel [(set (match_operand:SI 0 "gcn_valu_dst_operand")
1691 (binop:SI
1692 (match_operand:SI 1 "gcn_valu_src0_operand")
1693 (match_operand:SI 2 "gcn_alu_operand")))
1694 (clobber (reg:BI SCC_REG))])]
1695 ""
1696 {})
1697
1698;; }}}
1699;; {{{ ALU: generic 64-bit
1700
1701(define_code_iterator vec_and_scalar64_com [and ior xor])
1702
1703(define_insn_and_split "<expander>di3"
 1704 [(set (match_operand:DI 0 "register_operand" "= Sg, v")
 1705	(vec_and_scalar64_com:DI
1706 (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB")
1707 (match_operand:DI 2 "gcn_alu_operand" " SgC, v")))
1708 (clobber (match_scratch:BI 3 "= cs, X"))]
1709 ""
1710 "@
1711 s_<mnemonic>0\t%0, %1, %2
1712 #"
1713 "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
1714 [(parallel [(set (match_dup 4)
1715 (vec_and_scalar64_com:SI (match_dup 5) (match_dup 6)))
1716 (clobber (match_dup 3))])
1717 (parallel [(set (match_dup 7)
1718 (vec_and_scalar64_com:SI (match_dup 8) (match_dup 9)))
1719 (clobber (match_dup 3))])]
1720 {
1721 operands[4] = gcn_operand_part (DImode, operands[0], 0);
1722 operands[5] = gcn_operand_part (DImode, operands[1], 0);
1723 operands[6] = gcn_operand_part (DImode, operands[2], 0);
1724 operands[7] = gcn_operand_part (DImode, operands[0], 1);
1725 operands[8] = gcn_operand_part (DImode, operands[1], 1);
1726 operands[9] = gcn_operand_part (DImode, operands[2], 1);
1727 }
 1728 [(set_attr "type" "sop2,vop2")
1729 (set_attr "length" "8")])
1730
1731(define_insn "<expander>di3"
1732 [(set (match_operand:DI 0 "register_operand" "=Sg, Sg, v")
1733 (vec_and_scalar_nocom:DI
1734 (match_operand:DI 1 "gcn_alu_operand" "SgC,SgA, v")
1735 (match_operand:SI 2 "gcn_alu_operand" "SgA,SgC,vSvC")))
1736 (clobber (match_scratch:BI 3 "=cs, cs, X"))]
1737 ""
1738 "@
1739 s_<mnemonic>0\t%0, %1, %2
1740 s_<mnemonic>0\t%0, %1, %2
1741 v_<revmnemonic>0\t%0, %2, %1"
1742 [(set_attr "type" "sop2,sop2,vop2")
1743 (set_attr "length" "8")])
1744
1745;; }}}
1746;; {{{ ALU: generic 128-bit binop
1747
1748; TImode shifts can't be synthesized by the middle-end
1749(define_expand "<expander>ti3"
1750 [(set (match_operand:TI 0 "register_operand")
1751 (vec_and_scalar_nocom:TI
1752 (match_operand:TI 1 "gcn_alu_operand")
1753 (match_operand:SI 2 "gcn_alu_operand")))]
1754 ""
1755 {
1756 rtx dest = operands[0];
1757 rtx src = operands[1];
1758 rtx shift = operands[2];
1759
1760 enum {ashr, lshr, ashl} shiftop = <expander>;
1761 rtx (*inverse_shift_fn) (rtx, rtx, rtx)
1762 = (shiftop == ashl ? gen_lshrdi3 : gen_ashldi3);
1763 rtx (*logical_shift_fn) (rtx, rtx, rtx)
1764 = (shiftop == ashl ? gen_ashldi3 : gen_lshrdi3);
1765
1766 /* We shift "from" one subreg "to" the other, according to shiftop. */
1767 int from = (shiftop == ashl ? 0 : 8);
1768 int to = (shiftop == ashl ? 8 : 0);
1769 rtx destfrom = simplify_gen_subreg (DImode, dest, TImode, from);
1770 rtx destto = simplify_gen_subreg (DImode, dest, TImode, to);
1771 rtx srcfrom = simplify_gen_subreg (DImode, src, TImode, from);
1772 rtx srcto = simplify_gen_subreg (DImode, src, TImode, to);
1773
1774 int shiftval = (CONST_INT_P (shift) ? INTVAL (shift) : -1);
1775 enum {RUNTIME, ZERO, SMALL, LARGE} shiftcomparison
1776 = (!CONST_INT_P (shift) ? RUNTIME
1777 : shiftval == 0 ? ZERO
1778 : shiftval < 64 ? SMALL
1779 : LARGE);
1780
1781 rtx large_label, zero_label, exit_label;
1782
1783 if (shiftcomparison == RUNTIME)
1784 {
1785 zero_label = gen_label_rtx ();
1786 large_label = gen_label_rtx ();
1787 exit_label = gen_label_rtx ();
1788
1789 rtx cond = gen_rtx_EQ (VOIDmode, shift, const0_rtx);
1790 emit_insn (gen_cbranchsi4 (cond, shift, const0_rtx, zero_label));
1791
1792 rtx sixtyfour = GEN_INT (64);
1793 cond = gen_rtx_GE (VOIDmode, shift, sixtyfour);
1794 emit_insn (gen_cbranchsi4 (cond, shift, sixtyfour, large_label));
1795 }
1796
1797 if (shiftcomparison == SMALL || shiftcomparison == RUNTIME)
1798 {
1799 /* Shift both parts by the same amount, then patch in the bits that
1800 cross the boundary.
1801 This does *not* work for zero-length shifts. */
1802 rtx tmpto1 = gen_reg_rtx (DImode);
1803 rtx tmpto2 = gen_reg_rtx (DImode);
1804 emit_insn (gen_<expander>di3 (destfrom, srcfrom, shift));
1805 emit_insn (logical_shift_fn (tmpto1, srcto, shift));
1806 rtx lessershiftval = gen_reg_rtx (SImode);
1807 emit_insn (gen_subsi3 (lessershiftval, GEN_INT (64), shift));
1808 emit_insn (inverse_shift_fn (tmpto2, srcfrom, lessershiftval));
1809 emit_insn (gen_iordi3 (destto, tmpto1, tmpto2));
1810 }
1811
1812 if (shiftcomparison == RUNTIME)
1813 {
1814 emit_jump_insn (gen_jump (exit_label));
1815 emit_barrier ();
1816
1817 emit_label (zero_label);
1818 }
1819
1820 if (shiftcomparison == ZERO || shiftcomparison == RUNTIME)
1821 emit_move_insn (dest, src);
1822
1823 if (shiftcomparison == RUNTIME)
1824 {
1825 emit_jump_insn (gen_jump (exit_label));
1826 emit_barrier ();
1827
1828 emit_label (large_label);
1829 }
1830
1831 if (shiftcomparison == LARGE || shiftcomparison == RUNTIME)
1832 {
1833 /* Do the shift within one part, and set the other part appropriately.
1834 Shifts of 128+ bits are an error. */
1835 rtx lessershiftval = gen_reg_rtx (SImode);
1836 emit_insn (gen_subsi3 (lessershiftval, shift, GEN_INT (64)));
1837 emit_insn (gen_<expander>di3 (destto, srcfrom, lessershiftval));
1838 if (shiftop == ashr)
1839 emit_insn (gen_ashrdi3 (destfrom, srcfrom, GEN_INT (63)));
1840 else
1841 emit_move_insn (destfrom, const0_rtx);
1842 }
1843
1844 if (shiftcomparison == RUNTIME)
1845 emit_label (exit_label);
1846
1847 DONE;
1848 })
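; Illustrative summary of the expansion above, for a left shift of the
; 128-bit value {hi,lo} by n:
;   0 < n < 64:  lo' = lo << n;  hi' = (hi << n) | (lo >> (64 - n))
;   n >= 64:     lo' = 0;        hi' = lo << (n - 64)
; For arithmetic right shifts the roles of hi/lo are mirrored and the
; vacated part is filled with copies of the sign bit instead of zeros.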
1849
1850;; }}}
1851;; {{{ Atomics
1852
 1853; Each compute unit has its own L1 cache.  The L2 cache is shared between
1854; all the compute units. Any load or store instruction can skip L1 and
1855; access L2 directly using the "glc" flag. Atomic instructions also skip
1856; L1. The L1 cache can be flushed and invalidated using instructions.
1857;
1858; Therefore, in order for "acquire" and "release" atomic modes to work
1859; correctly across compute units we must flush before each "release"
1860; and invalidate the cache after each "acquire". It might seem like
1861; invalidation could be safely done before an "acquire", but since each
1862; compute unit can run up to 40 threads simultaneously, all reading values
1863; into the L1 cache, this is not actually safe.
1864;
1865; Additionally, scalar flat instructions access L2 via a different cache
1866; (the "constant cache"), so they have separate constrol instructions. We
1867; do not attempt to invalidate both caches at once; instead, atomics
1868; operating on scalar flat pointers will flush the constant cache, and
1869; atomics operating on flat or global pointers will flush L1. It is up to
1870; the programmer to get this right.
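;
; As an illustration (see the atomic_store and atomic_load patterns below),
; a "release" store through a flat pointer is emitted roughly as
;   buffer_wbinvl1_vol       ; write back / invalidate L1 before publishing
;   flat_store... glc        ; the store itself bypasses L1 via "glc"
; while an "acquire" load appends buffer_wbinvl1_vol after the load and its
; s_waitcnt, so that later reads cannot pick up stale L1 data.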
1871
1872(define_code_iterator atomicops [plus minus and ior xor])
1873(define_mode_attr X [(SI "") (DI "_X2")])
1874
1875;; TODO compare_and_swap test_and_set inc dec
1876;; Hardware also supports min and max, but GCC does not.
1877
1878(define_expand "memory_barrier"
1879 [(set (match_dup 0)
1880 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
1881 ""
1882 {
1883 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1884 MEM_VOLATILE_P (operands[0]) = 1;
1885 })
1886
1887(define_insn "*memory_barrier"
1888 [(set (match_operand:BLK 0)
1889 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
1890 ""
1891 "buffer_wbinvl1_vol"
1892 [(set_attr "type" "mubuf")
1893 (set_attr "length" "4")])
1894
1895; FIXME: These patterns have been disabled as they do not seem to work
1896; reliably - they can cause hangs or incorrect results.
1897; TODO: flush caches according to memory model
1898(define_insn "atomic_fetch_<bare_mnemonic><mode>"
1899 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
1900 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
1901 (set (match_dup 1)
1902 (unspec_volatile:SIDI
1903 [(atomicops:SIDI
1904 (match_dup 1)
1905 (match_operand:SIDI 2 "register_operand" " Sm, v, v"))]
1906 UNSPECV_ATOMIC))
1907 (use (match_operand 3 "const_int_operand"))]
1908 "0 /* Disabled. */"
1909 "@
1910 s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
1911 flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
1912 global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
1913 [(set_attr "type" "smem,flat,flat")
1914 (set_attr "length" "12")
1915 (set_attr "gcn_version" "gcn5,*,gcn5")])
1916
1917; FIXME: These patterns are disabled because the instructions don't
1918; seem to work as advertised. Specifically, OMP "team distribute"
1919; reductions apparently "lose" some of the writes, similar to what
1920; you might expect from a concurrent non-atomic read-modify-write.
1921; TODO: flush caches according to memory model
1922(define_insn "atomic_<bare_mnemonic><mode>"
1923 [(set (match_operand:SIDI 0 "memory_operand" "+RS,RF,RM")
1924 (unspec_volatile:SIDI
1925 [(atomicops:SIDI
1926 (match_dup 0)
1927 (match_operand:SIDI 1 "register_operand" " Sm, v, v"))]
1928 UNSPECV_ATOMIC))
1929 (use (match_operand 2 "const_int_operand"))]
1930 "0 /* Disabled. */"
1931 "@
1932 s_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\tlgkmcnt(0)
1933 flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
1934 global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
1935 [(set_attr "type" "smem,flat,flat")
1936 (set_attr "length" "12")
1937 (set_attr "gcn_version" "gcn5,*,gcn5")])
1938
1939(define_mode_attr x2 [(SI "DI") (DI "TI")])
1940(define_mode_attr size [(SI "4") (DI "8")])
1941(define_mode_attr bitsize [(SI "32") (DI "64")])
1942
1943(define_expand "sync_compare_and_swap<mode>"
1944 [(match_operand:SIDI 0 "register_operand")
1945 (match_operand:SIDI 1 "memory_operand")
1946 (match_operand:SIDI 2 "register_operand")
1947 (match_operand:SIDI 3 "register_operand")]
1948 ""
1949 {
1950 if (MEM_ADDR_SPACE (operands[1]) == ADDR_SPACE_LDS)
1951 {
1952 emit_insn (gen_sync_compare_and_swap<mode>_lds_insn (operands[0],
1953 operands[1],
1954 operands[2],
1955 operands[3]));
1956 DONE;
1957 }
1958
1959 /* Operands 2 and 3 must be placed in consecutive registers, and passed
1960 as a combined value. */
1961 rtx src_cmp = gen_reg_rtx (<x2>mode);
1962 emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, 0), operands[3]);
1963 emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, <size>), operands[2]);
1964 emit_insn (gen_sync_compare_and_swap<mode>_insn (operands[0],
1965 operands[1],
1966 src_cmp));
1967 DONE;
1968 })
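; Illustrative layout of the combined value built above (for SImode): the
; new value (operand 3) occupies the low word of the DImode register pair
; and the expected value (operand 2) the high word, matching the order in
; which the cmpswap instructions below consume the register pair.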
1969
1970(define_insn "sync_compare_and_swap<mode>_insn"
1971 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
1972 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
1973 (set (match_dup 1)
1974 (unspec_volatile:SIDI
1975 [(match_operand:<x2> 2 "register_operand" " Sm, v, v")]
1976 UNSPECV_ATOMIC))]
1977 ""
1978 "@
1979 s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
1980 flat_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\t0
1981 global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
1982 [(set_attr "type" "smem,flat,flat")
1983 (set_attr "length" "12")
 1984 (set_attr "gcn_version" "gcn5,*,gcn5")
 1985 (set_attr "delayeduse" "*,yes,yes")])
1986
1987(define_insn "sync_compare_and_swap<mode>_lds_insn"
1988 [(set (match_operand:SIDI 0 "register_operand" "= v")
1989 (unspec_volatile:SIDI
1990 [(match_operand:SIDI 1 "memory_operand" "+RL")]
1991 UNSPECV_ATOMIC))
1992 (set (match_dup 1)
1993 (unspec_volatile:SIDI
1994 [(match_operand:SIDI 2 "register_operand" " v")
1995 (match_operand:SIDI 3 "register_operand" " v")]
1996 UNSPECV_ATOMIC))]
1997 ""
1998 "ds_cmpst_rtn_b<bitsize> %0, %1, %2, %3\;s_waitcnt\tlgkmcnt(0)"
1999 [(set_attr "type" "ds")
2000 (set_attr "length" "12")])
2001
2002(define_insn "atomic_load<mode>"
2003 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
2004 (unspec_volatile:SIDI
2005 [(match_operand:SIDI 1 "memory_operand" " RS,RF,RM")]
2006 UNSPECV_ATOMIC))
2007 (use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
2008 ""
2009 {
2010 switch (INTVAL (operands[2]))
2011 {
2012 case MEMMODEL_RELAXED:
2013 switch (which_alternative)
2014 {
2015 case 0:
2016 return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
2017 case 1:
2018 return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0";
2019 case 2:
2020 return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)";
2021 }
2022 break;
2023 case MEMMODEL_CONSUME:
2024 case MEMMODEL_ACQUIRE:
2025 case MEMMODEL_SYNC_ACQUIRE:
2026 switch (which_alternative)
2027 {
2028 case 0:
2029 return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
2030 "s_dcache_wb_vol";
2031 case 1:
2032 return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
2033 "buffer_wbinvl1_vol";
2034 case 2:
2035 return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
2036 "buffer_wbinvl1_vol";
2037 }
2038 break;
2039 case MEMMODEL_ACQ_REL:
2040 case MEMMODEL_SEQ_CST:
2041 case MEMMODEL_SYNC_SEQ_CST:
2042 switch (which_alternative)
2043 {
2044 case 0:
2045 return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
2046 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
2047 case 1:
2048 return "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
2049 "s_waitcnt\t0\;buffer_wbinvl1_vol";
2050 case 2:
2051 return "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
2052 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
2053 }
2054 break;
2055 }
2056 gcc_unreachable ();
2057 }
2058 [(set_attr "type" "smem,flat,flat")
2059 (set_attr "length" "20")
2060 (set_attr "gcn_version" "gcn5,*,gcn5")])
2061
2062(define_insn "atomic_store<mode>"
2063 [(set (match_operand:SIDI 0 "memory_operand" "=RS,RF,RM")
2064 (unspec_volatile:SIDI
2065 [(match_operand:SIDI 1 "register_operand" " Sm, v, v")]
2066 UNSPECV_ATOMIC))
2067 (use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
2068 ""
2069 {
2070 switch (INTVAL (operands[2]))
2071 {
2072 case MEMMODEL_RELAXED:
2073 switch (which_alternative)
2074 {
2075 case 0:
2076 return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
2077 case 1:
2078 return "flat_store%o1\t%A0, %1%O0 glc\;s_waitcnt\t0";
2079 case 2:
2080 return "global_store%o1\t%A0, %1%O0 glc\;s_waitcnt\tvmcnt(0)";
2081 }
2082 break;
2083 case MEMMODEL_RELEASE:
2084 case MEMMODEL_SYNC_RELEASE:
2085 switch (which_alternative)
2086 {
2087 case 0:
 2088 return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
 2089 case 1:
 2090 return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc";
 2091 case 2:
 2092 return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc";
2093 }
2094 break;
2095 case MEMMODEL_ACQ_REL:
2096 case MEMMODEL_SEQ_CST:
2097 case MEMMODEL_SYNC_SEQ_CST:
2098 switch (which_alternative)
2099 {
2100 case 0:
2101 return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
 2102 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
2103 case 1:
2104 return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
 2105 "s_waitcnt\t0\;buffer_wbinvl1_vol";
2106 case 2:
2107 return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
 2108 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
2109 }
2110 break;
2111 }
2112 gcc_unreachable ();
2113 }
2114 [(set_attr "type" "smem,flat,flat")
2115 (set_attr "length" "20")
2116 (set_attr "gcn_version" "gcn5,*,gcn5")])
2117
2118(define_insn "atomic_exchange<mode>"
2119 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
2120 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
2121 (set (match_dup 1)
2122 (unspec_volatile:SIDI
2123 [(match_operand:SIDI 2 "register_operand" " Sm, v, v")]
2124 UNSPECV_ATOMIC))
2125 (use (match_operand 3 "immediate_operand"))]
2126 ""
2127 {
2128 switch (INTVAL (operands[3]))
2129 {
2130 case MEMMODEL_RELAXED:
2131 switch (which_alternative)
2132 {
2133 case 0:
2134 return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
2135 case 1:
2136 return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0";
2137 case 2:
2138 return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2139 "s_waitcnt\tvmcnt(0)";
2140 }
2141 break;
2142 case MEMMODEL_CONSUME:
2143 case MEMMODEL_ACQUIRE:
2144 case MEMMODEL_SYNC_ACQUIRE:
2145 switch (which_alternative)
2146 {
2147 case 0:
2148 return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
2149 "s_dcache_wb_vol\;s_dcache_inv_vol";
2150 case 1:
2151 return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
2152 "buffer_wbinvl1_vol";
2153 case 2:
2154 return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2155 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
2156 }
2157 break;
2158 case MEMMODEL_RELEASE:
2159 case MEMMODEL_SYNC_RELEASE:
2160 switch (which_alternative)
2161 {
2162 case 0:
2163 return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
2164 "s_waitcnt\tlgkmcnt(0)";
2165 case 1:
2166 return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
2167 "s_waitcnt\t0";
2168 case 2:
2169 return "buffer_wbinvl1_vol\;"
2170 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2171 "s_waitcnt\tvmcnt(0)";
2172 }
2173 break;
2174 case MEMMODEL_ACQ_REL:
2175 case MEMMODEL_SEQ_CST:
2176 case MEMMODEL_SYNC_SEQ_CST:
2177 switch (which_alternative)
2178 {
2179 case 0:
2180 return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
2181 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
2182 case 1:
2183 return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
2184 "s_waitcnt\t0\;buffer_wbinvl1_vol";
2185 case 2:
2186 return "buffer_wbinvl1_vol\;"
2187 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2188 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
2189 }
2190 break;
2191 }
2192 gcc_unreachable ();
2193 }
2194 [(set_attr "type" "smem,flat,flat")
2195 (set_attr "length" "20")
2196 (set_attr "gcn_version" "gcn5,*,gcn5")])
2197
2198;; }}}
2199;; {{{ OpenACC / OpenMP
2200
2201(define_expand "oacc_dim_size"
2202 [(match_operand:SI 0 "register_operand")
2203 (match_operand:SI 1 "const_int_operand")]
2204 ""
2205 {
2206 rtx tmp = gcn_oacc_dim_size (INTVAL (operands[1]));
2207 emit_move_insn (operands[0], gen_lowpart (SImode, tmp));
2208 DONE;
2209 })
2210
2211(define_expand "oacc_dim_pos"
2212 [(match_operand:SI 0 "register_operand")
2213 (match_operand:SI 1 "const_int_operand")]
2214 ""
2215 {
2216 emit_move_insn (operands[0], gcn_oacc_dim_pos (INTVAL (operands[1])));
2217 DONE;
2218 })
2219
2220(define_expand "gcn_wavefront_barrier"
2221 [(set (match_dup 0)
2222 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
2223 ""
2224 {
2225 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2226 MEM_VOLATILE_P (operands[0]) = 1;
2227 })
2228
2229(define_insn "*gcn_wavefront_barrier"
2230 [(set (match_operand:BLK 0 "")
2231 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
2232 ""
2233 "s_barrier"
2234 [(set_attr "type" "sopp")])
2235
2236(define_expand "oacc_fork"
2237 [(set (match_operand:SI 0 "")
2238 (match_operand:SI 1 ""))
2239 (use (match_operand:SI 2 ""))]
2240 ""
2241 {
2242 /* We need to have oacc_fork/oacc_join named patterns as a pair,
2243 but the fork isn't actually used. */
2244 gcc_unreachable ();
2245 })
2246
2247(define_expand "oacc_join"
2248 [(set (match_operand:SI 0 "")
2249 (match_operand:SI 1 ""))
2250 (use (match_operand:SI 2 ""))]
2251 ""
2252 {
2253 emit_insn (gen_gcn_wavefront_barrier ());
2254 DONE;
2255 })
2256
2257;; }}}
2258
2259(include "gcn-valu.md")