]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/nvptx/nvptx.md
[nvptx] Only use one logical barrier resource
[thirdparty/gcc.git] / gcc / config / nvptx / nvptx.md
CommitLineData
8ce80784 1;; Machine description for NVPTX.
8e8f6434 2;; Copyright (C) 2014-2018 Free Software Foundation, Inc.
8ce80784 3;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify
8;; it under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful,
13;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15;; GNU General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3. If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Identifiers for the `unspec' operations referenced by patterns in this
;; file, e.g. UNSPEC_ARG_REG (load_arg_reg<mode>), UNSPEC_COPYSIGN
;; (copysign<mode>3), UNSPEC_BITREV (bitrev<mode>2), UNSPEC_SIN/UNSPEC_COS
;; (sinsf2, cossf2, sincossf3), the UNSPEC_FPINT_* rounding codes and
;; UNSPEC_BR_UNIFIED (the *_uni branches).  Other entries are presumably
;; used by patterns outside the visible part of the file.
21(define_c_enum "unspec" [
22 UNSPEC_ARG_REG
8ce80784 23
8ce80784 24 UNSPEC_COPYSIGN
25 UNSPEC_LOG2
26 UNSPEC_EXP2
27 UNSPEC_SIN
28 UNSPEC_COS
29
30 UNSPEC_FPINT_FLOOR
31 UNSPEC_FPINT_BTRUNC
32 UNSPEC_FPINT_CEIL
33 UNSPEC_FPINT_NEARBYINT
34
35 UNSPEC_BITREV
36
37 UNSPEC_ALLOCA
38
7fce8768 39 UNSPEC_SET_SOFTSTACK
40
b3787ae4 41 UNSPEC_DIM_SIZE
42
b3787ae4 43 UNSPEC_BIT_CONV
44
7fce8768 45 UNSPEC_VOTE_BALLOT
46
47 UNSPEC_LANEID
48
b3787ae4 49 UNSPEC_SHUFFLE
50 UNSPEC_BR_UNIFIED
8ce80784 51])
52
;; Identifiers in the separate "unspecv" enumeration.
;; NOTE(review): presumably these label unspec_volatile operations (locking,
;; atomics, barriers, fork/join, SIMT entry/exit) in patterns later in the
;; file; none of them is referenced in the portion reviewed here -- confirm.
53(define_c_enum "unspecv" [
54 UNSPECV_LOCK
55 UNSPECV_CAS
56 UNSPECV_XCHG
b3787ae4 57 UNSPECV_BARSYNC
e163ceb5 58 UNSPECV_MEMBAR
19b20a5c 59 UNSPECV_MEMBAR_CTA
b3787ae4 60 UNSPECV_DIM_POS
61
62 UNSPECV_FORK
63 UNSPECV_FORKED
64 UNSPECV_JOINING
65 UNSPECV_JOIN
7fce8768 66
67 UNSPECV_NOUNROLL
1b576300 68
69 UNSPECV_SIMT_ENTER
70 UNSPECV_SIMT_EXIT
8ce80784 71])
72
;; Boolean insn attribute, "false" unless a pattern overrides it.  The move,
;; extension and truncation patterns in this file set it to "true".
73(define_attr "subregs_ok" "false,true"
74 (const_string "false"))
75
;; Boolean insn attribute, "false" by default.
;; NOTE(review): no pattern in the reviewed portion sets it; presumably the
;; atomic patterns later in the file do -- confirm.
7fce8768 76(define_attr "atomic" "false,true"
77 (const_string "false"))
78
9224dd1f 79;; The nvptx operand predicates, in general, don't permit subregs and
80;; accept only literal constants.  This differs from the generic predicates,
81;; which permit subregs and symbolic constants (as appropriate).
;; Accept only an rtx whose code is REG (so a SUBREG never matches) and that
;; additionally satisfies the generic register_operand check for MODE.
8ce80784 82(define_predicate "nvptx_register_operand"
f8e6fa1d 83 (match_code "reg")
8ce80784 84{
8ce80784 85 return register_operand (op, mode);
86})
87
;; Accept a REG or a MEM (nothing else gets past match_code).  A REG is
;; then validated with register_operand, a MEM with memory_operand.
(define_predicate "nvptx_nonimmediate_operand"
  (match_code "mem,reg")
{
  if (REG_P (op))
    return register_operand (op, mode);

  /* match_code leaves MEM as the only other possibility.  */
  return memory_operand (op, mode);
})
94
;; Accept a REG, CONST_INT or CONST_DOUBLE.  A REG is validated with
;; register_operand, a constant with immediate_operand.
(define_predicate "nvptx_nonmemory_operand"
  (match_code "reg,const_int,const_double")
{
  if (REG_P (op))
    return register_operand (op, mode);

  /* Otherwise OP is one of the constant codes listed above.  */
  return immediate_operand (op, mode);
})
101
;; Matches only the shared CONST_INT zero (const0_rtx).  Used as the second
;; operand of the BImode predicate tests in define_cond_exec, cbranchbi4 and
;; cstorebi4.
8ce80784 102(define_predicate "const0_operand"
103 (and (match_code "const_int")
104 (match_test "op == const0_rtx")))
105
106;; True if this operator is valid for predication.
;; Only EQ and NE qualify.  Referenced by define_cond_exec and cbranchbi4.
107(define_predicate "predicate_operator"
108 (match_code "eq,ne"))
109
;; Matches the NE comparison code only.  Referenced by cstorebi4.
110(define_predicate "ne_operator"
111 (match_code "ne"))
112
;; Comparison codes accepted by the integer compare, cbranch, setcc and
;; cstore patterns: equality plus the signed and unsigned orderings.
113(define_predicate "nvptx_comparison_operator"
114 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
115
;; Comparison codes accepted by the floating-point compare, cbranch, setcc
;; and cstore patterns: the ordered comparisons, their UN* counterparts, and
;; the ORDERED/UNORDERED tests.
116(define_predicate "nvptx_float_comparison_operator"
117 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
118
;; A valid call target: either a register, or a SYMBOL_REF that is flagged
;; as naming a function.
(define_predicate "call_insn_operand"
  (match_code "symbol_ref,reg")
{
  if (REG_P (op))
    return true;

  /* Only SYMBOL_REF remains; it must refer to a function.  */
  return SYMBOL_REF_FUNCTION_P (op);
})
125
;; Return true if OP is a PARALLEL in which every element after the first
;; is a USE of a register.  Element 0 is not examined here; the
;; match_parallel patterns that use this predicate match it themselves.
(define_predicate "call_operation"
  (match_code "parallel")
{
  const int n_elts = XVECLEN (op, 0);
  int idx = 1;

  while (idx < n_elts)
    {
      rtx use = XVECEXP (op, 0, idx);
      bool is_reg_use = GET_CODE (use) == USE && REG_P (XEXP (use, 0));

      if (!is_reg_use)
	return false;

      idx++;
    }

  return true;
})
142
;; Boolean insn attribute, "true" by default.  The conditional branch
;; patterns (br_true, br_false and their *_uni variants) set it to "false".
7fce8768 143(define_attr "predicable" "false,true"
144 (const_string "true"))
145
;; Conditional-execution template: the condition is an EQ/NE test
;; (predicate_operator) of a BImode register against constant zero.  Both
;; the insn condition and the output prefix are empty strings.
146(define_cond_exec
147 [(match_operator 0 "predicate_operator"
148 [(match_operand:BI 1 "nvptx_register_operand" "")
149 (match_operand:BI 2 "const0_operand" "")])]
150 ""
151 ""
152 )
153
;; Operand constraints.  P0, P1 and Pn each match one specific CONST_INT
;; value; movbi uses P0 and Pn for its constant alternatives.
8ce80784 154(define_constraint "P0"
155 "An integer with the value 0."
156 (and (match_code "const_int")
157 (match_test "ival == 0")))
158
159(define_constraint "P1"
160 "An integer with the value 1."
161 (and (match_code "const_int")
162 (match_test "ival == 1")))
163
164(define_constraint "Pn"
165 "An integer with the value -1."
166 (and (match_code "const_int")
167 (match_test "ival == -1")))
168
;; NOTE(review): the test below only checks for rtx code REG; it does not by
;; itself distinguish pseudo registers from hard registers.
169(define_constraint "R"
170 "A pseudo register."
171 (match_code "reg"))
172
;; The match_test is unconditionally true, so every CONST_INT is accepted.
173(define_constraint "Ia"
174 "Any integer constant."
175 (and (match_code "const_int") (match_test "true")))
176
;; Mode iterators.  Each name lists the modes a pattern using it is
;; instantiated for; the bracketed list after each name is authoritative.
177(define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
178(define_mode_iterator QHSDIM [QI HI SI DI])
179(define_mode_iterator HSDIM [HI SI DI])
180(define_mode_iterator BHSDIM [BI HI SI DI])
181(define_mode_iterator SDIM [SI DI])
182(define_mode_iterator SDISDFM [SI DI SF DF])
183(define_mode_iterator QHIM [QI HI])
184(define_mode_iterator QHSIM [QI HI SI])
185(define_mode_iterator SDFM [SF DF])
186(define_mode_iterator SDCM [SC DC])
;; BITS pairs the SI and SF modes, BITD the DI and DF modes.
b3787ae4 187(define_mode_iterator BITS [SI SF])
188(define_mode_iterator BITD [DI DF])
;; Two-element integer vector modes.
ffaae5bd 189(define_mode_iterator VECIM [V2SI V2DI])
8ce80784 190
191;; This mode iterator allows :P to be used for patterns that operate on
192;; pointer-sized quantities. Exactly one of the two alternatives will match.
;; The SI entry is enabled when Pmode == SImode, the DI entry when
;; Pmode == DImode.
193(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
194
195;; We should get away with not defining memory alternatives, since we don't
196;; get variables in this mode and pseudos are never spilled.
;; BImode (predicate) move into a register.  Three alternatives:
;;   R  - copy from another register with `mov';
;;   P0 - constant 0, produced by `setp.eq.u32' comparing 1 with 0;
;;   Pn - constant -1, produced by `setp.eq.u32' comparing 1 with 1.
197(define_insn "movbi"
198 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
199 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
200 ""
201 "@
202 %.\\tmov%t0\\t%0, %1;
203 %.\\tsetp.eq.u32\\t%0, 1, 0;
204 %.\\tsetp.eq.u32\\t%0, 1, 1;")
205
;; Move for the integer vector modes in VECIM.  The insn condition rejects
;; a store to memory whose source is not a register.  Alternative 1 is a
;; load, alternative 2 a store; alternative 0 (register or immediate into
;; a register) is delegated to nvptx_output_mov_insn.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
	(match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Memory -> register.  */
      return "%.\\tld%A1%u1\\t%0, %1;";

    case 2:
      /* Register -> memory.  */
      return "%.\\tst%A0%u0\\t%0, %1;";

    default:
      /* Register/immediate -> register.  */
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
219
;; Move for the scalar integer modes in QHSDIM.  The insn condition rejects
;; a store to memory whose source is not a register.  Alternative 1 is a
;; load, alternative 2 a store; alternative 0 (register or immediate into
;; a register) is delegated to nvptx_output_mov_insn.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
	(match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Memory -> register.  */
      return "%.\\tld%A1%u1\\t%0, %1;";

    case 2:
      /* Register -> memory.  */
      return "%.\\tst%A0%u0\\t%0, %1;";

    default:
      /* Register/immediate -> register.  */
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
233
;; Move for the floating-point modes in SDFM.  The insn condition rejects a
;; store to memory whose source is not a register.  Alternative 1 is a
;; load, alternative 2 a store; alternative 0 (register or floating
;; constant into a register) is delegated to nvptx_output_mov_insn.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
	(match_operand:SDFM 1 "general_operand" "RF,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Memory -> register.  */
      return "%.\\tld%A1%u0\\t%0, %1;";

    case 2:
      /* Register -> memory.  */
      return "%.\\tst%A0%u1\\t%0, %1;";

    default:
      /* Register/constant -> register.  */
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
247
;; Load incoming argument number operand 1 (a CONST_INT) from the %arN
;; name into a register.  For the QI/HI modes the value is converted with
;; `cvt' from a .u32 source.
248(define_insn "load_arg_reg<mode>"
249 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
18cefec0 250 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
8ce80784 251 UNSPEC_ARG_REG))]
252 ""
253 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
254
;; Same for SI/DI/SF/DF, where a plain `mov' from %arN is emitted.
255(define_insn "load_arg_reg<mode>"
256 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
18cefec0 257 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
8ce80784 258 UNSPEC_ARG_REG))]
259 ""
260 "%.\\tmov%t0\\t%0, %%ar%1;")
261
;; Move expander for the integer vector modes in VECIM.  A store to memory
;; whose source is not a register is split into a move through a fresh
;; pseudo; every other combination falls through to the default expansion.
(define_expand "mov<mode>"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
	(match_operand:VECIM 1 "general_operand" ""))]
  ""
{
  bool store_needs_reg = MEM_P (operands[0]) && !REG_P (operands[1]);

  if (store_needs_reg)
    {
      /* Stage the value in a register, then store that register.  */
      rtx staged = gen_reg_rtx (<MODE>mode);
      emit_move_insn (staged, operands[1]);
      emit_move_insn (operands[0], staged);
      DONE;
    }
})
275
;; Move expander for the scalar integer and floating-point modes in
;; QHSDISDFM.  A store to memory whose source is not a register is split
;; into a move through a fresh pseudo.  Otherwise, a LABEL_REF source is
;; diagnosed with sorry () before the default expansion proceeds.
(define_expand "mov<mode>"
  [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
	(match_operand:QHSDISDFM 1 "general_operand" ""))]
  ""
{
  bool store_needs_reg = MEM_P (operands[0]) && !REG_P (operands[1]);

  if (store_needs_reg)
    {
      /* Stage the value in a register, then store that register.  */
      rtx staged = gen_reg_rtx (<MODE>mode);
      emit_move_insn (staged, operands[1]);
      emit_move_insn (operands[0], staged);
      DONE;
    }

  /* Only reached when the move was not split above.  */
  if (GET_CODE (operands[1]) == LABEL_REF)
    sorry ("target cannot support label values");
})
292
;; Zero extensions.  Each pattern has two alternatives: a register source,
;; emitted as `cvt' between unsigned types, and a memory source, emitted as
;; a single `ld'.
;; QI -> HI.
8ce80784 293(define_insn "zero_extendqihi2"
294 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
50ad9277 295 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 296 ""
297 "@
298 %.\\tcvt.u16.u%T1\\t%0, %1;
299 %.\\tld%A1.u8\\t%0, %1;"
300 [(set_attr "subregs_ok" "true")])
301
;; QI/HI -> SI.
302(define_insn "zero_extend<mode>si2"
303 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
50ad9277 304 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 305 ""
306 "@
307 %.\\tcvt.u32.u%T1\\t%0, %1;
308 %.\\tld%A1.u%T1\\t%0, %1;"
309 [(set_attr "subregs_ok" "true")])
310
;; QI/HI/SI -> DI.
311(define_insn "zero_extend<mode>di2"
312 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
50ad9277 313 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 314 ""
315 "@
316 %.\\tcvt.u64.u%T1\\t%0, %1;
317 %.\\tld%A1%u1\\t%0, %1;"
318 [(set_attr "subregs_ok" "true")])
319
;; Sign extensions.  Register sources use `cvt' between signed types;
;; memory sources use a single signed `ld'.
;; QI/HI -> SI.
320(define_insn "extend<mode>si2"
321 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
50ad9277 322 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 323 ""
324 "@
325 %.\\tcvt.s32.s%T1\\t%0, %1;
326 %.\\tld%A1.s%T1\\t%0, %1;"
327 [(set_attr "subregs_ok" "true")])
328
;; QI/HI/SI -> DI.
329(define_insn "extend<mode>di2"
330 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
50ad9277 331 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 332 ""
333 "@
334 %.\\tcvt.s64.s%T1\\t%0, %1;
335 %.\\tld%A1.s%T1\\t%0, %1;"
336 [(set_attr "subregs_ok" "true")])
337
;; Integer truncations.  The source is always a register.  A register
;; destination uses `cvt'; a memory destination uses a single narrow `st'.
;; HI -> QI.
338(define_insn "trunchiqi2"
50ad9277 339 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
8ce80784 340 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
341 ""
342 "@
343 %.\\tcvt%t0.u16\\t%0, %1;
344 %.\\tst%A0.u8\\t%0, %1;"
345 [(set_attr "subregs_ok" "true")])
346
;; SI -> QI/HI.
347(define_insn "truncsi<mode>2"
50ad9277 348 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
8ce80784 349 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
350 ""
351 "@
352 %.\\tcvt%t0.u32\\t%0, %1;
353 %.\\tst%A0.u%T0\\t%0, %1;"
354 [(set_attr "subregs_ok" "true")])
355
;; DI -> QI/HI/SI.
356(define_insn "truncdi<mode>2"
50ad9277 357 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
8ce80784 358 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
359 ""
360 "@
361 %.\\tcvt%t0.u64\\t%0, %1;
362 %.\\tst%A0.u%T0\\t%0, %1;"
363 [(set_attr "subregs_ok" "true")])
364
8ce80784 365;; Integer arithmetic
366
;; HI/SI/DI addition: register plus register-or-immediate.
367(define_insn "add<mode>3"
368 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
369 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
370 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
371 ""
372 "%.\\tadd%t0\\t%0, %1, %2;")
373
;; HI/SI/DI subtraction.  Unlike add, both inputs must be registers.
374(define_insn "sub<mode>3"
375 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
376 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
377 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
378 ""
379 "%.\\tsub%t0\\t%0, %1, %2;")
380
;; HI/SI/DI multiplication, emitted as `mul.lo'.
381(define_insn "mul<mode>3"
382 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
383 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
384 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
385 ""
386 "%.\\tmul.lo%t0\\t%0, %1, %2;")
387
;; Unnamed multiply-add: (op1 * op2) + op3, emitted as `mad.lo'.
388(define_insn "*mad<mode>3"
389 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
390 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
391 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
392 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
393 ""
394 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
395
;; HI/SI/DI signed division, emitted as `div.s<N>'.
396(define_insn "div<mode>3"
397 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
398 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
399 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
400 ""
401 "%.\\tdiv.s%T0\\t%0, %1, %2;")
402
;; HI/SI/DI unsigned division, emitted as `div.u<N>'.
403(define_insn "udiv<mode>3"
404 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
405 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
406 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
407 ""
408 "%.\\tdiv.u%T0\\t%0, %1, %2;")
409
;; HI/SI/DI signed remainder, emitted as `rem.s<N>'.
;; Operand 1 uses predicate nvptx_register_operand, which only accepts a
;; REG, so its constraint is plain "R" (as in div<mode>3); an "i"
;; alternative there could never be matched.
(define_insn "mod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.s%T0\\t%0, %1, %2;")
416
;; HI/SI/DI unsigned remainder, emitted as `rem.u<N>'.
;; Operand 1 uses predicate nvptx_register_operand, which only accepts a
;; REG, so its constraint is plain "R" (as in udiv<mode>3); an "i"
;; alternative there could never be matched.
(define_insn "umod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.u%T0\\t%0, %1, %2;")
423
;; HI/SI/DI minimum and maximum.  The signed forms emit min.s/max.s, the
;; unsigned forms min.u/max.u; operand 2 may be a register or an immediate.
424(define_insn "smin<mode>3"
425 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
426 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
427 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
428 ""
429 "%.\\tmin.s%T0\\t%0, %1, %2;")
430
431(define_insn "umin<mode>3"
432 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
433 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
434 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
435 ""
436 "%.\\tmin.u%T0\\t%0, %1, %2;")
437
438(define_insn "smax<mode>3"
439 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
440 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
441 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
442 ""
443 "%.\\tmax.s%T0\\t%0, %1, %2;")
444
445(define_insn "umax<mode>3"
446 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
447 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
448 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
449 ""
450 "%.\\tmax.u%T0\\t%0, %1, %2;")
451
;; HI/SI/DI unary operations on a register: absolute value (abs.s),
;; negation (neg.s) and bitwise complement (not.b).
452(define_insn "abs<mode>2"
453 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
454 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
455 ""
456 "%.\\tabs.s%T0\\t%0, %1;")
457
458(define_insn "neg<mode>2"
459 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
460 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
461 ""
462 "%.\\tneg.s%T0\\t%0, %1;")
463
464(define_insn "one_cmpl<mode>2"
465 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
466 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
467 ""
468 "%.\\tnot.b%T0\\t%0, %1;")
469
;; SI/DI bit reversal, represented as UNSPEC_BITREV and emitted as `brev.b'.
;; Used by the ctz<mode>2 expander.
470(define_insn "bitrev<mode>2"
471 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
472 (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
473 UNSPEC_BITREV))]
474 ""
475 "%.\\tbrev.b%T0\\t%0, %1;")
476
;; Count leading zeros of an SI/DI register.  The result is always SImode,
;; so the size suffix in the template is taken from operand 1 (%T1), not
;; from the destination.
477(define_insn "clz<mode>2"
478 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
479 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
480 ""
b594ca2b 481 "%.\\tclz.b%T1\\t%0, %1;")
8ce80784 482
;; Count trailing zeros of an SI/DI register, producing an SImode result.
;; Expanded as a bit reversal into a fresh pseudo followed by a count of
;; leading zeros of that pseudo.
(define_expand "ctz<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
	(ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
  ""
{
  /* The trailing zeros of X are the leading zeros of bit-reversed X.  */
  rtx reversed = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_bitrev<mode>2 (reversed, operands[1]));
  emit_insn (gen_clz<mode>2 (operands[0], reversed));
  DONE;
})
493
494;; Shifts
495
;; SI/DI shifts.  The shift count (operand 2) is always SImode and may be a
;; register or an immediate.
;; Left shift: shl.b.
496(define_insn "ashl<mode>3"
497 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
498 (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
499 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
500 ""
501 "%.\\tshl.b%T0\\t%0, %1, %2;")
502
;; Arithmetic right shift: shr.s.
503(define_insn "ashr<mode>3"
504 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
505 (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
506 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
507 ""
508 "%.\\tshr.s%T0\\t%0, %1, %2;")
509
;; Logical right shift: shr.u.
510(define_insn "lshr<mode>3"
511 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
512 (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
513 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
514 ""
515 "%.\\tshr.u%T0\\t%0, %1, %2;")
516
517;; Logical operations
518
;; Bitwise AND / OR / XOR.  These use BHSDIM, so besides HI/SI/DI they are
;; also instantiated for BImode.
519(define_insn "and<mode>3"
520 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
521 (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
522 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
523 ""
524 "%.\\tand.b%T0\\t%0, %1, %2;")
525
526(define_insn "ior<mode>3"
527 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
528 (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
529 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
530 ""
531 "%.\\tor.b%T0\\t%0, %1, %2;")
532
533(define_insn "xor<mode>3"
534 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
535 (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
536 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
537 ""
538 "%.\\txor.b%T0\\t%0, %1, %2;")
539
540;; Comparisons and branches
541
;; Integer compare into a BImode register, emitted as `setp'.  The
;; comparison code is operand 1 and is printed through the %c1 escape.
542(define_insn "*cmp<mode>"
543 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
544 (match_operator:BI 1 "nvptx_comparison_operator"
545 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
546 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
547 ""
b27697ca 548 "%.\\tsetp%c1\\t%0, %2, %3;")
8ce80784 549
;; Floating-point compare into a BImode register; same template, but the
;; operator set also includes the unordered comparisons.
550(define_insn "*cmp<mode>"
551 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
552 (match_operator:BI 1 "nvptx_float_comparison_operator"
553 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
554 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
555 ""
b27697ca 556 "%.\\tsetp%c1\\t%0, %2, %3;")
8ce80784 557
;; Unconditional jump to the label in operand 0, emitted as `bra'.
558(define_insn "jump"
559 [(set (pc)
560 (label_ref (match_operand 0 "" "")))]
561 ""
562 "%.\\tbra\\t%l0;")
563
;; Branch to label operand 1 when BImode register operand 0 is non-zero.
;; The template starts with the %j0 escape instead of %. and the insn is
;; marked not predicable.
564(define_insn "br_true"
565 [(set (pc)
566 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
567 (const_int 0))
568 (label_ref (match_operand 1 "" ""))
569 (pc)))]
570 ""
7fce8768 571 "%j0\\tbra\\t%l1;"
572 [(set_attr "predicable" "false")])
8ce80784 573
;; Branch to label operand 1 when BImode register operand 0 is zero.  Uses
;; the %J0 escape; also not predicable.
574(define_insn "br_false"
575 [(set (pc)
576 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
577 (const_int 0))
578 (label_ref (match_operand 1 "" ""))
579 (pc)))]
580 ""
7fce8768 581 "%J0\\tbra\\t%l1;"
582 [(set_attr "predicable" "false")])
8ce80784 583
b3787ae4 584;; unified conditional branch
;; As br_true, but the condition register is wrapped in UNSPEC_BR_UNIFIED
;; and the instruction emitted is `bra.uni'.
585(define_insn "br_true_uni"
586 [(set (pc) (if_then_else
587 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
588 UNSPEC_BR_UNIFIED) (const_int 0))
589 (label_ref (match_operand 1 "" "")) (pc)))]
590 ""
7fce8768 591 "%j0\\tbra.uni\\t%l1;"
592 [(set_attr "predicable" "false")])
b3787ae4 593
;; As br_false, with the UNSPEC_BR_UNIFIED wrapper and `bra.uni'.
594(define_insn "br_false_uni"
595 [(set (pc) (if_then_else
596 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
597 UNSPEC_BR_UNIFIED) (const_int 0))
598 (label_ref (match_operand 1 "" "")) (pc)))]
599 ""
7fce8768 600 "%J0\\tbra.uni\\t%l1;"
601 [(set_attr "predicable" "false")])
b3787ae4 602
;; Conditional branch on an integer (HI/SI/DI) comparison.  The comparison
;; in operand 0 is handed to nvptx_expand_compare; its result replaces
;; operand 0, and that result's two sub-expressions become operands 1
;; and 2 before the template above is instantiated.
(define_expand "cbranch<mode>4"
  [(set (pc)
	(if_then_else (match_operator 0 "nvptx_comparison_operator"
		       [(match_operand:HSDIM 1 "nvptx_register_operand" "")
			(match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
		      (label_ref (match_operand 3 "" ""))
		      (pc)))]
  ""
{
  operands[0] = nvptx_expand_compare (operands[0]);
  operands[1] = XEXP (operands[0], 0);
  operands[2] = XEXP (operands[0], 1);
})
617
;; Conditional branch on a floating-point (SF/DF) comparison.  The
;; comparison in operand 0 is handed to nvptx_expand_compare; its result
;; replaces operand 0, and that result's two sub-expressions become
;; operands 1 and 2 before the template above is instantiated.
(define_expand "cbranch<mode>4"
  [(set (pc)
	(if_then_else (match_operator 0 "nvptx_float_comparison_operator"
		       [(match_operand:SDFM 1 "nvptx_register_operand" "")
			(match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
		      (label_ref (match_operand 3 "" ""))
		      (pc)))]
  ""
{
  operands[0] = nvptx_expand_compare (operands[0]);
  operands[1] = XEXP (operands[0], 0);
  operands[2] = XEXP (operands[0], 1);
})
632
;; Conditional branch on an existing BImode predicate: an EQ/NE test of a
;; BImode register against zero.  There is no preparation code, so the
;; template is emitted as written.
633(define_expand "cbranchbi4"
634 [(set (pc)
635 (if_then_else (match_operator 0 "predicate_operator"
636 [(match_operand:BI 1 "nvptx_register_operand" "")
637 (match_operand:BI 2 "const0_operand" "")])
638 (label_ref (match_operand 3 "" ""))
639 (pc)))]
640 ""
641 "")
642
643;; Conditional stores
644
;; Turn a BImode predicate into an SImode value with `selp', selecting
;; between the constants -1 and 0 on operand 1.
645(define_insn "setcc_from_bi"
646 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
647 (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
648 (const_int 0)))]
649 ""
650 "%.\\tselp%t0 %0,-1,0,%1;")
651
;; Select on a non-zero predicate: result = operand 1 != 0 ? operand 2 :
;; operand 3.  `selp' receives the operands in the order 2, 3, predicate.
;; Integer (HI/SI/DI) variant.
75e09431 652(define_insn "sel_true<mode>"
653 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
654 (if_then_else:HSDIM
655 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
656 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
657 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
658 ""
659 "%.\\tselp%t0\\t%0, %2, %3, %1;")
660
;; Floating-point (SF/DF) variant.
661(define_insn "sel_true<mode>"
662 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
663 (if_then_else:SDFM
664 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
665 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
666 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
667 ""
668 "%.\\tselp%t0\\t%0, %2, %3, %1;")
669
;; Select on a zero predicate: result = operand 1 == 0 ? operand 2 :
;; operand 3.  The same `selp' is used with the two data operands swapped
;; (3 before 2), so no inversion of the predicate is needed.
;; Integer (HI/SI/DI) variant.
670(define_insn "sel_false<mode>"
671 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
672 (if_then_else:HSDIM
673 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
674 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
675 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
676 ""
677 "%.\\tselp%t0\\t%0, %3, %2, %1;")
678
;; Floating-point (SF/DF) variant.
679(define_insn "sel_false<mode>"
680 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
681 (if_then_else:SDFM
682 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
683 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
684 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
685 ""
686 "%.\\tselp%t0\\t%0, %3, %2, %1;")
687
;; Compare-and-set with `set': the comparison result is stored directly in
;; a non-predicate register.  setcc_int produces an SImode result,
;; setcc_float an SFmode result; each exists for integer (HSDIM) and
;; floating-point (SDFM) comparison operands.
;; SImode result, integer operands.
8ce80784 688(define_insn "setcc_int<mode>"
689 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
690 (match_operator:SI 1 "nvptx_comparison_operator"
75e09431 691 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
692 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
8ce80784 693 ""
b27697ca 694 "%.\\tset%t0%c1\\t%0, %2, %3;")
8ce80784 695
;; SImode result, floating-point operands.
696(define_insn "setcc_int<mode>"
697 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
698 (match_operator:SI 1 "nvptx_float_comparison_operator"
75e09431 699 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
700 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
8ce80784 701 ""
b27697ca 702 "%.\\tset%t0%c1\\t%0, %2, %3;")
8ce80784 703
;; SFmode result, integer operands.
704(define_insn "setcc_float<mode>"
705 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
706 (match_operator:SF 1 "nvptx_comparison_operator"
75e09431 707 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
708 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
8ce80784 709 ""
b27697ca 710 "%.\\tset%t0%c1\\t%0, %2, %3;")
8ce80784 711
;; SFmode result, floating-point operands.
712(define_insn "setcc_float<mode>"
713 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
714 (match_operator:SF 1 "nvptx_float_comparison_operator"
75e09431 715 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
716 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
8ce80784 717 ""
b27697ca 718 "%.\\tset%t0%c1\\t%0, %2, %3;")
8ce80784 719
;; Store-flag expanders.  None has preparation code, so each emits its
;; template as written and relies on a matching insn above (setcc_from_bi
;; or setcc_int<mode>).
;; BImode predicate != 0 into an SImode register.
720(define_expand "cstorebi4"
721 [(set (match_operand:SI 0 "nvptx_register_operand")
722 (match_operator:SI 1 "ne_operator"
723 [(match_operand:BI 2 "nvptx_register_operand")
724 (match_operand:BI 3 "const0_operand")]))]
725 ""
726 "")
727
;; Integer (HI/SI/DI) comparison into an SImode register.
728(define_expand "cstore<mode>4"
729 [(set (match_operand:SI 0 "nvptx_register_operand")
730 (match_operator:SI 1 "nvptx_comparison_operator"
731 [(match_operand:HSDIM 2 "nvptx_register_operand")
732 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
733 ""
734 "")
735
;; Floating-point (SF/DF) comparison into an SImode register.
736(define_expand "cstore<mode>4"
737 [(set (match_operand:SI 0 "nvptx_register_operand")
738 (match_operator:SI 1 "nvptx_float_comparison_operator"
739 [(match_operand:SDFM 2 "nvptx_register_operand")
740 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
741 ""
742 "")
743
744;; Calls
745
;; Call without a return value.  The insn is a PARALLEL accepted by
;; call_operation whose first element is the CALL itself.  The text is
;; produced by nvptx_output_call_insn, given the insn, a null result and
;; the callee (operand 0).
746(define_insn "call_insn"
747 [(match_parallel 2 "call_operation"
f8e6fa1d 748 [(call (mem:QI (match_operand 0 "call_insn_operand" "Rs"))
8ce80784 749 (match_operand 1))])]
750 ""
751{
752 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
753})
754
;; Call whose result is stored in register operand 0; the callee is
;; operand 1.  Output is likewise delegated to nvptx_output_call_insn.
755(define_insn "call_value_insn"
756 [(match_parallel 3 "call_operation"
757 [(set (match_operand 0 "nvptx_register_operand" "=R")
f8e6fa1d 758 (call (mem:QI (match_operand 1 "call_insn_operand" "Rs"))
8ce80784 759 (match_operand 2)))])]
760 ""
761{
762 return nvptx_output_call_insn (insn, operands[0], operands[1]);
763})
764
;; Expander for a call with no result: all work is done by
;; nvptx_expand_call (null result, callee in operand 0), after which DONE
;; suppresses the template.
765(define_expand "call"
766 [(match_operand 0 "" "")]
767 ""
768{
769 nvptx_expand_call (NULL_RTX, operands[0]);
770 DONE;
771})
772
;; Expander for a call with a result: operand 0 is the result, operand 1
;; the callee; both are forwarded to nvptx_expand_call.
773(define_expand "call_value"
774 [(match_operand 0 "" "")
775 (match_operand 1 "" "")]
776 ""
777{
778 nvptx_expand_call (operands[0], operands[1]);
779 DONE;
780})
781
782;; Floating point arithmetic.
783
;; SF/DF addition: register plus register-or-floating-constant.
784(define_insn "add<mode>3"
785 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
786 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
787 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
788 ""
789 "%.\\tadd%t0\\t%0, %1, %2;")
790
;; SF/DF subtraction.  Both inputs must be registers.
791(define_insn "sub<mode>3"
792 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
793 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
794 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
795 ""
796 "%.\\tsub%t0\\t%0, %1, %2;")
797
;; SF/DF multiplication.
798(define_insn "mul<mode>3"
799 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
800 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
801 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
802 ""
803 "%.\\tmul%t0\\t%0, %1, %2;")
804
;; SF/DF fused multiply-add of operands 1, 2 and 3.
;; NOTE(review): the %# escape before the type suffix presumably prints a
;; rounding-mode modifier; its definition is outside this file -- confirm.
805(define_insn "fma<mode>4"
806 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
807 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
808 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
809 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
810 ""
811 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
812
;; SF/DF division; also uses the %# escape.
813(define_insn "div<mode>3"
814 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
815 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
816 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
817 ""
818 "%.\\tdiv%#%t0\\t%0, %1, %2;")
819
;; SF/DF copysign, represented as UNSPEC_COPYSIGN.  Note that the output
;; template passes the source operands in swapped order (%2 before %1).
820(define_insn "copysign<mode>3"
821 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
822 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
823 (match_operand:SDFM 2 "nvptx_register_operand" "R")]
824 UNSPEC_COPYSIGN))]
825 ""
826 "%.\\tcopysign%t0\\t%0, %2, %1;")
827
;; SF/DF minimum and maximum, emitted as `min' / `max'.
828(define_insn "smin<mode>3"
829 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
830 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
831 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
832 ""
833 "%.\\tmin%t0\\t%0, %1, %2;")
834
835(define_insn "smax<mode>3"
836 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
837 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
838 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
839 ""
840 "%.\\tmax%t0\\t%0, %1, %2;")
841
;; SF/DF unary operations on a register: absolute value, negation and
;; square root.  Only sqrt uses the %# escape in its template.
842(define_insn "abs<mode>2"
843 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
844 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
845 ""
846 "%.\\tabs%t0\\t%0, %1;")
847
848(define_insn "neg<mode>2"
849 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
850 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
851 ""
852 "%.\\tneg%t0\\t%0, %1;")
853
854(define_insn "sqrt<mode>2"
855 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
856 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
857 ""
858 "%.\\tsqrt%#%t0\\t%0, %1;")
859
;; Combined SFmode sine/cosine, enabled only under
;; flag_unsafe_math_optimizations.  Expands to two sets: operand 0 receives
;; UNSPEC_COS of operand 2, then operand 1 receives UNSPEC_SIN of the same
;; input.  Because operand 0 is written before the input is read a second
;; time, the input is first passed through make_safe_from against
;; operand 0.
3d380077 860(define_expand "sincossf3"
861 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
862 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
863 UNSPEC_COS))
864 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
865 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
866 "flag_unsafe_math_optimizations"
867{
868 operands[2] = make_safe_from (operands[2], operands[0]);
869})
870
;; SFmode approximate transcendental operations.  All four are available
;; only under flag_unsafe_math_optimizations and emit the `.approx' form of
;; the instruction.
;; Sine.
8ce80784 871(define_insn "sinsf2"
872 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
873 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
874 UNSPEC_SIN))]
875 "flag_unsafe_math_optimizations"
876 "%.\\tsin.approx%t0\\t%0, %1;")
877
;; Cosine.
878(define_insn "cossf2"
879 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
880 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
881 UNSPEC_COS))]
882 "flag_unsafe_math_optimizations"
883 "%.\\tcos.approx%t0\\t%0, %1;")
884
;; Base-2 logarithm (lg2).
885(define_insn "log2sf2"
886 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
887 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
888 UNSPEC_LOG2))]
889 "flag_unsafe_math_optimizations"
890 "%.\\tlg2.approx%t0\\t%0, %1;")
891
;; Base-2 exponential (ex2).
892(define_insn "exp2sf2"
893 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
894 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
895 UNSPEC_EXP2))]
896 "flag_unsafe_math_optimizations"
897 "%.\\tex2.approx%t0\\t%0, %1;")
898
899;; Conversions involving floating point
900
;; SF -> DF extension via `cvt'; no %# escape is used here.
901(define_insn "extendsfdf2"
902 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
903 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
904 ""
905 "%.\\tcvt%t0%t1\\t%0, %1;")
906
;; DF -> SF truncation via `cvt', with the %# escape before the types.
907(define_insn "truncdfsf2"
908 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
909 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
910 ""
911 "%.\\tcvt%#%t0%t1\\t%0, %1;")
912
;; Integer -> floating-point conversions, all emitted as `cvt' with a .u
;; (unsigned) or .s (signed) source type.
;; Unsigned SI -> SF/DF.
913(define_insn "floatunssi<mode>2"
914 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
915 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
916 ""
917 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
918
;; Signed SI -> SF/DF.
919(define_insn "floatsi<mode>2"
920 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
921 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
922 ""
923 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
924
;; Unsigned DI -> SF/DF.
925(define_insn "floatunsdi<mode>2"
926 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
927 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
928 ""
929 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
930
;; Signed DI -> SF/DF.
931(define_insn "floatdi<mode>2"
932 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
933 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
934 ""
935 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
936
937(define_insn "fixuns_trunc<mode>si2"
938 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
939 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
940 ""
941 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
942
;; fix_trunc<mode>si2: signed fix of an SDFM-mode register into an SImode
;; register.  The rounding modifier is hard-coded as ".rzi" and the
;; destination is printed as signed (".s").
943(define_insn "fix_trunc<mode>si2"
944 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
945 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
946 ""
947 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
948
;; fixuns_trunc<mode>di2: unsigned_fix of an SDFM-mode register into a
;; DImode register.  Same template as the SImode variant; the destination
;; width comes from %T0.
949(define_insn "fixuns_trunc<mode>di2"
950 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
951 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
952 ""
953 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
954
;; fix_trunc<mode>di2: signed fix of an SDFM-mode register into a DImode
;; register.  Same template as the SImode variant; the destination width
;; comes from %T0.
955(define_insn "fix_trunc<mode>di2"
956 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
957 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
958 ""
959 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
960
;; FPINT iterates over the four round-to-integral unspecs.  fpint_name maps
;; each unspec to the pattern-name stem, and fpint_roundingmode maps it to
;; the text spliced after "cvt" in the output template.  The NEARBYINT
;; entry is "%#i" rather than a fixed modifier, so its rounding text is
;; produced by the %# output directive at print time.
961(define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
962 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
963(define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
964 (UNSPEC_FPINT_BTRUNC "btrunc")
965 (UNSPEC_FPINT_CEIL "ceil")
966 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
967(define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
968 (UNSPEC_FPINT_BTRUNC ".rzi")
969 (UNSPEC_FPINT_CEIL ".rpi")
970 (UNSPEC_FPINT_NEARBYINT "%#i")])
971
;; floor/btrunc/ceil/nearbyint on SDFM modes: source and destination share
;; the same floating-point mode.  One "cvt" is emitted with the rounding
;; text selected by fpint_roundingmode for the unspec being expanded.
972(define_insn "<FPINT:fpint_name><SDFM:mode>2"
973 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
974 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
975 FPINT))]
976 ""
977 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
978
;; FPINT2 is the subset of round-to-integral unspecs (floor, ceil) that also
;; get a float-to-integer form.  fpint2_name supplies the "lfloor"/"lceil"
;; name stems and fpint2_roundingmode the matching "cvt" modifier.
979(define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
980(define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
981 (UNSPEC_FPINT_CEIL "lceil")])
982(define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
983 (UNSPEC_FPINT_CEIL ".rpi")])
984
;; lfloor/lceil: round an SDFM-mode register and deliver the result in an
;; SDIM-mode (integer) register.  The destination is always printed as a
;; signed type (".s").
985(define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
986 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
987 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
988 FPINT2))]
989 ""
990 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
991
992;; Miscellaneous
993
;; nop: placeholder insn whose output template is empty, so it produces no
;; assembly text.  Used by the unsupported-feature expanders in this file
;; after they call sorry ().
994(define_insn "nop"
995 [(const_int 0)]
996 ""
997 "")
998
;; exit: emits a literal "exit;".  The RTL body is (const_int 1), which
;; keeps it distinct from "nop" (const_int 0) and "fake_nop" (const_int 2).
f3c313db 999(define_insn "exit"
1000 [(const_int 1)]
1001 ""
1002 "exit;")
1003
;; fake_nop: unlike "nop", this emits real assembly text: a braced scope
;; that declares two .u32 registers and moves one into the other.
;; NOTE(review): the purpose of emitting a real instruction here is not
;; visible in this file -- see the users of gen_fake_nop.
47282587 1004(define_insn "fake_nop"
1005 [(const_int 2)]
1006 ""
1007 "{
1008 .reg .u32 %%nop_src;
1009 .reg .u32 %%nop_dst;
1010 mov.u32 %%nop_dst, %%nop_src;
1011 }")
1012
;; return: function return.  The assembly text is produced by
;; nvptx_output_return (); the insn is marked as not predicable.
8ce80784 1013(define_insn "return"
1014 [(return)]
1015 ""
1016{
1017 return nvptx_output_return ();
7fce8768 1018}
1019 [(set_attr "predicable" "false")])
8ce80784 1020
;; epilogue: with TARGET_SOFT_STACK, first emits set_softstack_insn on the
;; Pmode register SOFTSTACK_PREV_REGNUM; then, in all configurations, emits
;; the "return" jump insn.
1021(define_expand "epilogue"
1022 [(clobber (const_int 0))]
1023 ""
1024{
7fce8768 1025 if (TARGET_SOFT_STACK)
1026 emit_insn (gen_set_softstack_insn (gen_rtx_REG (Pmode,
1027 SOFTSTACK_PREV_REGNUM)));
8ce80784 1028 emit_jump_insn (gen_return ());
1029 DONE;
1030})
1031
;; nonlocal_goto: not supported on this target.  The expander accepts the
;; four operands but ignores them: it reports the limitation via sorry ()
;; and emits a "nop" so that expansion still yields an insn.
;; The diagnostic text has no trailing period, following GCC's diagnostic
;; message conventions.
1032(define_expand "nonlocal_goto"
1033 [(match_operand 0 "" "")
1034 (match_operand 1 "" "")
1035 (match_operand 2 "" "")
1036 (match_operand 3 "" "")]
1037 ""
1038{
1039 sorry ("target cannot support nonlocal goto");
1040 emit_insn (gen_nop ());
1041 DONE;
1042})
1043
;; nonlocal_goto_receiver: not supported on this target.  The expander only
;; reports the limitation via sorry (); there is no DONE, so the
;; (const_int 0) template is what gets emitted afterwards.
;; The diagnostic text has no trailing period, following GCC's diagnostic
;; message conventions.
1044(define_expand "nonlocal_goto_receiver"
1045 [(const_int 0)]
1046 ""
1047{
1048 sorry ("target cannot support nonlocal goto");
1049})
1050
;; allocate_stack: dynamic stack allocation.
;;   operand 0: register receiving the address of the allocated space
;;   operand 1: register holding the number of bytes to allocate
;; With TARGET_SOFT_STACK the stack pointer is decremented by operand 1,
;; the new value is published with set_softstack_insn, and operand 0 is
;; loaded from virtual_stack_dynamic_rtx.  Without a soft stack the request
;; is rejected with sorry () and a "nop" is emitted as a placeholder.
;; The diagnostic text has no trailing period, following GCC's diagnostic
;; message conventions.
ed20400d 1051(define_expand "allocate_stack"
1052 [(match_operand 0 "nvptx_register_operand")
1053 (match_operand 1 "nvptx_register_operand")]
1054 ""
1055{
7fce8768 1056 if (TARGET_SOFT_STACK)
1057 {
1058 emit_move_insn (stack_pointer_rtx,
1059 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1060 emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
1061 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1062 DONE;
1063 }
f289122f 1064 /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1065 only) but notes it is not implemented. The assembler emits a
1066 confused error message. Issue a blunt one now instead. */
1067 sorry ("target cannot support alloca");
1068 emit_insn (gen_nop ());
1069 DONE;
ed20400d 1070})
1071
;; set_softstack_insn: only available with TARGET_SOFT_STACK.  Operand 0 is
;; a register; its register number is handed to nvptx_output_set_softstack,
;; which produces the assembly text.
7fce8768 1072(define_insn "set_softstack_insn"
1073 [(unspec [(match_operand 0 "nvptx_register_operand" "R")]
1074 UNSPEC_SET_SOFTSTACK)]
1075 "TARGET_SOFT_STACK"
1076{
1077 return nvptx_output_set_softstack (REGNO (operands[0]));
1078})
8ce80784 1079
;; restore_stack_block: with TARGET_SOFT_STACK, copies operand 1 into
;; operand 0 and emits set_softstack_insn on operand 0.  Without a soft
;; stack the expander emits nothing.
1080(define_expand "restore_stack_block"
1081 [(match_operand 0 "register_operand" "")
1082 (match_operand 1 "register_operand" "")]
1083 ""
1084{
7fce8768 1085 if (TARGET_SOFT_STACK)
1086 {
1087 emit_move_insn (operands[0], operands[1]);
1088 emit_insn (gen_set_softstack_insn (operands[0]));
1089 }
8ce80784 1090 DONE;
1091})
1092
;; restore_stack_function: deliberately expands to nothing; both operands
;; are ignored and DONE is reached immediately.
1093(define_expand "restore_stack_function"
1094 [(match_operand 0 "register_operand" "")
1095 (match_operand 1 "register_operand" "")]
1096 ""
1097{
1098 DONE;
1099})
1100
;; trap: unconditional trap_if.  The output is "trap;" followed by "exit;".
1101(define_insn "trap"
1102 [(trap_if (const_int 1) (const_int 0))]
1103 ""
38017cab 1104 "trap; exit;")
8ce80784 1105
;; trap_if_true: traps when the BImode register operand 0 is non-zero.
;; Both "trap" and "exit" carry the %j0 prefix; the insn itself is marked
;; not predicable.  NOTE(review): %j0 presumably prints the "@pred" guard
;; for operand 0 -- confirm against the target's print_operand.
1106(define_insn "trap_if_true"
1107 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1108 (const_int 0))
1109 (const_int 0))]
1110 ""
38017cab 1111 "%j0 trap; %j0 exit;"
7fce8768 1112 [(set_attr "predicable" "false")])
8ce80784 1113
;; trap_if_false: traps when the BImode register operand 0 is zero.  Both
;; "trap" and "exit" carry the %J0 prefix; the insn itself is marked not
;; predicable.  NOTE(review): %J0 presumably prints the negated "@!pred"
;; guard for operand 0 -- confirm against the target's print_operand.
1114(define_insn "trap_if_false"
1115 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1116 (const_int 0))
1117 (const_int 0))]
1118 ""
38017cab 1119 "%J0 trap; %J0 exit;"
7fce8768 1120 [(set_attr "predicable" "false")])
8ce80784 1121
;; ctrap<mode>4: conditional trap on an SDIM-mode comparison.
;;   operand 0: the comparison operator applied to operands 1 and 2
;;   operand 3: must satisfy const0_operand
;; The comparison is turned into a value by nvptx_expand_compare and a
;; trap_if_true on that value is emitted.
1122(define_expand "ctrap<mode>4"
1123 [(trap_if (match_operator 0 "nvptx_comparison_operator"
1124 [(match_operand:SDIM 1 "nvptx_register_operand")
1125 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
25ce1bcb 1126 (match_operand 3 "const0_operand"))]
8ce80784 1127 ""
1128{
1129 rtx t = nvptx_expand_compare (operands[0]);
1130 emit_insn (gen_trap_if_true (t));
1131 DONE;
1132})
1133
;; oacc_dim_size: loads the size of an OpenACC dimension into operand 0.
;; Operand 1 is a const_int used directly as an index into asms[]
;; (gang -> %nctaid.x, worker -> %ntid.y, vector -> %ntid.x).  There is no
;; range check on the index, so callers must pass a value in 0..2.
b3787ae4 1134(define_insn "oacc_dim_size"
1135 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1136 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1137 UNSPEC_DIM_SIZE))]
8ce80784 1138 ""
b3787ae4 1139{
1140 static const char *const asms[] =
1141{ /* Must match oacc_loop_levels ordering. */
1142 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1143 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1144 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1145};
1146 return asms[INTVAL (operands[1])];
1147})
8ce80784 1148
;; oacc_dim_pos: loads the position within an OpenACC dimension into
;; operand 0.  Operand 1 is a const_int used directly as an index into
;; asms[] (gang -> %ctaid.x, worker -> %tid.y, vector -> %tid.x), with no
;; range check.  Unlike oacc_dim_size, this is an unspec_volatile.
b3787ae4 1149(define_insn "oacc_dim_pos"
8ce80784 1150 [(set (match_operand:SI 0 "nvptx_register_operand" "")
b3787ae4 1151 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1152 UNSPECV_DIM_POS))]
8ce80784 1153 ""
1154{
b3787ae4 1155 static const char *const asms[] =
1156{ /* Must match oacc_loop_levels ordering. */
1157 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1158 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1159 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1160};
1161 return asms[INTVAL (operands[1])];
8ce80784 1162})
1163
;; nvptx_fork: marker insn.  Its only output is the assembly comment
;; "// fork N", where N is the const_int operand; not predicable.
b3787ae4 1164(define_insn "nvptx_fork"
1165 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1166 UNSPECV_FORK)]
8ce80784 1167 ""
b3787ae4 1168 "// fork %0;"
7fce8768 1169 [(set_attr "predicable" "false")])
8ce80784 1170
;; nvptx_forked: marker insn.  Its only output is the assembly comment
;; "// forked N", where N is the const_int operand; not predicable.
b3787ae4 1171(define_insn "nvptx_forked"
1172 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1173 UNSPECV_FORKED)]
1174 ""
1175 "// forked %0;"
7fce8768 1176 [(set_attr "predicable" "false")])
b3787ae4 1177
;; nvptx_joining: marker insn.  Its only output is the assembly comment
;; "// joining N", where N is the const_int operand; not predicable.
1178(define_insn "nvptx_joining"
1179 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1180 UNSPECV_JOINING)]
1181 ""
1182 "// joining %0;"
7fce8768 1183 [(set_attr "predicable" "false")])
b3787ae4 1184
;; nvptx_join: marker insn.  Its only output is the assembly comment
;; "// join N", where N is the const_int operand; not predicable.
1185(define_insn "nvptx_join"
1186 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1187 UNSPECV_JOIN)]
1188 ""
1189 "// join %0;"
7fce8768 1190 [(set_attr "predicable" "false")])
b3787ae4 1191
;; oacc_fork: unless operand 0 is const0_rtx, copies operand 1 into
;; operand 0; then calls nvptx_expand_oacc_fork with the integer value of
;; operand 2.  The RTL template itself is never emitted (DONE).
1192(define_expand "oacc_fork"
1193 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
6bd291cd 1194 (match_operand:SI 1 "general_operand" ""))
b3787ae4 1195 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1196 UNSPECV_FORKED)]
8ce80784 1197 ""
1198{
b3787ae4 1199 if (operands[0] != const0_rtx)
1200 emit_move_insn (operands[0], operands[1]);
1201 nvptx_expand_oacc_fork (INTVAL (operands[2]));
1202 DONE;
1203})
1204
;; oacc_join: unless operand 0 is const0_rtx, copies operand 1 into
;; operand 0; then calls nvptx_expand_oacc_join with the integer value of
;; operand 2.  The RTL template itself is never emitted (DONE).
1205(define_expand "oacc_join"
1206 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
6bd291cd 1207 (match_operand:SI 1 "general_operand" ""))
b3787ae4 1208 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1209 UNSPECV_JOIN)]
1210 ""
1211{
1212 if (operands[0] != const0_rtx)
1213 emit_move_insn (operands[0], operands[1]);
1214 nvptx_expand_oacc_join (INTVAL (operands[2]));
1215 DONE;
8ce80784 1216})
1217
;; nvptx_shuffle<mode>: shuffle of a BITS-mode register.
;;   operand 1: value to exchange
;;   operand 2: SImode register or immediate passed as the third shfl operand
;;   operand 3: const_int selecting the shuffle kind, printed through %S3
;; The final shfl operand is the literal 31 and the type is always ".b32".
b3787ae4 1218;; only 32-bit shuffles exist.
1219(define_insn "nvptx_shuffle<mode>"
1220 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1221 (unspec:BITS
1222 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1223 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1224 (match_operand:SI 3 "const_int_operand" "n")]
1225 UNSPEC_SHUFFLE))]
1226 ""
1227 "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
1228
;; nvptx_vote_ballot: emits "vote.ballot.b32" taking the BImode predicate
;; register operand 1 and writing an SImode result to operand 0.
7fce8768 1229(define_insn "nvptx_vote_ballot"
1230 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1231 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1232 UNSPEC_VOTE_BALLOT))]
1233 ""
1234 "%.\\tvote.ballot.b32\\t%0, %1;")
1235
1236;; Patterns for OpenMP SIMD-via-SIMT lowering
1237
;; omp_simt_enter_insn: the insn behind the omp_simt_enter expander.  All
;; three operands are forwarded to nvptx_output_simt_enter, which produces
;; the assembly text.
1b576300 1238(define_insn "omp_simt_enter_insn"
1239 [(set (match_operand 0 "nvptx_register_operand" "=R")
1240 (unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri")
1241 (match_operand 2 "nvptx_nonmemory_operand" "Ri")]
1242 UNSPECV_SIMT_ENTER))]
1243 ""
1244{
1245 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
1246})
1247
;; omp_simt_enter: records per-function SIMT stack requirements in
;; cfun->machine, then emits omp_simt_enter_insn.
;;   operand 1: size; if it is not a CONST_INT, simt_stack_size is set to
;;              HOST_WIDE_INT_M1U, otherwise to the maximum seen so far
;;   operand 2: alignment (const_int); simt_stack_align keeps the maximum
;; has_simtreg is set unconditionally.
1248(define_expand "omp_simt_enter"
1249 [(match_operand 0 "nvptx_register_operand" "=R")
1250 (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1251 (match_operand 2 "const_int_operand" "n")]
1252 ""
1253{
1254 if (!CONST_INT_P (operands[1]))
1255 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1256 else
1257 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1258 cfun->machine->simt_stack_size);
1259 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1260 cfun->machine->simt_stack_align);
1261 cfun->machine->has_simtreg = true;
1262 emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2]));
1263 DONE;
1264})
1265
;; omp_simt_exit: counterpart of omp_simt_enter_insn.  Register operand 0
;; is forwarded to nvptx_output_simt_exit, which produces the assembly text.
1266(define_insn "omp_simt_exit"
1267 [(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")]
1268 UNSPECV_SIMT_EXIT)]
1269 ""
1270{
1271 return nvptx_output_simt_exit (operands[0]);
1272})
1273
;; The lane index is read from the %laneid special register with a single
;; mov.u32.
7fce8768 1274;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
1275(define_insn "omp_simt_lane"
1276 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1277 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1278 ""
1279 "%.\\tmov.u32\\t%0, %%laneid;")
1280
;; The compiler barrier is the nvptx_nounroll insn, emitted right after the
;; register copy.
1281;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1282;; place a compiler barrier to disallow unrolling/peeling the containing loop
1283(define_expand "omp_simt_ordered"
1284 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1285 (match_operand:SI 1 "nvptx_register_operand" "R")]
1286 ""
1287{
1288 emit_move_insn (operands[0], operands[1]);
1289 emit_insn (gen_nvptx_nounroll ());
1290 DONE;
1291})
1292
;; Delegates to nvptx_gen_shuffle with SHUFFLE_BFLY; operands 0 and 1 carry
;; no explicit mode here, operand 2 is SImode.
1293;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
1294;; across lanes
1295(define_expand "omp_simt_xchg_bfly"
1296 [(match_operand 0 "nvptx_register_operand" "=R")
1297 (match_operand 1 "nvptx_register_operand" "R")
1298 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1299 ""
1300{
1301 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1302 SHUFFLE_BFLY));
1303 DONE;
1304})
1305
;; Delegates to nvptx_gen_shuffle with SHUFFLE_IDX; operands 0 and 1 carry
;; no explicit mode here, operand 2 is SImode.
1306;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1307;; from lane given by index in operand 2 to operand 0 in all lanes
1308(define_expand "omp_simt_xchg_idx"
1309 [(match_operand 0 "nvptx_register_operand" "=R")
1310 (match_operand 1 "nvptx_register_operand" "R")
1311 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1312 ""
1313{
1314 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1315 SHUFFLE_IDX));
1316 DONE;
1317})
1318
;; Operand 1 is first reduced to a BImode predicate (operand 1 != 0) in a
;; fresh pseudo; the result in operand 0 is the nvptx_vote_ballot of that
;; predicate, i.e. a ballot mask rather than a normalised 0/1 value.
1319;; Implement IFN_GOMP_SIMT_VOTE_ANY:
1320;; set operand 0 to zero iff all lanes supply zero in operand 1
1321(define_expand "omp_simt_vote_any"
1322 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1323 (match_operand:SI 1 "nvptx_register_operand" "R")]
1324 ""
1325{
1326 rtx pred = gen_reg_rtx (BImode);
1327 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1328 emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
1329 DONE;
1330})
1331
;; Sequence: predicate = (operand 1 != 0); tmp = ballot of the predicate;
;; operand 0 = ctz (tmp).  Counting trailing zeros of the ballot mask is
;; what yields the lowest participating lane index.
1332;; Implement IFN_GOMP_SIMT_LAST_LANE:
1333;; set operand 0 to the lowest lane index that passed non-zero in operand 1
1334(define_expand "omp_simt_last_lane"
1335 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1336 (match_operand:SI 1 "nvptx_register_operand" "R")]
1337 ""
1338{
1339 rtx pred = gen_reg_rtx (BImode);
1340 rtx tmp = gen_reg_rtx (SImode);
1341 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1342 emit_insn (gen_nvptx_vote_ballot (tmp, pred));
1343 emit_insn (gen_ctzsi2 (operands[0], tmp));
1344 DONE;
1345})
1346
;; Note the operand numbering: the 64-bit source is operand 2, and the two
;; SImode outputs are operands 0 and 1 (unspec selectors 0 and 1).  A
;; single "mov.b64 {%0,%1}, %2" performs the split.
b3787ae4 1347;; extract parts of a 64 bit object into 2 32-bit ints
1348(define_insn "unpack<mode>si2"
1349 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1350 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
1351 (const_int 0)] UNSPEC_BIT_CONV))
1352 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
1353 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1354 ""
1355 "%.\\tmov.b64\\t{%0,%1}, %2;")
1356
;; Inverse of unpack<mode>si2: operands 1 and 2 are the SImode halves, in
;; the order they appear inside the braces of "mov.b64 %0, {%1,%2}".
1357;; pack 2 32-bit ints into a 64 bit object
1358(define_insn "packsi<mode>2"
1359 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
1360 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
1361 (match_operand:SI 2 "nvptx_register_operand" "R")]
1362 UNSPEC_BIT_CONV))]
1363 ""
1364 "%.\\tmov.b64\\t%0, {%1,%2};")
1365
8ce80784 1366;; Atomic insns.
1367
;; atomic_compare_and_swap<mode>: emits the _1 insn to perform the CAS and
;; obtain the old value in operand 1, then derives the success flag
;; (operand 0) by comparing that old value with the expected value and
;; selecting 1 or 0 via sel_truesi.  Only the success memory model
;; (operand 6) is forwarded; is_weak (operand 5) and the failure model
;; (operand 7) are not used.
1368(define_expand "atomic_compare_and_swap<mode>"
1369 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
1370 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
1371 (match_operand:SDIM 2 "memory_operand") ;; memory
1372 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
1373 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
1374 (match_operand:SI 5 "const_int_operand") ;; is_weak
1375 (match_operand:SI 6 "const_int_operand") ;; success model
1376 (match_operand:SI 7 "const_int_operand")] ;; failure model
1377 ""
1378{
c68f6b1c 1379 emit_insn (gen_atomic_compare_and_swap<mode>_1
1380 (operands[1], operands[2], operands[3], operands[4], operands[6]));
1381
1382 rtx cond = gen_reg_rtx (BImode);
1383 emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
1384 emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
8ce80784 1385 DONE;
1386})
1387
;; atomic_compare_and_swap<mode>_1: the "atom.cas" instruction itself.
;;   operand 0: old memory contents (output)
;;   operand 1: memory location (read and written)
;;   operand 2: expected value, operand 3: new value
;;   operand 4: memory model; it does not appear in the output template
;; Carries the "atomic" attribute.
1388(define_insn "atomic_compare_and_swap<mode>_1"
1389 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1390 (unspec_volatile:SDIM
1391 [(match_operand:SDIM 1 "memory_operand" "+m")
89f6d4a2 1392 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
1393 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
8ce80784 1394 (match_operand:SI 4 "const_int_operand")]
1395 UNSPECV_CAS))
1396 (set (match_dup 1)
1397 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
1398 ""
7fce8768 1399 "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
1400 [(set_attr "atomic" "true")])
8ce80784 1401
;; atomic_exchange<mode>: "atom.exch" -- stores operand 2 into the memory
;; operand 1 and returns the previous contents in operand 0.  The memory
;; model (operand 3) does not appear in the output template.  Carries the
;; "atomic" attribute.
1402(define_insn "atomic_exchange<mode>"
1403 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
1404 (unspec_volatile:SDIM
1405 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
1406 (match_operand:SI 3 "const_int_operand")] ;; model
1407 UNSPECV_XCHG))
1408 (set (match_dup 1)
89f6d4a2 1409 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
8ce80784 1410 ""
7fce8768 1411 "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
1412 [(set_attr "atomic" "true")])
8ce80784 1413
;; atomic_fetch_add<mode>: integer "atom.add" on the memory operand 1 with
;; addend operand 2; operand 0 receives the value the instruction returns.
;; The memory model (operand 3) does not appear in the output template.
;; Carries the "atomic" attribute.
1414(define_insn "atomic_fetch_add<mode>"
1415 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1416 (unspec_volatile:SDIM
1417 [(plus:SDIM (match_dup 1)
1418 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1419 (match_operand:SI 3 "const_int_operand")] ;; model
1420 UNSPECV_LOCK))
1421 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1422 (match_dup 1))]
1423 ""
7fce8768 1424 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1425 [(set_attr "atomic" "true")])
8ce80784 1426
;; atomic_fetch_addsf: SFmode counterpart of atomic_fetch_add<mode>.  The
;; output template is identical; the addend constraint is "RF" instead of
;; "Ri".  Carries the "atomic" attribute.
1427(define_insn "atomic_fetch_addsf"
1428 [(set (match_operand:SF 1 "memory_operand" "+m")
1429 (unspec_volatile:SF
1430 [(plus:SF (match_dup 1)
1431 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
1432 (match_operand:SI 3 "const_int_operand")] ;; model
1433 UNSPECV_LOCK))
1434 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
1435 (match_dup 1))]
1436 ""
7fce8768 1437 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1438 [(set_attr "atomic" "true")])
8ce80784 1439
;; any_logic iterates over the bitwise RTL codes; the "logic" attribute
;; gives the mnemonic fragment for each (note ior -> "or").
1440(define_code_iterator any_logic [and ior xor])
1441(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
1442
;; atomic_fetch_<logic><mode>: atomic and/or/xor on the memory operand 1
;; with operand 2; operand 0 receives the value the instruction returns.
;; The condition enables the SImode form unconditionally and any other
;; SDIM mode only with TARGET_SM35.  Carries the "atomic" attribute.
8ce80784 1443(define_insn "atomic_fetch_<logic><mode>"
1444 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1445 (unspec_volatile:SDIM
1446 [(any_logic:SDIM (match_dup 1)
1447 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1448 (match_operand:SI 3 "const_int_operand")] ;; model
1449 UNSPECV_LOCK))
1450 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1451 (match_dup 1))]
ab841e4d 1452 "<MODE>mode == SImode || TARGET_SM35"
7fce8768 1453 "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
1454 [(set_attr "atomic" "true")])
b3787ae4 1455
;; nvptx_barsync: emits "bar.sync N", where N is the const_int operand 0.
;; The template has no "%." prefix and the insn is marked not predicable.
1456(define_insn "nvptx_barsync"
1457 [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
1458 UNSPECV_BARSYNC)]
1459 ""
7fce8768 1460 "\\tbar.sync\\t%0;"
1461 [(set_attr "predicable" "false")])
1462
;; memory_barrier: builds a volatile BLKmode MEM whose address is a Pmode
;; SCRATCH and uses it as operand 0 of the UNSPECV_MEMBAR set.  There is no
;; DONE, so the RTL template is what gets emitted.
e163ceb5 1463(define_expand "memory_barrier"
1464 [(set (match_dup 0)
1465 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1466 ""
1467{
1468 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1469 MEM_VOLATILE_P (operands[0]) = 1;
1470})
1471
;; This insn matches the UNSPECV_MEMBAR set produced by the memory_barrier
;; expander; it is not predicable and its template has no "%." prefix.
1472;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
1473;; (corresponding to cuda functions threadfence_block, threadfence and
1474;; threadfence_system). For the insn memory_barrier we use membar.sys. This
1475;; may be overconservative, but before using membar.gl instead we'll need to
1476;; explain in detail why it's safe to use. For now, use membar.sys.
1477(define_insn "*memory_barrier"
1478 [(set (match_operand:BLK 0 "" "")
1479 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1480 ""
1481 "\\tmembar.sys;"
1482 [(set_attr "predicable" "false")])
1483
;; nvptx_membar_cta: same construction as memory_barrier (volatile BLKmode
;; MEM on a Pmode SCRATCH address) but with UNSPECV_MEMBAR_CTA, so it is
;; matched by the membar.cta insn instead of membar.sys.
19b20a5c 1484(define_expand "nvptx_membar_cta"
1485 [(set (match_dup 0)
1486 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1487 ""
1488{
1489 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1490 MEM_VOLATILE_P (operands[0]) = 1;
1491})
1492
;; Matches the UNSPECV_MEMBAR_CTA set produced by the nvptx_membar_cta
;; expander and emits "membar.cta"; not predicable.
1493(define_insn "*nvptx_membar_cta"
1494 [(set (match_operand:BLK 0 "" "")
1495 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1496 ""
1497 "\\tmembar.cta;"
1498 [(set_attr "predicable" "false")])
1499
;; nvptx_nounroll: emits the directive .pragma "nounroll"; not predicable.
;; Used by omp_simt_ordered as its compiler barrier.
7fce8768 1500(define_insn "nvptx_nounroll"
1501 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
1502 ""
1503 "\\t.pragma \\\"nounroll\\\";"
1504 [(set_attr "predicable" "false")])