]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/nvptx/nvptx.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / nvptx / nvptx.md
1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2019 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Members of the "unspec" C enum.  Patterns in this file use several of
;; them as the selector of an (unspec ...) expression, e.g. UNSPEC_ARG_REG,
;; UNSPEC_BITREV, UNSPEC_COPYSIGN, the UNSPEC_FPINT_* group and
;; UNSPEC_BR_UNIFIED.
21 (define_c_enum "unspec" [
22 UNSPEC_ARG_REG
23
24 UNSPEC_COPYSIGN
25 UNSPEC_LOG2
26 UNSPEC_EXP2
27 UNSPEC_SIN
28 UNSPEC_COS
29
30 UNSPEC_FPINT_FLOOR
31 UNSPEC_FPINT_BTRUNC
32 UNSPEC_FPINT_CEIL
33 UNSPEC_FPINT_NEARBYINT
34
35 UNSPEC_BITREV
36
37 UNSPEC_ALLOCA
38
39 UNSPEC_SET_SOFTSTACK
40
41 UNSPEC_DIM_SIZE
42
43 UNSPEC_BIT_CONV
44
45 UNSPEC_VOTE_BALLOT
46
47 UNSPEC_LANEID
48
49 UNSPEC_SHUFFLE
50 UNSPEC_BR_UNIFIED
51 ])
52
;; Members of the "unspecv" C enum.
;; NOTE(review): none of these is referenced by the patterns in the first
;; part of this file; presumably they select unspec_volatile expressions
;; in later patterns -- confirm against the rest of the file.
53 (define_c_enum "unspecv" [
54 UNSPECV_LOCK
55 UNSPECV_CAS
56 UNSPECV_XCHG
57 UNSPECV_BARSYNC
58 UNSPECV_MEMBAR
59 UNSPECV_MEMBAR_CTA
60 UNSPECV_DIM_POS
61
62 UNSPECV_FORK
63 UNSPECV_FORKED
64 UNSPECV_JOINING
65 UNSPECV_JOIN
66
67 UNSPECV_NOUNROLL
68
69 UNSPECV_SIMT_ENTER
70 UNSPECV_SIMT_EXIT
71 ])
72
;; Insn attribute "subregs_ok", defaulting to "false".  The move, extension
;; and truncation patterns below override it to "true".
;; NOTE(review): presumably marks insns whose operands may be subregs;
;; the consumer of this attribute is not in this file -- confirm.
73 (define_attr "subregs_ok" "false,true"
74 (const_string "false"))
75
;; Insn attribute "atomic", defaulting to "false".  No pattern in the first
;; part of this file overrides it.
76 (define_attr "atomic" "false,true"
77 (const_string "false"))
78
79 ;; The nvptx operand predicates, in general, permit neither subregs nor
80 ;; symbolic constants (only literal ones), unlike the generic predicates,
81 ;; which permit subregs and symbolic constants (as appropriate).
;; Accepts only an rtx whose code is `reg' and which also satisfies
;; register_operand for the requested mode.
82 (define_predicate "nvptx_register_operand"
83 (match_code "reg")
84 {
85 return register_operand (op, mode);
86 })
87
;; Accepts a `reg' or a `mem'.  A register is checked with register_operand,
;; anything else (i.e. a mem) with memory_operand.
88 (define_predicate "nvptx_nonimmediate_operand"
89 (match_code "mem,reg")
90 {
91 return (REG_P (op) ? register_operand (op, mode)
92 : memory_operand (op, mode));
93 })
94
;; Accepts a `reg', `const_int' or `const_double'.  A register is checked
;; with register_operand, a constant with immediate_operand.
95 (define_predicate "nvptx_nonmemory_operand"
96 (match_code "reg,const_int,const_double")
97 {
98 return (REG_P (op) ? register_operand (op, mode)
99 : immediate_operand (op, mode));
100 })
101
;; Accepts only the const_int that is const0_rtx, i.e. integer zero.
102 (define_predicate "const0_operand"
103 (and (match_code "const_int")
104 (match_test "op == const0_rtx")))
105
106 ;; True if this operator is valid for predication.
;; Only `eq' and `ne' qualify.
107 (define_predicate "predicate_operator"
108 (match_code "eq,ne"))
109
;; Accepts only the `ne' operator.
110 (define_predicate "ne_operator"
111 (match_code "ne"))
112
;; Comparison codes accepted by the integer compare, branch and store-flag
;; patterns: equality plus the signed and unsigned orderings.
113 (define_predicate "nvptx_comparison_operator"
114 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
115
;; Comparison codes accepted by the floating-point patterns: the ordered
;; comparisons, their un* counterparts, and unordered/ordered themselves.
116 (define_predicate "nvptx_float_comparison_operator"
117 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
118
119 ;; Test for a valid operand for a call instruction.
;; Any register is accepted; a symbol_ref is accepted only if
;; SYMBOL_REF_FUNCTION_P holds for it.
120 (define_predicate "call_insn_operand"
121 (match_code "symbol_ref,reg")
122 {
123 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
124 })
125
126 ;; Return true if OP is a call with parallel USEs of the argument
127 ;; pseudos.
;; Element 0 of the parallel is not examined here (the insn patterns that
;; use this predicate match it themselves); every later element must be a
;; USE whose operand is a register, otherwise the predicate fails.
128 (define_predicate "call_operation"
129 (match_code "parallel")
130 {
131 int arg_end = XVECLEN (op, 0);
132
133 for (int i = 1; i < arg_end; i++)
134 {
135 rtx elt = XVECEXP (op, 0, i);
136
137 if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
138 return false;
139 }
140 return true;
141 })
142
;; Insns are predicable unless a pattern sets this attribute to "false";
;; the conditional branch patterns and "return" below do so.
143 (define_attr "predicable" "false,true"
144 (const_string "true"))
145
;; Conditional execution: the guard is an eq/ne comparison (see
;; predicate_operator) of a BImode register against constant zero.  Both
;; the condition string and the output template are empty.
146 (define_cond_exec
147 [(match_operator 0 "predicate_operator"
148 [(match_operand:BI 1 "nvptx_register_operand" "")
149 (match_operand:BI 2 "const0_operand" "")])]
150 ""
151 ""
152 )
153
;; Operand constraints.  P0, P1 and Pn each match exactly one const_int
;; value (0, 1 and -1 respectively); R matches any rtx with code `reg';
;; Ia matches every const_int (its match_test is the constant "true").
154 (define_constraint "P0"
155 "An integer with the value 0."
156 (and (match_code "const_int")
157 (match_test "ival == 0")))
158
159 (define_constraint "P1"
160 "An integer with the value 1."
161 (and (match_code "const_int")
162 (match_test "ival == 1")))
163
164 (define_constraint "Pn"
165 "An integer with the value -1."
166 (and (match_code "const_int")
167 (match_test "ival == -1")))
168
169 (define_constraint "R"
170 "A pseudo register."
171 (match_code "reg"))
172
173 (define_constraint "Ia"
174 "Any integer constant."
175 (and (match_code "const_int") (match_test "true")))
176
;; Mode iterators.  A pattern written with one of these is instantiated
;; once for each mode in the bracketed list.
177 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
178 (define_mode_iterator QHSDIM [QI HI SI DI])
179 (define_mode_iterator HSDIM [HI SI DI])
180 (define_mode_iterator BHSDIM [BI HI SI DI])
181 (define_mode_iterator SDIM [SI DI])
182 (define_mode_iterator SDISDFM [SI DI SF DF])
183 (define_mode_iterator QHIM [QI HI])
184 (define_mode_iterator QHSIM [QI HI SI])
185 (define_mode_iterator SDFM [SF DF])
186 (define_mode_iterator SDCM [SC DC])
187 (define_mode_iterator BITS [SI SF])
188 (define_mode_iterator BITD [DI DF])
189 (define_mode_iterator VECIM [V2SI V2DI])
190
191 ;; This mode iterator allows :P to be used for patterns that operate on
192 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
;; Each entry is enabled by a condition on Pmode (SImode or DImode).
193 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
194
;; BImode move into a register.  Three alternatives:
;;   0: register source, emitted as a mov;
;;   1: constant 0 (P0), emitted as setp.eq.u32 comparing the literals
;;      1 and 0;
;;   2: constant -1 (Pn), emitted as setp.eq.u32 comparing the literals
;;      1 and 1.
195 ;; We should get away with not defining memory alternatives, since we don't
196 ;; get variables in this mode and pseudos are never spilled.
197 (define_insn "movbi"
198 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
199 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
200 ""
201 "@
202 %.\\tmov%t0\\t%0, %1;
203 %.\\tsetp.eq.u32\\t%0, 1, 0;
204 %.\\tsetp.eq.u32\\t%0, 1, 1;")
205
;; Move for the vector modes V2SI and V2DI.  The insn condition rejects a
;; memory destination unless the source is a register.  Alternatives:
;;   0: register destination, register or immediate source; the text is
;;      produced by nvptx_output_mov_insn;
;;   1: register destination, memory source; emitted as a load (ld);
;;   2: memory destination, register source; emitted as a store (st).
206 (define_insn "*mov<mode>_insn"
207 [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
208 (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
209 "!MEM_P (operands[0]) || REG_P (operands[1])"
210 {
211 if (which_alternative == 1)
212 return "%.\\tld%A1%u1\\t%0, %1;";
213 if (which_alternative == 2)
214 return "%.\\tst%A0%u0\\t%0, %1;";
215
216 return nvptx_output_mov_insn (operands[0], operands[1]);
217 }
218 [(set_attr "subregs_ok" "true")])
219
;; Move for the integer modes QI, HI, SI and DI.  The insn condition
;; rejects a memory destination unless the source is a register.
;; Alternatives:
;;   0: register destination, register or immediate source; the text is
;;      produced by nvptx_output_mov_insn;
;;   1: register destination, memory source; emitted as a load (ld);
;;   2: memory destination, register source; emitted as a store (st).
220 (define_insn "*mov<mode>_insn"
221 [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
222 (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
223 "!MEM_P (operands[0]) || REG_P (operands[1])"
224 {
225 if (which_alternative == 1)
226 return "%.\\tld%A1%u1\\t%0, %1;";
227 if (which_alternative == 2)
228 return "%.\\tst%A0%u0\\t%0, %1;";
229
230 return nvptx_output_mov_insn (operands[0], operands[1]);
231 }
232 [(set_attr "subregs_ok" "true")])
233
;; Move for the floating-point modes SF and DF.  The insn condition rejects
;; a memory destination unless the source is a register.  Alternatives:
;;   0: register destination, register or floating constant (F) source;
;;      the text is produced by nvptx_output_mov_insn;
;;   1: register destination, memory source; emitted as a load (ld);
;;   2: memory destination, register source; emitted as a store (st).
;; NOTE(review): unlike the integer and vector move patterns, the %u
;; modifier is applied to the *register* operand here (operand 0 for the
;; load, operand 1 for the store) rather than to the memory operand.
;; Presumably deliberate (this insn allows subregs) -- confirm against the
;; operand printer before "harmonising" it with the other patterns.
234 (define_insn "*mov<mode>_insn"
235 [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
236 (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
237 "!MEM_P (operands[0]) || REG_P (operands[1])"
238 {
239 if (which_alternative == 1)
240 return "%.\\tld%A1%u0\\t%0, %1;";
241 if (which_alternative == 2)
242 return "%.\\tst%A0%u1\\t%0, %1;";
243
244 return nvptx_output_mov_insn (operands[0], operands[1]);
245 }
246 [(set_attr "subregs_ok" "true")])
247
;; Set a register from "%ar<N>", where N is the constant operand 1,
;; expressed in RTL as an UNSPEC_ARG_REG.  The QI/HI variant emits a cvt
;; whose source type is .u32.
;; NOTE(review): %ar<N> is presumably the incoming argument register N --
;; confirm against the code that declares those registers.
248 (define_insn "load_arg_reg<mode>"
249 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
250 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
251 UNSPEC_ARG_REG))]
252 ""
253 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
254
;; Same as above for SI, DI, SF and DF, emitted as a plain mov.
255 (define_insn "load_arg_reg<mode>"
256 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
257 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
258 UNSPEC_ARG_REG))]
259 ""
260 "%.\\tmov%t0\\t%0, %%ar%1;")
261
;; Move expander for V2SI and V2DI.  When the destination is memory and the
;; source is not a register, the source is first moved into a fresh pseudo
;; and the pseudo is then stored; the matching insn above has no alternative
;; for any other kind of store.  In every other case the set is emitted as
;; written.
262 (define_expand "mov<mode>"
263 [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
264 (match_operand:VECIM 1 "general_operand" ""))]
265 ""
266 {
267 if (MEM_P (operands[0]) && !REG_P (operands[1]))
268 {
269 rtx tmp = gen_reg_rtx (<MODE>mode);
270 emit_move_insn (tmp, operands[1]);
271 emit_move_insn (operands[0], tmp);
272 DONE;
273 }
274 })
275
;; Move expander for QI, HI, SI, DI, SF and DF.  When the destination is
;; memory and the source is not a register, the source is first moved into
;; a fresh pseudo and the pseudo is then stored.  Otherwise, a LABEL_REF
;; source is reported with sorry (); expansion then continues and the set
;; is emitted as written.
276 (define_expand "mov<mode>"
277 [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
278 (match_operand:QHSDISDFM 1 "general_operand" ""))]
279 ""
280 {
281 if (MEM_P (operands[0]) && !REG_P (operands[1]))
282 {
283 rtx tmp = gen_reg_rtx (<MODE>mode);
284 emit_move_insn (tmp, operands[1]);
285 emit_move_insn (operands[0], tmp);
286 DONE;
287 }
288
289 if (GET_CODE (operands[1]) == LABEL_REF)
290 sorry ("target cannot support label values");
291 })
292
;; Zero extensions.  In each pattern alternative 0 takes the source from a
;; register and emits a cvt between unsigned types; alternative 1 takes it
;; from memory and emits a load (ld) straight into the wider register.

;; QI -> HI.
293 (define_insn "zero_extendqihi2"
294 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
295 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
296 ""
297 "@
298 %.\\tcvt.u16.u%T1\\t%0, %1;
299 %.\\tld%A1.u8\\t%0, %1;"
300 [(set_attr "subregs_ok" "true")])
301
;; QI or HI -> SI.
302 (define_insn "zero_extend<mode>si2"
303 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
304 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
305 ""
306 "@
307 %.\\tcvt.u32.u%T1\\t%0, %1;
308 %.\\tld%A1.u%T1\\t%0, %1;"
309 [(set_attr "subregs_ok" "true")])
310
;; QI, HI or SI -> DI.
311 (define_insn "zero_extend<mode>di2"
312 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
313 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
314 ""
315 "@
316 %.\\tcvt.u64.u%T1\\t%0, %1;
317 %.\\tld%A1%u1\\t%0, %1;"
318 [(set_attr "subregs_ok" "true")])
319
;; Sign extensions.  Alternative 0 takes the source from a register and
;; emits a cvt between signed types; alternative 1 takes it from memory and
;; emits a load (ld) with a signed type.

;; QI or HI -> SI.
320 (define_insn "extend<mode>si2"
321 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
322 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
323 ""
324 "@
325 %.\\tcvt.s32.s%T1\\t%0, %1;
326 %.\\tld%A1.s%T1\\t%0, %1;"
327 [(set_attr "subregs_ok" "true")])
328
;; QI, HI or SI -> DI.
329 (define_insn "extend<mode>di2"
330 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
331 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
332 ""
333 "@
334 %.\\tcvt.s64.s%T1\\t%0, %1;
335 %.\\tld%A1.s%T1\\t%0, %1;"
336 [(set_attr "subregs_ok" "true")])
337
;; Integer truncations.  The source is always a register.  Alternative 0
;; writes a register and emits a cvt; alternative 1 writes memory and emits
;; a store (st) of the narrow type.

;; HI -> QI.
338 (define_insn "trunchiqi2"
339 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
340 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
341 ""
342 "@
343 %.\\tcvt%t0.u16\\t%0, %1;
344 %.\\tst%A0.u8\\t%0, %1;"
345 [(set_attr "subregs_ok" "true")])
346
;; SI -> QI or HI.
347 (define_insn "truncsi<mode>2"
348 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
349 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
350 ""
351 "@
352 %.\\tcvt%t0.u32\\t%0, %1;
353 %.\\tst%A0.u%T0\\t%0, %1;"
354 [(set_attr "subregs_ok" "true")])
355
;; DI -> QI, HI or SI.
356 (define_insn "truncdi<mode>2"
357 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
358 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
359 ""
360 "@
361 %.\\tcvt%t0.u64\\t%0, %1;
362 %.\\tst%A0.u%T0\\t%0, %1;"
363 [(set_attr "subregs_ok" "true")])
364
365 ;; Integer arithmetic
;; All patterns in this group are instantiated for HI, SI and DI.
366
;; Addition; operand 2 may be a register or an immediate.
367 (define_insn "add<mode>3"
368 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
369 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
370 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
371 ""
372 "%.\\tadd%t0\\t%0, %1, %2;")
373
;; Subtraction; both inputs must be registers.
374 (define_insn "sub<mode>3"
375 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
376 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
377 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
378 ""
379 "%.\\tsub%t0\\t%0, %1, %2;")
380
;; Multiplication, emitted as mul.lo; operand 2 may be an immediate.
381 (define_insn "mul<mode>3"
382 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
383 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
384 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
385 ""
386 "%.\\tmul.lo%t0\\t%0, %1, %2;")
387
;; Multiply-add, operand 1 * operand 2 + operand 3, emitted as mad.lo.
;; Operands 2 and 3 may be immediates.
388 (define_insn "*mad<mode>3"
389 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
390 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
391 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
392 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
393 ""
394 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
395
;; Signed division for HI, SI and DI, emitted as div with a signed (.s)
;; type; the divisor may be a register or an immediate.
396 (define_insn "div<mode>3"
397 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
398 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
399 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
400 ""
401 "%.\\tdiv.s%T0\\t%0, %1, %2;")
402
;; Unsigned division, emitted as div with an unsigned (.u) type.
403 (define_insn "udiv<mode>3"
404 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
405 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
406 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
407 ""
408 "%.\\tdiv.u%T0\\t%0, %1, %2;")
409
;; Signed remainder for HI, SI and DI, emitted as rem with a signed (.s)
;; type.  The dividend (operand 1) must be a register: its predicate is
;; nvptx_register_operand, so its constraint is "R" to agree with that
;; predicate and with the division patterns.  The divisor (operand 2) may
;; be a register or an immediate.
410 (define_insn "mod<mode>3"
411 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
412 (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
413 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
414 ""
415 "%.\\trem.s%T0\\t%0, %1, %2;")
416
;; Unsigned remainder for HI, SI and DI, emitted as rem with an unsigned
;; (.u) type.  The dividend (operand 1) must be a register: its predicate
;; is nvptx_register_operand, so its constraint is "R" to agree with that
;; predicate and with the division patterns.  The divisor (operand 2) may
;; be a register or an immediate.
417 (define_insn "umod<mode>3"
418 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
419 (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
420 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
421 ""
422 "%.\\trem.u%T0\\t%0, %1, %2;")
423
;; Integer minimum and maximum for HI, SI and DI.  The signed variants emit
;; min/max with a .s type, the unsigned variants with a .u type.  Operand 2
;; may be a register or an immediate.
424 (define_insn "smin<mode>3"
425 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
426 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
427 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
428 ""
429 "%.\\tmin.s%T0\\t%0, %1, %2;")
430
431 (define_insn "umin<mode>3"
432 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
433 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
434 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
435 ""
436 "%.\\tmin.u%T0\\t%0, %1, %2;")
437
438 (define_insn "smax<mode>3"
439 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
440 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
441 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
442 ""
443 "%.\\tmax.s%T0\\t%0, %1, %2;")
444
445 (define_insn "umax<mode>3"
446 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
447 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
448 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
449 ""
450 "%.\\tmax.u%T0\\t%0, %1, %2;")
451
;; Integer unary operations for HI, SI and DI; the input is a register.

;; Absolute value, emitted as abs with a signed type.
452 (define_insn "abs<mode>2"
453 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
454 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
455 ""
456 "%.\\tabs.s%T0\\t%0, %1;")
457
;; Negation, emitted as neg with a signed type.
458 (define_insn "neg<mode>2"
459 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
460 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
461 ""
462 "%.\\tneg.s%T0\\t%0, %1;")
463
;; Bitwise complement, emitted as not with a bit-size (.b) type.
464 (define_insn "one_cmpl<mode>2"
465 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
466 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
467 ""
468 "%.\\tnot.b%T0\\t%0, %1;")
469
;; Bit reversal for SI and DI, represented as an UNSPEC_BITREV and emitted
;; as brev.
470 (define_insn "bitrev<mode>2"
471 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
472 (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
473 UNSPEC_BITREV))]
474 ""
475 "%.\\tbrev.b%T0\\t%0, %1;")
476
;; Count leading zeros of an SI or DI register.  The result is always SI;
;; the type suffix of the emitted clz is taken from the input, operand 1.
477 (define_insn "clz<mode>2"
478 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
479 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
480 ""
481 "%.\\tclz.b%T1\\t%0, %1;")
482
;; Count trailing zeros.  No single insn is used: the input is bit-reversed
;; into a fresh pseudo of the input mode and the leading zeros of that
;; pseudo are then counted.
483 (define_expand "ctz<mode>2"
484 [(set (match_operand:SI 0 "nvptx_register_operand" "")
485 (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
486 ""
487 {
488 rtx tmpreg = gen_reg_rtx (<MODE>mode);
489 emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
490 emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
491 DONE;
492 })
493
494 ;; Shifts
;; Defined for SI and DI values.  The shift count (operand 2) is always SI
;; and may be a register or an immediate.
495
;; Left shift, emitted as shl with a bit-size type.
496 (define_insn "ashl<mode>3"
497 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
498 (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
499 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
500 ""
501 "%.\\tshl.b%T0\\t%0, %1, %2;")
502
;; Arithmetic right shift, emitted as shr with a signed type.
503 (define_insn "ashr<mode>3"
504 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
505 (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
506 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
507 ""
508 "%.\\tshr.s%T0\\t%0, %1, %2;")
509
;; Logical right shift, emitted as shr with an unsigned type.
510 (define_insn "lshr<mode>3"
511 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
512 (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
513 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
514 ""
515 "%.\\tshr.u%T0\\t%0, %1, %2;")
516
517 ;; Logical operations
;; Bitwise and, or and xor for BI, HI, SI and DI, each emitted with a
;; bit-size (.b) type.  Operand 2 may be a register or an immediate.
518
519 (define_insn "and<mode>3"
520 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
521 (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
522 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
523 ""
524 "%.\\tand.b%T0\\t%0, %1, %2;")
525
526 (define_insn "ior<mode>3"
527 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
528 (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
529 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
530 ""
531 "%.\\tor.b%T0\\t%0, %1, %2;")
532
533 (define_insn "xor<mode>3"
534 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
535 (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
536 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
537 ""
538 "%.\\txor.b%T0\\t%0, %1, %2;")
539
540 ;; Comparisons and branches
541
;; Set a BImode register to the result of comparing two HI, SI or DI
;; values, emitted as setp.  Any code accepted by nvptx_comparison_operator
;; is allowed; operand 3 may be a register or an immediate.
542 (define_insn "*cmp<mode>"
543 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
544 (match_operator:BI 1 "nvptx_comparison_operator"
545 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
546 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
547 ""
548 "%.\\tsetp%c1\\t%0, %2, %3;")
549
;; The same for SF and DF values, using the codes accepted by
;; nvptx_float_comparison_operator; operand 3 may be a register or a
;; floating constant.
550 (define_insn "*cmp<mode>"
551 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
552 (match_operator:BI 1 "nvptx_float_comparison_operator"
553 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
554 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
555 ""
556 "%.\\tsetp%c1\\t%0, %2, %3;")
557
;; Unconditional jump to the label in operand 0, emitted as bra.
558 (define_insn "jump"
559 [(set (pc)
560 (label_ref (match_operand 0 "" "")))]
561 ""
562 "%.\\tbra\\t%l0;")
563
;; Conditional branches on a BImode register, emitted as bra to the label
;; in operand 1.  Both are marked non-predicable.
;; NOTE(review): the templates begin with %j0 / %J0 instead of the usual
;; "%."; presumably these print the guard on operand 0 in its direct and
;; negated form -- confirm against the operand printer.

;; Branch when operand 0 is nonzero.
564 (define_insn "br_true"
565 [(set (pc)
566 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
567 (const_int 0))
568 (label_ref (match_operand 1 "" ""))
569 (pc)))]
570 ""
571 "%j0\\tbra\\t%l1;"
572 [(set_attr "predicable" "false")])
573
;; Branch when operand 0 is zero.
574 (define_insn "br_false"
575 [(set (pc)
576 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
577 (const_int 0))
578 (label_ref (match_operand 1 "" ""))
579 (pc)))]
580 ""
581 "%J0\\tbra\\t%l1;"
582 [(set_attr "predicable" "false")])
583
584 ;; unified conditional branch
;; Same shape as br_true / br_false, except that the tested register is
;; wrapped in an UNSPEC_BR_UNIFIED and the emitted instruction is bra.uni.
;; Both are marked non-predicable.
585 (define_insn "br_true_uni"
586 [(set (pc) (if_then_else
587 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
588 UNSPEC_BR_UNIFIED) (const_int 0))
589 (label_ref (match_operand 1 "" "")) (pc)))]
590 ""
591 "%j0\\tbra.uni\\t%l1;"
592 [(set_attr "predicable" "false")])
593
594 (define_insn "br_false_uni"
595 [(set (pc) (if_then_else
596 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
597 UNSPEC_BR_UNIFIED) (const_int 0))
598 (label_ref (match_operand 1 "" "")) (pc)))]
599 ""
600 "%J0\\tbra.uni\\t%l1;"
601 [(set_attr "predicable" "false")])
602
;; Compare-and-branch expander for HI, SI and DI.  The comparison in
;; operand 0 is passed to nvptx_expand_compare; operand 0 is then replaced
;; by the rtx it returns, and operands 1 and 2 by that rtx's two operands,
;; before the branch pattern is emitted.
;; NOTE(review): the returned rtx is presumably an eq/ne test of a BImode
;; register against zero, so that br_true / br_false match -- confirm
;; against nvptx_expand_compare.
603 (define_expand "cbranch<mode>4"
604 [(set (pc)
605 (if_then_else (match_operator 0 "nvptx_comparison_operator"
606 [(match_operand:HSDIM 1 "nvptx_register_operand" "")
607 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
608 (label_ref (match_operand 3 "" ""))
609 (pc)))]
610 ""
611 {
612 rtx t = nvptx_expand_compare (operands[0]);
613 operands[0] = t;
614 operands[1] = XEXP (t, 0);
615 operands[2] = XEXP (t, 1);
616 })
617
;; The same for SF and DF, with the floating-point comparison codes.
618 (define_expand "cbranch<mode>4"
619 [(set (pc)
620 (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
621 [(match_operand:SDFM 1 "nvptx_register_operand" "")
622 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
623 (label_ref (match_operand 3 "" ""))
624 (pc)))]
625 ""
626 {
627 rtx t = nvptx_expand_compare (operands[0]);
628 operands[0] = t;
629 operands[1] = XEXP (t, 0);
630 operands[2] = XEXP (t, 1);
631 })
632
;; Compare-and-branch on a BImode register: an eq/ne test against constant
;; zero.  There is no preparation code, so the pattern is emitted exactly
;; as written.
633 (define_expand "cbranchbi4"
634 [(set (pc)
635 (if_then_else (match_operator 0 "predicate_operator"
636 [(match_operand:BI 1 "nvptx_register_operand" "")
637 (match_operand:BI 2 "const0_operand" "")])
638 (label_ref (match_operand 3 "" ""))
639 (pc)))]
640 ""
641 "")
642
643 ;; Conditional stores
644
;; Convert a BImode register into an SI flag value.  Emitted as a selp that
;; chooses between the literals -1 and 0 under control of operand 1.
645 (define_insn "setcc_from_bi"
646 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
647 (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
648 (const_int 0)))]
649 ""
650 "%.\\tselp%t0 %0,-1,0,%1;")
651
;; Conditional selects controlled by a BImode register (operand 1), all
;; emitted as selp.  The sel_true patterns test operand 1 != 0 and pass the
;; value operands to selp in the order %2, %3.  The sel_false patterns test
;; operand 1 == 0 and pass them in the swapped order %3, %2.

;; HI, SI and DI; the value operands may be registers or immediates.
652 (define_insn "sel_true<mode>"
653 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
654 (if_then_else:HSDIM
655 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
656 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
657 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
658 ""
659 "%.\\tselp%t0\\t%0, %2, %3, %1;")
660
;; SF and DF; the value operands may be registers or floating constants.
661 (define_insn "sel_true<mode>"
662 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
663 (if_then_else:SDFM
664 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
665 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
666 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
667 ""
668 "%.\\tselp%t0\\t%0, %2, %3, %1;")
669
;; HI, SI and DI, inverted condition.
670 (define_insn "sel_false<mode>"
671 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
672 (if_then_else:HSDIM
673 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
674 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
675 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
676 ""
677 "%.\\tselp%t0\\t%0, %3, %2, %1;")
678
;; SF and DF, inverted condition.
679 (define_insn "sel_false<mode>"
680 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
681 (if_then_else:SDFM
682 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
683 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
684 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
685 ""
686 "%.\\tselp%t0\\t%0, %3, %2, %1;")
687
;; Store the result of a comparison directly into a general register, all
;; emitted as set.  setcc_int produces an SI result, setcc_float an SF
;; result.  Each comes in two versions: one comparing HI/SI/DI values and
;; one comparing SF/DF values.

;; SI result, integer comparison.
688 (define_insn "setcc_int<mode>"
689 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
690 (match_operator:SI 1 "nvptx_comparison_operator"
691 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
692 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
693 ""
694 "%.\\tset%t0%c1\\t%0, %2, %3;")
695
;; SI result, floating-point comparison.
696 (define_insn "setcc_int<mode>"
697 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
698 (match_operator:SI 1 "nvptx_float_comparison_operator"
699 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
700 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
701 ""
702 "%.\\tset%t0%c1\\t%0, %2, %3;")
703
;; SF result, integer comparison.
704 (define_insn "setcc_float<mode>"
705 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
706 (match_operator:SF 1 "nvptx_comparison_operator"
707 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
708 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
709 ""
710 "%.\\tset%t0%c1\\t%0, %2, %3;")
711
;; SF result, floating-point comparison.
712 (define_insn "setcc_float<mode>"
713 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
714 (match_operator:SF 1 "nvptx_float_comparison_operator"
715 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
716 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
717 ""
718 "%.\\tset%t0%c1\\t%0, %2, %3;")
719
;; Store-flag expanders.  None has preparation code, so each emits its
;; pattern as written and relies on an insn above to match it.

;; BImode source: only `ne' against constant zero is accepted, which has
;; the shape of setcc_from_bi.
720 (define_expand "cstorebi4"
721 [(set (match_operand:SI 0 "nvptx_register_operand")
722 (match_operator:SI 1 "ne_operator"
723 [(match_operand:BI 2 "nvptx_register_operand")
724 (match_operand:BI 3 "const0_operand")]))]
725 ""
726 "")
727
;; HI, SI and DI comparisons, with the shape of setcc_int.
728 (define_expand "cstore<mode>4"
729 [(set (match_operand:SI 0 "nvptx_register_operand")
730 (match_operator:SI 1 "nvptx_comparison_operator"
731 [(match_operand:HSDIM 2 "nvptx_register_operand")
732 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
733 ""
734 "")
735
;; SF and DF comparisons, with the shape of setcc_int.
736 (define_expand "cstore<mode>4"
737 [(set (match_operand:SI 0 "nvptx_register_operand")
738 (match_operator:SI 1 "nvptx_float_comparison_operator"
739 [(match_operand:SDFM 2 "nvptx_register_operand")
740 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
741 ""
742 "")
743
744 ;; Calls
745
;; Call without a result.  The whole insn is a parallel accepted by the
;; call_operation predicate; its first element is the call itself and the
;; callee is operand 0.  The text comes from nvptx_output_call_insn, which
;; receives NULL_RTX in the result position.
746 (define_insn "call_insn"
747 [(match_parallel 2 "call_operation"
748 [(call (mem:QI (match_operand 0 "call_insn_operand" "Rs"))
749 (match_operand 1))])]
750 ""
751 {
752 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
753 })
754
;; Call with a result.  Operand 0 is the result register and operand 1 the
;; callee; both are handed to nvptx_output_call_insn.
755 (define_insn "call_value_insn"
756 [(match_parallel 3 "call_operation"
757 [(set (match_operand 0 "nvptx_register_operand" "=R")
758 (call (mem:QI (match_operand 1 "call_insn_operand" "Rs"))
759 (match_operand 2)))])]
760 ""
761 {
762 return nvptx_output_call_insn (insn, operands[0], operands[1]);
763 })
764
;; Call expanders.  All the work is delegated to nvptx_expand_call; the
;; pattern here is never emitted because the preparation code ends in DONE.

;; Call without a result: NULL_RTX is passed as the result.
765 (define_expand "call"
766 [(match_operand 0 "" "")]
767 ""
768 {
769 nvptx_expand_call (NULL_RTX, operands[0]);
770 DONE;
771 })
772
;; Call with a result: operand 0 is the result, operand 1 the callee.
773 (define_expand "call_value"
774 [(match_operand 0 "" "")
775 (match_operand 1 "" "")]
776 ""
777 {
778 nvptx_expand_call (operands[0], operands[1]);
779 DONE;
780 })
781
782 ;; Floating point arithmetic.
;; All patterns in this group are instantiated for SF and DF.  Where the
;; predicate is nvptx_nonmemory_operand the operand may be a register or a
;; floating constant.
;; NOTE(review): the fma and div templates carry an extra "%#" modifier,
;; presumably a rounding-mode suffix -- confirm against the operand printer.
783
784 (define_insn "add<mode>3"
785 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
786 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
787 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
788 ""
789 "%.\\tadd%t0\\t%0, %1, %2;")
790
;; Subtraction; both inputs must be registers.
791 (define_insn "sub<mode>3"
792 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
793 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
794 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
795 ""
796 "%.\\tsub%t0\\t%0, %1, %2;")
797
798 (define_insn "mul<mode>3"
799 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
800 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
801 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
802 ""
803 "%.\\tmul%t0\\t%0, %1, %2;")
804
;; Fused multiply-add of operands 1, 2 and 3.
805 (define_insn "fma<mode>4"
806 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
807 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
808 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
809 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
810 ""
811 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
812
813 (define_insn "div<mode>3"
814 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
815 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
816 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
817 ""
818 "%.\\tdiv%#%t0\\t%0, %1, %2;")
819
;; copysign for SF and DF, represented as an UNSPEC_COPYSIGN of two
;; registers.  Note that the template passes the inputs to the instruction
;; in the order %2, %1, i.e. swapped relative to the RTL operand order.
820 (define_insn "copysign<mode>3"
821 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
822 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
823 (match_operand:SDFM 2 "nvptx_register_operand" "R")]
824 UNSPEC_COPYSIGN))]
825 ""
826 "%.\\tcopysign%t0\\t%0, %2, %1;")
827
;; Floating-point minimum and maximum for SF and DF; operand 2 may be a
;; register or a floating constant.
828 (define_insn "smin<mode>3"
829 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
830 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
831 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
832 ""
833 "%.\\tmin%t0\\t%0, %1, %2;")
834
835 (define_insn "smax<mode>3"
836 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
837 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
838 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
839 ""
840 "%.\\tmax%t0\\t%0, %1, %2;")
841
;; Floating-point unary operations for SF and DF; the input is a register.
842 (define_insn "abs<mode>2"
843 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
844 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
845 ""
846 "%.\\tabs%t0\\t%0, %1;")
847
848 (define_insn "neg<mode>2"
849 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
850 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
851 ""
852 "%.\\tneg%t0\\t%0, %1;")
853
;; Square root; like fma and div, its template carries the "%#" modifier.
854 (define_insn "sqrt<mode>2"
855 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
856 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
857 ""
858 "%.\\tsqrt%#%t0\\t%0, %1;")
859
;; Combined sine/cosine of an SF value, available only under
;; flag_unsafe_math_optimizations.  Two sets are emitted: operand 0
;; receives the UNSPEC_COS of operand 2 and operand 1 the UNSPEC_SIN of the
;; same input.  These have the shape of the cossf2 and sinsf2 insns below.
;; Before emission, operand 2 is replaced by the result of make_safe_from
;; (operand 2, operand 0).
;; NOTE(review): presumably this keeps the first set from clobbering the
;; input that the second set still reads -- confirm.
860 (define_expand "sincossf3"
861 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
862 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
863 UNSPEC_COS))
864 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
865 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
866 "flag_unsafe_math_optimizations"
867 {
868 operands[2] = make_safe_from (operands[2], operands[0]);
869 })
870
;; SF transcendental functions.  Each is represented as an unspec, enabled
;; only under flag_unsafe_math_optimizations, and emitted as the ".approx"
;; form of the instruction: sin, cos, lg2 and ex2 respectively.
871 (define_insn "sinsf2"
872 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
873 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
874 UNSPEC_SIN))]
875 "flag_unsafe_math_optimizations"
876 "%.\\tsin.approx%t0\\t%0, %1;")
877
878 (define_insn "cossf2"
879 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
880 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
881 UNSPEC_COS))]
882 "flag_unsafe_math_optimizations"
883 "%.\\tcos.approx%t0\\t%0, %1;")
884
885 (define_insn "log2sf2"
886 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
887 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
888 UNSPEC_LOG2))]
889 "flag_unsafe_math_optimizations"
890 "%.\\tlg2.approx%t0\\t%0, %1;")
891
892 (define_insn "exp2sf2"
893 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
894 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
895 UNSPEC_EXP2))]
896 "flag_unsafe_math_optimizations"
897 "%.\\tex2.approx%t0\\t%0, %1;")
898
899 ;; Conversions involving floating point
;; All of these are register-to-register and emitted as cvt.  Every
;; template except extendsfdf2 carries the "%#" modifier.
900
;; SF -> DF.
901 (define_insn "extendsfdf2"
902 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
903 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
904 ""
905 "%.\\tcvt%t0%t1\\t%0, %1;")
906
;; DF -> SF.
907 (define_insn "truncdfsf2"
908 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
909 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
910 ""
911 "%.\\tcvt%#%t0%t1\\t%0, %1;")
912
;; Unsigned SI -> SF or DF.
913 (define_insn "floatunssi<mode>2"
914 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
915 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
916 ""
917 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
918
;; Signed SI -> SF or DF.
919 (define_insn "floatsi<mode>2"
920 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
921 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
922 ""
923 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
924
;; Unsigned DI -> SF or DF.
925 (define_insn "floatunsdi<mode>2"
926 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
927 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
928 ""
929 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
930
;; Signed DI -> SF or DF.
931 (define_insn "floatdi<mode>2"
932 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
933 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
934 ""
935 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
936
;; Floating point (SF or DF) to integer conversions, emitted as cvt with
;; the fixed ".rzi" modifier.  The unsigned variants use a .u destination
;; type, the signed variants a .s destination type.

;; -> unsigned SI.
937 (define_insn "fixuns_trunc<mode>si2"
938 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
939 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
940 ""
941 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
942
;; -> signed SI.
943 (define_insn "fix_trunc<mode>si2"
944 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
945 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
946 ""
947 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
948
;; -> unsigned DI.
949 (define_insn "fixuns_trunc<mode>di2"
950 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
951 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
952 ""
953 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
954
;; -> signed DI.
955 (define_insn "fix_trunc<mode>di2"
956 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
957 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
958 ""
959 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
960
961 (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
962 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
963 (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
964 (UNSPEC_FPINT_BTRUNC "btrunc")
965 (UNSPEC_FPINT_CEIL "ceil")
966 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
967 (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
968 (UNSPEC_FPINT_BTRUNC ".rzi")
969 (UNSPEC_FPINT_CEIL ".rpi")
970 (UNSPEC_FPINT_NEARBYINT "%#i")])
971
972 (define_insn "<FPINT:fpint_name><SDFM:mode>2"  ;; one insn per FPINT value x SDFM mode; names come from fpint_name
973 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")  ;; op0: floating-point result, same mode as the source
974 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]  ;; op1: floating-point source register
975 FPINT))]  ;; the unspec code selects the rounding operation
976 ""  ;; always enabled
977 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")  ;; cvt with the per-unspec rounding suffix; source and destination use the same %t type
978
979 (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])  ;; subset of rounding unspecs used for float -> integer-mode rounding
980 (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")  ;; pattern-name stem selected by each FPINT2 value
981 (UNSPEC_FPINT_CEIL "lceil")])
982 (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")  ;; rounding suffix spliced into the cvt template for each FPINT2 value
983 (UNSPEC_FPINT_CEIL ".rpi")])
984
985 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"  ;; lfloor/lceil: floating-point source rounded into an integer-mode result
986 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")  ;; op0: integer result register (SDIM iterator, defined outside this chunk)
987 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]  ;; op1: floating-point source register
988 FPINT2))]  ;; the unspec code selects floor or ceil
989 ""  ;; always enabled
990 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")  ;; cvt with the per-unspec rounding suffix and a .s destination type
991
992 ;; Miscellaneous
993
994 (define_insn "nop"  ;; placeholder insn that produces no assembly text
995 [(const_int 0)]  ;; matched by the bare RTL (const_int 0)
996 ""  ;; always enabled
997 "")  ;; empty output template
998
999 (define_insn "exit"  ;; emits the PTX exit instruction
1000 [(const_int 1)]  ;; matched by the bare RTL (const_int 1)
1001 ""  ;; always enabled
1002 "exit;")  ;; note: the template has no %. prefix
1003
1004 (define_insn "fake_nop"  ;; emits a self-contained PTX block that declares two scratch registers and moves one to the other
1005 [(const_int 2)]  ;; matched by the bare RTL (const_int 2)
1006 ""  ;; always enabled
1007 "{
1008 .reg .u32 %%nop_src;
1009 .reg .u32 %%nop_dst;
1010 mov.u32 %%nop_dst, %%nop_src;
1011 }")  ;; %% yields a literal % in the output, so the registers are %nop_src and %nop_dst
1012
1013 (define_insn "return"  ;; function return; the assembly text is produced by a C helper
1014 [(return)]
1015 ""  ;; always enabled
1016 {
1017 return nvptx_output_return ();
1018 }
1019 [(set_attr "predicable" "false")])  ;; marked as not predicable
1020
1021 (define_expand "epilogue"  ;; function epilogue: optional soft-stack update followed by the return jump
1022 [(clobber (const_int 0))]  ;; pattern is never emitted: the C body always ends in DONE
1023 ""  ;; always enabled; with TARGET_SOFT_STACK the body first emits set_softstack_insn on the SOFTSTACK_PREV_REGNUM register
1024 {
1025 if (TARGET_SOFT_STACK)
1026 emit_insn (gen_set_softstack_insn (gen_rtx_REG (Pmode,
1027 SOFTSTACK_PREV_REGNUM)));
1028 emit_jump_insn (gen_return ());
1029 DONE;
1030 })
1031
1032 (define_expand "nonlocal_goto"  ;; nonlocal goto is unsupported: report it and emit a nop so expansion can continue
1033 [(match_operand 0 "" "")  ;; the four operands are accepted unchecked and never read by the body
1034 (match_operand 1 "" "")
1035 (match_operand 2 "" "")
1036 (match_operand 3 "" "")]
1037 ""  ;; always enabled
1038 {
1039 sorry ("target cannot support nonlocal goto");
1040 emit_insn (gen_nop ());
1041 DONE;
1042 })
1043
1044 (define_expand "nonlocal_goto_receiver"  ;; nonlocal goto is unsupported: report it at expansion time
1045 [(const_int 0)]  ;; the body has no DONE, so this pattern is what gets emitted after the diagnostic
1046 ""  ;; always enabled
1047 {
1048 sorry ("target cannot support nonlocal goto");
1049 })
1050
1051 (define_expand "allocate_stack"  ;; dynamic stack allocation: implemented only for the soft-stack configuration
1052 [(match_operand 0 "nvptx_register_operand")  ;; op0: receives the address of the new block
1053 (match_operand 1 "nvptx_register_operand")]  ;; op1: amount subtracted from the stack pointer
1054 ""  ;; always enabled; without TARGET_SOFT_STACK the body reports "sorry" and emits a nop
1055 {
1056 if (TARGET_SOFT_STACK)
1057 {
1058 emit_move_insn (stack_pointer_rtx,
1059 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1060 emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
1061 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1062 DONE;
1063 }
1064 /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1065 only) but notes it is not implemented. The assembler emits a
1066 confused error message. Issue a blunt one now instead. */
1067 sorry ("target cannot support alloca");
1068 emit_insn (gen_nop ());
1069 DONE;
1070 })
1071
1072 (define_insn "set_softstack_insn"  ;; soft-stack update; the assembly text is produced by a C helper
1073 [(unspec [(match_operand 0 "nvptx_register_operand" "R")]  ;; op0: register whose number is handed to the helper
1074 UNSPEC_SET_SOFTSTACK)]
1075 "TARGET_SOFT_STACK"  ;; only available in the soft-stack configuration
1076 {
1077 return nvptx_output_set_softstack (REGNO (operands[0]));
1078 })
1079
1080 (define_expand "restore_stack_block"  ;; block-level stack restore; emits nothing unless TARGET_SOFT_STACK
1081 [(match_operand 0 "register_operand" "")  ;; op0: destination of the copy, then passed to set_softstack_insn
1082 (match_operand 1 "register_operand" "")]  ;; op1: value copied into op0
1083 ""  ;; always enabled; the body always ends in DONE
1084 {
1085 if (TARGET_SOFT_STACK)
1086 {
1087 emit_move_insn (operands[0], operands[1]);
1088 emit_insn (gen_set_softstack_insn (operands[0]));
1089 }
1090 DONE;
1091 })
1092
1093 (define_expand "restore_stack_function"  ;; function-level stack restore: deliberately expands to no insns
1094 [(match_operand 0 "register_operand" "")  ;; both operands are accepted but unused
1095 (match_operand 1 "register_operand" "")]
1096 ""  ;; always enabled
1097 {
1098 DONE;
1099 })
1100
1101 (define_insn "trap"  ;; unconditional trap
1102 [(trap_if (const_int 1) (const_int 0))]  ;; condition is constant true, trap code 0
1103 ""  ;; always enabled
1104 "trap; exit;")  ;; emits trap followed by exit
1105
1106 (define_insn "trap_if_true"  ;; trap when a BImode register is non-zero
1107 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")  ;; op0: BImode predicate register
1108 (const_int 0))
1109 (const_int 0))]  ;; trap code 0
1110 ""  ;; always enabled
1111 "%j0 trap; %j0 exit;"  ;; both instructions carry the %j0 prefix (presumably a guard on op0; modifier defined outside this chunk)
1112 [(set_attr "predicable" "false")])  ;; marked as not predicable
1113
1114 (define_insn "trap_if_false"  ;; trap when a BImode register is zero
1115 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")  ;; op0: BImode predicate register
1116 (const_int 0))
1117 (const_int 0))]  ;; trap code 0
1118 ""  ;; always enabled
1119 "%J0 trap; %J0 exit;"  ;; both instructions carry the %J0 prefix (presumably the inverted guard on op0; modifier defined outside this chunk)
1120 [(set_attr "predicable" "false")])  ;; marked as not predicable
1121
1122 (define_expand "ctrap<mode>4"  ;; conditional trap on an integer comparison
1123 [(trap_if (match_operator 0 "nvptx_comparison_operator"  ;; op0: the comparison rtx itself
1124 [(match_operand:SDIM 1 "nvptx_register_operand")  ;; op1: left-hand comparison operand
1125 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])  ;; op2: right-hand comparison operand
1126 (match_operand 3 "const0_operand"))]  ;; op3: trap code, must be zero
1127 ""  ;; always enabled; the body passes op0 to nvptx_expand_compare and feeds the result to trap_if_true
1128 {
1129 rtx t = nvptx_expand_compare (operands[0]);
1130 emit_insn (gen_trap_if_true (t));
1131 DONE;
1132 })
1133
1134 (define_insn "oacc_dim_size"  ;; load the size of one OpenACC dimension into op0
1135 [(set (match_operand:SI 0 "nvptx_register_operand" "")  ;; op0: SImode result register
1136 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]  ;; op1: constant dimension index used directly as the table index below
1137 UNSPEC_DIM_SIZE))]
1138 ""  ;; always enabled; op1 is not range-checked here, so it must be 0, 1 or 2
1139 {
1140 static const char *const asms[] =
1141 { /* Must match oacc_loop_levels ordering. */
1142 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1143 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1144 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1145 };
1146 return asms[INTVAL (operands[1])];
1147 })
1148
1149 (define_insn "oacc_dim_pos"  ;; load the position within one OpenACC dimension into op0
1150 [(set (match_operand:SI 0 "nvptx_register_operand" "")  ;; op0: SImode result register
1151 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]  ;; op1: constant dimension index used directly as the table index below
1152 UNSPECV_DIM_POS))]  ;; volatile unspec, unlike oacc_dim_size
1153 ""  ;; always enabled; op1 is not range-checked here, so it must be 0, 1 or 2
1154 {
1155 static const char *const asms[] =
1156 { /* Must match oacc_loop_levels ordering. */
1157 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1158 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1159 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1160 };
1161 return asms[INTVAL (operands[1])];
1162 })
1163
1164 (define_insn "nvptx_fork"  ;; marker insn: its only output is a PTX comment line
1165 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]  ;; op0: constant printed in the comment
1166 UNSPECV_FORK)]
1167 ""  ;; always enabled
1168 "// fork %0;"  ;; no instruction is emitted
1169 [(set_attr "predicable" "false")])  ;; marked as not predicable
1170
1171 (define_insn "nvptx_forked"  ;; marker insn: its only output is a PTX comment line
1172 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]  ;; op0: constant printed in the comment
1173 UNSPECV_FORKED)]
1174 ""  ;; always enabled
1175 "// forked %0;"  ;; no instruction is emitted
1176 [(set_attr "predicable" "false")])  ;; marked as not predicable
1177
1178 (define_insn "nvptx_joining"  ;; marker insn: its only output is a PTX comment line
1179 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]  ;; op0: constant printed in the comment
1180 UNSPECV_JOINING)]
1181 ""  ;; always enabled
1182 "// joining %0;"  ;; no instruction is emitted
1183 [(set_attr "predicable" "false")])  ;; marked as not predicable
1184
1185 (define_insn "nvptx_join"  ;; marker insn: its only output is a PTX comment line
1186 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]  ;; op0: constant printed in the comment
1187 UNSPECV_JOIN)]
1188 ""  ;; always enabled
1189 "// join %0;"  ;; no instruction is emitted
1190 [(set_attr "predicable" "false")])  ;; marked as not predicable
1191
1192 (define_expand "oacc_fork"  ;; OpenACC fork: optional op1 -> op0 copy, then the target-specific fork expansion
1193 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")  ;; op0: destination; the copy is skipped when it is const0_rtx
1194 (match_operand:SI 1 "general_operand" ""))  ;; op1: value copied into op0
1195 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]  ;; op2: constant passed to nvptx_expand_oacc_fork
1196 UNSPECV_FORKED)]  ;; this template is never emitted: the body always ends in DONE
1197 ""  ;; always enabled
1198 {
1199 if (operands[0] != const0_rtx)
1200 emit_move_insn (operands[0], operands[1]);
1201 nvptx_expand_oacc_fork (INTVAL (operands[2]));
1202 DONE;
1203 })
1204
1205 (define_expand "oacc_join"  ;; OpenACC join: optional op1 -> op0 copy, then the target-specific join expansion
1206 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")  ;; op0: destination; the copy is skipped when it is const0_rtx
1207 (match_operand:SI 1 "general_operand" ""))  ;; op1: value copied into op0
1208 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]  ;; op2: constant passed to nvptx_expand_oacc_join
1209 UNSPECV_JOIN)]  ;; this template is never emitted: the body always ends in DONE
1210 ""  ;; always enabled
1211 {
1212 if (operands[0] != const0_rtx)
1213 emit_move_insn (operands[0], operands[1]);
1214 nvptx_expand_oacc_join (INTVAL (operands[2]));
1215 DONE;
1216 })
1217
1218 ;; only 32-bit shuffles exist.
1219 (define_insn "nvptx_shuffle<mode>"  ;; shuffle insn; always emitted with a .b32 type
1220 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")  ;; op0: result register (BITS iterator, defined outside this chunk)
1221 (unspec:BITS
1222 [(match_operand:BITS 1 "nvptx_register_operand" "R")  ;; op1: value being shuffled
1223 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")  ;; op2: register or immediate, printed as the third shfl operand
1224 (match_operand:SI 3 "const_int_operand" "n")]  ;; op3: constant printed through %S as the shfl suffix (presumably the shuffle kind)
1225 UNSPEC_SHUFFLE))]
1226 ""  ;; always enabled
1227 "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")  ;; the final shfl operand is hard-coded to 31
1228
1229 (define_insn "nvptx_vote_ballot"  ;; vote.ballot: BImode predicate in, 32-bit mask out
1230 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")  ;; op0: SImode result register
1231 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]  ;; op1: BImode predicate register
1232 UNSPEC_VOTE_BALLOT))]
1233 ""  ;; always enabled
1234 "%.\\tvote.ballot.b32\\t%0, %1;")
1235
1236 ;; Patterns for OpenMP SIMD-via-SIMT lowering
1237
1238 (define_insn "omp_simt_enter_insn"  ;; SIMT region entry; the assembly text is produced by a C helper
1239 [(set (match_operand 0 "nvptx_register_operand" "=R")  ;; op0: result register (no mode specified)
1240 (unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri")  ;; op1: register or immediate (the expander below treats it as a size)
1241 (match_operand 2 "nvptx_nonmemory_operand" "Ri")]  ;; op2: register or immediate (the expander below treats it as an alignment)
1242 UNSPECV_SIMT_ENTER))]
1243 ""  ;; always enabled
1244 {
1245 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
1246 })
1247
1248 (define_expand "omp_simt_enter"  ;; records per-function SIMT stack requirements in cfun->machine, then emits omp_simt_enter_insn
1249 [(match_operand 0 "nvptx_register_operand" "=R")  ;; op0: result register, passed through to the insn
1250 (match_operand 1 "nvptx_nonmemory_operand" "Ri")  ;; op1: size; a non-constant forces simt_stack_size to HOST_WIDE_INT_M1U
1251 (match_operand 2 "const_int_operand" "n")]  ;; op2: constant alignment, folded into simt_stack_align with MAX
1252 ""  ;; always enabled; the body also sets has_simtreg and keeps the largest constant size seen so far
1253 {
1254 if (!CONST_INT_P (operands[1]))
1255 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1256 else
1257 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1258 cfun->machine->simt_stack_size);
1259 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1260 cfun->machine->simt_stack_align);
1261 cfun->machine->has_simtreg = true;
1262 emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2]));
1263 DONE;
1264 })
1265
1266 (define_insn "omp_simt_exit"  ;; SIMT region exit; the assembly text is produced by a C helper
1267 [(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")]  ;; op0: register handed to the helper
1268 UNSPECV_SIMT_EXIT)]
1269 ""  ;; always enabled
1270 {
1271 return nvptx_output_simt_exit (operands[0]);
1272 })
1273
1274 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
1275 (define_insn "omp_simt_lane"  ;; reads the %laneid special register
1276 [(set (match_operand:SI 0 "nvptx_register_operand" "")  ;; op0: SImode result register
1277 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]  ;; no inputs; the const_int 0 is only a placeholder
1278 ""  ;; always enabled
1279 "%.\\tmov.u32\\t%0, %%laneid;")
1280
1281 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1282 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
1283 (define_expand "omp_simt_ordered"  ;; plain move followed by the nvptx_nounroll insn
1284 [(match_operand:SI 0 "nvptx_register_operand" "=R")  ;; op0: destination of the copy
1285 (match_operand:SI 1 "nvptx_register_operand" "R")]  ;; op1: source of the copy
1286 ""  ;; always enabled
1287 {
1288 emit_move_insn (operands[0], operands[1]);
1289 emit_insn (gen_nvptx_nounroll ());
1290 DONE;
1291 })
1292
1293 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
1294 ;; across lanes
1295 (define_expand "omp_simt_xchg_bfly"  ;; delegates to nvptx_gen_shuffle with SHUFFLE_BFLY
1296 [(match_operand 0 "nvptx_register_operand" "=R")  ;; op0: result register
1297 (match_operand 1 "nvptx_register_operand" "R")  ;; op1: value to exchange
1298 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]  ;; op2: register or immediate forwarded as the shuffle argument
1299 ""  ;; always enabled
1300 {
1301 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1302 SHUFFLE_BFLY));
1303 DONE;
1304 })
1305
1306 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1307 ;; from lane given by index in operand 2 to operand 0 in all lanes
1308 (define_expand "omp_simt_xchg_idx"  ;; delegates to nvptx_gen_shuffle with SHUFFLE_IDX
1309 [(match_operand 0 "nvptx_register_operand" "=R")  ;; op0: result register
1310 (match_operand 1 "nvptx_register_operand" "R")  ;; op1: value to broadcast
1311 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]  ;; op2: register or immediate forwarded as the shuffle argument
1312 ""  ;; always enabled
1313 {
1314 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1315 SHUFFLE_IDX));
1316 DONE;
1317 })
1318
1319 ;; Implement IFN_GOMP_SIMT_VOTE_ANY:
1320 ;; set operand 0 to zero iff all lanes supply zero in operand 1
1321 (define_expand "omp_simt_vote_any"  ;; computes (op1 != 0) into a fresh BImode register and ballots it into op0
1322 [(match_operand:SI 0 "nvptx_register_operand" "=R")  ;; op0: receives the nvptx_vote_ballot result
1323 (match_operand:SI 1 "nvptx_register_operand" "R")]  ;; op1: per-lane value tested against zero
1324 ""  ;; always enabled
1325 {
1326 rtx pred = gen_reg_rtx (BImode);
1327 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1328 emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
1329 DONE;
1330 })
1331
1332 ;; Implement IFN_GOMP_SIMT_LAST_LANE:
1333 ;; set operand 0 to the lowest lane index that passed non-zero in operand 1
1334 (define_expand "omp_simt_last_lane"  ;; ballots (op1 != 0) into a temporary, then applies ctzsi2 to it
1335 [(match_operand:SI 0 "nvptx_register_operand" "=R")  ;; op0: receives the count-trailing-zeros result
1336 (match_operand:SI 1 "nvptx_register_operand" "R")]  ;; op1: per-lane value tested against zero
1337 ""  ;; always enabled
1338 {
1339 rtx pred = gen_reg_rtx (BImode);
1340 rtx tmp = gen_reg_rtx (SImode);
1341 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1342 emit_insn (gen_nvptx_vote_ballot (tmp, pred));
1343 emit_insn (gen_ctzsi2 (operands[0], tmp));
1344 DONE;
1345 })
1346
1347 ;; extract parts of a 64 bit object into 2 32-bit ints
1348 (define_insn "unpack<mode>si2"  ;; one mov.b64 that splits op2 into the register pair {op0, op1}
1349 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")  ;; op0: part tagged (const_int 0), first element of the pair
1350 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")  ;; op2: source register (BITD iterator, defined outside this chunk)
1351 (const_int 0)] UNSPEC_BIT_CONV))
1352 (set (match_operand:SI 1 "nvptx_register_operand" "=R")  ;; op1: part tagged (const_int 1), second element of the pair
1353 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1354 ""  ;; always enabled
1355 "%.\\tmov.b64\\t{%0,%1}, %2;")
1356
1357 ;; pack 2 32-bit ints into a 64 bit object
1358 (define_insn "packsi<mode>2"  ;; one mov.b64 that joins the register pair {op1, op2} into op0
1359 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")  ;; op0: result register (BITD iterator, defined outside this chunk)
1360 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")  ;; op1: first element of the pair
1361 (match_operand:SI 2 "nvptx_register_operand" "R")]  ;; op2: second element of the pair
1362 UNSPEC_BIT_CONV))]
1363 ""  ;; always enabled
1364 "%.\\tmov.b64\\t%0, {%1,%2};")
1365
1366 ;; Atomic insns.
1367
1368 (define_expand "atomic_compare_and_swap<mode>"  ;; emits the _1 CAS insn, then derives the success flag as (oldval == expected)
1369 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
1370 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
1371 (match_operand:SDIM 2 "memory_operand") ;; memory
1372 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
1373 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
1374 (match_operand:SI 5 "const_int_operand") ;; is_weak
1375 (match_operand:SI 6 "const_int_operand") ;; success model
1376 (match_operand:SI 7 "const_int_operand")] ;; failure model
1377 ""  ;; always enabled; operands 5 (is_weak) and 7 (failure model) are not used by the body
1378 {
1379 emit_insn (gen_atomic_compare_and_swap<mode>_1
1380 (operands[1], operands[2], operands[3], operands[4], operands[6]));
1381 
1382 rtx cond = gen_reg_rtx (BImode);
1383 emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
1384 emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
1385 DONE;
1386 })
1387
1388 (define_insn "atomic_compare_and_swap<mode>_1"  ;; the atom ... .cas instruction itself
1389 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")  ;; op0: result register of the atom instruction
1390 (unspec_volatile:SDIM
1391 [(match_operand:SDIM 1 "memory_operand" "+m")  ;; op1: memory location, read and written
1392 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")  ;; op2: third atom operand (the expander passes the expected value)
1393 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")  ;; op3: fourth atom operand (the expander passes the new value)
1394 (match_operand:SI 4 "const_int_operand")]  ;; op4: memory model; not referenced by the output template
1395 UNSPECV_CAS))
1396 (set (match_dup 1)  ;; second set records that the memory operand is modified
1397 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
1398 ""  ;; always enabled
1399 "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"  ;; %A1 is expanded from the memory operand (modifier defined outside this chunk)
1400 [(set_attr "atomic" "true")])  ;; flagged via the "atomic" attribute
1401
1402 (define_insn "atomic_exchange<mode>"  ;; the atom ... .exch instruction
1403 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
1404 (unspec_volatile:SDIM
1405 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
1406 (match_operand:SI 3 "const_int_operand")] ;; model
1407 UNSPECV_XCHG))
1408 (set (match_dup 1)  ;; the memory location is overwritten with op2
1409 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
1410 ""  ;; always enabled; the model operand is not referenced by the output template
1411 "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
1412 [(set_attr "atomic" "true")])  ;; flagged via the "atomic" attribute
1413
1414 (define_insn "atomic_fetch_add<mode>"  ;; integer atom ... .add instruction
1415 [(set (match_operand:SDIM 1 "memory_operand" "+m")  ;; op1: memory location, updated with op1 + op2
1416 (unspec_volatile:SDIM
1417 [(plus:SDIM (match_dup 1)
1418 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))  ;; op2: addend, register or immediate
1419 (match_operand:SI 3 "const_int_operand")] ;; model
1420 UNSPECV_LOCK))
1421 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")  ;; op0: result register of the atom instruction
1422 (match_dup 1))]
1423 ""  ;; always enabled; the model operand is not referenced by the output template
1424 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1425 [(set_attr "atomic" "true")])  ;; flagged via the "atomic" attribute
1426
1427 (define_insn "atomic_fetch_addsf"  ;; SFmode variant of the atom ... .add instruction
1428 [(set (match_operand:SF 1 "memory_operand" "+m")  ;; op1: memory location, updated with op1 + op2
1429 (unspec_volatile:SF
1430 [(plus:SF (match_dup 1)
1431 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))  ;; op2: addend, register or floating-point constant
1432 (match_operand:SI 3 "const_int_operand")] ;; model
1433 UNSPECV_LOCK))
1434 (set (match_operand:SF 0 "nvptx_register_operand" "=R")  ;; op0: result register of the atom instruction
1435 (match_dup 1))]
1436 ""  ;; always enabled; the model operand is not referenced by the output template
1437 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1438 [(set_attr "atomic" "true")])  ;; flagged via the "atomic" attribute
1439
1440 (define_code_iterator any_logic [and ior xor])  ;; the three bitwise RTL codes iterated by the atomic_fetch_<logic> pattern
1441 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])  ;; name fragment used in both the pattern name and the atom suffix
1442
1443 (define_insn "atomic_fetch_<logic><mode>"  ;; atom ... .and/.or/.xor, one pattern per any_logic code and SDIM mode
1444 [(set (match_operand:SDIM 1 "memory_operand" "+m")  ;; op1: memory location, updated with op1 <logic> op2
1445 (unspec_volatile:SDIM
1446 [(any_logic:SDIM (match_dup 1)
1447 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))  ;; op2: second logic operand, register or immediate
1448 (match_operand:SI 3 "const_int_operand")] ;; model
1449 UNSPECV_LOCK))
1450 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")  ;; op0: result register of the atom instruction
1451 (match_dup 1))]
1452 "<MODE>mode == SImode || TARGET_SM35"  ;; the non-SImode variant is only enabled when TARGET_SM35 holds
1453 "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
1454 [(set_attr "atomic" "true")])  ;; flagged via the "atomic" attribute
1455
1456 (define_insn "nvptx_barsync"  ;; bar.sync with one or two operands
1457 [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")  ;; op0: first bar.sync operand, register or immediate
1458 (match_operand:SI 1 "const_int_operand")]  ;; op1: constant; printed as the second operand only when non-zero
1459 UNSPECV_BARSYNC)]
1460 ""  ;; always enabled; neither template starts with the %. prefix
1461 {
1462 if (INTVAL (operands[1]) == 0)
1463 return "\\tbar.sync\\t%0;";
1464 else
1465 return "\\tbar.sync\\t%0, %1;";
1466 }
1467 [(set_attr "predicable" "false")])  ;; marked as not predicable
1468
1469 (define_expand "memory_barrier"  ;; builds the operand for the *memory_barrier insn and emits this template
1470 [(set (match_dup 0)  ;; operand 0 is created by the body: a volatile BLKmode MEM at a scratch address
1471 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1472 ""  ;; always enabled; the body has no DONE, so the template above is emitted
1473 {
1474 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1475 MEM_VOLATILE_P (operands[0]) = 1;
1476 })
1477
1478 ;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
1479 ;; (corresponding to cuda functions threadfence_block, threadfence and
1480 ;; threadfence_system). For the insn memory_barrier we use membar.sys. This
1481 ;; may be overconservative, but before using membar.gl instead we'll need to
1482 ;; explain in detail why it's safe to use. For now, use membar.sys.
1483 (define_insn "*memory_barrier"  ;; matcher for the RTL built by the memory_barrier expander
1484 [(set (match_operand:BLK 0 "" "")  ;; op0: the BLKmode memory operand; not printed
1485 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1486 ""  ;; always enabled
1487 "\\tmembar.sys;"  ;; no %. prefix
1488 [(set_attr "predicable" "false")])  ;; marked as not predicable
1489
1490 (define_expand "nvptx_membar_cta"  ;; builds the operand for the *nvptx_membar_cta insn and emits this template
1491 [(set (match_dup 0)  ;; operand 0 is created by the body: a volatile BLKmode MEM at a scratch address
1492 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1493 ""  ;; always enabled; the body has no DONE, so the template above is emitted
1494 {
1495 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1496 MEM_VOLATILE_P (operands[0]) = 1;
1497 })
1498
1499 (define_insn "*nvptx_membar_cta"  ;; matcher for the RTL built by the nvptx_membar_cta expander
1500 [(set (match_operand:BLK 0 "" "")  ;; op0: the BLKmode memory operand; not printed
1501 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1502 ""  ;; always enabled
1503 "\\tmembar.cta;"  ;; no %. prefix
1504 [(set_attr "predicable" "false")])  ;; marked as not predicable
1505
1506 (define_insn "nvptx_nounroll"  ;; emits a .pragma "nounroll" directive; used by the omp_simt_ordered expander
1507 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]  ;; no operands; the const_int 0 is only a placeholder
1508 ""  ;; always enabled
1509 "\\t.pragma \\\"nounroll\\\";"  ;; no %. prefix
1510 [(set_attr "predicable" "false")])  ;; marked as not predicable