]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/nvptx/nvptx.md
4168190fa42d208f9930e1f3632e4f967c4d81d7
[thirdparty/gcc.git] / gcc / config / nvptx / nvptx.md
1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2020 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Codes of the "unspec" enumeration.  Within this part of the file,
;; UNSPEC_ARG_REG, UNSPEC_COPYSIGN, UNSPEC_LOG2, UNSPEC_EXP2, UNSPEC_SIN,
;; UNSPEC_COS, UNSPEC_BITREV and UNSPEC_BR_UNIFIED are referenced by the
;; patterns below.  The remaining codes are presumably used by patterns
;; further on or by the back end's C code -- confirm before removing any.
21 (define_c_enum "unspec" [
22 UNSPEC_ARG_REG
23
24 UNSPEC_COPYSIGN
25 UNSPEC_LOG2
26 UNSPEC_EXP2
27 UNSPEC_SIN
28 UNSPEC_COS
29
30 UNSPEC_FPINT_FLOOR
31 UNSPEC_FPINT_BTRUNC
32 UNSPEC_FPINT_CEIL
33 UNSPEC_FPINT_NEARBYINT
34
35 UNSPEC_BITREV
36
37 UNSPEC_ALLOCA
38
39 UNSPEC_SET_SOFTSTACK
40
41 UNSPEC_DIM_SIZE
42
43 UNSPEC_BIT_CONV
44
45 UNSPEC_VOTE_BALLOT
46
47 UNSPEC_LANEID
48
49 UNSPEC_SHUFFLE
50 UNSPEC_BR_UNIFIED
51 ])
52
;; Codes of the "unspecv" enumeration.  None of them is referenced in the
;; part of the file up to the floating-point conversions; presumably they
;; are used by unspec_volatile patterns further on -- confirm.
53 (define_c_enum "unspecv" [
54 UNSPECV_LOCK
55 UNSPECV_CAS
56 UNSPECV_XCHG
57 UNSPECV_BARSYNC
58 UNSPECV_MEMBAR
59 UNSPECV_MEMBAR_CTA
60 UNSPECV_DIM_POS
61
62 UNSPECV_FORK
63 UNSPECV_FORKED
64 UNSPECV_JOINING
65 UNSPECV_JOIN
66
67 UNSPECV_NOUNROLL
68
69 UNSPECV_SIMT_ENTER
70 UNSPECV_SIMT_EXIT
71
72 UNSPECV_RED_PART
73 ])
74
;; Insn attribute "subregs_ok": defaults to "false".  The move, extension
;; and truncation patterns below set it to "true" explicitly.
75 (define_attr "subregs_ok" "false,true"
76 (const_string "false"))
77
;; Insn attribute "atomic": defaults to "false".  No pattern in the
;; arithmetic, branch and call sections below overrides it.
78 (define_attr "atomic" "false,true"
79 (const_string "false"))
80
81 ;; The nvptx operand predicates, in general, don't permit subregs and
82 ;; accept only literal constants, unlike the generic ones, which
83 ;; permit subregs and symbolic constants (as appropriate).
;; Matches a bare REG that also satisfies register_operand.  The
;; match_code restricts the rtx code to REG, so a SUBREG is rejected
;; before the C test runs.
84 (define_predicate "nvptx_register_operand"
85 (match_code "reg")
86 {
87 return register_operand (op, mode);
88 })
89
;; Matches a MEM or a bare REG.  A REG is validated with register_operand;
;; the only other code let through by match_code, MEM, is validated with
;; memory_operand.
90 (define_predicate "nvptx_nonimmediate_operand"
91 (match_code "mem,reg")
92 {
93 return (REG_P (op) ? register_operand (op, mode)
94 : memory_operand (op, mode));
95 })
96
;; Matches a bare REG, a CONST_INT or a CONST_DOUBLE.  A REG is validated
;; with register_operand, a constant with immediate_operand.
97 (define_predicate "nvptx_nonmemory_operand"
98 (match_code "reg,const_int,const_double")
99 {
100 return (REG_P (op) ? register_operand (op, mode)
101 : immediate_operand (op, mode));
102 })
103
;; Matches only the CONST_INT zero, tested by pointer identity with
;; const0_rtx.
104 (define_predicate "const0_operand"
105 (and (match_code "const_int")
106 (match_test "op == const0_rtx")))
107
108 ;; True if this operator is valid for predication.
109 (define_predicate "predicate_operator"
110 (match_code "eq,ne"))
111
;; True for NE only; referenced by the cstorebi4 expander below.
112 (define_predicate "ne_operator"
113 (match_code "ne"))
114
;; Comparison codes accepted by the integer compare, branch and setcc
;; patterns: equality plus the signed and unsigned orderings.
115 (define_predicate "nvptx_comparison_operator"
116 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
117
;; Comparison codes accepted by the floating-point compare, branch and
;; setcc patterns: EQ/NE, the plain orderings, the UN* variants, and the
;; UNORDERED / ORDERED tests.
118 (define_predicate "nvptx_float_comparison_operator"
119 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
120
;; Matches a CONST_INT whose value, read through UINTVAL, is below 4.
;; Because the comparison is on the UINTVAL (unsigned) view, negative
;; constants are rejected too.
121 (define_predicate "nvptx_vector_index_operand"
122 (and (match_code "const_int")
123 (match_test "UINTVAL (op) < 4")))
124
125 ;; Test for a valid operand for a call instruction.
;; Any REG is accepted; a SYMBOL_REF is accepted only when
;; SYMBOL_REF_FUNCTION_P holds for it.
126 (define_predicate "call_insn_operand"
127 (match_code "symbol_ref,reg")
128 {
129 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
130 })
131
132 ;; Return true if OP is a call with parallel USEs of the argument
133 ;; pseudos.
;; Only elements 1 and up of the PARALLEL are inspected here: each must be
;; a USE whose operand is a REG.  Element 0 is not checked by this
;; predicate; it is left to the insn pattern that references the predicate
;; through match_parallel (see the call insns below).
134 (define_predicate "call_operation"
135 (match_code "parallel")
136 {
137 int arg_end = XVECLEN (op, 0);
138
139 for (int i = 1; i < arg_end; i++)
140 {
141 rtx elt = XVECEXP (op, 0, i);
142
143 if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
144 return false;
145 }
146 return true;
147 })
148
;; Insn attribute "predicable": defaults to "true".  The conditional
;; branch patterns below set it to "false".
149 (define_attr "predicable" "false,true"
150 (const_string "true"))
151
;; Conditional-execution form: the guard is an EQ or NE comparison
;; (predicate_operator) of a BImode register against constant zero.  Both
;; the condition string and the output template are empty.
152 (define_cond_exec
153 [(match_operator 0 "predicate_operator"
154 [(match_operand:BI 1 "nvptx_register_operand" "")
155 (match_operand:BI 2 "const0_operand" "")])]
156 ""
157 ""
158 )
159
;; Constraints.  P0, P1 and Pn each match one specific CONST_INT value
;; (0, 1 and -1 respectively).
160 (define_constraint "P0"
161 "An integer with the value 0."
162 (and (match_code "const_int")
163 (match_test "ival == 0")))
164
165 (define_constraint "P1"
166 "An integer with the value 1."
167 (and (match_code "const_int")
168 (match_test "ival == 1")))
169
170 (define_constraint "Pn"
171 "An integer with the value -1."
172 (and (match_code "const_int")
173 (match_test "ival == -1")))
174
;; NOTE(review): the description says "pseudo register", but the test only
;; checks for the REG code; it does not itself tell pseudos from hard
;; registers.
175 (define_constraint "R"
176 "A pseudo register."
177 (match_code "reg"))
178
;; The match_test is the constant "true", so every CONST_INT satisfies Ia.
179 (define_constraint "Ia"
180 "Any integer constant."
181 (and (match_code "const_int") (match_test "true")))
182
;; Mode iterators.  Each one expands a pattern once per listed mode; the
;; iterator names abbreviate their member lists.  BITS and BITD each pair
;; an integer mode with a floating-point mode (SI with SF, DI with DF).
183 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
184 (define_mode_iterator QHSDIM [QI HI SI DI])
185 (define_mode_iterator HSDIM [HI SI DI])
186 (define_mode_iterator BHSDIM [BI HI SI DI])
187 (define_mode_iterator SDIM [SI DI])
188 (define_mode_iterator SDISDFM [SI DI SF DF])
189 (define_mode_iterator QHIM [QI HI])
190 (define_mode_iterator QHSIM [QI HI SI])
191 (define_mode_iterator SDFM [SF DF])
192 (define_mode_iterator SDCM [SC DC])
193 (define_mode_iterator BITS [SI SF])
194 (define_mode_iterator BITD [DI DF])
195 (define_mode_iterator VECIM [V2SI V2DI])
196
197 ;; This mode iterator allows :P to be used for patterns that operate on
198 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
199 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
200
201 ;; Define element mode for each vector mode.
;; VECELEM gives the upper-case spelling, Vecelem the lower-case one.
202 (define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
203 (define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])
204
205 ;; We should get away with not defining memory alternatives, since we don't
206 ;; get variables in this mode and pseudos are never spilled.
;; Three alternatives for the source:
;;   R  - register copy, emitted as mov;
;;   P0 - constant 0, emitted as the always-false comparison "1 == 0";
;;   Pn - constant -1, emitted as the always-true comparison "1 == 1".
207 (define_insn "movbi"
208 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
209 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
210 ""
211 "@
212 %.\\tmov%t0\\t%0, %1;
213 %.\\tsetp.eq.u32\\t%0, 1, 0;
214 %.\\tsetp.eq.u32\\t%0, 1, 1;")
215
;; Move for the vector modes in VECIM.  The insn condition rejects a
;; memory destination unless the source is a register.
;;   alternative 0: register or immediate into register, formatted by
;;                  nvptx_output_mov_insn (defined outside this file);
;;   alternative 1: load from memory (ld);
;;   alternative 2: store to memory (st).
216 (define_insn "*mov<mode>_insn"
217 [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
218 (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
219 "!MEM_P (operands[0]) || REG_P (operands[1])"
220 {
221 if (which_alternative == 1)
222 return "%.\\tld%A1%u1\\t%0, %1;";
223 if (which_alternative == 2)
224 return "%.\\tst%A0%u0\\t%0, %1;";
225
226 return nvptx_output_mov_insn (operands[0], operands[1]);
227 }
228 [(set_attr "subregs_ok" "true")])
229
;; Move for the integer modes in QHSDIM.  The insn condition rejects a
;; memory destination unless the source is a register.
;;   alternative 0: register or immediate into register, formatted by
;;                  nvptx_output_mov_insn (defined outside this file);
;;   alternative 1: load from memory (ld);
;;   alternative 2: store to memory (st).
230 (define_insn "*mov<mode>_insn"
231 [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
232 (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
233 "!MEM_P (operands[0]) || REG_P (operands[1])"
234 {
235 if (which_alternative == 1)
236 return "%.\\tld%A1%u1\\t%0, %1;";
237 if (which_alternative == 2)
238 return "%.\\tst%A0%u0\\t%0, %1;";
239
240 return nvptx_output_mov_insn (operands[0], operands[1]);
241 }
242 [(set_attr "subregs_ok" "true")])
243
;; Move for the floating-point modes in SDFM.  Same structure as the
;; integer move: alternative 0 (register or floating constant into
;; register) is formatted by nvptx_output_mov_insn, alternative 1 is a
;; load (ld) and alternative 2 a store (st).
;; NOTE(review): the load template uses %u0 and the store template %u1,
;; whereas the integer and vector moves use %u1 and %u0 respectively.  Both
;; operands have the same mode here, so this is presumably equivalent --
;; confirm against the handling of %u in the back end's print-operand code.
244 (define_insn "*mov<mode>_insn"
245 [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
246 (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
247 "!MEM_P (operands[0]) || REG_P (operands[1])"
248 {
249 if (which_alternative == 1)
250 return "%.\\tld%A1%u0\\t%0, %1;";
251 if (which_alternative == 2)
252 return "%.\\tst%A0%u1\\t%0, %1;";
253
254 return nvptx_output_mov_insn (operands[0], operands[1]);
255 }
256 [(set_attr "subregs_ok" "true")])
257
;; Copy the register printed as %arN, where N is the CONST_INT in
;; operand 1, into operand 0.  For QImode and HImode the value is
;; converted from .u32 with cvt.
258 (define_insn "load_arg_reg<mode>"
259 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
260 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
261 UNSPEC_ARG_REG))]
262 ""
263 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
264
;; Same operation for SI, DI, SF and DF, where a plain mov is used.
265 (define_insn "load_arg_reg<mode>"
266 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
267 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
268 UNSPEC_ARG_REG))]
269 ""
270 "%.\\tmov%t0\\t%0, %%ar%1;")
271
;; Move expander for the vector modes.  When the destination is memory and
;; the source is not a register, the value is first moved into a fresh
;; pseudo and then stored from there; both moves are emitted here and
;; expansion ends with DONE.  In every other case control falls out of the
;; preparation code and the pattern's own SET is emitted.
272 (define_expand "mov<mode>"
273 [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
274 (match_operand:VECIM 1 "general_operand" ""))]
275 ""
276 {
277 if (MEM_P (operands[0]) && !REG_P (operands[1]))
278 {
279 rtx tmp = gen_reg_rtx (<MODE>mode);
280 emit_move_insn (tmp, operands[1]);
281 emit_move_insn (operands[0], tmp);
282 DONE;
283 }
284 })
285
;; Move expander for the scalar integer and floating-point modes.  When
;; the destination is memory and the source is not a register, the value
;; is first moved into a fresh pseudo and then stored from there (both
;; moves are emitted here, then DONE).  Otherwise, a LABEL_REF source is
;; reported through sorry () as unsupported, and the pattern's own SET is
;; emitted.
286 (define_expand "mov<mode>"
287 [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
288 (match_operand:QHSDISDFM 1 "general_operand" ""))]
289 ""
290 {
291 if (MEM_P (operands[0]) && !REG_P (operands[1]))
292 {
293 rtx tmp = gen_reg_rtx (<MODE>mode);
294 emit_move_insn (tmp, operands[1]);
295 emit_move_insn (operands[0], tmp);
296 DONE;
297 }
298
299 if (GET_CODE (operands[1]) == LABEL_REF)
300 sorry ("target cannot support label values");
301 })
302
;; Zero extensions.  Each pattern has two alternatives: alternative 0
;; converts register to register with cvt; alternative 1 takes the source
;; from memory and is emitted as a load (ld).
;; QI -> HI:
303 (define_insn "zero_extendqihi2"
304 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
305 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
306 ""
307 "@
308 %.\\tcvt.u16.u%T1\\t%0, %1;
309 %.\\tld%A1.u8\\t%0, %1;"
310 [(set_attr "subregs_ok" "true")])
311
;; QI/HI -> SI:
312 (define_insn "zero_extend<mode>si2"
313 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
314 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
315 ""
316 "@
317 %.\\tcvt.u32.u%T1\\t%0, %1;
318 %.\\tld%A1.u%T1\\t%0, %1;"
319 [(set_attr "subregs_ok" "true")])
320
;; QI/HI/SI -> DI:
321 (define_insn "zero_extend<mode>di2"
322 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
323 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
324 ""
325 "@
326 %.\\tcvt.u64.u%T1\\t%0, %1;
327 %.\\tld%A1%u1\\t%0, %1;"
328 [(set_attr "subregs_ok" "true")])
329
;; Sign extensions.
;; QI -> HI: register source only (there is no memory alternative).
330 (define_insn "extendqihi2"
331 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
332 (sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))]
333 ""
334 "%.\\tcvt.s16.s8\\t%0, %1;"
335 [(set_attr "subregs_ok" "true")])
336
;; QI/HI -> SI: alternative 0 is a register cvt, alternative 1 a signed
;; load from memory.
337 (define_insn "extend<mode>si2"
338 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
339 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
340 ""
341 "@
342 %.\\tcvt.s32.s%T1\\t%0, %1;
343 %.\\tld%A1.s%T1\\t%0, %1;"
344 [(set_attr "subregs_ok" "true")])
345
;; QI/HI/SI -> DI: same two alternatives.
346 (define_insn "extend<mode>di2"
347 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
348 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
349 ""
350 "@
351 %.\\tcvt.s64.s%T1\\t%0, %1;
352 %.\\tld%A1.s%T1\\t%0, %1;"
353 [(set_attr "subregs_ok" "true")])
354
;; Truncations.  The source is always a register.  Alternative 0 truncates
;; into a register with cvt; alternative 1 has a memory destination and is
;; emitted as a store (st) of the narrower size.
;; HI -> QI:
355 (define_insn "trunchiqi2"
356 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
357 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
358 ""
359 "@
360 %.\\tcvt%t0.u16\\t%0, %1;
361 %.\\tst%A0.u8\\t%0, %1;"
362 [(set_attr "subregs_ok" "true")])
363
;; SI -> QI/HI:
364 (define_insn "truncsi<mode>2"
365 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
366 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
367 ""
368 "@
369 %.\\tcvt%t0.u32\\t%0, %1;
370 %.\\tst%A0.u%T0\\t%0, %1;"
371 [(set_attr "subregs_ok" "true")])
372
;; DI -> QI/HI/SI:
373 (define_insn "truncdi<mode>2"
374 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
375 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
376 ""
377 "@
378 %.\\tcvt%t0.u64\\t%0, %1;
379 %.\\tst%A0.u%T0\\t%0, %1;"
380 [(set_attr "subregs_ok" "true")])
381
382 ;; Integer arithmetic
383
;; Integer addition for HI/SI/DI.  Operand 1 must be a register; operand 2
;; may be a register or an immediate.
384 (define_insn "add<mode>3"
385 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
386 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
387 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
388 ""
389 "%.\\tadd%t0\\t%0, %1, %2;")
390
;; Three-register SImode form (op1 + op2) + op3, emitted as a vadd
;; instruction with the ".add" suffix.  The name starts with '*', so the
;; pattern is used only when an insn is recognised as matching it.
391 (define_insn "*vadd_addsi4"
392 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
393 (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
394 (match_operand:SI 2 "nvptx_register_operand" "R"))
395 (match_operand:SI 3 "nvptx_register_operand" "R")))]
396 ""
397 "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;")
398
;; Likewise for (op1 - op2) + op3, emitted as vsub with the ".add" suffix.
399 (define_insn "*vsub_addsi4"
400 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
401 (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
402 (match_operand:SI 2 "nvptx_register_operand" "R"))
403 (match_operand:SI 3 "nvptx_register_operand" "R")))]
404 ""
405 "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;")
406
;; Integer subtraction for HI/SI/DI.  Unlike add<mode>3, both source
;; operands must be registers; there is no immediate alternative.
407 (define_insn "sub<mode>3"
408 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
409 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
410 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
411 ""
412 "%.\\tsub%t0\\t%0, %1, %2;")
413
;; Integer multiplication for HI/SI/DI, emitted as mul.lo.  Operand 2 may
;; be a register or an immediate.
414 (define_insn "mul<mode>3"
415 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
416 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
417 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
418 ""
419 "%.\\tmul.lo%t0\\t%0, %1, %2;")
420
;; Multiply-add (op1 * op2) + op3, emitted as mad.lo.  Operands 2 and 3
;; may each be a register or an immediate.  Unnamed pattern ('*' prefix).
421 (define_insn "*mad<mode>3"
422 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
423 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
424 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
425 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
426 ""
427 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
428
;; Signed integer division for HI/SI/DI (div.s).  The divisor may be a
;; register or an immediate.
429 (define_insn "div<mode>3"
430 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
431 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
432 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
433 ""
434 "%.\\tdiv.s%T0\\t%0, %1, %2;")
435
;; Unsigned integer division (div.u), same operand rules.
436 (define_insn "udiv<mode>3"
437 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
438 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
439 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
440 ""
441 "%.\\tdiv.u%T0\\t%0, %1, %2;")
442
;; Signed integer remainder for HI/SI/DI (rem.s).
;; NOTE(review): operand 1's predicate (nvptx_register_operand) accepts
;; only registers, so the "i" in its "Ri" constraint can never apply;
;; div<mode>3 uses plain "R" in the same position.  Either the constraint
;; should be "R" or the predicate was meant to be
;; nvptx_nonmemory_operand -- confirm which before changing.
443 (define_insn "mod<mode>3"
444 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
445 (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
446 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
447 ""
448 "%.\\trem.s%T0\\t%0, %1, %2;")
449
;; Unsigned integer remainder (rem.u).  The same predicate/constraint
;; mismatch on operand 1 applies here.
450 (define_insn "umod<mode>3"
451 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
452 (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
453 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
454 ""
455 "%.\\trem.u%T0\\t%0, %1, %2;")
456
;; Integer minimum and maximum for HI/SI/DI.  The signed forms are emitted
;; as min.s / max.s and the unsigned forms as min.u / max.u.  In all four,
;; operand 2 may be a register or an immediate.
457 (define_insn "smin<mode>3"
458 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
459 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
460 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
461 ""
462 "%.\\tmin.s%T0\\t%0, %1, %2;")
463
464 (define_insn "umin<mode>3"
465 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
466 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
467 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
468 ""
469 "%.\\tmin.u%T0\\t%0, %1, %2;")
470
471 (define_insn "smax<mode>3"
472 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
473 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
474 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
475 ""
476 "%.\\tmax.s%T0\\t%0, %1, %2;")
477
478 (define_insn "umax<mode>3"
479 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
480 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
481 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
482 ""
483 "%.\\tmax.u%T0\\t%0, %1, %2;")
484
;; Integer unary operations for HI/SI/DI, register to register:
;; absolute value (abs.s), negation (neg.s) and bitwise complement
;; (not.b).
485 (define_insn "abs<mode>2"
486 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
487 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
488 ""
489 "%.\\tabs.s%T0\\t%0, %1;")
490
491 (define_insn "neg<mode>2"
492 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
493 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
494 ""
495 "%.\\tneg.s%T0\\t%0, %1;")
496
497 (define_insn "one_cmpl<mode>2"
498 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
499 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
500 ""
501 "%.\\tnot.b%T0\\t%0, %1;")
502
;; Bit reversal for SI/DI, represented as UNSPEC_BITREV and emitted as
;; brev.b.  Used by the ctz<mode>2 expander below.
503 (define_insn "bitrev<mode>2"
504 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
505 (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
506 UNSPEC_BITREV))]
507 ""
508 "%.\\tbrev.b%T0\\t%0, %1;")
509
;; Count leading zeros of an SI or DI source.  The result is always
;; SImode; the instruction width suffix comes from the source operand.
510 (define_insn "clz<mode>2"
511 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
512 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
513 ""
514 "%.\\tclz.b%T1\\t%0, %1;")
515
;; Count trailing zeros, expanded as clz (bitrev (x)): the source is
;; bit-reversed into a fresh pseudo of the source mode, then the leading
;; zeros of that pseudo are counted into the SImode result.
516 (define_expand "ctz<mode>2"
517 [(set (match_operand:SI 0 "nvptx_register_operand" "")
518 (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
519 ""
520 {
521 rtx tmpreg = gen_reg_rtx (<MODE>mode);
522 emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
523 emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
524 DONE;
525 })
526
;; Population count of an SI or DI source (popc.b).  The result is always
;; SImode.
527 (define_insn "popcount<mode>2"
528 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
529 (popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
530 ""
531 "%.\\tpopc.b%T1\\t%0, %1;")
532
533 ;; Multiplication variants
534
;; Widening multiplies (mul.wide): both register inputs are sign- or
;; zero-extended to twice their width and the full product is produced.
;; Signed HI x HI -> SI:
535 (define_insn "mulhisi3"
536 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
537 (mult:SI (sign_extend:SI
538 (match_operand:HI 1 "nvptx_register_operand" "R"))
539 (sign_extend:SI
540 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
541 ""
542 "%.\\tmul.wide.s16\\t%0, %1, %2;")
543
;; Signed SI x SI -> DI:
544 (define_insn "mulsidi3"
545 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
546 (mult:DI (sign_extend:DI
547 (match_operand:SI 1 "nvptx_register_operand" "R"))
548 (sign_extend:DI
549 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
550 ""
551 "%.\\tmul.wide.s32\\t%0, %1, %2;")
552
;; Unsigned HI x HI -> SI:
553 (define_insn "umulhisi3"
554 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
555 (mult:SI (zero_extend:SI
556 (match_operand:HI 1 "nvptx_register_operand" "R"))
557 (zero_extend:SI
558 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
559 ""
560 "%.\\tmul.wide.u16\\t%0, %1, %2;")
561
;; Unsigned SI x SI -> DI:
562 (define_insn "umulsidi3"
563 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
564 (mult:DI (zero_extend:DI
565 (match_operand:SI 1 "nvptx_register_operand" "R"))
566 (zero_extend:DI
567 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
568 ""
569 "%.\\tmul.wide.u32\\t%0, %1, %2;")
570
;; High-part multiplies (mul.hi).  Each pattern describes the upper half
;; of the double-width product: the extended inputs are multiplied, the
;; product is shifted right logically by the input width (16 or 32), and
;; the result is truncated back to the input mode.
;; Signed HI:
571 (define_insn "smulhi3_highpart"
572 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
573 (truncate:HI
574 (lshiftrt:SI
575 (mult:SI (sign_extend:SI
576 (match_operand:HI 1 "nvptx_register_operand" "R"))
577 (sign_extend:SI
578 (match_operand:HI 2 "nvptx_register_operand" "R")))
579 (const_int 16))))]
580 ""
581 "%.\\tmul.hi.s16\\t%0, %1, %2;")
582
;; Signed SI:
583 (define_insn "smulsi3_highpart"
584 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
585 (truncate:SI
586 (lshiftrt:DI
587 (mult:DI (sign_extend:DI
588 (match_operand:SI 1 "nvptx_register_operand" "R"))
589 (sign_extend:DI
590 (match_operand:SI 2 "nvptx_register_operand" "R")))
591 (const_int 32))))]
592 ""
593 "%.\\tmul.hi.s32\\t%0, %1, %2;")
594
;; Unsigned HI:
595 (define_insn "umulhi3_highpart"
596 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
597 (truncate:HI
598 (lshiftrt:SI
599 (mult:SI (zero_extend:SI
600 (match_operand:HI 1 "nvptx_register_operand" "R"))
601 (zero_extend:SI
602 (match_operand:HI 2 "nvptx_register_operand" "R")))
603 (const_int 16))))]
604 ""
605 "%.\\tmul.hi.u16\\t%0, %1, %2;")
606
;; Unsigned SI:
607 (define_insn "umulsi3_highpart"
608 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
609 (truncate:SI
610 (lshiftrt:DI
611 (mult:DI (zero_extend:DI
612 (match_operand:SI 1 "nvptx_register_operand" "R"))
613 (zero_extend:DI
614 (match_operand:SI 2 "nvptx_register_operand" "R")))
615 (const_int 32))))]
616 ""
617 "%.\\tmul.hi.u32\\t%0, %1, %2;")
618
619 ;; Shifts
620
;; Shifts for HI/SI/DI.  The shift count (operand 2) is always SImode,
;; whatever the mode of the shifted value, and may be a register or an
;; immediate.
;; Left shift (shl.b):
621 (define_insn "ashl<mode>3"
622 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
623 (ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
624 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
625 ""
626 "%.\\tshl.b%T0\\t%0, %1, %2;")
627
;; Arithmetic right shift (shr.s):
628 (define_insn "ashr<mode>3"
629 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
630 (ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
631 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
632 ""
633 "%.\\tshr.s%T0\\t%0, %1, %2;")
634
;; Logical right shift (shr.u):
635 (define_insn "lshr<mode>3"
636 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
637 (lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
638 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
639 ""
640 "%.\\tshr.u%T0\\t%0, %1, %2;")
641
642 ;; Logical operations
643
;; Bitwise AND, OR and XOR.  These iterate over BHSDIM, so besides
;; HI/SI/DI they also cover BImode.  Operand 2 may be a register or an
;; immediate.
644 (define_insn "and<mode>3"
645 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
646 (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
647 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
648 ""
649 "%.\\tand.b%T0\\t%0, %1, %2;")
650
651 (define_insn "ior<mode>3"
652 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
653 (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
654 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
655 ""
656 "%.\\tor.b%T0\\t%0, %1, %2;")
657
658 (define_insn "xor<mode>3"
659 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
660 (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
661 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
662 ""
663 "%.\\txor.b%T0\\t%0, %1, %2;")
664
665 ;; Comparisons and branches
666
;; Integer comparison into a BImode register, emitted as setp.  The
;; comparison code is operand 1; the second compared value may be a
;; register or an immediate.
667 (define_insn "*cmp<mode>"
668 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
669 (match_operator:BI 1 "nvptx_comparison_operator"
670 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
671 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
672 ""
673 "%.\\tsetp%c1\\t%0, %2, %3;")
674
;; Floating-point comparison into a BImode register, emitted as setp.
;; The second compared value may be a register or a floating constant.
675 (define_insn "*cmp<mode>"
676 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
677 (match_operator:BI 1 "nvptx_float_comparison_operator"
678 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
679 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
680 ""
681 "%.\\tsetp%c1\\t%0, %2, %3;")
682
;; Unconditional branch (bra) to the label in operand 0.
683 (define_insn "jump"
684 [(set (pc)
685 (label_ref (match_operand 0 "" "")))]
686 ""
687 "%.\\tbra\\t%l0;")
688
;; Conditional branch (bra) to the label in operand 1, taken when the
;; BImode register in operand 0 is nonzero.  Marked not predicable.
;; The guard is printed by the %j operand code, handled in the back end's
;; print-operand code (not in this file).
689 (define_insn "br_true"
690 [(set (pc)
691 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
692 (const_int 0))
693 (label_ref (match_operand 1 "" ""))
694 (pc)))]
695 ""
696 "%j0\\tbra\\t%l1;"
697 [(set_attr "predicable" "false")])
698
;; Same branch taken when operand 0 is zero; the guard is printed with %J
;; instead of %j.
699 (define_insn "br_false"
700 [(set (pc)
701 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
702 (const_int 0))
703 (label_ref (match_operand 1 "" ""))
704 (pc)))]
705 ""
706 "%J0\\tbra\\t%l1;"
707 [(set_attr "predicable" "false")])
708
709 ;; unified conditional branch
;; Same shape as br_true, except that the tested register is wrapped in
;; UNSPEC_BR_UNIFIED and the instruction is emitted as bra.uni.
710 (define_insn "br_true_uni"
711 [(set (pc) (if_then_else
712 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
713 UNSPEC_BR_UNIFIED) (const_int 0))
714 (label_ref (match_operand 1 "" "")) (pc)))]
715 ""
716 "%j0\\tbra.uni\\t%l1;"
717 [(set_attr "predicable" "false")])
718
;; Counterpart of br_false: branch when the wrapped register is zero.
719 (define_insn "br_false_uni"
720 [(set (pc) (if_then_else
721 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
722 UNSPEC_BR_UNIFIED) (const_int 0))
723 (label_ref (match_operand 1 "" "")) (pc)))]
724 ""
725 "%J0\\tbra.uni\\t%l1;"
726 [(set_attr "predicable" "false")])
727
;; Compare-and-branch expander for integer modes.  The incoming
;; comparison (operand 0) is handed to nvptx_expand_compare, which is
;; defined outside this file; the rtx it returns replaces operand 0, and
;; that rtx's two sub-operands replace operands 1 and 2 before the branch
;; pattern is emitted.
728 (define_expand "cbranch<mode>4"
729 [(set (pc)
730 (if_then_else (match_operator 0 "nvptx_comparison_operator"
731 [(match_operand:HSDIM 1 "nvptx_register_operand" "")
732 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
733 (label_ref (match_operand 3 "" ""))
734 (pc)))]
735 ""
736 {
737 rtx t = nvptx_expand_compare (operands[0]);
738 operands[0] = t;
739 operands[1] = XEXP (t, 0);
740 operands[2] = XEXP (t, 1);
741 })
742
;; Compare-and-branch expander for floating-point modes; identical
;; preparation code.
743 (define_expand "cbranch<mode>4"
744 [(set (pc)
745 (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
746 [(match_operand:SDFM 1 "nvptx_register_operand" "")
747 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
748 (label_ref (match_operand 3 "" ""))
749 (pc)))]
750 ""
751 {
752 rtx t = nvptx_expand_compare (operands[0]);
753 operands[0] = t;
754 operands[1] = XEXP (t, 0);
755 operands[2] = XEXP (t, 1);
756 })
757
;; Compare-and-branch on a BImode register tested for EQ or NE against
;; constant zero.  The preparation code is empty, so the pattern is
;; emitted as written.
758 (define_expand "cbranchbi4"
759 [(set (pc)
760 (if_then_else (match_operator 0 "predicate_operator"
761 [(match_operand:BI 1 "nvptx_register_operand" "")
762 (match_operand:BI 2 "const0_operand" "")])
763 (label_ref (match_operand 3 "" ""))
764 (pc)))]
765 ""
766 "")
767
768 ;; Conditional stores
769
;; Turn a BImode register into an SImode value with selp: -1 is selected
;; when operand 1 is set, 0 otherwise.
770 (define_insn "setcc_from_bi"
771 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
772 (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
773 (const_int 0)))]
774 ""
775 "%.\\tselp%t0 %0,-1,0,%1;")
776
;; Conditional select (selp) on a BImode register.
;; sel_true: the result is operand 2 when operand 1 is nonzero, otherwise
;; operand 3.  Integer modes:
777 (define_insn "sel_true<mode>"
778 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
779 (if_then_else:HSDIM
780 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
781 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
782 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
783 ""
784 "%.\\tselp%t0\\t%0, %2, %3, %1;")
785
;; sel_true, floating-point modes:
786 (define_insn "sel_true<mode>"
787 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
788 (if_then_else:SDFM
789 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
790 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
791 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
792 ""
793 "%.\\tselp%t0\\t%0, %2, %3, %1;")
794
;; sel_false: the result is operand 2 when operand 1 is zero, otherwise
;; operand 3.  The same selp instruction is used; the template simply
;; prints operands 3 and 2 in swapped order.  Integer modes:
795 (define_insn "sel_false<mode>"
796 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
797 (if_then_else:HSDIM
798 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
799 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
800 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
801 ""
802 "%.\\tselp%t0\\t%0, %3, %2, %1;")
803
;; sel_false, floating-point modes:
804 (define_insn "sel_false<mode>"
805 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
806 (if_then_else:SDFM
807 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
808 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
809 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
810 ""
811 "%.\\tselp%t0\\t%0, %3, %2, %1;")
812
;; Comparison with the result written straight into a general register
;; using the "set" instruction (no BImode intermediate).
;; setcc_int: SImode result of an integer comparison.
813 (define_insn "setcc_int<mode>"
814 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
815 (match_operator:SI 1 "nvptx_comparison_operator"
816 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
817 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
818 ""
819 "%.\\tset%t0%c1\\t%0, %2, %3;")
820
;; setcc_int: SImode result of a floating-point comparison.
821 (define_insn "setcc_int<mode>"
822 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
823 (match_operator:SI 1 "nvptx_float_comparison_operator"
824 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
825 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
826 ""
827 "%.\\tset%t0%c1\\t%0, %2, %3;")
828
;; setcc_float: SFmode result of an integer comparison.
829 (define_insn "setcc_float<mode>"
830 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
831 (match_operator:SF 1 "nvptx_comparison_operator"
832 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
833 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
834 ""
835 "%.\\tset%t0%c1\\t%0, %2, %3;")
836
;; setcc_float: SFmode result of a floating-point comparison.
837 (define_insn "setcc_float<mode>"
838 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
839 (match_operator:SF 1 "nvptx_float_comparison_operator"
840 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
841 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
842 ""
843 "%.\\tset%t0%c1\\t%0, %2, %3;")
844
;; Store-flag expanders.  All three have empty preparation code, so the
;; SET in the pattern is emitted as written and must then be recognised
;; by one of the insns above.
;; BImode source: only NE against constant zero is accepted, which has
;; the same shape as setcc_from_bi.
845 (define_expand "cstorebi4"
846 [(set (match_operand:SI 0 "nvptx_register_operand")
847 (match_operator:SI 1 "ne_operator"
848 [(match_operand:BI 2 "nvptx_register_operand")
849 (match_operand:BI 3 "const0_operand")]))]
850 ""
851 "")
852
;; Integer comparison into an SImode register.
853 (define_expand "cstore<mode>4"
854 [(set (match_operand:SI 0 "nvptx_register_operand")
855 (match_operator:SI 1 "nvptx_comparison_operator"
856 [(match_operand:HSDIM 2 "nvptx_register_operand")
857 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
858 ""
859 "")
860
;; Floating-point comparison into an SImode register.
861 (define_expand "cstore<mode>4"
862 [(set (match_operand:SI 0 "nvptx_register_operand")
863 (match_operator:SI 1 "nvptx_float_comparison_operator"
864 [(match_operand:SDFM 2 "nvptx_register_operand")
865 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
866 ""
867 "")
868
869 ;; Calls
870
;; Call without a result.  The whole insn is a PARALLEL accepted by the
;; call_operation predicate; its first element is the CALL matched here.
;; The callee address (operand 0) is pointer-sized (iterator P).  Output
;; is produced by nvptx_output_call_insn, defined outside this file, with
;; NULL_RTX passed as its second argument.
871 (define_insn "call_insn_<mode>"
872 [(match_parallel 2 "call_operation"
873 [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
874 (match_operand 1))])]
875 ""
876 {
877 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
878 })
879
;; Call with a result.  Same structure, with the CALL wrapped in a SET of
;; the result register (operand 0); that register is passed as the second
;; argument of nvptx_output_call_insn and the callee (operand 1) as the
;; third.
880 (define_insn "call_value_insn_<mode>"
881 [(match_parallel 3 "call_operation"
882 [(set (match_operand 0 "nvptx_register_operand" "=R")
883 (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
884 (match_operand 2)))])]
885 ""
886 {
887 return nvptx_output_call_insn (insn, operands[0], operands[1]);
888 })
889
;; Call expander without a result.  All work is delegated to
;; nvptx_expand_call (defined outside this file), called with NULL_RTX
;; and operand 0; nothing from the pattern itself is emitted (DONE).
890 (define_expand "call"
891 [(match_operand 0 "" "")]
892 ""
893 {
894 nvptx_expand_call (NULL_RTX, operands[0]);
895 DONE;
896 })
897
;; Call expander with a result: operand 0 is passed as the first argument
;; of nvptx_expand_call and operand 1 as the second.
898 (define_expand "call_value"
899 [(match_operand 0 "" "")
900 (match_operand 1 "" "")]
901 ""
902 {
903 nvptx_expand_call (operands[0], operands[1]);
904 DONE;
905 })
906
907 ;; Floating point arithmetic.
908
;; SF/DF addition.  Operand 2 may be a register or a floating constant.
909 (define_insn "add<mode>3"
910 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
911 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
912 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
913 ""
914 "%.\\tadd%t0\\t%0, %1, %2;")
915
;; SF/DF subtraction.  Both source operands must be registers.
916 (define_insn "sub<mode>3"
917 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
918 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
919 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
920 ""
921 "%.\\tsub%t0\\t%0, %1, %2;")
922
;; SF/DF multiplication.  Operand 2 may be a register or a floating
;; constant.
923 (define_insn "mul<mode>3"
924 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
925 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
926 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
927 ""
928 "%.\\tmul%t0\\t%0, %1, %2;")
929
;; SF/DF fused multiply-add of operands 1, 2 and 3.  Operands 2 and 3 may
;; be registers or floating constants.  The %# in the template is expanded
;; by the back end's print-operand code (not in this file) -- presumably a
;; rounding-mode suffix; confirm there.
930 (define_insn "fma<mode>4"
931 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
932 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
933 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
934 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
935 ""
936 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
937
;; Reciprocal: matches the division 1.0 / x and emits rcp.  Note the
;; operand numbering: the constant dividend is operand 2 and the register
;; divisor is operand 1.  The insn condition requires the constant to be
;; identical to dconst1.
938 (define_insn "*recip<mode>2"
939 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
940 (div:SDFM
941 (match_operand:SDFM 2 "const_double_operand" "F")
942 (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
943 "CONST_DOUBLE_P (operands[2])
944 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
945 "%.\\trcp%#%t0\\t%0, %1;")
946
;; General SF/DF division.  The dividend must be a register; the divisor
;; may be a register or a floating constant.
947 (define_insn "div<mode>3"
948 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
949 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
950 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
951 ""
952 "%.\\tdiv%#%t0\\t%0, %1, %2;")
953
;; SF/DF copysign, represented as UNSPEC_COPYSIGN.  The template prints
;; operand 2 before operand 1, i.e. the instruction's source operands are
;; in the opposite order to the pattern's.
954 (define_insn "copysign<mode>3"
955 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
956 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
957 (match_operand:SDFM 2 "nvptx_register_operand" "R")]
958 UNSPEC_COPYSIGN))]
959 ""
960 "%.\\tcopysign%t0\\t%0, %2, %1;")
961
;; SF/DF minimum and maximum (min / max).  Operand 2 may be a register or
;; a floating constant.
962 (define_insn "smin<mode>3"
963 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
964 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
965 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
966 ""
967 "%.\\tmin%t0\\t%0, %1, %2;")
968
969 (define_insn "smax<mode>3"
970 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
971 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
972 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
973 ""
974 "%.\\tmax%t0\\t%0, %1, %2;")
975
;; Floating-point absolute value of register operand 1, emitted as `abs`.
976 (define_insn "abs<mode>2"
977 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
978 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
979 ""
980 "%.\\tabs%t0\\t%0, %1;")
981
;; Floating-point negation of register operand 1, emitted as `neg`.
982 (define_insn "neg<mode>2"
983 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
984 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
985 ""
986 "%.\\tneg%t0\\t%0, %1;")
987
;; Floating-point square root of register operand 1, emitted as `sqrt`.
988 (define_insn "sqrt<mode>2"
989 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
990 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
991 ""
992 "%.\\tsqrt%#%t0\\t%0, %1;")
993
;; Combined SFmode sine/cosine expander.  From the single input operand 2
;; it produces two sets: operand 0 = UNSPEC_COS and operand 1 = UNSPEC_SIN.
;; Only enabled under flag_unsafe_math_optimizations.
;; The preparation statement replaces operand 2 by the result of
;; make_safe_from (operands[2], operands[0]).
;; NOTE(review): presumably so that writing operand 0 in the first set
;; cannot clobber the input still read by the second set -- confirm.
994 (define_expand "sincossf3"
995 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
996 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
997 UNSPEC_COS))
998 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
999 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
1000 "flag_unsafe_math_optimizations"
1001 {
1002 operands[2] = make_safe_from (operands[2], operands[0]);
1003 })
1004
;; SFmode sine (UNSPEC_SIN), emitted as `sin.approx`.  Only available
;; under flag_unsafe_math_optimizations.
1005 (define_insn "sinsf2"
1006 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1007 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1008 UNSPEC_SIN))]
1009 "flag_unsafe_math_optimizations"
1010 "%.\\tsin.approx%t0\\t%0, %1;")
1011
;; SFmode cosine (UNSPEC_COS), emitted as `cos.approx`.  Only available
;; under flag_unsafe_math_optimizations.
1012 (define_insn "cossf2"
1013 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1014 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1015 UNSPEC_COS))]
1016 "flag_unsafe_math_optimizations"
1017 "%.\\tcos.approx%t0\\t%0, %1;")
1018
;; SFmode base-2 logarithm (UNSPEC_LOG2), emitted as `lg2.approx`.  Only
;; available under flag_unsafe_math_optimizations.
1019 (define_insn "log2sf2"
1020 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1021 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1022 UNSPEC_LOG2))]
1023 "flag_unsafe_math_optimizations"
1024 "%.\\tlg2.approx%t0\\t%0, %1;")
1025
;; SFmode base-2 exponential (UNSPEC_EXP2), emitted as `ex2.approx`.  Only
;; available under flag_unsafe_math_optimizations.
1026 (define_insn "exp2sf2"
1027 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1028 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1029 UNSPEC_EXP2))]
1030 "flag_unsafe_math_optimizations"
1031 "%.\\tex2.approx%t0\\t%0, %1;")
1032
1033 ;; Conversions involving floating point
1034
;; Extend SFmode operand 1 to DFmode operand 0 with a `cvt` instruction
;; whose suffixes are derived from operands 0 and 1.
1035 (define_insn "extendsfdf2"
1036 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
1037 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
1038 ""
1039 "%.\\tcvt%t0%t1\\t%0, %1;")
1040
;; Truncate DFmode operand 1 to SFmode operand 0 with a `cvt` instruction.
;; Unlike extendsfdf2, the template carries the %# modifier.
;; NOTE(review): %# presumably prints a rounding-mode suffix -- confirm in
;; the target's print_operand implementation.
1041 (define_insn "truncdfsf2"
1042 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1043 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
1044 ""
1045 "%.\\tcvt%#%t0%t1\\t%0, %1;")
1046
;; Convert unsigned SImode operand 1 to a floating-point value (each SDFM
;; mode) in operand 0, using `cvt` with a `.u` source-type suffix.
1047 (define_insn "floatunssi<mode>2"
1048 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1049 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1050 ""
1051 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1052
;; Convert signed SImode operand 1 to a floating-point value (each SDFM
;; mode) in operand 0, using `cvt` with a `.s` source-type suffix.
1053 (define_insn "floatsi<mode>2"
1054 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1055 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1056 ""
1057 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1058
;; Convert unsigned DImode operand 1 to a floating-point value (each SDFM
;; mode) in operand 0, using `cvt` with a `.u` source-type suffix.
1059 (define_insn "floatunsdi<mode>2"
1060 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1061 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1062 ""
1063 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1064
;; Convert signed DImode operand 1 to a floating-point value (each SDFM
;; mode) in operand 0, using `cvt` with a `.s` source-type suffix.
1065 (define_insn "floatdi<mode>2"
1066 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1067 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1068 ""
1069 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1070
;; Convert floating-point operand 1 (each SDFM mode) to unsigned SImode
;; operand 0, using `cvt.rzi` with a `.u` destination-type suffix.
1071 (define_insn "fixuns_trunc<mode>si2"
1072 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1073 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1074 ""
1075 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1076
;; Convert floating-point operand 1 (each SDFM mode) to signed SImode
;; operand 0, using `cvt.rzi` with a `.s` destination-type suffix.
1077 (define_insn "fix_trunc<mode>si2"
1078 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1079 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1080 ""
1081 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
1082
;; Convert floating-point operand 1 (each SDFM mode) to unsigned DImode
;; operand 0, using `cvt.rzi` with a `.u` destination-type suffix.
1083 (define_insn "fixuns_trunc<mode>di2"
1084 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1085 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1086 ""
1087 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1088
;; Convert floating-point operand 1 (each SDFM mode) to signed DImode
;; operand 0, using `cvt.rzi` with a `.s` destination-type suffix.
1089 (define_insn "fix_trunc<mode>di2"
1090 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1091 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1092 ""
1093 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
1094
;; Int iterator over the four UNSPEC_FPINT_* codes, plus two attributes:
;; fpint_name gives the pattern-name stem for each code, and
;; fpint_roundingmode gives the text spliced directly after `cvt` in the
;; output template (a literal suffix for floor/btrunc/ceil, and the %#
;; modifier followed by `i` for nearbyint).
1095 (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
1096 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
1097 (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
1098 (UNSPEC_FPINT_BTRUNC "btrunc")
1099 (UNSPEC_FPINT_CEIL "ceil")
1100 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
1101 (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1102 (UNSPEC_FPINT_BTRUNC ".rzi")
1103 (UNSPEC_FPINT_CEIL ".rpi")
1104 (UNSPEC_FPINT_NEARBYINT "%#i")])
1105
;; Float-to-float rounding patterns, one per FPINT code and SDFM mode
;; (named floor/btrunc/ceil/nearbyint followed by the mode and `2`).
;; Source and destination share the same mode; the output is a `cvt`
;; whose rounding text is taken from fpint_roundingmode.
1106 (define_insn "<FPINT:fpint_name><SDFM:mode>2"
1107 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1108 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1109 FPINT))]
1110 ""
1111 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
1112
;; Int iterator restricted to the floor and ceil codes, with attributes
;; giving the pattern-name stem (lfloor/lceil) and the `cvt` rounding
;; suffix (.rmi/.rpi) for each.
1113 (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
1114 (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
1115 (UNSPEC_FPINT_CEIL "lceil")])
1116 (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1117 (UNSPEC_FPINT_CEIL ".rpi")])
1118
;; lfloor/lceil patterns: round floating-point operand 1 (each SDFM mode)
;; and deliver the result as an integer in operand 0 (each SDIM mode).
;; Emitted as `cvt` with the fpint2_roundingmode suffix and a `.s`
;; destination-type suffix.
1119 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
1120 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1121 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1122 FPINT2))]
1123 ""
1124 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
1125
1126 ;; Vector operations
1127
;; Insert scalar operand 1 into one element of vector operand 0, keeping
;; the remaining elements (match_dup 0).  This variant matches vec_merge
;; mask 1 and writes the `.x` component with a `mov`.
1128 (define_insn "*vec_set<mode>_0"
1129 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1130 (vec_merge:VECIM
1131 (vec_duplicate:VECIM
1132 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1133 (match_dup 0)
1134 (const_int 1)))]
1135 ""
1136 "%.\\tmov%t1\\t%0.x, %1;")
1137
;; Insert scalar operand 1 into one element of vector operand 0, keeping
;; the remaining elements (match_dup 0).  This variant matches vec_merge
;; mask 2 and writes the `.y` component with a `mov`.
1138 (define_insn "*vec_set<mode>_1"
1139 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1140 (vec_merge:VECIM
1141 (vec_duplicate:VECIM
1142 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1143 (match_dup 0)
1144 (const_int 2)))]
1145 ""
1146 "%.\\tmov%t1\\t%0.y, %1;")
1147
;; Insert scalar operand 1 into one element of vector operand 0, keeping
;; the remaining elements (match_dup 0).  This variant matches vec_merge
;; mask 4 and writes the `.z` component with a `mov`.
1148 (define_insn "*vec_set<mode>_2"
1149 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1150 (vec_merge:VECIM
1151 (vec_duplicate:VECIM
1152 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1153 (match_dup 0)
1154 (const_int 4)))]
1155 ""
1156 "%.\\tmov%t1\\t%0.z, %1;")
1157
;; Insert scalar operand 1 into one element of vector operand 0, keeping
;; the remaining elements (match_dup 0).  This variant matches vec_merge
;; mask 8 and writes the `.w` component with a `mov`.
1158 (define_insn "*vec_set<mode>_3"
1159 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1160 (vec_merge:VECIM
1161 (vec_duplicate:VECIM
1162 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1163 (match_dup 0)
1164 (const_int 8)))]
1165 ""
1166 "%.\\tmov%t1\\t%0.w, %1;")
1167
;; Expander for vec_set: store scalar operand 1 into element operand 2 of
;; vector operand 0.  The body builds
;;   (set op0 (vec_merge (vec_duplicate op1) op0 (const_int 1 << index)))
;; by hand and emits it, so the single-bit mask has the same shape as the
;; masks (1, 2, 4, 8) matched by the *vec_set<mode>_N insns.  Operand 2 is
;; read with INTVAL and therefore has to be a constant integer.
1168 (define_expand "vec_set<mode>"
1169 [(match_operand:VECIM 0 "nvptx_register_operand")
1170 (match_operand:<VECELEM> 1 "nvptx_register_operand")
1171 (match_operand:SI 2 "nvptx_vector_index_operand")]
1172 ""
1173 {
1174 enum machine_mode mode = GET_MODE (operands[0]);
1175 int mask = 1 << INTVAL (operands[2]);
1176 rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]);
1177 tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask));
1178 emit_insn (gen_rtx_SET (operands[0], tmp));
1179 DONE;
1180 })
1181
;; Extract the element of vector operand 1 selected by operand 2 into
;; scalar operand 0.  The output code indexes a four-entry table of `mov`
;; templates (components .x, .y, .z, .w) with INTVAL (operands[2]).
;; NOTE(review): the table lookup is unchecked; it relies on
;; nvptx_vector_index_operand limiting the index to 0..3 -- confirm.
1182 (define_insn "vec_extract<mode><Vecelem>"
1183 [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
1184 (vec_select:<VECELEM>
1185 (match_operand:VECIM 1 "nvptx_register_operand" "R")
1186 (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
1187 ""
1188 {
1189 static const char *const asms[4] = {
1190 "%.\\tmov%t0\\t%0, %1.x;",
1191 "%.\\tmov%t0\\t%0, %1.y;",
1192 "%.\\tmov%t0\\t%0, %1.z;",
1193 "%.\\tmov%t0\\t%0, %1.w;"
1194 };
1195 return asms[INTVAL (operands[2])];
1196 })
1197
1198 ;; Miscellaneous
1199
;; No-operation insn: pattern (const_int 0), empty output template.
1200 (define_insn "nop"
1201 [(const_int 0)]
1202 ""
1203 "")
1204
;; Pattern (const_int 1); emits the `exit;` instruction.
1205 (define_insn "exit"
1206 [(const_int 1)]
1207 ""
1208 "exit;")
1209
;; Pattern (const_int 2).  Unlike `nop`, this emits real assembly: a
;; braced scope that declares two .u32 registers (%nop_src, %nop_dst) and
;; moves one into the other.
;; NOTE(review): presumably used where an empty instruction stream is not
;; acceptable -- confirm at the call sites.
1210 (define_insn "fake_nop"
1211 [(const_int 2)]
1212 ""
1213 "{
1214 .reg .u32 %%nop_src;
1215 .reg .u32 %%nop_dst;
1216 mov.u32 %%nop_dst, %%nop_src;
1217 }")
1218
;; Function return.  The assembly text is produced by
;; nvptx_output_return ().  The insn is marked as not predicable.
1219 (define_insn "return"
1220 [(return)]
1221 ""
1222 {
1223 return nvptx_output_return ();
1224 }
1225 [(set_attr "predicable" "false")])
1226
;; Epilogue expander.  With TARGET_SOFT_STACK it first emits the Pmode
;; set_softstack insn on register SOFTSTACK_PREV_REGNUM; in all cases it
;; then emits the `return` jump insn.
1227 (define_expand "epilogue"
1228 [(clobber (const_int 0))]
1229 ""
1230 {
1231 if (TARGET_SOFT_STACK)
1232 emit_insn (gen_set_softstack (Pmode, gen_rtx_REG (Pmode,
1233 SOFTSTACK_PREV_REGNUM)));
1234 emit_jump_insn (gen_return ());
1235 DONE;
1236 })
1237
;; Nonlocal goto is not supported by this target: report that with
;; sorry (), then emit a nop and finish the expansion.  None of the four
;; operands is used.
1238 (define_expand "nonlocal_goto"
1239 [(match_operand 0 "" "")
1240 (match_operand 1 "" "")
1241 (match_operand 2 "" "")
1242 (match_operand 3 "" "")]
1243 ""
1244 {
1245 sorry ("target cannot support nonlocal goto.");
1246 emit_insn (gen_nop ());
1247 DONE;
1248 })
1249
;; Receiver side of a nonlocal goto: also unsupported, reported with
;; sorry ().  Unlike nonlocal_goto, the body does not end in DONE.
1250 (define_expand "nonlocal_goto_receiver"
1251 [(const_int 0)]
1252 ""
1253 {
1254 sorry ("target cannot support nonlocal goto.");
1255 })
1256
;; Dynamic stack allocation; operand 1 is the amount, operand 0 receives
;; the result.  With TARGET_SOFT_STACK: subtract operand 1 from the stack
;; pointer, emit set_softstack on the new stack pointer, and copy
;; virtual_stack_dynamic_rtx into operand 0.  Otherwise the request is
;; rejected with sorry () and a nop is emitted.
1257 (define_expand "allocate_stack"
1258 [(match_operand 0 "nvptx_register_operand")
1259 (match_operand 1 "nvptx_register_operand")]
1260 ""
1261 {
1262 if (TARGET_SOFT_STACK)
1263 {
1264 emit_move_insn (stack_pointer_rtx,
1265 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1266 emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
1267 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1268 DONE;
1269 }
1270 /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1271 only) but notes it is not implemented. The assembler emits a
1272 confused error message. Issue a blunt one now instead. */
1273 sorry ("target cannot support alloca.");
1274 emit_insn (gen_nop ());
1275 DONE;
1276 })
1277
;; UNSPEC_SET_SOFTSTACK on a register of each P mode; only valid when
;; TARGET_SOFT_STACK.  The assembly is produced by
;; nvptx_output_set_softstack, which is passed the register number of
;; operand 0.  The `@` prefix makes a mode-parameterized generator
;; available (called as gen_set_softstack (Pmode, ...) in this file).
1278 (define_insn "@set_softstack_<mode>"
1279 [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
1280 UNSPEC_SET_SOFTSTACK)]
1281 "TARGET_SOFT_STACK"
1282 {
1283 return nvptx_output_set_softstack (REGNO (operands[0]));
1284 })
1285
;; Restore the stack pointer at block exit.  With TARGET_SOFT_STACK, copy
;; operand 1 into operand 0 and emit set_softstack on operand 0; without
;; it, the expansion emits nothing.
1286 (define_expand "restore_stack_block"
1287 [(match_operand 0 "register_operand" "")
1288 (match_operand 1 "register_operand" "")]
1289 ""
1290 {
1291 if (TARGET_SOFT_STACK)
1292 {
1293 emit_move_insn (operands[0], operands[1]);
1294 emit_insn (gen_set_softstack (Pmode, operands[0]));
1295 }
1296 DONE;
1297 })
1298
;; Restore the stack pointer at function level: expands to nothing (the
;; body is just DONE and neither operand is used).
1299 (define_expand "restore_stack_function"
1300 [(match_operand 0 "register_operand" "")
1301 (match_operand 1 "register_operand" "")]
1302 ""
1303 {
1304 DONE;
1305 })
1306
;; Unconditional trap: emits `trap;` followed by `exit;`.
1307 (define_insn "trap"
1308 [(trap_if (const_int 1) (const_int 0))]
1309 ""
1310 "trap; exit;")
1311
;; Trap when BImode operand 0 is non-zero.  Both the `trap` and the `exit`
;; are emitted with the %j0 prefix, and the insn is marked as not
;; predicable.
;; NOTE(review): %j0 presumably prints operand 0 as a predicate guard --
;; confirm in the target's print_operand implementation.
1312 (define_insn "trap_if_true"
1313 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1314 (const_int 0))
1315 (const_int 0))]
1316 ""
1317 "%j0 trap; %j0 exit;"
1318 [(set_attr "predicable" "false")])
1319
;; Trap when BImode operand 0 is zero.  Both the `trap` and the `exit` are
;; emitted with the %J0 prefix, and the insn is marked as not predicable.
;; NOTE(review): %J0 presumably prints operand 0 as an inverted predicate
;; guard -- confirm in the target's print_operand implementation.
1320 (define_insn "trap_if_false"
1321 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1322 (const_int 0))
1323 (const_int 0))]
1324 ""
1325 "%J0 trap; %J0 exit;"
1326 [(set_attr "predicable" "false")])
1327
;; Conditional trap on an integer comparison (each SDIM mode).  The
;; comparison rtx (operand 0) is handed to nvptx_expand_compare and the
;; rtx it returns is passed to trap_if_true.  Operand 3 must satisfy
;; const0_operand.
1328 (define_expand "ctrap<mode>4"
1329 [(trap_if (match_operator 0 "nvptx_comparison_operator"
1330 [(match_operand:SDIM 1 "nvptx_register_operand")
1331 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
1332 (match_operand 3 "const0_operand"))]
1333 ""
1334 {
1335 rtx t = nvptx_expand_compare (operands[0]);
1336 emit_insn (gen_trap_if_true (t));
1337 DONE;
1338 })
1339
;; Load the size of an OpenACC parallelism dimension into operand 0.
;; Constant operand 1 selects the dimension and is used directly as an
;; index into the template table: 0 reads %nctaid.x (gang), 1 reads
;; %ntid.y (worker), 2 reads %ntid.x (vector).  The lookup is unchecked.
1340 (define_insn "oacc_dim_size"
1341 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1342 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1343 UNSPEC_DIM_SIZE))]
1344 ""
1345 {
1346 static const char *const asms[] =
1347 { /* Must match oacc_loop_levels ordering. */
1348 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1349 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1350 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1351 };
1352 return asms[INTVAL (operands[1])];
1353 })
1354
;; Load the position within an OpenACC parallelism dimension into
;; operand 0.  Constant operand 1 selects the dimension and is used
;; directly as an index into the template table: 0 reads %ctaid.x (gang),
;; 1 reads %tid.y (worker), 2 reads %tid.x (vector).  Unlike
;; oacc_dim_size this is an unspec_volatile.  The lookup is unchecked.
1355 (define_insn "oacc_dim_pos"
1356 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1357 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1358 UNSPECV_DIM_POS))]
1359 ""
1360 {
1361 static const char *const asms[] =
1362 { /* Must match oacc_loop_levels ordering. */
1363 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1364 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1365 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1366 };
1367 return asms[INTVAL (operands[1])];
1368 })
1369
;; Marker insn (UNSPECV_FORK) carrying a constant operand.  Its only
;; output is the text `// fork` followed by that constant.  Not
;; predicable.
1370 (define_insn "nvptx_fork"
1371 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1372 UNSPECV_FORK)]
1373 ""
1374 "// fork %0;"
1375 [(set_attr "predicable" "false")])
1376
;; Marker insn (UNSPECV_FORKED) carrying a constant operand.  Its only
;; output is the text `// forked` followed by that constant.  Not
;; predicable.
1377 (define_insn "nvptx_forked"
1378 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1379 UNSPECV_FORKED)]
1380 ""
1381 "// forked %0;"
1382 [(set_attr "predicable" "false")])
1383
;; Marker insn (UNSPECV_JOINING) carrying a constant operand.  Its only
;; output is the text `// joining` followed by that constant.  Not
;; predicable.
1384 (define_insn "nvptx_joining"
1385 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1386 UNSPECV_JOINING)]
1387 ""
1388 "// joining %0;"
1389 [(set_attr "predicable" "false")])
1390
;; Marker insn (UNSPECV_JOIN) carrying a constant operand.  Its only
;; output is the text `// join` followed by that constant.  Not
;; predicable.
1391 (define_insn "nvptx_join"
1392 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1393 UNSPECV_JOIN)]
1394 ""
1395 "// join %0;"
1396 [(set_attr "predicable" "false")])
1397
;; OpenACC fork expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0.  Then nvptx_expand_oacc_fork is called
;; with the integer value of operand 2.  The RTL template itself is never
;; emitted because the body ends in DONE.
1398 (define_expand "oacc_fork"
1399 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1400 (match_operand:SI 1 "general_operand" ""))
1401 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1402 UNSPECV_FORKED)]
1403 ""
1404 {
1405 if (operands[0] != const0_rtx)
1406 emit_move_insn (operands[0], operands[1]);
1407 nvptx_expand_oacc_fork (INTVAL (operands[2]));
1408 DONE;
1409 })
1410
;; OpenACC join expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0.  Then nvptx_expand_oacc_join is called
;; with the integer value of operand 2.  The RTL template itself is never
;; emitted because the body ends in DONE.
1411 (define_expand "oacc_join"
1412 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1413 (match_operand:SI 1 "general_operand" ""))
1414 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1415 UNSPECV_JOIN)]
1416 ""
1417 {
1418 if (operands[0] != const0_rtx)
1419 emit_move_insn (operands[0], operands[1]);
1420 nvptx_expand_oacc_join (INTVAL (operands[2]));
1421 DONE;
1422 })
1423
1424 ;; only 32-bit shuffles exist.
;; Shuffle (UNSPEC_SHUFFLE) for each BITS mode: operand 1 is the value,
;; operand 2 the SImode lane argument, operand 3 a constant.  The output
;; is always a `.b32` shfl with a fixed last argument of 31.
;; NOTE(review): operand 3 is printed with %S, presumably selecting the
;; shuffle variant -- confirm in the target's print_operand code.
1425 (define_insn "nvptx_shuffle<mode>"
1426 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1427 (unspec:BITS
1428 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1429 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1430 (match_operand:SI 3 "const_int_operand" "n")]
1431 UNSPEC_SHUFFLE))]
1432 ""
1433 "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
1434
;; Ballot vote (UNSPEC_VOTE_BALLOT): BImode predicate operand 1 in,
;; SImode operand 0 out, emitted as `vote.ballot.b32`.
1435 (define_insn "nvptx_vote_ballot"
1436 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1437 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1438 UNSPEC_VOTE_BALLOT))]
1439 ""
1440 "%.\\tvote.ballot.b32\\t%0, %1;")
1441
1442 ;; Patterns for OpenMP SIMD-via-SIMT lowering
1443
;; SIMT region entry for each P mode (UNSPECV_SIMT_ENTER).  Operand 0 is
;; the result register; operands 1 and 2 are the two inputs.  The
;; assembly is produced by nvptx_output_simt_enter, which receives all
;; three operands.
1444 (define_insn "@omp_simt_enter_<mode>"
1445 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1446 (unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
1447 (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
1448 UNSPECV_SIMT_ENTER))]
1449 ""
1450 {
1451 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
1452 })
1453
;; Expander for SIMT region entry.  Before emitting the Pmode
;; omp_simt_enter insn it records per-function state in cfun->machine:
;;  - simt_stack_size: set to HOST_WIDE_INT_M1U when operand 1 is not a
;;    constant, otherwise raised to at least operand 1's value;
;;  - simt_stack_align: raised to at least operand 2's value;
;;  - has_simtreg: set to true.
1454 (define_expand "omp_simt_enter"
1455 [(match_operand 0 "nvptx_register_operand" "=R")
1456 (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1457 (match_operand 2 "const_int_operand" "n")]
1458 ""
1459 {
1460 if (!CONST_INT_P (operands[1]))
1461 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1462 else
1463 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1464 cfun->machine->simt_stack_size);
1465 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1466 cfun->machine->simt_stack_align);
1467 cfun->machine->has_simtreg = true;
1468 emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1], operands[2]));
1469 DONE;
1470 })
1471
;; Expander for SIMT region exit: forwards operand 0 to the Pmode
;; instance of the omp_simt_exit insn.
1472 (define_expand "omp_simt_exit"
1473 [(match_operand 0 "nvptx_register_operand" "R")]
1474 ""
1475 {
1476 emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
1477 DONE;
1478 })
1479
;; SIMT region exit for each P mode (UNSPECV_SIMT_EXIT).  The assembly is
;; produced by nvptx_output_simt_exit from register operand 0.
1480 (define_insn "@omp_simt_exit_<mode>"
1481 [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
1482 UNSPECV_SIMT_EXIT)]
1483 ""
1484 {
1485 return nvptx_output_simt_exit (operands[0]);
1486 })
1487
1488 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
;; Represented as UNSPEC_LANEID and emitted as a `mov.u32` from the
;; %laneid special register.
1489 (define_insn "omp_simt_lane"
1490 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1491 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1492 ""
1493 "%.\\tmov.u32\\t%0, %%laneid;")
1494
1495 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1496 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
;; The barrier is the nvptx_nounroll insn, emitted right after the move.
1497 (define_expand "omp_simt_ordered"
1498 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1499 (match_operand:SI 1 "nvptx_register_operand" "R")]
1500 ""
1501 {
1502 emit_move_insn (operands[0], operands[1]);
1503 emit_insn (gen_nvptx_nounroll ());
1504 DONE;
1505 })
1506
1507 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
1508 ;; across lanes
;; Operand 0 is the destination, operand 1 the value and operand 2 the
;; SImode lane argument; the insn is built by nvptx_gen_shuffle with
;; kind SHUFFLE_BFLY.
1509 (define_expand "omp_simt_xchg_bfly"
1510 [(match_operand 0 "nvptx_register_operand" "=R")
1511 (match_operand 1 "nvptx_register_operand" "R")
1512 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1513 ""
1514 {
1515 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1516 SHUFFLE_BFLY));
1517 DONE;
1518 })
1519
1520 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1521 ;; from lane given by index in operand 2 to operand 0 in all lanes
;; The insn is built by nvptx_gen_shuffle with kind SHUFFLE_IDX.
1522 (define_expand "omp_simt_xchg_idx"
1523 [(match_operand 0 "nvptx_register_operand" "=R")
1524 (match_operand 1 "nvptx_register_operand" "R")
1525 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1526 ""
1527 {
1528 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1529 SHUFFLE_IDX));
1530 DONE;
1531 })
1532
1533 ;; Implement IFN_GOMP_SIMT_VOTE_ANY:
1534 ;; set operand 0 to zero iff all lanes supply zero in operand 1
;; Done in two steps: compute a BImode predicate (operand 1 != 0) into a
;; fresh register, then store the nvptx_vote_ballot result for that
;; predicate in operand 0.
1535 (define_expand "omp_simt_vote_any"
1536 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1537 (match_operand:SI 1 "nvptx_register_operand" "R")]
1538 ""
1539 {
1540 rtx pred = gen_reg_rtx (BImode);
1541 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1542 emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
1543 DONE;
1544 })
1545
1546 ;; Implement IFN_GOMP_SIMT_LAST_LANE:
1547 ;; set operand 0 to the lowest lane index that passed non-zero in operand 1
;; Done in three steps: compute a BImode predicate (operand 1 != 0),
;; take the nvptx_vote_ballot of that predicate into a temporary, and
;; apply ctzsi2 (count trailing zeros) to the temporary.
1548 (define_expand "omp_simt_last_lane"
1549 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1550 (match_operand:SI 1 "nvptx_register_operand" "R")]
1551 ""
1552 {
1553 rtx pred = gen_reg_rtx (BImode);
1554 rtx tmp = gen_reg_rtx (SImode);
1555 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1556 emit_insn (gen_nvptx_vote_ballot (tmp, pred));
1557 emit_insn (gen_ctzsi2 (operands[0], tmp));
1558 DONE;
1559 })
1560
1561 ;; extract parts of a 64 bit object into 2 32-bit ints
;; Operand 2 (each BITD mode) is the source; operand 0 receives the part
;; tagged (const_int 0) and operand 1 the part tagged (const_int 1).
;; Both are produced by a single `mov.b64` into the pair {%0,%1}.
1562 (define_insn "unpack<mode>si2"
1563 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1564 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
1565 (const_int 0)] UNSPEC_BIT_CONV))
1566 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
1567 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1568 ""
1569 "%.\\tmov.b64\\t{%0,%1}, %2;")
1570
1571 ;; pack 2 32-bit ints into a 64 bit object
;; SImode operands 1 and 2 are combined into operand 0 (each BITD mode)
;; by a single `mov.b64` from the pair {%1,%2}.
1572 (define_insn "packsi<mode>2"
1573 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
1574 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
1575 (match_operand:SI 2 "nvptx_register_operand" "R")]
1576 UNSPEC_BIT_CONV))]
1577 ""
1578 "%.\\tmov.b64\\t%0, {%1,%2};")
1579
1580 ;; Atomic insns.
1581
;; Atomic compare-and-swap expander for each SDIM mode.  It emits the
;; ..._1 insn with (oldval, memory, expected, newval, success model) and
;; then derives the boolean success output: a BImode compare of the
;; returned old value against the expected value, turned into 1 or 0 in
;; operand 0 via sel_truesi.  Operands 5 (is_weak) and 7 (failure model)
;; are not used by the body.
1582 (define_expand "atomic_compare_and_swap<mode>"
1583 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
1584 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
1585 (match_operand:SDIM 2 "memory_operand") ;; memory
1586 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
1587 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
1588 (match_operand:SI 5 "const_int_operand") ;; is_weak
1589 (match_operand:SI 6 "const_int_operand") ;; success model
1590 (match_operand:SI 7 "const_int_operand")] ;; failure model
1591 ""
1592 {
1593 emit_insn (gen_atomic_compare_and_swap<mode>_1
1594 (operands[1], operands[2], operands[3], operands[4], operands[6]));
1595
1596 rtx cond = gen_reg_rtx (BImode);
1597 emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
1598 emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
1599 DONE;
1600 })
1601
;; The compare-and-swap instruction itself, emitted as `atom...cas`.
;; Operand 0 receives the result, operand 1 is the memory location,
;; operand 2 the comparison value and operand 3 the replacement value.
;; The second set marks the memory as modified with a volatile unspec.
;; Operand 4 (the memory model) does not appear in the output template.
;; The insn has its "atomic" attribute set to true.
1602 (define_insn "atomic_compare_and_swap<mode>_1"
1603 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1604 (unspec_volatile:SDIM
1605 [(match_operand:SDIM 1 "memory_operand" "+m")
1606 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
1607 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
1608 (match_operand:SI 4 "const_int_operand")]
1609 UNSPECV_CAS))
1610 (set (match_dup 1)
1611 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
1612 ""
1613 "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
1614 [(set_attr "atomic" "true")])
1615
;; Atomic exchange for each SDIM mode, emitted as `atom...exch`.  Operand
;; 0 receives the result of the volatile unspec on memory operand 1, and
;; the memory is set to operand 2.  Operand 3 (the memory model) does not
;; appear in the output template.  "atomic" attribute is true.
1616 (define_insn "atomic_exchange<mode>"
1617 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
1618 (unspec_volatile:SDIM
1619 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
1620 (match_operand:SI 3 "const_int_operand")] ;; model
1621 UNSPECV_XCHG))
1622 (set (match_dup 1)
1623 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
1624 ""
1625 "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
1626 [(set_attr "atomic" "true")])
1627
;; Atomic fetch-and-add for each SDIM mode, emitted as `atom...add`.
;; Memory operand 1 is updated with (operand 1 + operand 2) wrapped in a
;; volatile UNSPECV_LOCK, and operand 0 is set from the memory operand.
;; Operand 3 (the memory model) does not appear in the output template.
;; "atomic" attribute is true.
1628 (define_insn "atomic_fetch_add<mode>"
1629 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1630 (unspec_volatile:SDIM
1631 [(plus:SDIM (match_dup 1)
1632 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1633 (match_operand:SI 3 "const_int_operand")] ;; model
1634 UNSPECV_LOCK))
1635 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1636 (match_dup 1))]
1637 ""
1638 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1639 [(set_attr "atomic" "true")])
1640
;; SFmode counterpart of atomic_fetch_add<mode>: same RTL shape and the
;; same `atom...add` template, but the addend uses the "RF" constraint
;; instead of "Ri".  "atomic" attribute is true.
1641 (define_insn "atomic_fetch_addsf"
1642 [(set (match_operand:SF 1 "memory_operand" "+m")
1643 (unspec_volatile:SF
1644 [(plus:SF (match_dup 1)
1645 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
1646 (match_operand:SI 3 "const_int_operand")] ;; model
1647 UNSPECV_LOCK))
1648 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
1649 (match_dup 1))]
1650 ""
1651 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1652 [(set_attr "atomic" "true")])
1653
;; Code iterator over the three bitwise operations, and the attribute
;; giving the name text for each (note that ior maps to `or`).
1654 (define_code_iterator any_logic [and ior xor])
1655 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
1656
;; Atomic fetch-and-{and,or,xor} for each SDIM mode, emitted as
;; `atom...b<size>.<op>`.  Same RTL shape as atomic_fetch_add<mode>.
;; The condition always enables the SImode variants; the other SDIM
;; variants additionally require TARGET_SM35.  "atomic" attribute is true.
1657 (define_insn "atomic_fetch_<logic><mode>"
1658 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1659 (unspec_volatile:SDIM
1660 [(any_logic:SDIM (match_dup 1)
1661 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1662 (match_operand:SI 3 "const_int_operand")] ;; model
1663 UNSPECV_LOCK))
1664 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1665 (match_dup 1))]
1666 "<MODE>mode == SImode || TARGET_SM35"
1667 "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
1668 [(set_attr "atomic" "true")])
1669
;; Barrier synchronization (UNSPECV_BARSYNC), emitted as `bar.sync`.
;; Operand 0 is always printed as the first argument.  Constant operand 1
;; is appended as a second argument only when it is non-zero; when it is
;; zero the one-argument form is used.  Not predicable.
1670 (define_insn "nvptx_barsync"
1671 [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
1672 (match_operand:SI 1 "const_int_operand")]
1673 UNSPECV_BARSYNC)]
1674 ""
1675 {
1676 if (INTVAL (operands[1]) == 0)
1677 return "\\tbar.sync\\t%0;";
1678 else
1679 return "\\tbar.sync\\t%0, %1;";
1680 }
1681 [(set_attr "predicable" "false")])
1682
;; Expander for the memory_barrier pattern.  The preparation code creates
;; operand 0 itself: a volatile BLKmode MEM whose address is a Pmode
;; SCRATCH.  The template then sets that MEM to an UNSPECV_MEMBAR of
;; itself, which is the shape matched by the *memory_barrier insn.
1683 (define_expand "memory_barrier"
1684 [(set (match_dup 0)
1685 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1686 ""
1687 {
1688 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1689 MEM_VOLATILE_P (operands[0]) = 1;
1690 })
1691
1692 ;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
1693 ;; (corresponding to cuda functions threadfence_block, threadfence and
1694 ;; threadfence_system). For the insn memory_barrier we use membar.sys. This
1695 ;; may be overconservative, but before using membar.gl instead we'll need to
1696 ;; explain in detail why it's safe to use. For now, use membar.sys.
;; Matches a BLKmode set of operand 0 to an UNSPECV_MEMBAR of itself.
;; The output does not reference any operand.  Not predicable.
1697 (define_insn "*memory_barrier"
1698 [(set (match_operand:BLK 0 "" "")
1699 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1700 ""
1701 "\\tmembar.sys;"
1702 [(set_attr "predicable" "false")])
1703
;; Expander for a CTA-level memory barrier.  Same construction as
;; memory_barrier -- operand 0 is a volatile BLKmode MEM at a Pmode
;; SCRATCH address -- but the unspec is UNSPECV_MEMBAR_CTA, the shape
;; matched by the *nvptx_membar_cta insn.
1704 (define_expand "nvptx_membar_cta"
1705 [(set (match_dup 0)
1706 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1707 ""
1708 {
1709 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1710 MEM_VOLATILE_P (operands[0]) = 1;
1711 })
1712
;; Matches a BLKmode set of operand 0 to an UNSPECV_MEMBAR_CTA of itself
;; and emits `membar.cta`.  The output does not reference any operand.
;; Not predicable.
1713 (define_insn "*nvptx_membar_cta"
1714 [(set (match_operand:BLK 0 "" "")
1715 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1716 ""
1717 "\\tmembar.cta;"
1718 [(set_attr "predicable" "false")])
1719
;; Volatile marker insn (UNSPECV_NOUNROLL) that emits a `.pragma`
;; directive with the string nounroll.  Not predicable.  Used by
;; omp_simt_ordered in this file.
1720 (define_insn "nvptx_nounroll"
1721 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
1722 ""
1723 "\\t.pragma \\\"nounroll\\\";"
1724 [(set_attr "predicable" "false")])
1725
;; Volatile UNSPECV_RED_PART insn: DImode operand 0 is set from constant
;; operand 1.  The assembly is produced by nvptx_output_red_partition,
;; which receives both operands.  Not predicable.
;; NOTE(review): operand 0 uses the generic nonimmediate_operand
;; predicate together with the "R" constraint, unlike the
;; nvptx_register_operand used elsewhere -- confirm this is intended.
1726 (define_insn "nvptx_red_partition"
1727 [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
1728 (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
1729 UNSPECV_RED_PART))]
1730 ""
1731 {
1732 return nvptx_output_red_partition (operands[0], operands[1]);
1733 }
1734 [(set_attr "predicable" "false")])