1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2020 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_c_enum "unspec" [
33 UNSPEC_FPINT_NEARBYINT
53 (define_c_enum "unspecv" [
;; Insn attribute "subregs_ok": either "false" or "true", defaulting to
;; "false".  The move, extension and truncation insns in this file opt in
;; with (set_attr "subregs_ok" "true").
;; NOTE(review): presumably this marks insns whose operands may be
;; SUBREGs -- confirm against the code that reads the attribute.
75 (define_attr "subregs_ok" "false,true"
76 (const_string "false"))
;; Insn attribute "atomic": either "false" or "true", defaulting to
;; "false".
;; NOTE(review): nothing in this part of the file sets it to "true";
;; presumably the atomic patterns defined elsewhere do -- confirm.
78 (define_attr "atomic" "false,true"
79 (const_string "false"))
81 ;; The nvptx operand predicates, in general, don't permit subregs and
82 ;; accept only literal constants; this differs from the generic ones,
83 ;; which permit subregs and symbolic constants (as appropriate).
84 (define_predicate "nvptx_register_operand"
87 return register_operand (op, mode);
90 (define_predicate "nvptx_nonimmediate_operand"
91 (match_code "mem,reg")
93 return (REG_P (op) ? register_operand (op, mode)
94 : memory_operand (op, mode));
97 (define_predicate "nvptx_nonmemory_operand"
98 (match_code "reg,const_int,const_double")
100 return (REG_P (op) ? register_operand (op, mode)
101 : immediate_operand (op, mode));
;; Matches the integer constant zero only: OP must be a CONST_INT and
;; must compare equal to const0_rtx.  Patterns in this file use it as the
;; right-hand side of BImode predicate tests (e.g. cbranchbi4, cstorebi4).
104 (define_predicate "const0_operand"
105 (and (match_code "const_int")
106 (match_test "op == const0_rtx")))
108 ;; True if this operator is valid for predication.
;; Only the EQ and NE codes are accepted.  Patterns in this file pair it
;; with a BImode register operand and a const0_operand, i.e. a test of a
;; BImode register against zero.
109 (define_predicate "predicate_operator"
110 (match_code "eq,ne"))
112 (define_predicate "ne_operator"
;; Comparison codes accepted for integer comparisons: eq and ne plus the
;; signed (le, ge, lt, gt) and unsigned (leu, geu, ltu, gtu) orderings.
;; The HSDIM variants of *cmp, cbranch, setcc_int, setcc_float and cstore
;; in this file use it.
115 (define_predicate "nvptx_comparison_operator"
116 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
;; Comparison codes accepted for floating-point comparisons: eq, ne and
;; the plain orderings (le, ge, lt, gt), the uneq/unle/unge/unlt/ungt
;; codes, and the explicit unordered/ordered tests.  Unlike
;; nvptx_comparison_operator, no unsigned codes are listed.  The SDFM
;; variants of *cmp, cbranch, setcc_int, setcc_float and cstore in this
;; file use it.
118 (define_predicate "nvptx_float_comparison_operator"
119 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
;; Matches a CONST_INT whose value, read through UINTVAL, is less than 4.
;; NOTE(review): UINTVAL presumably yields the value as unsigned, so
;; negative constants are rejected and only 0..3 are accepted -- confirm.
;; NOTE(review): the VECIM iterator in this file lists only the
;; two-element modes V2SI and V2DI, yet indices 2 and 3 also pass this
;; predicate -- confirm that is intended by the patterns that use it.
121 (define_predicate "nvptx_vector_index_operand"
122 (and (match_code "const_int")
123 (match_test "UINTVAL (op) < 4")))
125 ;; Test for a valid operand for a call instruction.
126 (define_predicate "call_insn_operand"
127 (match_code "symbol_ref,reg")
129 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
132 ;; Return true if OP is a call with parallel USEs of the argument
134 (define_predicate "call_operation"
135 (match_code "parallel")
137 int arg_end = XVECLEN (op, 0);
139 for (int i = 1; i < arg_end; i++)
141 rtx elt = XVECEXP (op, 0, i);
143 if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
;; Insn attribute "predicable": either "false" or "true", defaulting to
;; "true".  The conditional branch patterns in this file (br_true,
;; br_false, br_true_uni, br_false_uni) override it to "false".
149 (define_attr "predicable" "false,true"
150 (const_string "true"))
153 [(match_operator 0 "predicate_operator"
154 [(match_operand:BI 1 "nvptx_register_operand" "")
155 (match_operand:BI 2 "const0_operand" "")])]
;; Constraint "P0": satisfied only by a CONST_INT whose value (ival) is 0.
;; The BImode move pattern in this file uses it for its second
;; alternative ("R,P0,Pn").
160 (define_constraint "P0"
161 "An integer with the value 0."
162 (and (match_code "const_int")
163 (match_test "ival == 0")))
;; Constraint "P1": satisfied only by a CONST_INT whose value (ival) is 1.
;; NOTE(review): no pattern in this part of the file references it;
;; confirm it is still used elsewhere.
165 (define_constraint "P1"
166 "An integer with the value 1."
167 (and (match_code "const_int")
168 (match_test "ival == 1")))
;; Constraint "Pn": satisfied only by a CONST_INT whose value (ival) is
;; -1.  The BImode move pattern in this file uses it for its third
;; alternative ("R,P0,Pn").
170 (define_constraint "Pn"
171 "An integer with the value -1."
172 (and (match_code "const_int")
173 (match_test "ival == -1")))
175 (define_constraint "R"
;; Constraint "Ia": satisfied by every CONST_INT; the match_test is the
;; constant "true", so it never rejects anything.
;; NOTE(review): the always-true test presumably exists only to keep the
;; (and (match_code "const_int") ...) shape that the constraint generator
;; expects for integer-constant constraints -- confirm before simplifying.
179 (define_constraint "Ia"
180 "Any integer constant."
181 (and (match_code "const_int") (match_test "true")))
;; Mode iterators.  A pattern written with one of these names is
;; instantiated once for every machine mode in the iterator's list.
;; The names abbreviate the lists: B, Q, H, S, D stand for BI, QI, HI,
;; SI, DI and "SDF" for SF and DF.
;;
;; Scalar integer and floating-point sets.
183 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
184 (define_mode_iterator QHSDIM [QI HI SI DI])
185 (define_mode_iterator HSDIM [HI SI DI])
186 (define_mode_iterator BHSDIM [BI HI SI DI])
187 (define_mode_iterator SDIM [SI DI])
188 (define_mode_iterator SDISDFM [SI DI SF DF])
189 (define_mode_iterator QHIM [QI HI])
190 (define_mode_iterator QHSIM [QI HI SI])
191 (define_mode_iterator SDFM [SF DF])
;; Complex floating-point modes.
192 (define_mode_iterator SDCM [SC DC])
;; One integer and one floating-point mode each.
;; NOTE(review): presumably grouped by bit width (32-bit for BITS, 64-bit
;; for BITD) -- confirm against the patterns that use them.
193 (define_mode_iterator BITS [SI SF])
194 (define_mode_iterator BITD [DI DF])
;; Integer vector modes.
195 (define_mode_iterator VECIM [V2SI V2DI])
197 ;; This mode iterator allows :P to be used for patterns that operate on
198 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
;; Each entry carries a condition on Pmode (SImode or DImode), so only
;; the instance whose mode equals Pmode is enabled.  The call patterns in
;; this file use :P for the call address operand.
199 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
201 ;; Define element mode for each vector mode.
;; VECELEM gives the element mode in upper case; the vec_set patterns in
;; this file use it as an operand mode, written (match_operand:<VECELEM>).
;; Vecelem gives the same element mode in lower case.
;; NOTE(review): the lower-case form is presumably for building pattern
;; names -- no use is visible in this part of the file; confirm.
202 (define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
203 (define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])
205 ;; We should get away with not defining memory alternatives, since we don't
206 ;; get variables in this mode and pseudos are never spilled.
208 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
209 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
212 %.\\tmov%t0\\t%0, %1;
213 %.\\tsetp.eq.u32\\t%0, 1, 0;
214 %.\\tsetp.eq.u32\\t%0, 1, 1;")
216 (define_insn "*mov<mode>_insn"
217 [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
218 (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
219 "!MEM_P (operands[0]) || REG_P (operands[1])"
221 if (which_alternative == 1)
222 return "%.\\tld%A1%u1\\t%0, %1;";
223 if (which_alternative == 2)
224 return "%.\\tst%A0%u0\\t%0, %1;";
226 return nvptx_output_mov_insn (operands[0], operands[1]);
228 [(set_attr "subregs_ok" "true")])
230 (define_insn "*mov<mode>_insn"
231 [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
232 (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
233 "!MEM_P (operands[0]) || REG_P (operands[1])"
235 if (which_alternative == 1)
236 return "%.\\tld%A1%u1\\t%0, %1;";
237 if (which_alternative == 2)
238 return "%.\\tst%A0%u0\\t%0, %1;";
240 return nvptx_output_mov_insn (operands[0], operands[1]);
242 [(set_attr "subregs_ok" "true")])
244 (define_insn "*mov<mode>_insn"
245 [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
246 (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
247 "!MEM_P (operands[0]) || REG_P (operands[1])"
249 if (which_alternative == 1)
250 return "%.\\tld%A1%u0\\t%0, %1;";
251 if (which_alternative == 2)
252 return "%.\\tst%A0%u1\\t%0, %1;";
254 return nvptx_output_mov_insn (operands[0], operands[1]);
256 [(set_attr "subregs_ok" "true")])
258 (define_insn "load_arg_reg<mode>"
259 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
260 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
263 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
265 (define_insn "load_arg_reg<mode>"
266 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
267 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
270 "%.\\tmov%t0\\t%0, %%ar%1;")
272 (define_expand "mov<mode>"
273 [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
274 (match_operand:VECIM 1 "general_operand" ""))]
277 if (MEM_P (operands[0]) && !REG_P (operands[1]))
279 rtx tmp = gen_reg_rtx (<MODE>mode);
280 emit_move_insn (tmp, operands[1]);
281 emit_move_insn (operands[0], tmp);
286 (define_expand "mov<mode>"
287 [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
288 (match_operand:QHSDISDFM 1 "general_operand" ""))]
291 if (MEM_P (operands[0]) && !REG_P (operands[1]))
293 rtx tmp = gen_reg_rtx (<MODE>mode);
294 emit_move_insn (tmp, operands[1]);
295 emit_move_insn (operands[0], tmp);
299 if (GET_CODE (operands[1]) == LABEL_REF)
300 sorry ("target cannot support label values");
303 (define_insn "zero_extendqihi2"
304 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
305 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
308 %.\\tcvt.u16.u%T1\\t%0, %1;
309 %.\\tld%A1.u8\\t%0, %1;"
310 [(set_attr "subregs_ok" "true")])
312 (define_insn "zero_extend<mode>si2"
313 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
314 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
317 %.\\tcvt.u32.u%T1\\t%0, %1;
318 %.\\tld%A1.u%T1\\t%0, %1;"
319 [(set_attr "subregs_ok" "true")])
321 (define_insn "zero_extend<mode>di2"
322 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
323 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
326 %.\\tcvt.u64.u%T1\\t%0, %1;
327 %.\\tld%A1%u1\\t%0, %1;"
328 [(set_attr "subregs_ok" "true")])
330 (define_insn "extendqihi2"
331 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
332 (sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))]
334 "%.\\tcvt.s16.s8\\t%0, %1;"
335 [(set_attr "subregs_ok" "true")])
337 (define_insn "extend<mode>si2"
338 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
339 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
342 %.\\tcvt.s32.s%T1\\t%0, %1;
343 %.\\tld%A1.s%T1\\t%0, %1;"
344 [(set_attr "subregs_ok" "true")])
346 (define_insn "extend<mode>di2"
347 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
348 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
351 %.\\tcvt.s64.s%T1\\t%0, %1;
352 %.\\tld%A1.s%T1\\t%0, %1;"
353 [(set_attr "subregs_ok" "true")])
355 (define_insn "trunchiqi2"
356 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
357 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
360 %.\\tcvt%t0.u16\\t%0, %1;
361 %.\\tst%A0.u8\\t%0, %1;"
362 [(set_attr "subregs_ok" "true")])
364 (define_insn "truncsi<mode>2"
365 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
366 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
369 %.\\tcvt%t0.u32\\t%0, %1;
370 %.\\tst%A0.u%T0\\t%0, %1;"
371 [(set_attr "subregs_ok" "true")])
373 (define_insn "truncdi<mode>2"
374 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
375 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
378 %.\\tcvt%t0.u64\\t%0, %1;
379 %.\\tst%A0.u%T0\\t%0, %1;"
380 [(set_attr "subregs_ok" "true")])
382 ;; Integer arithmetic
384 (define_insn "add<mode>3"
385 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
386 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
387 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
389 "%.\\tadd%t0\\t%0, %1, %2;")
391 (define_insn "*vadd_addsi4"
392 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
393 (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
394 (match_operand:SI 2 "nvptx_register_operand" "R"))
395 (match_operand:SI 3 "nvptx_register_operand" "R")))]
397 "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;")
399 (define_insn "*vsub_addsi4"
400 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
401 (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
402 (match_operand:SI 2 "nvptx_register_operand" "R"))
403 (match_operand:SI 3 "nvptx_register_operand" "R")))]
405 "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;")
407 (define_insn "sub<mode>3"
408 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
409 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
410 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
412 "%.\\tsub%t0\\t%0, %1, %2;")
414 (define_insn "mul<mode>3"
415 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
416 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
417 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
419 "%.\\tmul.lo%t0\\t%0, %1, %2;")
421 (define_insn "*mad<mode>3"
422 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
423 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
424 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
425 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
427 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
429 (define_insn "div<mode>3"
430 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
431 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
432 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
434 "%.\\tdiv.s%T0\\t%0, %1, %2;")
436 (define_insn "udiv<mode>3"
437 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
438 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
439 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
441 "%.\\tdiv.u%T0\\t%0, %1, %2;")
443 (define_insn "mod<mode>3"
444 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
445 (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
446 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
448 "%.\\trem.s%T0\\t%0, %1, %2;")
450 (define_insn "umod<mode>3"
451 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
452 (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
453 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
455 "%.\\trem.u%T0\\t%0, %1, %2;")
457 (define_insn "smin<mode>3"
458 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
459 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
460 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
462 "%.\\tmin.s%T0\\t%0, %1, %2;")
464 (define_insn "umin<mode>3"
465 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
466 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
467 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
469 "%.\\tmin.u%T0\\t%0, %1, %2;")
471 (define_insn "smax<mode>3"
472 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
473 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
474 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
476 "%.\\tmax.s%T0\\t%0, %1, %2;")
478 (define_insn "umax<mode>3"
479 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
480 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
481 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
483 "%.\\tmax.u%T0\\t%0, %1, %2;")
485 (define_insn "abs<mode>2"
486 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
487 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
489 "%.\\tabs.s%T0\\t%0, %1;")
491 (define_insn "neg<mode>2"
492 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
493 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
495 "%.\\tneg.s%T0\\t%0, %1;")
497 (define_insn "one_cmpl<mode>2"
498 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
499 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
501 "%.\\tnot.b%T0\\t%0, %1;")
503 (define_insn "bitrev<mode>2"
504 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
505 (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
508 "%.\\tbrev.b%T0\\t%0, %1;")
510 (define_insn "clz<mode>2"
511 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
512 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
514 "%.\\tclz.b%T1\\t%0, %1;")
516 (define_expand "ctz<mode>2"
517 [(set (match_operand:SI 0 "nvptx_register_operand" "")
518 (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
521 rtx tmpreg = gen_reg_rtx (<MODE>mode);
522 emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
523 emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
527 (define_insn "popcount<mode>2"
528 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
529 (popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
531 "%.\\tpopc.b%T1\\t%0, %1;")
533 ;; Multiplication variants
535 (define_insn "mulhisi3"
536 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
537 (mult:SI (sign_extend:SI
538 (match_operand:HI 1 "nvptx_register_operand" "R"))
540 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
542 "%.\\tmul.wide.s16\\t%0, %1, %2;")
544 (define_insn "mulsidi3"
545 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
546 (mult:DI (sign_extend:DI
547 (match_operand:SI 1 "nvptx_register_operand" "R"))
549 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
551 "%.\\tmul.wide.s32\\t%0, %1, %2;")
553 (define_insn "umulhisi3"
554 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
555 (mult:SI (zero_extend:SI
556 (match_operand:HI 1 "nvptx_register_operand" "R"))
558 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
560 "%.\\tmul.wide.u16\\t%0, %1, %2;")
562 (define_insn "umulsidi3"
563 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
564 (mult:DI (zero_extend:DI
565 (match_operand:SI 1 "nvptx_register_operand" "R"))
567 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
569 "%.\\tmul.wide.u32\\t%0, %1, %2;")
571 (define_insn "smulhi3_highpart"
572 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
575 (mult:SI (sign_extend:SI
576 (match_operand:HI 1 "nvptx_register_operand" "R"))
578 (match_operand:HI 2 "nvptx_register_operand" "R")))
581 "%.\\tmul.hi.s16\\t%0, %1, %2;")
583 (define_insn "smulsi3_highpart"
584 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
587 (mult:DI (sign_extend:DI
588 (match_operand:SI 1 "nvptx_register_operand" "R"))
590 (match_operand:SI 2 "nvptx_register_operand" "R")))
593 "%.\\tmul.hi.s32\\t%0, %1, %2;")
595 (define_insn "umulhi3_highpart"
596 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
599 (mult:SI (zero_extend:SI
600 (match_operand:HI 1 "nvptx_register_operand" "R"))
602 (match_operand:HI 2 "nvptx_register_operand" "R")))
605 "%.\\tmul.hi.u16\\t%0, %1, %2;")
607 (define_insn "umulsi3_highpart"
608 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
611 (mult:DI (zero_extend:DI
612 (match_operand:SI 1 "nvptx_register_operand" "R"))
614 (match_operand:SI 2 "nvptx_register_operand" "R")))
617 "%.\\tmul.hi.u32\\t%0, %1, %2;")
621 (define_insn "ashl<mode>3"
622 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
623 (ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
624 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
626 "%.\\tshl.b%T0\\t%0, %1, %2;")
628 (define_insn "ashr<mode>3"
629 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
630 (ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
631 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
633 "%.\\tshr.s%T0\\t%0, %1, %2;")
635 (define_insn "lshr<mode>3"
636 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
637 (lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
638 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
640 "%.\\tshr.u%T0\\t%0, %1, %2;")
642 ;; Logical operations
644 (define_insn "and<mode>3"
645 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
646 (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
647 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
649 "%.\\tand.b%T0\\t%0, %1, %2;")
651 (define_insn "ior<mode>3"
652 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
653 (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
654 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
656 "%.\\tor.b%T0\\t%0, %1, %2;")
658 (define_insn "xor<mode>3"
659 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
660 (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
661 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
663 "%.\\txor.b%T0\\t%0, %1, %2;")
665 ;; Comparisons and branches
667 (define_insn "*cmp<mode>"
668 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
669 (match_operator:BI 1 "nvptx_comparison_operator"
670 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
671 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
673 "%.\\tsetp%c1\\t%0, %2, %3;")
675 (define_insn "*cmp<mode>"
676 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
677 (match_operator:BI 1 "nvptx_float_comparison_operator"
678 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
679 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
681 "%.\\tsetp%c1\\t%0, %2, %3;")
685 (label_ref (match_operand 0 "" "")))]
689 (define_insn "br_true"
691 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
693 (label_ref (match_operand 1 "" ""))
697 [(set_attr "predicable" "false")])
699 (define_insn "br_false"
701 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
703 (label_ref (match_operand 1 "" ""))
707 [(set_attr "predicable" "false")])
709 ;; unified conditional branch
710 (define_insn "br_true_uni"
711 [(set (pc) (if_then_else
712 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
713 UNSPEC_BR_UNIFIED) (const_int 0))
714 (label_ref (match_operand 1 "" "")) (pc)))]
716 "%j0\\tbra.uni\\t%l1;"
717 [(set_attr "predicable" "false")])
719 (define_insn "br_false_uni"
720 [(set (pc) (if_then_else
721 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
722 UNSPEC_BR_UNIFIED) (const_int 0))
723 (label_ref (match_operand 1 "" "")) (pc)))]
725 "%J0\\tbra.uni\\t%l1;"
726 [(set_attr "predicable" "false")])
728 (define_expand "cbranch<mode>4"
730 (if_then_else (match_operator 0 "nvptx_comparison_operator"
731 [(match_operand:HSDIM 1 "nvptx_register_operand" "")
732 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
733 (label_ref (match_operand 3 "" ""))
737 rtx t = nvptx_expand_compare (operands[0]);
739 operands[1] = XEXP (t, 0);
740 operands[2] = XEXP (t, 1);
743 (define_expand "cbranch<mode>4"
745 (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
746 [(match_operand:SDFM 1 "nvptx_register_operand" "")
747 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
748 (label_ref (match_operand 3 "" ""))
752 rtx t = nvptx_expand_compare (operands[0]);
754 operands[1] = XEXP (t, 0);
755 operands[2] = XEXP (t, 1);
758 (define_expand "cbranchbi4"
760 (if_then_else (match_operator 0 "predicate_operator"
761 [(match_operand:BI 1 "nvptx_register_operand" "")
762 (match_operand:BI 2 "const0_operand" "")])
763 (label_ref (match_operand 3 "" ""))
768 ;; Conditional stores
770 (define_insn "setcc_from_bi"
771 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
772 (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
775 "%.\\tselp%t0 %0,-1,0,%1;")
777 (define_insn "sel_true<mode>"
778 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
780 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
781 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
782 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
784 "%.\\tselp%t0\\t%0, %2, %3, %1;")
786 (define_insn "sel_true<mode>"
787 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
789 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
790 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
791 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
793 "%.\\tselp%t0\\t%0, %2, %3, %1;")
795 (define_insn "sel_false<mode>"
796 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
798 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
799 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
800 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
802 "%.\\tselp%t0\\t%0, %3, %2, %1;")
804 (define_insn "sel_false<mode>"
805 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
807 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
808 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
809 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
811 "%.\\tselp%t0\\t%0, %3, %2, %1;")
813 (define_insn "setcc_int<mode>"
814 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
815 (match_operator:SI 1 "nvptx_comparison_operator"
816 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
817 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
819 "%.\\tset%t0%c1\\t%0, %2, %3;")
821 (define_insn "setcc_int<mode>"
822 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
823 (match_operator:SI 1 "nvptx_float_comparison_operator"
824 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
825 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
827 "%.\\tset%t0%c1\\t%0, %2, %3;")
829 (define_insn "setcc_float<mode>"
830 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
831 (match_operator:SF 1 "nvptx_comparison_operator"
832 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
833 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
835 "%.\\tset%t0%c1\\t%0, %2, %3;")
837 (define_insn "setcc_float<mode>"
838 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
839 (match_operator:SF 1 "nvptx_float_comparison_operator"
840 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
841 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
843 "%.\\tset%t0%c1\\t%0, %2, %3;")
845 (define_expand "cstorebi4"
846 [(set (match_operand:SI 0 "nvptx_register_operand")
847 (match_operator:SI 1 "ne_operator"
848 [(match_operand:BI 2 "nvptx_register_operand")
849 (match_operand:BI 3 "const0_operand")]))]
853 (define_expand "cstore<mode>4"
854 [(set (match_operand:SI 0 "nvptx_register_operand")
855 (match_operator:SI 1 "nvptx_comparison_operator"
856 [(match_operand:HSDIM 2 "nvptx_register_operand")
857 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
861 (define_expand "cstore<mode>4"
862 [(set (match_operand:SI 0 "nvptx_register_operand")
863 (match_operator:SI 1 "nvptx_float_comparison_operator"
864 [(match_operand:SDFM 2 "nvptx_register_operand")
865 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
871 (define_insn "call_insn_<mode>"
872 [(match_parallel 2 "call_operation"
873 [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
874 (match_operand 1))])]
877 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
880 (define_insn "call_value_insn_<mode>"
881 [(match_parallel 3 "call_operation"
882 [(set (match_operand 0 "nvptx_register_operand" "=R")
883 (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
884 (match_operand 2)))])]
887 return nvptx_output_call_insn (insn, operands[0], operands[1]);
890 (define_expand "call"
891 [(match_operand 0 "" "")]
894 nvptx_expand_call (NULL_RTX, operands[0]);
898 (define_expand "call_value"
899 [(match_operand 0 "" "")
900 (match_operand 1 "" "")]
903 nvptx_expand_call (operands[0], operands[1]);
907 ;; Floating point arithmetic.
909 (define_insn "add<mode>3"
910 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
911 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
912 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
914 "%.\\tadd%t0\\t%0, %1, %2;")
916 (define_insn "sub<mode>3"
917 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
918 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
919 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
921 "%.\\tsub%t0\\t%0, %1, %2;")
923 (define_insn "mul<mode>3"
924 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
925 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
926 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
928 "%.\\tmul%t0\\t%0, %1, %2;")
930 (define_insn "fma<mode>4"
931 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
932 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
933 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
934 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
936 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
938 (define_insn "*recip<mode>2"
939 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
941 (match_operand:SDFM 2 "const_double_operand" "F")
942 (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
943 "CONST_DOUBLE_P (operands[2])
944 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
945 "%.\\trcp%#%t0\\t%0, %1;")
947 (define_insn "div<mode>3"
948 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
949 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
950 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
952 "%.\\tdiv%#%t0\\t%0, %1, %2;")
954 (define_insn "copysign<mode>3"
955 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
956 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
957 (match_operand:SDFM 2 "nvptx_register_operand" "R")]
960 "%.\\tcopysign%t0\\t%0, %2, %1;")
962 (define_insn "smin<mode>3"
963 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
964 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
965 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
967 "%.\\tmin%t0\\t%0, %1, %2;")
969 (define_insn "smax<mode>3"
970 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
971 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
972 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
974 "%.\\tmax%t0\\t%0, %1, %2;")
976 (define_insn "abs<mode>2"
977 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
978 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
980 "%.\\tabs%t0\\t%0, %1;")
982 (define_insn "neg<mode>2"
983 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
984 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
986 "%.\\tneg%t0\\t%0, %1;")
988 (define_insn "sqrt<mode>2"
989 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
990 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
992 "%.\\tsqrt%#%t0\\t%0, %1;")
994 (define_expand "sincossf3"
995 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
996 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
998 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
999 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
1000 "flag_unsafe_math_optimizations"
1002 operands[2] = make_safe_from (operands[2], operands[0]);
1005 (define_insn "sinsf2"
1006 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1007 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1009 "flag_unsafe_math_optimizations"
1010 "%.\\tsin.approx%t0\\t%0, %1;")
1012 (define_insn "cossf2"
1013 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1014 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1016 "flag_unsafe_math_optimizations"
1017 "%.\\tcos.approx%t0\\t%0, %1;")
1019 (define_insn "log2sf2"
1020 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1021 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1023 "flag_unsafe_math_optimizations"
1024 "%.\\tlg2.approx%t0\\t%0, %1;")
1026 (define_insn "exp2sf2"
1027 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1028 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1030 "flag_unsafe_math_optimizations"
1031 "%.\\tex2.approx%t0\\t%0, %1;")
1033 ;; Conversions involving floating point
1035 (define_insn "extendsfdf2"
1036 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
1037 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
1039 "%.\\tcvt%t0%t1\\t%0, %1;")
1041 (define_insn "truncdfsf2"
1042 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1043 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
1045 "%.\\tcvt%#%t0%t1\\t%0, %1;")
1047 (define_insn "floatunssi<mode>2"
1048 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1049 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1051 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1053 (define_insn "floatsi<mode>2"
1054 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1055 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1057 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1059 (define_insn "floatunsdi<mode>2"
1060 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1061 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1063 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1065 (define_insn "floatdi<mode>2"
1066 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1067 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1069 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1071 (define_insn "fixuns_trunc<mode>si2"
1072 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1073 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1075 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1077 (define_insn "fix_trunc<mode>si2"
1078 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1079 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1081 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; Convert the floating-point register in operand 1 (any SDFM mode) to an
;; unsigned DImode integer in operand 0 (RTL unsigned_fix).  Emitted as
;; "cvt.rzi.u...", i.e. with the .rzi rounding modifier.
1083 (define_insn "fixuns_trunc<mode>di2"
1084 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1085 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1087 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
;; Convert the floating-point register in operand 1 (any SDFM mode) to a
;; signed DImode integer in operand 0 (RTL fix).  Emitted as
;; "cvt.rzi.s...", i.e. with the .rzi rounding modifier.
1089 (define_insn "fix_trunc<mode>di2"
1090 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1091 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1093 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; FPINT iterates over the four round-to-integral unspec codes.
1095 (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
1096 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
;; fpint_name maps each FPINT code to the stem used in the pattern name.
1097 (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
1098 (UNSPEC_FPINT_BTRUNC "btrunc")
1099 (UNSPEC_FPINT_CEIL "ceil")
1100 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
;; fpint_roundingmode maps each FPINT code to the text spliced in directly
;; after "cvt" in the output template.  Three codes use a fixed suffix;
;; nearbyint uses the %# output modifier followed by "i" instead.
1101 (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1102 (UNSPEC_FPINT_BTRUNC ".rzi")
1103 (UNSPEC_FPINT_CEIL ".rpi")
1104 (UNSPEC_FPINT_NEARBYINT "%#i")])
;; Rounding patterns generated from the FPINT and SDFM iterators: operand 0
;; and operand 1 are floating-point registers of the same SDFM mode, and the
;; value is produced by an unspec of operand 1.  The output is a "cvt" whose
;; rounding text comes from fpint_roundingmode.
1106 (define_insn "<FPINT:fpint_name><SDFM:mode>2"
1107 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1108 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1111 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
;; FPINT2 iterates over the floor and ceil unspec codes only.
1113 (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
;; fpint2_name gives the pattern-name stem ("lfloor"/"lceil") per code.
1114 (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
1115 (UNSPEC_FPINT_CEIL "lceil")])
;; fpint2_roundingmode gives the "cvt" rounding suffix per code.
1116 (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1117 (UNSPEC_FPINT_CEIL ".rpi")])
;; lfloor/lceil patterns generated from FPINT2, SDFM and SDIM: round the
;; floating-point register in operand 1 and deliver the result in the
;; integer (SDIM) register in operand 0.  The "cvt" template uses the
;; rounding suffix from fpint2_roundingmode and a ".s" destination type.
1119 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
1120 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1121 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1124 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
1126 ;; Vector operations
;; Store the scalar register in operand 1 into the ".x" component of the
;; vector register in operand 0, using a "mov" to "%0.x".
1128 (define_insn "*vec_set<mode>_0"
1129 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1131 (vec_duplicate:VECIM
1132 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1136 "%.\\tmov%t1\\t%0.x, %1;")
;; Store the scalar register in operand 1 into the ".y" component of the
;; vector register in operand 0, using a "mov" to "%0.y".
1138 (define_insn "*vec_set<mode>_1"
1139 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1141 (vec_duplicate:VECIM
1142 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1146 "%.\\tmov%t1\\t%0.y, %1;")
;; Store the scalar register in operand 1 into the ".z" component of the
;; vector register in operand 0, using a "mov" to "%0.z".
1148 (define_insn "*vec_set<mode>_2"
1149 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1151 (vec_duplicate:VECIM
1152 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1156 "%.\\tmov%t1\\t%0.z, %1;")
;; Store the scalar register in operand 1 into the ".w" component of the
;; vector register in operand 0, using a "mov" to "%0.w".
1158 (define_insn "*vec_set<mode>_3"
1159 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1161 (vec_duplicate:VECIM
1162 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1166 "%.\\tmov%t1\\t%0.w, %1;")
;; Expander for setting one vector element.  Operand 0 is the vector
;; register, operand 1 the scalar value and operand 2 the element index.
;; The expansion wraps operand 1 in a VEC_DUPLICATE, merges it with the
;; old value of operand 0 through a VEC_MERGE whose mask is
;; 1 << INTVAL (operands[2]), and emits the resulting SET of operand 0.
1168 (define_expand "vec_set<mode>"
1169 [(match_operand:VECIM 0 "nvptx_register_operand")
1170 (match_operand:<VECELEM> 1 "nvptx_register_operand")
1171 (match_operand:SI 2 "nvptx_vector_index_operand")]
1174 enum machine_mode mode = GET_MODE (operands[0]);
1175 int mask = 1 << INTVAL (operands[2]);
1176 rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]);
1177 tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask));
1178 emit_insn (gen_rtx_SET (operands[0], tmp));
;; Extract one element of the vector register in operand 1 into the scalar
;; register in operand 0 (RTL vec_select).  Operand 2 is the element index;
;; its integer value indexes a table of four "mov" templates that read the
;; .x, .y, .z and .w components respectively.
1182 (define_insn "vec_extract<mode><Vecelem>"
1183 [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
1184 (vec_select:<VECELEM>
1185 (match_operand:VECIM 1 "nvptx_register_operand" "R")
1186 (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
1189 static const char *const asms[4] = {
1190 "%.\\tmov%t0\\t%0, %1.x;",
1191 "%.\\tmov%t0\\t%0, %1.y;",
1192 "%.\\tmov%t0\\t%0, %1.z;",
1193 "%.\\tmov%t0\\t%0, %1.w;"
1195 return asms[INTVAL (operands[2])];
;; The output text declares two .u32 registers, %nop_src and %nop_dst, and
;; moves the former into the latter.
;; NOTE(review): the name suggests this serves as a no-op placeholder in
;; the emitted PTX -- confirm where it is used.
1210 (define_insn "fake_nop"
1214 .reg .u32 %%nop_src;
1215 .reg .u32 %%nop_dst;
1216 mov.u32 %%nop_dst, %%nop_src;
;; Function return.  The assembly text is produced by
;; nvptx_output_return (); the insn is marked non-predicable.
1219 (define_insn "return"
1223 return nvptx_output_return ();
1225 [(set_attr "predicable" "false")])
;; Function epilogue.  When TARGET_SOFT_STACK is set, first emit a
;; set_softstack insn on the Pmode register SOFTSTACK_PREV_REGNUM; then
;; emit the return jump insn.
1227 (define_expand "epilogue"
1228 [(clobber (const_int 0))]
1231 if (TARGET_SOFT_STACK)
1232 emit_insn (gen_set_softstack (Pmode, gen_rtx_REG (Pmode,
1233 SOFTSTACK_PREV_REGNUM)));
1234 emit_jump_insn (gen_return ());
;; Nonlocal goto is not supported: the expansion reports this with sorry ()
;; and emits a nop.  None of the four operands is used by the visible code.
1238 (define_expand "nonlocal_goto"
1239 [(match_operand 0 "" "")
1240 (match_operand 1 "" "")
1241 (match_operand 2 "" "")
1242 (match_operand 3 "" "")]
1245 sorry ("target cannot support nonlocal goto.");
1246 emit_insn (gen_nop ());
;; Receiver side of a nonlocal goto; likewise unsupported and reported
;; with sorry ().
1250 (define_expand "nonlocal_goto_receiver"
1254 sorry ("target cannot support nonlocal goto.");
;; Dynamic stack allocation.  Operand 0 receives the address of the new
;; block and operand 1 is the amount to allocate.  With TARGET_SOFT_STACK
;; the stack pointer is decremented by operand 1, a set_softstack insn is
;; emitted on the new stack pointer, and virtual_stack_dynamic_rtx is
;; copied to operand 0.  The remaining code reports alloca as unsupported
;; via sorry () and emits a nop.
;; NOTE(review): presumably the sorry () path is reached only without soft
;; stacks -- confirm against the full expander body.
1257 (define_expand "allocate_stack"
1258 [(match_operand 0 "nvptx_register_operand")
1259 (match_operand 1 "nvptx_register_operand")]
1262 if (TARGET_SOFT_STACK)
1264 emit_move_insn (stack_pointer_rtx,
1265 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1266 emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
1267 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1270 /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1271 only) but notes it is not implemented. The assembler emits a
1272 confused error message. Issue a blunt one now instead. */
1273 sorry ("target cannot support alloca.");
1274 emit_insn (gen_nop ());
;; UNSPEC_SET_SOFTSTACK on the pointer-mode (P iterator) register in
;; operand 0.  The assembly text is produced by nvptx_output_set_softstack
;; from that register's number.  Other patterns in this file create it via
;; gen_set_softstack (Pmode, ...).
1278 (define_insn "@set_softstack_<mode>"
1279 [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
1280 UNSPEC_SET_SOFTSTACK)]
1283 return nvptx_output_set_softstack (REGNO (operands[0]));
;; Restore the stack pointer at block scope.  With TARGET_SOFT_STACK,
;; operand 1 is copied into operand 0 and a set_softstack insn is emitted
;; on operand 0.
1286 (define_expand "restore_stack_block"
1287 [(match_operand 0 "register_operand" "")
1288 (match_operand 1 "register_operand" "")]
1291 if (TARGET_SOFT_STACK)
1293 emit_move_insn (operands[0], operands[1]);
1294 emit_insn (gen_set_softstack (Pmode, operands[0]));
;; Restore the stack pointer at function scope.  Takes two register
;; operands with the generic register_operand predicate.
1299 (define_expand "restore_stack_function"
1300 [(match_operand 0 "register_operand" "")
1301 (match_operand 1 "register_operand" "")]
;; Unconditional trap: the trap_if condition is the constant 1.
1308 [(trap_if (const_int 1) (const_int 0))]
;; Conditional trap controlled by an "ne" test on the BImode register in
;; operand 0.  Emits "trap" followed by "exit", each prefixed by the %j0
;; output modifier; the insn itself is marked non-predicable.
;; NOTE(review): %j0 presumably prints operand 0 as a PTX guard predicate
;; -- confirm in the print-operand code.
1312 (define_insn "trap_if_true"
1313 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1317 "%j0 trap; %j0 exit;"
1318 [(set_attr "predicable" "false")])
;; Conditional trap controlled by an "eq" test on the BImode register in
;; operand 0.  Emits "trap" followed by "exit", each prefixed by the %J0
;; output modifier; the insn itself is marked non-predicable.
;; NOTE(review): %J0 presumably prints the negated guard predicate --
;; confirm in the print-operand code.
1320 (define_insn "trap_if_false"
1321 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1325 "%J0 trap; %J0 exit;"
1326 [(set_attr "predicable" "false")])
;; Conditional trap on a comparison of two SDIM-mode values.  Operand 0 is
;; the comparison operator applied to operands 1 and 2; operand 3 must
;; satisfy const0_operand.  The comparison is expanded by
;; nvptx_expand_compare and its result is fed to trap_if_true.
1328 (define_expand "ctrap<mode>4"
1329 [(trap_if (match_operator 0 "nvptx_comparison_operator"
1330 [(match_operand:SDIM 1 "nvptx_register_operand")
1331 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
1332 (match_operand 3 "const0_operand"))]
1335 rtx t = nvptx_expand_compare (operands[0]);
1336 emit_insn (gen_trap_if_true (t));
;; Read the size of an OpenACC dimension into the SImode register in
;; operand 0.  Operand 1 is an integer constant that indexes the template
;; table: entry 0 (gang) reads %nctaid.x, entry 1 (worker) reads %ntid.y
;; and entry 2 (vector) reads %ntid.x.  The index is not range-checked
;; by the visible code.
1340 (define_insn "oacc_dim_size"
1341 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1342 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1346 static const char *const asms[] =
1347 { /* Must match oacc_loop_levels ordering. */
1348 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1349 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1350 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1352 return asms[INTVAL (operands[1])];
;; Read the position within an OpenACC dimension into the SImode register
;; in operand 0.  Operand 1 is an integer constant that indexes the
;; template table: entry 0 (gang) reads %ctaid.x, entry 1 (worker) reads
;; %tid.y and entry 2 (vector) reads %tid.x.  Unlike oacc_dim_size, the
;; source is an unspec_volatile.
1355 (define_insn "oacc_dim_pos"
1356 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1357 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1361 static const char *const asms[] =
1362 { /* Must match oacc_loop_levels ordering. */
1363 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1364 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1365 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1367 return asms[INTVAL (operands[1])];
;; An unspec_volatile over the integer constant in operand 0; marked
;; non-predicable.
;; NOTE(review): presumably a marker for the fork point of an OpenACC
;; partitioned region -- confirm with nvptx_expand_oacc_fork.
1370 (define_insn "nvptx_fork"
1371 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1375 [(set_attr "predicable" "false")])
;; An unspec_volatile over the integer constant in operand 0; marked
;; non-predicable.
;; NOTE(review): presumably the marker paired with nvptx_fork, placed
;; after the fork -- confirm with nvptx_expand_oacc_fork.
1377 (define_insn "nvptx_forked"
1378 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1382 [(set_attr "predicable" "false")])
;; An unspec_volatile over the integer constant in operand 0; marked
;; non-predicable.
;; NOTE(review): presumably the marker placed ahead of a join -- confirm
;; with nvptx_expand_oacc_join.
1384 (define_insn "nvptx_joining"
1385 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1389 [(set_attr "predicable" "false")])
;; An unspec_volatile over the integer constant in operand 0; marked
;; non-predicable.
;; NOTE(review): presumably the marker for the join point itself --
;; confirm with nvptx_expand_oacc_join.
1391 (define_insn "nvptx_join"
1392 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1396 [(set_attr "predicable" "false")])
;; OpenACC fork.  Unless operand 0 is const0_rtx, operand 1 is first copied
;; into operand 0.  The integer value of operand 2 is then handed to
;; nvptx_expand_oacc_fork, which does the rest of the expansion.
1398 (define_expand "oacc_fork"
1399 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1400 (match_operand:SI 1 "general_operand" ""))
1401 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1405 if (operands[0] != const0_rtx)
1406 emit_move_insn (operands[0], operands[1]);
1407 nvptx_expand_oacc_fork (INTVAL (operands[2]));
;; OpenACC join.  Unless operand 0 is const0_rtx, operand 1 is first copied
;; into operand 0.  The integer value of operand 2 is then handed to
;; nvptx_expand_oacc_join, which does the rest of the expansion.
1411 (define_expand "oacc_join"
1412 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1413 (match_operand:SI 1 "general_operand" ""))
1414 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1418 if (operands[0] != const0_rtx)
1419 emit_move_insn (operands[0], operands[1]);
1420 nvptx_expand_oacc_join (INTVAL (operands[2]));
1424 ;; only 32-bit shuffles exist.
;; Shuffle the BITS-mode register in operand 1 into operand 0.  Operand 2
;; (register or immediate) is the third "shfl" argument and operand 3 is an
;; integer constant printed through the %S3 modifier as part of the
;; mnemonic.  The template always emits a ".b32" shuffle with a literal
;; 31 as its last argument.
;; NOTE(review): operand 2 is presumably the lane or lane offset and
;; operand 3 the shuffle kind -- confirm with nvptx_gen_shuffle.
1425 (define_insn "nvptx_shuffle<mode>"
1426 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1428 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1429 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1430 (match_operand:SI 3 "const_int_operand" "n")]
1433 "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
;; UNSPEC_VOTE_BALLOT: combine the BImode predicate register in operand 1
;; into the SImode register in operand 0 with "vote.ballot.b32".
1435 (define_insn "nvptx_vote_ballot"
1436 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1437 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1438 UNSPEC_VOTE_BALLOT))]
1440 "%.\\tvote.ballot.b32\\t%0, %1;")
1442 ;; Patterns for OpenMP SIMD-via-SIMT lowering
;; UNSPECV_SIMT_ENTER in pointer mode (P iterator): operand 0 is the result
;; register, operands 1 and 2 are register-or-immediate inputs.  The
;; assembly text is produced by nvptx_output_simt_enter from all three
;; operands.  The omp_simt_enter expander creates this insn and passes a
;; size as operand 1 and an alignment as operand 2.
1444 (define_insn "@omp_simt_enter_<mode>"
1445 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1446 (unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
1447 (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
1448 UNSPECV_SIMT_ENTER))]
1451 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
;; Expander for entering a SIMT region.  Operand 1 is a size (register or
;; constant) and operand 2 a constant alignment.  Before emitting the
;; Pmode omp_simt_enter insn it updates the per-function record:
;;  - simt_stack_size becomes HOST_WIDE_INT_M1U when operand 1 is not a
;;    CONST_INT, and is otherwise raised to at least UINTVAL (operands[1]);
;;  - simt_stack_align is raised to at least UINTVAL (operands[2]);
;;  - has_simtreg is set.
1454 (define_expand "omp_simt_enter"
1455 [(match_operand 0 "nvptx_register_operand" "=R")
1456 (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1457 (match_operand 2 "const_int_operand" "n")]
1460 if (!CONST_INT_P (operands[1]))
1461 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1463 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1464 cfun->machine->simt_stack_size);
1465 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1466 cfun->machine->simt_stack_align);
1467 cfun->machine->has_simtreg = true;
1468 emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1], operands[2]));
;; Expander for leaving a SIMT region: emits the Pmode omp_simt_exit insn
;; on the register in operand 0.
1472 (define_expand "omp_simt_exit"
1473 [(match_operand 0 "nvptx_register_operand" "R")]
1476 emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
;; An unspec_volatile on the pointer-mode (P iterator) register in
;; operand 0.  The assembly text is produced by nvptx_output_simt_exit.
1480 (define_insn "@omp_simt_exit_<mode>"
1481 [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
1485 return nvptx_output_simt_exit (operands[0]);
1488 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
;; The value is UNSPEC_LANEID, read from the %laneid special register
;; with a "mov.u32".
1489 (define_insn "omp_simt_lane"
1490 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1491 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1493 "%.\\tmov.u32\\t%0, %%laneid;")
1495 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1496 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
;; The barrier is the nvptx_nounroll insn, emitted right after the move.
1497 (define_expand "omp_simt_ordered"
1498 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1499 (match_operand:SI 1 "nvptx_register_operand" "R")]
1502 emit_move_insn (operands[0], operands[1]);
1503 emit_insn (gen_nvptx_nounroll ());
1507 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
;; Operand 0 is the result register, operand 1 the value to exchange and
;; operand 2 an SImode register or immediate.  The shuffle insn comes from
;; nvptx_gen_shuffle, which receives all three operands.
1509 (define_expand "omp_simt_xchg_bfly"
1510 [(match_operand 0 "nvptx_register_operand" "=R")
1511 (match_operand 1 "nvptx_register_operand" "R")
1512 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1515 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1520 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1521 ;; from lane given by index in operand 2 to operand 0 in all lanes
;; The shuffle insn comes from nvptx_gen_shuffle, which receives all
;; three operands.
1522 (define_expand "omp_simt_xchg_idx"
1523 [(match_operand 0 "nvptx_register_operand" "=R")
1524 (match_operand 1 "nvptx_register_operand" "R")
1525 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1528 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1533 ;; Implement IFN_GOMP_SIMT_VOTE_ANY:
1534 ;; set operand 0 to zero iff all lanes supply zero in operand 1
;; A fresh BImode predicate receives (operand 1 != 0); that predicate is
;; passed to nvptx_vote_ballot, which writes operand 0 directly.
1535 (define_expand "omp_simt_vote_any"
1536 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1537 (match_operand:SI 1 "nvptx_register_operand" "R")]
1540 rtx pred = gen_reg_rtx (BImode);
1541 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1542 emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
1546 ;; Implement IFN_GOMP_SIMT_LAST_LANE:
1547 ;; set operand 0 to the lowest lane index that passed non-zero in operand 1
;; A fresh BImode predicate receives (operand 1 != 0); nvptx_vote_ballot
;; writes the ballot into an SImode temporary, and ctzsi2 (count trailing
;; zeros) of that temporary is stored in operand 0.
1548 (define_expand "omp_simt_last_lane"
1549 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1550 (match_operand:SI 1 "nvptx_register_operand" "R")]
1553 rtx pred = gen_reg_rtx (BImode);
1554 rtx tmp = gen_reg_rtx (SImode);
1555 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1556 emit_insn (gen_nvptx_vote_ballot (tmp, pred));
1557 emit_insn (gen_ctzsi2 (operands[0], tmp));
1561 ;; extract parts of a 64 bit object into 2 32-bit ints
;; Operand 2 is the source (a BITD-mode register); operands 0 and 1 are
;; the two SImode results, modelled as UNSPEC_BIT_CONV of the source with
;; selectors 0 and 1.  A single "mov.b64 {%0,%1}, %2" writes both.
1562 (define_insn "unpack<mode>si2"
1563 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1564 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
1565 (const_int 0)] UNSPEC_BIT_CONV))
1566 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
1567 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1569 "%.\\tmov.b64\\t{%0,%1}, %2;")
1571 ;; pack 2 32-bit ints into a 64 bit object
;; Operands 1 and 2 are the two SImode sources and operand 0 the BITD-mode
;; destination.  A single "mov.b64 %0, {%1,%2}" does the packing.
1572 (define_insn "packsi<mode>2"
1573 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
1574 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
1575 (match_operand:SI 2 "nvptx_register_operand" "R")]
1578 "%.\\tmov.b64\\t%0, {%1,%2};")
;; Expander for atomic compare-and-swap on SDIM modes.  It first emits the
;; atomic_compare_and_swap<mode>_1 insn with the old-value output, memory,
;; expected value, new value and success model (operands 1, 2, 3, 4, 6).
;; It then computes the boolean result: a BImode condition receives
;; (operand 1 == operand 3) and sel_truesi sets operand 0 to 1 or 0.
;; The is_weak flag (operand 5) and the failure model (operand 7) are not
;; referenced by the visible expansion code.
1582 (define_expand "atomic_compare_and_swap<mode>"
1583 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
1584 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
1585 (match_operand:SDIM 2 "memory_operand") ;; memory
1586 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
1587 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
1588 (match_operand:SI 5 "const_int_operand") ;; is_weak
1589 (match_operand:SI 6 "const_int_operand") ;; success model
1590 (match_operand:SI 7 "const_int_operand")] ;; failure model
1593 emit_insn (gen_atomic_compare_and_swap<mode>_1
1594 (operands[1], operands[2], operands[3], operands[4], operands[6]));
1596 rtx cond = gen_reg_rtx (BImode);
1597 emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
1598 emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
;; The compare-and-swap instruction itself.  Operand 0 is the result
;; register, operand 1 the memory location (read and written), operand 2
;; the compare value, operand 3 the new value and operand 4 a constant
;; memory model that the template does not print.  Emitted as
;; "atom...cas.b<size>" and flagged with the "atomic" attribute.
;; NOTE(review): %A1 presumably prints the address-space qualifier of the
;; memory operand -- confirm in the print-operand code.
1602 (define_insn "atomic_compare_and_swap<mode>_1"
1603 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1604 (unspec_volatile:SDIM
1605 [(match_operand:SDIM 1 "memory_operand" "+m")
1606 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
1607 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
1608 (match_operand:SI 4 "const_int_operand")]
1611 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
1613 "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
1614 [(set_attr "atomic" "true")])
;; Atomic exchange on SDIM modes.  Operand 0 is the output register,
;; operand 1 the memory location, operand 2 the input value and operand 3
;; a constant memory model that the template does not print.  Emitted as
;; "atom...exch.b<size>" and flagged with the "atomic" attribute.
1616 (define_insn "atomic_exchange<mode>"
1617 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
1618 (unspec_volatile:SDIM
1619 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
1620 (match_operand:SI 3 "const_int_operand")] ;; model
1623 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
1625 "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
1626 [(set_attr "atomic" "true")])
;; Atomic fetch-and-add on SDIM modes.  The memory location in operand 1
;; is updated with (operand 1 + operand 2); operand 0 is the register that
;; receives the instruction's result, and operand 3 is a constant memory
;; model that the template does not print.  Emitted as "atom...add" and
;; flagged with the "atomic" attribute.
1628 (define_insn "atomic_fetch_add<mode>"
1629 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1630 (unspec_volatile:SDIM
1631 [(plus:SDIM (match_dup 1)
1632 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1633 (match_operand:SI 3 "const_int_operand")] ;; model
1635 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1638 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1639 [(set_attr "atomic" "true")])
;; SFmode counterpart of atomic_fetch_add<mode>.  The addend (operand 2)
;; uses the "RF" constraint where the integer pattern uses "Ri"; the
;; output template is the same "atom...add" form.
1641 (define_insn "atomic_fetch_addsf"
1642 [(set (match_operand:SF 1 "memory_operand" "+m")
1644 [(plus:SF (match_dup 1)
1645 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
1646 (match_operand:SI 3 "const_int_operand")] ;; model
1648 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
1651 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1652 [(set_attr "atomic" "true")])
;; any_logic iterates over the three bitwise RTL codes; the logic attribute
;; gives the spelling of each (note ior is spelled "or") for use in pattern
;; names and output templates.
1654 (define_code_iterator any_logic [and ior xor])
1655 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
;; Atomic fetch-and-{and,or,xor} on SDIM modes, one pattern per any_logic
;; code.  The memory location in operand 1 is combined with operand 2;
;; operand 0 receives the instruction's result and operand 3 is a constant
;; memory model that the template does not print.  The condition enables
;; the SImode variants always and the other modes only with TARGET_SM35.
;; Emitted as "atom...b<size>.<logic>" and flagged "atomic".
1657 (define_insn "atomic_fetch_<logic><mode>"
1658 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1659 (unspec_volatile:SDIM
1660 [(any_logic:SDIM (match_dup 1)
1661 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1662 (match_operand:SI 3 "const_int_operand")] ;; model
1664 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1666 "<MODE>mode == SImode || TARGET_SM35"
1667 "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
1668 [(set_attr "atomic" "true")])
;; Barrier synchronisation.  Operand 0 (register or immediate) is always
;; printed as the first "bar.sync" argument.  Operand 1 is an integer
;; constant: when it is 0 the one-argument form "bar.sync %0" is emitted,
;; otherwise "bar.sync %0, %1".  Marked non-predicable.
;; NOTE(review): operand 1 is presumably the participating thread count
;; -- confirm against the callers.
1670 (define_insn "nvptx_barsync"
1671 [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
1672 (match_operand:SI 1 "const_int_operand")]
1676 if (INTVAL (operands[1]) == 0)
1677 return "\\tbar.sync\\t%0;";
1679 return "\\tbar.sync\\t%0, %1;";
1681 [(set_attr "predicable" "false")])
;; Expander for a full memory barrier.  Operand 0 is synthesised here as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH, and is used by
;; the UNSPECV_MEMBAR unspec_volatile in the template.
1683 (define_expand "memory_barrier"
1685 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1688 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1689 MEM_VOLATILE_P (operands[0]) = 1;
1692 ;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
1693 ;; (corresponding to cuda functions threadfence_block, threadfence and
1694 ;; threadfence_system). For the insn memory_barrier we use membar.sys. This
1695 ;; may be overconservative, but before using membar.gl instead we'll need to
1696 ;; explain in detail why it's safe to use. For now, use membar.sys.
;; This insn matches the UNSPECV_MEMBAR set of a BLKmode operand built by
;; the memory_barrier expander; it is marked non-predicable.
1697 (define_insn "*memory_barrier"
1698 [(set (match_operand:BLK 0 "" "")
1699 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1702 [(set_attr "predicable" "false")])
;; Expander for the UNSPECV_MEMBAR_CTA barrier.  As in memory_barrier,
;; operand 0 is synthesised as a volatile BLKmode MEM whose address is a
;; Pmode SCRATCH.
1704 (define_expand "nvptx_membar_cta"
1706 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1709 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1710 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPECV_MEMBAR_CTA set of a BLKmode operand built by
;; the nvptx_membar_cta expander; marked non-predicable.
;; NOTE(review): presumably emits "membar.cta" -- confirm against the
;; output template.
1713 (define_insn "*nvptx_membar_cta"
1714 [(set (match_operand:BLK 0 "" "")
1715 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1718 [(set_attr "predicable" "false")])
;; UNSPECV_NOUNROLL: emits the PTX directive `.pragma "nounroll";`.  It
;; takes no operands and is marked non-predicable.  The omp_simt_ordered
;; expander emits it after its move.
1720 (define_insn "nvptx_nounroll"
1721 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
1723 "\\t.pragma \\\"nounroll\\\";"
1724 [(set_attr "predicable" "false")])
;; Set the DImode operand 0 from an unspec_volatile of the integer constant
;; in operand 1.  The assembly text is produced by
;; nvptx_output_red_partition from both operands; marked non-predicable.
;; NOTE(review): operand 0 uses the generic nonimmediate_operand predicate
;; together with the "R" constraint, unlike the nvptx_register_operand
;; used by most patterns here -- confirm this is intentional.
1726 (define_insn "nvptx_red_partition"
1727 [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
1728 (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
1732 return nvptx_output_red_partition (operands[0], operands[1]);
1734 [(set_attr "predicable" "false")])