]>
Commit | Line | Data |
---|---|---|
8ce80784 | 1 | ;; Machine description for NVPTX. |
fbd26352 | 2 | ;; Copyright (C) 2014-2019 Free Software Foundation, Inc. |
8ce80784 | 3 | ;; Contributed by Bernd Schmidt <bernds@codesourcery.com> |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify | |
8 | ;; it under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, | |
13 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | ;; GNU General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
21 | (define_c_enum "unspec" [ | |
22 | UNSPEC_ARG_REG | |
8ce80784 | 23 | |
8ce80784 | 24 | UNSPEC_COPYSIGN |
25 | UNSPEC_LOG2 | |
26 | UNSPEC_EXP2 | |
27 | UNSPEC_SIN | |
28 | UNSPEC_COS | |
29 | ||
30 | UNSPEC_FPINT_FLOOR | |
31 | UNSPEC_FPINT_BTRUNC | |
32 | UNSPEC_FPINT_CEIL | |
33 | UNSPEC_FPINT_NEARBYINT | |
34 | ||
35 | UNSPEC_BITREV | |
36 | ||
37 | UNSPEC_ALLOCA | |
38 | ||
7fce8768 | 39 | UNSPEC_SET_SOFTSTACK |
40 | ||
b3787ae4 | 41 | UNSPEC_DIM_SIZE |
42 | ||
b3787ae4 | 43 | UNSPEC_BIT_CONV |
44 | ||
7fce8768 | 45 | UNSPEC_VOTE_BALLOT |
46 | ||
47 | UNSPEC_LANEID | |
48 | ||
b3787ae4 | 49 | UNSPEC_SHUFFLE |
50 | UNSPEC_BR_UNIFIED | |
8ce80784 | 51 | ]) |
52 | ||
53 | (define_c_enum "unspecv" [ | |
54 | UNSPECV_LOCK | |
55 | UNSPECV_CAS | |
56 | UNSPECV_XCHG | |
b3787ae4 | 57 | UNSPECV_BARSYNC |
e163ceb5 | 58 | UNSPECV_MEMBAR |
19b20a5c | 59 | UNSPECV_MEMBAR_CTA |
b3787ae4 | 60 | UNSPECV_DIM_POS |
61 | ||
62 | UNSPECV_FORK | |
63 | UNSPECV_FORKED | |
64 | UNSPECV_JOINING | |
65 | UNSPECV_JOIN | |
7fce8768 | 66 | |
67 | UNSPECV_NOUNROLL | |
1b576300 | 68 | |
69 | UNSPECV_SIMT_ENTER | |
70 | UNSPECV_SIMT_EXIT | |
ed42202d | 71 | |
72 | UNSPECV_RED_PART | |
8ce80784 | 73 | ]) |
74 | ||
75 | (define_attr "subregs_ok" "false,true" | |
76 | (const_string "false")) | |
77 | ||
7fce8768 | 78 | (define_attr "atomic" "false,true" |
79 | (const_string "false")) | |
80 | ||
9224dd1f | 81 | ;; The nvptx operand predicates, in general, permit neither subregs nor |
82 | ;; symbolic constants (only literal ones), unlike the generic predicates, | |
83 | ;; which permit subregs and symbolic constants (as appropriate). | |
8ce80784 | 84 | (define_predicate "nvptx_register_operand" |
f8e6fa1d | 85 | (match_code "reg") |
8ce80784 | 86 | { |
8ce80784 | 87 | return register_operand (op, mode); |
88 | }) | |
89 | ||
50ad9277 | 90 | (define_predicate "nvptx_nonimmediate_operand" |
f8e6fa1d | 91 | (match_code "mem,reg") |
8ce80784 | 92 | { |
6bd291cd | 93 | return (REG_P (op) ? register_operand (op, mode) |
94 | : memory_operand (op, mode)); | |
8ce80784 | 95 | }) |
96 | ||
8ce80784 | 97 | (define_predicate "nvptx_nonmemory_operand" |
f8e6fa1d | 98 | (match_code "reg,const_int,const_double") |
8ce80784 | 99 | { |
6bd291cd | 100 | return (REG_P (op) ? register_operand (op, mode) |
101 | : immediate_operand (op, mode)); | |
8ce80784 | 102 | }) |
103 | ||
8ce80784 | 104 | (define_predicate "const0_operand" |
105 | (and (match_code "const_int") | |
106 | (match_test "op == const0_rtx"))) | |
107 | ||
108 | ;; True if this operator is valid for predication. | |
109 | (define_predicate "predicate_operator" | |
110 | (match_code "eq,ne")) | |
111 | ||
112 | (define_predicate "ne_operator" | |
113 | (match_code "ne")) | |
114 | ||
115 | (define_predicate "nvptx_comparison_operator" | |
116 | (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu")) | |
117 | ||
118 | (define_predicate "nvptx_float_comparison_operator" | |
119 | (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered")) | |
120 | ||
121 | ;; Test for a valid call target: a register, or a SYMBOL_REF that names a function. | |
f8e6fa1d | 122 | (define_predicate "call_insn_operand" |
8ce80784 | 123 | (match_code "symbol_ref,reg") |
124 | { | |
50ad9277 | 125 | return REG_P (op) || SYMBOL_REF_FUNCTION_P (op); |
8ce80784 | 126 | }) |
127 | ||
128 | ;; Return true if OP is a PARALLEL in which every element after the first | |
129 | ;; is a USE of a register (an argument pseudo); element 0 is not examined here. | |
130 | (define_predicate "call_operation" | |
131 | (match_code "parallel") | |
132 | { | |
b27697ca | 133 | int arg_end = XVECLEN (op, 0); |
8ce80784 | 134 | |
b27697ca | 135 | for (int i = 1; i < arg_end; i++) |
8ce80784 | 136 | { |
137 | rtx elt = XVECEXP (op, 0, i); | |
8ce80784 | 138 | |
50ad9277 | 139 | if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0))) |
8ce80784 | 140 | return false; |
141 | } | |
142 | return true; | |
143 | }) | |
144 | ||
7fce8768 | 145 | (define_attr "predicable" "false,true" |
146 | (const_string "true")) | |
147 | ||
;; Conditional execution: the predicate is an EQ or NE comparison (see | | |
;; predicate_operator) of a BImode register against constant zero. | | |
148 | (define_cond_exec | |
149 | [(match_operator 0 "predicate_operator" | |
150 | [(match_operand:BI 1 "nvptx_register_operand" "") | |
151 | (match_operand:BI 2 "const0_operand" "")])] | |
152 | "" | |
153 | "" | |
154 | ) | |
155 | ||
8ce80784 | 156 | (define_constraint "P0" |
157 | "An integer with the value 0." | |
158 | (and (match_code "const_int") | |
159 | (match_test "ival == 0"))) | |
160 | ||
161 | (define_constraint "P1" | |
162 | "An integer with the value 1." | |
163 | (and (match_code "const_int") | |
164 | (match_test "ival == 1"))) | |
165 | ||
166 | (define_constraint "Pn" | |
167 | "An integer with the value -1." | |
168 | (and (match_code "const_int") | |
169 | (match_test "ival == -1"))) | |
170 | ||
171 | (define_constraint "R" | |
172 | "A pseudo register." | |
173 | (match_code "reg")) | |
174 | ||
175 | (define_constraint "Ia" | |
176 | "Any integer constant." | |
177 | (and (match_code "const_int") (match_test "true"))) | |
178 | ||
179 | (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF]) | |
180 | (define_mode_iterator QHSDIM [QI HI SI DI]) | |
181 | (define_mode_iterator HSDIM [HI SI DI]) | |
182 | (define_mode_iterator BHSDIM [BI HI SI DI]) | |
183 | (define_mode_iterator SDIM [SI DI]) | |
184 | (define_mode_iterator SDISDFM [SI DI SF DF]) | |
185 | (define_mode_iterator QHIM [QI HI]) | |
186 | (define_mode_iterator QHSIM [QI HI SI]) | |
187 | (define_mode_iterator SDFM [SF DF]) | |
188 | (define_mode_iterator SDCM [SC DC]) | |
b3787ae4 | 189 | (define_mode_iterator BITS [SI SF]) |
190 | (define_mode_iterator BITD [DI DF]) | |
ffaae5bd | 191 | (define_mode_iterator VECIM [V2SI V2DI]) |
8ce80784 | 192 | |
193 | ;; This mode iterator allows :P to be used for patterns that operate on | |
194 | ;; pointer-sized quantities. Exactly one of the two alternatives will match. | |
195 | (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) | |
196 | ||
197 | ;; We should get away with not defining memory alternatives, since we don't | |
198 | ;; get variables in this mode and pseudos are never spilled. | |
;; The two constant alternatives (0 via P0, -1 via Pn) are output as | | |
;; setp.eq.u32 on literals: "1, 0" and "1, 1" respectively. | | |
199 | (define_insn "movbi" | |
200 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R") | |
201 | (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))] | |
202 | "" | |
203 | "@ | |
204 | %.\\tmov%t0\\t%0, %1; | |
205 | %.\\tsetp.eq.u32\\t%0, 1, 0; | |
206 | %.\\tsetp.eq.u32\\t%0, 1, 1;") | |
207 | ||
fcac805e | 208 | (define_insn "*mov<mode>_insn" |
209 | [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m") | |
210 | (match_operand:VECIM 1 "general_operand" "Ri,m,R"))] | |
211 | "!MEM_P (operands[0]) || REG_P (operands[1])" | |
212 | { | |
213 | if (which_alternative == 1) | |
214 | return "%.\\tld%A1%u1\\t%0, %1;"; | |
215 | if (which_alternative == 2) | |
216 | return "%.\\tst%A0%u0\\t%0, %1;"; | |
217 | ||
218 | return nvptx_output_mov_insn (operands[0], operands[1]); | |
219 | } | |
220 | [(set_attr "subregs_ok" "true")]) | |
221 | ||
8ce80784 | 222 | (define_insn "*mov<mode>_insn" |
6bd291cd | 223 | [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m") |
6196ad64 | 224 | (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))] |
6bd291cd | 225 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
8ce80784 | 226 | { |
6196ad64 | 227 | if (which_alternative == 1) |
8ce80784 | 228 | return "%.\\tld%A1%u1\\t%0, %1;"; |
6196ad64 | 229 | if (which_alternative == 2) |
8ce80784 | 230 | return "%.\\tst%A0%u0\\t%0, %1;"; |
231 | ||
6196ad64 | 232 | return nvptx_output_mov_insn (operands[0], operands[1]); |
8ce80784 | 233 | } |
234 | [(set_attr "subregs_ok" "true")]) | |
235 | ||
236 | (define_insn "*mov<mode>_insn" | |
6bd291cd | 237 | [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m") |
8ce80784 | 238 | (match_operand:SDFM 1 "general_operand" "RF,m,R"))] |
6196ad64 | 239 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
8ce80784 | 240 | { |
241 | if (which_alternative == 1) | |
242 | return "%.\\tld%A1%u0\\t%0, %1;"; | |
243 | if (which_alternative == 2) | |
244 | return "%.\\tst%A0%u1\\t%0, %1;"; | |
245 | ||
6196ad64 | 246 | return nvptx_output_mov_insn (operands[0], operands[1]); |
8ce80784 | 247 | } |
248 | [(set_attr "subregs_ok" "true")]) | |
249 | ||
250 | (define_insn "load_arg_reg<mode>" | |
251 | [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R") | |
18cefec0 | 252 | (unspec:QHIM [(match_operand 1 "const_int_operand" "n")] |
8ce80784 | 253 | UNSPEC_ARG_REG))] |
254 | "" | |
255 | "%.\\tcvt%t0.u32\\t%0, %%ar%1;") | |
256 | ||
257 | (define_insn "load_arg_reg<mode>" | |
258 | [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R") | |
18cefec0 | 259 | (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")] |
8ce80784 | 260 | UNSPEC_ARG_REG))] |
261 | "" | |
262 | "%.\\tmov%t0\\t%0, %%ar%1;") | |
263 | ||
;; Vector move expander.  A store to memory whose source is not a register | | |
;; is split in two: the source is first moved into a fresh pseudo, which is | | |
;; then stored. | | |
fcac805e | 264 | (define_expand "mov<mode>" |
265 | [(set (match_operand:VECIM 0 "nonimmediate_operand" "") | |
266 | (match_operand:VECIM 1 "general_operand" ""))] | |
267 | "" | |
268 | { | |
269 | if (MEM_P (operands[0]) && !REG_P (operands[1])) | |
270 | { | |
271 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
272 | emit_move_insn (tmp, operands[1]); | |
273 | emit_move_insn (operands[0], tmp); | |
274 | DONE; | |
275 | } | |
276 | }) | |
277 | ||
;; Scalar move expander.  A store to memory whose source is not a register | | |
;; goes through a fresh pseudo.  Otherwise, a LABEL_REF source is diagnosed | | |
;; with sorry (). | | |
8ce80784 | 278 | (define_expand "mov<mode>" |
6bd291cd | 279 | [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "") |
8ce80784 | 280 | (match_operand:QHSDISDFM 1 "general_operand" ""))] |
281 | "" | |
282 | { | |
9224dd1f | 283 | if (MEM_P (operands[0]) && !REG_P (operands[1])) |
8ce80784 | 284 | { |
285 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
286 | emit_move_insn (tmp, operands[1]); | |
287 | emit_move_insn (operands[0], tmp); | |
288 | DONE; | |
289 | } | |
1ab41d0d | 290 | |
291 | if (GET_CODE (operands[1]) == LABEL_REF) | |
292 | sorry ("target cannot support label values"); | |
8ce80784 | 293 | }) |
294 | ||
8ce80784 | 295 | (define_insn "zero_extendqihi2" |
296 | [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 297 | (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 298 | "" |
299 | "@ | |
300 | %.\\tcvt.u16.u%T1\\t%0, %1; | |
301 | %.\\tld%A1.u8\\t%0, %1;" | |
302 | [(set_attr "subregs_ok" "true")]) | |
303 | ||
304 | (define_insn "zero_extend<mode>si2" | |
305 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 306 | (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 307 | "" |
308 | "@ | |
309 | %.\\tcvt.u32.u%T1\\t%0, %1; | |
310 | %.\\tld%A1.u%T1\\t%0, %1;" | |
311 | [(set_attr "subregs_ok" "true")]) | |
312 | ||
313 | (define_insn "zero_extend<mode>di2" | |
314 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 315 | (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 316 | "" |
317 | "@ | |
318 | %.\\tcvt.u64.u%T1\\t%0, %1; | |
319 | %.\\tld%A1%u1\\t%0, %1;" | |
320 | [(set_attr "subregs_ok" "true")]) | |
321 | ||
322 | (define_insn "extend<mode>si2" | |
323 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 324 | (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 325 | "" |
326 | "@ | |
327 | %.\\tcvt.s32.s%T1\\t%0, %1; | |
328 | %.\\tld%A1.s%T1\\t%0, %1;" | |
329 | [(set_attr "subregs_ok" "true")]) | |
330 | ||
331 | (define_insn "extend<mode>di2" | |
332 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 333 | (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 334 | "" |
335 | "@ | |
336 | %.\\tcvt.s64.s%T1\\t%0, %1; | |
337 | %.\\tld%A1.s%T1\\t%0, %1;" | |
338 | [(set_attr "subregs_ok" "true")]) | |
339 | ||
340 | (define_insn "trunchiqi2" | |
50ad9277 | 341 | [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m") |
8ce80784 | 342 | (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))] |
343 | "" | |
344 | "@ | |
345 | %.\\tcvt%t0.u16\\t%0, %1; | |
346 | %.\\tst%A0.u8\\t%0, %1;" | |
347 | [(set_attr "subregs_ok" "true")]) | |
348 | ||
349 | (define_insn "truncsi<mode>2" | |
50ad9277 | 350 | [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m") |
8ce80784 | 351 | (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))] |
352 | "" | |
353 | "@ | |
354 | %.\\tcvt%t0.u32\\t%0, %1; | |
355 | %.\\tst%A0.u%T0\\t%0, %1;" | |
356 | [(set_attr "subregs_ok" "true")]) | |
357 | ||
358 | (define_insn "truncdi<mode>2" | |
50ad9277 | 359 | [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m") |
8ce80784 | 360 | (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))] |
361 | "" | |
362 | "@ | |
363 | %.\\tcvt%t0.u64\\t%0, %1; | |
364 | %.\\tst%A0.u%T0\\t%0, %1;" | |
365 | [(set_attr "subregs_ok" "true")]) | |
366 | ||
8ce80784 | 367 | ;; Integer arithmetic |
368 | ||
369 | (define_insn "add<mode>3" | |
370 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
371 | (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
372 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
373 | "" | |
374 | "%.\\tadd%t0\\t%0, %1, %2;") | |
375 | ||
376 | (define_insn "sub<mode>3" | |
377 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
378 | (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
379 | (match_operand:HSDIM 2 "nvptx_register_operand" "R")))] | |
380 | "" | |
381 | "%.\\tsub%t0\\t%0, %1, %2;") | |
382 | ||
383 | (define_insn "mul<mode>3" | |
384 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
385 | (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
386 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
387 | "" | |
388 | "%.\\tmul.lo%t0\\t%0, %1, %2;") | |
389 | ||
390 | (define_insn "*mad<mode>3" | |
391 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
392 | (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
393 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
394 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
395 | "" | |
396 | "%.\\tmad.lo%t0\\t%0, %1, %2, %3;") | |
397 | ||
398 | (define_insn "div<mode>3" | |
399 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
400 | (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
401 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
402 | "" | |
403 | "%.\\tdiv.s%T0\\t%0, %1, %2;") | |
404 | ||
405 | (define_insn "udiv<mode>3" | |
406 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
407 | (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
408 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
409 | "" | |
410 | "%.\\tdiv.u%T0\\t%0, %1, %2;") | |
411 | ||
;; Signed remainder.  Operand 1 is restricted to registers by its predicate | | |
;; (nvptx_register_operand only matches "reg"), so its constraint is plain | | |
;; "R", matching div<mode>3; only operand 2 may be an immediate. | | |
412 | (define_insn "mod<mode>3" | |
413 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
414 | (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
415 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
416 | "" | |
417 | "%.\\trem.s%T0\\t%0, %1, %2;") | |
418 | ||
;; Unsigned remainder.  Operand 1 is restricted to registers by its predicate | | |
;; (nvptx_register_operand only matches "reg"), so its constraint is plain | | |
;; "R", matching udiv<mode>3; only operand 2 may be an immediate. | | |
419 | (define_insn "umod<mode>3" | |
420 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
421 | (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
422 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
423 | "" | |
424 | "%.\\trem.u%T0\\t%0, %1, %2;") | |
425 | ||
426 | (define_insn "smin<mode>3" | |
427 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
428 | (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
429 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
430 | "" | |
431 | "%.\\tmin.s%T0\\t%0, %1, %2;") | |
432 | ||
433 | (define_insn "umin<mode>3" | |
434 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
435 | (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
436 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
437 | "" | |
438 | "%.\\tmin.u%T0\\t%0, %1, %2;") | |
439 | ||
440 | (define_insn "smax<mode>3" | |
441 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
442 | (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
443 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
444 | "" | |
445 | "%.\\tmax.s%T0\\t%0, %1, %2;") | |
446 | ||
447 | (define_insn "umax<mode>3" | |
448 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
449 | (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
450 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
451 | "" | |
452 | "%.\\tmax.u%T0\\t%0, %1, %2;") | |
453 | ||
454 | (define_insn "abs<mode>2" | |
455 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
456 | (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
457 | "" | |
458 | "%.\\tabs.s%T0\\t%0, %1;") | |
459 | ||
460 | (define_insn "neg<mode>2" | |
461 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
462 | (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
463 | "" | |
464 | "%.\\tneg.s%T0\\t%0, %1;") | |
465 | ||
466 | (define_insn "one_cmpl<mode>2" | |
467 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
468 | (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
469 | "" | |
470 | "%.\\tnot.b%T0\\t%0, %1;") | |
471 | ||
472 | (define_insn "bitrev<mode>2" | |
473 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
474 | (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")] | |
475 | UNSPEC_BITREV))] | |
476 | "" | |
477 | "%.\\tbrev.b%T0\\t%0, %1;") | |
478 | ||
479 | (define_insn "clz<mode>2" | |
480 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
481 | (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))] | |
482 | "" | |
b594ca2b | 483 | "%.\\tclz.b%T1\\t%0, %1;") |
8ce80784 | 484 | |
;; Count trailing zeros.  Expanded as two insns: bit-reverse operand 1 into | | |
;; a fresh pseudo of the input mode, then count the leading zeros of that | | |
;; pseudo into operand 0. | | |
485 | (define_expand "ctz<mode>2" | |
486 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
487 | (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))] | |
488 | "" | |
489 | { | |
490 | rtx tmpreg = gen_reg_rtx (<MODE>mode); | |
491 | emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1])); | |
492 | emit_insn (gen_clz<mode>2 (operands[0], tmpreg)); | |
493 | DONE; | |
494 | }) | |
495 | ||
496 | ;; Shifts | |
497 | ||
498 | (define_insn "ashl<mode>3" | |
499 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
500 | (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
501 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
502 | "" | |
503 | "%.\\tshl.b%T0\\t%0, %1, %2;") | |
504 | ||
505 | (define_insn "ashr<mode>3" | |
506 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
507 | (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
508 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
509 | "" | |
510 | "%.\\tshr.s%T0\\t%0, %1, %2;") | |
511 | ||
512 | (define_insn "lshr<mode>3" | |
513 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
514 | (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
515 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
516 | "" | |
517 | "%.\\tshr.u%T0\\t%0, %1, %2;") | |
518 | ||
519 | ;; Logical operations | |
520 | ||
521 | (define_insn "and<mode>3" | |
522 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
523 | (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
524 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
525 | "" | |
526 | "%.\\tand.b%T0\\t%0, %1, %2;") | |
527 | ||
528 | (define_insn "ior<mode>3" | |
529 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
530 | (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
531 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
532 | "" | |
533 | "%.\\tor.b%T0\\t%0, %1, %2;") | |
534 | ||
535 | (define_insn "xor<mode>3" | |
536 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
537 | (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
538 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
539 | "" | |
540 | "%.\\txor.b%T0\\t%0, %1, %2;") | |
541 | ||
542 | ;; Comparisons and branches | |
543 | ||
544 | (define_insn "*cmp<mode>" | |
545 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
546 | (match_operator:BI 1 "nvptx_comparison_operator" | |
547 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") | |
548 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
549 | "" | |
b27697ca | 550 | "%.\\tsetp%c1\\t%0, %2, %3;") |
8ce80784 | 551 | |
552 | (define_insn "*cmp<mode>" | |
553 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
554 | (match_operator:BI 1 "nvptx_float_comparison_operator" | |
555 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") | |
556 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
557 | "" | |
b27697ca | 558 | "%.\\tsetp%c1\\t%0, %2, %3;") |
8ce80784 | 559 | |
560 | (define_insn "jump" | |
561 | [(set (pc) | |
562 | (label_ref (match_operand 0 "" "")))] | |
563 | "" | |
564 | "%.\\tbra\\t%l0;") | |
565 | ||
566 | (define_insn "br_true" | |
567 | [(set (pc) | |
568 | (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
569 | (const_int 0)) | |
570 | (label_ref (match_operand 1 "" "")) | |
571 | (pc)))] | |
572 | "" | |
7fce8768 | 573 | "%j0\\tbra\\t%l1;" |
574 | [(set_attr "predicable" "false")]) | |
8ce80784 | 575 | |
576 | (define_insn "br_false" | |
577 | [(set (pc) | |
578 | (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
579 | (const_int 0)) | |
580 | (label_ref (match_operand 1 "" "")) | |
581 | (pc)))] | |
582 | "" | |
7fce8768 | 583 | "%J0\\tbra\\t%l1;" |
584 | [(set_attr "predicable" "false")]) | |
8ce80784 | 585 | |
b3787ae4 | 586 | ;; Unified conditional branch: the BImode condition is wrapped in UNSPEC_BR_UNIFIED and the branch is output as bra.uni. |
587 | (define_insn "br_true_uni" | |
588 | [(set (pc) (if_then_else | |
589 | (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
590 | UNSPEC_BR_UNIFIED) (const_int 0)) | |
591 | (label_ref (match_operand 1 "" "")) (pc)))] | |
592 | "" | |
7fce8768 | 593 | "%j0\\tbra.uni\\t%l1;" |
594 | [(set_attr "predicable" "false")]) | |
b3787ae4 | 595 | |
596 | (define_insn "br_false_uni" | |
597 | [(set (pc) (if_then_else | |
598 | (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
599 | UNSPEC_BR_UNIFIED) (const_int 0)) | |
600 | (label_ref (match_operand 1 "" "")) (pc)))] | |
601 | "" | |
7fce8768 | 602 | "%J0\\tbra.uni\\t%l1;" |
603 | [(set_attr "predicable" "false")]) | |
b3787ae4 | 604 | |
;; Integer compare-and-branch.  The comparison in operand 0 is passed to | | |
;; nvptx_expand_compare; operand 0 is replaced by the rtx it returns, and | | |
;; operands 1 and 2 by that rtx's two sub-operands. | | |
8ce80784 | 605 | (define_expand "cbranch<mode>4" |
606 | [(set (pc) | |
607 | (if_then_else (match_operator 0 "nvptx_comparison_operator" | |
608 | [(match_operand:HSDIM 1 "nvptx_register_operand" "") | |
9d846e45 | 609 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")]) |
8ce80784 | 610 | (label_ref (match_operand 3 "" "")) |
611 | (pc)))] | |
612 | "" | |
613 | { | |
614 | rtx t = nvptx_expand_compare (operands[0]); | |
615 | operands[0] = t; | |
616 | operands[1] = XEXP (t, 0); | |
617 | operands[2] = XEXP (t, 1); | |
618 | }) | |
619 | ||
620 | (define_expand "cbranch<mode>4" | |
621 | [(set (pc) | |
622 | (if_then_else (match_operator 0 "nvptx_float_comparison_operator" | |
623 | [(match_operand:SDFM 1 "nvptx_register_operand" "") | |
9d846e45 | 624 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")]) |
8ce80784 | 625 | (label_ref (match_operand 3 "" "")) |
626 | (pc)))] | |
627 | "" | |
628 | { | |
629 | rtx t = nvptx_expand_compare (operands[0]); | |
630 | operands[0] = t; | |
631 | operands[1] = XEXP (t, 0); | |
632 | operands[2] = XEXP (t, 1); | |
633 | }) | |
634 | ||
635 | (define_expand "cbranchbi4" | |
636 | [(set (pc) | |
637 | (if_then_else (match_operator 0 "predicate_operator" | |
638 | [(match_operand:BI 1 "nvptx_register_operand" "") | |
639 | (match_operand:BI 2 "const0_operand" "")]) | |
640 | (label_ref (match_operand 3 "" "")) | |
641 | (pc)))] | |
642 | "" | |
643 | "") | |
644 | ||
645 | ;; Conditional stores | |
646 | ||
;; Turn a BImode predicate into an SImode value: (ne:SI pred 0), output as a | | |
;; selp between the literals -1 and 0 controlled by operand 1. | | |
647 | (define_insn "setcc_from_bi" | |
648 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
649 | (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R") | |
650 | (const_int 0)))] | |
651 | "" | |
652 | "%.\\tselp%t0 %0,-1,0,%1;") | |
653 | ||
75e09431 | 654 | (define_insn "sel_true<mode>" |
655 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
656 | (if_then_else:HSDIM | |
657 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
658 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
659 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
660 | "" | |
661 | "%.\\tselp%t0\\t%0, %2, %3, %1;") | |
662 | ||
663 | (define_insn "sel_true<mode>" | |
664 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
665 | (if_then_else:SDFM | |
666 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
667 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
668 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
669 | "" | |
670 | "%.\\tselp%t0\\t%0, %2, %3, %1;") | |
671 | ||
;; Integer select on a false predicate.  Same shape as sel_true<mode>, but | | |
;; the condition is (eq pred 0), so the template hands operands 3 and 2 to | | |
;; selp in swapped order. | | |
672 | (define_insn "sel_false<mode>" | |
673 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
674 | (if_then_else:HSDIM | |
675 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
676 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
677 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
678 | "" | |
679 | "%.\\tselp%t0\\t%0, %3, %2, %1;") | |
680 | ||
681 | (define_insn "sel_false<mode>" | |
682 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
683 | (if_then_else:SDFM | |
684 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
685 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
686 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
687 | "" | |
688 | "%.\\tselp%t0\\t%0, %3, %2, %1;") | |
689 | ||
8ce80784 | 690 | (define_insn "setcc_int<mode>" |
691 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
692 | (match_operator:SI 1 "nvptx_comparison_operator" | |
75e09431 | 693 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") |
694 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
8ce80784 | 695 | "" |
b27697ca | 696 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
8ce80784 | 697 | |
698 | (define_insn "setcc_int<mode>" | |
699 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
700 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
75e09431 | 701 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") |
702 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
8ce80784 | 703 | "" |
b27697ca | 704 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
8ce80784 | 705 | |
706 | (define_insn "setcc_float<mode>" | |
707 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
708 | (match_operator:SF 1 "nvptx_comparison_operator" | |
75e09431 | 709 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") |
710 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
8ce80784 | 711 | "" |
b27697ca | 712 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
8ce80784 | 713 | |
714 | (define_insn "setcc_float<mode>" | |
715 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
716 | (match_operator:SF 1 "nvptx_float_comparison_operator" | |
75e09431 | 717 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") |
718 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
8ce80784 | 719 | "" |
b27697ca | 720 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
8ce80784 | 721 | |
722 | (define_expand "cstorebi4" | |
723 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
724 | (match_operator:SI 1 "ne_operator" | |
725 | [(match_operand:BI 2 "nvptx_register_operand") | |
726 | (match_operand:BI 3 "const0_operand")]))] | |
727 | "" | |
728 | "") | |
729 | ||
730 | (define_expand "cstore<mode>4" | |
731 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
732 | (match_operator:SI 1 "nvptx_comparison_operator" | |
733 | [(match_operand:HSDIM 2 "nvptx_register_operand") | |
734 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))] | |
735 | "" | |
736 | "") | |
737 | ||
738 | (define_expand "cstore<mode>4" | |
739 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
740 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
741 | [(match_operand:SDFM 2 "nvptx_register_operand") | |
742 | (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))] | |
743 | "" | |
744 | "") | |
745 | ||
746 | ;; Calls | |
747 | ||
aeb8e16a | 748 | (define_insn "call_insn_<mode>" |
8ce80784 | 749 | [(match_parallel 2 "call_operation" |
aeb8e16a | 750 | [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs")) |
8ce80784 | 751 | (match_operand 1))])] |
752 | "" | |
753 | { | |
754 | return nvptx_output_call_insn (insn, NULL_RTX, operands[0]); | |
755 | }) | |
756 | ||
aeb8e16a | 757 | (define_insn "call_value_insn_<mode>" |
8ce80784 | 758 | [(match_parallel 3 "call_operation" |
759 | [(set (match_operand 0 "nvptx_register_operand" "=R") | |
aeb8e16a | 760 | (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs")) |
8ce80784 | 761 | (match_operand 2)))])] |
762 | "" | |
763 | { | |
764 | return nvptx_output_call_insn (insn, operands[0], operands[1]); | |
765 | }) | |
766 | ||
767 | (define_expand "call" | |
768 | [(match_operand 0 "" "")] | |
769 | "" | |
770 | { | |
771 | nvptx_expand_call (NULL_RTX, operands[0]); | |
772 | DONE; | |
773 | }) | |
774 | ||
775 | (define_expand "call_value" | |
776 | [(match_operand 0 "" "") | |
777 | (match_operand 1 "" "")] | |
778 | "" | |
779 | { | |
780 | nvptx_expand_call (operands[0], operands[1]); | |
781 | DONE; | |
782 | }) | |
783 | ||
784 | ;; Floating point arithmetic. | |
785 | ||
786 | (define_insn "add<mode>3" | |
787 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
788 | (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
789 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
790 | "" | |
791 | "%.\\tadd%t0\\t%0, %1, %2;") | |
792 | ||
793 | (define_insn "sub<mode>3" | |
794 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
795 | (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
796 | (match_operand:SDFM 2 "nvptx_register_operand" "R")))] | |
797 | "" | |
798 | "%.\\tsub%t0\\t%0, %1, %2;") | |
799 | ||
800 | (define_insn "mul<mode>3" | |
801 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
802 | (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
803 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
804 | "" | |
805 | "%.\\tmul%t0\\t%0, %1, %2;") | |
806 | ||
;; Fused multiply-add for the SDFM modes: operand 0 = operand 1 *
;; operand 2 + operand 3, emitted as a single "fma" instruction.
807 | (define_insn "fma<mode>4" | |
808 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
809 | (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
810 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
811 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
812 | "" | |
813 | "%.\\tfma%#%t0\\t%0, %1, %2, %3;") | |
814 | ||
;; Floating-point division for the SDFM modes: operand 0 = operand 1 /
;; operand 2, emitted as a "div" instruction.
815 | (define_insn "div<mode>3" | |
816 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
817 | (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
818 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
819 | "" | |
820 | "%.\\tdiv%#%t0\\t%0, %1, %2;") | |
821 | ||
;; copysign for the SDFM modes, represented as UNSPEC_COPYSIGN of
;; operands 1 and 2.  Note that the template lists the sources in
;; swapped order: operand 2 first, then operand 1.
822 | (define_insn "copysign<mode>3" | |
823 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
824 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R") | |
825 | (match_operand:SDFM 2 "nvptx_register_operand" "R")] | |
826 | UNSPEC_COPYSIGN))] | |
827 | "" | |
828 | "%.\\tcopysign%t0\\t%0, %2, %1;") | |
829 | ||
;; Floating-point minimum for the SDFM modes (RTL smin), emitted as a
;; "min" instruction on operands 1 and 2.
830 | (define_insn "smin<mode>3" | |
831 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
832 | (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
833 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
834 | "" | |
835 | "%.\\tmin%t0\\t%0, %1, %2;") | |
836 | ||
;; Floating-point maximum for the SDFM modes (RTL smax), emitted as a
;; "max" instruction on operands 1 and 2.
837 | (define_insn "smax<mode>3" | |
838 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
839 | (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
840 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
841 | "" | |
842 | "%.\\tmax%t0\\t%0, %1, %2;") | |
843 | ||
;; Floating-point absolute value for the SDFM modes, emitted as "abs".
844 | (define_insn "abs<mode>2" | |
845 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
846 | (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
847 | "" | |
848 | "%.\\tabs%t0\\t%0, %1;") | |
849 | ||
;; Floating-point negation for the SDFM modes, emitted as "neg".
850 | (define_insn "neg<mode>2" | |
851 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
852 | (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
853 | "" | |
854 | "%.\\tneg%t0\\t%0, %1;") | |
855 | ||
;; Floating-point square root for the SDFM modes, emitted as "sqrt".
856 | (define_insn "sqrt<mode>2" | |
857 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
858 | (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
859 | "" | |
860 | "%.\\tsqrt%#%t0\\t%0, %1;") | |
861 | ||
;; Combined SFmode sine/cosine, available only under
;; flag_unsafe_math_optimizations.  Expands into two sets: operand 0
;; gets UNSPEC_COS of operand 2 and operand 1 gets UNSPEC_SIN of the same
;; input.  The input is first passed through make_safe_from against
;; operand 0 -- presumably so that writing the cosine result cannot
;; clobber the value the sine still has to read (TODO confirm).
3d380077 | 862 | (define_expand "sincossf3" |
863 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
864 | (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")] | |
865 | UNSPEC_COS)) | |
866 | (set (match_operand:SF 1 "nvptx_register_operand" "=R") | |
867 | (unspec:SF [(match_dup 2)] UNSPEC_SIN))] | |
868 | "flag_unsafe_math_optimizations" | |
869 | { | |
870 | operands[2] = make_safe_from (operands[2], operands[0]); | |
871 | }) | |
872 | ||
;; SFmode sine (UNSPEC_SIN), emitted as "sin.approx".  Only enabled
;; under flag_unsafe_math_optimizations.
8ce80784 | 873 | (define_insn "sinsf2" |
874 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
875 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
876 | UNSPEC_SIN))] | |
877 | "flag_unsafe_math_optimizations" | |
878 | "%.\\tsin.approx%t0\\t%0, %1;") | |
879 | ||
;; SFmode cosine (UNSPEC_COS), emitted as "cos.approx".  Only enabled
;; under flag_unsafe_math_optimizations.
880 | (define_insn "cossf2" | |
881 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
882 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
883 | UNSPEC_COS))] | |
884 | "flag_unsafe_math_optimizations" | |
885 | "%.\\tcos.approx%t0\\t%0, %1;") | |
886 | ||
;; SFmode base-2 logarithm (UNSPEC_LOG2), emitted as "lg2.approx".  Only
;; enabled under flag_unsafe_math_optimizations.
887 | (define_insn "log2sf2" | |
888 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
889 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
890 | UNSPEC_LOG2))] | |
891 | "flag_unsafe_math_optimizations" | |
892 | "%.\\tlg2.approx%t0\\t%0, %1;") | |
893 | ||
;; SFmode base-2 exponential (UNSPEC_EXP2), emitted as "ex2.approx".
;; Only enabled under flag_unsafe_math_optimizations.
894 | (define_insn "exp2sf2" | |
895 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
896 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
897 | UNSPEC_EXP2))] | |
898 | "flag_unsafe_math_optimizations" | |
899 | "%.\\tex2.approx%t0\\t%0, %1;") | |
900 | ||
901 | ;; Conversions involving floating point | |
902 | ||
;; Widen an SFmode value to DFmode (float_extend) with a "cvt".
903 | (define_insn "extendsfdf2" | |
904 | [(set (match_operand:DF 0 "nvptx_register_operand" "=R") | |
905 | (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))] | |
906 | "" | |
907 | "%.\\tcvt%t0%t1\\t%0, %1;") | |
908 | ||
;; Narrow a DFmode value to SFmode (float_truncate) with a "cvt".
;; Unlike extendsfdf2, the template carries an extra %# modifier.
909 | (define_insn "truncdfsf2" | |
910 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
911 | (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))] | |
912 | "" | |
913 | "%.\\tcvt%#%t0%t1\\t%0, %1;") | |
914 | ||
;; Convert an unsigned SImode integer to an SDFM floating-point value
;; (unsigned_float); the source type in the template is spelled ".u".
915 | (define_insn "floatunssi<mode>2" | |
916 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
917 | (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
918 | "" | |
919 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") | |
920 | ||
;; Convert a signed SImode integer to an SDFM floating-point value
;; (float); the source type in the template is spelled ".s".
921 | (define_insn "floatsi<mode>2" | |
922 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
923 | (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
924 | "" | |
925 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") | |
926 | ||
;; Convert an unsigned DImode integer to an SDFM floating-point value
;; (unsigned_float); the source type in the template is spelled ".u".
927 | (define_insn "floatunsdi<mode>2" | |
928 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
929 | (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
930 | "" | |
931 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") | |
932 | ||
;; Convert a signed DImode integer to an SDFM floating-point value
;; (float); the source type in the template is spelled ".s".
933 | (define_insn "floatdi<mode>2" | |
934 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
935 | (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
936 | "" | |
937 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") | |
938 | ||
;; Convert an SDFM floating-point value to an unsigned SImode integer
;; (unsigned_fix).  The template fixes the rounding suffix to ".rzi" and
;; the destination type to ".u".
939 | (define_insn "fixuns_trunc<mode>si2" | |
940 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
941 | (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
942 | "" | |
943 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") | |
944 | ||
;; Convert an SDFM floating-point value to a signed SImode integer
;; (fix).  The template fixes the rounding suffix to ".rzi" and the
;; destination type to ".s".
945 | (define_insn "fix_trunc<mode>si2" | |
946 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
947 | (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
948 | "" | |
949 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") | |
950 | ||
;; Convert an SDFM floating-point value to an unsigned DImode integer
;; (unsigned_fix).  The template fixes the rounding suffix to ".rzi" and
;; the destination type to ".u".
951 | (define_insn "fixuns_trunc<mode>di2" | |
952 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
953 | (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
954 | "" | |
955 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") | |
956 | ||
;; Convert an SDFM floating-point value to a signed DImode integer
;; (fix).  The template fixes the rounding suffix to ".rzi" and the
;; destination type to ".s".
957 | (define_insn "fix_trunc<mode>di2" | |
958 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
959 | (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
960 | "" | |
961 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") | |
962 | ||
;; Round-to-integral patterns that keep the floating-point mode.
;; FPINT iterates over four unspecs; fpint_name supplies the pattern
;; name stem (floor, btrunc, ceil, nearbyint) and fpint_roundingmode the
;; text spliced after "cvt".  Three entries use a fixed suffix (.rmi,
;; .rzi, .rpi); nearbyint instead splices "%#i", so its rounding text is
;; produced by the %# output modifier at print time.
963 | (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC | |
964 | UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT]) | |
965 | (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor") | |
966 | (UNSPEC_FPINT_BTRUNC "btrunc") | |
967 | (UNSPEC_FPINT_CEIL "ceil") | |
968 | (UNSPEC_FPINT_NEARBYINT "nearbyint")]) | |
969 | (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") | |
970 | (UNSPEC_FPINT_BTRUNC ".rzi") | |
971 | (UNSPEC_FPINT_CEIL ".rpi") | |
972 | (UNSPEC_FPINT_NEARBYINT "%#i")]) | |
973 | |
;; One insn per (FPINT unspec, SDFM mode) pair; source and destination
;; share the same floating-point mode.
974 | (define_insn "<FPINT:fpint_name><SDFM:mode>2" | |
975 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
976 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
977 | FPINT))] | |
978 | "" | |
979 | "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;") | |
980 | ||
;; Floor/ceil conversions from a floating-point mode to an integer mode.
;; FPINT2 covers only the floor and ceil unspecs; fpint2_name gives the
;; name stems lfloor/lceil and fpint2_roundingmode the suffixes
;; .rmi/.rpi.  The insn is instantiated for every SDFM source mode and
;; SDIM destination mode, and the destination type is spelled ".s".
981 | (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL]) | |
982 | (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor") | |
983 | (UNSPEC_FPINT_CEIL "lceil")]) | |
984 | (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") | |
985 | (UNSPEC_FPINT_CEIL ".rpi")]) | |
986 | |
987 | (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2" | |
988 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
989 | (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
990 | FPINT2))] | |
991 | "" | |
992 | "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;") | |
993 | ||
994 | ;; Miscellaneous | |
995 | ||
;; No-op insn, represented as (const_int 0).  Its output template is the
;; empty string, so it produces no assembly text.
996 | (define_insn "nop" | |
997 | [(const_int 0)] | |
998 | "" | |
999 | "") | |
1000 | ||
;; Insn represented as (const_int 1) that emits a bare "exit;".
f3c313db | 1001 | (define_insn "exit" |
1002 | [(const_int 1)] | |
1003 | "" | |
1004 | "exit;") | |
1005 | ||
;; Insn represented as (const_int 2).  Unlike "nop" it does emit text: a
;; braced block that declares two .u32 registers (%nop_src, %nop_dst)
;; and moves one into the other.
47282587 | 1006 | (define_insn "fake_nop" |
1007 | [(const_int 2)] | |
1008 | "" | |
1009 | "{ | |
1010 | .reg .u32 %%nop_src; | |
1011 | .reg .u32 %%nop_dst; | |
1012 | mov.u32 %%nop_dst, %%nop_src; | |
1013 | }") | |
1014 | ||
;; Function return.  The text is produced by nvptx_output_return; the
;; insn is marked as not predicable.
8ce80784 | 1015 | (define_insn "return" |
1016 | [(return)] | |
1017 | "" | |
1018 | { | |
1019 | return nvptx_output_return (); | |
7fce8768 | 1020 | } |
1021 | [(set_attr "predicable" "false")]) | |
8ce80784 | 1022 | |
;; Function epilogue.  With TARGET_SOFT_STACK it first emits
;; set_softstack_insn on the Pmode register SOFTSTACK_PREV_REGNUM; in all
;; cases it then emits the "return" jump insn.  DONE suppresses the
;; placeholder clobber pattern.
1023 | (define_expand "epilogue" | |
1024 | [(clobber (const_int 0))] | |
1025 | "" | |
1026 | { | |
7fce8768 | 1027 | if (TARGET_SOFT_STACK) |
1028 | emit_insn (gen_set_softstack_insn (gen_rtx_REG (Pmode, | |
1029 | SOFTSTACK_PREV_REGNUM))); | |
8ce80784 | 1030 | emit_jump_insn (gen_return ()); |
1031 | DONE; | |
1032 | }) | |
1033 | ||
;; Nonlocal goto is not supported on this target.  The expander reports
;; that through sorry () and emits a "nop" in place of the four-operand
;; pattern.  The diagnostic text carries no trailing period, following
;; the GNU convention for error messages.
1034 | (define_expand "nonlocal_goto" | |
1035 | [(match_operand 0 "" "") | |
1036 | (match_operand 1 "" "") | |
1037 | (match_operand 2 "" "") | |
1038 | (match_operand 3 "" "")] | |
1039 | "" | |
1040 | { | |
1041 | sorry ("target cannot support nonlocal goto"); | |
1042 | emit_insn (gen_nop ()); | |
1043 | DONE; | |
1044 | }) | |
1045 | ||
;; Receiver side of a nonlocal goto, likewise unsupported: the expander
;; only reports the limitation through sorry ().  There is no DONE, so
;; the (const_int 0) pattern is what gets emitted afterwards.  The
;; diagnostic text carries no trailing period, following the GNU
;; convention for error messages.
1046 | (define_expand "nonlocal_goto_receiver" | |
1047 | [(const_int 0)] | |
1048 | "" | |
1049 | { | |
1050 | sorry ("target cannot support nonlocal goto"); | |
1051 | }) | |
1052 | ||
;; Dynamic stack allocation.  Operand 0 receives the address of the new
;; space, operand 1 is the amount to allocate.
;; With TARGET_SOFT_STACK: subtract operand 1 from the stack pointer,
;; emit set_softstack_insn on the updated stack pointer, then copy
;; virtual_stack_dynamic_rtx into operand 0.
;; Otherwise: report the limitation through sorry () and emit a "nop".
;; The diagnostic text carries no trailing period, following the GNU
;; convention for error messages.
ed20400d | 1053 | (define_expand "allocate_stack" |
1054 | [(match_operand 0 "nvptx_register_operand") | |
1055 | (match_operand 1 "nvptx_register_operand")] | |
1056 | "" | |
1057 | { | |
7fce8768 | 1058 | if (TARGET_SOFT_STACK) |
1059 | { | |
1060 | emit_move_insn (stack_pointer_rtx, | |
1061 | gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1])); | |
1062 | emit_insn (gen_set_softstack_insn (stack_pointer_rtx)); | |
1063 | emit_move_insn (operands[0], virtual_stack_dynamic_rtx); | |
1064 | DONE; | |
1065 | } | |
f289122f | 1066 | /* The ptx documentation specifies an alloca intrinsic (for 32 bit |
1067 | only) but notes it is not implemented. The assembler emits a | |
1068 | confused error message. Issue a blunt one now instead. */ | |
1069 | sorry ("target cannot support alloca"); | |
1070 | emit_insn (gen_nop ()); | |
1071 | DONE; | |
ed20400d | 1072 | }) |
1073 | ||
;; UNSPEC_SET_SOFTSTACK on a register operand; only available with
;; TARGET_SOFT_STACK.  The text is produced by
;; nvptx_output_set_softstack, which receives the register number of
;; operand 0.
7fce8768 | 1074 | (define_insn "set_softstack_insn" |
1075 | [(unspec [(match_operand 0 "nvptx_register_operand" "R")] | |
1076 | UNSPEC_SET_SOFTSTACK)] | |
1077 | "TARGET_SOFT_STACK" | |
1078 | { | |
1079 | return nvptx_output_set_softstack (REGNO (operands[0])); | |
1080 | }) | |
8ce80784 | 1081 | |
;; Block-level stack restore.  With TARGET_SOFT_STACK, operand 1 is
;; copied into operand 0 and set_softstack_insn is emitted on operand 0.
;; Without it, nothing is emitted at all.
1082 | (define_expand "restore_stack_block" | |
1083 | [(match_operand 0 "register_operand" "") | |
1084 | (match_operand 1 "register_operand" "")] | |
1085 | "" | |
1086 | { | |
7fce8768 | 1087 | if (TARGET_SOFT_STACK) |
1088 | { | |
1089 | emit_move_insn (operands[0], operands[1]); | |
1090 | emit_insn (gen_set_softstack_insn (operands[0])); | |
1091 | } | |
8ce80784 | 1092 | DONE; |
1093 | }) | |
1094 | ||
;; Function-level stack restore.  Deliberately empty: the body is just
;; DONE, so no insns are generated.
1095 | (define_expand "restore_stack_function" | |
1096 | [(match_operand 0 "register_operand" "") | |
1097 | (match_operand 1 "register_operand" "")] | |
1098 | "" | |
1099 | { | |
1100 | DONE; | |
1101 | }) | |
1102 | ||
;; Unconditional trap (trap_if with a constant-true condition).  Emits
;; "trap;" followed by "exit;".
1103 | (define_insn "trap" | |
1104 | [(trap_if (const_int 1) (const_int 0))] | |
1105 | "" | |
38017cab | 1106 | "trap; exit;") |
8ce80784 | 1107 | |
;; Trap when the BImode operand 0 is non-zero.  Both emitted
;; instructions ("trap;" and "exit;") are individually prefixed with
;; %j0; the insn itself is marked as not predicable.
1108 | (define_insn "trap_if_true" | |
1109 | [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
1110 | (const_int 0)) | |
1111 | (const_int 0))] | |
1112 | "" | |
38017cab | 1113 | "%j0 trap; %j0 exit;" |
7fce8768 | 1114 | [(set_attr "predicable" "false")]) |
8ce80784 | 1115 | |
;; Trap when the BImode operand 0 is zero.  Mirrors trap_if_true but
;; prefixes both instructions with %J0 instead of %j0; the insn itself
;; is marked as not predicable.
1116 | (define_insn "trap_if_false" | |
1117 | [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
1118 | (const_int 0)) | |
1119 | (const_int 0))] | |
1120 | "" | |
38017cab | 1121 | "%J0 trap; %J0 exit;" |
7fce8768 | 1122 | [(set_attr "predicable" "false")]) |
8ce80784 | 1123 | |
;; Conditional trap on an integer (SDIM) comparison.  The comparison in
;; operand 0 is handed to nvptx_expand_compare, and the rtx it returns
;; is used as the condition of a trap_if_true insn.
1124 | (define_expand "ctrap<mode>4" | |
1125 | [(trap_if (match_operator 0 "nvptx_comparison_operator" | |
1126 | [(match_operand:SDIM 1 "nvptx_register_operand") | |
1127 | (match_operand:SDIM 2 "nvptx_nonmemory_operand")]) | |
25ce1bcb | 1128 | (match_operand 3 "const0_operand"))] |
8ce80784 | 1129 | "" |
1130 | { | |
1131 | rtx t = nvptx_expand_compare (operands[0]); | |
1132 | emit_insn (gen_trap_if_true (t)); | |
1133 | DONE; | |
1134 | }) | |
1135 | ||
;; Load the size of an OpenACC dimension into operand 0.  The constant
;; in operand 1 indexes a three-entry table of templates: 0 (gang) reads
;; %nctaid.x, 1 (worker) reads %ntid.y, 2 (vector) reads %ntid.x.
;; NOTE(review): the index is used without a range check, so callers are
;; relied upon to pass only 0..2.
b3787ae4 | 1136 | (define_insn "oacc_dim_size" |
1137 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
1138 | (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] | |
1139 | UNSPEC_DIM_SIZE))] | |
8ce80784 | 1140 | "" |
b3787ae4 | 1141 | { |
1142 | static const char *const asms[] = | |
1143 | { /* Must match oacc_loop_levels ordering. */ | |
1144 | "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */ | |
1145 | "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */ | |
1146 | "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */ | |
1147 | }; | |
1148 | return asms[INTVAL (operands[1])]; | |
1149 | }) | |
8ce80784 | 1150 | |
;; Load the position within an OpenACC dimension into operand 0.  The
;; constant in operand 1 indexes a three-entry table of templates: 0
;; (gang) reads %ctaid.x, 1 (worker) reads %tid.y, 2 (vector) reads
;; %tid.x.  Unlike oacc_dim_size this is an unspec_volatile.
;; NOTE(review): the index is used without a range check, so callers are
;; relied upon to pass only 0..2.
b3787ae4 | 1151 | (define_insn "oacc_dim_pos" |
8ce80784 | 1152 | [(set (match_operand:SI 0 "nvptx_register_operand" "") |
b3787ae4 | 1153 | (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")] |
1154 | UNSPECV_DIM_POS))] | |
8ce80784 | 1155 | "" |
1156 | { | |
b3787ae4 | 1157 | static const char *const asms[] = |
1158 | { /* Must match oacc_loop_levels ordering. */ | |
1159 | "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */ | |
1160 | "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */ | |
1161 | "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */ | |
1162 | }; | |
1163 | return asms[INTVAL (operands[1])]; | |
8ce80784 | 1164 | }) |
1165 | ||
;; UNSPECV_FORK marker carrying a constant operand.  The template is
;; only a "// fork %0;" line, and the insn is not predicable.
;; NOTE(review): presumably a marker consumed by a later target pass --
;; confirm against the users of UNSPECV_FORK.
b3787ae4 | 1166 | (define_insn "nvptx_fork" |
1167 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1168 | UNSPECV_FORK)] | |
8ce80784 | 1169 | "" |
b3787ae4 | 1170 | "// fork %0;" |
7fce8768 | 1171 | [(set_attr "predicable" "false")]) |
8ce80784 | 1172 | |
;; UNSPECV_FORKED marker carrying a constant operand.  The template is
;; only a "// forked %0;" line, and the insn is not predicable.
b3787ae4 | 1173 | (define_insn "nvptx_forked" |
1174 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1175 | UNSPECV_FORKED)] | |
1176 | "" | |
1177 | "// forked %0;" | |
7fce8768 | 1178 | [(set_attr "predicable" "false")]) |
b3787ae4 | 1179 | |
;; UNSPECV_JOINING marker carrying a constant operand.  The template is
;; only a "// joining %0;" line, and the insn is not predicable.
1180 | (define_insn "nvptx_joining" | |
1181 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1182 | UNSPECV_JOINING)] | |
1183 | "" | |
1184 | "// joining %0;" | |
7fce8768 | 1185 | [(set_attr "predicable" "false")]) |
b3787ae4 | 1186 | |
;; UNSPECV_JOIN marker carrying a constant operand.  The template is
;; only a "// join %0;" line, and the insn is not predicable.
1187 | (define_insn "nvptx_join" | |
1188 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1189 | UNSPECV_JOIN)] | |
1190 | "" | |
1191 | "// join %0;" | |
7fce8768 | 1192 | [(set_attr "predicable" "false")]) |
b3787ae4 | 1193 | |
;; OpenACC fork expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0.  The rest is delegated to
;; nvptx_expand_oacc_fork, which receives the integer value of the
;; constant operand 2.
1194 | (define_expand "oacc_fork" | |
1195 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
6bd291cd | 1196 | (match_operand:SI 1 "general_operand" "")) |
b3787ae4 | 1197 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] |
1198 | UNSPECV_FORKED)] | |
8ce80784 | 1199 | "" |
1200 | { | |
b3787ae4 | 1201 | if (operands[0] != const0_rtx) |
1202 | emit_move_insn (operands[0], operands[1]); | |
1203 | nvptx_expand_oacc_fork (INTVAL (operands[2])); | |
1204 | DONE; | |
1205 | }) | |
1206 | ||
;; OpenACC join expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0.  The rest is delegated to
;; nvptx_expand_oacc_join, which receives the integer value of the
;; constant operand 2.
1207 | (define_expand "oacc_join" | |
1208 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
6bd291cd | 1209 | (match_operand:SI 1 "general_operand" "")) |
b3787ae4 | 1210 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] |
1211 | UNSPECV_JOIN)] | |
1212 | "" | |
1213 | { | |
1214 | if (operands[0] != const0_rtx) | |
1215 | emit_move_insn (operands[0], operands[1]); | |
1216 | nvptx_expand_oacc_join (INTVAL (operands[2])); | |
1217 | DONE; | |
8ce80784 | 1218 | }) |
1219 | ||
;; Shuffle for the BITS modes (UNSPEC_SHUFFLE).  Operand 1 is the value
;; being shuffled, operand 2 a register or immediate, and operand 3 a
;; constant that is printed through %S3 right after "shfl".  The
;; template hard-codes the ".b32" type and a final operand of 31.
b3787ae4 | 1220 | ;; only 32-bit shuffles exist. |
1221 | (define_insn "nvptx_shuffle<mode>" | |
1222 | [(set (match_operand:BITS 0 "nvptx_register_operand" "=R") | |
1223 | (unspec:BITS | |
1224 | [(match_operand:BITS 1 "nvptx_register_operand" "R") | |
1225 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") | |
1226 | (match_operand:SI 3 "const_int_operand" "n")] | |
1227 | UNSPEC_SHUFFLE))] | |
1228 | "" | |
1229 | "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;") | |
1230 | ||
;; UNSPEC_VOTE_BALLOT: produce an SImode result in operand 0 from the
;; BImode predicate in operand 1, emitted as "vote.ballot.b32".
7fce8768 | 1231 | (define_insn "nvptx_vote_ballot" |
1232 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1233 | (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")] | |
1234 | UNSPEC_VOTE_BALLOT))] | |
1235 | "" | |
1236 | "%.\\tvote.ballot.b32\\t%0, %1;") | |
1237 | ||
1238 | ;; Patterns for OpenMP SIMD-via-SIMT lowering | |
1239 | ||
;; The insn behind the omp_simt_enter expander: operand 0 is set from an
;; UNSPECV_SIMT_ENTER of operands 1 and 2.  The text is produced by
;; nvptx_output_simt_enter, which receives all three operands.
1b576300 | 1240 | (define_insn "omp_simt_enter_insn" |
1241 | [(set (match_operand 0 "nvptx_register_operand" "=R") | |
1242 | (unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri") | |
1243 | (match_operand 2 "nvptx_nonmemory_operand" "Ri")] | |
1244 | UNSPECV_SIMT_ENTER))] | |
1245 | "" | |
1246 | { | |
1247 | return nvptx_output_simt_enter (operands[0], operands[1], operands[2]); | |
1248 | }) | |
1249 | ||
;; Expander that updates per-function bookkeeping in cfun->machine
;; before emitting omp_simt_enter_insn:
;;  - simt_stack_size becomes HOST_WIDE_INT_M1U when operand 1 is not a
;;    CONST_INT, otherwise the maximum of its previous value and
;;    operand 1;
;;  - simt_stack_align becomes the maximum of its previous value and the
;;    constant operand 2;
;;  - has_simtreg is set.
1250 | (define_expand "omp_simt_enter" | |
1251 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1252 | (match_operand 1 "nvptx_nonmemory_operand" "Ri") | |
1253 | (match_operand 2 "const_int_operand" "n")] | |
1254 | "" | |
1255 | { | |
1256 | if (!CONST_INT_P (operands[1])) | |
1257 | cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U; | |
1258 | else | |
1259 | cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]), | |
1260 | cfun->machine->simt_stack_size); | |
1261 | cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]), | |
1262 | cfun->machine->simt_stack_align); | |
1263 | cfun->machine->has_simtreg = true; | |
1264 | emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2])); | |
1265 | DONE; | |
1266 | }) | |
1267 | ||
;; UNSPECV_SIMT_EXIT on a register operand.  The text is produced by
;; nvptx_output_simt_exit, which receives operand 0.
1268 | (define_insn "omp_simt_exit" | |
1269 | [(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")] | |
1270 | UNSPECV_SIMT_EXIT)] | |
1271 | "" | |
1272 | { | |
1273 | return nvptx_output_simt_exit (operands[0]); | |
1274 | }) | |
1275 | ||
7fce8768 | 1276 | ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index |
;; Represented as UNSPEC_LANEID and emitted as a mov.u32 from %laneid.
1277 | (define_insn "omp_simt_lane" | |
1278 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
1279 | (unspec:SI [(const_int 0)] UNSPEC_LANEID))] | |
1280 | "" | |
1281 | "%.\\tmov.u32\\t%0, %%laneid;") | |
1282 | ||
1283 | ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and | |
1284 | ;; place a compiler barrier to disallow unrolling/peeling the containing loop | |
;; The barrier is the nvptx_nounroll insn, emitted right after the move.
1285 | (define_expand "omp_simt_ordered" | |
1286 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1287 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1288 | "" | |
1289 | { | |
1290 | emit_move_insn (operands[0], operands[1]); | |
1291 | emit_insn (gen_nvptx_nounroll ()); | |
1292 | DONE; | |
1293 | }) | |
1294 | ||
1295 | ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange | |
1296 | ;; across lanes | |
;; Emits the insn built by nvptx_gen_shuffle for operands 0, 1 and 2
;; with shuffle kind SHUFFLE_BFLY.
1297 | (define_expand "omp_simt_xchg_bfly" | |
1298 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1299 | (match_operand 1 "nvptx_register_operand" "R") | |
1300 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] | |
1301 | "" | |
1302 | { | |
1303 | emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2], | |
1304 | SHUFFLE_BFLY)); | |
1305 | DONE; | |
1306 | }) | |
1307 | ||
1308 | ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1 | |
1309 | ;; from lane given by index in operand 2 to operand 0 in all lanes | |
;; Emits the insn built by nvptx_gen_shuffle for operands 0, 1 and 2
;; with shuffle kind SHUFFLE_IDX.
1310 | (define_expand "omp_simt_xchg_idx" | |
1311 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1312 | (match_operand 1 "nvptx_register_operand" "R") | |
1313 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] | |
1314 | "" | |
1315 | { | |
1316 | emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2], | |
1317 | SHUFFLE_IDX)); | |
1318 | DONE; | |
1319 | }) | |
1320 | ||
1321 | ;; Implement IFN_GOMP_SIMT_VOTE_ANY: | |
1322 | ;; set operand 0 to zero iff all lanes supply zero in operand 1 | |
;; Done in two steps: a fresh BImode predicate is set to
;; (operand 1 != 0), then nvptx_vote_ballot of that predicate is written
;; to operand 0.
1323 | (define_expand "omp_simt_vote_any" | |
1324 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1325 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1326 | "" | |
1327 | { | |
1328 | rtx pred = gen_reg_rtx (BImode); | |
1329 | emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx)); | |
1330 | emit_insn (gen_nvptx_vote_ballot (operands[0], pred)); | |
1331 | DONE; | |
1332 | }) | |
1333 | ||
1334 | ;; Implement IFN_GOMP_SIMT_LAST_LANE: | |
1335 | ;; set operand 0 to the lowest lane index that passed non-zero in operand 1 | |
;; Done in three steps: a fresh BImode predicate is set to
;; (operand 1 != 0), nvptx_vote_ballot of it goes into an SImode
;; temporary, and ctzsi2 of that temporary is written to operand 0.
1336 | (define_expand "omp_simt_last_lane" | |
1337 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1338 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1339 | "" | |
1340 | { | |
1341 | rtx pred = gen_reg_rtx (BImode); | |
1342 | rtx tmp = gen_reg_rtx (SImode); | |
1343 | emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx)); | |
1344 | emit_insn (gen_nvptx_vote_ballot (tmp, pred)); | |
1345 | emit_insn (gen_ctzsi2 (operands[0], tmp)); | |
1346 | DONE; | |
1347 | }) | |
1348 | ||
b3787ae4 | 1349 | ;; extract parts of a 64 bit object into 2 32-bit ints |
;; Operand 2 is the BITD source; operands 0 and 1 are the two SImode
;; results, tagged (const_int 0) and (const_int 1) in the
;; UNSPEC_BIT_CONV.  A single mov.b64 writes both as "{%0,%1}".
1350 | (define_insn "unpack<mode>si2" | |
1351 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1352 | (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R") | |
1353 | (const_int 0)] UNSPEC_BIT_CONV)) | |
1354 | (set (match_operand:SI 1 "nvptx_register_operand" "=R") | |
1355 | (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))] | |
1356 | "" | |
1357 | "%.\\tmov.b64\\t{%0,%1}, %2;") | |
1358 | ||
1359 | ;; pack 2 32-bit ints into a 64 bit object | |
;; Operands 1 and 2 are the SImode inputs and operand 0 the BITD result;
;; a single mov.b64 reads them as "{%1,%2}".
1360 | (define_insn "packsi<mode>2" | |
1361 | [(set (match_operand:BITD 0 "nvptx_register_operand" "=R") | |
1362 | (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R") | |
1363 | (match_operand:SI 2 "nvptx_register_operand" "R")] | |
1364 | UNSPEC_BIT_CONV))] | |
1365 | "" | |
1366 | "%.\\tmov.b64\\t%0, {%1,%2};") | |
1367 | ||
8ce80784 | 1368 | ;; Atomic insns. |
1369 | ||
;; Compare-and-swap expander for the SDIM modes.  It emits the _1 insn
;; with oldval, memory, expected, newval and the success model; the
;; is_weak flag (operand 5) and the failure model (operand 7) are not
;; used.  The boolean result is then derived separately: a BImode
;; condition (oldval == expected) selects 1 or 0 into operand 0 through
;; sel_truesi.
1370 | (define_expand "atomic_compare_and_swap<mode>" | |
1371 | [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output | |
1372 | (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output | |
1373 | (match_operand:SDIM 2 "memory_operand") ;; memory | |
1374 | (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input | |
1375 | (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input | |
1376 | (match_operand:SI 5 "const_int_operand") ;; is_weak | |
1377 | (match_operand:SI 6 "const_int_operand") ;; success model | |
1378 | (match_operand:SI 7 "const_int_operand")] ;; failure model | |
1379 | "" | |
1380 | { | |
c68f6b1c | 1381 | emit_insn (gen_atomic_compare_and_swap<mode>_1 |
1382 | (operands[1], operands[2], operands[3], operands[4], operands[6])); | |
1383 | |
1384 | rtx cond = gen_reg_rtx (BImode); | |
1385 | emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); | |
1386 | emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0))); | |
8ce80784 | 1387 | DONE; |
1388 | }) | |
1389 | ||
;; The compare-and-swap instruction itself (UNSPECV_CAS), emitted as
;; "atom...cas.b<size>".  Operand 0 is the register result, operand 1
;; the memory location (read and written), operands 2 and 3 the two
;; value inputs, and operand 4 a constant that does not appear in the
;; template.  The second set records that the memory is modified.
1390 | (define_insn "atomic_compare_and_swap<mode>_1" | |
1391 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1392 | (unspec_volatile:SDIM | |
1393 | [(match_operand:SDIM 1 "memory_operand" "+m") | |
89f6d4a2 | 1394 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") |
1395 | (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") | |
8ce80784 | 1396 | (match_operand:SI 4 "const_int_operand")] |
1397 | UNSPECV_CAS)) | |
1398 | (set (match_dup 1) | |
1399 | (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] | |
1400 | "" | |
7fce8768 | 1401 | "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;" |
1402 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1403 | |
;; Atomic exchange for the SDIM modes (UNSPECV_XCHG), emitted as
;; "atom...exch.b<size>".  Operand 0 receives the result, operand 1 is
;; the memory location, and operand 2 is the value stored into it.  The
;; model constant (operand 3) does not appear in the template.
1404 | (define_insn "atomic_exchange<mode>" | |
1405 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output | |
1406 | (unspec_volatile:SDIM | |
1407 | [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory | |
1408 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1409 | UNSPECV_XCHG)) | |
1410 | (set (match_dup 1) | |
89f6d4a2 | 1411 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input |
8ce80784 | 1412 | "" |
7fce8768 | 1413 | "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;" |
1414 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1415 | |
;; Atomic fetch-and-add for the SDIM modes (UNSPECV_LOCK), emitted as
;; "atom...add".  Memory operand 1 is updated with (operand 1 +
;; operand 2) and operand 0 is set from the memory location.  The model
;; constant (operand 3) does not appear in the template.
1416 | (define_insn "atomic_fetch_add<mode>" | |
1417 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
1418 | (unspec_volatile:SDIM | |
1419 | [(plus:SDIM (match_dup 1) | |
1420 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
1421 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1422 | UNSPECV_LOCK)) | |
1423 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1424 | (match_dup 1))] | |
1425 | "" | |
7fce8768 | 1426 | "%.\\tatom%A1.add%t0\\t%0, %1, %2;" |
1427 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1428 | |
;; SFmode counterpart of atomic_fetch_add<mode>, using the same
;; template.  Memory operand 1 is updated with (operand 1 + operand 2)
;; and operand 0 is set from the memory location.
1429 | (define_insn "atomic_fetch_addsf" | |
1430 | [(set (match_operand:SF 1 "memory_operand" "+m") | |
1431 | (unspec_volatile:SF | |
1432 | [(plus:SF (match_dup 1) | |
1433 | (match_operand:SF 2 "nvptx_nonmemory_operand" "RF")) | |
1434 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1435 | UNSPECV_LOCK)) | |
1436 | (set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1437 | (match_dup 1))] | |
1438 | "" | |
7fce8768 | 1439 | "%.\\tatom%A1.add%t0\\t%0, %1, %2;" |
1440 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1441 | |
;; Atomic fetch-and-{and,or,xor} for the SDIM modes.  any_logic iterates
;; over the three RTL codes and the "logic" attribute supplies the
;; matching mnemonic (note ior maps to "or").  The insn is enabled for
;; SImode always and for other modes only with TARGET_SM35.
1442 | (define_code_iterator any_logic [and ior xor]) | |
1443 | (define_code_attr logic [(and "and") (ior "or") (xor "xor")]) | |
1444 | |
8ce80784 | 1445 | (define_insn "atomic_fetch_<logic><mode>" |
1446 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
1447 | (unspec_volatile:SDIM | |
1448 | [(any_logic:SDIM (match_dup 1) | |
1449 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
1450 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1451 | UNSPECV_LOCK)) | |
1452 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1453 | (match_dup 1))] | |
ab841e4d | 1454 | "<MODE>mode == SImode || TARGET_SM35" |
7fce8768 | 1455 | "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;" |
1456 | [(set_attr "atomic" "true")]) | |
b3787ae4 | 1457 | |
;; Barrier (UNSPECV_BARSYNC); not predicable.  When the constant
;; operand 1 is zero only operand 0 is printed ("bar.sync %0"),
;; otherwise both operands are printed ("bar.sync %0, %1").
1458 | (define_insn "nvptx_barsync" | |
b1a50cdd | 1459 | [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri") |
1460 | (match_operand:SI 1 "const_int_operand")] | |
b3787ae4 | 1461 | UNSPECV_BARSYNC)] |
1462 | "" | |
b1a50cdd | 1463 | { |
1464 | if (INTVAL (operands[1]) == 0) | |
1465 | return "\\tbar.sync\\t%0;"; | |
1466 | else | |
1467 | return "\\tbar.sync\\t%0, %1;"; | |
1468 | } | |
7fce8768 | 1469 | [(set_attr "predicable" "false")]) |
1470 | ||
;; Expander for a full memory barrier.  Operand 0 is not supplied by the
;; caller: it is created here as a volatile BLKmode MEM whose address is
;; a Pmode SCRATCH, and the UNSPECV_MEMBAR set on it is then emitted.
e163ceb5 | 1471 | (define_expand "memory_barrier" |
1472 | [(set (match_dup 0) | |
1473 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))] | |
1474 | "" | |
1475 | { | |
1476 | operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
1477 | MEM_VOLATILE_P (operands[0]) = 1; | |
1478 | }) | |
1479 | ||
1480 | ;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys | |
1481 | ;; (corresponding to cuda functions threadfence_block, threadfence and | |
1482 | ;; threadfence_system). For the insn memory_barrier we use membar.sys. This | |
1483 | ;; may be overconservative, but before using membar.gl instead we'll need to | |
1484 | ;; explain in detail why it's safe to use. For now, use membar.sys. | |
;; Matches the UNSPECV_MEMBAR set produced by the memory_barrier
;; expander; not predicable.
1485 | (define_insn "*memory_barrier" | |
1486 | [(set (match_operand:BLK 0 "" "") | |
1487 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))] | |
1488 | "" | |
1489 | "\\tmembar.sys;" | |
1490 | [(set_attr "predicable" "false")]) | |
1491 | ||
;; Expander for a membar.cta barrier.  As in memory_barrier, operand 0
;; is created here as a volatile BLKmode MEM with a Pmode SCRATCH
;; address; the set uses UNSPECV_MEMBAR_CTA.
19b20a5c | 1492 | (define_expand "nvptx_membar_cta" |
1493 | [(set (match_dup 0) | |
1494 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))] | |
1495 | "" | |
1496 | { | |
1497 | operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
1498 | MEM_VOLATILE_P (operands[0]) = 1; | |
1499 | }) | |
1500 | ||
;; Matches the UNSPECV_MEMBAR_CTA set produced by the nvptx_membar_cta
;; expander and emits "membar.cta;"; not predicable.
1501 | (define_insn "*nvptx_membar_cta" | |
1502 | [(set (match_operand:BLK 0 "" "") | |
1503 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))] | |
1504 | "" | |
1505 | "\\tmembar.cta;" | |
1506 | [(set_attr "predicable" "false")]) | |
1507 | ||
;; UNSPECV_NOUNROLL: emits the directive .pragma "nounroll"; not
;; predicable.  Used by omp_simt_ordered as its compiler barrier.
7fce8768 | 1508 | (define_insn "nvptx_nounroll" |
1509 | [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)] | |
1510 | "" | |
1511 | "\\t.pragma \\\"nounroll\\\";" | |
1512 | [(set_attr "predicable" "false")]) | |
ed42202d | 1513 | |
1514 | (define_insn "nvptx_red_partition" | |
1515 | [(set (match_operand:DI 0 "nonimmediate_operand" "=R") | |
aeb8e16a | 1516 | (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")] |
ed42202d | 1517 | UNSPECV_RED_PART))] |
1518 | "" | |
1519 | { | |
1520 | return nvptx_output_red_partition (operands[0], operands[1]); | |
1521 | } | |
1522 | [(set_attr "predicable" "false")]) |