]>
Commit | Line | Data |
---|---|---|
8ce80784 | 1 | ;; Machine description for NVPTX. |
8e8f6434 | 2 | ;; Copyright (C) 2014-2018 Free Software Foundation, Inc. |
8ce80784 | 3 | ;; Contributed by Bernd Schmidt <bernds@codesourcery.com> |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify | |
8 | ;; it under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, | |
13 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | ;; GNU General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
;; Codes for the (unspec ...) operations used by patterns in this machine
;; description, e.g. UNSPEC_ARG_REG in load_arg_reg<mode>, UNSPEC_BITREV in
;; bitrev<mode>2 and UNSPEC_BR_UNIFIED in the br_*_uni branches.
21 | (define_c_enum "unspec" [ | |
22 | UNSPEC_ARG_REG | |
8ce80784 | 23 | |
8ce80784 | 24 | UNSPEC_COPYSIGN |
25 | UNSPEC_LOG2 | |
26 | UNSPEC_EXP2 | |
27 | UNSPEC_SIN | |
28 | UNSPEC_COS | |
29 | ||
30 | UNSPEC_FPINT_FLOOR | |
31 | UNSPEC_FPINT_BTRUNC | |
32 | UNSPEC_FPINT_CEIL | |
33 | UNSPEC_FPINT_NEARBYINT | |
34 | ||
35 | UNSPEC_BITREV | |
36 | ||
37 | UNSPEC_ALLOCA | |
38 | ||
7fce8768 | 39 | UNSPEC_SET_SOFTSTACK |
40 | ||
b3787ae4 | 41 | UNSPEC_DIM_SIZE |
42 | ||
b3787ae4 | 43 | UNSPEC_BIT_CONV |
44 | ||
7fce8768 | 45 | UNSPEC_VOTE_BALLOT |
46 | ||
47 | UNSPEC_LANEID | |
48 | ||
b3787ae4 | 49 | UNSPEC_SHUFFLE |
50 | UNSPEC_BR_UNIFIED | |
8ce80784 | 51 | ]) |
52 | ||
;; Enumerators of the "unspecv" enum.
;; NOTE(review): by GCC convention these are the codes used with
;; unspec_volatile; confirm against the patterns that reference them.
53 | (define_c_enum "unspecv" [ | |
54 | UNSPECV_LOCK | |
55 | UNSPECV_CAS | |
56 | UNSPECV_XCHG | |
b3787ae4 | 57 | UNSPECV_BARSYNC
e163ceb5 | 58 | UNSPECV_MEMBAR
19b20a5c | 59 | UNSPECV_MEMBAR_CTA
b3787ae4 | 60 | UNSPECV_DIM_POS
61 | ||
62 | UNSPECV_FORK | |
63 | UNSPECV_FORKED | |
64 | UNSPECV_JOINING | |
65 | UNSPECV_JOIN | |
7fce8768 | 66 |
67 | UNSPECV_NOUNROLL | |
1b576300 | 68 |
69 | UNSPECV_SIMT_ENTER | |
70 | UNSPECV_SIMT_EXIT | |
8ce80784 | 71 | ])
72 | ||
;; Per-insn attribute; "false" unless a pattern overrides it.  The move,
;; extension and truncation patterns in this file set it to "true".
;; NOTE(review): presumably tells the backend that subreg operands may be
;; left in place for such insns -- confirm in the target C code.
73 | (define_attr "subregs_ok" "false,true" | |
74 | (const_string "false")) | |
75 | ||
;; Per-insn attribute named "atomic"; defaults to "false".
;; NOTE(review): presumably set to "true" by the atomic patterns -- confirm.
7fce8768 | 76 | (define_attr "atomic" "false,true"
77 | (const_string "false")) | |
78 | ||
9224dd1f | 79 | ;; The nvptx operand predicates, in general, do not permit subregs and
80 | ;; accept only literal constants.  This differs from the generic | |
81 | ;; predicates, which permit subregs and symbolic constants (as appropriate). | |
;; Accept only a bare REG (match_code excludes SUBREG) that also satisfies
;; the generic register_operand check for MODE.
8ce80784 | 82 | (define_predicate "nvptx_register_operand"
f8e6fa1d | 83 | (match_code "reg")
8ce80784 | 84 | {
8ce80784 | 85 | return register_operand (op, mode);
86 | }) | |
87 | ||
;; Accept a REG that satisfies register_operand, or a MEM that satisfies
;; memory_operand.  Any other rtx code is rejected by match_code.
50ad9277 | 88 | (define_predicate "nvptx_nonimmediate_operand"
f8e6fa1d | 89 | (match_code "mem,reg")
8ce80784 | 90 | {
6bd291cd | 91 | return (REG_P (op) ? register_operand (op, mode)
92 | : memory_operand (op, mode)); | |
8ce80784 | 93 | })
94 | ||
;; Accept a REG that satisfies register_operand, or a CONST_INT/CONST_DOUBLE
;; that satisfies immediate_operand.  Other codes (including symbolic
;; constants) are rejected by match_code.
8ce80784 | 95 | (define_predicate "nvptx_nonmemory_operand"
f8e6fa1d | 96 | (match_code "reg,const_int,const_double")
8ce80784 | 97 | {
6bd291cd | 98 | return (REG_P (op) ? register_operand (op, mode)
99 | : immediate_operand (op, mode)); | |
8ce80784 | 100 | })
101 | ||
;; True only for the shared integer zero rtx, const0_rtx.
8ce80784 | 102 | (define_predicate "const0_operand"
103 | (and (match_code "const_int") | |
104 | (match_test "op == const0_rtx"))) | |
105 | ||
106 | ;; True if this operator is valid for predication (EQ or NE only). | |
107 | (define_predicate "predicate_operator" | |
108 | (match_code "eq,ne")) | |
109 | ||
;; True only for an NE comparison; used by the cstorebi4 expander.
110 | (define_predicate "ne_operator" | |
111 | (match_code "ne")) | |
112 | ||
;; Comparison codes accepted by the integer compare, setcc and cbranch
;; patterns: equality plus the signed and unsigned orderings.
113 | (define_predicate "nvptx_comparison_operator" | |
114 | (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu")) | |
115 | ||
;; Comparison codes accepted by the floating-point compare, setcc and cbranch
;; patterns: the ordered comparisons, their UN* counterparts, and the
;; ORDERED/UNORDERED tests.  Note that LTGT is not in the list.
116 | (define_predicate "nvptx_float_comparison_operator" | |
117 | (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered")) | |
118 | ||
119 | ;; Test for a valid operand for a call instruction. | |
;; Any REG is accepted; a SYMBOL_REF is accepted only if
;; SYMBOL_REF_FUNCTION_P holds for it.
f8e6fa1d | 120 | (define_predicate "call_insn_operand"
8ce80784 | 121 | (match_code "symbol_ref,reg")
122 | { | |
50ad9277 | 123 | return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
8ce80784 | 124 | })
125 | ||
126 | ;; Return true if OP is a call with parallel USEs of the argument | |
127 | ;; pseudos. | |
;; Element 0 of the PARALLEL is not examined here (the loop starts at
;; index 1); every later element must be a USE whose operand is a REG.
128 | (define_predicate "call_operation" | |
129 | (match_code "parallel") | |
130 | { | |
b27697ca | 131 | int arg_end = XVECLEN (op, 0);
8ce80784 | 132 |
b27697ca | 133 | for (int i = 1; i < arg_end; i++)
8ce80784 | 134 | {
135 | rtx elt = XVECEXP (op, 0, i); | |
8ce80784 | 136 |
50ad9277 | 137 | if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
8ce80784 | 138 | return false;
139 | } | |
140 | return true; | |
141 | }) | |
142 | ||
;; Whether an insn may be conditionally executed via define_cond_exec.
;; Defaults to "true"; the conditional branch patterns override it to "false".
7fce8768 | 143 | (define_attr "predicable" "false,true"
144 | (const_string "true")) | |
145 | ||
;; Predication condition for predicable insns: an EQ or NE comparison
;; (predicate_operator) of a BImode register against constant zero.  Both the
;; insn condition and the output template are empty.
146 | (define_cond_exec | |
147 | [(match_operator 0 "predicate_operator" | |
148 | [(match_operand:BI 1 "nvptx_register_operand" "") | |
149 | (match_operand:BI 2 "const0_operand" "")])] | |
150 | "" | |
151 | "" | |
152 | ) | |
153 | ||
;; Constraints matching one specific CONST_INT value each (0, 1 and -1).
;; P0 and Pn are used by movbi to select the constant-false and constant-true
;; alternatives.
8ce80784 | 154 | (define_constraint "P0"
155 | "An integer with the value 0." | |
156 | (and (match_code "const_int") | |
157 | (match_test "ival == 0"))) | |
158 | ||
159 | (define_constraint "P1" | |
160 | "An integer with the value 1." | |
161 | (and (match_code "const_int") | |
162 | (match_test "ival == 1"))) | |
163 | ||
164 | (define_constraint "Pn" | |
165 | "An integer with the value -1." | |
166 | (and (match_code "const_int") | |
167 | (match_test "ival == -1"))) | |
168 | ||
;; NOTE(review): the test below matches any REG rtx; nothing here checks that
;; the register is a pseudo, despite the description string.
169 | (define_constraint "R" | |
170 | "A pseudo register." | |
171 | (match_code "reg")) | |
172 | ||
;; NOTE(review): the always-true match_test looks redundant, but the
;; (and (match_code "const_int") ...) shape is presumably what the constraint
;; generator requires for names starting with "I" -- confirm against genpreds
;; before simplifying.
173 | (define_constraint "Ia" | |
174 | "Any integer constant." | |
175 | (and (match_code "const_int") (match_test "true"))) | |
176 | ||
;; Mode iterators.  Each name spells out its members: B = BI, Q = QI, H = HI,
;; S = SI, D = DI, and a trailing SDF adds SF and DF.  SDCM covers the complex
;; float modes, BITS/BITD pair the integer and float mode of each name, and
;; VECIM covers the two-element integer vector modes.
177 | (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF]) | |
178 | (define_mode_iterator QHSDIM [QI HI SI DI]) | |
179 | (define_mode_iterator HSDIM [HI SI DI]) | |
180 | (define_mode_iterator BHSDIM [BI HI SI DI]) | |
181 | (define_mode_iterator SDIM [SI DI]) | |
182 | (define_mode_iterator SDISDFM [SI DI SF DF]) | |
183 | (define_mode_iterator QHIM [QI HI]) | |
184 | (define_mode_iterator QHSIM [QI HI SI]) | |
185 | (define_mode_iterator SDFM [SF DF]) | |
186 | (define_mode_iterator SDCM [SC DC]) | |
b3787ae4 | 187 | (define_mode_iterator BITS [SI SF])
188 | (define_mode_iterator BITD [DI DF]) | |
ffaae5bd | 189 | (define_mode_iterator VECIM [V2SI V2DI])
8ce80784 | 190 |
191 | ;; This mode iterator allows :P to be used for patterns that operate on | |
192 | ;; pointer-sized quantities. Exactly one of the two alternatives will match. | |
193 | (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) | |
194 | ||
195 | ;; We should get away with not defining memory alternatives, since we don't | |
196 | ;; get variables in this mode and pseudos are never spilled. | |
;; Three alternatives: a register-to-register "mov"; the constant 0 (P0),
;; materialised as the always-false test "setp.eq.u32 %0, 1, 0"; and the
;; constant -1 (Pn), materialised as the always-true test
;; "setp.eq.u32 %0, 1, 1".
197 | (define_insn "movbi" | |
198 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R") | |
199 | (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))] | |
200 | "" | |
201 | "@ | |
202 | %.\\tmov%t0\\t%0, %1; | |
203 | %.\\tsetp.eq.u32\\t%0, 1, 0; | |
204 | %.\\tsetp.eq.u32\\t%0, 1, 1;") | |
205 | ||
;; Move insn for the vector integer modes in VECIM (V2SI, V2DI).
;; The insn condition rejects a MEM destination unless the source is a REG.
;; Alternative 1 (mem -> reg) emits "ld" and alternative 2 (reg -> mem) emits
;; "st"; alternative 0 (reg or immediate -> reg) is delegated to
;; nvptx_output_mov_insn.
fcac805e | 206 | (define_insn "*mov<mode>_insn"
207 | [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m") | |
208 | (match_operand:VECIM 1 "general_operand" "Ri,m,R"))] | |
209 | "!MEM_P (operands[0]) || REG_P (operands[1])" | |
210 | { | |
211 | if (which_alternative == 1) | |
212 | return "%.\\tld%A1%u1\\t%0, %1;"; | |
213 | if (which_alternative == 2) | |
214 | return "%.\\tst%A0%u0\\t%0, %1;"; | |
215 | ||
216 | return nvptx_output_mov_insn (operands[0], operands[1]); | |
217 | } | |
218 | [(set_attr "subregs_ok" "true")]) | |
219 | ||
;; Move insn for the scalar integer modes in QHSDIM (QI, HI, SI, DI).
;; The insn condition rejects a MEM destination unless the source is a REG.
;; Alternative 1 (mem -> reg) emits "ld" and alternative 2 (reg -> mem) emits
;; "st", both taking the %u modifier from the memory operand; alternative 0
;; (reg or immediate -> reg) is delegated to nvptx_output_mov_insn.
8ce80784 | 220 | (define_insn "*mov<mode>_insn"
6bd291cd | 221 | [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
6196ad64 | 222 | (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
6bd291cd | 223 | "!MEM_P (operands[0]) || REG_P (operands[1])"
8ce80784 | 224 | {
6196ad64 | 225 | if (which_alternative == 1)
8ce80784 | 226 | return "%.\\tld%A1%u1\\t%0, %1;";
6196ad64 | 227 | if (which_alternative == 2)
8ce80784 | 228 | return "%.\\tst%A0%u0\\t%0, %1;";
229 | ||
6196ad64 | 230 | return nvptx_output_mov_insn (operands[0], operands[1]);
8ce80784 | 231 | }
232 | [(set_attr "subregs_ok" "true")]) | |
233 | ||
;; Move insn for the floating-point modes in SDFM (SF, DF).
;; Same structure as the integer move insn: alternative 1 is a load,
;; alternative 2 is a store, and alternative 0 (reg or constant -> reg) is
;; delegated to nvptx_output_mov_insn.
;; NOTE(review): unlike the integer variants, the %u modifier here is taken
;; from the register operand (%u0 for the load, %u1 for the store) rather than
;; from the memory operand.  That may be intentional when the register is a
;; subreg of a different mode -- confirm before "normalising" it.
234 | (define_insn "*mov<mode>_insn" | |
6bd291cd | 235 | [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
8ce80784 | 236 | (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
6196ad64 | 237 | "!MEM_P (operands[0]) || REG_P (operands[1])"
8ce80784 | 238 | {
239 | if (which_alternative == 1) | |
240 | return "%.\\tld%A1%u0\\t%0, %1;"; | |
241 | if (which_alternative == 2) | |
242 | return "%.\\tst%A0%u1\\t%0, %1;"; | |
243 | ||
6196ad64 | 244 | return nvptx_output_mov_insn (operands[0], operands[1]);
8ce80784 | 245 | }
246 | [(set_attr "subregs_ok" "true")]) | |
247 | ||
;; Copy argument register number <operand 1> (a const_int, printed as
;; "%ar<N>") into a QImode or HImode register.  The value is converted from
;; .u32 with "cvt" rather than moved.
248 | (define_insn "load_arg_reg<mode>" | |
249 | [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R") | |
18cefec0 | 250 | (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
8ce80784 | 251 | UNSPEC_ARG_REG))]
252 | "" | |
253 | "%.\\tcvt%t0.u32\\t%0, %%ar%1;") | |
254 | ||
;; Copy argument register number <operand 1> (a const_int, printed as
;; "%ar<N>") into an SI, DI, SF or DF register with a plain "mov".
255 | (define_insn "load_arg_reg<mode>" | |
256 | [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R") | |
18cefec0 | 257 | (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
8ce80784 | 258 | UNSPEC_ARG_REG))]
259 | "" | |
260 | "%.\\tmov%t0\\t%0, %%ar%1;") | |
261 | ||
;; Move expander for the VECIM vector modes.  A store of anything other than
;; a REG is split into a move to a fresh pseudo followed by a store of that
;; pseudo, which satisfies the condition of the matching *mov<mode>_insn.
;; All other moves fall through and are emitted as the plain SET.
fcac805e | 262 | (define_expand "mov<mode>"
263 | [(set (match_operand:VECIM 0 "nonimmediate_operand" "") | |
264 | (match_operand:VECIM 1 "general_operand" ""))] | |
265 | "" | |
266 | { | |
267 | if (MEM_P (operands[0]) && !REG_P (operands[1])) | |
268 | { | |
269 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
270 | emit_move_insn (tmp, operands[1]); | |
271 | emit_move_insn (operands[0], tmp); | |
272 | DONE; | |
273 | } | |
274 | }) | |
275 | ||
;; Move expander for the scalar integer and float modes.  A store of anything
;; other than a REG is split into a move to a fresh pseudo followed by a store
;; of that pseudo.  For any other move, a LABEL_REF source is reported with
;; sorry (); the move then falls through and is emitted as the plain SET.
8ce80784 | 276 | (define_expand "mov<mode>"
6bd291cd | 277 | [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
8ce80784 | 278 | (match_operand:QHSDISDFM 1 "general_operand" ""))]
279 | "" | |
280 | { | |
9224dd1f | 281 | if (MEM_P (operands[0]) && !REG_P (operands[1]))
8ce80784 | 282 | {
283 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
284 | emit_move_insn (tmp, operands[1]); | |
285 | emit_move_insn (operands[0], tmp); | |
286 | DONE; | |
287 | } | |
1ab41d0d | 288 |
289 | if (GET_CODE (operands[1]) == LABEL_REF) | |
290 | sorry ("target cannot support label values"); | |
8ce80784 | 291 | })
292 | ||
;; Zero extensions.  Each pattern has two alternatives: a register source,
;; emitted as an unsigned "cvt", and a memory source, emitted as a load.
8ce80784 | 293 | (define_insn "zero_extendqihi2"
294 | [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 295 | (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 | 296 | ""
297 | "@ | |
298 | %.\\tcvt.u16.u%T1\\t%0, %1; | |
299 | %.\\tld%A1.u8\\t%0, %1;" | |
300 | [(set_attr "subregs_ok" "true")]) | |
301 | ||
302 | (define_insn "zero_extend<mode>si2" | |
303 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 304 | (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 | 305 | ""
306 | "@ | |
307 | %.\\tcvt.u32.u%T1\\t%0, %1; | |
308 | %.\\tld%A1.u%T1\\t%0, %1;" | |
309 | [(set_attr "subregs_ok" "true")]) | |
310 | ||
311 | (define_insn "zero_extend<mode>di2" | |
312 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 313 | (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 | 314 | ""
315 | "@ | |
316 | %.\\tcvt.u64.u%T1\\t%0, %1; | |
317 | %.\\tld%A1%u1\\t%0, %1;" | |
318 | [(set_attr "subregs_ok" "true")]) | |
319 | ||
;; Sign extensions to SI and DI.  Each pattern has two alternatives: a
;; register source, emitted as a signed "cvt", and a memory source, emitted as
;; a signed load (".s" type suffix).
320 | (define_insn "extend<mode>si2" | |
321 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 322 | (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 | 323 | ""
324 | "@ | |
325 | %.\\tcvt.s32.s%T1\\t%0, %1; | |
326 | %.\\tld%A1.s%T1\\t%0, %1;" | |
327 | [(set_attr "subregs_ok" "true")]) | |
328 | ||
329 | (define_insn "extend<mode>di2" | |
330 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 331 | (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
8ce80784 | 332 | ""
333 | "@ | |
334 | %.\\tcvt.s64.s%T1\\t%0, %1; | |
335 | %.\\tld%A1.s%T1\\t%0, %1;" | |
336 | [(set_attr "subregs_ok" "true")]) | |
337 | ||
;; Truncations.  The source is always a register.  Alternative 0 writes a
;; register using "cvt"; alternative 1 writes memory directly using a narrow
;; "st".
338 | (define_insn "trunchiqi2" | |
50ad9277 | 339 | [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
8ce80784 | 340 | (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
341 | "" | |
342 | "@ | |
343 | %.\\tcvt%t0.u16\\t%0, %1; | |
344 | %.\\tst%A0.u8\\t%0, %1;" | |
345 | [(set_attr "subregs_ok" "true")]) | |
346 | ||
347 | (define_insn "truncsi<mode>2" | |
50ad9277 | 348 | [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
8ce80784 | 349 | (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
350 | "" | |
351 | "@ | |
352 | %.\\tcvt%t0.u32\\t%0, %1; | |
353 | %.\\tst%A0.u%T0\\t%0, %1;" | |
354 | [(set_attr "subregs_ok" "true")]) | |
355 | ||
356 | (define_insn "truncdi<mode>2" | |
50ad9277 | 357 | [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
8ce80784 | 358 | (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
359 | "" | |
360 | "@ | |
361 | %.\\tcvt%t0.u64\\t%0, %1; | |
362 | %.\\tst%A0.u%T0\\t%0, %1;" | |
363 | [(set_attr "subregs_ok" "true")]) | |
364 | ||
8ce80784 | 365 | ;; Integer arithmetic
;; These patterns cover the HSDIM modes (HI, SI, DI).  Operand 1 is always a
;; register.  Operand 2 may be a register or an immediate, except for
;; subtraction, which requires a register.  *mad<mode>3 matches a multiply
;; feeding an add and emits a single "mad.lo".  Division is "div.s" for the
;; signed pattern and "div.u" for the unsigned one.
366 | ||
367 | (define_insn "add<mode>3" | |
368 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
369 | (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
370 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
371 | "" | |
372 | "%.\\tadd%t0\\t%0, %1, %2;") | |
373 | ||
374 | (define_insn "sub<mode>3" | |
375 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
376 | (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
377 | (match_operand:HSDIM 2 "nvptx_register_operand" "R")))] | |
378 | "" | |
379 | "%.\\tsub%t0\\t%0, %1, %2;") | |
380 | ||
381 | (define_insn "mul<mode>3" | |
382 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
383 | (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
384 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
385 | "" | |
386 | "%.\\tmul.lo%t0\\t%0, %1, %2;") | |
387 | ||
388 | (define_insn "*mad<mode>3" | |
389 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
390 | (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
391 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
392 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
393 | "" | |
394 | "%.\\tmad.lo%t0\\t%0, %1, %2, %3;") | |
395 | ||
396 | (define_insn "div<mode>3" | |
397 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
398 | (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
399 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
400 | "" | |
401 | "%.\\tdiv.s%T0\\t%0, %1, %2;") | |
402 | ||
403 | (define_insn "udiv<mode>3" | |
404 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
405 | (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
406 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
407 | "" | |
408 | "%.\\tdiv.u%T0\\t%0, %1, %2;") | |
409 | ||
;; Signed remainder for the HSDIM modes (HI, SI, DI), emitted as "rem.s".
;; Operand 1 must be a register: its predicate, nvptx_register_operand, only
;; matches REG, so its constraint is plain "R", as in div<mode>3.  Operand 2
;; may be a register or an immediate.
410 | (define_insn "mod<mode>3" | |
411 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
412 | (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
413 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
414 | "" | |
415 | "%.\\trem.s%T0\\t%0, %1, %2;") | |
416 | ||
;; Unsigned remainder for the HSDIM modes (HI, SI, DI), emitted as "rem.u".
;; Operand 1 must be a register: its predicate, nvptx_register_operand, only
;; matches REG, so its constraint is plain "R", as in udiv<mode>3.  Operand 2
;; may be a register or an immediate.
417 | (define_insn "umod<mode>3" | |
418 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
419 | (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
420 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
421 | "" | |
422 | "%.\\trem.u%T0\\t%0, %1, %2;") | |
423 | ||
;; Integer minimum and maximum for the HSDIM modes.  The signed patterns emit
;; "min.s"/"max.s" and the unsigned patterns emit "min.u"/"max.u".  Operand 2
;; may be a register or an immediate.
424 | (define_insn "smin<mode>3" | |
425 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
426 | (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
427 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
428 | "" | |
429 | "%.\\tmin.s%T0\\t%0, %1, %2;") | |
430 | ||
431 | (define_insn "umin<mode>3" | |
432 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
433 | (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
434 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
435 | "" | |
436 | "%.\\tmin.u%T0\\t%0, %1, %2;") | |
437 | ||
438 | (define_insn "smax<mode>3" | |
439 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
440 | (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
441 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
442 | "" | |
443 | "%.\\tmax.s%T0\\t%0, %1, %2;") | |
444 | ||
445 | (define_insn "umax<mode>3" | |
446 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
447 | (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
448 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
449 | "" | |
450 | "%.\\tmax.u%T0\\t%0, %1, %2;") | |
451 | ||
;; Integer unary operations for the HSDIM modes, register to register:
;; absolute value ("abs.s"), negation ("neg.s") and bitwise complement
;; ("not.b").
452 | (define_insn "abs<mode>2" | |
453 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
454 | (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
455 | "" | |
456 | "%.\\tabs.s%T0\\t%0, %1;") | |
457 | ||
458 | (define_insn "neg<mode>2" | |
459 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
460 | (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
461 | "" | |
462 | "%.\\tneg.s%T0\\t%0, %1;") | |
463 | ||
464 | (define_insn "one_cmpl<mode>2" | |
465 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
466 | (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
467 | "" | |
468 | "%.\\tnot.b%T0\\t%0, %1;") | |
469 | ||
;; Bit reversal of an SI or DI register, represented as UNSPEC_BITREV and
;; emitted as "brev.b".  Used by the ctz<mode>2 expander.
470 | (define_insn "bitrev<mode>2" | |
471 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
472 | (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")] | |
473 | UNSPEC_BITREV))] | |
474 | "" | |
475 | "%.\\tbrev.b%T0\\t%0, %1;") | |
476 | ||
;; Count leading zeros.  The result is always SImode, whatever the source
;; mode (SI or DI), so the type suffix is taken from operand 1 (%T1), not from
;; the destination.
477 | (define_insn "clz<mode>2" | |
478 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
479 | (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))] | |
480 | "" | |
b594ca2b | 481 | "%.\\tclz.b%T1\\t%0, %1;")
8ce80784 | 482 | |
;; Count trailing zeros, expanded as a bit reversal into a fresh pseudo of the
;; source mode followed by a count of leading zeros on that pseudo.
483 | (define_expand "ctz<mode>2" | |
484 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
485 | (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))] | |
486 | "" | |
487 | { | |
488 | rtx tmpreg = gen_reg_rtx (<MODE>mode); | |
489 | emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1])); | |
490 | emit_insn (gen_clz<mode>2 (operands[0], tmpreg)); | |
491 | DONE; | |
492 | }) | |
493 | ||
494 | ;; Shifts | |
;; Defined for SI and DI values only.  The shift count (operand 2) is always
;; SImode and may be a register or an immediate.  Left shift emits "shl.b",
;; arithmetic right shift "shr.s" and logical right shift "shr.u".
495 | ||
496 | (define_insn "ashl<mode>3" | |
497 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
498 | (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
499 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
500 | "" | |
501 | "%.\\tshl.b%T0\\t%0, %1, %2;") | |
502 | ||
503 | (define_insn "ashr<mode>3" | |
504 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
505 | (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
506 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
507 | "" | |
508 | "%.\\tshr.s%T0\\t%0, %1, %2;") | |
509 | ||
510 | (define_insn "lshr<mode>3" | |
511 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
512 | (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
513 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
514 | "" | |
515 | "%.\\tshr.u%T0\\t%0, %1, %2;") | |
516 | ||
517 | ;; Logical operations | |
;; Defined for the BHSDIM modes, i.e. BI as well as HI, SI and DI.
;; Operand 2 may be a register or an immediate.
518 | ||
519 | (define_insn "and<mode>3" | |
520 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
521 | (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
522 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
523 | "" | |
524 | "%.\\tand.b%T0\\t%0, %1, %2;") | |
525 | ||
526 | (define_insn "ior<mode>3" | |
527 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
528 | (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
529 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
530 | "" | |
531 | "%.\\tor.b%T0\\t%0, %1, %2;") | |
532 | ||
533 | (define_insn "xor<mode>3" | |
534 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
535 | (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
536 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
537 | "" | |
538 | "%.\\txor.b%T0\\t%0, %1, %2;") | |
539 | ||
540 | ;; Comparisons and branches | |
;; The two *cmp<mode> patterns set a BImode register from a comparison of
;; integer (HSDIM) or floating-point (SDFM) operands, emitted as "setp".
;; NOTE(review): the comparison suffix comes from the %c1 modifier, which is
;; implemented outside this file.
541 | ||
542 | (define_insn "*cmp<mode>" | |
543 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
544 | (match_operator:BI 1 "nvptx_comparison_operator" | |
545 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") | |
546 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
547 | "" | |
b27697ca | 548 | "%.\\tsetp%c1\\t%0, %2, %3;")
8ce80784 | 549 |
550 | (define_insn "*cmp<mode>" | |
551 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
552 | (match_operator:BI 1 "nvptx_float_comparison_operator" | |
553 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") | |
554 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
555 | "" | |
b27697ca | 556 | "%.\\tsetp%c1\\t%0, %2, %3;")
8ce80784 | 557 | |
;; Unconditional branch to the label in operand 0, emitted as "bra".
558 | (define_insn "jump" | |
559 | [(set (pc) | |
560 | (label_ref (match_operand 0 "" "")))] | |
561 | "" | |
562 | "%.\\tbra\\t%l0;") | |
563 | ||
;; Branch to label operand 1 when the BImode register operand 0 is non-zero.
;; The insn is marked non-predicable, and the template starts with %j0 where
;; other templates use "%.".
;; NOTE(review): %j0 presumably prints the predicate guard built from
;; operand 0 -- confirm in the target's print_operand.
564 | (define_insn "br_true" | |
565 | [(set (pc) | |
566 | (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
567 | (const_int 0)) | |
568 | (label_ref (match_operand 1 "" "")) | |
569 | (pc)))] | |
570 | "" | |
7fce8768 | 571 | "%j0\\tbra\\t%l1;"
572 | [(set_attr "predicable" "false")]) | |
8ce80784 | 573 | |
;; Branch to label operand 1 when the BImode register operand 0 is zero.
;; Mirror image of br_true, using the %J0 modifier.
;; NOTE(review): %J0 presumably prints the negated predicate guard --
;; confirm in the target's print_operand.
574 | (define_insn "br_false" | |
575 | [(set (pc) | |
576 | (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
577 | (const_int 0)) | |
578 | (label_ref (match_operand 1 "" "")) | |
579 | (pc)))] | |
580 | "" | |
7fce8768 | 581 | "%J0\\tbra\\t%l1;"
582 | [(set_attr "predicable" "false")]) | |
8ce80784 | 583 | |
b3787ae4 | 584 | ;; unified conditional branch
;; Same as br_true/br_false, except that the predicate register is wrapped in
;; UNSPEC_BR_UNIFIED and the emitted instruction is "bra.uni".
585 | (define_insn "br_true_uni" | |
586 | [(set (pc) (if_then_else | |
587 | (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
588 | UNSPEC_BR_UNIFIED) (const_int 0)) | |
589 | (label_ref (match_operand 1 "" "")) (pc)))] | |
590 | "" | |
7fce8768 | 591 | "%j0\\tbra.uni\\t%l1;"
592 | [(set_attr "predicable" "false")]) | |
b3787ae4 | 593 |
594 | (define_insn "br_false_uni" | |
595 | [(set (pc) (if_then_else | |
596 | (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
597 | UNSPEC_BR_UNIFIED) (const_int 0)) | |
598 | (label_ref (match_operand 1 "" "")) (pc)))] | |
599 | "" | |
7fce8768 | 600 | "%J0\\tbra.uni\\t%l1;"
601 | [(set_attr "predicable" "false")]) | |
b3787ae4 | 602 | |
;; Conditional branch on an integer comparison.  The comparison in operand 0
;; is passed to nvptx_expand_compare; operands 0-2 are then replaced by the
;; returned comparison and its two operands before the branch is emitted.
;; NOTE(review): the returned rtx is presumably a BImode predicate tested
;; against zero, so that br_true/br_false match -- confirm in
;; nvptx_expand_compare.
8ce80784 | 603 | (define_expand "cbranch<mode>4"
604 | [(set (pc) | |
605 | (if_then_else (match_operator 0 "nvptx_comparison_operator" | |
606 | [(match_operand:HSDIM 1 "nvptx_register_operand" "") | |
9d846e45 | 607 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
8ce80784 | 608 | (label_ref (match_operand 3 "" ""))
609 | (pc)))] | |
610 | "" | |
611 | { | |
612 | rtx t = nvptx_expand_compare (operands[0]); | |
613 | operands[0] = t; | |
614 | operands[1] = XEXP (t, 0); | |
615 | operands[2] = XEXP (t, 1); | |
616 | }) | |
617 | ||
;; Conditional branch on a floating-point comparison.  Handled exactly like
;; the integer variant: operand 0 goes through nvptx_expand_compare, and
;; operands 0-2 are replaced by the returned comparison and its two operands.
618 | (define_expand "cbranch<mode>4" | |
619 | [(set (pc) | |
620 | (if_then_else (match_operator 0 "nvptx_float_comparison_operator" | |
621 | [(match_operand:SDFM 1 "nvptx_register_operand" "") | |
9d846e45 | 622 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
8ce80784 | 623 | (label_ref (match_operand 3 "" ""))
624 | (pc)))] | |
625 | "" | |
626 | { | |
627 | rtx t = nvptx_expand_compare (operands[0]); | |
628 | operands[0] = t; | |
629 | operands[1] = XEXP (t, 0); | |
630 | operands[2] = XEXP (t, 1); | |
631 | }) | |
632 | ||
;; Conditional branch on a BImode register compared (EQ or NE) against zero.
;; No preparation code is needed; the pattern is emitted as written and has
;; the same shape as br_true/br_false.
633 | (define_expand "cbranchbi4" | |
634 | [(set (pc) | |
635 | (if_then_else (match_operator 0 "predicate_operator" | |
636 | [(match_operand:BI 1 "nvptx_register_operand" "") | |
637 | (match_operand:BI 2 "const0_operand" "")]) | |
638 | (label_ref (match_operand 3 "" "")) | |
639 | (pc)))] | |
640 | "" | |
641 | "") | |
642 | ||
643 | ;; Conditional stores | |
644 | ||
;; Convert a BImode predicate to an SImode flag: "selp" selects -1 when the
;; predicate is set and 0 otherwise.
;; NOTE(review): this template separates the mnemonic from its operands with
;; a space, whereas the other templates here use a tab.
645 | (define_insn "setcc_from_bi" | |
646 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
647 | (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R") | |
648 | (const_int 0)))] | |
649 | "" | |
650 | "%.\\tselp%t0 %0,-1,0,%1;") | |
651 | ||
;; Conditional select on "predicate != 0": the result is operand 2 when the
;; BImode register operand 1 is set, and operand 3 otherwise.  One pattern
;; covers the integer HSDIM modes and the other the SDFM float modes.
75e09431 | 652 | (define_insn "sel_true<mode>"
653 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
654 | (if_then_else:HSDIM | |
655 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
656 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
657 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
658 | "" | |
659 | "%.\\tselp%t0\\t%0, %2, %3, %1;") | |
660 | ||
661 | (define_insn "sel_true<mode>" | |
662 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
663 | (if_then_else:SDFM | |
664 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
665 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
666 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
667 | "" | |
668 | "%.\\tselp%t0\\t%0, %2, %3, %1;") | |
669 | ||
;; Conditional select on "predicate == 0".  The RTL condition is the inverse
;; of sel_true, so the template passes operands 3 and 2 to "selp" in swapped
;; order rather than negating the predicate.
670 | (define_insn "sel_false<mode>" | |
671 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
672 | (if_then_else:HSDIM | |
673 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
674 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
675 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
676 | "" | |
677 | "%.\\tselp%t0\\t%0, %3, %2, %1;") | |
678 | ||
679 | (define_insn "sel_false<mode>" | |
680 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
681 | (if_then_else:SDFM | |
682 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
683 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
684 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
685 | "" | |
686 | "%.\\tselp%t0\\t%0, %3, %2, %1;") | |
687 | ||
;; Store the result of a comparison directly into an SImode register using
;; "set" (as opposed to "setp", which writes a predicate).  One pattern takes
;; integer (HSDIM) operands and the other floating-point (SDFM) operands.
8ce80784 | 688 | (define_insn "setcc_int<mode>"
689 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
690 | (match_operator:SI 1 "nvptx_comparison_operator" | |
75e09431 | 691 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
692 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
8ce80784 | 693 | ""
b27697ca | 694 | "%.\\tset%t0%c1\\t%0, %2, %3;")
8ce80784 | 695 |
696 | (define_insn "setcc_int<mode>" | |
697 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
698 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
75e09431 | 699 | [(match_operand:SDFM 2 "nvptx_register_operand" "R")
700 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
8ce80784 | 701 | ""
b27697ca | 702 | "%.\\tset%t0%c1\\t%0, %2, %3;")
8ce80784 | 703 | |
;; As setcc_int<mode>, but the comparison result is stored into an SFmode
;; register.  One pattern takes integer (HSDIM) operands and the other
;; floating-point (SDFM) operands.
704 | (define_insn "setcc_float<mode>" | |
705 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
706 | (match_operator:SF 1 "nvptx_comparison_operator" | |
75e09431 | 707 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
708 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
8ce80784 | 709 | ""
b27697ca | 710 | "%.\\tset%t0%c1\\t%0, %2, %3;")
8ce80784 | 711 |
712 | (define_insn "setcc_float<mode>" | |
713 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
714 | (match_operator:SF 1 "nvptx_float_comparison_operator" | |
75e09431 | 715 | [(match_operand:SDFM 2 "nvptx_register_operand" "R")
716 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
8ce80784 | 717 | ""
b27697ca | 718 | "%.\\tset%t0%c1\\t%0, %2, %3;")
8ce80784 | 719 | |
;; cstore expanders.  None needs preparation code; each emits its pattern as
;; written.  cstorebi4 only accepts NE of a BImode register against zero,
;; which is the shape of setcc_from_bi.  The two cstore<mode>4 expanders
;; produce the shapes matched by the integer and floating-point
;; setcc_int<mode> insns.
720 | (define_expand "cstorebi4" | |
721 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
722 | (match_operator:SI 1 "ne_operator" | |
723 | [(match_operand:BI 2 "nvptx_register_operand") | |
724 | (match_operand:BI 3 "const0_operand")]))] | |
725 | "" | |
726 | "") | |
727 | ||
728 | (define_expand "cstore<mode>4" | |
729 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
730 | (match_operator:SI 1 "nvptx_comparison_operator" | |
731 | [(match_operand:HSDIM 2 "nvptx_register_operand") | |
732 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))] | |
733 | "" | |
734 | "") | |
735 | ||
736 | (define_expand "cstore<mode>4" | |
737 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
738 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
739 | [(match_operand:SDFM 2 "nvptx_register_operand") | |
740 | (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))] | |
741 | "" | |
742 | "") | |
743 | ||
744 | ;; Calls | |
745 | ||
;; Call without a return value.  The whole insn is a PARALLEL accepted by
;; call_operation; its first element is the CALL through operand 0 (a register
;; or function symbol).  Output is produced by nvptx_output_call_insn, with
;; NULL_RTX passed as the result.
746 | (define_insn "call_insn" | |
747 | [(match_parallel 2 "call_operation" | |
f8e6fa1d | 748 | [(call (mem:QI (match_operand 0 "call_insn_operand" "Rs"))
8ce80784 | 749 | (match_operand 1))])]
750 | "" | |
751 | { | |
752 | return nvptx_output_call_insn (insn, NULL_RTX, operands[0]); | |
753 | }) | |
754 | ||
;; Call with a return value.  Like call_insn, except that the first element
;; of the PARALLEL sets register operand 0 from the CALL through operand 1.
;; Both are passed to nvptx_output_call_insn.
755 | (define_insn "call_value_insn" | |
756 | [(match_parallel 3 "call_operation" | |
757 | [(set (match_operand 0 "nvptx_register_operand" "=R") | |
f8e6fa1d | 758 | (call (mem:QI (match_operand 1 "call_insn_operand" "Rs"))
8ce80784 | 759 | (match_operand 2)))])]
760 | "" | |
761 | { | |
762 | return nvptx_output_call_insn (insn, operands[0], operands[1]); | |
763 | }) | |
764 | ||
;; Standard "call" expander: all the work is delegated to nvptx_expand_call,
;; with NULL_RTX as the result, and nothing else is emitted.
765 | (define_expand "call" | |
766 | [(match_operand 0 "" "")] | |
767 | "" | |
768 | { | |
769 | nvptx_expand_call (NULL_RTX, operands[0]); | |
770 | DONE; | |
771 | }) | |
772 | ||
;; Standard "call_value" expander: delegates to nvptx_expand_call, with
;; operand 0 as the result and operand 1 as the callee; nothing else is
;; emitted.
773 | (define_expand "call_value" | |
774 | [(match_operand 0 "" "") | |
775 | (match_operand 1 "" "")] | |
776 | "" | |
777 | { | |
778 | nvptx_expand_call (operands[0], operands[1]); | |
779 | DONE; | |
780 | }) | |
781 | ||
782 | ;; Floating point arithmetic. | |
783 | ||
784 | (define_insn "add<mode>3" | |
785 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
786 | (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
787 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
788 | "" | |
789 | "%.\\tadd%t0\\t%0, %1, %2;") | |
790 | ||
791 | (define_insn "sub<mode>3" | |
792 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
793 | (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
794 | (match_operand:SDFM 2 "nvptx_register_operand" "R")))] | |
795 | "" | |
796 | "%.\\tsub%t0\\t%0, %1, %2;") | |
797 | ||
798 | (define_insn "mul<mode>3" | |
799 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
800 | (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
801 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
802 | "" | |
803 | "%.\\tmul%t0\\t%0, %1, %2;") | |
804 | ||
;; Fused multiply-add over the SDFM mode iterator: operand 0 is set to
;; (fma op1 op2 op3).  Emits a PTX "fma"; the %# directive sits between the
;; mnemonic and the type suffix (presumably a rounding-mode modifier --
;; confirm in the target's print_operand code).
805 | (define_insn "fma<mode>4" | |
806 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
807 | (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
808 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
809 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
810 | "" | |
811 | "%.\\tfma%#%t0\\t%0, %1, %2, %3;") | |
812 | ||
;; Division over the SDFM mode iterator.  Emits a PTX "div" with the %#
;; directive before the type suffix (presumably a rounding-mode modifier --
;; confirm in the target's print_operand code).
813 | (define_insn "div<mode>3" | |
814 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
815 | (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
816 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
817 | "" | |
818 | "%.\\tdiv%#%t0\\t%0, %1, %2;") | |
819 | ||
;; copysign over the SDFM mode iterator, represented as UNSPEC_COPYSIGN.
;; Note that the output template prints the inputs swapped (%2 before %1).
;; NOTE(review): presumably because the PTX instruction takes the sign
;; source as its first input -- confirm against the PTX ISA.
820 | (define_insn "copysign<mode>3" | |
821 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
822 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R") | |
823 | (match_operand:SDFM 2 "nvptx_register_operand" "R")] | |
824 | UNSPEC_COPYSIGN))] | |
825 | "" | |
826 | "%.\\tcopysign%t0\\t%0, %2, %1;") | |
827 | ||
;; smin over the SDFM mode iterator.  Emits a PTX "min".
828 | (define_insn "smin<mode>3" | |
829 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
830 | (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
831 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
832 | "" | |
833 | "%.\\tmin%t0\\t%0, %1, %2;") | |
834 | ||
;; smax over the SDFM mode iterator.  Emits a PTX "max".
835 | (define_insn "smax<mode>3" | |
836 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
837 | (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
838 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
839 | "" | |
840 | "%.\\tmax%t0\\t%0, %1, %2;") | |
841 | ||
;; Absolute value over the SDFM mode iterator, register to register.
;; Emits a PTX "abs".
842 | (define_insn "abs<mode>2" | |
843 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
844 | (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
845 | "" | |
846 | "%.\\tabs%t0\\t%0, %1;") | |
847 | ||
;; Negation over the SDFM mode iterator, register to register.
;; Emits a PTX "neg".
848 | (define_insn "neg<mode>2" | |
849 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
850 | (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
851 | "" | |
852 | "%.\\tneg%t0\\t%0, %1;") | |
853 | ||
;; Square root over the SDFM mode iterator.  Emits a PTX "sqrt" with the %#
;; directive before the type suffix (presumably a rounding-mode modifier --
;; confirm in the target's print_operand code).
854 | (define_insn "sqrt<mode>2" | |
855 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
856 | (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
857 | "" | |
858 | "%.\\tsqrt%#%t0\\t%0, %1;") | |
859 | ||
;; SFmode sincos expander, enabled only under flag_unsafe_math_optimizations.
;; It expands to two sets of the same input (operand 2): operand 0 receives
;; the UNSPEC_COS value and operand 1 the UNSPEC_SIN value.  Before the
;; template is emitted, operand 2 is replaced by
;; make_safe_from (operands[2], operands[0]) -- presumably so that writing
;; operand 0 in the first set cannot change the input read by the second.
3d380077 | 860 | (define_expand "sincossf3" |
861 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
862 | (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")] | |
863 | UNSPEC_COS)) | |
864 | (set (match_operand:SF 1 "nvptx_register_operand" "=R") | |
865 | (unspec:SF [(match_dup 2)] UNSPEC_SIN))] | |
866 | "flag_unsafe_math_optimizations" | |
867 | { | |
868 | operands[2] = make_safe_from (operands[2], operands[0]); | |
869 | }) | |
870 | ||
;; SFmode sine as UNSPEC_SIN, enabled only under
;; flag_unsafe_math_optimizations.  Emits "sin.approx".
8ce80784 | 871 | (define_insn "sinsf2" |
872 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
873 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
874 | UNSPEC_SIN))] | |
875 | "flag_unsafe_math_optimizations" | |
876 | "%.\\tsin.approx%t0\\t%0, %1;") | |
877 | ||
;; SFmode cosine as UNSPEC_COS, enabled only under
;; flag_unsafe_math_optimizations.  Emits "cos.approx".
878 | (define_insn "cossf2" | |
879 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
880 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
881 | UNSPEC_COS))] | |
882 | "flag_unsafe_math_optimizations" | |
883 | "%.\\tcos.approx%t0\\t%0, %1;") | |
884 | ||
;; SFmode base-2 logarithm as UNSPEC_LOG2, enabled only under
;; flag_unsafe_math_optimizations.  Emits "lg2.approx".
885 | (define_insn "log2sf2" | |
886 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
887 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
888 | UNSPEC_LOG2))] | |
889 | "flag_unsafe_math_optimizations" | |
890 | "%.\\tlg2.approx%t0\\t%0, %1;") | |
891 | ||
;; SFmode base-2 exponential as UNSPEC_EXP2, enabled only under
;; flag_unsafe_math_optimizations.  Emits "ex2.approx".
892 | (define_insn "exp2sf2" | |
893 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
894 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
895 | UNSPEC_EXP2))] | |
896 | "flag_unsafe_math_optimizations" | |
897 | "%.\\tex2.approx%t0\\t%0, %1;") | |
898 | ||
899 | ;; Conversions involving floating point | |
900 | ||
;; SF -> DF extension.  Emits a PTX "cvt" with the destination suffix (%t0)
;; followed by the source suffix (%t1); no %# directive is used here.
901 | (define_insn "extendsfdf2" | |
902 | [(set (match_operand:DF 0 "nvptx_register_operand" "=R") | |
903 | (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))] | |
904 | "" | |
905 | "%.\\tcvt%t0%t1\\t%0, %1;") | |
906 | ||
;; DF -> SF truncation.  Emits a PTX "cvt" with the %# directive ahead of
;; the destination and source suffixes.
907 | (define_insn "truncdfsf2" | |
908 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
909 | (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))] | |
910 | "" | |
911 | "%.\\tcvt%#%t0%t1\\t%0, %1;") | |
912 | ||
;; Unsigned SImode integer -> SDFM floating point.  Emits "cvt" with an
;; unsigned (".u" + %T1) source suffix.
913 | (define_insn "floatunssi<mode>2" | |
914 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
915 | (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
916 | "" | |
917 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") | |
918 | ||
;; Signed SImode integer -> SDFM floating point.  Emits "cvt" with a
;; signed (".s" + %T1) source suffix.
919 | (define_insn "floatsi<mode>2" | |
920 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
921 | (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
922 | "" | |
923 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") | |
924 | ||
;; Unsigned DImode integer -> SDFM floating point.  Emits "cvt" with an
;; unsigned (".u" + %T1) source suffix.
925 | (define_insn "floatunsdi<mode>2" | |
926 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
927 | (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
928 | "" | |
929 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") | |
930 | ||
;; Signed DImode integer -> SDFM floating point.  Emits "cvt" with a
;; signed (".s" + %T1) source suffix.
931 | (define_insn "floatdi<mode>2" | |
932 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
933 | (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
934 | "" | |
935 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") | |
936 | ||
;; SDFM floating point -> unsigned SImode integer.  Emits "cvt.rzi" with an
;; unsigned (".u" + %T0) destination suffix.
937 | (define_insn "fixuns_trunc<mode>si2" | |
938 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
939 | (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
940 | "" | |
941 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") | |
942 | ||
;; SDFM floating point -> signed SImode integer.  Emits "cvt.rzi" with a
;; signed (".s" + %T0) destination suffix.
943 | (define_insn "fix_trunc<mode>si2" | |
944 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
945 | (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
946 | "" | |
947 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") | |
948 | ||
;; SDFM floating point -> unsigned DImode integer.  Emits "cvt.rzi" with an
;; unsigned (".u" + %T0) destination suffix.
949 | (define_insn "fixuns_trunc<mode>di2" | |
950 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
951 | (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
952 | "" | |
953 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") | |
954 | ||
;; SDFM floating point -> signed DImode integer.  Emits "cvt.rzi" with a
;; signed (".s" + %T0) destination suffix.
955 | (define_insn "fix_trunc<mode>di2" | |
956 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
957 | (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
958 | "" | |
959 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") | |
960 | ||
;; Round-to-integral patterns that keep the floating-point mode.
;; FPINT iterates over four unspecs; fpint_name supplies the pattern-name
;; prefix (floor, btrunc, ceil, nearbyint) and fpint_roundingmode the text
;; placed directly after "cvt" in the output (.rmi, .rzi, .rpi, or "%#i"
;; for nearbyint).  The single define_insn below therefore expands to one
;; insn per (FPINT, SDFM mode) combination.
961 | (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC | |
962 | UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT]) | |
963 | (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor") | |
964 | (UNSPEC_FPINT_BTRUNC "btrunc") | |
965 | (UNSPEC_FPINT_CEIL "ceil") | |
966 | (UNSPEC_FPINT_NEARBYINT "nearbyint")]) | |
967 | (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") | |
968 | (UNSPEC_FPINT_BTRUNC ".rzi") | |
969 | (UNSPEC_FPINT_CEIL ".rpi") | |
970 | (UNSPEC_FPINT_NEARBYINT "%#i")]) | |
971 | ||
972 | (define_insn "<FPINT:fpint_name><SDFM:mode>2" | |
973 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
974 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
975 | FPINT))] | |
976 | "" | |
977 | "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;") | |
978 | ||
;; Floating point -> integer conversions with floor/ceil rounding.
;; FPINT2 covers only the FLOOR and CEIL unspecs; fpint2_name gives the
;; pattern-name prefix (lfloor, lceil) and fpint2_roundingmode the cvt
;; modifier (.rmi, .rpi).  The insn converts an SDFM input to an SDIM result
;; and always prints a signed (".s" + %T0) destination suffix.
979 | (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL]) | |
980 | (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor") | |
981 | (UNSPEC_FPINT_CEIL "lceil")]) | |
982 | (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") | |
983 | (UNSPEC_FPINT_CEIL ".rpi")]) | |
984 | ||
985 | (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2" | |
986 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
987 | (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
988 | FPINT2))] | |
989 | "" | |
990 | "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;") | |
991 | ||
992 | ;; Miscellaneous | |
993 | ||
;; No-op insn: the pattern is (const_int 0) and the output template is the
;; empty string, so no assembly text is produced.
994 | (define_insn "nop" | |
995 | [(const_int 0)] | |
996 | "" | |
997 | "") | |
998 | ||
;; Emits a bare PTX "exit;".  The pattern is (const_int 1), which keeps it
;; distinct from "nop" and "fake_nop".
f3c313db | 999 | (define_insn "exit" |
1000 | [(const_int 1)] | |
1001 | "" | |
1002 | "exit;") | |
1003 | ||
;; Emits a braced PTX block that declares two local .u32 registers
;; (%nop_src, %nop_dst) and moves one into the other.  The pattern is
;; (const_int 2).  NOTE(review): the reason a real instruction is needed
;; instead of the empty "nop" is not visible in this file section.
47282587 | 1004 | (define_insn "fake_nop" |
1005 | [(const_int 2)] | |
1006 | "" | |
1007 | "{ | |
1008 | .reg .u32 %%nop_src; | |
1009 | .reg .u32 %%nop_dst; | |
1010 | mov.u32 %%nop_dst, %%nop_src; | |
1011 | }") | |
1012 | ||
;; Function return.  The assembly text comes from nvptx_output_return; the
;; insn is marked as not predicable.
8ce80784 | 1013 | (define_insn "return" |
1014 | [(return)] | |
1015 | "" | |
1016 | { | |
1017 | return nvptx_output_return (); | |
7fce8768 | 1018 | } |
1019 | [(set_attr "predicable" "false")]) | |
8ce80784 | 1020 | |
1021 | (define_expand "epilogue" | |
1022 | [(clobber (const_int 0))] | |
1023 | "" | |
1024 | { | |
7fce8768 | 1025 | if (TARGET_SOFT_STACK) |
1026 | emit_insn (gen_set_softstack_insn (gen_rtx_REG (Pmode, | |
1027 | SOFTSTACK_PREV_REGNUM))); | |
8ce80784 | 1028 | emit_jump_insn (gen_return ()); |
1029 | DONE; | |
1030 | }) | |
1031 | ||
;; Nonlocal goto is not supported: the expander reports that via sorry ()
;; and emits a single "nop" insn in place of the operation.
;; NOTE(review): the diagnostic text ends with a period; check whether that
;; matches the project's diagnostic-message conventions.
1032 | (define_expand "nonlocal_goto" | |
1033 | [(match_operand 0 "" "") | |
1034 | (match_operand 1 "" "") | |
1035 | (match_operand 2 "" "") | |
1036 | (match_operand 3 "" "")] | |
1037 | "" | |
1038 | { | |
1039 | sorry ("target cannot support nonlocal goto."); | |
1040 | emit_insn (gen_nop ()); | |
1041 | DONE; | |
1042 | }) | |
1043 | ||
;; Receiver side of a nonlocal goto: only reports the feature as
;; unsupported via sorry ().  There is no DONE here, so the (const_int 0)
;; template is what gets emitted after the preparation statements.
;; NOTE(review): the diagnostic text ends with a period; check whether that
;; matches the project's diagnostic-message conventions.
1044 | (define_expand "nonlocal_goto_receiver" | |
1045 | [(const_int 0)] | |
1046 | "" | |
1047 | { | |
1048 | sorry ("target cannot support nonlocal goto."); | |
1049 | }) | |
1050 | ||
;; Dynamic stack allocation.
;; With TARGET_SOFT_STACK: subtract operand 1 from the stack pointer, emit
;; set_softstack_insn for the updated stack pointer, and copy
;; virtual_stack_dynamic_rtx into operand 0.
;; Otherwise: report the feature as unsupported via sorry () and emit a
;; "nop" insn (see the C comment below for the rationale).
ed20400d | 1051 | (define_expand "allocate_stack" |
1052 | [(match_operand 0 "nvptx_register_operand") | |
1053 | (match_operand 1 "nvptx_register_operand")] | |
1054 | "" | |
1055 | { | |
7fce8768 | 1056 | if (TARGET_SOFT_STACK) |
1057 | { | |
1058 | emit_move_insn (stack_pointer_rtx, | |
1059 | gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1])); | |
1060 | emit_insn (gen_set_softstack_insn (stack_pointer_rtx)); | |
1061 | emit_move_insn (operands[0], virtual_stack_dynamic_rtx); | |
1062 | DONE; | |
1063 | } | |
f289122f | 1064 | /* The ptx documentation specifies an alloca intrinsic (for 32 bit |
1065 | only) but notes it is not implemented. The assembler emits a | |
1066 | confused error message. Issue a blunt one now instead. */ | |
1067 | sorry ("target cannot support alloca."); | |
1068 | emit_insn (gen_nop ()); | |
1069 | DONE; | |
ed20400d | 1070 | }) |
1071 | ||
;; UNSPEC_SET_SOFTSTACK on a register operand, available only with
;; TARGET_SOFT_STACK.  The assembly text is produced by
;; nvptx_output_set_softstack from the operand's register number.
7fce8768 | 1072 | (define_insn "set_softstack_insn" |
1073 | [(unspec [(match_operand 0 "nvptx_register_operand" "R")] | |
1074 | UNSPEC_SET_SOFTSTACK)] | |
1075 | "TARGET_SOFT_STACK" | |
1076 | { | |
1077 | return nvptx_output_set_softstack (REGNO (operands[0])); | |
1078 | }) | |
8ce80784 | 1079 | |
;; Block-scope stack restore.  With TARGET_SOFT_STACK, operand 1 is copied
;; into operand 0 and set_softstack_insn is emitted for operand 0.  Without
;; it, nothing is emitted.
1080 | (define_expand "restore_stack_block" | |
1081 | [(match_operand 0 "register_operand" "") | |
1082 | (match_operand 1 "register_operand" "")] | |
1083 | "" | |
1084 | { | |
7fce8768 | 1085 | if (TARGET_SOFT_STACK) |
1086 | { | |
1087 | emit_move_insn (operands[0], operands[1]); | |
1088 | emit_insn (gen_set_softstack_insn (operands[0])); | |
1089 | } | |
8ce80784 | 1090 | DONE; |
1091 | }) | |
1092 | ||
;; Function-scope stack restore: deliberately expands to no insns (the
;; preparation code is just DONE).
1093 | (define_expand "restore_stack_function" | |
1094 | [(match_operand 0 "register_operand" "") | |
1095 | (match_operand 1 "register_operand" "")] | |
1096 | "" | |
1097 | { | |
1098 | DONE; | |
1099 | }) | |
1100 | ||
;; Unconditional trap.  Emits "trap;" immediately followed by "exit;".
1101 | (define_insn "trap" | |
1102 | [(trap_if (const_int 1) (const_int 0))] | |
1103 | "" | |
38017cab | 1104 | "trap; exit;") |
8ce80784 | 1105 | |
;; Conditional trap taken when the BImode operand is non-zero.  Both the
;; "trap" and the "exit" are individually prefixed with %j0; the insn itself
;; is marked as not predicable.
1106 | (define_insn "trap_if_true" | |
1107 | [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
1108 | (const_int 0)) | |
1109 | (const_int 0))] | |
1110 | "" | |
38017cab | 1111 | "%j0 trap; %j0 exit;" |
7fce8768 | 1112 | [(set_attr "predicable" "false")]) |
8ce80784 | 1113 | |
;; Conditional trap taken when the BImode operand is zero.  Mirrors
;; trap_if_true but uses the %J0 prefix on both instructions.
1114 | (define_insn "trap_if_false" | |
1115 | [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
1116 | (const_int 0)) | |
1117 | (const_int 0))] | |
1118 | "" | |
38017cab | 1119 | "%J0 trap; %J0 exit;" |
7fce8768 | 1120 | [(set_attr "predicable" "false")]) |
8ce80784 | 1121 | |
;; Compare-and-trap over the SDIM mode iterator.  The comparison in
;; operand 0 is expanded by nvptx_expand_compare, and the rtx it returns is
;; fed to trap_if_true.  Operand 3 must satisfy const0_operand.
1122 | (define_expand "ctrap<mode>4" | |
1123 | [(trap_if (match_operator 0 "nvptx_comparison_operator" | |
1124 | [(match_operand:SDIM 1 "nvptx_register_operand") | |
1125 | (match_operand:SDIM 2 "nvptx_nonmemory_operand")]) | |
25ce1bcb | 1126 | (match_operand 3 "const0_operand"))] |
8ce80784 | 1127 | "" |
1128 | { | |
1129 | rtx t = nvptx_expand_compare (operands[0]); | |
1130 | emit_insn (gen_trap_if_true (t)); | |
1131 | DONE; | |
1132 | }) | |
1133 | ||
;; Load the size of an OpenACC dimension into operand 0.  Operand 1 is a
;; constant that selects one of three templates reading %nctaid.x (gang),
;; %ntid.y (worker) or %ntid.x (vector).
;; NOTE(review): the table index is INTVAL (operands[1]) with no range check
;; in this pattern; callers are presumably expected to pass only 0..2.
b3787ae4 | 1134 | (define_insn "oacc_dim_size" |
1135 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
1136 | (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] | |
1137 | UNSPEC_DIM_SIZE))] | |
8ce80784 | 1138 | "" |
b3787ae4 | 1139 | { |
1140 | static const char *const asms[] = | |
1141 | { /* Must match oacc_loop_levels ordering. */ | |
1142 | "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */ | |
1143 | "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */ | |
1144 | "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */ | |
1145 | }; | |
1146 | return asms[INTVAL (operands[1])]; | |
1147 | }) | |
8ce80784 | 1148 | |
;; Load the position within an OpenACC dimension into operand 0.  Operand 1
;; is a constant that selects one of three templates reading %ctaid.x
;; (gang), %tid.y (worker) or %tid.x (vector).  Unlike oacc_dim_size this is
;; an unspec_volatile.
;; NOTE(review): the table index is INTVAL (operands[1]) with no range check
;; in this pattern; callers are presumably expected to pass only 0..2.
b3787ae4 | 1149 | (define_insn "oacc_dim_pos" |
8ce80784 | 1150 | [(set (match_operand:SI 0 "nvptx_register_operand" "") |
b3787ae4 | 1151 | (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")] |
1152 | UNSPECV_DIM_POS))] | |
8ce80784 | 1153 | "" |
1154 | { | |
b3787ae4 | 1155 | static const char *const asms[] = |
1156 | { /* Must match oacc_loop_levels ordering. */ | |
1157 | "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */ | |
1158 | "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */ | |
1159 | "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */ | |
1160 | }; | |
1161 | return asms[INTVAL (operands[1])]; | |
8ce80784 | 1162 | }) |
1163 | ||
;; Marker insn (UNSPECV_FORK) carrying a constant operand.  Its output is
;; only a "// fork N;" line, i.e. a comment in the generated assembly; the
;; insn is marked as not predicable.
b3787ae4 | 1164 | (define_insn "nvptx_fork" |
1165 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1166 | UNSPECV_FORK)] | |
8ce80784 | 1167 | "" |
b3787ae4 | 1168 | "// fork %0;" |
7fce8768 | 1169 | [(set_attr "predicable" "false")]) |
8ce80784 | 1170 | |
;; Marker insn (UNSPECV_FORKED) carrying a constant operand.  Its output is
;; only a "// forked N;" comment line; the insn is marked as not predicable.
b3787ae4 | 1171 | (define_insn "nvptx_forked" |
1172 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1173 | UNSPECV_FORKED)] | |
1174 | "" | |
1175 | "// forked %0;" | |
7fce8768 | 1176 | [(set_attr "predicable" "false")]) |
b3787ae4 | 1177 | |
;; Marker insn (UNSPECV_JOINING) carrying a constant operand.  Its output is
;; only a "// joining N;" comment line; the insn is marked as not predicable.
1178 | (define_insn "nvptx_joining" | |
1179 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1180 | UNSPECV_JOINING)] | |
1181 | "" | |
1182 | "// joining %0;" | |
7fce8768 | 1183 | [(set_attr "predicable" "false")]) |
b3787ae4 | 1184 | |
;; Marker insn (UNSPECV_JOIN) carrying a constant operand.  Its output is
;; only a "// join N;" comment line; the insn is marked as not predicable.
1185 | (define_insn "nvptx_join" | |
1186 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1187 | UNSPECV_JOIN)] | |
1188 | "" | |
1189 | "// join %0;" | |
7fce8768 | 1190 | [(set_attr "predicable" "false")]) |
b3787ae4 | 1191 | |
;; OpenACC fork expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0.  The fork itself is emitted by
;; nvptx_expand_oacc_fork, which receives the integer value of operand 2.
1192 | (define_expand "oacc_fork" | |
1193 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
6bd291cd | 1194 | (match_operand:SI 1 "general_operand" "")) |
b3787ae4 | 1195 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] |
1196 | UNSPECV_FORKED)] | |
8ce80784 | 1197 | "" |
1198 | { | |
b3787ae4 | 1199 | if (operands[0] != const0_rtx) |
1200 | emit_move_insn (operands[0], operands[1]); | |
1201 | nvptx_expand_oacc_fork (INTVAL (operands[2])); | |
1202 | DONE; | |
1203 | }) | |
1204 | ||
;; OpenACC join expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0.  The join itself is emitted by
;; nvptx_expand_oacc_join, which receives the integer value of operand 2.
1205 | (define_expand "oacc_join" | |
1206 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
6bd291cd | 1207 | (match_operand:SI 1 "general_operand" "")) |
b3787ae4 | 1208 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] |
1209 | UNSPECV_JOIN)] | |
1210 | "" | |
1211 | { | |
1212 | if (operands[0] != const0_rtx) | |
1213 | emit_move_insn (operands[0], operands[1]); | |
1214 | nvptx_expand_oacc_join (INTVAL (operands[2])); | |
1215 | DONE; | |
8ce80784 | 1216 | }) |
1217 | ||
;; Lane shuffle over the BITS mode iterator.  Operand 1 is the value,
;; operand 2 a register or immediate, and operand 3 a constant printed via
;; %S3 right after "shfl".  The template hard-codes the ".b32" type and a
;; final argument of 31.
b3787ae4 | 1218 | ;; Only 32-bit shuffles exist. |
1219 | (define_insn "nvptx_shuffle<mode>" | |
1220 | [(set (match_operand:BITS 0 "nvptx_register_operand" "=R") | |
1221 | (unspec:BITS | |
1222 | [(match_operand:BITS 1 "nvptx_register_operand" "R") | |
1223 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") | |
1224 | (match_operand:SI 3 "const_int_operand" "n")] | |
1225 | UNSPEC_SHUFFLE))] | |
1226 | "" | |
1227 | "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;") | |
1228 | ||
;; Ballot vote: takes a BImode predicate register and produces an SImode
;; result.  Emits "vote.ballot.b32".
7fce8768 | 1229 | (define_insn "nvptx_vote_ballot" |
1230 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1231 | (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")] | |
1232 | UNSPEC_VOTE_BALLOT))] | |
1233 | "" | |
1234 | "%.\\tvote.ballot.b32\\t%0, %1;") | |
1235 | ||
1236 | ;; Patterns for OpenMP SIMD-via-SIMT lowering | |
1237 | ||
;; Insn behind the omp_simt_enter expander (UNSPECV_SIMT_ENTER).  The
;; assembly text is produced by nvptx_output_simt_enter from all three
;; operands.
1b576300 | 1238 | (define_insn "omp_simt_enter_insn" |
1239 | [(set (match_operand 0 "nvptx_register_operand" "=R") | |
1240 | (unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri") | |
1241 | (match_operand 2 "nvptx_nonmemory_operand" "Ri")] | |
1242 | UNSPECV_SIMT_ENTER))] | |
1243 | "" | |
1244 | { | |
1245 | return nvptx_output_simt_enter (operands[0], operands[1], operands[2]); | |
1246 | }) | |
1247 | ||
;; Expander that records per-function SIMT stack requirements in
;; cfun->machine and then emits omp_simt_enter_insn.
;;  - simt_stack_size: set to HOST_WIDE_INT_M1U when operand 1 is not a
;;    CONST_INT, otherwise raised to the maximum of its current value and
;;    operand 1.
;;  - simt_stack_align: raised to the maximum of its current value and
;;    operand 2 (a const_int_operand).
;;  - has_simtreg: set to true.
1248 | (define_expand "omp_simt_enter" | |
1249 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1250 | (match_operand 1 "nvptx_nonmemory_operand" "Ri") | |
1251 | (match_operand 2 "const_int_operand" "n")] | |
1252 | "" | |
1253 | { | |
1254 | if (!CONST_INT_P (operands[1])) | |
1255 | cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U; | |
1256 | else | |
1257 | cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]), | |
1258 | cfun->machine->simt_stack_size); | |
1259 | cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]), | |
1260 | cfun->machine->simt_stack_align); | |
1261 | cfun->machine->has_simtreg = true; | |
1262 | emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2])); | |
1263 | DONE; | |
1264 | }) | |
1265 | ||
;; UNSPECV_SIMT_EXIT on a register operand.  The assembly text is produced
;; by nvptx_output_simt_exit.
1266 | (define_insn "omp_simt_exit" | |
1267 | [(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")] | |
1268 | UNSPECV_SIMT_EXIT)] | |
1269 | "" | |
1270 | { | |
1271 | return nvptx_output_simt_exit (operands[0]); | |
1272 | }) | |
1273 | ||
7fce8768 | 1274 | ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index |
;; The value is read from the PTX special register %laneid with a mov.u32.
1275 | (define_insn "omp_simt_lane" | |
1276 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
1277 | (unspec:SI [(const_int 0)] UNSPEC_LANEID))] | |
1278 | "" | |
1279 | "%.\\tmov.u32\\t%0, %%laneid;") | |
1280 | ||
1281 | ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and | |
1282 | ;; place a compiler barrier to disallow unrolling/peeling the containing loop | |
;; The barrier is the nvptx_nounroll insn, emitted right after the move.
1283 | (define_expand "omp_simt_ordered" | |
1284 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1285 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1286 | "" | |
1287 | { | |
1288 | emit_move_insn (operands[0], operands[1]); | |
1289 | emit_insn (gen_nvptx_nounroll ()); | |
1290 | DONE; | |
1291 | }) | |
1292 | ||
1293 | ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange | |
1294 | ;; across lanes | |
;; The insn sequence comes from nvptx_gen_shuffle, called with operands 0-2
;; and the SHUFFLE_BFLY kind.
1295 | (define_expand "omp_simt_xchg_bfly" | |
1296 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1297 | (match_operand 1 "nvptx_register_operand" "R") | |
1298 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] | |
1299 | "" | |
1300 | { | |
1301 | emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2], | |
1302 | SHUFFLE_BFLY)); | |
1303 | DONE; | |
1304 | }) | |
1305 | ||
1306 | ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1 | |
1307 | ;; from lane given by index in operand 2 to operand 0 in all lanes | |
;; The insn sequence comes from nvptx_gen_shuffle, called with operands 0-2
;; and the SHUFFLE_IDX kind.
1308 | (define_expand "omp_simt_xchg_idx" | |
1309 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1310 | (match_operand 1 "nvptx_register_operand" "R") | |
1311 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] | |
1312 | "" | |
1313 | { | |
1314 | emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2], | |
1315 | SHUFFLE_IDX)); | |
1316 | DONE; | |
1317 | }) | |
1318 | ||
1319 | ;; Implement IFN_GOMP_SIMT_VOTE_ANY: | |
1320 | ;; set operand 0 to zero iff all lanes supply zero in operand 1 | |
;; Done in two steps: compute a BImode predicate (operand 1 != 0) into a
;; fresh pseudo, then store the nvptx_vote_ballot result for that predicate
;; in operand 0.
1321 | (define_expand "omp_simt_vote_any" | |
1322 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1323 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1324 | "" | |
1325 | { | |
1326 | rtx pred = gen_reg_rtx (BImode); | |
1327 | emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx)); | |
1328 | emit_insn (gen_nvptx_vote_ballot (operands[0], pred)); | |
1329 | DONE; | |
1330 | }) | |
1331 | ||
1332 | ;; Implement IFN_GOMP_SIMT_LAST_LANE: | |
1333 | ;; set operand 0 to the lowest lane index that passed non-zero in operand 1 | |
;; Done in three steps: compute a BImode predicate (operand 1 != 0), take
;; the nvptx_vote_ballot result for it into an SImode temporary, then apply
;; ctzsi2 to that temporary to produce operand 0.
1334 | (define_expand "omp_simt_last_lane" | |
1335 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1336 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1337 | "" | |
1338 | { | |
1339 | rtx pred = gen_reg_rtx (BImode); | |
1340 | rtx tmp = gen_reg_rtx (SImode); | |
1341 | emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx)); | |
1342 | emit_insn (gen_nvptx_vote_ballot (tmp, pred)); | |
1343 | emit_insn (gen_ctzsi2 (operands[0], tmp)); | |
1344 | DONE; | |
1345 | }) | |
1346 | ||
b3787ae4 | 1347 | ;; Extract the two parts of a 64-bit object into two 32-bit ints. |
;; Operand 2 (a BITD-mode register) is the source; operands 0 and 1 are the
;; SImode results, written with a single "mov.b64 {%0,%1}, %2".
1348 | (define_insn "unpack<mode>si2" | |
1349 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1350 | (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R") | |
1351 | (const_int 0)] UNSPEC_BIT_CONV)) | |
1352 | (set (match_operand:SI 1 "nvptx_register_operand" "=R") | |
1353 | (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))] | |
1354 | "" | |
1355 | "%.\\tmov.b64\\t{%0,%1}, %2;") | |
1356 | ||
1357 | ;; Pack two 32-bit ints into a 64-bit object. | |
;; Operands 1 and 2 are the SImode inputs and operand 0 the BITD-mode
;; result, written with a single "mov.b64 %0, {%1,%2}".
1358 | (define_insn "packsi<mode>2" | |
1359 | [(set (match_operand:BITD 0 "nvptx_register_operand" "=R") | |
1360 | (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R") | |
1361 | (match_operand:SI 2 "nvptx_register_operand" "R")] | |
1362 | UNSPEC_BIT_CONV))] | |
1363 | "" | |
1364 | "%.\\tmov.b64\\t%0, {%1,%2};") | |
1365 | ||
8ce80784 | 1366 | ;; Atomic insns. |
1367 | ||
;; Compare-and-swap expander over the SDIM mode iterator.
;; It emits atomic_compare_and_swap<mode>_1 with the old-value output,
;; memory, expected value, new value and success model (operand 6).  The
;; is_weak flag (operand 5) and the failure model (operand 7) are not used.
;; The boolean success result (operand 0) is then derived by comparing the
;; returned old value with the expected value and selecting 1 or 0.
1368 | (define_expand "atomic_compare_and_swap<mode>" | |
1369 | [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output | |
1370 | (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output | |
1371 | (match_operand:SDIM 2 "memory_operand") ;; memory | |
1372 | (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input | |
1373 | (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input | |
1374 | (match_operand:SI 5 "const_int_operand") ;; is_weak | |
1375 | (match_operand:SI 6 "const_int_operand") ;; success model | |
1376 | (match_operand:SI 7 "const_int_operand")] ;; failure model | |
1377 | "" | |
1378 | { | |
c68f6b1c | 1379 | emit_insn (gen_atomic_compare_and_swap<mode>_1 |
1380 | (operands[1], operands[2], operands[3], operands[4], operands[6])); | |
1381 | ||
1382 | rtx cond = gen_reg_rtx (BImode); | |
1383 | emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); | |
1384 | emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0))); | |
8ce80784 | 1385 | DONE; |
1386 | }) | |
1387 | ||
;; The compare-and-swap instruction itself (UNSPECV_CAS).  Operand 0 is the
;; result register, operand 1 the memory location (also modelled as written
;; by the second set), operands 2 and 3 the two value inputs, and operand 4
;; a constant that does not appear in the output template.  Emits
;; "atom<space>.cas.b<bits>" and carries the "atomic" attribute.
1388 | (define_insn "atomic_compare_and_swap<mode>_1" | |
1389 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1390 | (unspec_volatile:SDIM | |
1391 | [(match_operand:SDIM 1 "memory_operand" "+m") | |
89f6d4a2 | 1392 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") |
1393 | (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") | |
8ce80784 | 1394 | (match_operand:SI 4 "const_int_operand")] |
1395 | UNSPECV_CAS)) | |
1396 | (set (match_dup 1) | |
1397 | (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] | |
1398 | "" | |
7fce8768 | 1399 | "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;" |
1400 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1401 | |
;; Atomic exchange over the SDIM mode iterator (UNSPECV_XCHG).  The memory
;; model (operand 3) does not appear in the output template.  Emits
;; "atom<space>.exch.b<bits>" and carries the "atomic" attribute.
1402 | (define_insn "atomic_exchange<mode>" | |
1403 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output | |
1404 | (unspec_volatile:SDIM | |
1405 | [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory | |
1406 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1407 | UNSPECV_XCHG)) | |
1408 | (set (match_dup 1) | |
89f6d4a2 | 1409 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input |
8ce80784 | 1410 | "" |
7fce8768 | 1411 | "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;" |
1412 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1413 | |
;; Atomic fetch-and-add over the SDIM mode iterator (UNSPECV_LOCK).  Operand
;; 1 is the memory location, operand 2 the addend and operand 0 the register
;; receiving the fetched value.  The memory model (operand 3) does not
;; appear in the output template.  Emits "atom<space>.add" and carries the
;; "atomic" attribute.
1414 | (define_insn "atomic_fetch_add<mode>" | |
1415 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
1416 | (unspec_volatile:SDIM | |
1417 | [(plus:SDIM (match_dup 1) | |
1418 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
1419 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1420 | UNSPECV_LOCK)) | |
1421 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1422 | (match_dup 1))] | |
1423 | "" | |
7fce8768 | 1424 | "%.\\tatom%A1.add%t0\\t%0, %1, %2;" |
1425 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1426 | |
;; SFmode variant of atomic fetch-and-add.  Same shape as the integer
;; pattern, but the addend uses the "RF" constraints instead of "Ri".
1427 | (define_insn "atomic_fetch_addsf" | |
1428 | [(set (match_operand:SF 1 "memory_operand" "+m") | |
1429 | (unspec_volatile:SF | |
1430 | [(plus:SF (match_dup 1) | |
1431 | (match_operand:SF 2 "nvptx_nonmemory_operand" "RF")) | |
1432 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1433 | UNSPECV_LOCK)) | |
1434 | (set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1435 | (match_dup 1))] | |
1436 | "" | |
7fce8768 | 1437 | "%.\\tatom%A1.add%t0\\t%0, %1, %2;" |
1438 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1439 | |
1440 | (define_code_iterator any_logic [and ior xor]) | |
1441 | (define_code_attr logic [(and "and") (ior "or") (xor "xor")]) | |
1442 | ||
8ce80784 | 1443 | (define_insn "atomic_fetch_<logic><mode>" |
1444 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
1445 | (unspec_volatile:SDIM | |
1446 | [(any_logic:SDIM (match_dup 1) | |
1447 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
1448 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1449 | UNSPECV_LOCK)) | |
1450 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1451 | (match_dup 1))] | |
ab841e4d | 1452 | "<MODE>mode == SImode || TARGET_SM35" |
7fce8768 | 1453 | "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;" |
1454 | [(set_attr "atomic" "true")]) | |
b3787ae4 | 1455 | |
;; Barrier synchronisation (UNSPECV_BARSYNC).  Emits "bar.sync" with the
;; constant operand 0 as its argument; the insn is marked as not predicable
;; and its template has no "%." prefix.
1456 | (define_insn "nvptx_barsync" | |
1457 | [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] | |
1458 | UNSPECV_BARSYNC)] | |
1459 | "" | |
7fce8768 | 1460 | "\\tbar.sync\\t%0;" |
1461 | [(set_attr "predicable" "false")]) | |
1462 | ||
;; "memory_barrier" expander.  Operand 0 is not supplied by the caller: the
;; preparation code builds a volatile BLKmode MEM of a Pmode scratch address
;; and uses it as both source and destination of the UNSPECV_MEMBAR set.
e163ceb5 | 1463 | (define_expand "memory_barrier" |
1464 | [(set (match_dup 0) | |
1465 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))] | |
1466 | "" | |
1467 | { | |
1468 | operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
1469 | MEM_VOLATILE_P (operands[0]) = 1; | |
1470 | }) | |
1471 | ||
1472 | ;; PTX defines the memory barriers membar.cta, membar.gl and membar.sys | |
1473 | ;; (corresponding to cuda functions threadfence_block, threadfence and | |
1474 | ;; threadfence_system). For the insn memory_barrier we use membar.sys. This | |
1475 | ;; may be overconservative, but before using membar.gl instead we'll need to | |
1476 | ;; explain in detail why it's safe to use. For now, use membar.sys. | |
;; This is the insn matched by the rtl that the "memory_barrier" expander
;; generates; it is marked as not predicable.
1477 | (define_insn "*memory_barrier" | |
1478 | [(set (match_operand:BLK 0 "" "") | |
1479 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))] | |
1480 | "" | |
1481 | "\\tmembar.sys;" | |
1482 | [(set_attr "predicable" "false")]) | |
1483 | ||
;; Expander for a barrier using UNSPECV_MEMBAR_CTA.  As in "memory_barrier",
;; operand 0 is built here as a volatile BLKmode MEM of a Pmode scratch
;; address.
19b20a5c | 1484 | (define_expand "nvptx_membar_cta" |
1485 | [(set (match_dup 0) | |
1486 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))] | |
1487 | "" | |
1488 | { | |
1489 | operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
1490 | MEM_VOLATILE_P (operands[0]) = 1; | |
1491 | }) | |
1492 | ||
;; Insn matched by the rtl that the "nvptx_membar_cta" expander generates.
;; Emits "membar.cta" and is marked as not predicable.
1493 | (define_insn "*nvptx_membar_cta" | |
1494 | [(set (match_operand:BLK 0 "" "") | |
1495 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))] | |
1496 | "" | |
1497 | "\\tmembar.cta;" | |
1498 | [(set_attr "predicable" "false")]) | |
1499 | ||
;; UNSPECV_NOUNROLL marker.  Emits the PTX directive .pragma "nounroll";
;; and is marked as not predicable.  It is the insn emitted by the
;; omp_simt_ordered expander.
7fce8768 | 1500 | (define_insn "nvptx_nounroll" |
1501 | [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)] | |
1502 | "" | |
1503 | "\\t.pragma \\\"nounroll\\\";" | |
1504 | [(set_attr "predicable" "false")]) |