]>
Commit | Line | Data |
---|---|---|
8ce80784 | 1 | ;; Machine description for NVPTX. |
8e8f6434 | 2 | ;; Copyright (C) 2014-2018 Free Software Foundation, Inc. |
8ce80784 | 3 | ;; Contributed by Bernd Schmidt <bernds@codesourcery.com> |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify | |
8 | ;; it under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, | |
13 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | ;; GNU General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
21 | (define_c_enum "unspec" [ | |
22 | UNSPEC_ARG_REG | |
8ce80784 | 23 | |
8ce80784 | 24 | UNSPEC_COPYSIGN |
25 | UNSPEC_LOG2 | |
26 | UNSPEC_EXP2 | |
27 | UNSPEC_SIN | |
28 | UNSPEC_COS | |
29 | ||
30 | UNSPEC_FPINT_FLOOR | |
31 | UNSPEC_FPINT_BTRUNC | |
32 | UNSPEC_FPINT_CEIL | |
33 | UNSPEC_FPINT_NEARBYINT | |
34 | ||
35 | UNSPEC_BITREV | |
36 | ||
37 | UNSPEC_ALLOCA | |
38 | ||
7fce8768 | 39 | UNSPEC_SET_SOFTSTACK |
40 | ||
b3787ae4 | 41 | UNSPEC_DIM_SIZE |
42 | ||
b3787ae4 | 43 | UNSPEC_BIT_CONV |
44 | ||
7fce8768 | 45 | UNSPEC_VOTE_BALLOT |
46 | ||
47 | UNSPEC_LANEID | |
48 | ||
b3787ae4 | 49 | UNSPEC_SHUFFLE |
50 | UNSPEC_BR_UNIFIED | |
8ce80784 | 51 | ]) |
52 | ||
53 | (define_c_enum "unspecv" [ | |
54 | UNSPECV_LOCK | |
55 | UNSPECV_CAS | |
56 | UNSPECV_XCHG | |
b3787ae4 | 57 | UNSPECV_BARSYNC |
58 | UNSPECV_DIM_POS | |
59 | ||
60 | UNSPECV_FORK | |
61 | UNSPECV_FORKED | |
62 | UNSPECV_JOINING | |
63 | UNSPECV_JOIN | |
7fce8768 | 64 | |
65 | UNSPECV_NOUNROLL | |
1b576300 | 66 | |
67 | UNSPECV_SIMT_ENTER | |
68 | UNSPECV_SIMT_EXIT | |
8ce80784 | 69 | ]) |
70 | ||
71 | (define_attr "subregs_ok" "false,true" | |
72 | (const_string "false")) | |
73 | ||
7fce8768 | 74 | (define_attr "atomic" "false,true" |
75 | (const_string "false")) | |
76 | ||
9224dd1f | 77 | ;; The nvptx operand predicates, in general, reject subregs and accept |
78 | ;; only literal constants.  This differs from the generic predicates, | |
79 | ;; which permit subregs and symbolic constants (as appropriate). | |
8ce80784 | 80 | (define_predicate "nvptx_register_operand" |
f8e6fa1d | 81 | (match_code "reg") |
8ce80784 | 82 | { |
8ce80784 | 83 | return register_operand (op, mode); |
84 | }) | |
85 | ||
50ad9277 | 86 | (define_predicate "nvptx_nonimmediate_operand" |
f8e6fa1d | 87 | (match_code "mem,reg") |
8ce80784 | 88 | { |
6bd291cd | 89 | return (REG_P (op) ? register_operand (op, mode) |
90 | : memory_operand (op, mode)); | |
8ce80784 | 91 | }) |
92 | ||
8ce80784 | 93 | (define_predicate "nvptx_nonmemory_operand" |
f8e6fa1d | 94 | (match_code "reg,const_int,const_double") |
8ce80784 | 95 | { |
6bd291cd | 96 | return (REG_P (op) ? register_operand (op, mode) |
97 | : immediate_operand (op, mode)); | |
8ce80784 | 98 | }) |
99 | ||
8ce80784 | 100 | (define_predicate "const0_operand" |
101 | (and (match_code "const_int") | |
102 | (match_test "op == const0_rtx"))) | |
103 | ||
104 | ;; True if this operator is valid for predication. | |
105 | (define_predicate "predicate_operator" | |
106 | (match_code "eq,ne")) | |
107 | ||
108 | (define_predicate "ne_operator" | |
109 | (match_code "ne")) | |
110 | ||
111 | (define_predicate "nvptx_comparison_operator" | |
112 | (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu")) | |
113 | ||
114 | (define_predicate "nvptx_float_comparison_operator" | |
115 | (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered")) | |
116 | ||
117 | ;; Test for a valid operand for a call instruction. | |
f8e6fa1d | 118 | (define_predicate "call_insn_operand" |
8ce80784 | 119 | (match_code "symbol_ref,reg") |
120 | { | |
50ad9277 | 121 | return REG_P (op) || SYMBOL_REF_FUNCTION_P (op); |
8ce80784 | 122 | }) |
123 | ||
124 | ;; Return true if OP is a call with parallel USEs of the argument | |
125 | ;; pseudos. | |
126 | (define_predicate "call_operation" | |
127 | (match_code "parallel") | |
128 | { | |
b27697ca | 129 | int arg_end = XVECLEN (op, 0); |
8ce80784 | 130 | |
b27697ca | 131 | for (int i = 1; i < arg_end; i++) |
8ce80784 | 132 | { |
133 | rtx elt = XVECEXP (op, 0, i); | |
8ce80784 | 134 | |
50ad9277 | 135 | if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0))) |
8ce80784 | 136 | return false; |
137 | } | |
138 | return true; | |
139 | }) | |
140 | ||
7fce8768 | 141 | (define_attr "predicable" "false,true" |
142 | (const_string "true")) | |
143 | ||
144 | (define_cond_exec | |
145 | [(match_operator 0 "predicate_operator" | |
146 | [(match_operand:BI 1 "nvptx_register_operand" "") | |
147 | (match_operand:BI 2 "const0_operand" "")])] | |
148 | "" | |
149 | "" | |
150 | ) | |
151 | ||
8ce80784 | 152 | (define_constraint "P0" |
153 | "An integer with the value 0." | |
154 | (and (match_code "const_int") | |
155 | (match_test "ival == 0"))) | |
156 | ||
157 | (define_constraint "P1" | |
158 | "An integer with the value 1." | |
159 | (and (match_code "const_int") | |
160 | (match_test "ival == 1"))) | |
161 | ||
162 | (define_constraint "Pn" | |
163 | "An integer with the value -1." | |
164 | (and (match_code "const_int") | |
165 | (match_test "ival == -1"))) | |
166 | ||
167 | (define_constraint "R" | |
168 | "A pseudo register." | |
169 | (match_code "reg")) | |
170 | ||
171 | (define_constraint "Ia" | |
172 | "Any integer constant." | |
173 | (and (match_code "const_int") (match_test "true"))) | |
174 | ||
175 | (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF]) | |
176 | (define_mode_iterator QHSDIM [QI HI SI DI]) | |
177 | (define_mode_iterator HSDIM [HI SI DI]) | |
178 | (define_mode_iterator BHSDIM [BI HI SI DI]) | |
179 | (define_mode_iterator SDIM [SI DI]) | |
180 | (define_mode_iterator SDISDFM [SI DI SF DF]) | |
181 | (define_mode_iterator QHIM [QI HI]) | |
182 | (define_mode_iterator QHSIM [QI HI SI]) | |
183 | (define_mode_iterator SDFM [SF DF]) | |
184 | (define_mode_iterator SDCM [SC DC]) | |
b3787ae4 | 185 | (define_mode_iterator BITS [SI SF]) |
186 | (define_mode_iterator BITD [DI DF]) | |
ffaae5bd | 187 | (define_mode_iterator VECIM [V2SI V2DI]) |
8ce80784 | 188 | |
189 | ;; This mode iterator allows :P to be used for patterns that operate on | |
190 | ;; pointer-sized quantities. Exactly one of the two alternatives will match. | |
191 | (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) | |
192 | ||
193 | ;; We should get away with not defining memory alternatives, since we don't | |
194 | ;; get variables in this mode and pseudos are never spilled. | |
195 | (define_insn "movbi" | |
196 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R") | |
197 | (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))] | |
198 | "" | |
199 | "@ | |
200 | %.\\tmov%t0\\t%0, %1; | |
201 | %.\\tsetp.eq.u32\\t%0, 1, 0; | |
202 | %.\\tsetp.eq.u32\\t%0, 1, 1;") | |
203 | ||
fcac805e | 204 | (define_insn "*mov<mode>_insn" |
205 | [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m") | |
206 | (match_operand:VECIM 1 "general_operand" "Ri,m,R"))] | |
207 | "!MEM_P (operands[0]) || REG_P (operands[1])" | |
208 | { | |
209 | if (which_alternative == 1) | |
210 | return "%.\\tld%A1%u1\\t%0, %1;"; | |
211 | if (which_alternative == 2) | |
212 | return "%.\\tst%A0%u0\\t%0, %1;"; | |
213 | ||
214 | return nvptx_output_mov_insn (operands[0], operands[1]); | |
215 | } | |
216 | [(set_attr "subregs_ok" "true")]) | |
217 | ||
8ce80784 | 218 | (define_insn "*mov<mode>_insn" |
6bd291cd | 219 | [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m") |
6196ad64 | 220 | (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))] |
6bd291cd | 221 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
8ce80784 | 222 | { |
6196ad64 | 223 | if (which_alternative == 1) |
8ce80784 | 224 | return "%.\\tld%A1%u1\\t%0, %1;"; |
6196ad64 | 225 | if (which_alternative == 2) |
8ce80784 | 226 | return "%.\\tst%A0%u0\\t%0, %1;"; |
227 | ||
6196ad64 | 228 | return nvptx_output_mov_insn (operands[0], operands[1]); |
8ce80784 | 229 | } |
230 | [(set_attr "subregs_ok" "true")]) | |
231 | ||
232 | (define_insn "*mov<mode>_insn" | |
6bd291cd | 233 | [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m") |
8ce80784 | 234 | (match_operand:SDFM 1 "general_operand" "RF,m,R"))] |
6196ad64 | 235 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
8ce80784 | 236 | { |
237 | if (which_alternative == 1) | |
238 | return "%.\\tld%A1%u0\\t%0, %1;"; | |
239 | if (which_alternative == 2) | |
240 | return "%.\\tst%A0%u1\\t%0, %1;"; | |
241 | ||
6196ad64 | 242 | return nvptx_output_mov_insn (operands[0], operands[1]); |
8ce80784 | 243 | } |
244 | [(set_attr "subregs_ok" "true")]) | |
245 | ||
246 | (define_insn "load_arg_reg<mode>" | |
247 | [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R") | |
18cefec0 | 248 | (unspec:QHIM [(match_operand 1 "const_int_operand" "n")] |
8ce80784 | 249 | UNSPEC_ARG_REG))] |
250 | "" | |
251 | "%.\\tcvt%t0.u32\\t%0, %%ar%1;") | |
252 | ||
253 | (define_insn "load_arg_reg<mode>" | |
254 | [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R") | |
18cefec0 | 255 | (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")] |
8ce80784 | 256 | UNSPEC_ARG_REG))] |
257 | "" | |
258 | "%.\\tmov%t0\\t%0, %%ar%1;") | |
259 | ||
fcac805e | 260 | (define_expand "mov<mode>" |
261 | [(set (match_operand:VECIM 0 "nonimmediate_operand" "") | |
262 | (match_operand:VECIM 1 "general_operand" ""))] | |
263 | "" | |
264 | { | |
265 | if (MEM_P (operands[0]) && !REG_P (operands[1])) | |
266 | { | |
267 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
268 | emit_move_insn (tmp, operands[1]); | |
269 | emit_move_insn (operands[0], tmp); | |
270 | DONE; | |
271 | } | |
272 | }) | |
273 | ||
8ce80784 | 274 | (define_expand "mov<mode>" |
6bd291cd | 275 | [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "") |
8ce80784 | 276 | (match_operand:QHSDISDFM 1 "general_operand" ""))] |
277 | "" | |
278 | { | |
9224dd1f | 279 | if (MEM_P (operands[0]) && !REG_P (operands[1])) |
8ce80784 | 280 | { |
281 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
282 | emit_move_insn (tmp, operands[1]); | |
283 | emit_move_insn (operands[0], tmp); | |
284 | DONE; | |
285 | } | |
1ab41d0d | 286 | |
287 | if (GET_CODE (operands[1]) == LABEL_REF) | |
288 | sorry ("target cannot support label values"); | |
8ce80784 | 289 | }) |
290 | ||
8ce80784 | 291 | (define_insn "zero_extendqihi2" |
292 | [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 293 | (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 294 | "" |
295 | "@ | |
296 | %.\\tcvt.u16.u%T1\\t%0, %1; | |
297 | %.\\tld%A1.u8\\t%0, %1;" | |
298 | [(set_attr "subregs_ok" "true")]) | |
299 | ||
300 | (define_insn "zero_extend<mode>si2" | |
301 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 302 | (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 303 | "" |
304 | "@ | |
305 | %.\\tcvt.u32.u%T1\\t%0, %1; | |
306 | %.\\tld%A1.u%T1\\t%0, %1;" | |
307 | [(set_attr "subregs_ok" "true")]) | |
308 | ||
309 | (define_insn "zero_extend<mode>di2" | |
310 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 311 | (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 312 | "" |
313 | "@ | |
314 | %.\\tcvt.u64.u%T1\\t%0, %1; | |
315 | %.\\tld%A1%u1\\t%0, %1;" | |
316 | [(set_attr "subregs_ok" "true")]) | |
317 | ||
318 | (define_insn "extend<mode>si2" | |
319 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 320 | (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 321 | "" |
322 | "@ | |
323 | %.\\tcvt.s32.s%T1\\t%0, %1; | |
324 | %.\\tld%A1.s%T1\\t%0, %1;" | |
325 | [(set_attr "subregs_ok" "true")]) | |
326 | ||
327 | (define_insn "extend<mode>di2" | |
328 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
50ad9277 | 329 | (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
8ce80784 | 330 | "" |
331 | "@ | |
332 | %.\\tcvt.s64.s%T1\\t%0, %1; | |
333 | %.\\tld%A1.s%T1\\t%0, %1;" | |
334 | [(set_attr "subregs_ok" "true")]) | |
335 | ||
336 | (define_insn "trunchiqi2" | |
50ad9277 | 337 | [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m") |
8ce80784 | 338 | (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))] |
339 | "" | |
340 | "@ | |
341 | %.\\tcvt%t0.u16\\t%0, %1; | |
342 | %.\\tst%A0.u8\\t%0, %1;" | |
343 | [(set_attr "subregs_ok" "true")]) | |
344 | ||
345 | (define_insn "truncsi<mode>2" | |
50ad9277 | 346 | [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m") |
8ce80784 | 347 | (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))] |
348 | "" | |
349 | "@ | |
350 | %.\\tcvt%t0.u32\\t%0, %1; | |
351 | %.\\tst%A0.u%T0\\t%0, %1;" | |
352 | [(set_attr "subregs_ok" "true")]) | |
353 | ||
354 | (define_insn "truncdi<mode>2" | |
50ad9277 | 355 | [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m") |
8ce80784 | 356 | (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))] |
357 | "" | |
358 | "@ | |
359 | %.\\tcvt%t0.u64\\t%0, %1; | |
360 | %.\\tst%A0.u%T0\\t%0, %1;" | |
361 | [(set_attr "subregs_ok" "true")]) | |
362 | ||
8ce80784 | 363 | ;; Integer arithmetic |
364 | ||
365 | (define_insn "add<mode>3" | |
366 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
367 | (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
368 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
369 | "" | |
370 | "%.\\tadd%t0\\t%0, %1, %2;") | |
371 | ||
372 | (define_insn "sub<mode>3" | |
373 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
374 | (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
375 | (match_operand:HSDIM 2 "nvptx_register_operand" "R")))] | |
376 | "" | |
377 | "%.\\tsub%t0\\t%0, %1, %2;") | |
378 | ||
379 | (define_insn "mul<mode>3" | |
380 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
381 | (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
382 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
383 | "" | |
384 | "%.\\tmul.lo%t0\\t%0, %1, %2;") | |
385 | ||
386 | (define_insn "*mad<mode>3" | |
387 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
388 | (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
389 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
390 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
391 | "" | |
392 | "%.\\tmad.lo%t0\\t%0, %1, %2, %3;") | |
393 | ||
394 | (define_insn "div<mode>3" | |
395 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
396 | (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
397 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
398 | "" | |
399 | "%.\\tdiv.s%T0\\t%0, %1, %2;") | |
400 | ||
401 | (define_insn "udiv<mode>3" | |
402 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
403 | (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
404 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
405 | "" | |
406 | "%.\\tdiv.u%T0\\t%0, %1, %2;") | |
407 | ||
408 | (define_insn "mod<mode>3" | |
409 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
410 | (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri") | |
411 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
412 | "" | |
413 | "%.\\trem.s%T0\\t%0, %1, %2;") | |
414 | ||
415 | (define_insn "umod<mode>3" | |
416 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
417 | (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri") | |
418 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
419 | "" | |
420 | "%.\\trem.u%T0\\t%0, %1, %2;") | |
421 | ||
422 | (define_insn "smin<mode>3" | |
423 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
424 | (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
425 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
426 | "" | |
427 | "%.\\tmin.s%T0\\t%0, %1, %2;") | |
428 | ||
429 | (define_insn "umin<mode>3" | |
430 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
431 | (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
432 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
433 | "" | |
434 | "%.\\tmin.u%T0\\t%0, %1, %2;") | |
435 | ||
436 | (define_insn "smax<mode>3" | |
437 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
438 | (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
439 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
440 | "" | |
441 | "%.\\tmax.s%T0\\t%0, %1, %2;") | |
442 | ||
443 | (define_insn "umax<mode>3" | |
444 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
445 | (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
446 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
447 | "" | |
448 | "%.\\tmax.u%T0\\t%0, %1, %2;") | |
449 | ||
450 | (define_insn "abs<mode>2" | |
451 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
452 | (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
453 | "" | |
454 | "%.\\tabs.s%T0\\t%0, %1;") | |
455 | ||
456 | (define_insn "neg<mode>2" | |
457 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
458 | (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
459 | "" | |
460 | "%.\\tneg.s%T0\\t%0, %1;") | |
461 | ||
462 | (define_insn "one_cmpl<mode>2" | |
463 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
464 | (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
465 | "" | |
466 | "%.\\tnot.b%T0\\t%0, %1;") | |
467 | ||
468 | (define_insn "bitrev<mode>2" | |
469 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
470 | (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")] | |
471 | UNSPEC_BITREV))] | |
472 | "" | |
473 | "%.\\tbrev.b%T0\\t%0, %1;") | |
474 | ||
475 | (define_insn "clz<mode>2" | |
476 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
477 | (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))] | |
478 | "" | |
b594ca2b | 479 | "%.\\tclz.b%T1\\t%0, %1;") |
8ce80784 | 480 | |
481 | (define_expand "ctz<mode>2" | |
482 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
483 | (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))] | |
484 | "" | |
485 | { | |
486 | rtx tmpreg = gen_reg_rtx (<MODE>mode); | |
487 | emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1])); | |
488 | emit_insn (gen_clz<mode>2 (operands[0], tmpreg)); | |
489 | DONE; | |
490 | }) | |
491 | ||
492 | ;; Shifts | |
493 | ||
494 | (define_insn "ashl<mode>3" | |
495 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
496 | (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
497 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
498 | "" | |
499 | "%.\\tshl.b%T0\\t%0, %1, %2;") | |
500 | ||
501 | (define_insn "ashr<mode>3" | |
502 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
503 | (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
504 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
505 | "" | |
506 | "%.\\tshr.s%T0\\t%0, %1, %2;") | |
507 | ||
508 | (define_insn "lshr<mode>3" | |
509 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
510 | (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
511 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
512 | "" | |
513 | "%.\\tshr.u%T0\\t%0, %1, %2;") | |
514 | ||
515 | ;; Logical operations | |
516 | ||
517 | (define_insn "and<mode>3" | |
518 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
519 | (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
520 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
521 | "" | |
522 | "%.\\tand.b%T0\\t%0, %1, %2;") | |
523 | ||
524 | (define_insn "ior<mode>3" | |
525 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
526 | (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
527 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
528 | "" | |
529 | "%.\\tor.b%T0\\t%0, %1, %2;") | |
530 | ||
531 | (define_insn "xor<mode>3" | |
532 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
533 | (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
534 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
535 | "" | |
536 | "%.\\txor.b%T0\\t%0, %1, %2;") | |
537 | ||
538 | ;; Comparisons and branches | |
539 | ||
540 | (define_insn "*cmp<mode>" | |
541 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
542 | (match_operator:BI 1 "nvptx_comparison_operator" | |
543 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") | |
544 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
545 | "" | |
b27697ca | 546 | "%.\\tsetp%c1\\t%0, %2, %3;") |
8ce80784 | 547 | |
548 | (define_insn "*cmp<mode>" | |
549 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
550 | (match_operator:BI 1 "nvptx_float_comparison_operator" | |
551 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") | |
552 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
553 | "" | |
b27697ca | 554 | "%.\\tsetp%c1\\t%0, %2, %3;") |
8ce80784 | 555 | |
556 | (define_insn "jump" | |
557 | [(set (pc) | |
558 | (label_ref (match_operand 0 "" "")))] | |
559 | "" | |
560 | "%.\\tbra\\t%l0;") | |
561 | ||
562 | (define_insn "br_true" | |
563 | [(set (pc) | |
564 | (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
565 | (const_int 0)) | |
566 | (label_ref (match_operand 1 "" "")) | |
567 | (pc)))] | |
568 | "" | |
7fce8768 | 569 | "%j0\\tbra\\t%l1;" |
570 | [(set_attr "predicable" "false")]) | |
8ce80784 | 571 | |
572 | (define_insn "br_false" | |
573 | [(set (pc) | |
574 | (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
575 | (const_int 0)) | |
576 | (label_ref (match_operand 1 "" "")) | |
577 | (pc)))] | |
578 | "" | |
7fce8768 | 579 | "%J0\\tbra\\t%l1;" |
580 | [(set_attr "predicable" "false")]) | |
8ce80784 | 581 | |
b3787ae4 | 582 | ;; Unified conditional branches (emitted as bra.uni). |
583 | (define_insn "br_true_uni" | |
584 | [(set (pc) (if_then_else | |
585 | (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
586 | UNSPEC_BR_UNIFIED) (const_int 0)) | |
587 | (label_ref (match_operand 1 "" "")) (pc)))] | |
588 | "" | |
7fce8768 | 589 | "%j0\\tbra.uni\\t%l1;" |
590 | [(set_attr "predicable" "false")]) | |
b3787ae4 | 591 | |
592 | (define_insn "br_false_uni" | |
593 | [(set (pc) (if_then_else | |
594 | (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
595 | UNSPEC_BR_UNIFIED) (const_int 0)) | |
596 | (label_ref (match_operand 1 "" "")) (pc)))] | |
597 | "" | |
7fce8768 | 598 | "%J0\\tbra.uni\\t%l1;" |
599 | [(set_attr "predicable" "false")]) | |
b3787ae4 | 600 | |
8ce80784 | 601 | (define_expand "cbranch<mode>4" |
602 | [(set (pc) | |
603 | (if_then_else (match_operator 0 "nvptx_comparison_operator" | |
604 | [(match_operand:HSDIM 1 "nvptx_register_operand" "") | |
9d846e45 | 605 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")]) |
8ce80784 | 606 | (label_ref (match_operand 3 "" "")) |
607 | (pc)))] | |
608 | "" | |
609 | { | |
610 | rtx t = nvptx_expand_compare (operands[0]); | |
611 | operands[0] = t; | |
612 | operands[1] = XEXP (t, 0); | |
613 | operands[2] = XEXP (t, 1); | |
614 | }) | |
615 | ||
616 | (define_expand "cbranch<mode>4" | |
617 | [(set (pc) | |
618 | (if_then_else (match_operator 0 "nvptx_float_comparison_operator" | |
619 | [(match_operand:SDFM 1 "nvptx_register_operand" "") | |
9d846e45 | 620 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")]) |
8ce80784 | 621 | (label_ref (match_operand 3 "" "")) |
622 | (pc)))] | |
623 | "" | |
624 | { | |
625 | rtx t = nvptx_expand_compare (operands[0]); | |
626 | operands[0] = t; | |
627 | operands[1] = XEXP (t, 0); | |
628 | operands[2] = XEXP (t, 1); | |
629 | }) | |
630 | ||
631 | (define_expand "cbranchbi4" | |
632 | [(set (pc) | |
633 | (if_then_else (match_operator 0 "predicate_operator" | |
634 | [(match_operand:BI 1 "nvptx_register_operand" "") | |
635 | (match_operand:BI 2 "const0_operand" "")]) | |
636 | (label_ref (match_operand 3 "" "")) | |
637 | (pc)))] | |
638 | "" | |
639 | "") | |
640 | ||
641 | ;; Conditional stores | |
642 | ||
643 | (define_insn "setcc_from_bi" | |
644 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
645 | (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R") | |
646 | (const_int 0)))] | |
647 | "" | |
648 | "%.\\tselp%t0 %0,-1,0,%1;") | |
649 | ||
75e09431 | 650 | (define_insn "sel_true<mode>" |
651 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
652 | (if_then_else:HSDIM | |
653 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
654 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
655 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
656 | "" | |
657 | "%.\\tselp%t0\\t%0, %2, %3, %1;") | |
658 | ||
659 | (define_insn "sel_true<mode>" | |
660 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
661 | (if_then_else:SDFM | |
662 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
663 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
664 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
665 | "" | |
666 | "%.\\tselp%t0\\t%0, %2, %3, %1;") | |
667 | ||
668 | (define_insn "sel_false<mode>" | |
669 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
670 | (if_then_else:HSDIM | |
671 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
672 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
673 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
674 | "" | |
675 | "%.\\tselp%t0\\t%0, %3, %2, %1;") | |
676 | ||
677 | (define_insn "sel_false<mode>" | |
678 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
679 | (if_then_else:SDFM | |
680 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
681 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
682 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
683 | "" | |
684 | "%.\\tselp%t0\\t%0, %3, %2, %1;") | |
685 | ||
8ce80784 | 686 | (define_insn "setcc_int<mode>" |
687 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
688 | (match_operator:SI 1 "nvptx_comparison_operator" | |
75e09431 | 689 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") |
690 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
8ce80784 | 691 | "" |
b27697ca | 692 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
8ce80784 | 693 | |
694 | (define_insn "setcc_int<mode>" | |
695 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
696 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
75e09431 | 697 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") |
698 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
8ce80784 | 699 | "" |
b27697ca | 700 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
8ce80784 | 701 | |
702 | (define_insn "setcc_float<mode>" | |
703 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
704 | (match_operator:SF 1 "nvptx_comparison_operator" | |
75e09431 | 705 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") |
706 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
8ce80784 | 707 | "" |
b27697ca | 708 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
8ce80784 | 709 | |
710 | (define_insn "setcc_float<mode>" | |
711 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
712 | (match_operator:SF 1 "nvptx_float_comparison_operator" | |
75e09431 | 713 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") |
714 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
8ce80784 | 715 | "" |
b27697ca | 716 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
8ce80784 | 717 | |
718 | (define_expand "cstorebi4" | |
719 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
720 | (match_operator:SI 1 "ne_operator" | |
721 | [(match_operand:BI 2 "nvptx_register_operand") | |
722 | (match_operand:BI 3 "const0_operand")]))] | |
723 | "" | |
724 | "") | |
725 | ||
726 | (define_expand "cstore<mode>4" | |
727 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
728 | (match_operator:SI 1 "nvptx_comparison_operator" | |
729 | [(match_operand:HSDIM 2 "nvptx_register_operand") | |
730 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))] | |
731 | "" | |
732 | "") | |
733 | ||
734 | (define_expand "cstore<mode>4" | |
735 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
736 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
737 | [(match_operand:SDFM 2 "nvptx_register_operand") | |
738 | (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))] | |
739 | "" | |
740 | "") | |
741 | ||
742 | ;; Calls | |
743 | ||
744 | (define_insn "call_insn" | |
745 | [(match_parallel 2 "call_operation" | |
f8e6fa1d | 746 | [(call (mem:QI (match_operand 0 "call_insn_operand" "Rs")) |
8ce80784 | 747 | (match_operand 1))])] |
748 | "" | |
749 | { | |
750 | return nvptx_output_call_insn (insn, NULL_RTX, operands[0]); | |
751 | }) | |
752 | ||
753 | (define_insn "call_value_insn" | |
754 | [(match_parallel 3 "call_operation" | |
755 | [(set (match_operand 0 "nvptx_register_operand" "=R") | |
f8e6fa1d | 756 | (call (mem:QI (match_operand 1 "call_insn_operand" "Rs")) |
8ce80784 | 757 | (match_operand 2)))])] |
758 | "" | |
759 | { | |
760 | return nvptx_output_call_insn (insn, operands[0], operands[1]); | |
761 | }) | |
762 | ||
763 | (define_expand "call" | |
764 | [(match_operand 0 "" "")] | |
765 | "" | |
766 | { | |
767 | nvptx_expand_call (NULL_RTX, operands[0]); | |
768 | DONE; | |
769 | }) | |
770 | ||
771 | (define_expand "call_value" | |
772 | [(match_operand 0 "" "") | |
773 | (match_operand 1 "" "")] | |
774 | "" | |
775 | { | |
776 | nvptx_expand_call (operands[0], operands[1]); | |
777 | DONE; | |
778 | }) | |
779 | ||
780 | ;; Floating point arithmetic. | |
781 | ||
782 | (define_insn "add<mode>3" | |
783 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
784 | (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
785 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
786 | "" | |
787 | "%.\\tadd%t0\\t%0, %1, %2;") | |
788 | ||
789 | (define_insn "sub<mode>3" | |
790 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
791 | (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
792 | (match_operand:SDFM 2 "nvptx_register_operand" "R")))] | |
793 | "" | |
794 | "%.\\tsub%t0\\t%0, %1, %2;") | |
795 | ||
796 | (define_insn "mul<mode>3" | |
797 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
798 | (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
799 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
800 | "" | |
801 | "%.\\tmul%t0\\t%0, %1, %2;") | |
802 | ||
;; Fused multiply-add for every mode of the SDFM iterator, expressed with
;; the fma RTL code over operands 1, 2 and 3.  Operands 2 and 3 are matched
;; by nvptx_nonmemory_operand.
;; NOTE(review): the template inserts %# before %t0; presumably this prints
;; a rounding-mode modifier -- confirm in the target's print-operand code.
803 | (define_insn "fma<mode>4" | |
804 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
805 | (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
806 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
807 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
808 | "" | |
809 | "%.\\tfma%#%t0\\t%0, %1, %2, %3;") | |
810 | ||
;; Division for every mode of the SDFM iterator: operand 0 = operand 1 /
;; operand 2.  Operand 2 is matched by nvptx_nonmemory_operand.
;; NOTE(review): as in the fma pattern, %# precedes the %t0 suffix;
;; presumably a rounding-mode modifier -- confirm.
811 | (define_insn "div<mode>3" | |
812 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
813 | (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
814 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
815 | "" | |
816 | "%.\\tdiv%#%t0\\t%0, %1, %2;") | |
817 | ||
;; copysign for every mode of the SDFM iterator, represented as an
;; UNSPEC_COPYSIGN over operands 1 and 2.  Note that the template prints
;; the inputs in swapped order (%2 before %1).
;; NOTE(review): presumably the PTX instruction takes the sign source
;; first -- confirm against the PTX ISA before touching the operand order.
818 | (define_insn "copysign<mode>3" | |
819 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
820 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R") | |
821 | (match_operand:SDFM 2 "nvptx_register_operand" "R")] | |
822 | UNSPEC_COPYSIGN))] | |
823 | "" | |
824 | "%.\\tcopysign%t0\\t%0, %2, %1;") | |
825 | ||
;; Minimum (smin RTL code) of operands 1 and 2 for every mode of the SDFM
;; iterator.  Operand 2 is matched by nvptx_nonmemory_operand.  The
;; template prints "min" followed by the %t0 suffix.
826 | (define_insn "smin<mode>3" | |
827 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
828 | (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
829 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
830 | "" | |
831 | "%.\\tmin%t0\\t%0, %1, %2;") | |
832 | ||
;; Maximum (smax RTL code) of operands 1 and 2 for every mode of the SDFM
;; iterator.  Operand 2 is matched by nvptx_nonmemory_operand.  The
;; template prints "max" followed by the %t0 suffix.
833 | (define_insn "smax<mode>3" | |
834 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
835 | (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
836 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
837 | "" | |
838 | "%.\\tmax%t0\\t%0, %1, %2;") | |
839 | ||
;; Absolute value for every mode of the SDFM iterator: operand 0 =
;; abs (operand 1), register to register.
840 | (define_insn "abs<mode>2" | |
841 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
842 | (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
843 | "" | |
844 | "%.\\tabs%t0\\t%0, %1;") | |
845 | ||
;; Negation for every mode of the SDFM iterator: operand 0 = -operand 1,
;; register to register.
846 | (define_insn "neg<mode>2" | |
847 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
848 | (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
849 | "" | |
850 | "%.\\tneg%t0\\t%0, %1;") | |
851 | ||
;; Square root for every mode of the SDFM iterator: operand 0 =
;; sqrt (operand 1).
;; NOTE(review): the template inserts %# before %t0; presumably a
;; rounding-mode modifier printed by the target's operand printer -- confirm.
852 | (define_insn "sqrt<mode>2" | |
853 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
854 | (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
855 | "" | |
856 | "%.\\tsqrt%#%t0\\t%0, %1;") | |
857 | ||
;; SFmode sincos expander, enabled only with flag_unsafe_math_optimizations.
;; Operand 0 receives the UNSPEC_COS of operand 2 and operand 1 receives
;; the UNSPEC_SIN of the same input; the two sets match the cossf2 and
;; sinsf2 insns in this file.
;; The preparation statement replaces operand 2 with the result of
;; make_safe_from (operands[2], operands[0]).
;; NOTE(review): presumably this keeps the input intact if writing
;; operand 0 (emitted first) would otherwise clobber it -- confirm.
3d380077 | 858 | (define_expand "sincossf3" |
859 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
860 | (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")] | |
861 | UNSPEC_COS)) | |
862 | (set (match_operand:SF 1 "nvptx_register_operand" "=R") | |
863 | (unspec:SF [(match_dup 2)] UNSPEC_SIN))] | |
864 | "flag_unsafe_math_optimizations" | |
865 | { | |
866 | operands[2] = make_safe_from (operands[2], operands[0]); | |
867 | }) | |
868 | ||
;; SFmode sine as UNSPEC_SIN.  Available only with
;; flag_unsafe_math_optimizations because the template emits the
;; approximate form, "sin.approx".
8ce80784 | 869 | (define_insn "sinsf2" |
870 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
871 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
872 | UNSPEC_SIN))] | |
873 | "flag_unsafe_math_optimizations" | |
874 | "%.\\tsin.approx%t0\\t%0, %1;") | |
875 | ||
;; SFmode cosine as UNSPEC_COS.  Available only with
;; flag_unsafe_math_optimizations because the template emits the
;; approximate form, "cos.approx".
876 | (define_insn "cossf2" | |
877 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
878 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
879 | UNSPEC_COS))] | |
880 | "flag_unsafe_math_optimizations" | |
881 | "%.\\tcos.approx%t0\\t%0, %1;") | |
882 | ||
;; SFmode base-2 logarithm as UNSPEC_LOG2.  Available only with
;; flag_unsafe_math_optimizations; the template emits "lg2.approx".
883 | (define_insn "log2sf2" | |
884 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
885 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
886 | UNSPEC_LOG2))] | |
887 | "flag_unsafe_math_optimizations" | |
888 | "%.\\tlg2.approx%t0\\t%0, %1;") | |
889 | ||
;; SFmode base-2 exponential as UNSPEC_EXP2.  Available only with
;; flag_unsafe_math_optimizations; the template emits "ex2.approx".
890 | (define_insn "exp2sf2" | |
891 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
892 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
893 | UNSPEC_EXP2))] | |
894 | "flag_unsafe_math_optimizations" | |
895 | "%.\\tex2.approx%t0\\t%0, %1;") | |
896 | ||
897 | ;; Conversions involving floating point | |
898 | ||
;; Widen SF operand 1 to DF operand 0 (float_extend).  The template prints
;; "cvt" followed by the %t suffixes of the destination and then the
;; source; no %# modifier is used here, unlike truncdfsf2.
899 | (define_insn "extendsfdf2" | |
900 | [(set (match_operand:DF 0 "nvptx_register_operand" "=R") | |
901 | (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))] | |
902 | "" | |
903 | "%.\\tcvt%t0%t1\\t%0, %1;") | |
904 | ||
;; Narrow DF operand 1 to SF operand 0 (float_truncate).
;; NOTE(review): %# precedes the type suffixes; presumably it supplies the
;; rounding mode needed by a narrowing conversion -- confirm.
905 | (define_insn "truncdfsf2" | |
906 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
907 | (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))] | |
908 | "" | |
909 | "%.\\tcvt%#%t0%t1\\t%0, %1;") | |
910 | ||
;; Convert unsigned SI operand 1 to each SDFM mode (unsigned_float).
;; The source type is spelled ".u" followed by %T1 in the template
;; (%T1 is expanded by the target's operand printer; presumably the bit
;; width of operand 1).
911 | (define_insn "floatunssi<mode>2" | |
912 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
913 | (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
914 | "" | |
915 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") | |
916 | ||
;; Convert signed SI operand 1 to each SDFM mode (float).  Identical to the
;; unsigned variant except that the source type is spelled ".s" plus %T1.
917 | (define_insn "floatsi<mode>2" | |
918 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
919 | (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
920 | "" | |
921 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") | |
922 | ||
;; Convert unsigned DI operand 1 to each SDFM mode (unsigned_float).
;; Same template as the SI variant; only the source operand mode differs.
923 | (define_insn "floatunsdi<mode>2" | |
924 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
925 | (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
926 | "" | |
927 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") | |
928 | ||
;; Convert signed DI operand 1 to each SDFM mode (float).
;; Same template as the SI variant; only the source operand mode differs.
929 | (define_insn "floatdi<mode>2" | |
930 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
931 | (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
932 | "" | |
933 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") | |
934 | ||
;; Convert each SDFM mode to unsigned SI (unsigned_fix).  The template
;; hard-codes the ".rzi" modifier and spells the destination type as ".u"
;; followed by %T0.
935 | (define_insn "fixuns_trunc<mode>si2" | |
936 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
937 | (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
938 | "" | |
939 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") | |
940 | ||
;; Convert each SDFM mode to signed SI (fix).  The template hard-codes the
;; ".rzi" modifier and spells the destination type as ".s" followed by %T0.
941 | (define_insn "fix_trunc<mode>si2" | |
942 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
943 | (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
944 | "" | |
945 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") | |
946 | ||
;; Convert each SDFM mode to unsigned DI (unsigned_fix).  Same template as
;; the SI variant; only the destination operand mode differs.
947 | (define_insn "fixuns_trunc<mode>di2" | |
948 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
949 | (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
950 | "" | |
951 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") | |
952 | ||
;; Convert each SDFM mode to signed DI (fix).  Same template as the SI
;; variant; only the destination operand mode differs.
953 | (define_insn "fix_trunc<mode>di2" | |
954 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
955 | (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
956 | "" | |
957 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") | |
958 | ||
;; Float-to-integral-valued-float rounding patterns.  The FPINT iterator
;; ranges over four unspecs; fpint_name supplies the pattern name stem
;; (floor, btrunc, ceil, nearbyint) and fpint_roundingmode the text that is
;; spliced after "cvt" in the output template (.rmi, .rzi, .rpi, or "%#i"
;; for nearbyint, where %# is expanded by the operand printer at output
;; time).  One insn is generated per (unspec, SDFM mode) pair; source and
;; destination share the same mode.
959 | (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC | |
960 | UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT]) | |
961 | (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor") | |
962 | (UNSPEC_FPINT_BTRUNC "btrunc") | |
963 | (UNSPEC_FPINT_CEIL "ceil") | |
964 | (UNSPEC_FPINT_NEARBYINT "nearbyint")]) | |
965 | (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") | |
966 | (UNSPEC_FPINT_BTRUNC ".rzi") | |
967 | (UNSPEC_FPINT_CEIL ".rpi") | |
968 | (UNSPEC_FPINT_NEARBYINT "%#i")]) | |
969 | ||
970 | (define_insn "<FPINT:fpint_name><SDFM:mode>2" | |
971 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
972 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
973 | FPINT))] | |
974 | "" | |
975 | "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;") | |
976 | ||
;; Float-to-integer rounding patterns (lfloor and lceil).  FPINT2 covers
;; only the FLOOR and CEIL unspecs; fpint2_roundingmode maps them to .rmi
;; and .rpi.  One insn is generated for every combination of unspec,
;; SDFM source mode and SDIM destination mode.  The destination type is
;; always printed as signed (".s" followed by %T0).
977 | (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL]) | |
978 | (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor") | |
979 | (UNSPEC_FPINT_CEIL "lceil")]) | |
980 | (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") | |
981 | (UNSPEC_FPINT_CEIL ".rpi")]) | |
982 | ||
983 | (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2" | |
984 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
985 | (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
986 | FPINT2))] | |
987 | "" | |
988 | "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;") | |
989 | ||
990 | ;; Miscellaneous | |
991 | ||
;; No-op insn: the pattern is (const_int 0) and the output template is the
;; empty string, so nothing is printed for it.  Used in this file as a
;; placeholder after sorry () diagnostics.
992 | (define_insn "nop" | |
993 | [(const_int 0)] | |
994 | "" | |
995 | "") | |
996 | ||
;; Insn with pattern (const_int 1) -- distinct from nop's (const_int 0) --
;; whose template prints the literal text "exit;".
f3c313db | 997 | (define_insn "exit" |
998 | [(const_int 1)] | |
999 | "" | |
1000 | "exit;") | |
1001 | ||
;; Function return.  The assembly text is produced by nvptx_output_return
;; (defined outside this file section).  The insn is marked as not
;; predicable.
8ce80784 | 1002 | (define_insn "return" |
1003 | [(return)] | |
1004 | "" | |
1005 | { | |
1006 | return nvptx_output_return (); | |
7fce8768 | 1007 | } |
1008 | [(set_attr "predicable" "false")]) | |
8ce80784 | 1009 | |
1010 | (define_expand "epilogue" | |
1011 | [(clobber (const_int 0))] | |
1012 | "" | |
1013 | { | |
7fce8768 | 1014 | if (TARGET_SOFT_STACK) |
1015 | emit_insn (gen_set_softstack_insn (gen_rtx_REG (Pmode, | |
1016 | SOFTSTACK_PREV_REGNUM))); | |
8ce80784 | 1017 | emit_jump_insn (gen_return ()); |
1018 | DONE; | |
1019 | }) | |
1020 | ||
;; Nonlocal goto is rejected: the expander reports a sorry () diagnostic,
;; emits a nop so that some insn is produced, and ignores all four
;; operands.
1021 | (define_expand "nonlocal_goto" | |
1022 | [(match_operand 0 "" "") | |
1023 | (match_operand 1 "" "") | |
1024 | (match_operand 2 "" "") | |
1025 | (match_operand 3 "" "")] | |
1026 | "" | |
1027 | { | |
1028 | sorry ("target cannot support nonlocal goto."); | |
1029 | emit_insn (gen_nop ()); | |
1030 | DONE; | |
1031 | }) | |
1032 | ||
;; Receiver side of nonlocal goto: only reports the same sorry ()
;; diagnostic.  There is no DONE here, so after the preparation statement
;; the (const_int 0) template is what gets emitted.
1033 | (define_expand "nonlocal_goto_receiver" | |
1034 | [(const_int 0)] | |
1035 | "" | |
1036 | { | |
1037 | sorry ("target cannot support nonlocal goto."); | |
1038 | }) | |
1039 | ||
;; Dynamic stack allocation.  Operand 0 receives the address of the new
;; block and operand 1 is the size to allocate.
;; With TARGET_SOFT_STACK: subtract operand 1 from the stack pointer,
;; emit set_softstack_insn on the updated stack pointer, and copy
;; virtual_stack_dynamic_rtx into operand 0.
;; Otherwise: report a sorry () diagnostic and emit a nop placeholder.
ed20400d | 1040 | (define_expand "allocate_stack" |
1041 | [(match_operand 0 "nvptx_register_operand") | |
1042 | (match_operand 1 "nvptx_register_operand")] | |
1043 | "" | |
1044 | { | |
7fce8768 | 1045 | if (TARGET_SOFT_STACK) |
1046 | { | |
1047 | emit_move_insn (stack_pointer_rtx, | |
1048 | gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1])); | |
1049 | emit_insn (gen_set_softstack_insn (stack_pointer_rtx)); | |
1050 | emit_move_insn (operands[0], virtual_stack_dynamic_rtx); | |
1051 | DONE; | |
1052 | } | |
f289122f | 1053 | /* The ptx documentation specifies an alloca intrinsic (for 32 bit |
1054 | only) but notes it is not implemented. The assembler emits a | |
1055 | confused error message. Issue a blunt one now instead. */ | |
1056 | sorry ("target cannot support alloca."); | |
1057 | emit_insn (gen_nop ()); | |
1058 | DONE; | |
ed20400d | 1059 | }) |
1060 | ||
;; UNSPEC_SET_SOFTSTACK on register operand 0; only available when
;; TARGET_SOFT_STACK.  The assembly text comes from
;; nvptx_output_set_softstack, which is given the register number of
;; operand 0.
7fce8768 | 1061 | (define_insn "set_softstack_insn" |
1062 | [(unspec [(match_operand 0 "nvptx_register_operand" "R")] | |
1063 | UNSPEC_SET_SOFTSTACK)] | |
1064 | "TARGET_SOFT_STACK" | |
1065 | { | |
1066 | return nvptx_output_set_softstack (REGNO (operands[0])); | |
1067 | }) | |
8ce80784 | 1068 | |
;; Restore the stack pointer at block exit.  With TARGET_SOFT_STACK,
;; operand 1 is copied into operand 0 and set_softstack_insn is emitted on
;; operand 0.  Without it, nothing is emitted at all (DONE is
;; unconditional).
1069 | (define_expand "restore_stack_block" | |
1070 | [(match_operand 0 "register_operand" "") | |
1071 | (match_operand 1 "register_operand" "")] | |
1072 | "" | |
1073 | { | |
7fce8768 | 1074 | if (TARGET_SOFT_STACK) |
1075 | { | |
1076 | emit_move_insn (operands[0], operands[1]); | |
1077 | emit_insn (gen_set_softstack_insn (operands[0])); | |
1078 | } | |
8ce80784 | 1079 | DONE; |
1080 | }) | |
1081 | ||
;; Restore the stack pointer at function level: deliberately a no-op.
;; The body is just DONE, so no RTL is emitted and both operands are
;; ignored.
1082 | (define_expand "restore_stack_function" | |
1083 | [(match_operand 0 "register_operand" "") | |
1084 | (match_operand 1 "register_operand" "")] | |
1085 | "" | |
1086 | { | |
1087 | DONE; | |
1088 | }) | |
1089 | ||
;; Unconditional trap: trap_if with a constant-true condition, printed as
;; the literal text "trap;".
1090 | (define_insn "trap" | |
1091 | [(trap_if (const_int 1) (const_int 0))] | |
1092 | "" | |
1093 | "trap;") | |
1094 | ||
;; Trap when BImode operand 0 is non-zero.  The template prefixes "trap;"
;; with %j0 (expanded by the target's operand printer; presumably a
;; predicate guard on operand 0 -- confirm).  Marked not predicable.
1095 | (define_insn "trap_if_true" | |
1096 | [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
1097 | (const_int 0)) | |
1098 | (const_int 0))] | |
1099 | "" | |
7fce8768 | 1100 | "%j0 trap;" |
1101 | [(set_attr "predicable" "false")]) | |
8ce80784 | 1102 | |
;; Trap when BImode operand 0 is zero.  Mirror image of trap_if_true: the
;; condition is eq instead of ne and the template uses %J0 instead of %j0
;; (presumably the inverted predicate guard -- confirm).  Marked not
;; predicable.
1103 | (define_insn "trap_if_false" | |
1104 | [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
1105 | (const_int 0)) | |
1106 | (const_int 0))] | |
1107 | "" | |
7fce8768 | 1108 | "%J0 trap;" |
1109 | [(set_attr "predicable" "false")]) | |
8ce80784 | 1110 | |
;; Conditional trap on an SDIM comparison.  Operand 0 is the comparison
;; operator applied to operands 1 and 2; operand 3 must satisfy
;; const0_operand.  The comparison is lowered by nvptx_expand_compare and
;; its result is fed to trap_if_true, so the trap fires when the
;; comparison holds.
1111 | (define_expand "ctrap<mode>4" | |
1112 | [(trap_if (match_operator 0 "nvptx_comparison_operator" | |
1113 | [(match_operand:SDIM 1 "nvptx_register_operand") | |
1114 | (match_operand:SDIM 2 "nvptx_nonmemory_operand")]) | |
25ce1bcb | 1115 | (match_operand 3 "const0_operand"))] |
8ce80784 | 1116 | "" |
1117 | { | |
1118 | rtx t = nvptx_expand_compare (operands[0]); | |
1119 | emit_insn (gen_trap_if_true (t)); | |
1120 | DONE; | |
1121 | }) | |
1122 | ||
;; Read the size of an OpenACC dimension into SI operand 0.  Constant
;; operand 1 selects the template: 0 reads %nctaid.x (gang), 1 reads
;; %ntid.y (worker), 2 reads %ntid.x (vector).
;; NOTE(review): asms[] is indexed directly with INTVAL (operands[1]) and
;; there is no range check in this pattern; callers are assumed to pass
;; only 0..2 -- confirm.
b3787ae4 | 1123 | (define_insn "oacc_dim_size" |
1124 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
1125 | (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] | |
1126 | UNSPEC_DIM_SIZE))] | |
8ce80784 | 1127 | "" |
b3787ae4 | 1128 | { |
1129 | static const char *const asms[] = | |
1130 | { /* Must match oacc_loop_levels ordering. */ | |
1131 | "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */ | |
1132 | "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */ | |
1133 | "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */ | |
1134 | }; | |
1135 | return asms[INTVAL (operands[1])]; | |
1136 | }) | |
8ce80784 | 1137 | |
;; Read the position within an OpenACC dimension into SI operand 0.
;; Constant operand 1 selects the template: 0 reads %ctaid.x (gang),
;; 1 reads %tid.y (worker), 2 reads %tid.x (vector).  Unlike
;; oacc_dim_size this is an unspec_volatile (UNSPECV_DIM_POS).
;; NOTE(review): asms[] is indexed directly with INTVAL (operands[1]) and
;; there is no range check in this pattern; callers are assumed to pass
;; only 0..2 -- confirm.
b3787ae4 | 1138 | (define_insn "oacc_dim_pos" |
8ce80784 | 1139 | [(set (match_operand:SI 0 "nvptx_register_operand" "") |
b3787ae4 | 1140 | (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")] |
1141 | UNSPECV_DIM_POS))] | |
8ce80784 | 1142 | "" |
1143 | { | |
b3787ae4 | 1144 | static const char *const asms[] = |
1145 | { /* Must match oacc_loop_levels ordering. */ | |
1146 | "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */ | |
1147 | "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */ | |
1148 | "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */ | |
1149 | }; | |
1150 | return asms[INTVAL (operands[1])]; | |
8ce80784 | 1151 | }) |
1152 | ||
;; Marker insn (UNSPECV_FORK) carrying a constant operand.  The template
;; begins with "//", so presumably only an assembler comment is printed.
;; Marked not predicable.
b3787ae4 | 1153 | (define_insn "nvptx_fork" |
1154 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1155 | UNSPECV_FORK)] | |
8ce80784 | 1156 | "" |
b3787ae4 | 1157 | "// fork %0;" |
7fce8768 | 1158 | [(set_attr "predicable" "false")]) |
8ce80784 | 1159 | |
;; Marker insn (UNSPECV_FORKED) carrying a constant operand.  The template
;; begins with "//", so presumably only an assembler comment is printed.
;; Marked not predicable.
b3787ae4 | 1160 | (define_insn "nvptx_forked" |
1161 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1162 | UNSPECV_FORKED)] | |
1163 | "" | |
1164 | "// forked %0;" | |
7fce8768 | 1165 | [(set_attr "predicable" "false")]) |
b3787ae4 | 1166 | |
;; Marker insn (UNSPECV_JOINING) carrying a constant operand.  The
;; template begins with "//", so presumably only an assembler comment is
;; printed.  Marked not predicable.
1167 | (define_insn "nvptx_joining" | |
1168 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1169 | UNSPECV_JOINING)] | |
1170 | "" | |
1171 | "// joining %0;" | |
7fce8768 | 1172 | [(set_attr "predicable" "false")]) |
b3787ae4 | 1173 | |
;; Marker insn (UNSPECV_JOIN) carrying a constant operand.  The template
;; begins with "//", so presumably only an assembler comment is printed.
;; Marked not predicable.
1174 | (define_insn "nvptx_join" | |
1175 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1176 | UNSPECV_JOIN)] | |
1177 | "" | |
1178 | "// join %0;" | |
7fce8768 | 1179 | [(set_attr "predicable" "false")]) |
b3787ae4 | 1180 | |
;; OpenACC fork expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0.  The fork itself is emitted by
;; nvptx_expand_oacc_fork, which receives the integer value of constant
;; operand 2.  The RTL template is not used because the body ends with
;; DONE.
1181 | (define_expand "oacc_fork" | |
1182 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
6bd291cd | 1183 | (match_operand:SI 1 "general_operand" "")) |
b3787ae4 | 1184 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] |
1185 | UNSPECV_FORKED)] | |
8ce80784 | 1186 | "" |
1187 | { | |
b3787ae4 | 1188 | if (operands[0] != const0_rtx) |
1189 | emit_move_insn (operands[0], operands[1]); | |
1190 | nvptx_expand_oacc_fork (INTVAL (operands[2])); | |
1191 | DONE; | |
1192 | }) | |
1193 | ||
;; OpenACC join expander; same shape as oacc_fork.  Unless operand 0 is
;; const0_rtx, operand 1 is first copied into operand 0.  The join itself
;; is emitted by nvptx_expand_oacc_join, which receives the integer value
;; of constant operand 2.
1194 | (define_expand "oacc_join" | |
1195 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
6bd291cd | 1196 | (match_operand:SI 1 "general_operand" "")) |
b3787ae4 | 1197 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] |
1198 | UNSPECV_JOIN)] | |
1199 | "" | |
1200 | { | |
1201 | if (operands[0] != const0_rtx) | |
1202 | emit_move_insn (operands[0], operands[1]); | |
1203 | nvptx_expand_oacc_join (INTVAL (operands[2])); | |
1204 | DONE; | |
8ce80784 | 1205 | }) |
1206 | ||
b3787ae4 | 1207 | ;; only 32-bit shuffles exist. |
;; Shuffle of operand 1 into operand 0 for each mode of the BITS iterator.
;; Operand 2 is the SI lane argument and constant operand 3 is printed
;; through %S3 (presumably the shuffle-kind suffix -- confirm in the
;; operand printer).  The last instruction argument is the literal 31 and
;; the data type is always printed as .b32.
1208 | (define_insn "nvptx_shuffle<mode>" | |
1209 | [(set (match_operand:BITS 0 "nvptx_register_operand" "=R") | |
1210 | (unspec:BITS | |
1211 | [(match_operand:BITS 1 "nvptx_register_operand" "R") | |
1212 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") | |
1213 | (match_operand:SI 3 "const_int_operand" "n")] | |
1214 | UNSPEC_SHUFFLE))] | |
1215 | "" | |
1216 | "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;") | |
1217 | ||
;; UNSPEC_VOTE_BALLOT: produces SI operand 0 from BImode predicate
;; operand 1.  The template emits "vote.ballot.b32".
7fce8768 | 1218 | (define_insn "nvptx_vote_ballot" |
1219 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1220 | (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")] | |
1221 | UNSPEC_VOTE_BALLOT))] | |
1222 | "" | |
1223 | "%.\\tvote.ballot.b32\\t%0, %1;") | |
1224 | ||
1225 | ;; Patterns for OpenMP SIMD-via-SIMT lowering | |
1226 | ||
;; Insn form of SIMT region entry (UNSPECV_SIMT_ENTER).  Operand 0 is the
;; register result; operands 1 and 2 are passed with it to
;; nvptx_output_simt_enter, which produces the assembly text.  The
;; omp_simt_enter expander is what emits this insn.
1b576300 | 1227 | (define_insn "omp_simt_enter_insn" |
1228 | [(set (match_operand 0 "nvptx_register_operand" "=R") | |
1229 | (unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri") | |
1230 | (match_operand 2 "nvptx_nonmemory_operand" "Ri")] | |
1231 | UNSPECV_SIMT_ENTER))] | |
1232 | "" | |
1233 | { | |
1234 | return nvptx_output_simt_enter (operands[0], operands[1], operands[2]); | |
1235 | }) | |
1236 | ||
;; Expander for SIMT region entry.  Before emitting omp_simt_enter_insn it
;; records per-function requirements in cfun->machine:
;;  - simt_stack_size becomes HOST_WIDE_INT_M1U when operand 1 is not a
;;    CONST_INT, otherwise the maximum of its current value and operand 1;
;;  - simt_stack_align becomes the maximum of its current value and
;;    constant operand 2;
;;  - has_simtreg is set.
1237 | (define_expand "omp_simt_enter" | |
1238 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1239 | (match_operand 1 "nvptx_nonmemory_operand" "Ri") | |
1240 | (match_operand 2 "const_int_operand" "n")] | |
1241 | "" | |
1242 | { | |
1243 | if (!CONST_INT_P (operands[1])) | |
1244 | cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U; | |
1245 | else | |
1246 | cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]), | |
1247 | cfun->machine->simt_stack_size); | |
1248 | cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]), | |
1249 | cfun->machine->simt_stack_align); | |
1250 | cfun->machine->has_simtreg = true; | |
1251 | emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2])); | |
1252 | DONE; | |
1253 | }) | |
1254 | ||
;; SIMT region exit (UNSPECV_SIMT_EXIT) on register operand 0.  The
;; assembly text is produced by nvptx_output_simt_exit.
1255 | (define_insn "omp_simt_exit" | |
1256 | [(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")] | |
1257 | UNSPECV_SIMT_EXIT)] | |
1258 | "" | |
1259 | { | |
1260 | return nvptx_output_simt_exit (operands[0]); | |
1261 | }) | |
1262 | ||
7fce8768 | 1263 | ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index |
;; The value is read with a mov.u32 from the %laneid special register
;; ("%%" in the template prints a literal "%").
1264 | (define_insn "omp_simt_lane" | |
1265 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
1266 | (unspec:SI [(const_int 0)] UNSPEC_LANEID))] | |
1267 | "" | |
1268 | "%.\\tmov.u32\\t%0, %%laneid;") | |
1269 | ||
1270 | ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and | |
1271 | ;; place a compiler barrier to disallow unrolling/peeling the containing loop | |
;; The barrier is the nvptx_nounroll insn, emitted right after the move.
1272 | (define_expand "omp_simt_ordered" | |
1273 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1274 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1275 | "" | |
1276 | { | |
1277 | emit_move_insn (operands[0], operands[1]); | |
1278 | emit_insn (gen_nvptx_nounroll ()); | |
1279 | DONE; | |
1280 | }) | |
1281 | ||
1282 | ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange | |
1283 | ;; across lanes | |
;; Operand 0 is the destination, operand 1 the value, operand 2 the SI
;; lane argument.  The insn sequence is built by nvptx_gen_shuffle with
;; kind SHUFFLE_BFLY.
1284 | (define_expand "omp_simt_xchg_bfly" | |
1285 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1286 | (match_operand 1 "nvptx_register_operand" "R") | |
1287 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] | |
1288 | "" | |
1289 | { | |
1290 | emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2], | |
1291 | SHUFFLE_BFLY)); | |
1292 | DONE; | |
1293 | }) | |
1294 | ||
1295 | ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1 | |
1296 | ;; from lane given by index in operand 2 to operand 0 in all lanes | |
;; Same structure as omp_simt_xchg_bfly; the insn sequence is built by
;; nvptx_gen_shuffle with kind SHUFFLE_IDX.
1297 | (define_expand "omp_simt_xchg_idx" | |
1298 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1299 | (match_operand 1 "nvptx_register_operand" "R") | |
1300 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] | |
1301 | "" | |
1302 | { | |
1303 | emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2], | |
1304 | SHUFFLE_IDX)); | |
1305 | DONE; | |
1306 | }) | |
1307 | ||
1308 | ;; Implement IFN_GOMP_SIMT_VOTE_ANY: | |
1309 | ;; set operand 0 to zero iff all lanes supply zero in operand 1 | |
;; Implementation: a fresh BImode predicate is set to (operand 1 != 0) and
;; fed to nvptx_vote_ballot, whose SI result is written straight into
;; operand 0.
1310 | (define_expand "omp_simt_vote_any" | |
1311 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1312 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1313 | "" | |
1314 | { | |
1315 | rtx pred = gen_reg_rtx (BImode); | |
1316 | emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx)); | |
1317 | emit_insn (gen_nvptx_vote_ballot (operands[0], pred)); | |
1318 | DONE; | |
1319 | }) | |
1320 | ||
1321 | ;; Implement IFN_GOMP_SIMT_LAST_LANE: | |
1322 | ;; set operand 0 to the lowest lane index that passed non-zero in operand 1 | |
;; Implementation: a fresh BImode predicate is set to (operand 1 != 0),
;; nvptx_vote_ballot writes the ballot into a temporary SI register, and
;; ctzsi2 (count trailing zeros) of that temporary is stored in operand 0.
1323 | (define_expand "omp_simt_last_lane" | |
1324 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1325 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1326 | "" | |
1327 | { | |
1328 | rtx pred = gen_reg_rtx (BImode); | |
1329 | rtx tmp = gen_reg_rtx (SImode); | |
1330 | emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx)); | |
1331 | emit_insn (gen_nvptx_vote_ballot (tmp, pred)); | |
1332 | emit_insn (gen_ctzsi2 (operands[0], tmp)); | |
1333 | DONE; | |
1334 | }) | |
1335 | ||
b3787ae4 | 1336 | ;; extract parts of a 64 bit object into 2 32-bit ints |
;; Operand 2 (a BITD-mode register) is the source; operands 0 and 1 are
;; the two SI results, tagged in the RTL with (const_int 0) and
;; (const_int 1) respectively.  A single mov.b64 with a brace-enclosed
;; destination pair performs the split.
1337 | (define_insn "unpack<mode>si2" | |
1338 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1339 | (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R") | |
1340 | (const_int 0)] UNSPEC_BIT_CONV)) | |
1341 | (set (match_operand:SI 1 "nvptx_register_operand" "=R") | |
1342 | (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))] | |
1343 | "" | |
1344 | "%.\\tmov.b64\\t{%0,%1}, %2;") | |
1345 | ||
1346 | ;; pack 2 32-bit ints into a 64 bit object | |
;; Inverse of the unpack pattern: SI operands 1 and 2 are combined into
;; BITD-mode operand 0 by a single mov.b64 with a brace-enclosed source
;; pair, printed in the order {%1,%2}.
1347 | (define_insn "packsi<mode>2" | |
1348 | [(set (match_operand:BITD 0 "nvptx_register_operand" "=R") | |
1349 | (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R") | |
1350 | (match_operand:SI 2 "nvptx_register_operand" "R")] | |
1351 | UNSPEC_BIT_CONV))] | |
1352 | "" | |
1353 | "%.\\tmov.b64\\t%0, {%1,%2};") | |
1354 | ||
8ce80784 | 1355 | ;; Atomic insns. |
1356 | ||
;; Compare-and-swap expander for each SDIM mode.
;; Step 1: emit the _1 insn with oldval output, memory, expected value,
;; new value and the success memory model (operands 1, 2, 3, 4 and 6).
;; The is_weak flag (operand 5) and the failure model (operand 7) are not
;; passed on.
;; Step 2: derive the boolean result by comparing the returned old value
;; with the expected value in a BImode register and selecting 1 or 0 into
;; operand 0 via sel_truesi.
1357 | (define_expand "atomic_compare_and_swap<mode>" | |
1358 | [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output | |
1359 | (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output | |
1360 | (match_operand:SDIM 2 "memory_operand") ;; memory | |
1361 | (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input | |
1362 | (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input | |
1363 | (match_operand:SI 5 "const_int_operand") ;; is_weak | |
1364 | (match_operand:SI 6 "const_int_operand") ;; success model | |
1365 | (match_operand:SI 7 "const_int_operand")] ;; failure model | |
1366 | "" | |
1367 | { | |
c68f6b1c | 1368 | emit_insn (gen_atomic_compare_and_swap<mode>_1 |
1369 | (operands[1], operands[2], operands[3], operands[4], operands[6])); | |
1370 | ||
1371 | rtx cond = gen_reg_rtx (BImode); | |
1372 | emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); | |
1373 | emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0))); | |
8ce80784 | 1374 | DONE; |
1375 | }) | |
1376 | ||
;; The actual compare-and-swap instruction.  Operand 0 receives the value
;; returned by the instruction; operand 1 is the memory location (read and
;; written, hence the second set with match_dup); operands 2 and 3 are the
;; compare and swap values; operand 4 is a memory-model constant that the
;; template does not print.  The template emits "atom" + %A1 + ".cas.b" +
;; %T0 (%A1 presumably prints the address-space qualifier of the memory
;; operand -- confirm).  The "atomic" attribute is set to true.
1377 | (define_insn "atomic_compare_and_swap<mode>_1" | |
1378 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1379 | (unspec_volatile:SDIM | |
1380 | [(match_operand:SDIM 1 "memory_operand" "+m") | |
89f6d4a2 | 1381 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") |
1382 | (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") | |
8ce80784 | 1383 | (match_operand:SI 4 "const_int_operand")] |
1384 | UNSPECV_CAS)) | |
1385 | (set (match_dup 1) | |
1386 | (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] | |
1387 | "" | |
7fce8768 | 1388 | "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;" |
1389 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1390 | |
;; Atomic exchange for each SDIM mode.  Note the operand numbering:
;; operand 2 is the new value stored to memory and operand 3 is the
;; memory-model constant, which the template does not print.  The template
;; emits "atom" + %A1 + ".exch.b" + %T0.  The "atomic" attribute is set to
;; true.
1391 | (define_insn "atomic_exchange<mode>" | |
1392 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output | |
1393 | (unspec_volatile:SDIM | |
1394 | [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory | |
1395 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1396 | UNSPECV_XCHG)) | |
1397 | (set (match_dup 1) | |
89f6d4a2 | 1398 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input |
8ce80784 | 1399 | "" |
7fce8768 | 1400 | "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;" |
1401 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1402 | |
;; Atomic fetch-and-add for each SDIM mode.  Memory operand 1 is updated
;; with (operand 1 + operand 2) wrapped in UNSPECV_LOCK, and operand 0
;; receives the memory value.  Operand 3 is the memory-model constant,
;; which the template does not print.  The template emits "atom" + %A1 +
;; ".add" + %t0.  The "atomic" attribute is set to true.
1403 | (define_insn "atomic_fetch_add<mode>" | |
1404 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
1405 | (unspec_volatile:SDIM | |
1406 | [(plus:SDIM (match_dup 1) | |
1407 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
1408 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1409 | UNSPECV_LOCK)) | |
1410 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1411 | (match_dup 1))] | |
1412 | "" | |
7fce8768 | 1413 | "%.\\tatom%A1.add%t0\\t%0, %1, %2;" |
1414 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1415 | |
;; SFmode counterpart of atomic_fetch_add<mode>.  The RTL shape and the
;; output template are the same; only the mode (SF) and the constraint on
;; operand 2 ("RF" instead of "Ri") differ.
1416 | (define_insn "atomic_fetch_addsf" | |
1417 | [(set (match_operand:SF 1 "memory_operand" "+m") | |
1418 | (unspec_volatile:SF | |
1419 | [(plus:SF (match_dup 1) | |
1420 | (match_operand:SF 2 "nvptx_nonmemory_operand" "RF")) | |
1421 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1422 | UNSPECV_LOCK)) | |
1423 | (set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1424 | (match_dup 1))] | |
1425 | "" | |
7fce8768 | 1426 | "%.\\tatom%A1.add%t0\\t%0, %1, %2;" |
1427 | [(set_attr "atomic" "true")]) | |
8ce80784 | 1428 | |
;; Atomic fetch-and-{and,or,xor}.  any_logic iterates over the three RTL
;; codes and the "logic" attribute maps each to the mnemonic fragment used
;; both in the pattern name and at the end of the output template.
;; The insn condition is the constant "0", so these patterns are never
;; enabled as written (see the comment above the insn).
1429 | (define_code_iterator any_logic [and ior xor]) | |
1430 | (define_code_attr logic [(and "and") (ior "or") (xor "xor")]) | |
1431 | ||
1432 | ;; Currently disabled until we add better subtarget support - requires sm_32. | |
1433 | (define_insn "atomic_fetch_<logic><mode>" | |
1434 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
1435 | (unspec_volatile:SDIM | |
1436 | [(any_logic:SDIM (match_dup 1) | |
1437 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
1438 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1439 | UNSPECV_LOCK)) | |
1440 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1441 | (match_dup 1))] | |
1442 | "0" | |
7fce8768 | 1443 | "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;" |
1444 | [(set_attr "atomic" "true")]) | |
b3787ae4 | 1445 | |
;; Barrier insn (UNSPECV_BARSYNC).  Constant operand 0 is printed as the
;; argument of "bar.sync".  Marked not predicable, and the template has no
;; leading "%." unlike most insns in this file.
1446 | (define_insn "nvptx_barsync" | |
1447 | [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] | |
1448 | UNSPECV_BARSYNC)] | |
1449 | "" | |
7fce8768 | 1450 | "\\tbar.sync\\t%0;" |
1451 | [(set_attr "predicable" "false")]) | |
1452 | ||
;; UNSPECV_NOUNROLL marker: prints the directive .pragma "nounroll";
;; It takes no operands and is marked not predicable.  Emitted by the
;; omp_simt_ordered expander in this file.
1453 | (define_insn "nvptx_nounroll" | |
1454 | [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)] | |
1455 | "" | |
1456 | "\\t.pragma \\\"nounroll\\\";" | |
1457 | [(set_attr "predicable" "false")]) |