]>
Commit | Line | Data |
---|---|---|
738f2522 | 1 | ;; Machine description for NVPTX. |
83ffe9cd | 2 | ;; Copyright (C) 2014-2023 Free Software Foundation, Inc. |
738f2522 BS |
3 | ;; Contributed by Bernd Schmidt <bernds@codesourcery.com> |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify | |
8 | ;; it under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, | |
13 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | ;; GNU General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
21 | (define_c_enum "unspec" [ | |
22 | UNSPEC_ARG_REG | |
738f2522 | 23 | |
738f2522 BS |
24 | UNSPEC_COPYSIGN |
25 | UNSPEC_LOG2 | |
26 | UNSPEC_EXP2 | |
27 | UNSPEC_SIN | |
28 | UNSPEC_COS | |
308d688b | 29 | UNSPEC_TANH |
26d7b8f9 | 30 | UNSPEC_ISINF |
738f2522 BS |
31 | |
32 | UNSPEC_FPINT_FLOOR | |
33 | UNSPEC_FPINT_BTRUNC | |
34 | UNSPEC_FPINT_CEIL | |
35 | UNSPEC_FPINT_NEARBYINT | |
36 | ||
37 | UNSPEC_BITREV | |
38 | ||
39 | UNSPEC_ALLOCA | |
40 | ||
5012919d AM |
41 | UNSPEC_SET_SOFTSTACK |
42 | ||
d88cd9c4 NS |
43 | UNSPEC_DIM_SIZE |
44 | ||
d88cd9c4 NS |
45 | UNSPEC_BIT_CONV |
46 | ||
5012919d AM |
47 | UNSPEC_VOTE_BALLOT |
48 | ||
49 | UNSPEC_LANEID | |
50 | ||
d88cd9c4 NS |
51 | UNSPEC_SHUFFLE |
52 | UNSPEC_BR_UNIFIED | |
738f2522 BS |
53 | ]) |
54 | ||
55 | (define_c_enum "unspecv" [ | |
56 | UNSPECV_LOCK | |
57 | UNSPECV_CAS | |
04b54cc4 | 58 | UNSPECV_CAS_LOCAL |
738f2522 | 59 | UNSPECV_XCHG |
19a13d5a | 60 | UNSPECV_ST |
623daaf8 CLT |
61 | UNSPECV_BARRED_AND |
62 | UNSPECV_BARRED_OR | |
63 | UNSPECV_BARRED_POPC | |
d88cd9c4 | 64 | UNSPECV_BARSYNC |
bba61d40 | 65 | UNSPECV_WARPSYNC |
f32f74c2 | 66 | UNSPECV_UNIFORM_WARP_CHECK |
f04fd903 | 67 | UNSPECV_MEMBAR |
21251395 | 68 | UNSPECV_MEMBAR_CTA |
ca902055 | 69 | UNSPECV_MEMBAR_GL |
d88cd9c4 NS |
70 | UNSPECV_DIM_POS |
71 | ||
72 | UNSPECV_FORK | |
73 | UNSPECV_FORKED | |
74 | UNSPECV_JOINING | |
75 | UNSPECV_JOIN | |
5012919d AM |
76 | |
77 | UNSPECV_NOUNROLL | |
0c6b03b5 AM |
78 | |
79 | UNSPECV_SIMT_ENTER | |
80 | UNSPECV_SIMT_EXIT | |
f881693c TV |
81 | |
82 | UNSPECV_RED_PART | |
738f2522 BS |
83 | ]) |
84 | ||
85 | (define_attr "subregs_ok" "false,true" | |
86 | (const_string "false")) | |
87 | ||
5012919d AM |
88 | (define_attr "atomic" "false,true" |
89 | (const_string "false")) | |
90 | ||
bd602b7f NS |
91 | ;; The nvptx operand predicates, in general, don't permit subregs and |
92 | ;; only literal constants, which differ from the generic ones, which | |
93 | ;; permit subregs and symbolic constants (as appropriate) | |
738f2522 | 94 | (define_predicate "nvptx_register_operand" |
bd5d4b65 | 95 | (match_code "reg") |
738f2522 | 96 | { |
738f2522 BS |
97 | return register_operand (op, mode); |
98 | }) | |
99 | ||
c2e0d0c1 TV |
100 | (define_predicate "nvptx_register_or_complex_di_df_register_operand" |
101 | (ior (match_code "reg") | |
102 | (match_code "concat")) | |
103 | { | |
104 | if (GET_CODE (op) == CONCAT) | |
105 | return ((GET_MODE (op) == DCmode || GET_MODE (op) == CDImode) | |
106 | && nvptx_register_operand (XEXP (op, 0), mode) | |
107 | && nvptx_register_operand (XEXP (op, 1), mode)); | |
108 | ||
109 | return nvptx_register_operand (op, mode); | |
110 | }) | |
111 | ||
;; Accept a REG that satisfies register_operand, or a MEM that satisfies
;; memory_operand.  No other rtx codes get past the match_code filter.
(define_predicate "nvptx_nonimmediate_operand"
  (match_code "mem,reg")
{
  if (REG_P (op))
    return register_operand (op, mode);

  /* Only MEM remains once REG has been handled.  */
  return memory_operand (op, mode);
})
118 | ||
;; Accept a REG that satisfies register_operand, or a CONST_INT /
;; CONST_DOUBLE that satisfies immediate_operand.
(define_predicate "nvptx_nonmemory_operand"
  (match_code "reg,const_int,const_double")
{
  if (REG_P (op))
    return register_operand (op, mode);

  /* Everything else admitted by match_code is a literal constant.  */
  return immediate_operand (op, mode);
})
125 | ||
738f2522 BS |
126 | (define_predicate "const0_operand" |
127 | (and (match_code "const_int") | |
128 | (match_test "op == const0_rtx"))) | |
129 | ||
130 | ;; True if this operator is valid for predication. | |
131 | (define_predicate "predicate_operator" | |
132 | (match_code "eq,ne")) | |
133 | ||
134 | (define_predicate "ne_operator" | |
135 | (match_code "ne")) | |
136 | ||
137 | (define_predicate "nvptx_comparison_operator" | |
138 | (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu")) | |
139 | ||
140 | (define_predicate "nvptx_float_comparison_operator" | |
141 | (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered")) | |
142 | ||
8240f2f4 RS |
143 | (define_predicate "nvptx_vector_index_operand" |
144 | (and (match_code "const_int") | |
145 | (match_test "UINTVAL (op) < 4"))) | |
146 | ||
738f2522 | 147 | ;; Test for a valid operand for a call instruction. |
bd5d4b65 | 148 | (define_predicate "call_insn_operand" |
738f2522 BS |
149 | (match_code "symbol_ref,reg") |
150 | { | |
a02d84b6 | 151 | return REG_P (op) || SYMBOL_REF_FUNCTION_P (op); |
738f2522 BS |
152 | }) |
153 | ||
;; Return true if OP is a call with parallel USEs of the argument
;; pseudos.
(define_predicate "call_operation"
  (match_code "parallel")
{
  const int n_elts = XVECLEN (op, 0);
  int idx = 1;

  /* Element 0 is not inspected here; every element after it must be a
     USE whose operand is a REG.  */
  while (idx < n_elts)
    {
      rtx use = XVECEXP (op, 0, idx);
      bool is_reg_use = GET_CODE (use) == USE && REG_P (XEXP (use, 0));

      if (!is_reg_use)
	return false;
      idx++;
    }

  return true;
})
170 | ||
37c3c297 TV |
171 | ;; Test for a function symbol ref operand |
172 | (define_predicate "symbol_ref_function_operand" | |
173 | (match_code "symbol_ref") | |
174 | { | |
175 | return SYMBOL_REF_FUNCTION_P (op); | |
176 | }) | |
177 | ||
3357878e TV |
178 | (define_attr "predicable" "no,yes" |
179 | (const_string "yes")) | |
5012919d AM |
180 | |
181 | (define_cond_exec | |
182 | [(match_operator 0 "predicate_operator" | |
183 | [(match_operand:BI 1 "nvptx_register_operand" "") | |
184 | (match_operand:BI 2 "const0_operand" "")])] | |
185 | "" | |
186 | "" | |
187 | ) | |
188 | ||
738f2522 BS |
189 | (define_constraint "P0" |
190 | "An integer with the value 0." | |
191 | (and (match_code "const_int") | |
192 | (match_test "ival == 0"))) | |
193 | ||
194 | (define_constraint "P1" | |
195 | "An integer with the value 1." | |
196 | (and (match_code "const_int") | |
197 | (match_test "ival == 1"))) | |
198 | ||
199 | (define_constraint "Pn" | |
200 | "An integer with the value -1." | |
201 | (and (match_code "const_int") | |
202 | (match_test "ival == -1"))) | |
203 | ||
204 | (define_constraint "R" | |
205 | "A pseudo register." | |
206 | (match_code "reg")) | |
207 | ||
208 | (define_constraint "Ia" | |
209 | "Any integer constant." | |
210 | (and (match_code "const_int") (match_test "true"))) | |
211 | ||
212 | (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF]) | |
213 | (define_mode_iterator QHSDIM [QI HI SI DI]) | |
214 | (define_mode_iterator HSDIM [HI SI DI]) | |
215 | (define_mode_iterator BHSDIM [BI HI SI DI]) | |
216 | (define_mode_iterator SDIM [SI DI]) | |
217 | (define_mode_iterator SDISDFM [SI DI SF DF]) | |
218 | (define_mode_iterator QHIM [QI HI]) | |
219 | (define_mode_iterator QHSIM [QI HI SI]) | |
220 | (define_mode_iterator SDFM [SF DF]) | |
308d688b | 221 | (define_mode_iterator HSFM [HF SF]) |
738f2522 | 222 | (define_mode_iterator SDCM [SC DC]) |
d88cd9c4 NS |
223 | (define_mode_iterator BITS [SI SF]) |
224 | (define_mode_iterator BITD [DI DF]) | |
3717fbe3 | 225 | (define_mode_iterator VECIM [V2SI V2DI]) |
738f2522 BS |
226 | |
227 | ;; This mode iterator allows :P to be used for patterns that operate on | |
228 | ;; pointer-sized quantities. Exactly one of the two alternatives will match. | |
229 | (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) | |
230 | ||
8240f2f4 RS |
231 | ;; Define element mode for each vector mode. |
232 | (define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")]) | |
233 | (define_mode_attr Vecelem [(V2SI "si") (V2DI "di")]) | |
234 | ||
738f2522 BS |
235 | ;; We should get away with not defining memory alternatives, since we don't |
236 | ;; get variables in this mode and pseudos are never spilled. | |
237 | (define_insn "movbi" | |
238 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R") | |
beed3f8f | 239 | (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,P1"))] |
738f2522 BS |
240 | "" |
241 | "@ | |
242 | %.\\tmov%t0\\t%0, %1; | |
243 | %.\\tsetp.eq.u32\\t%0, 1, 0; | |
244 | %.\\tsetp.eq.u32\\t%0, 1, 1;") | |
245 | ||
b98b34b7 TV |
246 | (define_insn "*mov<mode>_insn" |
247 | [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m") | |
248 | (match_operand:VECIM 1 "general_operand" "Ri,m,R"))] | |
249 | "!MEM_P (operands[0]) || REG_P (operands[1])" | |
250 | { | |
251 | if (which_alternative == 1) | |
252 | return "%.\\tld%A1%u1\\t%0, %1;"; | |
253 | if (which_alternative == 2) | |
254 | return "%.\\tst%A0%u0\\t%0, %1;"; | |
255 | ||
256 | return nvptx_output_mov_insn (operands[0], operands[1]); | |
257 | } | |
258 | [(set_attr "subregs_ok" "true")]) | |
259 | ||
738f2522 | 260 | (define_insn "*mov<mode>_insn" |
15113b03 | 261 | [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m") |
f313d112 | 262 | (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))] |
15113b03 | 263 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
738f2522 | 264 | { |
f313d112 | 265 | if (which_alternative == 1) |
738f2522 | 266 | return "%.\\tld%A1%u1\\t%0, %1;"; |
f313d112 | 267 | if (which_alternative == 2) |
738f2522 BS |
268 | return "%.\\tst%A0%u0\\t%0, %1;"; |
269 | ||
f313d112 | 270 | return nvptx_output_mov_insn (operands[0], operands[1]); |
738f2522 BS |
271 | } |
272 | [(set_attr "subregs_ok" "true")]) | |
273 | ||
37c3c297 TV |
274 | ;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up. |
275 | (define_split | |
276 | [(set (match_operand:DI 0 "nvptx_register_operand") | |
277 | (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand") | |
278 | (match_operand 2 "const_int_operand"))))] | |
279 | "" | |
280 | [(set (match_dup 0) (match_dup 1)) | |
281 | (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2))) | |
282 | ] | |
283 | "") | |
284 | ||
738f2522 | 285 | (define_insn "*mov<mode>_insn" |
15113b03 | 286 | [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m") |
738f2522 | 287 | (match_operand:SDFM 1 "general_operand" "RF,m,R"))] |
f313d112 | 288 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
738f2522 BS |
289 | { |
290 | if (which_alternative == 1) | |
291 | return "%.\\tld%A1%u0\\t%0, %1;"; | |
292 | if (which_alternative == 2) | |
293 | return "%.\\tst%A0%u1\\t%0, %1;"; | |
294 | ||
f313d112 | 295 | return nvptx_output_mov_insn (operands[0], operands[1]); |
738f2522 BS |
296 | } |
297 | [(set_attr "subregs_ok" "true")]) | |
298 | ||
aeedb00a RS |
299 | (define_insn "*movhf_insn" |
300 | [(set (match_operand:HF 0 "nonimmediate_operand" "=R,R,m") | |
301 | (match_operand:HF 1 "nonimmediate_operand" "R,m,R"))] | |
302 | "!MEM_P (operands[0]) || REG_P (operands[1])" | |
303 | "@ | |
304 | %.\\tmov.b16\\t%0, %1; | |
305 | %.\\tld.b16\\t%0, %1; | |
06770148 RS |
306 | %.\\tst.b16\\t%0, %1;" |
307 | [(set_attr "subregs_ok" "true")]) | |
aeedb00a RS |
308 | |
;; HFmode move expander.  Constants are materialised in SFmode and then
;; truncated; a store from a non-register source goes through a fresh
;; HFmode pseudo.
(define_expand "movhf"
  [(set (match_operand:HF 0 "nonimmediate_operand" "")
	(match_operand:HF 1 "nonimmediate_operand" ""))]
  ""
{
  rtx dst = operands[0];
  rtx src = operands[1];

  /* Load HFmode constants as SFmode with an explicit FLOAT_TRUNCATE.  */
  if (CONST_DOUBLE_P (src))
    {
      rtx wide = gen_reg_rtx (SFmode);
      REAL_VALUE_TYPE val = *CONST_DOUBLE_REAL_VALUE (src);
      real_convert (&val, SFmode, &val);
      emit_move_insn (wide, const_double_from_real_value (val, SFmode));

      /* Truncate straight into DST when it is a register; otherwise
	 truncate into a new HFmode pseudo and copy that to DST.  */
      rtx narrow = REG_P (dst) ? dst : gen_reg_rtx (HFmode);
      emit_insn (gen_truncsfhf2 (narrow, wide));
      if (narrow != dst)
	emit_move_insn (dst, narrow);
      DONE;
    }

  /* The movhf insn only stores from a register, so stage any other
     source in a pseudo first.  */
  if (MEM_P (dst) && !REG_P (src))
    {
      rtx staged = gen_reg_rtx (HFmode);
      emit_move_insn (staged, src);
      emit_move_insn (dst, staged);
      DONE;
    }
})
341 | ||
738f2522 BS |
342 | (define_insn "load_arg_reg<mode>" |
343 | [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R") | |
df1bdded | 344 | (unspec:QHIM [(match_operand 1 "const_int_operand" "n")] |
738f2522 BS |
345 | UNSPEC_ARG_REG))] |
346 | "" | |
347 | "%.\\tcvt%t0.u32\\t%0, %%ar%1;") | |
348 | ||
349 | (define_insn "load_arg_reg<mode>" | |
350 | [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R") | |
df1bdded | 351 | (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")] |
738f2522 BS |
352 | UNSPEC_ARG_REG))] |
353 | "" | |
354 | "%.\\tmov%t0\\t%0, %%ar%1;") | |
355 | ||
b98b34b7 TV |
356 | (define_expand "mov<mode>" |
357 | [(set (match_operand:VECIM 0 "nonimmediate_operand" "") | |
358 | (match_operand:VECIM 1 "general_operand" ""))] | |
359 | "" | |
360 | { | |
361 | if (MEM_P (operands[0]) && !REG_P (operands[1])) | |
362 | { | |
363 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
364 | emit_move_insn (tmp, operands[1]); | |
365 | emit_move_insn (operands[0], tmp); | |
366 | DONE; | |
367 | } | |
368 | }) | |
369 | ||
738f2522 | 370 | (define_expand "mov<mode>" |
15113b03 | 371 | [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "") |
738f2522 BS |
372 | (match_operand:QHSDISDFM 1 "general_operand" ""))] |
373 | "" | |
374 | { | |
bd602b7f | 375 | if (MEM_P (operands[0]) && !REG_P (operands[1])) |
738f2522 BS |
376 | { |
377 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
378 | emit_move_insn (tmp, operands[1]); | |
379 | emit_move_insn (operands[0], tmp); | |
380 | DONE; | |
381 | } | |
37f30285 TV |
382 | |
383 | if (GET_CODE (operands[1]) == LABEL_REF) | |
384 | sorry ("target cannot support label values"); | |
738f2522 BS |
385 | }) |
386 | ||
738f2522 BS |
387 | (define_insn "zero_extendqihi2" |
388 | [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R") | |
a02d84b6 | 389 | (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))] |
738f2522 BS |
390 | "" |
391 | "@ | |
392 | %.\\tcvt.u16.u%T1\\t%0, %1; | |
393 | %.\\tld%A1.u8\\t%0, %1;" | |
394 | [(set_attr "subregs_ok" "true")]) | |
395 | ||
396 | (define_insn "zero_extend<mode>si2" | |
397 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
a02d84b6 | 398 | (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
738f2522 BS |
399 | "" |
400 | "@ | |
401 | %.\\tcvt.u32.u%T1\\t%0, %1; | |
402 | %.\\tld%A1.u%T1\\t%0, %1;" | |
403 | [(set_attr "subregs_ok" "true")]) | |
404 | ||
405 | (define_insn "zero_extend<mode>di2" | |
406 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
a02d84b6 | 407 | (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
738f2522 BS |
408 | "" |
409 | "@ | |
410 | %.\\tcvt.u64.u%T1\\t%0, %1; | |
411 | %.\\tld%A1%u1\\t%0, %1;" | |
412 | [(set_attr "subregs_ok" "true")]) | |
413 | ||
862a58ed TV |
414 | (define_insn "extendqihi2" |
415 | [(set (match_operand:HI 0 "nvptx_register_operand" "=R") | |
416 | (sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))] | |
417 | "" | |
418 | "%.\\tcvt.s16.s8\\t%0, %1;" | |
419 | [(set_attr "subregs_ok" "true")]) | |
420 | ||
738f2522 BS |
421 | (define_insn "extend<mode>si2" |
422 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
a02d84b6 | 423 | (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
738f2522 BS |
424 | "" |
425 | "@ | |
426 | %.\\tcvt.s32.s%T1\\t%0, %1; | |
427 | %.\\tld%A1.s%T1\\t%0, %1;" | |
428 | [(set_attr "subregs_ok" "true")]) | |
429 | ||
430 | (define_insn "extend<mode>di2" | |
431 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
a02d84b6 | 432 | (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))] |
738f2522 BS |
433 | "" |
434 | "@ | |
435 | %.\\tcvt.s64.s%T1\\t%0, %1; | |
436 | %.\\tld%A1.s%T1\\t%0, %1;" | |
437 | [(set_attr "subregs_ok" "true")]) | |
438 | ||
439 | (define_insn "trunchiqi2" | |
a02d84b6 | 440 | [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m") |
738f2522 BS |
441 | (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))] |
442 | "" | |
443 | "@ | |
444 | %.\\tcvt%t0.u16\\t%0, %1; | |
445 | %.\\tst%A0.u8\\t%0, %1;" | |
446 | [(set_attr "subregs_ok" "true")]) | |
447 | ||
;; Truncate SImode to QImode/HImode, either into a register or directly
;; into memory.
(define_insn "truncsi<mode>2"
  [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
	(truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
  ""
  {
    /* Alternative 1 is the store form.  For the register form, a QImode
       destination is emitted as a plain mov; any other mode uses cvt.  */
    return (which_alternative == 1
	    ? "%.\\tst%A0.u%T0\\t%0, %1;"
	    : (GET_MODE (operands[0]) == QImode
	       ? "%.\\tmov%t0\\t%0, %1;"
	       : "%.\\tcvt%t0.u32\\t%0, %1;"));
  }
  [(set_attr "subregs_ok" "true")])
460 | ||
461 | (define_insn "truncdi<mode>2" | |
a02d84b6 | 462 | [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m") |
738f2522 BS |
463 | (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))] |
464 | "" | |
465 | "@ | |
466 | %.\\tcvt%t0.u64\\t%0, %1; | |
467 | %.\\tst%A0.u%T0\\t%0, %1;" | |
468 | [(set_attr "subregs_ok" "true")]) | |
469 | ||
6b49d50a RS |
470 | ;; Sign-extensions of truncations |
471 | ||
472 | (define_insn "*extend_trunc_<mode>2_qi" | |
473 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
474 | (sign_extend:HSDIM | |
475 | (truncate:QI (match_operand:HSDIM 1 "nvptx_register_operand" "R"))))] | |
476 | "" | |
477 | "%.\\tcvt.s%T0.s8\\t%0, %1;" | |
478 | [(set_attr "subregs_ok" "true")]) | |
479 | ||
480 | (define_insn "*extend_trunc_<mode>2_hi" | |
481 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
482 | (sign_extend:SDIM | |
483 | (truncate:HI (match_operand:SDIM 1 "nvptx_register_operand" "R"))))] | |
484 | "" | |
485 | "%.\\tcvt.s%T0.s16\\t%0, %1;" | |
486 | [(set_attr "subregs_ok" "true")]) | |
487 | ||
488 | (define_insn "*extend_trunc_di2_si" | |
489 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
490 | (sign_extend:DI | |
491 | (truncate:SI (match_operand:DI 1 "nvptx_register_operand" "R"))))] | |
492 | "" | |
493 | "%.\\tcvt.s64.s32\\t%0, %1;" | |
494 | [(set_attr "subregs_ok" "true")]) | |
495 | ||
738f2522 BS |
496 | ;; Integer arithmetic |
497 | ||
498 | (define_insn "add<mode>3" | |
499 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
500 | (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
501 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
502 | "" | |
503 | "%.\\tadd%t0\\t%0, %1, %2;") | |
504 | ||
e6f32337 RS |
505 | (define_insn "*vadd_addsi4" |
506 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
507 | (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R") | |
508 | (match_operand:SI 2 "nvptx_register_operand" "R")) | |
509 | (match_operand:SI 3 "nvptx_register_operand" "R")))] | |
510 | "" | |
511 | "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;") | |
512 | ||
513 | (define_insn "*vsub_addsi4" | |
514 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
515 | (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R") | |
516 | (match_operand:SI 2 "nvptx_register_operand" "R")) | |
517 | (match_operand:SI 3 "nvptx_register_operand" "R")))] | |
518 | "" | |
519 | "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;") | |
520 | ||
;; Integer subtraction of two registers.
(define_insn "sub<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		     (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
  ""
  {
    /* HImode gets an explicit .s16 type instead of the %t0 suffix.
       Workaround https://developer.nvidia.com/nvidia_bug/3527713.
       See PR97005.  */
    return (GET_MODE (operands[0]) == HImode
	    ? "%.\\tsub.s16\\t%0, %1, %2;"
	    : "%.\\tsub%t0\\t%0, %1, %2;");
  })
738f2522 BS |
534 | |
535 | (define_insn "mul<mode>3" | |
536 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
537 | (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
538 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
539 | "" | |
540 | "%.\\tmul.lo%t0\\t%0, %1, %2;") | |
541 | ||
542 | (define_insn "*mad<mode>3" | |
543 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
544 | (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
545 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
546 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
547 | "" | |
548 | "%.\\tmad.lo%t0\\t%0, %1, %2, %3;") | |
549 | ||
550 | (define_insn "div<mode>3" | |
551 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
552 | (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
553 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
554 | "" | |
555 | "%.\\tdiv.s%T0\\t%0, %1, %2;") | |
556 | ||
557 | (define_insn "udiv<mode>3" | |
558 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
559 | (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
560 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
561 | "" | |
562 | "%.\\tdiv.u%T0\\t%0, %1, %2;") | |
563 | ||
;; Signed integer remainder.  Operand 1 uses the register-only predicate
;; nvptx_register_operand, so its constraint is "R" to agree with that
;; predicate (an immediate alternative could never match).  Operand 2 may
;; be a register or a literal constant.
(define_insn "mod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.s%T0\\t%0, %1, %2;")
570 | ||
;; Unsigned integer remainder.  Operand 1 uses the register-only predicate
;; nvptx_register_operand, so its constraint is "R" to agree with that
;; predicate (an immediate alternative could never match).  Operand 2 may
;; be a register or a literal constant.
(define_insn "umod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.u%T0\\t%0, %1, %2;")
577 | ||
578 | (define_insn "smin<mode>3" | |
579 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
580 | (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
581 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
582 | "" | |
583 | "%.\\tmin.s%T0\\t%0, %1, %2;") | |
584 | ||
585 | (define_insn "umin<mode>3" | |
586 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
587 | (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
588 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
589 | "" | |
590 | "%.\\tmin.u%T0\\t%0, %1, %2;") | |
591 | ||
592 | (define_insn "smax<mode>3" | |
593 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
594 | (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
595 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
596 | "" | |
597 | "%.\\tmax.s%T0\\t%0, %1, %2;") | |
598 | ||
599 | (define_insn "umax<mode>3" | |
600 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
601 | (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
602 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
603 | "" | |
604 | "%.\\tmax.u%T0\\t%0, %1, %2;") | |
605 | ||
606 | (define_insn "abs<mode>2" | |
607 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
608 | (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
609 | "" | |
610 | "%.\\tabs.s%T0\\t%0, %1;") | |
611 | ||
612 | (define_insn "neg<mode>2" | |
613 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
614 | (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
615 | "" | |
616 | "%.\\tneg.s%T0\\t%0, %1;") | |
617 | ||
618 | (define_insn "one_cmpl<mode>2" | |
619 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
620 | (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
621 | "" | |
622 | "%.\\tnot.b%T0\\t%0, %1;") | |
623 | ||
26d7b8f9 RS |
624 | (define_insn "one_cmplbi2" |
625 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
626 | (not:BI (match_operand:BI 1 "nvptx_register_operand" "R")))] | |
627 | "" | |
628 | "%.\\tnot.pred\\t%0, %1;") | |
629 | ||
659f8161 RS |
630 | (define_insn "*cnot<mode>2" |
631 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
632 | (eq:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
633 | (const_int 0)))] | |
634 | "" | |
635 | "%.\\tcnot.b%T0\\t%0, %1;") | |
636 | ||
738f2522 BS |
637 | (define_insn "bitrev<mode>2" |
638 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
639 | (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")] | |
640 | UNSPEC_BITREV))] | |
641 | "" | |
642 | "%.\\tbrev.b%T0\\t%0, %1;") | |
643 | ||
644 | (define_insn "clz<mode>2" | |
645 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
646 | (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))] | |
647 | "" | |
ad16d073 | 648 | "%.\\tclz.b%T1\\t%0, %1;") |
738f2522 BS |
649 | |
650 | (define_expand "ctz<mode>2" | |
651 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
652 | (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))] | |
653 | "" | |
654 | { | |
655 | rtx tmpreg = gen_reg_rtx (<MODE>mode); | |
656 | emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1])); | |
657 | emit_insn (gen_clz<mode>2 (operands[0], tmpreg)); | |
658 | DONE; | |
659 | }) | |
660 | ||
ce0f8424 RS |
661 | (define_insn "popcount<mode>2" |
662 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
663 | (popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))] | |
664 | "" | |
665 | "%.\\tpopc.b%T1\\t%0, %1;") | |
666 | ||
667 | ;; Multiplication variants | |
668 | ||
669 | (define_insn "mulhisi3" | |
670 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
671 | (mult:SI (sign_extend:SI | |
672 | (match_operand:HI 1 "nvptx_register_operand" "R")) | |
673 | (sign_extend:SI | |
674 | (match_operand:HI 2 "nvptx_register_operand" "R"))))] | |
675 | "" | |
676 | "%.\\tmul.wide.s16\\t%0, %1, %2;") | |
677 | ||
678 | (define_insn "mulsidi3" | |
679 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
680 | (mult:DI (sign_extend:DI | |
681 | (match_operand:SI 1 "nvptx_register_operand" "R")) | |
682 | (sign_extend:DI | |
683 | (match_operand:SI 2 "nvptx_register_operand" "R"))))] | |
684 | "" | |
685 | "%.\\tmul.wide.s32\\t%0, %1, %2;") | |
686 | ||
687 | (define_insn "umulhisi3" | |
688 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
689 | (mult:SI (zero_extend:SI | |
690 | (match_operand:HI 1 "nvptx_register_operand" "R")) | |
691 | (zero_extend:SI | |
692 | (match_operand:HI 2 "nvptx_register_operand" "R"))))] | |
693 | "" | |
694 | "%.\\tmul.wide.u16\\t%0, %1, %2;") | |
695 | ||
696 | (define_insn "umulsidi3" | |
697 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
698 | (mult:DI (zero_extend:DI | |
699 | (match_operand:SI 1 "nvptx_register_operand" "R")) | |
700 | (zero_extend:DI | |
701 | (match_operand:SI 2 "nvptx_register_operand" "R"))))] | |
702 | "" | |
703 | "%.\\tmul.wide.u32\\t%0, %1, %2;") | |
704 | ||
;; Signed widening multiply DI x DI -> TI.  The preparation statements
;; always finish with DONE, so the RTL template is descriptive only; both
;; inputs are shown sign-extended to TImode so the template is
;; mode-consistent with the enclosing mult:TI.
(define_expand "mulditi3"
  [(set (match_operand:TI 0 "nvptx_register_operand")
	(mult:TI (sign_extend:TI
		  (match_operand:DI 1 "nvptx_register_operand"))
		 (sign_extend:TI
		  (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
  ""
{
  /* Compute the two 64-bit halves of the product separately, then
     assemble them into the TImode destination.  */
  rtx hi = gen_reg_rtx (DImode);
  rtx lo = gen_reg_rtx (DImode);
  emit_insn (gen_smuldi3_highpart (hi, operands[1], operands[2]));
  emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
  emit_move_insn (gen_highpart (DImode, operands[0]), hi);
  emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
  DONE;
})
721 | ||
;; Unsigned widening multiply DI x DI -> TI.  The preparation statements
;; always finish with DONE, so the RTL template is descriptive only; both
;; inputs are shown zero-extended to TImode so the template is
;; mode-consistent with the enclosing mult:TI.
(define_expand "umulditi3"
  [(set (match_operand:TI 0 "nvptx_register_operand")
	(mult:TI (zero_extend:TI
		  (match_operand:DI 1 "nvptx_register_operand"))
		 (zero_extend:TI
		  (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
  ""
{
  /* Compute the two 64-bit halves of the product separately, then
     assemble them into the TImode destination.  */
  rtx hi = gen_reg_rtx (DImode);
  rtx lo = gen_reg_rtx (DImode);
  emit_insn (gen_umuldi3_highpart (hi, operands[1], operands[2]));
  emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
  emit_move_insn (gen_highpart (DImode, operands[0]), hi);
  emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
  DONE;
})
738 | ||
739 | (define_insn "smul<mode>3_highpart" | |
740 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
741 | (smul_highpart:HSDIM | |
742 | (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
743 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
744 | "" | |
745 | "%.\\tmul.hi.s%T0\\t%0, %1, %2;") | |
746 | ||
747 | (define_insn "umul<mode>3_highpart" | |
748 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
749 | (umul_highpart:HSDIM | |
750 | (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
751 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
752 | "" | |
753 | "%.\\tmul.hi.u%T0\\t%0, %1, %2;") | |
754 | ||
755 | (define_insn "*smulhi3_highpart_2" | |
37a4c06f RS |
756 | [(set (match_operand:HI 0 "nvptx_register_operand" "=R") |
757 | (truncate:HI | |
758 | (lshiftrt:SI | |
759 | (mult:SI (sign_extend:SI | |
760 | (match_operand:HI 1 "nvptx_register_operand" "R")) | |
761 | (sign_extend:SI | |
762 | (match_operand:HI 2 "nvptx_register_operand" "R"))) | |
763 | (const_int 16))))] | |
764 | "" | |
765 | "%.\\tmul.hi.s16\\t%0, %1, %2;") | |
766 | ||
26d7b8f9 | 767 | (define_insn "*smulsi3_highpart_2" |
37a4c06f RS |
768 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") |
769 | (truncate:SI | |
770 | (lshiftrt:DI | |
771 | (mult:DI (sign_extend:DI | |
772 | (match_operand:SI 1 "nvptx_register_operand" "R")) | |
773 | (sign_extend:DI | |
774 | (match_operand:SI 2 "nvptx_register_operand" "R"))) | |
775 | (const_int 32))))] | |
776 | "" | |
777 | "%.\\tmul.hi.s32\\t%0, %1, %2;") | |
778 | ||
26d7b8f9 | 779 | (define_insn "*umulhi3_highpart_2" |
37a4c06f RS |
780 | [(set (match_operand:HI 0 "nvptx_register_operand" "=R") |
781 | (truncate:HI | |
782 | (lshiftrt:SI | |
783 | (mult:SI (zero_extend:SI | |
784 | (match_operand:HI 1 "nvptx_register_operand" "R")) | |
785 | (zero_extend:SI | |
786 | (match_operand:HI 2 "nvptx_register_operand" "R"))) | |
787 | (const_int 16))))] | |
788 | "" | |
789 | "%.\\tmul.hi.u16\\t%0, %1, %2;") | |
790 | ||
26d7b8f9 | 791 | (define_insn "*umulsi3_highpart_2" |
37a4c06f RS |
792 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") |
793 | (truncate:SI | |
794 | (lshiftrt:DI | |
795 | (mult:DI (zero_extend:DI | |
796 | (match_operand:SI 1 "nvptx_register_operand" "R")) | |
797 | (zero_extend:DI | |
798 | (match_operand:SI 2 "nvptx_register_operand" "R"))) | |
799 | (const_int 32))))] | |
800 | "" | |
801 | "%.\\tmul.hi.u32\\t%0, %1, %2;") | |
802 | ||
738f2522 BS |
803 | ;; Shifts |
804 | ||
;; Shift patterns over the HSDIM mode iterator.  The value being shifted must
;; be a register; the shift count is always SImode and may be a register or
;; an immediate ("Ri").
;; Left shift: emitted as shl.b<size>.
805 | (define_insn "ashl<mode>3" | |
862a58ed TV |
806 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") |
807 | (ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
808 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
738f2522 BS |
809 | "" |
810 | "%.\\tshl.b%T0\\t%0, %1, %2;") | |
811 | ||
;; Arithmetic right shift: emitted as shr.s<size>.
812 | (define_insn "ashr<mode>3" | |
862a58ed TV |
813 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") |
814 | (ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
815 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
738f2522 BS |
816 | "" |
817 | "%.\\tshr.s%T0\\t%0, %1, %2;") | |
818 | ||
;; Logical right shift: emitted as shr.u<size>.
819 | (define_insn "lshr<mode>3" | |
862a58ed TV |
820 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") |
821 | (lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
822 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
738f2522 BS |
823 | "" |
824 | "%.\\tshr.u%T0\\t%0, %1, %2;") | |
825 | ||
c982d02f TV |
;; 32-bit rotates, enabled only when TARGET_SM35 holds.  The RTL requires the
;; rotate count to appear masked with 31, and the output feeds operand 1 to
;; both data inputs of shf.{l,r}.wrap.b32.
;; Rotate left.
826 | (define_insn "rotlsi3" |
827 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
828 | (rotate:SI (match_operand:SI 1 "nvptx_register_operand" "R") | |
829 | (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") | |
830 | (const_int 31))))] | |
831 | "TARGET_SM35" | |
832 | "%.\\tshf.l.wrap.b32\\t%0, %1, %1, %2;") | |
833 | ||
;; Rotate right.
834 | (define_insn "rotrsi3" | |
835 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
836 | (rotatert:SI (match_operand:SI 1 "nvptx_register_operand" "R") | |
837 | (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") | |
838 | (const_int 31))))] | |
839 | "TARGET_SM35" | |
840 | "%.\\tshf.r.wrap.b32\\t%0, %1, %1, %2;") | |
841 | ||
738f2522 BS |
842 | ;; Logical operations |
843 | ||
f68c3de7 RS |
;; Code iterator over the three bitwise operations, with two name mappings:
;; "logic" is spliced into the assembly mnemonic (ior -> "or"), while
;; "ilogic" is spliced into the pattern name (ior -> "ior").
844 | (define_code_iterator any_logic [and ior xor]) |
845 | (define_code_attr logic [(and "and") (ior "or") (xor "xor")]) | |
846 | (define_code_attr ilogic [(and "and") (ior "ior") (xor "xor")]) | |
738f2522 | 847 | |
f68c3de7 RS |
;; Bitwise and/ior/xor on HSDIM integers; operand 2 may be a register or an
;; immediate.  Emitted as <logic>.b<size>.
848 | (define_insn "<ilogic><mode>3" |
849 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
850 | (any_logic:HSDIM | |
851 | (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
852 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
738f2522 | 853 | "" |
f68c3de7 | 854 | "%.\\t<logic>.b%T0\\t%0, %1, %2;") |
738f2522 | 855 | |
;; Same operations on BImode predicates; both inputs must be registers.
;; Emitted as <logic>.pred.
f68c3de7 RS |
856 | (define_insn "<ilogic>bi3" |
857 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
858 | (any_logic:BI (match_operand:BI 1 "nvptx_register_operand" "R") | |
859 | (match_operand:BI 2 "nvptx_register_operand" "R")))] | |
738f2522 | 860 | "" |
f68c3de7 RS |
861 | "%.\\t<logic>.pred\\t%0, %1, %2;") |
862 | ||
;; Split a logic operation applied to two integer-valued "predicate != 0"
;; tests into (1) the same logic operation performed directly on the BImode
;; predicates, stored in a fresh BImode pseudo, and (2) a single
;; "pseudo != 0" conversion into the integer destination.  Only applies
;; while new pseudos can still be created.
863 | (define_split | |
864 | [(set (match_operand:HSDIM 0 "nvptx_register_operand") | |
865 | (any_logic:HSDIM | |
866 | (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand") | |
867 | (const_int 0)) | |
868 | (ne:HSDIM (match_operand:BI 2 "nvptx_register_operand") | |
869 | (const_int 0))))] | |
870 | "can_create_pseudo_p ()" | |
871 | [(set (match_dup 3) (any_logic:BI (match_dup 1) (match_dup 2))) | |
872 | (set (match_dup 0) (ne:HSDIM (match_dup 3) (const_int 0)))] | |
873 | { | |
874 | operands[3] = gen_reg_rtx (BImode); | |
875 | }) | |
738f2522 BS |
876 | |
877 | ;; Comparisons and branches | |
878 | ||
;; Comparison patterns producing a BImode predicate via setp.  The comparison
;; code is taken from match_operator 1 and printed with the %c modifier.
;; Integer comparison (HSDIM); operand 3 may be a register or an immediate.
de12b919 | 879 | (define_insn "cmp<mode>" |
738f2522 BS |
880 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") |
881 | (match_operator:BI 1 "nvptx_comparison_operator" | |
882 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") | |
883 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
884 | "" | |
f324806d | 885 | "%.\\tsetp%c1\\t%0, %2, %3;") |
738f2522 BS |
886 | |
;; Floating-point comparison (SDFM); operand 3 may be a register or a
;; floating constant ("RF").
887 | (define_insn "*cmp<mode>" | |
888 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
889 | (match_operator:BI 1 "nvptx_float_comparison_operator" | |
890 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") | |
891 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
892 | "" | |
f324806d | 893 | "%.\\tsetp%c1\\t%0, %2, %3;") |
738f2522 | 894 | |
;; HFmode comparison; same shape as above but gated on TARGET_SM53.
91a7e1da RS |
895 | (define_insn "*cmphf" |
896 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
897 | (match_operator:BI 1 "nvptx_float_comparison_operator" | |
898 | [(match_operand:HF 2 "nvptx_register_operand" "R") | |
899 | (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))] | |
900 | "TARGET_SM53" | |
901 | "%.\\tsetp%c1\\t%0, %2, %3;") | |
902 | ||
738f2522 BS |
;; Unconditional jump to the label in operand 0, emitted as "bra".
903 | (define_insn "jump" |
904 | [(set (pc) | |
905 | (label_ref (match_operand 0 "" "")))] | |
906 | "" | |
907 | "%.\\tbra\\t%l0;") | |
908 | ||
;; Conditional branches on a BImode predicate register.  Both are marked
;; non-predicable; the guard is printed from operand 0 itself through the
;; %j / %J modifiers (NOTE(review): presumably the predicate and its
;; negation -- confirm in the target's operand printer).
;; Branch to operand 1 when the predicate is non-zero.
909 | (define_insn "br_true" | |
910 | [(set (pc) | |
911 | (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
912 | (const_int 0)) | |
913 | (label_ref (match_operand 1 "" "")) | |
914 | (pc)))] | |
915 | "" | |
5012919d | 916 | "%j0\\tbra\\t%l1;" |
3357878e | 917 | [(set_attr "predicable" "no")]) |
738f2522 BS |
918 | |
;; Branch to operand 1 when the predicate is zero.
919 | (define_insn "br_false" | |
920 | [(set (pc) | |
921 | (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
922 | (const_int 0)) | |
923 | (label_ref (match_operand 1 "" "")) | |
924 | (pc)))] | |
925 | "" | |
5012919d | 926 | "%J0\\tbra\\t%l1;" |
3357878e | 927 | [(set_attr "predicable" "no")]) |
738f2522 | 928 | |
d88cd9c4 NS |
929 | ;; unified conditional branch |
;; Same shape as br_true / br_false, except that the predicate is wrapped in
;; UNSPEC_BR_UNIFIED and the emitted instruction is "bra.uni".  Both patterns
;; are marked non-predicable.
930 | (define_insn "br_true_uni" | |
931 | [(set (pc) (if_then_else | |
932 | (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
933 | UNSPEC_BR_UNIFIED) (const_int 0)) | |
934 | (label_ref (match_operand 1 "" "")) (pc)))] | |
935 | "" | |
5012919d | 936 | "%j0\\tbra.uni\\t%l1;" |
3357878e | 937 | [(set_attr "predicable" "no")]) |
d88cd9c4 NS |
938 | |
;; Unified branch taken when the wrapped predicate is zero.
939 | (define_insn "br_false_uni" | |
940 | [(set (pc) (if_then_else | |
941 | (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
942 | UNSPEC_BR_UNIFIED) (const_int 0)) | |
943 | (label_ref (match_operand 1 "" "")) (pc)))] | |
944 | "" | |
5012919d | 945 | "%J0\\tbra.uni\\t%l1;" |
3357878e | 946 | [(set_attr "predicable" "no")]) |
d88cd9c4 | 947 | |
738f2522 BS |
;; Compare-and-branch expanders.  The preparation code passes the incoming
;; comparison (operand 0) to nvptx_expand_compare and then replaces operands
;; 0, 1 and 2 with the returned expression and its two sub-expressions, so
;; the branch template is generated from the rewritten comparison.
;; Integer (HSDIM) variant.
948 | (define_expand "cbranch<mode>4" |
949 | [(set (pc) | |
950 | (if_then_else (match_operator 0 "nvptx_comparison_operator" | |
951 | [(match_operand:HSDIM 1 "nvptx_register_operand" "") | |
bc4ec543 | 952 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")]) |
738f2522 BS |
953 | (label_ref (match_operand 3 "" "")) |
954 | (pc)))] | |
955 | "" | |
956 | { | |
957 | rtx t = nvptx_expand_compare (operands[0]); | |
958 | operands[0] = t; | |
959 | operands[1] = XEXP (t, 0); | |
960 | operands[2] = XEXP (t, 1); | |
961 | }) | |
962 | ||
;; Floating-point (SDFM) variant; identical preparation code.
963 | (define_expand "cbranch<mode>4" | |
964 | [(set (pc) | |
965 | (if_then_else (match_operator 0 "nvptx_float_comparison_operator" | |
966 | [(match_operand:SDFM 1 "nvptx_register_operand" "") | |
bc4ec543 | 967 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")]) |
738f2522 BS |
968 | (label_ref (match_operand 3 "" "")) |
969 | (pc)))] | |
970 | "" | |
971 | { | |
972 | rtx t = nvptx_expand_compare (operands[0]); | |
973 | operands[0] = t; | |
974 | operands[1] = XEXP (t, 0); | |
975 | operands[2] = XEXP (t, 1); | |
976 | }) | |
977 | ||
;; Compare-and-branch on an existing BImode predicate against constant zero.
;; There is no preparation code: the template is emitted as written.
978 | (define_expand "cbranchbi4" | |
979 | [(set (pc) | |
980 | (if_then_else (match_operator 0 "predicate_operator" | |
981 | [(match_operand:BI 1 "nvptx_register_operand" "") | |
982 | (match_operand:BI 2 "const0_operand" "")]) | |
983 | (label_ref (match_operand 3 "" "")) | |
984 | (pc)))] | |
985 | "" | |
986 | "") | |
987 | ||
988 | ;; Conditional stores | |
989 | ||
;; Materialize a predicate as an integer: selp yields 1 when the BImode
;; operand is set and 0 otherwise.  Covers the QHSDIM modes.
beed3f8f | 990 | (define_insn "setcc<mode>_from_bi" |
de12b919 RS |
991 | [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R") |
992 | (ne:QHSDIM (match_operand:BI 1 "nvptx_register_operand" "R") |
beed3f8f RS |
993 | (const_int 0)))] |
994 | "" | |
995 | "%.\\tselp%t0\\t%0, 1, 0, %1;") | |
996 | ||
;; Inverted form: selp yields 0 when the predicate is set and 1 otherwise.
;; Note this pattern iterates over HSDIM, not QHSDIM as the one above does.
26d7b8f9 RS |
997 | (define_insn "*setcc<mode>_from_not_bi" |
998 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
999 | (eq:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R") | |
1000 | (const_int 0)))] | |
1001 | "" | |
1002 | "%.\\tselp%t0\\t%0, 0, 1, %1;") | |
1003 | ||
;; Sign-extend a BImode predicate to an integer mode: selp picks -1 when the
;; predicate is set, 0 otherwise.
beed3f8f | 1004 | (define_insn "extendbi<mode>2" |
de12b919 RS |
1005 | [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R") |
1006 | (sign_extend:QHSDIM | |
beed3f8f RS |
1007 | (match_operand:BI 1 "nvptx_register_operand" "R")))] |
1008 | "" | |
1009 | "%.\\tselp%t0\\t%0, -1, 0, %1;") | |
1010 | ||
;; Zero-extend a BImode predicate: selp picks 1 when set, 0 otherwise (the
;; same output template as setcc<mode>_from_bi).
1011 | (define_insn "zero_extendbi<mode>2" | |
de12b919 RS |
1012 | [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R") |
1013 | (zero_extend:QHSDIM | |
beed3f8f | 1014 | (match_operand:BI 1 "nvptx_register_operand" "R")))] |
738f2522 | 1015 | "" |
beed3f8f | 1016 | "%.\\tselp%t0\\t%0, 1, 0, %1;") |
738f2522 | 1017 | |
224b491b NS |
;; Conditional moves on a BImode predicate (operand 1), emitted as selp.
;; sel_true tests "predicate != 0" and passes operands 2, 3 in order;
;; sel_false tests "predicate == 0" and swaps them in the output (%3, %2) so
;; that the same selp instruction implements the inverted condition.
;; sel_true, integer (HSDIM) data.
1018 | (define_insn "sel_true<mode>" |
1019 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
9bacd7af | 1020 | (if_then_else:HSDIM |
224b491b NS |
1021 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) |
1022 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
1023 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
1024 | "" | |
1025 | "%.\\tselp%t0\\t%0, %2, %3, %1;") | |
1026 | ||
;; sel_true, floating-point (SDFM) data.
1027 | (define_insn "sel_true<mode>" | |
1028 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
9bacd7af | 1029 | (if_then_else:SDFM |
224b491b NS |
1030 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) |
1031 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
1032 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
1033 | "" | |
1034 | "%.\\tselp%t0\\t%0, %2, %3, %1;") | |
1035 | ||
;; sel_false, integer (HSDIM) data.
1036 | (define_insn "sel_false<mode>" | |
1037 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
9bacd7af | 1038 | (if_then_else:HSDIM |
224b491b NS |
1039 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) |
1040 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
1041 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
1042 | "" | |
1043 | "%.\\tselp%t0\\t%0, %3, %2, %1;") | |
1044 | ||
;; sel_false, floating-point (SDFM) data.
1045 | (define_insn "sel_false<mode>" | |
1046 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
9bacd7af | 1047 | (if_then_else:SDFM |
224b491b NS |
1048 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) |
1049 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
1050 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
1051 | "" | |
1052 | "%.\\tselp%t0\\t%0, %3, %2, %1;") | |
1053 | ||
9bacd7af RS |
;; Iterator over the two equality codes, used by the two splitters below.
1054 | (define_code_iterator eqne [eq ne]) |
1055 | ||
1056 | ;; Split negation of a predicate into a conditional move. | |
;; The insn has no assembly of its own ("#") and is always split ("&& 1")
;; into an if_then_else choosing -1 when the eq/ne test holds and 0 when it
;; does not, i.e. the negation of a 1/0 comparison result.
1057 | (define_insn_and_split "*selp<mode>_neg_<code>" | |
1058 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
1059 | (neg:HSDIM (eqne:HSDIM | |
1060 | (match_operand:BI 1 "nvptx_register_operand" "R") | |
1061 | (const_int 0))))] | |
1062 | "" | |
1063 | "#" | |
1064 | "&& 1" | |
1065 | [(set (match_dup 0) | |
1066 | (if_then_else:HSDIM | |
1067 | (eqne (match_dup 1) (const_int 0)) | |
1068 | (const_int -1) | |
1069 | (const_int 0)))]) | |
1070 | ||
1071 | ;; Split bitwise not of a predicate into a conditional move. | |
;; Same scheme as above with the complemented constants: -2 (~1) when the
;; eq/ne test holds and -1 (~0) when it does not.
1072 | (define_insn_and_split "*selp<mode>_not_<code>" | |
1073 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
1074 | (not:HSDIM (eqne:HSDIM | |
1075 | (match_operand:BI 1 "nvptx_register_operand" "R") | |
1076 | (const_int 0))))] | |
1077 | "" | |
1078 | "#" | |
1079 | "&& 1" | |
1080 | [(set (match_dup 0) | |
1081 | (if_then_else:HSDIM | |
1082 | (eqne (match_dup 1) (const_int 0)) | |
1083 | (const_int -2) | |
1084 | (const_int -1)))]) | |
1085 | ||
;; Match the negation of an SImode comparison result and emit it as a single
;; "set" instruction with an SImode destination.
;; NOTE(review): the neg wrapper suggests "set" writes an all-ones value for
;; true -- confirm against the PTX ISA description of set.
;; Integer (HSDIM) inputs.
1086 | (define_insn "*setcc_int<mode>" | |
1087 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1088 | (neg:SI | |
1089 | (match_operator:SI 1 "nvptx_comparison_operator" | |
1090 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") | |
1091 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))] | |
1092 | "" | |
1093 | "%.\\tset%t0%c1\\t%0, %2, %3;") | |
1094 | ||
;; Floating-point (SDFM) inputs.
1095 | (define_insn "*setcc_int<mode>" | |
1096 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1097 | (neg:SI | |
1098 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
1099 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") | |
1100 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))] | |
1101 | "" | |
1102 | "%.\\tset%t0%c1\\t%0, %2, %3;") | |
1103 | ||
738f2522 BS |
;; Comparison whose result is stored directly in an SFmode register, emitted
;; as a "set" instruction.  Unlike *setcc_int there is no neg wrapper.
;; Integer (HSDIM) inputs.
1104 | (define_insn "setcc_float<mode>" |
1105 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1106 | (match_operator:SF 1 "nvptx_comparison_operator" | |
224b491b NS |
1107 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") |
1108 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
738f2522 | 1109 | "" |
f324806d | 1110 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
738f2522 BS |
1111 | |
;; Floating-point (SDFM) inputs.
1112 | (define_insn "setcc_float<mode>" | |
1113 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1114 | (match_operator:SF 1 "nvptx_float_comparison_operator" | |
224b491b NS |
1115 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") |
1116 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
738f2522 | 1117 | "" |
f324806d | 1118 | "%.\\tset%t0%c1\\t%0, %2, %3;") |
738f2522 | 1119 | |
738f2522 BS |
;; Store-comparison-result expanders.  All three use the same two-step
;; sequence: rebuild the comparison in BImode with the original comparison
;; code and operands, move it into a fresh BImode pseudo, then convert that
;; predicate to the SImode destination with setccsi_from_bi.  The RTL
;; template itself is never emitted (the preparation code ends in DONE).
;; Integer (HSDIM) inputs.
1120 | (define_expand "cstore<mode>4" |
1121 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
1122 | (match_operator:SI 1 "nvptx_comparison_operator" | |
beed3f8f RS |
1123 | [(match_operand:HSDIM 2 "nvptx_register_operand") |
1124 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))] | |
738f2522 | 1125 | "" |
beed3f8f RS |
1126 | { |
1127 | rtx reg = gen_reg_rtx (BImode); | |
1128 | rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode, | |
1129 | operands[2], operands[3]); | |
1130 | emit_move_insn (reg, cmp); | |
1131 | emit_insn (gen_setccsi_from_bi (operands[0], reg)); | |
1132 | DONE; | |
1133 | }) | |
738f2522 BS |
1134 | |
;; Floating-point (SDFM) inputs.
1135 | (define_expand "cstore<mode>4" | |
1136 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
1137 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
beed3f8f RS |
1138 | [(match_operand:SDFM 2 "nvptx_register_operand") |
1139 | (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))] | |
738f2522 | 1140 | "" |
beed3f8f RS |
1141 | { |
1142 | rtx reg = gen_reg_rtx (BImode); | |
1143 | rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode, | |
1144 | operands[2], operands[3]); | |
1145 | emit_move_insn (reg, cmp); | |
1146 | emit_insn (gen_setccsi_from_bi (operands[0], reg)); | |
1147 | DONE; | |
1148 | }) | |
738f2522 | 1149 | |
;; HFmode inputs; only available when TARGET_SM53 holds.
91a7e1da RS |
1150 | (define_expand "cstorehf4" |
1151 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
1152 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
1153 | [(match_operand:HF 2 "nvptx_register_operand") | |
1154 | (match_operand:HF 3 "nvptx_nonmemory_operand")]))] | |
1155 | "TARGET_SM53" | |
1156 | { | |
1157 | rtx reg = gen_reg_rtx (BImode); | |
1158 | rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode, | |
1159 | operands[2], operands[3]); | |
1160 | emit_move_insn (reg, cmp); | |
1161 | emit_insn (gen_setccsi_from_bi (operands[0], reg)); | |
1162 | DONE; | |
1163 | }) | |
1164 | ||
738f2522 BS |
1165 | ;; Calls |
1166 | ||
;; Call without a return value.  The whole insn is a parallel accepted by
;; the call_operation predicate; the callee address is operand 0 in pointer
;; mode P.  Assembly text comes from nvptx_output_call_insn, called with a
;; null result.
bbd54be5 | 1167 | (define_insn "call_insn_<mode>" |
738f2522 | 1168 | [(match_parallel 2 "call_operation" |
bbd54be5 | 1169 | [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs")) |
738f2522 BS |
1170 | (match_operand 1))])] |
1171 | "" | |
1172 | { | |
1173 | return nvptx_output_call_insn (insn, NULL_RTX, operands[0]); | |
1174 | }) | |
1175 | ||
;; Call with a return value: operand 0 is the result register and operand 1
;; the callee address; both are forwarded to nvptx_output_call_insn.
bbd54be5 | 1176 | (define_insn "call_value_insn_<mode>" |
738f2522 BS |
1177 | [(match_parallel 3 "call_operation" |
1178 | [(set (match_operand 0 "nvptx_register_operand" "=R") | |
bbd54be5 | 1179 | (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs")) |
738f2522 BS |
1180 | (match_operand 2)))])] |
1181 | "" | |
1182 | { | |
1183 | return nvptx_output_call_insn (insn, operands[0], operands[1]); | |
1184 | }) | |
1185 | ||
;; Call expanders.  Both delegate all work to nvptx_expand_call and finish
;; with DONE, so no RTL template is emitted from here.
;; No return value: a null result is passed together with operand 0.
1186 | (define_expand "call" | |
1187 | [(match_operand 0 "" "")] | |
1188 | "" | |
1189 | { | |
1190 | nvptx_expand_call (NULL_RTX, operands[0]); | |
1191 | DONE; | |
1192 | }) | |
1193 | ||
;; With return value: operand 0 is passed as the result, operand 1 as the
;; call target.
1194 | (define_expand "call_value" | |
1195 | [(match_operand 0 "" "") | |
1196 | (match_operand 1 "" "")] | |
1197 | "" | |
1198 | { | |
1199 | nvptx_expand_call (operands[0], operands[1]); | |
1200 | DONE; | |
1201 | }) | |
1202 | ||
1203 | ;; Floating point arithmetic. | |
1204 | ||
;; Basic arithmetic over the SDFM floating-point mode iterator.
;; Addition; operand 2 may be a register or a floating constant.
1205 | (define_insn "add<mode>3" | |
1206 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1207 | (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1208 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
1209 | "" | |
1210 | "%.\\tadd%t0\\t%0, %1, %2;") | |
1211 | ||
;; Subtraction; unlike add and mul, operand 2 must be a register here.
1212 | (define_insn "sub<mode>3" | |
1213 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1214 | (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1215 | (match_operand:SDFM 2 "nvptx_register_operand" "R")))] | |
1216 | "" | |
1217 | "%.\\tsub%t0\\t%0, %1, %2;") | |
1218 | ||
;; Multiplication; operand 2 may be a register or a floating constant.
1219 | (define_insn "mul<mode>3" | |
1220 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1221 | (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1222 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
1223 | "" | |
1224 | "%.\\tmul%t0\\t%0, %1, %2;") | |
1225 | ||
;; Fused multiply-add.  The mnemonic carries the %# modifier
;; (NOTE(review): presumably a rounding-mode suffix -- confirm in the
;; target's operand printer).
1226 | (define_insn "fma<mode>4" | |
1227 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1228 | (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1229 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
1230 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
1231 | "" | |
1232 | "%.\\tfma%#%t0\\t%0, %1, %2, %3;") | |
1233 | ||
a0d007d6 RS |
;; Reciprocal: a division whose numerator (operand 2) is a CONST_DOUBLE
;; identical to dconst1 is emitted as a single rcp of the divisor
;; (operand 1).  Note the operand numbering: the divisor is operand 1.
1234 | (define_insn "*recip<mode>2" |
1235 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1236 | (div:SDFM | |
1237 | (match_operand:SDFM 2 "const_double_operand" "F") | |
1238 | (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1239 | "CONST_DOUBLE_P (operands[2]) | |
1240 | && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)" | |
1241 | "%.\\trcp%#%t0\\t%0, %1;") | |
1242 | ||
;; General floating-point division; the divisor may be a register or a
;; floating constant.
738f2522 BS |
1243 | (define_insn "div<mode>3" |
1244 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1245 | (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1246 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
1247 | "" | |
1248 | "%.\\tdiv%#%t0\\t%0, %1, %2;") | |
1249 | ||
;; copysign, represented as UNSPEC_COPYSIGN.  The output template lists the
;; inputs in swapped order (%2 before %1).
;; NOTE(review): presumably because the PTX copysign instruction takes the
;; sign source first -- confirm against the PTX ISA.
1250 | (define_insn "copysign<mode>3" | |
1251 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
6d98e83b RS |
1252 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF") |
1253 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")] | |
738f2522 BS |
1254 | UNSPEC_COPYSIGN))] |
1255 | "" | |
1256 | "%.\\tcopysign%t0\\t%0, %2, %1;") | |
1257 | ||
;; Floating-point minimum over SDFM, emitted as min.
1258 | (define_insn "smin<mode>3" | |
1259 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1260 | (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1261 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
1262 | "" | |
1263 | "%.\\tmin%t0\\t%0, %1, %2;") | |
1264 | ||
;; Floating-point maximum over SDFM, emitted as max.
1265 | (define_insn "smax<mode>3" | |
1266 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1267 | (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1268 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
1269 | "" | |
1270 | "%.\\tmax%t0\\t%0, %1, %2;") | |
1271 | ||
;; Unary floating-point operations over SDFM; the input must be a register.
;; Absolute value.
1272 | (define_insn "abs<mode>2" | |
1273 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1274 | (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1275 | "" | |
1276 | "%.\\tabs%t0\\t%0, %1;") | |
1277 | ||
;; Negation.
1278 | (define_insn "neg<mode>2" | |
1279 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1280 | (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1281 | "" | |
1282 | "%.\\tneg%t0\\t%0, %1;") | |
1283 | ||
;; Square root; the mnemonic additionally carries the %# modifier.
1284 | (define_insn "sqrt<mode>2" | |
1285 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1286 | (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1287 | "" | |
1288 | "%.\\tsqrt%#%t0\\t%0, %1;") | |
1289 | ||
7dea4ab3 CP |
;; Combined SFmode sine/cosine, available only under
;; flag_unsafe_math_optimizations.  Expands to two sets: operand 0 receives
;; the UNSPEC_COS of operand 2 and operand 1 its UNSPEC_SIN.  Because the
;; first set writes operand 0 before the second reads the input, the input is
;; first passed through make_safe_from with operand 0.
1290 | (define_expand "sincossf3" |
1291 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1292 | (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")] | |
1293 | UNSPEC_COS)) | |
1294 | (set (match_operand:SF 1 "nvptx_register_operand" "=R") | |
1295 | (unspec:SF [(match_dup 2)] UNSPEC_SIN))] | |
1296 | "flag_unsafe_math_optimizations" | |
1297 | { | |
1298 | operands[2] = make_safe_from (operands[2], operands[0]); | |
1299 | }) | |
1300 | ||
738f2522 BS |
;; SFmode transcendental approximations.  Each is modelled as an unspec, is
;; enabled only under flag_unsafe_math_optimizations, and emits the
;; corresponding ".approx" instruction.
;; Sine.
1301 | (define_insn "sinsf2" |
1302 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1303 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
1304 | UNSPEC_SIN))] | |
1305 | "flag_unsafe_math_optimizations" | |
1306 | "%.\\tsin.approx%t0\\t%0, %1;") | |
1307 | ||
;; Cosine.
1308 | (define_insn "cossf2" | |
1309 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1310 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
1311 | UNSPEC_COS))] | |
1312 | "flag_unsafe_math_optimizations" | |
1313 | "%.\\tcos.approx%t0\\t%0, %1;") | |
1314 | ||
;; Base-2 logarithm (lg2).
1315 | (define_insn "log2sf2" | |
1316 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1317 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
1318 | UNSPEC_LOG2))] | |
1319 | "flag_unsafe_math_optimizations" | |
1320 | "%.\\tlg2.approx%t0\\t%0, %1;") | |
1321 | ||
;; Base-2 exponential (ex2).
1322 | (define_insn "exp2sf2" | |
1323 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1324 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
1325 | UNSPEC_EXP2))] | |
1326 | "flag_unsafe_math_optimizations" | |
1327 | "%.\\tex2.approx%t0\\t%0, %1;") | |
1328 | ||
26d7b8f9 RS |
;; Infinity test producing a BImode predicate, emitted as testp.infinite.
;; The type suffix is taken from the input operand (%t1), not the result.
1329 | (define_insn "setcc_isinf<mode>" |
1330 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
1331 | (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
1332 | UNSPEC_ISINF))] | |
1333 | "" | |
1334 | "%.\\ttestp.infinite%t1\\t%0, %1;") | |
1335 | ||
;; isinf with an SImode result: compute the predicate with setcc_isinf<mode>
;; into a fresh BImode pseudo, then convert it to the integer destination
;; with setccsi_from_bi.  The RTL template itself is not emitted (DONE).
1336 | (define_expand "isinf<mode>2" | |
1337 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1338 | (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
1339 | UNSPEC_ISINF))] | |
1340 | "" | |
1341 | { | |
1342 | rtx pred = gen_reg_rtx (BImode); | |
1343 | emit_insn (gen_setcc_isinf<mode> (pred, operands[1])); | |
1344 | emit_insn (gen_setccsi_from_bi (operands[0], pred)); | |
1345 | DONE; | |
1346 | }) | |
1347 | ||
aeedb00a RS |
1348 | ;; HFmode floating point arithmetic. |
1349 | ||
;; HFmode add/sub/mul.  All three require TARGET_SM53, accept only register
;; operands, and emit the .f16 form of the instruction.
;; Addition.
1350 | (define_insn "addhf3" | |
1351 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1352 | (plus:HF (match_operand:HF 1 "nvptx_register_operand" "R") | |
1353 | (match_operand:HF 2 "nvptx_register_operand" "R")))] | |
1354 | "TARGET_SM53" | |
1355 | "%.\\tadd.f16\\t%0, %1, %2;") | |
1356 | ||
;; Subtraction.
1357 | (define_insn "subhf3" | |
1358 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1359 | (minus:HF (match_operand:HF 1 "nvptx_register_operand" "R") | |
1360 | (match_operand:HF 2 "nvptx_register_operand" "R")))] | |
1361 | "TARGET_SM53" | |
1362 | "%.\\tsub.f16\\t%0, %1, %2;") | |
1363 | ||
;; Multiplication.
1364 | (define_insn "mulhf3" | |
1365 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1366 | (mult:HF (match_operand:HF 1 "nvptx_register_operand" "R") | |
1367 | (match_operand:HF 2 "nvptx_register_operand" "R")))] | |
1368 | "TARGET_SM53" | |
1369 | "%.\\tmul.f16\\t%0, %1, %2;") | |
1370 | ||
91a7e1da RS |
;; HFmode fused multiply-add, requiring TARGET_SM53.  Operands 2 and 3 may be
;; registers or floating constants.
1371 | (define_insn "fmahf4" |
1372 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1373 | (fma:HF (match_operand:HF 1 "nvptx_register_operand" "R") | |
1374 | (match_operand:HF 2 "nvptx_nonmemory_operand" "RF") | |
1375 | (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")))] | |
1376 | "TARGET_SM53" | |
1377 | "%.\\tfma%#.f16\\t%0, %1, %2, %3;") | |
1378 | ||
;; HFmode negate and absolute value, implemented as 16-bit integer bit
;; operations on the top bit rather than as .f16 instructions.  Both have an
;; empty condition, so they are not gated on TARGET_SM53 like the HFmode
;; arithmetic patterns.
;; Negate: xor with -32768, which toggles only the top bit.
1379 | (define_insn "neghf2" | |
1380 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1381 | (neg:HF (match_operand:HF 1 "nvptx_register_operand" "R")))] | |
1382 | "" | |
1383 | "%.\\txor.b16\\t%0, %1, -32768;") | |
1384 | ||
;; Absolute value: and with 32767, which clears only the top bit.
1385 | (define_insn "abshf2" | |
1386 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1387 | (abs:HF (match_operand:HF 1 "nvptx_register_operand" "R")))] | |
1388 | "" | |
1389 | "%.\\tand.b16\\t%0, %1, 32767;") | |
1390 | ||
308d688b RS |
;; Approximate transcendental instructions that require both TARGET_SM75 and
;; flag_unsafe_math_optimizations.
;; HFmode base-2 exponential.
1391 | (define_insn "exp2hf2" |
1392 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1393 | (unspec:HF [(match_operand:HF 1 "nvptx_register_operand" "R")] | |
1394 | UNSPEC_EXP2))] | |
1395 | "TARGET_SM75 && flag_unsafe_math_optimizations" | |
1396 | "%.\\tex2.approx.f16\\t%0, %1;") | |
1397 | ||
;; Hyperbolic tangent over the HSFM mode iterator.
1398 | (define_insn "tanh<mode>2" | |
1399 | [(set (match_operand:HSFM 0 "nvptx_register_operand" "=R") | |
1400 | (unspec:HSFM [(match_operand:HSFM 1 "nvptx_register_operand" "R")] | |
1401 | UNSPEC_TANH))] | |
1402 | "TARGET_SM75 && flag_unsafe_math_optimizations" | |
1403 | "%.\\ttanh.approx%t0\\t%0, %1;") | |
1404 | ||
1405 | ;; HFmode floating point minimum and maximum. | |
1406 | ||
;; HFmode minimum; requires TARGET_SM80 and register operands.
1407 | (define_insn "sminhf3" | |
1408 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1409 | (smin:HF (match_operand:HF 1 "nvptx_register_operand" "R") | |
1410 | (match_operand:HF 2 "nvptx_register_operand" "R")))] | |
1411 | "TARGET_SM80" | |
1412 | "%.\\tmin.f16\\t%0, %1, %2;") | |
1413 | ||
;; HFmode maximum; requires TARGET_SM80 and register operands.
1414 | (define_insn "smaxhf3" | |
1415 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1416 | (smax:HF (match_operand:HF 1 "nvptx_register_operand" "R") | |
1417 | (match_operand:HF 2 "nvptx_register_operand" "R")))] | |
1418 | "TARGET_SM80" | |
1419 | "%.\\tmax.f16\\t%0, %1, %2;") | |
1420 | ||
738f2522 BS |
1421 | ;; Conversions involving floating point |
1422 | ||
;; Widen SFmode to DFmode with cvt; no %# modifier is used in this
;; direction.
1423 | (define_insn "extendsfdf2" | |
1424 | [(set (match_operand:DF 0 "nvptx_register_operand" "=R") | |
1425 | (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))] | |
1426 | "" | |
1427 | "%.\\tcvt%t0%t1\\t%0, %1;") | |
1428 | ||
;; Narrow DFmode to SFmode with cvt; here the mnemonic carries %#.
1429 | (define_insn "truncdfsf2" | |
1430 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1431 | (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))] | |
1432 | "" | |
1433 | "%.\\tcvt%#%t0%t1\\t%0, %1;") | |
1434 | ||
;; Integer to floating-point conversions (result modes from SDFM), all
;; emitted as cvt with %#.  The source type suffix is ".u<size>" for the
;; unsigned variants and ".s<size>" for the signed ones.
;; Unsigned SImode source.
1435 | (define_insn "floatunssi<mode>2" | |
1436 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1437 | (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
1438 | "" | |
1439 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") | |
1440 | ||
;; Signed SImode source.
1441 | (define_insn "floatsi<mode>2" | |
1442 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1443 | (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
1444 | "" | |
1445 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") | |
1446 | ||
;; Unsigned DImode source.
1447 | (define_insn "floatunsdi<mode>2" | |
1448 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1449 | (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
1450 | "" | |
1451 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") | |
1452 | ||
;; Signed DImode source.
1453 | (define_insn "floatdi<mode>2" | |
1454 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1455 | (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
1456 | "" | |
1457 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") | |
1458 | ||
;; Floating-point to integer conversions (source modes from SDFM), all
;; emitted as cvt with the fixed ".rzi" modifier.  The destination type
;; suffix is ".u<size>" for the unsigned variants and ".s<size>" for the
;; signed ones.
;; Unsigned SImode result.
1459 | (define_insn "fixuns_trunc<mode>si2" | |
1460 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1461 | (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1462 | "" | |
1463 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") | |
1464 | ||
;; Signed SImode result.
1465 | (define_insn "fix_trunc<mode>si2" | |
1466 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1467 | (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1468 | "" | |
1469 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") | |
1470 | ||
;; Unsigned DImode result.
1471 | (define_insn "fixuns_trunc<mode>di2" | |
1472 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
1473 | (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1474 | "" | |
1475 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") | |
1476 | ||
;; Signed DImode result.
1477 | (define_insn "fix_trunc<mode>di2" | |
1478 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
1479 | (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1480 | "" | |
1481 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") | |
1482 | ||
;; Round-to-integral operations that keep the floating-point mode.  FPINT
;; iterates over four unspecs; fpint_name gives each one its pattern name and
;; fpint_roundingmode the cvt modifier spliced into the mnemonic.  The
;; nearbyint entry uses "%#i" instead of a fixed modifier.
1483 | (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC | |
1484 | UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT]) | |
1485 | (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor") | |
1486 | (UNSPEC_FPINT_BTRUNC "btrunc") | |
1487 | (UNSPEC_FPINT_CEIL "ceil") | |
1488 | (UNSPEC_FPINT_NEARBYINT "nearbyint")]) | |
1489 | (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") | |
1490 | (UNSPEC_FPINT_BTRUNC ".rzi") | |
1491 | (UNSPEC_FPINT_CEIL ".rpi") | |
1492 | (UNSPEC_FPINT_NEARBYINT "%#i")]) | |
1493 | ||
;; One insn per (FPINT, SDFM) combination; source and destination share the
;; same floating-point mode.
1494 | (define_insn "<FPINT:fpint_name><SDFM:mode>2" | |
1495 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1496 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
1497 | FPINT))] | |
1498 | "" | |
1499 | "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;") | |
1500 | ||
;; Floor/ceil with an integer result (the lfloor / lceil pattern names).
;; FPINT2 covers only the floor and ceil unspecs, mapped to the ".rmi" and
;; ".rpi" cvt modifiers respectively.
1501 | (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL]) | |
1502 | (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor") | |
1503 | (UNSPEC_FPINT_CEIL "lceil")]) | |
1504 | (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") | |
1505 | (UNSPEC_FPINT_CEIL ".rpi")]) | |
1506 | ||
;; One insn per (FPINT2, SDFM source, SDIM destination) combination; the
;; destination is always printed as a signed integer type (".s<size>").
1507 | (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2" | |
1508 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1509 | (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
1510 | FPINT2))] | |
1511 | "" | |
1512 | "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;") | |
1513 | ||
aeedb00a RS |
;; Widen HFmode to an SDFM mode with cvt; requires TARGET_SM53.
1514 | (define_insn "extendhf<mode>2" |
1515 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1516 | (float_extend:SDFM (match_operand:HF 1 "nvptx_register_operand" "R")))] | |
1517 | "TARGET_SM53" | |
1518 | "%.\\tcvt%t0%t1\\t%0, %1;") | |
1519 | ||
;; Narrow an SDFM mode to HFmode with cvt (with %#); requires TARGET_SM53.
1520 | (define_insn "trunc<mode>hf2" | |
1521 | [(set (match_operand:HF 0 "nvptx_register_operand" "=R") | |
1522 | (float_truncate:HF (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1523 | "TARGET_SM53" | |
1524 | "%.\\tcvt%#%t0%t1\\t%0, %1;") | |
1525 | ||
8240f2f4 RS |
1526 | ;; Vector operations |
1527 | ||
;; Single-element vector insertion.  Each pattern matches a vec_merge of a
;; duplicated scalar into the destination vector itself (match_dup 0) with a
;; one-bit mask, and emits a mov to the matching component of the register:
;; mask 1 -> .x, 2 -> .y, 4 -> .z, 8 -> .w.
;; Element 0 (.x).
1528 | (define_insn "*vec_set<mode>_0" | |
1529 | [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R") | |
1530 | (vec_merge:VECIM | |
1531 | (vec_duplicate:VECIM | |
1532 | (match_operand:<VECELEM> 1 "nvptx_register_operand" "R")) | |
1533 | (match_dup 0) | |
1534 | (const_int 1)))] | |
1535 | "" | |
1536 | "%.\\tmov%t1\\t%0.x, %1;") | |
1537 | ||
;; Element 1 (.y).
1538 | (define_insn "*vec_set<mode>_1" | |
1539 | [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R") | |
1540 | (vec_merge:VECIM | |
1541 | (vec_duplicate:VECIM | |
1542 | (match_operand:<VECELEM> 1 "nvptx_register_operand" "R")) | |
1543 | (match_dup 0) | |
1544 | (const_int 2)))] | |
1545 | "" | |
1546 | "%.\\tmov%t1\\t%0.y, %1;") | |
1547 | ||
;; Element 2 (.z).
1548 | (define_insn "*vec_set<mode>_2" | |
1549 | [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R") | |
1550 | (vec_merge:VECIM | |
1551 | (vec_duplicate:VECIM | |
1552 | (match_operand:<VECELEM> 1 "nvptx_register_operand" "R")) | |
1553 | (match_dup 0) | |
1554 | (const_int 4)))] | |
1555 | "" | |
1556 | "%.\\tmov%t1\\t%0.z, %1;") | |
1557 | ||
;; Element 3 (.w).
1558 | (define_insn "*vec_set<mode>_3" | |
1559 | [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R") | |
1560 | (vec_merge:VECIM | |
1561 | (vec_duplicate:VECIM | |
1562 | (match_operand:<VECELEM> 1 "nvptx_register_operand" "R")) | |
1563 | (match_dup 0) | |
1564 | (const_int 8)))] | |
1565 | "" | |
1566 | "%.\\tmov%t1\\t%0.w, %1;") | |
1567 | ||
;; Expander for setting one vector element.
;;   operand 0: VECIM vector register (read and written)
;;   operand 1: scalar element value
;;   operand 2: constant element index
;; The index is turned into a one-hot mask (1 << index), and the expander
;; emits (set op0 (vec_merge (vec_duplicate op1) op0 mask)).
1568 | (define_expand "vec_set<mode>" | |
1569 | [(match_operand:VECIM 0 "nvptx_register_operand") | |
1570 | (match_operand:<VECELEM> 1 "nvptx_register_operand") | |
1571 | (match_operand:SI 2 "nvptx_vector_index_operand")] | |
1572 | "" | |
1573 | { | |
1574 | enum machine_mode mode = GET_MODE (operands[0]); | |
1575 | int mask = 1 << INTVAL (operands[2]); | |
1576 | rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]); | |
1577 | tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask)); | |
1578 | emit_insn (gen_rtx_SET (operands[0], tmp)); | |
1579 | DONE; | |
1580 | }) | |
1581 | ||
;; Copy one component of vector operand 1 into scalar operand 0.
;; Operand 2 is used directly as an index into a four-entry table of
;; templates that select the .x, .y, .z or .w component.
;; NOTE(review): the table has exactly four entries and the index is not
;; range-checked here; this relies on nvptx_vector_index_operand (defined
;; elsewhere) restricting operand 2 to 0..3 -- confirm.
1582 | (define_insn "vec_extract<mode><Vecelem>" | |
1583 | [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R") | |
1584 | (vec_select:<VECELEM> | |
1585 | (match_operand:VECIM 1 "nvptx_register_operand" "R") | |
1586 | (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))] | |
1587 | "" | |
1588 | { | |
1589 | static const char *const asms[4] = { | |
1590 | "%.\\tmov%t0\\t%0, %1.x;", | |
1591 | "%.\\tmov%t0\\t%0, %1.y;", | |
1592 | "%.\\tmov%t0\\t%0, %1.z;", | |
1593 | "%.\\tmov%t0\\t%0, %1.w;" | |
1594 | }; | |
1595 | return asms[INTVAL (operands[2])]; | |
1596 | }) | |
1597 | ||
738f2522 BS |
1598 | ;; Miscellaneous |
1599 | ||
;; No-operation insn: matches (const_int 0) and emits an empty template.
1600 | (define_insn "nop" | |
1601 | [(const_int 0)] | |
1602 | "" | |
1603 | "") | |
1604 | ||
;; Matches (const_int 1) and emits the literal text "exit;".  The template
;; has no "%." prefix, unlike most patterns in this file.
be606483 TV |
1605 | (define_insn "exit" |
1606 | [(const_int 1)] | |
1607 | "" | |
1608 | "exit;") | |
1609 | ||
;; Matches (const_int 2).  Unlike "nop", this emits real assembly text: a
;; braced block that declares two .u32 registers (%nop_src, %nop_dst) and
;; moves one into the other.
3dede32b TV |
1610 | (define_insn "fake_nop" |
1611 | [(const_int 2)] | |
1612 | "" | |
1613 | "{ | |
1614 | .reg .u32 %%nop_src; | |
1615 | .reg .u32 %%nop_dst; | |
1616 | mov.u32 %%nop_dst, %%nop_src; | |
1617 | }") | |
1618 | ||
;; Function return.  The assembly text is produced by nvptx_output_return ().
;; The insn is marked non-predicable.
738f2522 BS |
1619 | (define_insn "return" |
1620 | [(return)] | |
1621 | "" | |
1622 | { | |
1623 | return nvptx_output_return (); | |
5012919d | 1624 | }
3357878e | 1625 | [(set_attr "predicable" "no")])
738f2522 BS |
1626 | |
;; Function epilogue.  When TARGET_SOFT_STACK is set, first emits a
;; set_softstack insn on the Pmode register SOFTSTACK_PREV_REGNUM; in all
;; cases it then emits the "return" jump insn.
1627 | (define_expand "epilogue" | |
1628 | [(clobber (const_int 0))] | |
1629 | "" | |
1630 | { | |
5012919d | 1631 | if (TARGET_SOFT_STACK)
8b72af17 TV |
1632 | emit_insn (gen_set_softstack (Pmode, gen_rtx_REG (Pmode, |
1633 | SOFTSTACK_PREV_REGNUM))); | |
738f2522 BS |
1634 | emit_jump_insn (gen_return ()); |
1635 | DONE; | |
1636 | }) | |
1637 | ||
;; Nonlocal goto is not supported.  The expander reports that through
;; sorry () and emits a nop so that expansion still completes.  None of the
;; four operands is used.
1638 | (define_expand "nonlocal_goto" | |
1639 | [(match_operand 0 "" "") | |
1640 | (match_operand 1 "" "") | |
1641 | (match_operand 2 "" "") | |
1642 | (match_operand 3 "" "")] | |
1643 | "" | |
1644 | { | |
b1f36409 | 1645 | sorry ("target cannot support nonlocal goto");
738f2522 BS |
1646 | emit_insn (gen_nop ()); |
1647 | DONE; | |
1648 | }) | |
1649 | ||
;; Nonlocal goto receiver: likewise unsupported, reported through sorry ().
;; The preparation code has no DONE, so after the diagnostic the expander
;; falls through to its RTL template, (const_int 0).
1650 | (define_expand "nonlocal_goto_receiver" | |
1651 | [(const_int 0)] | |
1652 | "" | |
1653 | { | |
b1f36409 | 1654 | sorry ("target cannot support nonlocal goto");
738f2522 BS |
1655 | }) |
1656 | ||
;; Dynamic stack allocation.
;;   operand 0: register that receives the address of the new block
;;   operand 1: register holding the amount to allocate
;; With TARGET_SOFT_STACK: subtract operand 1 from the stack pointer, emit
;; set_softstack on the new stack pointer, then copy
;; virtual_stack_dynamic_rtx into operand 0.
;; Otherwise: report "target cannot support alloca" through sorry () and emit
;; a nop so that expansion still completes.
9faf9a56 JJ |
1657 | (define_expand "allocate_stack" |
1658 | [(match_operand 0 "nvptx_register_operand") | |
1659 | (match_operand 1 "nvptx_register_operand")] | |
1660 | "" | |
1661 | { | |
5012919d AM |
1662 | if (TARGET_SOFT_STACK) |
1663 | { | |
1664 | emit_move_insn (stack_pointer_rtx, | |
1665 | gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1])); | |
8b72af17 | 1666 | emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
5012919d AM |
1667 | emit_move_insn (operands[0], virtual_stack_dynamic_rtx); |
1668 | DONE; | |
1669 | } | |
18c05628 NS |
1670 | /* The ptx documentation specifies an alloca intrinsic (for 32 bit |
1671 | only) but notes it is not implemented. The assembler emits a | |
1672 | confused error message. Issue a blunt one now instead. */ | |
b1f36409 | 1673 | sorry ("target cannot support alloca");
18c05628 NS |
1674 | emit_insn (gen_nop ()); |
1675 | DONE; | |
9faf9a56 JJ |
1676 | }) |
1677 | ||
;; Soft-stack update insn, parameterized over the pointer mode P ('@' name,
;; so callers pass the mode as the first argument to gen_set_softstack).
;; Only enabled when TARGET_SOFT_STACK holds.  The assembly text comes from
;; nvptx_output_set_softstack, which is given the register number of
;; operand 0.
8b72af17 | 1678 | (define_insn "@set_softstack_<mode>"
8b243438 | 1679 | [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
5012919d AM |
1680 | UNSPEC_SET_SOFTSTACK)] |
1681 | "TARGET_SOFT_STACK" | |
1682 | { | |
1683 | return nvptx_output_set_softstack (REGNO (operands[0])); | |
1684 | }) | |
738f2522 BS |
1685 | |
;; Restore the stack pointer at the end of a block.
;; With TARGET_SOFT_STACK: copy operand 1 into operand 0 and emit
;; set_softstack on operand 0.  Otherwise nothing is emitted.
1686 | (define_expand "restore_stack_block" | |
1687 | [(match_operand 0 "register_operand" "") | |
1688 | (match_operand 1 "register_operand" "")] | |
1689 | "" | |
1690 | { | |
5012919d AM |
1691 | if (TARGET_SOFT_STACK) |
1692 | { | |
1693 | emit_move_insn (operands[0], operands[1]); | |
8b72af17 | 1694 | emit_insn (gen_set_softstack (Pmode, operands[0]));
5012919d | 1695 | }
738f2522 BS |
1696 | DONE; |
1697 | }) | |
1698 | ||
;; Function-level stack restore: deliberately emits nothing (just DONE).
;; Neither operand is used.
1699 | (define_expand "restore_stack_function" | |
1700 | [(match_operand 0 "register_operand" "") | |
1701 | (match_operand 1 "register_operand" "")] | |
1702 | "" | |
1703 | { | |
1704 | DONE; | |
1705 | }) | |
1706 | ||
;; Unconditional trap: emits "trap;" immediately followed by "exit;".
1707 | (define_insn "trap" | |
1708 | [(trap_if (const_int 1) (const_int 0))] | |
1709 | "" | |
82191cbf | 1710 | "trap; exit;")
738f2522 BS |
1711 | |
;; Conditional trap taken when BImode operand 0 is non-zero.  Both the
;; "trap" and the "exit" are prefixed with "%j0".  Marked non-predicable.
;; NOTE(review): "%j" is expanded by the target's operand printer (not
;; visible here); presumably it prints operand 0 as a predicate guard --
;; confirm.
1712 | (define_insn "trap_if_true" | |
1713 | [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
1714 | (const_int 0)) | |
1715 | (const_int 0))] | |
1716 | "" | |
82191cbf | 1717 | "%j0 trap; %j0 exit;"
3357878e | 1718 | [(set_attr "predicable" "no")])
738f2522 BS |
1719 | |
;; Conditional trap taken when BImode operand 0 is zero.  Both the "trap"
;; and the "exit" are prefixed with "%J0".  Marked non-predicable.
;; NOTE(review): "%J" is expanded by the target's operand printer (not
;; visible here); presumably it prints the negated predicate guard --
;; confirm.
1720 | (define_insn "trap_if_false" | |
1721 | [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
1722 | (const_int 0)) | |
1723 | (const_int 0))] | |
1724 | "" | |
82191cbf | 1725 | "%J0 trap; %J0 exit;"
3357878e | 1726 | [(set_attr "predicable" "no")])
738f2522 BS |
1727 | |
;; Conditional trap on a comparison of two SDIM operands.
;;   operand 0: comparison operator applied to operands 1 and 2
;;   operand 3: must satisfy const0_operand
;; The comparison is handed to nvptx_expand_compare, and the rtx it returns
;; is used as the condition of a trap_if_true insn.
1728 | (define_expand "ctrap<mode>4" | |
1729 | [(trap_if (match_operator 0 "nvptx_comparison_operator" | |
1730 | [(match_operand:SDIM 1 "nvptx_register_operand") | |
1731 | (match_operand:SDIM 2 "nvptx_nonmemory_operand")]) | |
7f091e1c | 1732 | (match_operand 3 "const0_operand"))]
738f2522 BS |
1733 | "" |
1734 | { | |
1735 | rtx t = nvptx_expand_compare (operands[0]); | |
1736 | emit_insn (gen_trap_if_true (t)); | |
1737 | DONE; | |
1738 | }) | |
1739 | ||
;; Read the size of an OpenACC dimension into SImode operand 0.
;; Constant operand 1 indexes the template table directly:
;;   0 (gang) -> %nctaid.x, 1 (worker) -> %ntid.y, 2 (vector) -> %ntid.x.
;; The index is not range-checked here; callers must pass 0..2.
d88cd9c4 NS |
1740 | (define_insn "oacc_dim_size" |
1741 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
1742 | (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] | |
1743 | UNSPEC_DIM_SIZE))] | |
738f2522 | 1744 | ""
d88cd9c4 NS |
1745 | { |
1746 | static const char *const asms[] = | |
1747 | { /* Must match oacc_loop_levels ordering. */ | |
1748 | "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */ | |
1749 | "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */ | |
1750 | "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */ | |
1751 | }; | |
1752 | return asms[INTVAL (operands[1])]; | |
1753 | }) | |
738f2522 | 1754 | |
;; Read this thread's position within an OpenACC dimension into SImode
;; operand 0.  Constant operand 1 indexes the template table directly:
;;   0 (gang) -> %ctaid.x, 1 (worker) -> %tid.y, 2 (vector) -> %tid.x.
;; Unlike oacc_dim_size this uses unspec_volatile.  The index is not
;; range-checked here; callers must pass 0..2.
d88cd9c4 | 1755 | (define_insn "oacc_dim_pos"
738f2522 | 1756 | [(set (match_operand:SI 0 "nvptx_register_operand" "")
d88cd9c4 NS |
1757 | (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")] |
1758 | UNSPECV_DIM_POS))] | |
738f2522 BS |
1759 | "" |
1760 | { | |
d88cd9c4 NS |
1761 | static const char *const asms[] = |
1762 | { /* Must match oacc_loop_levels ordering. */ | |
1763 | "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */ | |
1764 | "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */ | |
1765 | "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */ | |
1766 | }; | |
1767 | return asms[INTVAL (operands[1])]; | |
738f2522 BS |
1768 | }) |
1769 | ||
;; Four marker insns: nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join.
;; Each wraps a constant operand in its own unspec_volatile and emits only an
;; assembler comment ("// fork %0;", "// forked %0;", "// joining %0;",
;; "// join %0;"), i.e. no executable instruction.  All four are marked
;; non-predicable.
d88cd9c4 NS |
1770 | (define_insn "nvptx_fork" |
1771 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1772 | UNSPECV_FORK)] | |
738f2522 | 1773 | ""
d88cd9c4 | 1774 | "// fork %0;"
3357878e | 1775 | [(set_attr "predicable" "no")])
738f2522 | 1776 |
d88cd9c4 NS |
1777 | (define_insn "nvptx_forked" |
1778 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1779 | UNSPECV_FORKED)] | |
1780 | "" | |
1781 | "// forked %0;" | |
3357878e | 1782 | [(set_attr "predicable" "no")])
d88cd9c4 NS |
1783 | |
1784 | (define_insn "nvptx_joining" | |
1785 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1786 | UNSPECV_JOINING)] | |
1787 | "" | |
1788 | "// joining %0;" | |
3357878e | 1789 | [(set_attr "predicable" "no")])
d88cd9c4 NS |
1790 | |
1791 | (define_insn "nvptx_join" | |
1792 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1793 | UNSPECV_JOIN)] | |
1794 | "" | |
1795 | "// join %0;" | |
3357878e | 1796 | [(set_attr "predicable" "no")])
d88cd9c4 NS |
1797 | |
;; OpenACC fork expander.
;;   operand 0: destination, or const0_rtx when no copy is wanted
;;   operand 1: value to copy into operand 0
;;   operand 2: constant passed on as an integer
;; Copies operand 1 into operand 0 unless operand 0 is const0_rtx, then
;; calls nvptx_expand_oacc_fork with the integer value of operand 2.
1798 | (define_expand "oacc_fork" | |
1799 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
15113b03 | 1800 | (match_operand:SI 1 "general_operand" ""))
d88cd9c4 NS |
1801 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] |
1802 | UNSPECV_FORKED)] | |
738f2522 BS |
1803 | "" |
1804 | { | |
d88cd9c4 NS |
1805 | if (operands[0] != const0_rtx) |
1806 | emit_move_insn (operands[0], operands[1]); | |
1807 | nvptx_expand_oacc_fork (INTVAL (operands[2])); | |
1808 | DONE; | |
1809 | }) | |
1810 | ||
;; OpenACC join expander; mirrors oacc_fork.
;;   operand 0: destination, or const0_rtx when no copy is wanted
;;   operand 1: value to copy into operand 0
;;   operand 2: constant passed on as an integer
;; Copies operand 1 into operand 0 unless operand 0 is const0_rtx, then
;; calls nvptx_expand_oacc_join with the integer value of operand 2.
1811 | (define_expand "oacc_join" | |
1812 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
15113b03 | 1813 | (match_operand:SI 1 "general_operand" ""))
d88cd9c4 NS |
1814 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] |
1815 | UNSPECV_JOIN)] | |
1816 | "" | |
1817 | { | |
1818 | if (operands[0] != const0_rtx) | |
1819 | emit_move_insn (operands[0], operands[1]); | |
1820 | nvptx_expand_oacc_join (INTVAL (operands[2])); | |
1821 | DONE; | |
738f2522 BS |
1822 | }) |
1823 | ||
;; Warp shuffle of a BITS-mode register.
;;   operand 0: result
;;   operand 1: value to shuffle
;;   operand 2: register or immediate, printed as the third instruction operand
;;   operand 3: constant shuffle kind, printed through "%S3" as part of the
;;              opcode
;; With TARGET_PTX_6_0 the "shfl.sync" form is emitted with the extra
;; trailing operand 0xffffffff; otherwise the plain "shfl" form is used.
;; Both forms pass the constant 31 and use the .b32 type.
d88cd9c4 NS |
1824 | ;; only 32-bit shuffles exist. |
1825 | (define_insn "nvptx_shuffle<mode>" | |
1826 | [(set (match_operand:BITS 0 "nvptx_register_operand" "=R") | |
1827 | (unspec:BITS | |
1828 | [(match_operand:BITS 1 "nvptx_register_operand" "R") | |
1829 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") | |
1830 | (match_operand:SI 3 "const_int_operand" "n")] | |
1831 | UNSPEC_SHUFFLE))] | |
1832 | "" | |
2a158640 | 1833 | {
decde111 | 1834 | if (TARGET_PTX_6_0)
2a158640 TV |
1835 | return "%.\\tshfl.sync%S3.b32\\t%0, %1, %2, 31, 0xffffffff;"; |
1836 | else | |
1837 | return "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;"; | |
1838 | }) | |
d88cd9c4 | 1839 | |
;; Warp ballot: combine BImode predicate operand 1 into SImode operand 0.
;; With TARGET_PTX_6_0 the "vote.sync.ballot.b32" form is emitted with the
;; extra trailing operand 0xffffffff; otherwise "vote.ballot.b32" is used.
5012919d AM |
1840 | (define_insn "nvptx_vote_ballot" |
1841 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1842 | (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")] | |
1843 | UNSPEC_VOTE_BALLOT))] | |
1844 | "" | |
2a158640 | 1845 | {
decde111 | 1846 | if (TARGET_PTX_6_0)
2a158640 TV |
1847 | return "%.\\tvote.sync.ballot.b32\\t%0, %1, 0xffffffff;"; |
1848 | else | |
1849 | return "%.\\tvote.ballot.b32\\t%0, %1;"; | |
1850 | }) | |
5012919d AM |
1851 | |
1852 | ;; Patterns for OpenMP SIMD-via-SIMT lowering | |
1853 | ||
;; SIMT-region entry insn, parameterized over the pointer mode P.
;; Operand 0 is the result register; operands 1 and 2 are P-mode registers or
;; immediates.  The assembly text is produced by nvptx_output_simt_enter,
;; which receives all three operands.
8b72af17 | 1854 | (define_insn "@omp_simt_enter_<mode>"
8b243438 TV |
1855 | [(set (match_operand:P 0 "nvptx_register_operand" "=R") |
1856 | (unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri") | |
1857 | (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")] | |
0c6b03b5 AM |
1858 | UNSPECV_SIMT_ENTER))] |
1859 | "" | |
1860 | { | |
1861 | return nvptx_output_simt_enter (operands[0], operands[1], operands[2]); | |
1862 | }) | |
1863 | ||
;; Expander for SIMT-region entry.
;;   operand 0: result register
;;   operand 1: size, register or constant
;;   operand 2: alignment, constant
;; Before emitting the Pmode insn it updates per-function state in
;; cfun->machine:
;;   - simt_stack_size becomes HOST_WIDE_INT_M1U when operand 1 is not a
;;     constant, otherwise the maximum of operand 1 and the previous value;
;;   - simt_stack_align becomes the maximum of operand 2 and the previous
;;     value;
;;   - has_simtreg is set.
1864 | (define_expand "omp_simt_enter" | |
1865 | [(match_operand 0 "nvptx_register_operand" "=R") | |
1866 | (match_operand 1 "nvptx_nonmemory_operand" "Ri") | |
1867 | (match_operand 2 "const_int_operand" "n")] | |
1868 | "" | |
1869 | { | |
1870 | if (!CONST_INT_P (operands[1])) | |
1871 | cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U; | |
1872 | else | |
1873 | cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]), | |
1874 | cfun->machine->simt_stack_size); | |
1875 | cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]), | |
1876 | cfun->machine->simt_stack_align); | |
1877 | cfun->machine->has_simtreg = true; | |
8b72af17 | 1878 | emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1], operands[2]));
8b243438 TV |
1879 | DONE; |
1880 | }) | |
1881 | ||
;; Expander for SIMT-region exit.  Emits the Pmode omp_simt_exit insn on
;; operand 0, followed by nvptx_warpsync when TARGET_PTX_6_0 holds and by
;; nvptx_uniform_warp_check otherwise.
1882 | (define_expand "omp_simt_exit" | |
1883 | [(match_operand 0 "nvptx_register_operand" "R")] | |
1884 | "" | |
1885 | { | |
8b72af17 | 1886 | emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
a624388b TV |
1887 | if (TARGET_PTX_6_0) |
1888 | emit_insn (gen_nvptx_warpsync ()); | |
1889 | else | |
1890 | emit_insn (gen_nvptx_uniform_warp_check ()); | |
0c6b03b5 AM |
1891 | DONE; |
1892 | }) | |
1893 | ||
;; SIMT-region exit insn, parameterized over the pointer mode P.  The
;; assembly text is produced by nvptx_output_simt_exit from operand 0.
8b72af17 | 1894 | (define_insn "@omp_simt_exit_<mode>"
8b243438 | 1895 | [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
0c6b03b5 AM |
1896 | UNSPECV_SIMT_EXIT)] |
1897 | "" | |
1898 | { | |
1899 | return nvptx_output_simt_exit (operands[0]); | |
1900 | }) | |
1901 | ||
;; Emits a single "mov.u32" from the %laneid special register into SImode
;; operand 0.
5012919d AM |
1902 | ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index |
1903 | (define_insn "omp_simt_lane" | |
1904 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
1905 | (unspec:SI [(const_int 0)] UNSPEC_LANEID))] | |
1906 | "" | |
1907 | "%.\\tmov.u32\\t%0, %%laneid;") | |
1908 | ||
;; Expansion is two steps: a plain move of operand 1 into operand 0, then an
;; nvptx_nounroll insn, which serves as the barrier mentioned below.
1909 | ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and | |
1910 | ;; place a compiler barrier to disallow unrolling/peeling the containing loop | |
1911 | (define_expand "omp_simt_ordered" | |
1912 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1913 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1914 | "" | |
1915 | { | |
1916 | emit_move_insn (operands[0], operands[1]); | |
1917 | emit_insn (gen_nvptx_nounroll ()); | |
1918 | DONE; | |
1919 | }) | |
1920 | ||
;; The shuffle insn(s) are built by nvptx_gen_shuffle with kind SHUFFLE_BFLY.
;; Operands 0 and 1 have no fixed mode; their predicate also accepts complex
;; DI/DF registers (per the predicate's name).
1921 | ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange | |
1922 | ;; across lanes | |
1923 | (define_expand "omp_simt_xchg_bfly" | |
c2e0d0c1 TV |
1924 | [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R") |
1925 | (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R") | |
5012919d AM |
1926 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] |
1927 | "" | |
1928 | { | |
1929 | emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2], | |
1930 | SHUFFLE_BFLY)); | |
1931 | DONE; | |
1932 | }) | |
1933 | ||
;; The shuffle insn(s) are built by nvptx_gen_shuffle with kind SHUFFLE_IDX.
;; Operands 0 and 1 have no fixed mode; their predicate also accepts complex
;; DI/DF registers (per the predicate's name).
1934 | ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1 | |
1935 | ;; from lane given by index in operand 2 to operand 0 in all lanes | |
1936 | (define_expand "omp_simt_xchg_idx" | |
c2e0d0c1 TV |
1937 | [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R") |
1938 | (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R") | |
5012919d AM |
1939 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] |
1940 | "" | |
1941 | { | |
1942 | emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2], | |
1943 | SHUFFLE_IDX)); | |
1944 | DONE; | |
1945 | }) | |
1946 | ||
;; Expansion: compute a fresh BImode predicate (operand 1 != 0), then emit
;; nvptx_vote_ballot on it, writing the ballot result to operand 0.
1947 | ;; Implement IFN_GOMP_SIMT_VOTE_ANY: | |
1948 | ;; set operand 0 to zero iff all lanes supply zero in operand 1 | |
1949 | (define_expand "omp_simt_vote_any" | |
1950 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1951 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1952 | "" | |
1953 | { | |
1954 | rtx pred = gen_reg_rtx (BImode); | |
1955 | emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx)); | |
1956 | emit_insn (gen_nvptx_vote_ballot (operands[0], pred)); | |
1957 | DONE; | |
1958 | }) | |
1959 | ||
;; Expansion: compute a fresh BImode predicate (operand 1 != 0), ballot it
;; into a temporary SImode register, then apply ctzsi2 to the ballot mask and
;; store the result in operand 0.
1960 | ;; Implement IFN_GOMP_SIMT_LAST_LANE: | |
1961 | ;; set operand 0 to the lowest lane index that passed non-zero in operand 1 | |
1962 | (define_expand "omp_simt_last_lane" | |
1963 | [(match_operand:SI 0 "nvptx_register_operand" "=R") | |
1964 | (match_operand:SI 1 "nvptx_register_operand" "R")] | |
1965 | "" | |
1966 | { | |
1967 | rtx pred = gen_reg_rtx (BImode); | |
1968 | rtx tmp = gen_reg_rtx (SImode); | |
1969 | emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx)); | |
1970 | emit_insn (gen_nvptx_vote_ballot (tmp, pred)); | |
1971 | emit_insn (gen_ctzsi2 (operands[0], tmp)); | |
1972 | DONE; | |
1973 | }) | |
1974 | ||
;; Note the operand numbering: the two SImode outputs are operands 0 and 1,
;; and the BITD-mode input is operand 2.  A single "mov.b64" with a brace
;; list {%0,%1} as destination does the split.
d88cd9c4 NS |
1975 | ;; extract parts of a 64 bit object into 2 32-bit ints |
1976 | (define_insn "unpack<mode>si2" | |
1977 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1978 | (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R") | |
1979 | (const_int 0)] UNSPEC_BIT_CONV)) | |
1980 | (set (match_operand:SI 1 "nvptx_register_operand" "=R") | |
1981 | (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))] | |
1982 | "" | |
1983 | "%.\\tmov.b64\\t{%0,%1}, %2;") | |
1984 | ||
;; Inverse of unpack<mode>si2: SImode operands 1 and 2 form the brace list
;; {%1,%2} that a single "mov.b64" moves into BITD-mode operand 0.
1985 | ;; pack 2 32-bit ints into a 64 bit object | |
1986 | (define_insn "packsi<mode>2" | |
1987 | [(set (match_operand:BITD 0 "nvptx_register_operand" "=R") | |
1988 | (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R") | |
1989 | (match_operand:SI 2 "nvptx_register_operand" "R")] | |
1990 | UNSPEC_BIT_CONV))] | |
1991 | "" | |
1992 | "%.\\tmov.b64\\t%0, {%1,%2};") | |
1993 | ||
738f2522 BS |
1994 | ;; Atomic insns. |
1995 | ||
;; Compare-and-swap expander.
;; Step 1: emit the CAS itself.  If nvptx_mem_local_p says the memory operand
;; is local, the "_1_local" insn is used, otherwise the "_1" insn.  Both
;; receive oldval, memory, expected, newval and the success model
;; (operand 6).  The is_weak flag (operand 5) and the failure model
;; (operand 7) are not used.
;; Step 2: derive the boolean result.  A BImode condition
;; "oldval == expected" is computed and sel_truesi turns it into 1 or 0 in
;; operand 0.
1996 | (define_expand "atomic_compare_and_swap<mode>" | |
1997 | [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output | |
1998 | (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output | |
1999 | (match_operand:SDIM 2 "memory_operand") ;; memory | |
2000 | (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input | |
2001 | (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input | |
2002 | (match_operand:SI 5 "const_int_operand") ;; is_weak | |
2003 | (match_operand:SI 6 "const_int_operand") ;; success model | |
2004 | (match_operand:SI 7 "const_int_operand")] ;; failure model | |
2005 | "" | |
2006 | { | |
04b54cc4 TV |
2007 | if (nvptx_mem_local_p (operands[2])) |
2008 | emit_insn (gen_atomic_compare_and_swap<mode>_1_local | |
2009 | (operands[1], operands[2], operands[3], operands[4], | |
2010 | operands[6])); | |
2011 | else | |
2012 | emit_insn (gen_atomic_compare_and_swap<mode>_1 | |
2013 | (operands[1], operands[2], operands[3], operands[4], | |
2014 | operands[6])); | |
41c3713a NS |
2015 | |
2016 | rtx cond = gen_reg_rtx (BImode); | |
2017 | emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); | |
2018 | emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0))); | |
738f2522 BS |
2019 | DONE; |
2020 | }) | |
2021 | ||
;; Compare-and-swap on memory that the expander found to be local.  No
;; "atom" instruction is emitted.  The output is a braced sequence that:
;;   1. declares a predicate %eq_p and a value register %val,
;;   2. loads the memory operand into %val,
;;   3. sets %eq_p to (%val == expected),
;;   4. stores the new value, guarded by @%eq_p,
;;   5. moves %val (the old contents) into operand 0.
;; The memory-model operand 4 does not appear in the output.  The insn is
;; marked non-predicable, and the lines are printed without a "%." prefix.
04b54cc4 | 2022 | (define_insn "atomic_compare_and_swap<mode>_1_local"
738f2522 BS |
2023 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") |
2024 | (unspec_volatile:SDIM | |
2025 | [(match_operand:SDIM 1 "memory_operand" "+m") | |
6c164570 NS |
2026 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") |
2027 | (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") | |
738f2522 | 2028 | (match_operand:SI 4 "const_int_operand")]
04b54cc4 | 2029 | UNSPECV_CAS_LOCAL))
738f2522 | 2030 | (set (match_dup 1)
04b54cc4 | 2031 | (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))]
738f2522 | 2032 | ""
58f7c7e0 | 2033 | {
e0451f93 TV |
2034 | output_asm_insn ("{", NULL); |
2035 | output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL); | |
2036 | output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); | |
2037 | output_asm_insn ("\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); | |
2038 | output_asm_insn ("\\t" "setp.eq%t0" "\\t" "%%eq_p, %%val, %2;", | |
2039 | operands); | |
2040 | output_asm_insn ("@%%eq_p\\t" "st%A1%t0" "\\t" "%1,%3;", operands); | |
2041 | output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands); | |
2042 | output_asm_insn ("}", NULL); | |
2043 | return ""; | |
04b54cc4 | 2044 | }
3357878e | 2045 | [(set_attr "predicable" "no")])
04b54cc4 TV |
2046 | |
;; Compare-and-swap on non-local memory: a single "atom...cas" instruction.
;;   operand 0: old value (result)
;;   operand 1: memory
;;   operand 2: expected value
;;   operand 3: new value
;;   operand 4: memory model
;; The template is passed to nvptx_output_atomic_insn together with the
;; indices of the memory operand (1) and the model operand (4).  The insn
;; carries the "atomic" attribute.
2047 | (define_insn "atomic_compare_and_swap<mode>_1" | |
2048 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
2049 | (unspec_volatile:SDIM | |
2050 | [(match_operand:SDIM 1 "memory_operand" "+m") | |
2051 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") | |
2052 | (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") | |
2053 | (match_operand:SI 4 "const_int_operand")] | |
2054 | UNSPECV_CAS)) | |
2055 | (set (match_dup 1) | |
2056 | (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] | |
2057 | "" | |
2058 | { | |
58f7c7e0 | 2059 | const char *t
3ebcc053 | 2060 | = "%.\\tatom%A1.cas.b%T0\\t%x0, %1, %2, %3;";
58f7c7e0 TV |
2061 | return nvptx_output_atomic_insn (t, operands, 1, 4); |
2062 | } | |
04b54cc4 | 2063 | [(set_attr "atomic" "true")])
738f2522 BS |
2064 | |
;; Atomic exchange: store operand 2 to memory operand 1 and return the
;; previous contents in operand 0.
;; If nvptx_mem_local_p reports the memory as local, a braced non-atomic
;; sequence is emitted instead: load into a scratch %val, store the new
;; value, move %val to operand 0.
;; Otherwise a single "atom...exch" is emitted through
;; nvptx_output_atomic_insn (memory operand index 1, model operand index 3).
2065 | (define_insn "atomic_exchange<mode>" | |
2066 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output | |
2067 | (unspec_volatile:SDIM | |
2068 | [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory | |
2069 | (match_operand:SI 3 "const_int_operand")] ;; model | |
2070 | UNSPECV_XCHG)) | |
2071 | (set (match_dup 1) | |
6c164570 | 2072 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
738f2522 | 2073 | ""
58f7c7e0 | 2074 | {
04b54cc4 | 2075 | if (nvptx_mem_local_p (operands[1]))
e0451f93 TV |
2076 | { |
2077 | output_asm_insn ("{", NULL); | |
2078 | output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); | |
2079 | output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); | |
2080 | output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%2;", operands); | |
2081 | output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); | |
2082 | output_asm_insn ("}", NULL); | |
2083 | return ""; | |
2084 | } | |
58f7c7e0 | 2085 | const char *t
3ebcc053 | 2086 | = "%.\tatom%A1.exch.b%T0\t%x0, %1, %2;";
58f7c7e0 TV |
2087 | return nvptx_output_atomic_insn (t, operands, 1, 3); |
2088 | } | |
5012919d | 2089 | [(set_attr "atomic" "true")])
738f2522 | 2090 | |
;; Atomic store expander.  Cases are tried in this order:
;;   1. The address decomposes to a base register for which
;;      REGNO_PTR_FRAME_P holds: emit an ordinary move.
;;   2. TARGET_SM70: emit nvptx_atomic_store_sm70<mode>.
;;   3. nvptx_mem_maybe_shared_p is false: FAIL, so the generic
;;      expand_atomic_store path is used.
;;   4. Otherwise: emit nvptx_atomic_store<mode>.
3e7d4e82 TV |
2091 | (define_expand "atomic_store<mode>" |
2092 | [(match_operand:SDIM 0 "memory_operand" "=m") ;; memory | |
2093 | (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input | |
2094 | (match_operand:SI 2 "const_int_operand")] ;; model | |
2095 | "" | |
2096 | { | |
2097 | struct address_info info; | |
2098 | decompose_mem_address (&info, operands[0]); | |
2099 | if (info.base != NULL && REG_P (*info.base) | |
2100 | && REGNO_PTR_FRAME_P (REGNO (*info.base))) | |
2101 | { | |
2102 | emit_insn (gen_mov<mode> (operands[0], operands[1])); | |
2103 | DONE; | |
2104 | } | |
2105 | |
2106 | if (TARGET_SM70) | |
19a13d5a | 2107 | {
69cb3f2a TV |
2108 | emit_insn (gen_nvptx_atomic_store_sm70<mode> (operands[0], operands[1], |
2109 | operands[2])); | |
19a13d5a TV |
2110 | DONE; |
2111 | } | |
3e7d4e82 TV |
2112 | |
2113 | bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]); | |
2114 | if (!maybe_shared_p) | |
2115 | /* Fall back to expand_atomic_store. */ | |
2116 | FAIL; | |
2117 | |
69cb3f2a TV |
2118 | emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1], |
2119 | operands[2])); | |
3e7d4e82 TV |
2120 | DONE; |
2121 | }) | |
2122 | ||
;; Atomic store for TARGET_SM70.  The template is an "st" instruction routed
;; through nvptx_output_atomic_insn (memory operand index 0, model operand
;; index 2).  The "atomic" attribute is explicitly "false".
69cb3f2a | 2123 | (define_insn "nvptx_atomic_store_sm70<mode>"
19a13d5a TV |
2124 | [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory |
2125 | (unspec_volatile:SDIM | |
2126 | [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input | |
2127 | (match_operand:SI 2 "const_int_operand")] ;; model | |
2128 | UNSPECV_ST))] | |
2129 | "TARGET_SM70" | |
2130 | { | |
2131 | const char *t | |
2132 | = "%.\tst%A0.b%T0\t%0, %1;"; | |
2133 | return nvptx_output_atomic_insn (t, operands, 0, 2); | |
2134 | } | |
9ed52438 | 2135 | [(set_attr "atomic" "false")]) ;; Note: st is not an atomic insn.
19a13d5a | 2136 | |
;; Atomic store when TARGET_SM70 is not available.  Implemented as an
;; "atom...exch" whose destination is written as "_" in the template, routed
;; through nvptx_output_atomic_insn (memory operand index 0, model operand
;; index 2).  The RTL pattern is the same as nvptx_atomic_store_sm70<mode>;
;; the two are told apart only by their conditions.
69cb3f2a TV |
2137 | (define_insn "nvptx_atomic_store<mode>" |
2138 | [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory | |
2139 | (unspec_volatile:SDIM | |
2140 | [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input | |
2141 | (match_operand:SI 2 "const_int_operand")] ;; model | |
2142 | UNSPECV_ST))] | |
2143 | "!TARGET_SM70" | |
2144 | { | |
2145 | const char *t | |
2146 | = "%.\tatom%A0.exch.b%T0\t_, %0, %1;"; | |
2147 | return nvptx_output_atomic_insn (t, operands, 0, 2); | |
2148 | } | |
2149 | [(set_attr "atomic" "true")]) | |
2150 | ||
;; Atomic fetch-and-add on an SDIM memory operand.
;;   operand 0: old value (result)
;;   operand 1: memory
;;   operand 2: addend
;;   operand 3: memory model
;; If nvptx_mem_local_p reports the memory as local, a braced non-atomic
;; sequence is emitted: load into %val, add into %update, store %update, move
;; %val to operand 0.
;; Otherwise a single "atom...add" is emitted through
;; nvptx_output_atomic_insn (memory operand index 1, model operand index 3).
738f2522 BS |
2151 | (define_insn "atomic_fetch_add<mode>" |
2152 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
2153 | (unspec_volatile:SDIM | |
2154 | [(plus:SDIM (match_dup 1) | |
2155 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
2156 | (match_operand:SI 3 "const_int_operand")] ;; model | |
2157 | UNSPECV_LOCK)) | |
2158 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
2159 | (match_dup 1))] | |
2160 | "" | |
58f7c7e0 | 2161 | {
04b54cc4 | 2162 | if (nvptx_mem_local_p (operands[1]))
e0451f93 TV |
2163 | { |
2164 | output_asm_insn ("{", NULL); | |
2165 | output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); | |
2166 | output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands); | |
2167 | output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); | |
2168 | output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;", | |
2169 | operands); | |
2170 | output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands); | |
2171 | output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); | |
2172 | output_asm_insn ("}", NULL); | |
2173 | return ""; | |
2174 | } | |
58f7c7e0 | 2175 | const char *t
3ebcc053 | 2176 | = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
58f7c7e0 TV |
2177 | return nvptx_output_atomic_insn (t, operands, 1, 3); |
2178 | } | |
5012919d | 2179 | [(set_attr "atomic" "true")])
738f2522 BS |
2180 | |
;; SFmode counterpart of atomic_fetch_add<mode>.  The addend (operand 2) uses
;; the "RF" constraint instead of "Ri"; the output code is otherwise the
;; same.  For local memory it emits a braced ld / add / st / mov sequence;
;; otherwise a single "atom...add" through nvptx_output_atomic_insn (memory
;; operand index 1, model operand index 3).
2181 | (define_insn "atomic_fetch_addsf" | |
2182 | [(set (match_operand:SF 1 "memory_operand" "+m") | |
2183 | (unspec_volatile:SF | |
2184 | [(plus:SF (match_dup 1) | |
2185 | (match_operand:SF 2 "nvptx_nonmemory_operand" "RF")) | |
2186 | (match_operand:SI 3 "const_int_operand")] ;; model | |
2187 | UNSPECV_LOCK)) | |
2188 | (set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
2189 | (match_dup 1))] | |
2190 | "" | |
58f7c7e0 | 2191 | {
04b54cc4 | 2192 | if (nvptx_mem_local_p (operands[1]))
e0451f93 TV |
2193 | { |
2194 | output_asm_insn ("{", NULL); | |
2195 | output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); | |
2196 | output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands); | |
2197 | output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); | |
2198 | output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;", | |
2199 | operands); | |
2200 | output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands); | |
2201 | output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); | |
2202 | output_asm_insn ("}", NULL); | |
2203 | return ""; | |
2204 | } | |
58f7c7e0 | 2205 | const char *t
3ebcc053 | 2206 | = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
58f7c7e0 TV |
2207 | return nvptx_output_atomic_insn (t, operands, 1, 3); |
2208 | } | |
5012919d | 2209 | [(set_attr "atomic" "true")])
738f2522 | 2210 | |
;; Atomic fetch-and-<logic>, one insn per any_logic code and SDIM mode.
;; Enabled for SImode always, and for other modes only with TARGET_SM35.
;; If nvptx_mem_local_p reports the memory as local, a braced non-atomic
;; sequence is emitted: load into %val, apply <logic>.b<size> into %update,
;; store %update, move %val to operand 0.  The scratch registers are
;; declared with a ".b" bit type here.
;; Otherwise a single "atom...<logic>" is emitted through
;; nvptx_output_atomic_insn (memory operand index 1, model operand index 3).
738f2522 BS |
2211 | (define_insn "atomic_fetch_<logic><mode>" |
2212 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
2213 | (unspec_volatile:SDIM | |
2214 | [(any_logic:SDIM (match_dup 1) | |
2215 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
2216 | (match_operand:SI 3 "const_int_operand")] ;; model | |
2217 | UNSPECV_LOCK)) | |
2218 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
2219 | (match_dup 1))] | |
69d7aabf | 2220 | "<MODE>mode == SImode || TARGET_SM35"
58f7c7e0 | 2221 | {
04b54cc4 | 2222 | if (nvptx_mem_local_p (operands[1]))
e0451f93 TV |
2223 | { |
2224 | output_asm_insn ("{", NULL); | |
2225 | output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands); | |
2226 | output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%update;", operands); | |
2227 | output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); | |
2228 | output_asm_insn ("%.\\t" "<logic>.b%T0" "\\t" "%%update,%%val,%2;", | |
2229 | operands); | |
2230 | output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands); | |
2231 | output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); | |
2232 | output_asm_insn ("}", NULL); | |
2233 | return ""; | |
2234 | } | |
58f7c7e0 | 2235 | const char *t
3ebcc053 | 2236 | = "%.\\tatom%A1.<logic>.b%T0\\t%x0, %1, %2;";
58f7c7e0 TV |
2237 | return nvptx_output_atomic_insn (t, operands, 1, 3); |
2238 | } | |
2239 | |
5012919d | 2240 | [(set_attr "atomic" "true")])
d88cd9c4 | 2241 | |
;; Atomic test-and-set, implemented as a library call rather than inline
;; code.  The address of the QImode memory operand is converted to ptr_mode
;; and passed, together with the memory model (operand 2), to
;; "__atomic_test_and_set_1".  The SImode return value is placed in
;; operand 0.
15545563 TV |
2242 | (define_expand "atomic_test_and_set" |
2243 | [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output | |
2244 | (match_operand:QI 1 "memory_operand") ;; memory | |
2245 | (match_operand:SI 2 "const_int_operand")] ;; model | |
2246 | "" | |
2247 | { | |
2248 | rtx libfunc; | |
2249 | rtx addr; | |
2250 | libfunc = init_one_libfunc ("__atomic_test_and_set_1"); | |
2251 | addr = convert_memory_address (ptr_mode, XEXP (operands[1], 0)); | |
2252 | emit_library_call_value (libfunc, operands[0], LCT_NORMAL, SImode, | |
2253 | addr, ptr_mode, | |
2254 | operands[2], SImode); | |
2255 | DONE; | |
2256 | }) | |
2257 | ||
;; Barrier synchronization.
;;   operand 0: first instruction operand (register or immediate)
;;   operand 1: constant; 0 selects the one-operand form, any other value is
;;              printed as a second instruction operand
;; Output by case:
;;   operand 1 == 0, PTX 6.0:  barrier.sync.aligned %0
;;   operand 1 == 0, older:    bar.sync %0
;;   operand 1 != 0, PTX 6.0:  barrier.sync %0, %1
;;   operand 1 != 0, older:    bar.sync %0, %1
;; Marked non-predicable; the templates have no "%." prefix.
d88cd9c4 | 2258 | (define_insn "nvptx_barsync"
1dcf2688 TV |
2259 | [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri") |
2260 | (match_operand:SI 1 "const_int_operand")] | |
d88cd9c4 NS |
2261 | UNSPECV_BARSYNC)] |
2262 | "" | |
1dcf2688 TV |
2263 | { |
2264 | if (INTVAL (operands[1]) == 0) | |
57f971f9 TV |
2265 | return (TARGET_PTX_6_0 |
2266 | ? "\\tbarrier.sync.aligned\\t%0;" | |
2267 | : "\\tbar.sync\\t%0;"); | |
1dcf2688 | 2268 | else
57f971f9 TV |
2269 | return (TARGET_PTX_6_0 |
2270 | ? "\\tbarrier.sync\\t%0, %1;" | |
2271 | : "\\tbar.sync\\t%0, %1;"); | |
1dcf2688 | 2272 | }
3357878e | 2273 | [(set_attr "predicable" "no")])
5012919d | 2274 | |
;; Warp-level synchronization: emits "bar.warp.sync 0xffffffff".  Only
;; enabled when TARGET_PTX_6_0 holds.
bba61d40 TV |
2275 | (define_insn "nvptx_warpsync" |
2276 | [(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)] | |
2277 | "TARGET_PTX_6_0" | |
f07178ca | 2278 | "%.\\tbar.warp.sync\\t0xffffffff;")
bba61d40 | 2279 | |
;; Iterator over the three barrier-reduction unspecs, with per-unspec
;; attributes used by the "nvptx_barred_<barred_op>" pattern:
;;   barred_op      - operation name used in the pattern name and the opcode
;;   barred_mode    - mode of the result operand (BI for and/or, SI for popc)
;;   barred_ptxtype - type suffix in the opcode (pred for and/or, u32 for popc)
623daaf8 CLT |
2280 | (define_int_iterator BARRED |
2281 | [UNSPECV_BARRED_AND | |
2282 | UNSPECV_BARRED_OR | |
2283 | UNSPECV_BARRED_POPC]) | |
2284 | (define_int_attr barred_op | |
2285 | [(UNSPECV_BARRED_AND "and") | |
2286 | (UNSPECV_BARRED_OR "or") | |
2287 | (UNSPECV_BARRED_POPC "popc")]) | |
2288 | (define_int_attr barred_mode | |
2289 | [(UNSPECV_BARRED_AND "BI") | |
2290 | (UNSPECV_BARRED_OR "BI") | |
2291 | (UNSPECV_BARRED_POPC "SI")]) | |
2292 | (define_int_attr barred_ptxtype | |
2293 | [(UNSPECV_BARRED_AND "pred") | |
2294 | (UNSPECV_BARRED_OR "pred") | |
2295 | (UNSPECV_BARRED_POPC "u32")]) | |
2296 | ||
;; Barrier with reduction, one insn per BARRED unspec (and / or / popc).
;;   operand 0: result, in <barred_mode>
;;   operands 1, 2: registers or immediates, printed as the second and third
;;                  instruction operands
;;   operand 3: constant, printed through the "%p3" modifier directly in
;;              front of operand 4
;;   operand 4: BImode predicate register
;; Emits "bar.red.<barred_op>.<barred_ptxtype>".  Marked non-predicable; the
;; template has no "%." prefix.
;; NOTE(review): "%p" is expanded by the target's operand printer (not
;; visible here); presumably it prints an optional negation for the
;; predicate -- confirm.
2297 | (define_insn "nvptx_barred_<barred_op>" | |
2298 | [(set (match_operand:<barred_mode> 0 "nvptx_register_operand" "=R") | |
2299 | (unspec_volatile | |
2300 | [(match_operand:SI 1 "nvptx_nonmemory_operand" "Ri") | |
2301 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") | |
2302 | (match_operand:SI 3 "const_int_operand" "i") | |
2303 | (match_operand:BI 4 "nvptx_register_operand" "R")] | |
2304 | BARRED))] | |
2305 | "" | |
2306 | "\\tbar.red.<barred_op>.<barred_ptxtype> \\t%0, %1, %2, %p3%4;" | |
2307 | [(set_attr "predicable" "no")]) | |
2308 | ||
;; Run-time check that emits a braced sequence which:
;;   1. declares %r_act and fills it with "vote.ballot.b32 ..., 1",
;;   2. declares the predicate %r_do_abort, clears it, then sets it to
;;      (%r_act != 0xffffffff),
;;   3. executes "trap" and "exit" guarded by %r_do_abort.
;; The lines are held in a NULL-terminated array and printed one by one with
;; output_asm_insn; no operands are involved.
f32f74c2 TV |
2309 | (define_insn "nvptx_uniform_warp_check" |
2310 | [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)] | |
2311 | "" | |
2312 | { | |
f07178ca TV |
2313 | const char *insns[] = { |
2314 | "{", | |
24ee4319 TV |
2315 | "\\t" ".reg.b32" "\\t" "%%r_act;", |
2316 | "%.\\t" "vote.ballot.b32" "\\t" "%%r_act,1;", | |
2317 | "\\t" ".reg.pred" "\\t" "%%r_do_abort;", | |
2318 | "\\t" "mov.pred" "\\t" "%%r_do_abort,0;", | |
2319 | "%.\\t" "setp.ne.b32" "\\t" "%%r_do_abort,%%r_act," | |
2320 | "0xffffffff;", | |
2321 | "@ %%r_do_abort\\t" "trap;", | |
2322 | "@ %%r_do_abort\\t" "exit;", | |
f07178ca TV |
2323 | "}", |
2324 | NULL | |
2325 | }; | |
2326 | for (const char **p = &insns[0]; *p != NULL; p++) | |
2327 | output_asm_insn (*p, NULL); | |
f32f74c2 | 2328 | return "";
f07178ca | 2329 | })
f32f74c2 | 2330 | |
;; Expander for the standard "memory_barrier" pattern name.  It takes no
;; operands from the caller: the preparation code builds operands[0] as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH, and the template
;; then sets that MEM to an UNSPECV_MEMBAR unspec_volatile of itself.
f04fd903 TV |
2331 | (define_expand "memory_barrier" |
2332 | [(set (match_dup 0) | |
2333 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))] | |
2334 | "" | |
2335 | { | |
2336 | operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
2337 | MEM_VOLATILE_P (operands[0]) = 1; | |
2338 | }) | |
2339 | ||
2340 | ;; PTX defines the memory barriers membar.cta, membar.gl and membar.sys | |
2341 | ;; (corresponding to the CUDA functions threadfence_block, threadfence and | |
2342 | ;; threadfence_system). For the insn memory_barrier we use membar.sys. This | |
2343 | ;; may be overly conservative, but before using membar.gl instead we'll need | |
2344 | ;; to explain in detail why it's safe to use. For now, use membar.sys. | |
;; Insn with the same RTL shape as the memory_barrier expander produces
;; (a BLKmode set from an UNSPECV_MEMBAR unspec_volatile of the same
;; operand).  It emits a single membar.sys and is not predicable.
2345 | (define_insn "*memory_barrier" | |
2346 | [(set (match_operand:BLK 0 "" "") | |
2347 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))] | |
2348 | "" | |
2349 | "\\tmembar.sys;" | |
3357878e | 2350 | [(set_attr "predicable" "no")]) |
f04fd903 | 2351 | |
;; Expander for a membar.cta barrier.  It takes no operands from the
;; caller: operands[0] is built here as a volatile BLKmode MEM with a Pmode
;; SCRATCH address, and is then set to an UNSPECV_MEMBAR_CTA
;; unspec_volatile of itself.
21251395 TV |
2352 | (define_expand "nvptx_membar_cta" |
2353 | [(set (match_dup 0) | |
2354 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))] | |
2355 | "" | |
2356 | { | |
2357 | operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
2358 | MEM_VOLATILE_P (operands[0]) = 1; | |
2359 | }) | |
2360 | ||
;; Insn with the same RTL shape as the nvptx_membar_cta expander produces.
;; It emits a single membar.cta and is not predicable.
2361 | (define_insn "*nvptx_membar_cta" | |
2362 | [(set (match_operand:BLK 0 "" "") | |
2363 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))] | |
2364 | "" | |
2365 | "\\tmembar.cta;" | |
3357878e | 2366 | [(set_attr "predicable" "no")]) |
21251395 | 2367 | |
;; Expander for a membar.gl barrier.  It takes no operands from the caller:
;; operands[0] is built here as a volatile BLKmode MEM with a Pmode SCRATCH
;; address, and is then set to an UNSPECV_MEMBAR_GL unspec_volatile of
;; itself.
ca902055 TV |
2368 | (define_expand "nvptx_membar_gl" |
2369 | [(set (match_dup 0) | |
2370 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))] | |
2371 | "" | |
2372 | { | |
2373 | operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
2374 | MEM_VOLATILE_P (operands[0]) = 1; | |
2375 | }) | |
2376 | ||
;; Insn with the same RTL shape as the nvptx_membar_gl expander produces.
;; It emits a single membar.gl and is not predicable.
2377 | (define_insn "*nvptx_membar_gl" | |
2378 | [(set (match_operand:BLK 0 "" "") | |
2379 | (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))] | |
2380 | "" | |
2381 | "\\tmembar.gl;" | |
3357878e | 2382 | [(set_attr "predicable" "no")]) |
ca902055 | 2383 | |
;; Operand-less unspec_volatile insn that emits the PTX text
;; .pragma "nounroll" followed by a semicolon.  Not predicable.
5012919d AM |
2384 | (define_insn "nvptx_nounroll" |
2385 | [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)] | |
2386 | "" | |
2387 | "\\t.pragma \\\"nounroll\\\";" | |
3357878e | 2388 | [(set_attr "predicable" "no")]) |
f881693c TV |
2389 | |
;; Sets DImode operand 0 from an UNSPECV_RED_PART unspec_volatile of the
;; const_int operand 1.  The assembly text comes entirely from
;; nvptx_output_red_partition, which is defined outside this part of the
;; file.  Not predicable.
2390 | (define_insn "nvptx_red_partition" | |
2391 | [(set (match_operand:DI 0 "nonimmediate_operand" "=R") | |
bbd54be5 | 2392 | (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")] |
f881693c TV |
2393 | UNSPECV_RED_PART))] |
2394 | "" | |
2395 | { | |
2396 | return nvptx_output_red_partition (operands[0], operands[1]); | |
2397 | } | |
3357878e | 2398 | [(set_attr "predicable" "no")]) |
de12b919 RS |
2399 | |
2400 | ;; Expand QI mode operations using SI mode instructions. | |
;; The iterators below group the supported codes by how the QImode inputs
;; are widened in the expanders that follow: the any_s* codes are widened
;; with sign extension, the any_u* codes with zero extension.  Each code
;; attribute gives the stem of the pattern name, used both for the QImode
;; expander (<stem>qi3 / <stem>qi2) and for the SImode generator it calls
;; (gen_<stem>si3 / gen_<stem>si2).
2401 | (define_code_iterator any_sbinary [plus minus smin smax]) | |
2402 | (define_code_attr sbinary [(plus "add") (minus "sub") (smin "smin") (smax "smax")]) | |
2403 | ||
2404 | (define_code_iterator any_ubinary [and ior xor umin umax]) | |
2405 | (define_code_attr ubinary [(and "and") (ior "ior") (xor "xor") (umin "umin") | |
2406 | (umax "umax")]) | |
2407 | ||
2408 | (define_code_iterator any_sunary [neg abs]) | |
2409 | (define_code_attr sunary [(neg "neg") (abs "abs")]) | |
2410 | ||
2411 | (define_code_iterator any_uunary [not]) | |
2412 | (define_code_attr uunary [(not "one_cmpl")]) | |
2413 | ||
;; QImode plus/minus/smin/smax.  Both inputs are sign-extended to SImode
;; (convert_modes with unsignedp == 0), the SImode operation is emitted
;; into a fresh pseudo, and the result is truncated back into operand 0.
;; For MINUS the first input is additionally forced into a register.
;; NOTE(review): presumably because the SImode subtract pattern does not
;; accept a constant first operand -- confirm against subsi3.
2414 | (define_expand "<sbinary>qi3" | |
2415 | [(set (match_operand:QI 0 "nvptx_register_operand") | |
2416 | (any_sbinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand") | |
2417 | (match_operand:QI 2 "nvptx_nonmemory_operand")))] | |
2418 | "" | |
2419 | { | |
2420 | rtx reg = gen_reg_rtx (SImode); | |
2421 | rtx op0 = convert_modes (SImode, QImode, operands[1], 0); | |
2422 | rtx op1 = convert_modes (SImode, QImode, operands[2], 0); | |
2423 | if (<CODE> == MINUS) | |
2424 | op0 = force_reg (SImode, op0); | |
2425 | emit_insn (gen_<sbinary>si3 (reg, op0, op1)); | |
2426 | emit_insn (gen_truncsiqi2 (operands[0], reg)); | |
2427 | DONE; | |
2428 | }) | |
2429 | ||
;; QImode and/ior/xor/umin/umax.  Both inputs are zero-extended to SImode
;; (convert_modes with unsignedp == 1), the SImode operation is emitted
;; into a fresh pseudo, and the result is truncated back into operand 0.
2430 | (define_expand "<ubinary>qi3" | |
2431 | [(set (match_operand:QI 0 "nvptx_register_operand") | |
2432 | (any_ubinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand") | |
2433 | (match_operand:QI 2 "nvptx_nonmemory_operand")))] | |
2434 | "" | |
2435 | { | |
2436 | rtx reg = gen_reg_rtx (SImode); | |
2437 | rtx op0 = convert_modes (SImode, QImode, operands[1], 1); | |
2438 | rtx op1 = convert_modes (SImode, QImode, operands[2], 1); | |
2439 | emit_insn (gen_<ubinary>si3 (reg, op0, op1)); | |
2440 | emit_insn (gen_truncsiqi2 (operands[0], reg)); | |
2441 | DONE; | |
2442 | }) | |
2443 | ||
;; QImode neg/abs.  The input is sign-extended to SImode (convert_modes
;; with unsignedp == 0), the SImode operation is emitted into a fresh
;; pseudo, and the result is truncated back into operand 0.
2444 | (define_expand "<sunary>qi2" | |
2445 | [(set (match_operand:QI 0 "nvptx_register_operand") | |
2446 | (any_sunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))] | |
2447 | "" | |
2448 | { | |
2449 | rtx reg = gen_reg_rtx (SImode); | |
2450 | rtx op0 = convert_modes (SImode, QImode, operands[1], 0); | |
2451 | emit_insn (gen_<sunary>si2 (reg, op0)); | |
2452 | emit_insn (gen_truncsiqi2 (operands[0], reg)); | |
2453 | DONE; | |
2454 | }) | |
2455 | ||
;; QImode not (pattern name one_cmplqi2).  The input is zero-extended to
;; SImode (convert_modes with unsignedp == 1), the SImode operation is
;; emitted into a fresh pseudo, and the result is truncated back into
;; operand 0.
2456 | (define_expand "<uunary>qi2" | |
2457 | [(set (match_operand:QI 0 "nvptx_register_operand") | |
2458 | (any_uunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))] | |
2459 | "" | |
2460 | { | |
2461 | rtx reg = gen_reg_rtx (SImode); | |
2462 | rtx op0 = convert_modes (SImode, QImode, operands[1], 1); | |
2463 | emit_insn (gen_<uunary>si2 (reg, op0)); | |
2464 | emit_insn (gen_truncsiqi2 (operands[0], reg)); | |
2465 | DONE; | |
2466 | }) | |
2467 | ||
;; QImode compare-and-store into an SImode result.  Both compared operands
;; are widened to SImode, with the signedness passed to convert_modes taken
;; from unsigned_condition_p on the comparison code.  The comparison is
;; rebuilt on the widened operands and computed into a BImode pseudo with
;; gen_cmpsi, and gen_setccsi_from_bi then converts that predicate into
;; operand 0.
2468 | (define_expand "cstoreqi4" | |
2469 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
2470 | (match_operator:SI 1 "nvptx_comparison_operator" | |
2471 | [(match_operand:QI 2 "nvptx_nonmemory_operand") | |
2472 | (match_operand:QI 3 "nvptx_nonmemory_operand")]))] | |
2473 | "" | |
2474 | { | |
2475 | rtx reg = gen_reg_rtx (BImode); | |
2476 | enum rtx_code code = GET_CODE (operands[1]); | |
2477 | int unsignedp = unsigned_condition_p (code); | |
2478 | rtx op2 = convert_modes (SImode, QImode, operands[2], unsignedp); | |
2479 | rtx op3 = convert_modes (SImode, QImode, operands[3], unsignedp); | |
2480 | rtx cmp = gen_rtx_fmt_ee (code, SImode, op2, op3); | |
2481 | emit_insn (gen_cmpsi (reg, cmp, op2, op3)); | |
2482 | emit_insn (gen_setccsi_from_bi (operands[0], reg)); | |
2483 | DONE; | |
2484 | }) | |
2485 | ||
;; Matches a sign_extend to SImode of an SImode register truncated to
;; QImode, and emits it as a single cvt.s32.s8.
2486 | (define_insn "*ext_truncsi2_qi" | |
2487 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
2488 | (sign_extend:SI | |
2489 | (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))] | |
2490 | "" | |
2491 | "%.\\tcvt.s32.s8\\t%0, %1;") | |
2492 | ||
;; Matches a zero_extend to SImode of an SImode register truncated to
;; QImode, and emits it as a single cvt.u32.u8.
2493 | (define_insn "*zext_truncsi2_qi" | |
2494 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
2495 | (zero_extend:SI | |
2496 | (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))] | |
2497 | "" | |
2498 | "%.\\tcvt.u32.u8\\t%0, %1;")