]>
Commit | Line | Data |
---|---|---|
8ce80784 | 1 | ;; Machine description for NVPTX. |
d353bf18 | 2 | ;; Copyright (C) 2014-2015 Free Software Foundation, Inc. |
8ce80784 | 3 | ;; Contributed by Bernd Schmidt <bernds@codesourcery.com> |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify | |
8 | ;; it under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, | |
13 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | ;; GNU General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
21 | (define_c_enum "unspec" [ ;; Tags for the (unspec ...) expressions used by the patterns in this file. | |
22 | UNSPEC_ARG_REG | |
23 | UNSPEC_FROM_GLOBAL | |
24 | UNSPEC_FROM_LOCAL | |
25 | UNSPEC_FROM_PARAM | |
26 | UNSPEC_FROM_SHARED | |
27 | UNSPEC_FROM_CONST | |
28 | UNSPEC_TO_GLOBAL | |
29 | UNSPEC_TO_LOCAL | |
30 | UNSPEC_TO_PARAM | |
31 | UNSPEC_TO_SHARED | |
32 | UNSPEC_TO_CONST | |
33 | ||
34 | UNSPEC_CPLX_LOWPART | |
35 | UNSPEC_CPLX_HIGHPART | |
36 | ||
37 | UNSPEC_COPYSIGN | |
38 | UNSPEC_LOG2 | |
39 | UNSPEC_EXP2 | |
40 | UNSPEC_SIN | |
41 | UNSPEC_COS | |
42 | ||
43 | UNSPEC_FPINT_FLOOR | |
44 | UNSPEC_FPINT_BTRUNC | |
45 | UNSPEC_FPINT_CEIL | |
46 | UNSPEC_FPINT_NEARBYINT | |
47 | ||
48 | UNSPEC_BITREV | |
49 | ||
50 | UNSPEC_ALLOCA | |
51 | ||
b3787ae4 | 52 | UNSPEC_DIM_SIZE |
53 | ||
54 | UNSPEC_SHARED_DATA | |
55 | ||
56 | UNSPEC_BIT_CONV | |
57 | ||
58 | UNSPEC_SHUFFLE | |
59 | UNSPEC_BR_UNIFIED | |
8ce80784 | 60 | ]) |
61 | ||
62 | (define_c_enum "unspecv" [ ;; Second tag enum; NOTE(review): presumably for unspec_volatile patterns defined elsewhere in the file -- confirm. | |
63 | UNSPECV_LOCK | |
64 | UNSPECV_CAS | |
65 | UNSPECV_XCHG | |
b3787ae4 | 66 | UNSPECV_BARSYNC |
67 | UNSPECV_DIM_POS | |
68 | ||
69 | UNSPECV_FORK | |
70 | UNSPECV_FORKED | |
71 | UNSPECV_JOINING | |
72 | UNSPECV_JOIN | |
8ce80784 | 73 | ]) |
74 | ||
75 | (define_attr "subregs_ok" "false,true" ;; Per-insn flag, "false" unless a pattern sets it; the move, extend and truncate insns below set it to "true". | |
76 | (const_string "false")) | |
77 | ||
78 | (define_predicate "nvptx_register_operand" ;; Accepts a REG that is not a hard register; rejects every SUBREG. | |
79 | (match_code "reg,subreg") | |
80 | { | |
81 | if (REG_P (op)) | |
82 | return !HARD_REGISTER_P (op); | |
83 | if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))) | |
84 | return false; | |
85 | if (GET_CODE (op) == SUBREG) /* Subsumes the MEM-specific SUBREG test above. */ | |
86 | return false; | |
87 | return register_operand (op, mode); /* Only reached when OP is neither REG nor SUBREG. */ | |
88 | }) | |
89 | ||
90 | (define_predicate "nvptx_reg_or_mem_operand" ;; Accepts a non-hard REG, or anything else passing memory_operand/register_operand; rejects every SUBREG. | |
91 | (match_code "mem,reg,subreg") | |
92 | { | |
93 | if (REG_P (op)) | |
94 | return !HARD_REGISTER_P (op); | |
95 | if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))) | |
96 | return false; | |
97 | if (GET_CODE (op) == SUBREG) /* Subsumes the MEM-specific SUBREG test above. */ | |
98 | return false; | |
99 | return memory_operand (op, mode) || register_operand (op, mode); /* REG and SUBREG already returned, so this decides the MEM case. */ | |
100 | }) | |
101 | ||
102 | ;; Allow symbolic constants: any SYMBOL_REF or CONST expression matches. | |
103 | (define_predicate "symbolic_operand" | |
104 | (match_code "symbol_ref,const")) | |
105 | ||
106 | ;; Allow registers or symbolic constants. We can allow frame, arg or stack | |
107 | ;; pointers here since they are actually symbolic constants. | |
108 | (define_predicate "nvptx_register_or_symbolic_operand" | |
109 | (match_code "reg,subreg,symbol_ref,const") | |
110 | { | |
111 | if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))) | |
112 | return false; | |
113 | if (GET_CODE (op) == SUBREG) /* Every SUBREG is rejected; this subsumes the test above. */ | |
114 | return false; | |
115 | if (CONSTANT_P (op)) /* Constants are accepted without further checks. */ | |
116 | return true; | |
117 | return register_operand (op, mode); /* No hard-register filter here, unlike nvptx_register_operand. */ | |
118 | }) | |
119 | ||
120 | ;; Registers or constants for normal instructions. Does not allow symbolic | |
121 | ;; constants. | |
122 | (define_predicate "nvptx_nonmemory_operand" | |
123 | (match_code "reg,subreg,const_int,const_double") | |
124 | { | |
125 | if (REG_P (op)) /* Registers: anything that is not a hard register. */ | |
126 | return !HARD_REGISTER_P (op); | |
127 | if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))) | |
128 | return false; | |
129 | if (GET_CODE (op) == SUBREG) /* Every SUBREG is rejected; this subsumes the test above. */ | |
130 | return false; | |
131 | return nonmemory_operand (op, mode); /* Remaining codes are const_int and const_double. */ | |
132 | }) | |
133 | ||
134 | ;; A source operand for a move instruction. In addition to registers, memory | |
135 | ;; and numeric constants, it accepts symbolic constants and label references. | |
136 | (define_predicate "nvptx_general_operand" | |
137 | (match_code "reg,subreg,mem,const,symbol_ref,label_ref,const_int,const_double") | |
138 | { | |
139 | if (REG_P (op)) /* Hard registers are rejected. */ | |
140 | return !HARD_REGISTER_P (op); | |
141 | return general_operand (op, mode); /* SUBREGs are not filtered here, unlike the predicates above. */ | |
142 | }) | |
143 | ||
144 | ;; A destination operand for a move instruction. This is the only destination | |
145 | ;; predicate that accepts the return register since it requires special handling. | |
146 | (define_predicate "nvptx_nonimmediate_operand" | |
147 | (match_code "reg,subreg,mem") | |
148 | { | |
149 | if (REG_P (op)) /* Any REG except the frame, arg and stack pointers, so other hard registers pass. */ | |
150 | return (op != frame_pointer_rtx | |
151 | && op != arg_pointer_rtx | |
152 | && op != stack_pointer_rtx); | |
153 | return nonimmediate_operand (op, mode); /* SUBREG and MEM use the generic check. */ | |
154 | }) | |
155 | ||
156 | (define_predicate "const_0_operand" ;; Matches a const_int, const_double or const_vector that is the CONST0_RTX of its own mode. | |
157 | (and (match_code "const_int,const_double,const_vector") | |
158 | (match_test "op == CONST0_RTX (GET_MODE (op))"))) | |
159 | ||
160 | (define_predicate "global_mem_operand" ;; A MEM whose address space is ADDR_SPACE_GLOBAL. | |
161 | (and (match_code "mem") | |
162 | (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_GLOBAL"))) | |
163 | ||
164 | (define_predicate "const_mem_operand" ;; A MEM whose address space is ADDR_SPACE_CONST. | |
165 | (and (match_code "mem") | |
166 | (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_CONST"))) | |
167 | ||
168 | (define_predicate "param_mem_operand" ;; A MEM whose address space is ADDR_SPACE_PARAM. | |
169 | (and (match_code "mem") | |
170 | (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_PARAM"))) | |
171 | ||
172 | (define_predicate "shared_mem_operand" ;; A MEM whose address space is ADDR_SPACE_SHARED. | |
173 | (and (match_code "mem") | |
174 | (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_SHARED"))) | |
175 | ||
176 | (define_predicate "const0_operand" ;; Integer-only zero test (op == const0_rtx); const_0_operand above also covers const_double and const_vector. | |
177 | (and (match_code "const_int") | |
178 | (match_test "op == const0_rtx"))) | |
179 | ||
180 | ;; True if this operator is valid for predication. | |
181 | (define_predicate "predicate_operator" ;; Only EQ and NE are accepted. | |
182 | (match_code "eq,ne")) | |
183 | ||
184 | (define_predicate "ne_operator" ;; Matches an NE comparison only. | |
185 | (match_code "ne")) | |
186 | ||
187 | (define_predicate "nvptx_comparison_operator" ;; Equality plus signed and unsigned ordering comparisons; used by the integer *cmp and cbranch patterns. | |
188 | (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu")) | |
189 | ||
190 | (define_predicate "nvptx_float_comparison_operator" ;; Ordered, unordered-or-X and ordered/unordered tests; used by the SF/DF *cmp and cbranch patterns. | |
191 | (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered")) | |
192 | ||
193 | ;; Test for a valid operand for a call instruction. | |
194 | (define_special_predicate "call_insn_operand" | |
195 | (match_code "symbol_ref,reg") | |
196 | { | |
197 | if (GET_CODE (op) == SYMBOL_REF) | |
198 | { | |
199 | tree decl = SYMBOL_REF_DECL (op); | |
200 | /* This happens for libcalls. */ | |
201 | if (decl == NULL_TREE) | |
202 | return true; | |
203 | return TREE_CODE (SYMBOL_REF_DECL (op)) == FUNCTION_DECL; /* A symbol with a decl must name a function. */ | |
204 | } | |
205 | return true; /* Any REG is accepted as a call target. */ | |
206 | }) | |
207 | ||
208 | ;; Return true if OP is a call with parallel USEs of the argument | |
209 | ;; pseudos. | |
210 | (define_predicate "call_operation" | |
211 | (match_code "parallel") | |
212 | { | |
b27697ca | 213 | int arg_end = XVECLEN (op, 0); |
8ce80784 | 214 | |
b27697ca | 215 | for (int i = 1; i < arg_end; i++) /* Element 0 is not inspected here; only elements 1 and up. */ |
8ce80784 | 216 | { |
217 | rtx elt = XVECEXP (op, 0, i); | |
8ce80784 | 218 | |
219 | if (GET_CODE (elt) != USE /* Each must be a USE of a REG other than the frame/arg/stack pointer. */ | |
220 | || GET_CODE (XEXP (elt, 0)) != REG | |
221 | || XEXP (elt, 0) == frame_pointer_rtx | |
222 | || XEXP (elt, 0) == arg_pointer_rtx | |
223 | || XEXP (elt, 0) == stack_pointer_rtx) | |
8ce80784 | 224 | return false; |
225 | } | |
226 | return true; | |
227 | }) | |
228 | ||
229 | (define_constraint "P0" ;; Selects the constant-0 alternative of movbi. | |
230 | "An integer with the value 0." | |
231 | (and (match_code "const_int") | |
232 | (match_test "ival == 0"))) | |
233 | ||
234 | (define_constraint "P1" | |
235 | "An integer with the value 1." | |
236 | (and (match_code "const_int") | |
237 | (match_test "ival == 1"))) | |
238 | ||
239 | (define_constraint "Pn" ;; Selects the constant -1 alternative of movbi. | |
240 | "An integer with the value -1." | |
241 | (and (match_code "const_int") | |
242 | (match_test "ival == -1"))) | |
243 | ||
244 | (define_constraint "R" ;; NOTE(review): the test below matches any REG code; pseudo-only filtering is left to the operand predicates. | |
245 | "A pseudo register." | |
246 | (match_code "reg")) | |
247 | ||
248 | (define_constraint "Ia" ;; The match_test is always true, so every const_int satisfies this constraint. | |
249 | "Any integer constant." | |
250 | (and (match_code "const_int") (match_test "true"))) | |
251 | ||
252 | (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF]) ;; All scalar integer and float modes handled by the mov<mode> expander. | |
253 | (define_mode_iterator QHSDIM [QI HI SI DI]) ;; Integer modes for the integer move insn. | |
254 | (define_mode_iterator HSDIM [HI SI DI]) ;; Integer modes for arithmetic and comparisons (no QI). | |
255 | (define_mode_iterator BHSDIM [BI HI SI DI]) ;; HSDIM plus BI, for and/ior/xor. | |
256 | (define_mode_iterator SDIM [SI DI]) ;; Modes for shifts, bitrev, clz and ctz. | |
257 | (define_mode_iterator SDISDFM [SI DI SF DF]) | |
258 | (define_mode_iterator QHIM [QI HI]) ;; Source/destination modes narrower than SI. | |
259 | (define_mode_iterator QHSIM [QI HI SI]) ;; Source/destination modes narrower than DI. | |
260 | (define_mode_iterator SDFM [SF DF]) ;; Floating-point modes. | |
261 | (define_mode_iterator SDCM [SC DC]) ;; Complex float modes for the complex mov<mode> expander. | |
b3787ae4 | 262 | (define_mode_iterator BITS [SI SF]) |
263 | (define_mode_iterator BITD [DI DF]) | |
8ce80784 | 264 | |
265 | ;; This mode iterator allows :P to be used for patterns that operate on | |
266 | ;; pointer-sized quantities. Exactly one of the two alternatives will match, | |
;; because each is conditional on Pmode being that mode. | |
267 | (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) | |
268 | ||
269 | ;; We should get away with not defining memory alternatives, since we don't | |
270 | ;; get variables in this mode and pseudos are never spilled. | |
271 | (define_insn "movbi" ;; BImode move: register copy, or constant 0 / -1 materialised with a setp whose comparison is false (1 == 0) / true (1 == 1). | |
272 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R") | |
273 | (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))] | |
274 | "" | |
275 | "@ | |
276 | %.\\tmov%t0\\t%0, %1; | |
277 | %.\\tsetp.eq.u32\\t%0, 1, 0; | |
278 | %.\\tsetp.eq.u32\\t%0, 1, 1;") | |
279 | ||
280 | (define_insn "*mov<mode>_insn" ;; Integer (QI/HI/SI/DI) move. Alternatives: constant, register/immediate, load, store. A store is only matched when the source is a REG numbered above LAST_VIRTUAL_REGISTER. | |
281 | [(set (match_operand:QHSDIM 0 "nvptx_nonimmediate_operand" "=R,R,R,m") | |
282 | (match_operand:QHSDIM 1 "general_operand" "n,Ri,m,R"))] | |
283 | "!(MEM_P (operands[0]) | |
284 | && (!REG_P (operands[1]) || REGNO (operands[1]) <= LAST_VIRTUAL_REGISTER))" | |
285 | { | |
286 | if (which_alternative == 2) /* Memory source: emit a load. */ | |
287 | return "%.\\tld%A1%u1\\t%0, %1;"; | |
288 | if (which_alternative == 3) /* Memory destination: emit a store. */ | |
289 | return "%.\\tst%A0%u0\\t%0, %1;"; | |
290 | ||
291 | rtx dst = operands[0]; | |
292 | rtx src = operands[1]; | |
293 | ||
294 | enum machine_mode dst_mode = nvptx_underlying_object_mode (dst); /* Helper defined outside this file chunk; NOTE(review): presumably looks through SUBREGs -- confirm. */ | |
295 | enum machine_mode src_mode = nvptx_underlying_object_mode (src); | |
296 | if (GET_CODE (dst) == SUBREG) | |
297 | dst = SUBREG_REG (dst); | |
298 | if (GET_CODE (src) == SUBREG) | |
299 | src = SUBREG_REG (src); | |
300 | if (src_mode == QImode) /* QImode is treated as SImode when choosing the instruction. */ | |
301 | src_mode = SImode; | |
302 | if (dst_mode == QImode) | |
303 | dst_mode = SImode; | |
304 | if (CONSTANT_P (src)) | |
305 | { | |
306 | if (GET_MODE_CLASS (dst_mode) != MODE_INT) /* Non-integer destination: use the untyped .b move. */ | |
307 | return "%.\\tmov.b%T0\\t%0, %1;"; | |
308 | else | |
309 | return "%.\\tmov%t0\\t%0, %1;"; | |
310 | } | |
311 | ||
312 | /* Special handling for the return register; we allow this register to | |
313 | only occur in the destination of a move insn. */ | |
314 | if (REG_P (dst) && REGNO (dst) == NVPTX_RETURN_REGNUM | |
315 | && dst_mode == HImode) /* An HImode return register is compared as SImode below. */ | |
316 | dst_mode = SImode; | |
317 | if (dst_mode == src_mode) | |
318 | return "%.\\tmov%t0\\t%0, %1;"; | |
319 | /* Mode-punning between floating point and integer. */ | |
320 | if (GET_MODE_SIZE (dst_mode) == GET_MODE_SIZE (src_mode)) | |
321 | return "%.\\tmov.b%T0\\t%0, %1;"; | |
322 | return "%.\\tcvt%t0%t1\\t%0, %1;"; /* Different sizes: emit a conversion. */ | |
323 | } | |
324 | [(set_attr "subregs_ok" "true")]) | |
325 | ||
326 | (define_insn "*mov<mode>_insn" ;; Floating-point (SF/DF) move. Alternatives: register/float-constant, load, store. A store is only matched when the source is a REG. | |
327 | [(set (match_operand:SDFM 0 "nvptx_nonimmediate_operand" "=R,R,m") | |
328 | (match_operand:SDFM 1 "general_operand" "RF,m,R"))] | |
329 | "!(MEM_P (operands[0]) && !REG_P (operands[1]))" | |
330 | { | |
331 | if (which_alternative == 1) /* Memory source: load. NOTE(review): the type suffix is taken from operand 0 here but from operand 1 in the integer insn; both operands share one mode. */ | |
332 | return "%.\\tld%A1%u0\\t%0, %1;"; | |
333 | if (which_alternative == 2) /* Memory destination: store. */ | |
334 | return "%.\\tst%A0%u1\\t%0, %1;"; | |
335 | ||
336 | rtx dst = operands[0]; | |
337 | rtx src = operands[1]; | |
338 | if (GET_CODE (dst) == SUBREG) /* Compare the modes of the objects inside any SUBREG. */ | |
339 | dst = SUBREG_REG (dst); | |
340 | if (GET_CODE (src) == SUBREG) | |
341 | src = SUBREG_REG (src); | |
342 | enum machine_mode dst_mode = GET_MODE (dst); | |
343 | enum machine_mode src_mode = GET_MODE (src); | |
344 | if (dst_mode == src_mode) | |
345 | return "%.\\tmov%t0\\t%0, %1;"; | |
346 | if (GET_MODE_SIZE (dst_mode) == GET_MODE_SIZE (src_mode)) /* Same size, different mode: untyped .b move. */ | |
347 | return "%.\\tmov.b%T0\\t%0, %1;"; | |
348 | gcc_unreachable (); /* Differently sized inner modes are not expected here. */ | |
349 | } | |
350 | [(set_attr "subregs_ok" "true")]) | |
351 | ||
352 | (define_insn "load_arg_reg<mode>" ;; QI/HI variant: reads %ar<N> (N = operand 1) through a cvt from .u32. | |
353 | [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R") | |
354 | (unspec:QHIM [(match_operand 1 "const_int_operand" "i")] | |
355 | UNSPEC_ARG_REG))] | |
356 | "" | |
357 | "%.\\tcvt%t0.u32\\t%0, %%ar%1;") | |
358 | ||
359 | (define_insn "load_arg_reg<mode>" ;; SI/DI/SF/DF variant: reads %ar<N> (N = operand 1) with a plain mov. | |
360 | [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R") | |
361 | (unspec:SDISDFM [(match_operand 1 "const_int_operand" "i")] | |
362 | UNSPEC_ARG_REG))] | |
363 | "" | |
364 | "%.\\tmov%t0\\t%0, %%ar%1;") | |
365 | ||
366 | (define_expand "mov<mode>" ;; Scalar move expander for QI/HI/SI/DI/SF/DF. | |
367 | [(set (match_operand:QHSDISDFM 0 "nvptx_nonimmediate_operand" "") | |
368 | (match_operand:QHSDISDFM 1 "general_operand" ""))] | |
369 | "" | |
370 | { | |
371 | operands[1] = nvptx_maybe_convert_symbolic_operand (operands[1]); /* Helper defined outside this file chunk. */ | |
372 | /* Record the mode of the return register so that we can prevent | |
373 | later optimization passes from changing it. */ | |
374 | if (REG_P (operands[0]) && REGNO (operands[0]) == NVPTX_RETURN_REGNUM | |
375 | && cfun) | |
376 | { | |
377 | if (cfun->machine->ret_reg_mode == VOIDmode) /* First write to the return register: remember its mode. */ | |
378 | cfun->machine->ret_reg_mode = GET_MODE (operands[0]); | |
379 | else | |
380 | gcc_assert (cfun->machine->ret_reg_mode == GET_MODE (operands[0])); /* Later writes must use the same mode. */ | |
381 | } | |
382 | ||
383 | /* Hard registers are often actually symbolic operands on this target. | |
384 | Don't allow them when storing to memory. */ | |
385 | if (MEM_P (operands[0]) | |
386 | && (!REG_P (operands[1]) | |
387 | || REGNO (operands[1]) <= LAST_VIRTUAL_REGISTER)) | |
388 | { | |
389 | rtx tmp = gen_reg_rtx (<MODE>mode); /* Go through a fresh pseudo so the store's source is a pseudo register. */ | |
390 | emit_move_insn (tmp, operands[1]); | |
391 | emit_move_insn (operands[0], tmp); | |
392 | DONE; | |
393 | } | |
394 | if (GET_CODE (operands[1]) == SYMBOL_REF) /* The symbol's decl is passed to nvptx_record_needed_fndecl (defined outside this chunk). */ | |
395 | nvptx_record_needed_fndecl (SYMBOL_REF_DECL (operands[1])); | |
396 | }) | |
397 | ||
398 | (define_insn "highpartscsf2" ;; Copy the "$1" component of an SCmode register into an SFmode register. | |
399 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
400 | (unspec:SF [(match_operand:SC 1 "nvptx_register_operand")] | |
401 | UNSPEC_CPLX_HIGHPART))] | |
402 | "" | |
403 | "%.\\tmov%t0\\t%0, %f1$1;") | |
404 | ||
405 | (define_insn "set_highpartsfsc2" ;; Write an SFmode register into the "$1" component of an SCmode register; operand 0 is read and written ("+R"). | |
406 | [(set (match_operand:SC 0 "nvptx_register_operand" "+R") | |
407 | (unspec:SC [(match_dup 0) | |
408 | (match_operand:SF 1 "nvptx_register_operand")] | |
409 | UNSPEC_CPLX_HIGHPART))] | |
410 | "" | |
411 | "%.\\tmov%t1\\t%f0$1, %1;") | |
412 | ||
413 | (define_insn "lowpartscsf2" ;; Copy the "$0" component of an SCmode register into an SFmode register. | |
414 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
415 | (unspec:SF [(match_operand:SC 1 "nvptx_register_operand")] | |
416 | UNSPEC_CPLX_LOWPART))] | |
417 | "" | |
418 | "%.\\tmov%t0\\t%0, %f1$0;") | |
419 | ||
420 | (define_insn "set_lowpartsfsc2" ;; Write an SFmode register into the "$0" component of an SCmode register; operand 0 is read and written ("+R"). | |
421 | [(set (match_operand:SC 0 "nvptx_register_operand" "+R") | |
422 | (unspec:SC [(match_dup 0) | |
423 | (match_operand:SF 1 "nvptx_register_operand")] | |
424 | UNSPEC_CPLX_LOWPART))] | |
425 | "" | |
426 | "%.\\tmov%t1\\t%f0$0, %1;") | |
427 | ||
428 | (define_expand "mov<mode>" ;; Complex (SC/DC) move expander: splits the move into two moves of the component mode. | |
429 | [(set (match_operand:SDCM 0 "nvptx_nonimmediate_operand" "") | |
430 | (match_operand:SDCM 1 "general_operand" ""))] | |
431 | "" | |
432 | { | |
433 | enum machine_mode submode = <MODE>mode == SCmode ? SFmode : DFmode; | |
434 | int sz = GET_MODE_SIZE (submode); | |
435 | rtx xops[4]; /* [0]/[1]: destination low/high part; [2]/[3]: source low/high part. */ | |
436 | rtx punning_reg = NULL_RTX; | |
437 | rtx copyback = NULL_RTX; | |
438 | ||
439 | if (GET_CODE (operands[0]) == SUBREG) /* SUBREG destination: write to a MEM addressed by the punning-buffer register, copy back to the inner object at the end. */ | |
440 | { | |
441 | rtx inner = SUBREG_REG (operands[0]); | |
442 | enum machine_mode inner_mode = GET_MODE (inner); | |
443 | int sz2 = GET_MODE_SIZE (inner_mode); | |
444 | gcc_assert (sz2 >= sz); | |
445 | cfun->machine->punning_buffer_size | |
446 | = MAX (cfun->machine->punning_buffer_size, sz2); /* Track the largest inner size seen. */ | |
447 | if (punning_reg == NULL_RTX) | |
448 | punning_reg = gen_rtx_REG (Pmode, NVPTX_PUNNING_BUFFER_REGNUM); | |
449 | copyback = gen_move_insn (inner, gen_rtx_MEM (inner_mode, punning_reg)); /* Generated now, emitted after the component moves. */ | |
450 | operands[0] = gen_rtx_MEM (<MODE>mode, punning_reg); | |
451 | } | |
452 | if (GET_CODE (operands[1]) == SUBREG) /* SUBREG source: first store the inner object to the buffer MEM, then read from it. */ | |
453 | { | |
454 | rtx inner = SUBREG_REG (operands[1]); | |
455 | enum machine_mode inner_mode = GET_MODE (inner); | |
456 | int sz2 = GET_MODE_SIZE (inner_mode); | |
457 | gcc_assert (sz2 >= sz); | |
458 | cfun->machine->punning_buffer_size | |
459 | = MAX (cfun->machine->punning_buffer_size, sz2); | |
460 | if (punning_reg == NULL_RTX) | |
461 | punning_reg = gen_rtx_REG (Pmode, NVPTX_PUNNING_BUFFER_REGNUM); | |
462 | emit_move_insn (gen_rtx_MEM (inner_mode, punning_reg), inner); | |
463 | operands[1] = gen_rtx_MEM (<MODE>mode, punning_reg); | |
464 | } | |
465 | ||
466 | if (REG_P (operands[0]) && submode == SFmode) /* SCmode register destination: build the parts in temporaries, inserted at the end. */ | |
467 | { | |
468 | xops[0] = gen_reg_rtx (submode); | |
469 | xops[1] = gen_reg_rtx (submode); | |
470 | } | |
471 | else | |
472 | { | |
473 | xops[0] = gen_lowpart (submode, operands[0]); | |
474 | if (MEM_P (operands[0])) | |
475 | xops[1] = adjust_address_nv (operands[0], submode, sz); /* Second component sits SZ bytes past the first. */ | |
476 | else | |
477 | xops[1] = gen_highpart (submode, operands[0]); | |
478 | } | |
479 | ||
480 | if (REG_P (operands[1]) && submode == SFmode) /* SCmode register source: extract both parts with the lowpart/highpart insns. */ | |
481 | { | |
482 | xops[2] = gen_reg_rtx (submode); | |
483 | xops[3] = gen_reg_rtx (submode); | |
484 | emit_insn (gen_lowpartscsf2 (xops[2], operands[1])); | |
485 | emit_insn (gen_highpartscsf2 (xops[3], operands[1])); | |
486 | } | |
487 | else | |
488 | { | |
489 | xops[2] = gen_lowpart (submode, operands[1]); | |
490 | if (MEM_P (operands[1])) | |
491 | xops[3] = adjust_address_nv (operands[1], submode, sz); | |
492 | else | |
493 | xops[3] = gen_highpart (submode, operands[1]); | |
494 | } | |
495 | ||
496 | emit_move_insn (xops[0], xops[2]); /* Low part, then high part. */ | |
497 | emit_move_insn (xops[1], xops[3]); | |
498 | if (REG_P (operands[0]) && submode == SFmode) /* Insert the temporaries into the SCmode destination register. */ | |
499 | { | |
500 | emit_insn (gen_set_lowpartsfsc2 (operands[0], xops[0])); | |
501 | emit_insn (gen_set_highpartsfsc2 (operands[0], xops[1])); | |
502 | } | |
503 | if (copyback) | |
504 | emit_insn (copyback); | |
505 | DONE; | |
506 | }) | |
507 | ||
508 | (define_insn "zero_extendqihi2" ;; Zero-extend QI to HI: cvt for a register source, unsigned 8-bit load for a memory source. | |
509 | [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R") | |
510 | (zero_extend:HI (match_operand:QI 1 "nvptx_reg_or_mem_operand" "R,m")))] | |
511 | "" | |
512 | "@ | |
513 | %.\\tcvt.u16.u%T1\\t%0, %1; | |
514 | %.\\tld%A1.u8\\t%0, %1;" | |
515 | [(set_attr "subregs_ok" "true")]) | |
516 | ||
517 | (define_insn "zero_extend<mode>si2" ;; Zero-extend QI/HI to SI: cvt for a register source, unsigned load for a memory source. | |
518 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
519 | (zero_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))] | |
520 | "" | |
521 | "@ | |
522 | %.\\tcvt.u32.u%T1\\t%0, %1; | |
523 | %.\\tld%A1.u%T1\\t%0, %1;" | |
524 | [(set_attr "subregs_ok" "true")]) | |
525 | ||
526 | (define_insn "zero_extend<mode>di2" ;; Zero-extend QI/HI/SI to DI: cvt for a register source, load for a memory source. | |
527 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
528 | (zero_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))] | |
529 | "" | |
530 | "@ | |
531 | %.\\tcvt.u64.u%T1\\t%0, %1; | |
532 | %.\\tld%A1%u1\\t%0, %1;" | |
533 | [(set_attr "subregs_ok" "true")]) | |
534 | ||
535 | (define_insn "extend<mode>si2" ;; Sign-extend QI/HI to SI: cvt for a register source, signed load for a memory source. | |
536 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R") | |
537 | (sign_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))] | |
538 | "" | |
539 | "@ | |
540 | %.\\tcvt.s32.s%T1\\t%0, %1; | |
541 | %.\\tld%A1.s%T1\\t%0, %1;" | |
542 | [(set_attr "subregs_ok" "true")]) | |
543 | ||
544 | (define_insn "extend<mode>di2" ;; Sign-extend QI/HI/SI to DI: cvt for a register source, signed load for a memory source. | |
545 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R") | |
546 | (sign_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))] | |
547 | "" | |
548 | "@ | |
549 | %.\\tcvt.s64.s%T1\\t%0, %1; | |
550 | %.\\tld%A1.s%T1\\t%0, %1;" | |
551 | [(set_attr "subregs_ok" "true")]) | |
552 | ||
553 | (define_insn "trunchiqi2" ;; Truncate HI to QI: cvt into a register, or an 8-bit store when the destination is memory. | |
554 | [(set (match_operand:QI 0 "nvptx_reg_or_mem_operand" "=R,m") | |
555 | (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))] | |
556 | "" | |
557 | "@ | |
558 | %.\\tcvt%t0.u16\\t%0, %1; | |
559 | %.\\tst%A0.u8\\t%0, %1;" | |
560 | [(set_attr "subregs_ok" "true")]) | |
561 | ||
562 | (define_insn "truncsi<mode>2" ;; Truncate SI to QI/HI: cvt into a register, or a narrow store when the destination is memory. | |
563 | [(set (match_operand:QHIM 0 "nvptx_reg_or_mem_operand" "=R,m") | |
564 | (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))] | |
565 | "" | |
566 | "@ | |
567 | %.\\tcvt%t0.u32\\t%0, %1; | |
568 | %.\\tst%A0.u%T0\\t%0, %1;" | |
569 | [(set_attr "subregs_ok" "true")]) | |
570 | ||
571 | (define_insn "truncdi<mode>2" ;; Truncate DI to QI/HI/SI: cvt into a register, or a narrow store when the destination is memory. | |
572 | [(set (match_operand:QHSIM 0 "nvptx_reg_or_mem_operand" "=R,m") | |
573 | (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))] | |
574 | "" | |
575 | "@ | |
576 | %.\\tcvt%t0.u64\\t%0, %1; | |
577 | %.\\tst%A0.u%T0\\t%0, %1;" | |
578 | [(set_attr "subregs_ok" "true")]) | |
579 | ||
580 | ;; Pointer address space conversions | |
581 | ||
582 | (define_int_iterator cvt_code ;; Unspec codes the convaddr_* insn is instantiated for; the FROM_PARAM/TO_PARAM codes are not included. | |
583 | [UNSPEC_FROM_GLOBAL | |
584 | UNSPEC_FROM_LOCAL | |
585 | UNSPEC_FROM_SHARED | |
586 | UNSPEC_FROM_CONST | |
587 | UNSPEC_TO_GLOBAL | |
588 | UNSPEC_TO_LOCAL | |
589 | UNSPEC_TO_SHARED | |
590 | UNSPEC_TO_CONST]) | |
591 | ||
592 | (define_int_attr cvt_name ;; Per-code suffix substituted into the insn name "convaddr_<cvt_name><mode>". | |
593 | [(UNSPEC_FROM_GLOBAL "from_global") | |
594 | (UNSPEC_FROM_LOCAL "from_local") | |
595 | (UNSPEC_FROM_SHARED "from_shared") | |
596 | (UNSPEC_FROM_CONST "from_const") | |
597 | (UNSPEC_TO_GLOBAL "to_global") | |
598 | (UNSPEC_TO_LOCAL "to_local") | |
599 | (UNSPEC_TO_SHARED "to_shared") | |
600 | (UNSPEC_TO_CONST "to_const")]) | |
601 | ||
602 | (define_int_attr cvt_str ;; Per-code text appended to "cvta" in the output template of convaddr_*. | |
603 | [(UNSPEC_FROM_GLOBAL ".global") | |
604 | (UNSPEC_FROM_LOCAL ".local") | |
605 | (UNSPEC_FROM_SHARED ".shared") | |
606 | (UNSPEC_FROM_CONST ".const") | |
607 | (UNSPEC_TO_GLOBAL ".to.global") | |
608 | (UNSPEC_TO_LOCAL ".to.local") | |
609 | (UNSPEC_TO_SHARED ".to.shared") | |
610 | (UNSPEC_TO_CONST ".to.const")]) | |
611 | ||
612 | (define_insn "convaddr_<cvt_name><mode>" ;; Pointer-sized address-space conversion; one insn per cvt_code, emitted as cvta<cvt_str>. | |
613 | [(set (match_operand:P 0 "nvptx_register_operand" "=R") | |
614 | (unspec:P [(match_operand:P 1 "nvptx_register_or_symbolic_operand" "Rs")] cvt_code))] | |
615 | "" | |
616 | "%.\\tcvta<cvt_str>%t0\\t%0, %1;") | |
617 | ||
618 | ;; Integer arithmetic | |
619 | ||
620 | (define_insn "add<mode>3" ;; HI/SI/DI addition; operand 2 may be a register or an integer constant. | |
621 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
622 | (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
623 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
624 | "" | |
625 | "%.\\tadd%t0\\t%0, %1, %2;") | |
626 | ||
627 | (define_insn "sub<mode>3" ;; HI/SI/DI subtraction; unlike add, both source operands must be registers. | |
628 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
629 | (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
630 | (match_operand:HSDIM 2 "nvptx_register_operand" "R")))] | |
631 | "" | |
632 | "%.\\tsub%t0\\t%0, %1, %2;") | |
633 | ||
634 | (define_insn "mul<mode>3" ;; HI/SI/DI multiplication, emitted as mul.lo. | |
635 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
636 | (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
637 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
638 | "" | |
639 | "%.\\tmul.lo%t0\\t%0, %1, %2;") | |
640 | ||
641 | (define_insn "*mad<mode>3" ;; Matches (op1 * op2) + op3 in HI/SI/DI and emits a single mad.lo. | |
642 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
643 | (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
644 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
645 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
646 | "" | |
647 | "%.\\tmad.lo%t0\\t%0, %1, %2, %3;") | |
648 | ||
649 | (define_insn "div<mode>3" ;; Signed HI/SI/DI division (div.s). | |
650 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
651 | (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
652 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
653 | "" | |
654 | "%.\\tdiv.s%T0\\t%0, %1, %2;") | |
655 | ||
656 | (define_insn "udiv<mode>3" ;; Unsigned HI/SI/DI division (div.u). | |
657 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
658 | (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
659 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
660 | "" | |
661 | "%.\\tdiv.u%T0\\t%0, %1, %2;") | |
662 | ||
663 | (define_insn "mod<mode>3" ;; Signed HI/SI/DI remainder (rem.s). NOTE(review): operand 1 uses constraint "Ri" but its predicate only accepts registers; div uses "R". | |
664 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
665 | (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri") | |
666 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
667 | "" | |
668 | "%.\\trem.s%T0\\t%0, %1, %2;") | |
669 | ||
670 | (define_insn "umod<mode>3" ;; Unsigned HI/SI/DI remainder (rem.u). NOTE(review): operand 1 uses constraint "Ri" but its predicate only accepts registers; udiv uses "R". | |
671 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
672 | (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri") | |
673 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
674 | "" | |
675 | "%.\\trem.u%T0\\t%0, %1, %2;") | |
676 | ||
677 | (define_insn "smin<mode>3" ;; Signed HI/SI/DI minimum (min.s). | |
678 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
679 | (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
680 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
681 | "" | |
682 | "%.\\tmin.s%T0\\t%0, %1, %2;") | |
683 | ||
684 | (define_insn "umin<mode>3" ;; Unsigned HI/SI/DI minimum (min.u). | |
685 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
686 | (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
687 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
688 | "" | |
689 | "%.\\tmin.u%T0\\t%0, %1, %2;") | |
690 | ||
691 | (define_insn "smax<mode>3" ;; Signed HI/SI/DI maximum (max.s). | |
692 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
693 | (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
694 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
695 | "" | |
696 | "%.\\tmax.s%T0\\t%0, %1, %2;") | |
697 | ||
698 | (define_insn "umax<mode>3" ;; Unsigned HI/SI/DI maximum (max.u). | |
699 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
700 | (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") | |
701 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
702 | "" | |
703 | "%.\\tmax.u%T0\\t%0, %1, %2;") | |
704 | ||
705 | (define_insn "abs<mode>2" ;; HI/SI/DI absolute value (abs.s); register operand only. | |
706 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
707 | (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
708 | "" | |
709 | "%.\\tabs.s%T0\\t%0, %1;") | |
710 | ||
711 | (define_insn "neg<mode>2" ;; HI/SI/DI negation (neg.s); register operand only. | |
712 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
713 | (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
714 | "" | |
715 | "%.\\tneg.s%T0\\t%0, %1;") | |
716 | ||
717 | (define_insn "one_cmpl<mode>2" ;; HI/SI/DI bitwise complement (not.b); register operand only. | |
718 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
719 | (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))] | |
720 | "" | |
721 | "%.\\tnot.b%T0\\t%0, %1;") | |
722 | ||
723 | (define_insn "bitrev<mode>2" ;; SI/DI bit reversal (brev.b), expressed as an unspec; used by the ctz expander. | |
724 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
725 | (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")] | |
726 | UNSPEC_BITREV))] | |
727 | "" | |
728 | "%.\\tbrev.b%T0\\t%0, %1;") | |
729 | ||
730 | (define_insn "clz<mode>2" ;; Count leading zeros of an SI/DI register; the result (operand 0) is always SImode. | |
731 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
732 | (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))] | |
733 | "" | |
734 | "%.\\tclz.b%T1\\t%0, %1;") ;; Size suffix must follow the source (operand 1): operand 0 is SImode even for a DImode input. | |
735 | ||
736 | (define_expand "ctz<mode>2" ;; Count trailing zeros, expanded as clz of the bit-reversed input. | |
737 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
738 | (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))] | |
739 | "" | |
740 | { | |
741 | rtx tmpreg = gen_reg_rtx (<MODE>mode); /* Holds the bit-reversed input, in the input's mode. */ | |
742 | emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1])); | |
743 | emit_insn (gen_clz<mode>2 (operands[0], tmpreg)); | |
744 | DONE; | |
745 | }) | |
746 | ||
747 | ;; Shifts | |
748 | ||
749 | (define_insn "ashl<mode>3" ;; SI/DI left shift (shl.b); the shift count is always SImode, register or constant. | |
750 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
751 | (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
752 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
753 | "" | |
754 | "%.\\tshl.b%T0\\t%0, %1, %2;") | |
755 | ||
756 | (define_insn "ashr<mode>3" ;; SI/DI arithmetic right shift (shr.s); the shift count is always SImode. | |
757 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
758 | (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
759 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
760 | "" | |
761 | "%.\\tshr.s%T0\\t%0, %1, %2;") | |
762 | ||
763 | (define_insn "lshr<mode>3" ;; SI/DI logical right shift (shr.u); the shift count is always SImode. | |
764 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
765 | (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R") | |
766 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))] | |
767 | "" | |
768 | "%.\\tshr.u%T0\\t%0, %1, %2;") | |
769 | ||
770 | ;; Logical operations | |
771 | ||
772 | (define_insn "and<mode>3" ;; Bitwise AND for BI/HI/SI/DI (and.b). | |
773 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
774 | (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
775 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
776 | "" | |
777 | "%.\\tand.b%T0\\t%0, %1, %2;") | |
778 | ||
779 | (define_insn "ior<mode>3" ;; Bitwise OR for BI/HI/SI/DI (or.b). | |
780 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
781 | (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
782 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
783 | "" | |
784 | "%.\\tor.b%T0\\t%0, %1, %2;") | |
785 | ||
786 | (define_insn "xor<mode>3" ;; Bitwise XOR for BI/HI/SI/DI (xor.b). | |
787 | [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") | |
788 | (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") | |
789 | (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] | |
790 | "" | |
791 | "%.\\txor.b%T0\\t%0, %1, %2;") | |
792 | ||
793 | ;; Comparisons and branches | |
794 | ||
795 | (define_insn "*cmp<mode>" ;; Integer (HI/SI/DI) comparison into a BImode register via setp; operator 1 selects the comparison. | |
796 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
797 | (match_operator:BI 1 "nvptx_comparison_operator" | |
798 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") | |
799 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
800 | "" | |
b27697ca | 801 | "%.\\tsetp%c1\\t%0, %2, %3;") |
8ce80784 | 802 | |
803 | (define_insn "*cmp<mode>" ;; Floating-point (SF/DF) comparison into a BImode register via setp; operand 3 may be a register or a float constant. | |
804 | [(set (match_operand:BI 0 "nvptx_register_operand" "=R") | |
805 | (match_operator:BI 1 "nvptx_float_comparison_operator" | |
806 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") | |
807 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
808 | "" | |
b27697ca | 809 | "%.\\tsetp%c1\\t%0, %2, %3;") |
8ce80784 | 810 | |
811 | (define_insn "jump" ;; Unconditional branch to the label in operand 0. | |
812 | [(set (pc) | |
813 | (label_ref (match_operand 0 "" "")))] | |
814 | "" | |
815 | "%.\\tbra\\t%l0;") | |
816 | ||
817 | (define_insn "br_true" ;; Branch to operand 1 when the BImode register in operand 0 is non-zero; %j0 is the guard prefix (br_false uses %J0). | |
818 | [(set (pc) | |
819 | (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
820 | (const_int 0)) | |
821 | (label_ref (match_operand 1 "" "")) | |
822 | (pc)))] | |
823 | "" | |
824 | "%j0\\tbra\\t%l1;") | |
825 | ||
826 | (define_insn "br_false" ;; Branch to operand 1 when the BImode register in operand 0 is zero; %J0 is the guard prefix (br_true uses %j0). | |
827 | [(set (pc) | |
828 | (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
829 | (const_int 0)) | |
830 | (label_ref (match_operand 1 "" "")) | |
831 | (pc)))] | |
832 | "" | |
833 | "%J0\\tbra\\t%l1;") | |
834 | ||
b3787ae4 | 835 | ;; unified conditional branch |
836 | (define_insn "br_true_uni" ;; Like br_true, but the predicate is wrapped in UNSPEC_BR_UNIFIED and the output is bra.uni. | |
837 | [(set (pc) (if_then_else | |
838 | (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
839 | UNSPEC_BR_UNIFIED) (const_int 0)) ;; the unspec keeps this pattern distinct from plain br_true | |
840 | (label_ref (match_operand 1 "" "")) (pc)))] | |
841 | "" | |
842 | "%j0\\tbra.uni\\t%l1;") | |
843 | ||
844 | (define_insn "br_false_uni" ;; Like br_false, but the predicate is wrapped in UNSPEC_BR_UNIFIED and the output is bra.uni. | |
845 | [(set (pc) (if_then_else | |
846 | (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")] | |
847 | UNSPEC_BR_UNIFIED) (const_int 0)) ;; the unspec keeps this pattern distinct from plain br_false | |
848 | (label_ref (match_operand 1 "" "")) (pc)))] | |
849 | "" | |
850 | "%J0\\tbra.uni\\t%l1;") | |
851 | ||
8ce80784 | 852 | (define_expand "cbranch<mode>4" ;; Compare-and-branch expander for HSDIM (integer) operands; operand 3 is the target label. |
853 | [(set (pc) | |
854 | (if_then_else (match_operator 0 "nvptx_comparison_operator" | |
855 | [(match_operand:HSDIM 1 "nvptx_register_operand" "") | |
856 | (match_operand:HSDIM 2 "nvptx_register_operand" "")]) | |
857 | (label_ref (match_operand 3 "" "")) | |
858 | (pc)))] | |
859 | "" | |
860 | { | |
861 | rtx t = nvptx_expand_compare (operands[0]); /* helper is defined outside this section of the file */ | |
862 | operands[0] = t; /* the returned rtx replaces the original comparison ... */ | |
863 | operands[1] = XEXP (t, 0); /* ... and its two sub-expressions replace the compared operands */ | |
864 | operands[2] = XEXP (t, 1); | |
865 | }) ;; no DONE: the pattern above is emitted with the rewritten operands | |
866 | ||
867 | (define_expand "cbranch<mode>4" ;; Compare-and-branch expander for SDFM (floating-point) operands; operand 3 is the target label. | |
868 | [(set (pc) | |
869 | (if_then_else (match_operator 0 "nvptx_float_comparison_operator" | |
870 | [(match_operand:SDFM 1 "nvptx_register_operand" "") | |
871 | (match_operand:SDFM 2 "nvptx_register_operand" "")]) | |
872 | (label_ref (match_operand 3 "" "")) | |
873 | (pc)))] | |
874 | "" | |
875 | { | |
876 | rtx t = nvptx_expand_compare (operands[0]); /* helper is defined outside this section of the file */ | |
877 | operands[0] = t; /* the returned rtx replaces the original comparison ... */ | |
878 | operands[1] = XEXP (t, 0); /* ... and its two sub-expressions replace the compared operands */ | |
879 | operands[2] = XEXP (t, 1); | |
880 | }) ;; no DONE: the pattern above is emitted with the rewritten operands | |
881 | ||
882 | (define_expand "cbranchbi4" ;; Compare-and-branch on a BI register against zero; operand 3 is the target label. | |
883 | [(set (pc) | |
884 | (if_then_else (match_operator 0 "predicate_operator" | |
885 | [(match_operand:BI 1 "nvptx_register_operand" "") | |
886 | (match_operand:BI 2 "const0_operand" "")]) ;; the right-hand side must satisfy const0_operand | |
887 | (label_ref (match_operand 3 "" "")) | |
888 | (pc)))] | |
889 | "" | |
890 | "") ;; empty preparation code: the pattern is emitted exactly as written | |
891 | ||
892 | ;; Conditional stores | |
893 | ||
894 | (define_insn "setcc_from_bi" ;; Turns a BI predicate into an SI value via (ne pred 0). | |
895 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
896 | (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R") | |
897 | (const_int 0)))] | |
898 | "" | |
899 | "%.\\tselp%t0 %0,-1,0,%1;") ;; emits selp choosing between the literals -1 and 0 under predicate %1 | |
900 | ||
75e09431 | 901 | (define_insn "sel_true<mode>" ;; Integer (HSDIM) select: operand 2 when BI operand 1 is nonzero, otherwise operand 3. |
902 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
903 | (if_then_else:HSDIM | |
904 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
905 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
906 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
907 | "" | |
908 | "%.\\tselp%t0\\t%0, %2, %3, %1;") ;; selp with the values in pattern order and the predicate last | |
909 | ||
910 | (define_insn "sel_true<mode>" ;; Floating-point (SDFM) select: operand 2 when BI operand 1 is nonzero, otherwise operand 3. | |
911 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
912 | (if_then_else:SDFM | |
913 | (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
914 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
915 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
916 | "" | |
917 | "%.\\tselp%t0\\t%0, %2, %3, %1;") ;; selp with the values in pattern order and the predicate last | |
918 | ||
919 | (define_insn "sel_false<mode>" ;; Integer (HSDIM) select: operand 2 when BI operand 1 is zero, otherwise operand 3. | |
920 | [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") | |
921 | (if_then_else:HSDIM | |
922 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
923 | (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") | |
924 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] | |
925 | "" | |
926 | "%.\\tselp%t0\\t%0, %3, %2, %1;") ;; %3 and %2 are swapped in the output to express the eq test with the same selp form | |
927 | ||
928 | (define_insn "sel_false<mode>" ;; Floating-point (SDFM) select: operand 2 when BI operand 1 is zero, otherwise operand 3. | |
929 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
930 | (if_then_else:SDFM | |
931 | (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) | |
932 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
933 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
934 | "" | |
935 | "%.\\tselp%t0\\t%0, %3, %2, %1;") ;; %3 and %2 are swapped in the output to express the eq test with the same selp form | |
936 | ||
8ce80784 | 937 | (define_insn "setcc_int<mode>" ;; Stores the result of an integer (HSDIM) comparison directly into an SI register. |
938 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
939 | (match_operator:SI 1 "nvptx_comparison_operator" | |
75e09431 | 940 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") |
941 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
8ce80784 | 942 | "" |
b27697ca | 943 | "%.\\tset%t0%c1\\t%0, %2, %3;") ;; emits set, with suffixes printed from operand 0 and the comparison operator |
8ce80784 | 944 | |
945 | (define_insn "setcc_int<mode>" ;; Stores the result of a floating-point (SDFM) comparison directly into an SI register. | |
946 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
947 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
75e09431 | 948 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") |
949 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
8ce80784 | 950 | "" |
b27697ca | 951 | "%.\\tset%t0%c1\\t%0, %2, %3;") ;; emits set, with suffixes printed from operand 0 and the comparison operator |
8ce80784 | 952 | |
953 | (define_insn "setcc_float<mode>" ;; Stores the result of an integer (HSDIM) comparison into an SF register. | |
954 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
955 | (match_operator:SF 1 "nvptx_comparison_operator" | |
75e09431 | 956 | [(match_operand:HSDIM 2 "nvptx_register_operand" "R") |
957 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] | |
8ce80784 | 958 | "" |
b27697ca | 959 | "%.\\tset%t0%c1\\t%0, %2, %3;") ;; same template as setcc_int; only the destination mode differs |
8ce80784 | 960 | |
961 | (define_insn "setcc_float<mode>" ;; Stores the result of a floating-point (SDFM) comparison into an SF register. | |
962 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
963 | (match_operator:SF 1 "nvptx_float_comparison_operator" | |
75e09431 | 964 | [(match_operand:SDFM 2 "nvptx_register_operand" "R") |
965 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] | |
8ce80784 | 966 | "" |
b27697ca | 967 | "%.\\tset%t0%c1\\t%0, %2, %3;") ;; same template as setcc_int; only the destination mode differs |
8ce80784 | 968 | |
969 | (define_expand "cstorebi4" ;; Store-flag expander for a BI register tested against zero; result goes to an SI register. | |
970 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
971 | (match_operator:SI 1 "ne_operator" ;; only operators accepted by ne_operator are handled | |
972 | [(match_operand:BI 2 "nvptx_register_operand") | |
973 | (match_operand:BI 3 "const0_operand")]))] | |
974 | "" | |
975 | "") ;; empty preparation code: the pattern is emitted exactly as written | |
976 | ||
977 | (define_expand "cstore<mode>4" ;; Store-flag expander for integer (HSDIM) comparisons; result goes to an SI register. | |
978 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
979 | (match_operator:SI 1 "nvptx_comparison_operator" | |
980 | [(match_operand:HSDIM 2 "nvptx_register_operand") | |
981 | (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))] | |
982 | "" | |
983 | "") ;; empty preparation code: the pattern is emitted exactly as written | |
984 | ||
985 | (define_expand "cstore<mode>4" ;; Store-flag expander for floating-point (SDFM) comparisons; result goes to an SI register. | |
986 | [(set (match_operand:SI 0 "nvptx_register_operand") | |
987 | (match_operator:SI 1 "nvptx_float_comparison_operator" | |
988 | [(match_operand:SDFM 2 "nvptx_register_operand") | |
989 | (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))] | |
990 | "" | |
991 | "") ;; empty preparation code: the pattern is emitted exactly as written | |
992 | ||
993 | ;; Calls | |
994 | ||
995 | (define_insn "call_insn" ;; Call that produces no value; the whole insn is a parallel checked by call_operation. | |
996 | [(match_parallel 2 "call_operation" | |
997 | [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "Rs")) ;; operand 0 is the callee | |
998 | (match_operand 1))])] | |
999 | "" | |
1000 | { | |
1001 | return nvptx_output_call_insn (insn, NULL_RTX, operands[0]); /* NULL_RTX in the result slot: nothing is returned */ | |
1002 | }) | |
1003 | ||
1004 | (define_insn "call_value_insn" ;; Call whose result is stored in register operand 0; the parallel is checked by call_operation. | |
1005 | [(match_parallel 3 "call_operation" | |
1006 | [(set (match_operand 0 "nvptx_register_operand" "=R") | |
1007 | (call (mem:QI (match_operand:SI 1 "call_insn_operand" "Rs")) ;; operand 1 is the callee | |
1008 | (match_operand 2)))])] | |
1009 | "" | |
1010 | { | |
1011 | return nvptx_output_call_insn (insn, operands[0], operands[1]); /* result register first, then callee */ | |
1012 | }) | |
1013 | ||
1014 | (define_expand "call" ;; Expander for a call without a return value; all work is delegated to nvptx_expand_call. | |
1015 | [(match_operand 0 "" "")] | |
1016 | "" | |
1017 | { | |
1018 | nvptx_expand_call (NULL_RTX, operands[0]); /* NULL_RTX: no return-value location */ | |
1019 | DONE; | |
1020 | }) | |
1021 | ||
1022 | (define_expand "call_value" ;; Expander for a call with a return value; all work is delegated to nvptx_expand_call. | |
1023 | [(match_operand 0 "" "") | |
1024 | (match_operand 1 "" "")] | |
1025 | "" | |
1026 | { | |
1027 | nvptx_expand_call (operands[0], operands[1]); /* operand 0 is passed in the return-value slot, operand 1 as the call */ | |
1028 | DONE; | |
1029 | }) | |
1030 | ||
1031 | ;; Floating point arithmetic. | |
1032 | ||
1033 | (define_insn "add<mode>3" ;; Floating-point addition for the SDFM modes. | |
1034 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1035 | (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1036 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] ;; second source accepts constraint letters R or F | |
1037 | "" | |
1038 | "%.\\tadd%t0\\t%0, %1, %2;") | |
1039 | ||
1040 | (define_insn "sub<mode>3" ;; Floating-point subtraction for the SDFM modes. | |
1041 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1042 | (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1043 | (match_operand:SDFM 2 "nvptx_register_operand" "R")))] ;; both sources are restricted to registers here | |
1044 | "" | |
1045 | "%.\\tsub%t0\\t%0, %1, %2;") | |
1046 | ||
1047 | (define_insn "mul<mode>3" ;; Floating-point multiplication for the SDFM modes. | |
1048 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1049 | (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1050 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] ;; second source accepts constraint letters R or F | |
1051 | "" | |
1052 | "%.\\tmul%t0\\t%0, %1, %2;") | |
1053 | ||
1054 | (define_insn "fma<mode>4" ;; Fused multiply-add for the SDFM modes: (fma op1 op2 op3). | |
1055 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1056 | (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1057 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") | |
1058 | (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] | |
1059 | "" | |
1060 | "%.\\tfma%#%t0\\t%0, %1, %2, %3;") ;; the extra %# modifier is expanded by the target operand printer (not in this section) | |
1061 | ||
1062 | (define_insn "div<mode>3" ;; Floating-point division for the SDFM modes. | |
1063 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1064 | (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1065 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
1066 | "" | |
1067 | "%.\\tdiv%#%t0\\t%0, %1, %2;") ;; the extra %# modifier is expanded by the target operand printer (not in this section) | |
1068 | ||
1069 | (define_insn "copysign<mode>3" ;; copysign for the SDFM modes, modelled as UNSPEC_COPYSIGN over two register operands. | |
1070 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1071 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1072 | (match_operand:SDFM 2 "nvptx_register_operand" "R")] | |
1073 | UNSPEC_COPYSIGN))] | |
1074 | "" | |
1075 | "%.\\tcopysign%t0\\t%0, %2, %1;") ;; note the sources are printed in reverse order: %2 before %1 | |
1076 | ||
1077 | (define_insn "smin<mode>3" ;; Floating-point minimum (smin) for the SDFM modes. | |
1078 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1079 | (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1080 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
1081 | "" | |
1082 | "%.\\tmin%t0\\t%0, %1, %2;") ;; emits min | |
1083 | ||
1084 | (define_insn "smax<mode>3" ;; Floating-point maximum (smax) for the SDFM modes. | |
1085 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1086 | (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") | |
1087 | (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] | |
1088 | "" | |
1089 | "%.\\tmax%t0\\t%0, %1, %2;") ;; emits max | |
1090 | ||
1091 | (define_insn "abs<mode>2" ;; Floating-point absolute value for the SDFM modes. | |
1092 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1093 | (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1094 | "" | |
1095 | "%.\\tabs%t0\\t%0, %1;") | |
1096 | ||
1097 | (define_insn "neg<mode>2" ;; Floating-point negation for the SDFM modes. | |
1098 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1099 | (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1100 | "" | |
1101 | "%.\\tneg%t0\\t%0, %1;") | |
1102 | ||
1103 | (define_insn "sqrt<mode>2" ;; Floating-point square root for the SDFM modes. | |
1104 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1105 | (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1106 | "" | |
1107 | "%.\\tsqrt%#%t0\\t%0, %1;") ;; the extra %# modifier is expanded by the target operand printer (not in this section) | |
1108 | ||
1109 | (define_insn "sinsf2" ;; SF-only sine, modelled as UNSPEC_SIN. | |
1110 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1111 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
1112 | UNSPEC_SIN))] | |
1113 | "flag_unsafe_math_optimizations" ;; only available when unsafe math optimizations are enabled | |
1114 | "%.\\tsin.approx%t0\\t%0, %1;") ;; emits the .approx form of sin | |
1115 | ||
1116 | (define_insn "cossf2" ;; SF-only cosine, modelled as UNSPEC_COS. | |
1117 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1118 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
1119 | UNSPEC_COS))] | |
1120 | "flag_unsafe_math_optimizations" ;; only available when unsafe math optimizations are enabled | |
1121 | "%.\\tcos.approx%t0\\t%0, %1;") ;; emits the .approx form of cos | |
1122 | ||
1123 | (define_insn "log2sf2" ;; SF-only base-2 logarithm, modelled as UNSPEC_LOG2. | |
1124 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1125 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
1126 | UNSPEC_LOG2))] | |
1127 | "flag_unsafe_math_optimizations" ;; only available when unsafe math optimizations are enabled | |
1128 | "%.\\tlg2.approx%t0\\t%0, %1;") ;; emits the .approx form of lg2 | |
1129 | ||
1130 | (define_insn "exp2sf2" ;; SF-only base-2 exponential, modelled as UNSPEC_EXP2. | |
1131 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1132 | (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")] | |
1133 | UNSPEC_EXP2))] | |
1134 | "flag_unsafe_math_optimizations" ;; only available when unsafe math optimizations are enabled | |
1135 | "%.\\tex2.approx%t0\\t%0, %1;") ;; emits the .approx form of ex2 | |
1136 | ||
1137 | ;; Conversions involving floating point | |
1138 | ||
1139 | (define_insn "extendsfdf2" ;; Widening conversion SF to DF (float_extend). | |
1140 | [(set (match_operand:DF 0 "nvptx_register_operand" "=R") | |
1141 | (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))] | |
1142 | "" | |
1143 | "%.\\tcvt%t0%t1\\t%0, %1;") ;; cvt with the destination suffix first, then the source suffix | |
1144 | ||
1145 | (define_insn "truncdfsf2" ;; Narrowing conversion DF to SF (float_truncate). | |
1146 | [(set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1147 | (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))] | |
1148 | "" | |
1149 | "%.\\tcvt%#%t0%t1\\t%0, %1;") ;; unlike the widening form, this template carries the %# modifier | |
1150 | ||
1151 | (define_insn "floatunssi<mode>2" ;; Unsigned SI integer to floating point (SDFM). | |
1152 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1153 | (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
1154 | "" | |
1155 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") ;; the source is printed with a .u prefix for the unsigned form | |
1156 | ||
1157 | (define_insn "floatsi<mode>2" ;; Signed SI integer to floating point (SDFM). | |
1158 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1159 | (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))] | |
1160 | "" | |
1161 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") ;; the source is printed with a .s prefix for the signed form | |
1162 | ||
1163 | (define_insn "floatunsdi<mode>2" ;; Unsigned DI integer to floating point (SDFM). | |
1164 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1165 | (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
1166 | "" | |
1167 | "%.\\tcvt%#%t0.u%T1\\t%0, %1;") ;; the source is printed with a .u prefix for the unsigned form | |
1168 | ||
1169 | (define_insn "floatdi<mode>2" ;; Signed DI integer to floating point (SDFM). | |
1170 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1171 | (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))] | |
1172 | "" | |
1173 | "%.\\tcvt%#%t0.s%T1\\t%0, %1;") ;; the source is printed with a .s prefix for the signed form | |
1174 | ||
1175 | (define_insn "fixuns_trunc<mode>si2" ;; Floating point (SDFM) to unsigned SI integer. | |
1176 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1177 | (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1178 | "" | |
1179 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") ;; fixed .rzi modifier, .u destination | |
1180 | ||
1181 | (define_insn "fix_trunc<mode>si2" ;; Floating point (SDFM) to signed SI integer. | |
1182 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1183 | (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1184 | "" | |
1185 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") ;; fixed .rzi modifier, .s destination | |
1186 | ||
1187 | (define_insn "fixuns_trunc<mode>di2" ;; Floating point (SDFM) to unsigned DI integer. | |
1188 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
1189 | (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1190 | "" | |
1191 | "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;") ;; fixed .rzi modifier, .u destination | |
1192 | ||
1193 | (define_insn "fix_trunc<mode>di2" ;; Floating point (SDFM) to signed DI integer. | |
1194 | [(set (match_operand:DI 0 "nvptx_register_operand" "=R") | |
1195 | (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))] | |
1196 | "" | |
1197 | "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;") ;; fixed .rzi modifier, .s destination | |
1198 | ||
1199 | (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC ;; the four round-to-integral unspecs that share one insn template | |
1200 | UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT]) | |
1201 | (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor") ;; per-unspec pattern name prefix | |
1202 | (UNSPEC_FPINT_BTRUNC "btrunc") | |
1203 | (UNSPEC_FPINT_CEIL "ceil") | |
1204 | (UNSPEC_FPINT_NEARBYINT "nearbyint")]) | |
1205 | (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") ;; per-unspec modifier text spliced into the cvt template | |
1206 | (UNSPEC_FPINT_BTRUNC ".rzi") | |
1207 | (UNSPEC_FPINT_CEIL ".rpi") | |
1208 | (UNSPEC_FPINT_NEARBYINT "%#i")]) ;; nearbyint has no fixed modifier: it defers to the %# operand-printer code plus i | |
1209 | ||
1210 | (define_insn "<FPINT:fpint_name><SDFM:mode>2" ;; One insn per (FPINT unspec, SDFM mode) pair: float to float rounding to an integral value. | |
1211 | [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") | |
1212 | (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
1213 | FPINT))] | |
1214 | "" | |
1215 | "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;") ;; cvt with the modifier chosen by fpint_roundingmode | |
1216 | ||
1217 | (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL]) ;; the floor and ceil unspecs only, reused for float to integer forms | |
1218 | (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor") ;; per-unspec pattern name prefix | |
1219 | (UNSPEC_FPINT_CEIL "lceil")]) | |
1220 | (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi") ;; per-unspec modifier text spliced into the cvt template | |
1221 | (UNSPEC_FPINT_CEIL ".rpi")]) | |
1222 | ||
1223 | (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2" ;; lfloor and lceil: SDFM source rounded into an SDIM integer destination. | |
1224 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1225 | (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")] | |
1226 | FPINT2))] | |
1227 | "" | |
1228 | "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;") ;; destination is always printed as .s | |
1229 | ||
1230 | ;; Miscellaneous | |
1231 | ||
1232 | (define_insn "nop" ;; No-operation insn: pattern is (const_int 0). | |
1233 | [(const_int 0)] | |
1234 | "" | |
1235 | "") ;; empty output template: nothing is printed | |
1236 | ||
1237 | (define_insn "return" ;; Function return; the assembly text comes from nvptx_output_return. | |
1238 | [(return)] | |
1239 | "" | |
1240 | { | |
1241 | return nvptx_output_return (); /* helper is defined outside this section of the file */ | |
1242 | }) | |
1243 | ||
1244 | (define_expand "epilogue" ;; Function epilogue: consists solely of a return jump. | |
1245 | [(clobber (const_int 0))] ;; placeholder pattern; never emitted because the body ends with DONE | |
1246 | "" | |
1247 | { | |
1248 | emit_jump_insn (gen_return ()); /* uses the return insn defined in this file */ | |
1249 | DONE; | |
1250 | }) | |
1251 | ||
1252 | (define_expand "nonlocal_goto" ;; Nonlocal goto is rejected: report it with sorry and emit a nop. | |
1253 | [(match_operand 0 "" "") | |
1254 | (match_operand 1 "" "") | |
1255 | (match_operand 2 "" "") | |
1256 | (match_operand 3 "" "")] ;; none of the four operands is used by the body | |
1257 | "" | |
1258 | { | |
1259 | sorry ("target cannot support nonlocal goto."); | |
1260 | emit_insn (gen_nop ()); /* emit the nop insn so the expansion still produces something */ | |
1261 | DONE; | |
1262 | }) | |
1263 | ||
1264 | (define_expand "nonlocal_goto_receiver" ;; Receiver side of a nonlocal goto: also rejected with sorry. | |
1265 | [(const_int 0)] | |
1266 | "" | |
1267 | { | |
1268 | sorry ("target cannot support nonlocal goto."); /* no DONE follows, so the (const_int 0) pattern above is still emitted */ | |
1269 | }) | |
1270 | ||
ed20400d | 1271 | (define_expand "allocate_stack" ;; Dynamic stack allocation: currently rejected with sorry; the real lowering below is unreachable. |
1272 | [(match_operand 0 "nvptx_register_operand") | |
1273 | (match_operand 1 "nvptx_register_operand")] | |
1274 | "" | |
1275 | { | |
f289122f | 1276 | /* The ptx documentation specifies an alloca intrinsic (for 32 bit |
1277 | only) but notes it is not implemented. The assembler emits a | |
1278 | confused error message. Issue a blunt one now instead. */ | |
1279 | sorry ("target cannot support alloca."); | |
1280 | emit_insn (gen_nop ()); | |
1281 | DONE; /* expansion ends here */ | |
ed20400d | 1282 | if (TARGET_ABI64) /* NOTE(review): not reached because of the DONE above; presumably kept as the intended lowering for when alloca works - confirm */ |
1283 | emit_insn (gen_allocate_stack_di (operands[0], operands[1])); | |
1284 | else | |
1285 | emit_insn (gen_allocate_stack_si (operands[0], operands[1])); | |
1286 | DONE; | |
1287 | }) | |
1288 | ||
1289 | (define_insn "allocate_stack_<mode>" ;; Stack allocation insn for each mode in P, modelled as UNSPEC_ALLOCA of the size operand. | |
1290 | [(set (match_operand:P 0 "nvptx_register_operand" "=R") | |
1291 | (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")] | |
1292 | UNSPEC_ALLOCA))] | |
8ce80784 | 1293 | "" |
1294 | "%.\\tcall (%0), %%alloca, (%1);") ;; printed as a call to the literal name %alloca (the doubled percent yields one percent sign) | |
1295 | ||
1296 | (define_expand "restore_stack_block" ;; Block-level stack restore: expands to nothing. | |
1297 | [(match_operand 0 "register_operand" "") | |
1298 | (match_operand 1 "register_operand" "")] | |
1299 | "" | |
1300 | { | |
1301 | DONE; /* no insns are emitted */ | |
1302 | }) | |
1303 | ||
1304 | (define_expand "restore_stack_function" ;; Function-level stack restore: expands to nothing. | |
1305 | [(match_operand 0 "register_operand" "") | |
1306 | (match_operand 1 "register_operand" "")] | |
1307 | "" | |
1308 | { | |
1309 | DONE; /* no insns are emitted */ | |
1310 | }) | |
1311 | ||
1312 | (define_insn "trap" ;; Unconditional trap: the trap_if condition is the constant 1. | |
1313 | [(trap_if (const_int 1) (const_int 0))] | |
1314 | "" | |
1315 | "trap;") ;; note: no leading %. or tab in this template | |
1316 | ||
1317 | (define_insn "trap_if_true" ;; Trap when BI register operand 0 is nonzero. | |
1318 | [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R") | |
1319 | (const_int 0)) | |
1320 | (const_int 0))] | |
1321 | "" | |
1322 | "%j0 trap;") ;; same %j0 prefix as the br_true template | |
1323 | ||
1324 | (define_insn "trap_if_false" ;; Trap when BI register operand 0 is zero. | |
1325 | [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R") | |
1326 | (const_int 0)) | |
1327 | (const_int 0))] | |
1328 | "" | |
1329 | "%J0 trap;") ;; same %J0 prefix as the br_false template | |
1330 | ||
1331 | (define_expand "ctrap<mode>4" ;; Conditional trap on an SDIM comparison: expands the compare, then emits trap_if_true. | |
1332 | [(trap_if (match_operator 0 "nvptx_comparison_operator" | |
1333 | [(match_operand:SDIM 1 "nvptx_register_operand") | |
1334 | (match_operand:SDIM 2 "nvptx_nonmemory_operand")]) | |
1335 | (match_operand 3 "const_0_operand"))] | |
1336 | "" | |
1337 | { | |
1338 | rtx t = nvptx_expand_compare (operands[0]); /* same helper the cbranch expanders use */ | |
1339 | emit_insn (gen_trap_if_true (t)); /* NOTE(review): t is the whole rtx returned by the helper, while trap_if_true declares operand 0 as a BI nvptx_register_operand; the cbranch expanders treat t as the comparison and XEXP (t, 0) as its first operand - confirm passing t itself is intended */ | |
1340 | DONE; | |
1341 | }) | |
1342 | ||
b3787ae4 | 1343 | (define_insn "oacc_dim_size" ;; Loads into SI operand 0 the size value for the level selected by constant operand 1 (UNSPEC_DIM_SIZE). |
1344 | [(set (match_operand:SI 0 "nvptx_register_operand" "") | |
1345 | (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] | |
1346 | UNSPEC_DIM_SIZE))] | |
8ce80784 | 1347 | "" |
b3787ae4 | 1348 | { |
1349 | static const char *const asms[] = | |
1350 | { /* Must match oacc_loop_levels ordering. */ | |
1351 | "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */ | |
1352 | "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */ | |
1353 | "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */ | |
1354 | }; | |
1355 | return asms[INTVAL (operands[1])]; /* no range check: operand 1 must be 0, 1 or 2 */ | |
1356 | }) | |
8ce80784 | 1357 | |
b3787ae4 | 1358 | (define_insn "oacc_dim_pos" ;; Loads into SI operand 0 the position value for the level selected by constant operand 1 (volatile UNSPECV_DIM_POS). |
8ce80784 | 1359 | [(set (match_operand:SI 0 "nvptx_register_operand" "") |
b3787ae4 | 1360 | (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")] |
1361 | UNSPECV_DIM_POS))] | |
8ce80784 | 1362 | "" |
1363 | { | |
b3787ae4 | 1364 | static const char *const asms[] = |
1365 | { /* Must match oacc_loop_levels ordering. */ | |
1366 | "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */ | |
1367 | "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */ | |
1368 | "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */ | |
1369 | }; | |
1370 | return asms[INTVAL (operands[1])]; /* no range check: operand 1 must be 0, 1 or 2 */ | |
8ce80784 | 1371 | }) |
1372 | ||
b3787ae4 | 1373 | (define_insn "nvptx_fork" ;; Marker insn (volatile UNSPECV_FORK) carrying a constant operand. |
1374 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1375 | UNSPECV_FORK)] | |
8ce80784 | 1376 | "" |
b3787ae4 | 1377 | "// fork %0;" |
1378 | ) ;; the template is only a // line, presumably an assembler comment - confirm | |
8ce80784 | 1379 | |
b3787ae4 | 1380 | (define_insn "nvptx_forked" ;; Marker insn (volatile UNSPECV_FORKED) carrying a constant operand. |
1381 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1382 | UNSPECV_FORKED)] | |
1383 | "" | |
1384 | "// forked %0;" | |
1385 | ) ;; the template is only a // line, presumably an assembler comment - confirm | |
1386 | ||
1387 | (define_insn "nvptx_joining" ;; Marker insn (volatile UNSPECV_JOINING) carrying a constant operand. | |
1388 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1389 | UNSPECV_JOINING)] | |
1390 | "" | |
1391 | "// joining %0;" | |
1392 | ) ;; the template is only a // line, presumably an assembler comment - confirm | |
1393 | ||
1394 | (define_insn "nvptx_join" ;; Marker insn (volatile UNSPECV_JOIN) carrying a constant operand. | |
1395 | [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] | |
1396 | UNSPECV_JOIN)] | |
1397 | "" | |
1398 | "// join %0;" | |
1399 | ) ;; the template is only a // line, presumably an assembler comment - confirm | |
1400 | ||
1401 | (define_expand "oacc_fork" ;; Expander for the fork operation: optional move of operand 1 into operand 0, then nvptx_expand_oacc_fork. | |
1402 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
1403 | (match_operand:SI 1 "nvptx_general_operand" "")) | |
1404 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] | |
1405 | UNSPECV_FORKED)] | |
8ce80784 | 1406 | "" |
1407 | { | |
b3787ae4 | 1408 | if (operands[0] != const0_rtx) /* a constant-zero destination means there is nothing to copy */ |
1409 | emit_move_insn (operands[0], operands[1]); | |
1410 | nvptx_expand_oacc_fork (INTVAL (operands[2])); /* operand 2 is passed on as a plain integer */ | |
1411 | DONE; | |
1412 | }) | |
1413 | ||
1414 | (define_expand "oacc_join" ;; Expander for the join operation: optional move of operand 1 into operand 0, then nvptx_expand_oacc_join. | |
1415 | [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") | |
1416 | (match_operand:SI 1 "nvptx_general_operand" "")) | |
1417 | (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")] | |
1418 | UNSPECV_JOIN)] | |
1419 | "" | |
1420 | { | |
1421 | if (operands[0] != const0_rtx) /* a constant-zero destination means there is nothing to copy */ | |
1422 | emit_move_insn (operands[0], operands[1]); | |
1423 | nvptx_expand_oacc_join (INTVAL (operands[2])); /* operand 2 is passed on as a plain integer */ | |
1424 | DONE; | |
8ce80784 | 1425 | }) |
1426 | ||
b3787ae4 | 1427 | ;; only 32-bit shuffles exist. |
1428 | (define_insn "nvptx_shuffle<mode>" ;; Shuffle of a BITS-mode register, modelled as UNSPEC_SHUFFLE. | |
1429 | [(set (match_operand:BITS 0 "nvptx_register_operand" "=R") | |
1430 | (unspec:BITS | |
1431 | [(match_operand:BITS 1 "nvptx_register_operand" "R") ;; value to shuffle | |
1432 | (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") ;; printed as the third assembly argument | |
1433 | (match_operand:SI 3 "const_int_operand" "n")] ;; constant printed through %S3 as part of the mnemonic | |
1434 | UNSPEC_SHUFFLE))] | |
1435 | "" | |
1436 | "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;") ;; the type is always .b32 and the last argument is the literal 31 | |
1437 | ||
1438 | ;; extract parts of a 64 bit object into 2 32-bit ints | |
1439 | (define_insn "unpack<mode>si2" ;; Two SI outputs (operands 0 and 1) from one BITD input (operand 2). | |
1440 | [(set (match_operand:SI 0 "nvptx_register_operand" "=R") | |
1441 | (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R") | |
1442 | (const_int 0)] UNSPEC_BIT_CONV)) ;; the constant 0 tags the first half | |
1443 | (set (match_operand:SI 1 "nvptx_register_operand" "=R") | |
1444 | (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))] ;; the constant 1 tags the second half | |
1445 | "" | |
1446 | "%.\\tmov.b64\\t{%0,%1}, %2;") ;; one mov.b64 into the register pair {%0,%1} | |
1447 | ||
1448 | ;; pack 2 32-bit ints into a 64 bit object | |
1449 | (define_insn "packsi<mode>2" ;; One BITD output (operand 0) from two SI inputs (operands 1 and 2). | |
1450 | [(set (match_operand:BITD 0 "nvptx_register_operand" "=R") | |
1451 | (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R") | |
1452 | (match_operand:SI 2 "nvptx_register_operand" "R")] | |
1453 | UNSPEC_BIT_CONV))] | |
1454 | "" | |
1455 | "%.\\tmov.b64\\t%0, {%1,%2};") ;; one mov.b64 from the register pair {%1,%2} | |
1456 | ||
1457 | (define_insn "worker_load<mode>" ;; Load of an SDISDFM value from memory operand 1, tagged UNSPEC_SHARED_DATA. | |
1458 | [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R") | |
1459 | (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "m")] | |
1460 | UNSPEC_SHARED_DATA))] | |
1461 | "" | |
1462 | "%.\\tld.shared%u0\\t%0, %1;") ;; emits ld.shared | |
1463 | ||
1464 | (define_insn "worker_store<mode>" ;; Store of SDISDFM register operand 0 to memory operand 1, tagged UNSPEC_SHARED_DATA. | |
1465 | [(set (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "=m")] ;; note the numbering: the memory destination is operand 1 | |
1466 | UNSPEC_SHARED_DATA) | |
1467 | (match_operand:SDISDFM 0 "nvptx_register_operand" "R"))] ;; the stored value is operand 0 | |
1468 | "" | |
1469 | "%.\\tst.shared%u1\\t%1, %0;") ;; emits st.shared with the address first, then the value | |
1470 | ||
8ce80784 | 1471 | ;; Atomic insns. |
1472 | ||
1473 | (define_expand "atomic_compare_and_swap<mode>" ;; Compare-and-swap expander for SDIM: emits the _1 insn, then derives the boolean success flag. | |
1474 | [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output | |
1475 | (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output | |
1476 | (match_operand:SDIM 2 "memory_operand") ;; memory | |
1477 | (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input | |
1478 | (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input | |
1479 | (match_operand:SI 5 "const_int_operand") ;; is_weak | |
1480 | (match_operand:SI 6 "const_int_operand") ;; success model | |
1481 | (match_operand:SI 7 "const_int_operand")] ;; failure model | |
1482 | "" | |
1483 | { | |
c68f6b1c | 1484 | emit_insn (gen_atomic_compare_and_swap<mode>_1 |
1485 | (operands[1], operands[2], operands[3], operands[4], operands[6])); /* operands 5 (is_weak) and 7 (failure model) are not forwarded */ | |
1486 | ||
1487 | rtx cond = gen_reg_rtx (BImode); /* fresh BI register for the success predicate */ | |
1488 | emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); /* success means the old value equals the expected value */ | |
1489 | emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0))); /* operand 0 becomes 1 on success, 0 otherwise */ | |
8ce80784 | 1490 | DONE; |
1491 | }) | |
1492 | ||
1493 | (define_insn "atomic_compare_and_swap<mode>_1" ;; The compare-and-swap instruction itself: result in operand 0, memory operand 1 is read and written. | |
1494 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1495 | (unspec_volatile:SDIM | |
1496 | [(match_operand:SDIM 1 "memory_operand" "+m") | |
1497 | (match_operand:SDIM 2 "nvptx_register_operand" "R") | |
1498 | (match_operand:SDIM 3 "nvptx_register_operand" "R") | |
1499 | (match_operand:SI 4 "const_int_operand")] ;; operand 4 is not referenced by the output template | |
1500 | UNSPECV_CAS)) | |
1501 | (set (match_dup 1) | |
1502 | (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] ;; second set records that the memory operand is modified | |
1503 | "" | |
1504 | "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;") ;; emits atom with a .cas.b suffix; %A1 is printed from the memory operand | |
1505 | ||
1506 | (define_insn "atomic_exchange<mode>" ;; Atomic exchange for SDIM: operand 0 gets the result, memory operand 1 is set to operand 2. | |
1507 | [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output | |
1508 | (unspec_volatile:SDIM | |
1509 | [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory | |
1510 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1511 | UNSPECV_XCHG)) | |
1512 | (set (match_dup 1) | |
1513 | (match_operand:SDIM 2 "nvptx_register_operand" "R"))] ;; input | |
1514 | "" | |
1515 | "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;") ;; emits atom with a .exch.b suffix; operand 3 is not printed | |
1516 | ||
1517 | (define_insn "atomic_fetch_add<mode>" ;; Atomic fetch-and-add for SDIM: memory operand 1 is updated, operand 0 receives the result. | |
1518 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
1519 | (unspec_volatile:SDIM | |
1520 | [(plus:SDIM (match_dup 1) | |
1521 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
1522 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1523 | UNSPECV_LOCK)) | |
1524 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1525 | (match_dup 1))] | |
1526 | "" | |
1527 | "%.\\tatom%A1.add%t0\\t%0, %1, %2;") ;; emits atom with a .add suffix; operand 3 is not printed | |
1528 | ||
1529 | (define_insn "atomic_fetch_addsf" ;; SF variant of atomic fetch-and-add; same shape and template as the SDIM pattern. | |
1530 | [(set (match_operand:SF 1 "memory_operand" "+m") | |
1531 | (unspec_volatile:SF | |
1532 | [(plus:SF (match_dup 1) | |
1533 | (match_operand:SF 2 "nvptx_nonmemory_operand" "RF")) ;; addend accepts constraint letters R or F | |
1534 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1535 | UNSPECV_LOCK)) | |
1536 | (set (match_operand:SF 0 "nvptx_register_operand" "=R") | |
1537 | (match_dup 1))] | |
1538 | "" | |
1539 | "%.\\tatom%A1.add%t0\\t%0, %1, %2;") ;; emits atom with a .add suffix; operand 3 is not printed | |
1540 | ||
1541 | (define_code_iterator any_logic [and ior xor]) ;; the three bitwise rtx codes handled by one shared atomic pattern | |
1542 | (define_code_attr logic [(and "and") (ior "or") (xor "xor")]) ;; text used for each code in pattern names and templates | |
1543 | ||
1544 | ;; Currently disabled until we add better subtarget support - requires sm_32. | |
1545 | (define_insn "atomic_fetch_<logic><mode>" ;; Atomic fetch-and-{and,or,xor} for SDIM, one pattern per code in any_logic. | |
1546 | [(set (match_operand:SDIM 1 "memory_operand" "+m") | |
1547 | (unspec_volatile:SDIM | |
1548 | [(any_logic:SDIM (match_dup 1) | |
1549 | (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")) | |
1550 | (match_operand:SI 3 "const_int_operand")] ;; model | |
1551 | UNSPECV_LOCK)) | |
1552 | (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") | |
1553 | (match_dup 1))] | |
1554 | "0" ;; constant-false condition: this is what disables the pattern | |
1555 | "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;") ;; here the .b type suffix is printed before the operation name | |
b3787ae4 | 1556 | |
1557 | (define_insn "nvptx_barsync" ;; Barrier insn (volatile UNSPECV_BARSYNC) taking a constant operand. | |
1558 | [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] | |
1559 | UNSPECV_BARSYNC)] | |
1560 | "" | |
1561 | "\\tbar.sync\\t%0;") ;; emits bar.sync with operand 0; this template has no leading %. | |