1 /* Target code for NVPTX.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "double-int.h"
37 #include "insn-flags.h"
39 #include "insn-attr.h"
40 #include "insn-codes.h"
42 #include "hard-reg-set.h"
45 #include "statistics.h"
47 #include "fixed-value.h"
48 #include "insn-config.h"
64 #include "tm-constrs.h"
65 #include "langhooks.h"
68 #include "target-def.h"
69 #include "diagnostic.h"
71 #include "basic-block.h"
73 #include "stor-layout.h"
77 /* Record the function decls we've written, and the libfuncs and function
78 decls corresponding to them. */
/* func_decls is the string stream that nvptx_record_fndecl and
   nvptx_expand_call hand to the declaration writers
   (write_function_decl_and_comment and write_func_decl_from_insn), so it
   accumulates the text of the ptx function declarations.  */
79 static std::stringstream func_decls
;
81 struct declared_libfunc_hasher
: ggc_cache_hasher
<rtx
>
83 static hashval_t
hash (rtx x
) { return htab_hash_pointer (x
); }
84 static bool equal (rtx a
, rtx b
) { return a
== b
; }
88 hash_table
<declared_libfunc_hasher
> *declared_libfuncs_htab
;
90 struct tree_hasher
: ggc_cache_hasher
<tree
>
92 static hashval_t
hash (tree t
) { return htab_hash_pointer (t
); }
93 static bool equal (tree a
, tree b
) { return a
== b
; }
/* Function decls that nvptx_record_fndecl has looked up/inserted via
   find_slot.  Allocated in nvptx_option_override.  */
96 static GTY((cache
)) hash_table
<tree_hasher
> *declared_fndecls_htab
;
/* Function decls inserted by nvptx_record_needed_fndecl; presumably the
   ones whose declaration is deferred -- confirm against the full
   function.  Allocated in nvptx_option_override.  */
97 static GTY((cache
)) hash_table
<tree_hasher
> *needed_fndecls_htab
;
99 /* Allocate a new, cleared machine_function structure. */
101 static struct machine_function
*
102 nvptx_init_machine_status (void)
104 struct machine_function
*p
= ggc_cleared_alloc
<machine_function
> ();
105 p
->ret_reg_mode
= VOIDmode
;
109 /* Implement TARGET_OPTION_OVERRIDE. */
112 nvptx_option_override (void)
114 init_machine_status
= nvptx_init_machine_status
;
115 /* Gives us a predictable order, which we need especially for variables. */
116 flag_toplevel_reorder
= 1;
117 /* Assumes that it will see only hard registers. */
118 flag_var_tracking
= 0;
119 write_symbols
= NO_DEBUG
;
120 debug_info_level
= DINFO_LEVEL_NONE
;
122 declared_fndecls_htab
= hash_table
<tree_hasher
>::create_ggc (17);
123 needed_fndecls_htab
= hash_table
<tree_hasher
>::create_ggc (17);
124 declared_libfuncs_htab
125 = hash_table
<declared_libfunc_hasher
>::create_ggc (17);
128 /* Return the mode to be used when declaring a ptx object for OBJ.
129 For objects with subparts such as complex modes this is the mode
133 nvptx_underlying_object_mode (rtx obj
)
135 if (GET_CODE (obj
) == SUBREG
)
136 obj
= SUBREG_REG (obj
);
137 machine_mode mode
= GET_MODE (obj
);
140 if (COMPLEX_MODE_P (mode
))
141 return GET_MODE_INNER (mode
);
145 /* Return a ptx type for MODE. If PROMOTE, then use .u32 for QImode to
146 deal with ptx idiosyncrasies. */
149 nvptx_ptx_type_from_mode (machine_mode mode
, bool promote
)
179 /* Return the number of pieces to use when dealing with a pseudo of *PMODE.
180 Alter *PMODE if we return a number greater than one. */
183 maybe_split_mode (machine_mode
*pmode
)
185 machine_mode mode
= *pmode
;
187 if (COMPLEX_MODE_P (mode
))
189 *pmode
= GET_MODE_INNER (mode
);
192 else if (mode
== TImode
)
200 /* Like maybe_split_mode, but only return whether or not the mode
201 needs to be split. */
203 nvptx_split_reg_p (machine_mode mode
)
205 if (COMPLEX_MODE_P (mode
))
212 #define PASS_IN_REG_P(MODE, TYPE) \
213 ((GET_MODE_CLASS (MODE) == MODE_INT \
214 || GET_MODE_CLASS (MODE) == MODE_FLOAT \
215 || ((GET_MODE_CLASS (MODE) == MODE_COMPLEX_INT \
216 || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
217 && !AGGREGATE_TYPE_P (TYPE))) \
/* Nonzero if MODE is a scalar integer or floating-point mode (class
   MODE_INT or MODE_FLOAT) whose size is at most 8 bytes.  Callers in this
   file treat a non-void result for which this is false as returned in
   memory.  */
220 #define RETURN_IN_REG_P(MODE) \
221 ((GET_MODE_CLASS (MODE) == MODE_INT \
222 || GET_MODE_CLASS (MODE) == MODE_FLOAT) \
223 && GET_MODE_SIZE (MODE) <= 8)
225 /* Perform a mode promotion for a function argument with MODE. Return
226 the promoted mode. */
229 arg_promotion (machine_mode mode
)
231 if (mode
== QImode
|| mode
== HImode
)
236 /* Write the declaration of a function arg of TYPE to S. I is the index
237 of the argument, MODE its mode. NO_ARG_TYPES is true if this is for
238 a decl with zero TYPE_ARG_TYPES, i.e. an old-style C decl. */
241 write_one_arg (std::stringstream
&s
, tree type
, int i
, machine_mode mode
,
244 if (!PASS_IN_REG_P (mode
, type
))
247 int count
= maybe_split_mode (&mode
);
251 write_one_arg (s
, NULL_TREE
, i
, mode
, false);
252 write_one_arg (s
, NULL_TREE
, i
+ 1, mode
, false);
256 if (no_arg_types
&& !AGGREGATE_TYPE_P (type
))
260 mode
= arg_promotion (mode
);
265 s
<< ".param" << nvptx_ptx_type_from_mode (mode
, false) << " %in_ar"
266 << (i
+ 1) << (mode
== QImode
|| mode
== HImode
? "[1]" : "");
268 s
<< "[" << int_size_in_bytes (type
) << "]";
272 /* Look for attributes in ATTRS that would indicate we must write a function
273 as a .entry kernel rather than a .func. Return true if one is found. */
276 write_as_kernel (tree attrs
)
278 return (lookup_attribute ("kernel", attrs
) != NULL_TREE
279 || lookup_attribute ("omp target entrypoint", attrs
) != NULL_TREE
);
282 /* Write a function decl for DECL to S, where NAME is the name to be used. */
285 nvptx_write_function_decl (std::stringstream
&s
, const char *name
, const_tree decl
)
287 tree fntype
= TREE_TYPE (decl
);
288 tree result_type
= TREE_TYPE (fntype
);
289 tree args
= TYPE_ARG_TYPES (fntype
);
290 tree attrs
= DECL_ATTRIBUTES (decl
);
291 bool kernel
= write_as_kernel (attrs
);
292 bool is_main
= strcmp (name
, "main") == 0;
293 bool args_from_decl
= false;
296 NULL in TYPE_ARG_TYPES, for old-style functions
297 NULL in DECL_ARGUMENTS, for builtin functions without another
299 So we have to pick the best one we have. */
302 args
= DECL_ARGUMENTS (decl
);
303 args_from_decl
= true;
306 if (DECL_EXTERNAL (decl
))
308 else if (TREE_PUBLIC (decl
))
316 /* Declare the result. */
317 bool return_in_mem
= false;
318 if (TYPE_MODE (result_type
) != VOIDmode
)
320 machine_mode mode
= TYPE_MODE (result_type
);
321 if (!RETURN_IN_REG_P (mode
))
322 return_in_mem
= true;
325 mode
= arg_promotion (mode
);
326 s
<< "(.param" << nvptx_ptx_type_from_mode (mode
, false)
336 /* Declare argument types. */
337 if ((args
!= NULL_TREE
338 && !(TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
) == void_type_node
))
341 || DECL_STATIC_CHAIN (decl
))
345 bool any_args
= false;
348 s
<< ".param.u" << GET_MODE_BITSIZE (Pmode
) << " %in_ar1";
351 while (args
!= NULL_TREE
)
353 tree type
= args_from_decl
? TREE_TYPE (args
) : TREE_VALUE (args
);
354 machine_mode mode
= TYPE_MODE (type
);
356 if (mode
!= VOIDmode
)
358 i
= write_one_arg (s
, type
, i
, mode
,
359 TYPE_ARG_TYPES (fntype
) == 0);
363 args
= TREE_CHAIN (args
);
365 if (stdarg_p (fntype
))
368 s
<< ", .param.u" << GET_MODE_BITSIZE (Pmode
) << " %in_argp";
370 if (DECL_STATIC_CHAIN (decl
))
374 s
<< ".reg.u" << GET_MODE_BITSIZE (Pmode
)
375 << reg_names
[STATIC_CHAIN_REGNUM
];
377 if (!any_args
&& is_main
)
378 s
<< ".param.u32 %argc, .param.u" << GET_MODE_BITSIZE (Pmode
)
384 /* Walk either ARGTYPES or ARGS if the former is null, and write out part of
385 the function header to FILE. If WRITE_COPY is false, write reg
386 declarations, otherwise write the copy from the incoming argument to that
387 reg. RETURN_IN_MEM indicates whether to start counting arg numbers at 1
391 walk_args_for_param (FILE *file
, tree argtypes
, tree args
, bool write_copy
,
396 bool args_from_decl
= false;
398 args_from_decl
= true;
402 for (i
= return_in_mem
? 1 : 0; args
!= NULL_TREE
; args
= TREE_CHAIN (args
))
404 tree type
= args_from_decl
? TREE_TYPE (args
) : TREE_VALUE (args
);
405 machine_mode mode
= TYPE_MODE (type
);
407 if (mode
== VOIDmode
)
410 if (!PASS_IN_REG_P (mode
, type
))
413 int count
= maybe_split_mode (&mode
);
416 if (argtypes
== NULL
&& !AGGREGATE_TYPE_P (type
))
422 mode
= arg_promotion (mode
);
428 fprintf (file
, "\tld.param%s %%ar%d, [%%in_ar%d];\n",
429 nvptx_ptx_type_from_mode (mode
, false), i
, i
);
431 fprintf (file
, "\t.reg%s %%ar%d;\n",
432 nvptx_ptx_type_from_mode (mode
, false), i
);
437 /* Write a .func or .kernel declaration (not a definition) along with
438 a helper comment for use by ld. S is the stream to write to, DECL
439 the decl for the function with name NAME. */
442 write_function_decl_and_comment (std::stringstream
&s
, const char *name
, const_tree decl
)
445 if (TREE_PUBLIC (decl
))
447 s
<< " FUNCTION DECL: ";
453 nvptx_write_function_decl (s
, name
, decl
);
457 /* Check NAME for special function names and redirect them by returning a
458 replacement. This applies to malloc, free and realloc, for which we
459 want to use libgcc wrappers, and call, which triggers a bug in ptxas. */
462 nvptx_name_replacement (const char *name
)
464 if (strcmp (name
, "call") == 0)
465 return "__nvptx_call";
466 if (strcmp (name
, "malloc") == 0)
467 return "__nvptx_malloc";
468 if (strcmp (name
, "free") == 0)
469 return "__nvptx_free";
470 if (strcmp (name
, "realloc") == 0)
471 return "__nvptx_realloc";
475 /* If DECL is a FUNCTION_DECL, check the hash table to see if we
476 already encountered it, and if not, insert it and write a ptx
477 declaration that will be output at the end of compilation. */
480 nvptx_record_fndecl (tree decl
, bool force
= false)
482 if (decl
== NULL_TREE
|| TREE_CODE (decl
) != FUNCTION_DECL
483 || !DECL_EXTERNAL (decl
))
486 if (!force
&& TYPE_ARG_TYPES (TREE_TYPE (decl
)) == NULL_TREE
)
489 tree
*slot
= declared_fndecls_htab
->find_slot (decl
, INSERT
);
493 const char *name
= get_fnname_from_decl (decl
);
494 name
= nvptx_name_replacement (name
);
495 write_function_decl_and_comment (func_decls
, name
, decl
);
500 /* Record that we need to emit a ptx decl for DECL. Either do it now, or
501 record it for later in case we have no argument information at this
505 nvptx_record_needed_fndecl (tree decl
)
507 if (nvptx_record_fndecl (decl
))
510 tree
*slot
= needed_fndecls_htab
->find_slot (decl
, INSERT
);
515 /* Implement ASM_DECLARE_FUNCTION_NAME. Writes the start of a ptx
516 function, including local var decls and copies from the arguments to
520 nvptx_declare_function_name (FILE *file
, const char *name
, const_tree decl
)
522 tree fntype
= TREE_TYPE (decl
);
523 tree result_type
= TREE_TYPE (fntype
);
525 name
= nvptx_name_replacement (name
);
528 write_function_decl_and_comment (s
, name
, decl
);
530 if (TREE_PUBLIC (decl
))
532 s
<< " FUNCTION DEF: ";
540 nvptx_write_function_decl (s
, name
, decl
);
541 fprintf (file
, "%s", s
.str().c_str());
543 bool return_in_mem
= false;
544 if (TYPE_MODE (result_type
) != VOIDmode
)
546 machine_mode mode
= TYPE_MODE (result_type
);
547 if (!RETURN_IN_REG_P (mode
))
548 return_in_mem
= true;
551 fprintf (file
, "\n{\n");
553 /* Ensure all arguments that should live in a register have one
554 declared. We'll emit the copies below. */
555 walk_args_for_param (file
, TYPE_ARG_TYPES (fntype
), DECL_ARGUMENTS (decl
),
556 false, return_in_mem
);
558 fprintf (file
, "\t.reg.u%d %%ar1;\n", GET_MODE_BITSIZE (Pmode
));
559 else if (TYPE_MODE (result_type
) != VOIDmode
)
561 machine_mode mode
= arg_promotion (TYPE_MODE (result_type
));
562 fprintf (file
, ".reg%s %%retval;\n",
563 nvptx_ptx_type_from_mode (mode
, false));
566 if (stdarg_p (fntype
))
567 fprintf (file
, "\t.reg.u%d %%argp;\n", GET_MODE_BITSIZE (Pmode
));
569 fprintf (file
, "\t.reg.u%d %s;\n", GET_MODE_BITSIZE (Pmode
),
570 reg_names
[OUTGOING_STATIC_CHAIN_REGNUM
]);
572 /* Declare the pseudos we have as ptx registers. */
573 int maxregs
= max_reg_num ();
574 for (int i
= LAST_VIRTUAL_REGISTER
+ 1; i
< maxregs
; i
++)
576 if (regno_reg_rtx
[i
] != const0_rtx
)
578 machine_mode mode
= PSEUDO_REGNO_MODE (i
);
579 int count
= maybe_split_mode (&mode
);
583 fprintf (file
, "\t.reg%s %%r%d$%d;\n",
584 nvptx_ptx_type_from_mode (mode
, true),
588 fprintf (file
, "\t.reg%s %%r%d;\n",
589 nvptx_ptx_type_from_mode (mode
, true),
594 /* The only reason we might be using outgoing args is if we call a stdargs
595 function. Allocate the space for this. If we called varargs functions
596 without passing any variadic arguments, we'll see a reference to outargs
597 even with a zero outgoing_args_size. */
598 HOST_WIDE_INT sz
= crtl
->outgoing_args_size
;
601 if (cfun
->machine
->has_call_with_varargs
)
/* Keep whitespace between the string literals and HOST_WIDE_INT_PRINT_DEC:
   without it C++11 lexes the macro name as a user-defined-literal suffix
   instead of expanding it.  */
602 fprintf (file, "\t.reg.u%d %%outargs;\n"
603 "\t.local.align 8 .b8 %%outargs_ar[" HOST_WIDE_INT_PRINT_DEC "];\n",
605 if (cfun
->machine
->punning_buffer_size
> 0)
606 fprintf (file
, "\t.reg.u%d %%punbuffer;\n"
607 "\t.local.align 8 .b8 %%punbuffer_ar[%d];\n",
608 BITS_PER_WORD
, cfun
->machine
->punning_buffer_size
);
610 /* Declare a local variable for the frame. */
611 sz
= get_frame_size ();
612 if (sz
> 0 || cfun
->machine
->has_call_with_sc
)
/* Declare the frame register and its backing local array; a zero-sized
   frame still gets a one-byte array.  Whitespace around
   HOST_WIDE_INT_PRINT_DEC is required so C++11 does not lex the macro
   name as a user-defined-literal suffix.  */
614 fprintf (file, "\t.reg.u%d %%frame;\n"
615 "\t.local.align 8 .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC "];\n",
616 BITS_PER_WORD, sz == 0 ? 1 : sz);
617 fprintf (file
, "\tcvta.local.u%d %%frame, %%farray;\n",
621 if (cfun
->machine
->has_call_with_varargs
)
622 fprintf (file
, "\tcvta.local.u%d %%outargs, %%outargs_ar;\n",
624 if (cfun
->machine
->punning_buffer_size
> 0)
625 fprintf (file
, "\tcvta.local.u%d %%punbuffer, %%punbuffer_ar;\n",
628 /* Now emit any copies necessary for arguments. */
629 walk_args_for_param (file
, TYPE_ARG_TYPES (fntype
), DECL_ARGUMENTS (decl
),
630 true, return_in_mem
);
632 fprintf (file
, "ld.param.u%d %%ar1, [%%in_ar1];\n",
633 GET_MODE_BITSIZE (Pmode
));
634 if (stdarg_p (fntype
))
635 fprintf (file
, "ld.param.u%d %%argp, [%%in_argp];\n",
636 GET_MODE_BITSIZE (Pmode
));
639 /* Output a return instruction. Also copy the return value to its outgoing
643 nvptx_output_return (void)
645 tree fntype
= TREE_TYPE (current_function_decl
);
646 tree result_type
= TREE_TYPE (fntype
);
647 if (TYPE_MODE (result_type
) != VOIDmode
)
649 machine_mode mode
= TYPE_MODE (result_type
);
650 if (RETURN_IN_REG_P (mode
))
652 mode
= arg_promotion (mode
);
653 fprintf (asm_out_file
, "\tst.param%s\t[%%out_retval], %%retval;\n",
654 nvptx_ptx_type_from_mode (mode
, false));
661 /* Construct a function declaration from a call insn. This can be
662 necessary for two reasons - either we have an indirect call which
663 requires a .callprototype declaration, or we have a libcall
664 generated by emit_library_call for which no decl exists. */
667 write_func_decl_from_insn (std::stringstream
&s
, rtx result
, rtx pat
,
670 bool callprototype
= register_operand (callee
, Pmode
);
671 const char *name
= "_";
674 name
= XSTR (callee
, 0);
675 name
= nvptx_name_replacement (name
);
676 s
<< "// BEGIN GLOBAL FUNCTION DECL: " << name
<< "\n";
678 s
<< (callprototype
? "\t.callprototype\t" : "\t.extern .func ");
680 if (result
!= NULL_RTX
)
683 s
<< nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result
)),
695 int nargs
= XVECLEN (pat
, 0) - 1;
699 for (int i
= 0; i
< nargs
; i
++)
701 rtx t
= XEXP (XVECEXP (pat
, 0, i
+ 1), 0);
702 machine_mode mode
= GET_MODE (t
);
703 int count
= maybe_split_mode (&mode
);
708 s
<< nvptx_ptx_type_from_mode (mode
, false);
714 if (mode
== QImode
|| mode
== HImode
)
716 if (i
+ 1 < nargs
|| count
> 0)
725 /* Terminate a function by writing a closing brace to FILE. */
728 nvptx_function_end (FILE *file
)
730 fprintf (file
, "\t}\n");
733 /* Decide whether we can make a sibling call to a function. For ptx, we
737 nvptx_function_ok_for_sibcall (tree
, tree
)
742 /* Implement the TARGET_CALL_ARGS hook. Record information about one
743 argument to the next call. */
746 nvptx_call_args (rtx arg
, tree funtype
)
748 if (cfun
->machine
->start_call
== NULL_RTX
)
750 cfun
->machine
->call_args
= NULL
;
751 cfun
->machine
->funtype
= funtype
;
752 cfun
->machine
->start_call
= const0_rtx
;
757 rtx_expr_list
*args_so_far
= cfun
->machine
->call_args
;
759 cfun
->machine
->call_args
= alloc_EXPR_LIST (VOIDmode
, arg
, args_so_far
);
762 /* Implement the corresponding END_CALL_ARGS hook. Clear and free the
763 information we recorded. */
766 nvptx_end_call_args (void)
768 cfun
->machine
->start_call
= NULL_RTX
;
769 free_EXPR_LIST_list (&cfun
->machine
->call_args
);
772 /* Emit the sequence for a call. */
775 nvptx_expand_call (rtx retval
, rtx address
)
778 rtx callee
= XEXP (address
, 0);
781 bool external_decl
= false;
784 for (t
= cfun
->machine
->call_args
; t
; t
= XEXP (t
, 1))
787 bool has_varargs
= false;
788 tree decl_type
= NULL_TREE
;
790 if (!call_insn_operand (callee
, Pmode
))
792 callee
= force_reg (Pmode
, callee
);
793 address
= change_address (address
, QImode
, callee
);
796 if (GET_CODE (callee
) == SYMBOL_REF
)
798 tree decl
= SYMBOL_REF_DECL (callee
);
799 if (decl
!= NULL_TREE
)
801 decl_type
= TREE_TYPE (decl
);
802 if (DECL_STATIC_CHAIN (decl
))
803 cfun
->machine
->has_call_with_sc
= true;
804 if (DECL_EXTERNAL (decl
))
805 external_decl
= true;
808 if (cfun
->machine
->funtype
809 /* It's possible to construct testcases where we call a variable.
810 See compile/20020129-1.c. stdarg_p will crash so avoid calling it
812 && (TREE_CODE (cfun
->machine
->funtype
) == FUNCTION_TYPE
813 || TREE_CODE (cfun
->machine
->funtype
) == METHOD_TYPE
)
814 && stdarg_p (cfun
->machine
->funtype
))
817 cfun
->machine
->has_call_with_varargs
= true;
819 vec
= rtvec_alloc (nargs
+ 1 + (has_varargs
? 1 : 0));
820 pat
= gen_rtx_PARALLEL (VOIDmode
, vec
);
823 rtx this_arg
= gen_reg_rtx (Pmode
);
825 emit_move_insn (this_arg
, stack_pointer_rtx
);
827 emit_move_insn (this_arg
, stack_pointer_rtx
);
828 XVECEXP (pat
, 0, nargs
+ 1) = gen_rtx_USE (VOIDmode
, this_arg
);
833 for (i
= 1, arg
= cfun
->machine
->call_args
; arg
; arg
= XEXP (arg
, 1), i
++)
835 rtx this_arg
= XEXP (arg
, 0);
836 XVECEXP (pat
, 0, i
) = gen_rtx_USE (VOIDmode
, this_arg
);
839 rtx tmp_retval
= retval
;
840 t
= gen_rtx_CALL (VOIDmode
, address
, const0_rtx
);
841 if (retval
!= NULL_RTX
)
843 if (!nvptx_register_operand (retval
, GET_MODE (retval
)))
844 tmp_retval
= gen_reg_rtx (GET_MODE (retval
));
845 t
= gen_rtx_SET (VOIDmode
, tmp_retval
, t
);
847 XVECEXP (pat
, 0, 0) = t
;
849 && (decl_type
== NULL_TREE
850 || (external_decl
&& TYPE_ARG_TYPES (decl_type
) == NULL_TREE
)))
852 rtx
*slot
= declared_libfuncs_htab
->find_slot (callee
, INSERT
);
856 write_func_decl_from_insn (func_decls
, retval
, pat
, callee
);
859 emit_call_insn (pat
);
860 if (tmp_retval
!= retval
)
861 emit_move_insn (retval
, tmp_retval
);
864 /* Implement TARGET_FUNCTION_ARG. */
867 nvptx_function_arg (cumulative_args_t
, machine_mode mode
,
868 const_tree
, bool named
)
870 if (mode
== VOIDmode
)
874 return gen_reg_rtx (mode
);
878 /* Implement TARGET_FUNCTION_INCOMING_ARG. */
881 nvptx_function_incoming_arg (cumulative_args_t cum_v
, machine_mode mode
,
882 const_tree
, bool named
)
884 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
885 if (mode
== VOIDmode
)
891 /* No need to deal with split modes here, the only case that can
892 happen is complex modes and those are dealt with by
893 TARGET_SPLIT_COMPLEX_ARG. */
894 return gen_rtx_UNSPEC (mode
,
895 gen_rtvec (1, GEN_INT (1 + cum
->count
)),
899 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
902 nvptx_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
903 const_tree type ATTRIBUTE_UNUSED
,
904 bool named ATTRIBUTE_UNUSED
)
906 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
913 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.
915 For nvptx, we know how to handle functions declared as stdarg: by
916 passing an extra pointer to the unnamed arguments. However, the
917 Fortran frontend can produce a different situation, where a
918 function pointer is declared with no arguments, but the actual
919 function and calls to it take more arguments. In that case, we
920 want to ensure the call matches the definition of the function. */
923 nvptx_strict_argument_naming (cumulative_args_t cum_v
)
925 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
926 return cum
->fntype
== NULL_TREE
|| stdarg_p (cum
->fntype
);
929 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. */
932 nvptx_function_arg_boundary (machine_mode mode
, const_tree type
)
934 unsigned int boundary
= type
? TYPE_ALIGN (type
) : GET_MODE_BITSIZE (mode
);
936 if (boundary
> BITS_PER_WORD
)
937 return 2 * BITS_PER_WORD
;
941 HOST_WIDE_INT size
= int_size_in_bytes (type
);
943 return 2 * BITS_PER_WORD
;
944 if (boundary
< BITS_PER_WORD
)
947 return BITS_PER_WORD
;
949 return 2 * BITS_PER_UNIT
;
955 /* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place
956 where function FUNC returns or receives a value of data type TYPE. */
959 nvptx_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
,
962 int unsignedp
= TYPE_UNSIGNED (type
);
963 machine_mode orig_mode
= TYPE_MODE (type
);
964 machine_mode mode
= promote_function_mode (type
, orig_mode
,
965 &unsignedp
, NULL_TREE
, 1);
967 return gen_rtx_REG (mode
, NVPTX_RETURN_REGNUM
);
968 if (cfun
->machine
->start_call
== NULL_RTX
)
969 /* Pretend to return in a hard reg for early uses before pseudos can be
971 return gen_rtx_REG (mode
, NVPTX_RETURN_REGNUM
);
972 return gen_reg_rtx (mode
);
975 /* Implement TARGET_LIBCALL_VALUE. */
978 nvptx_libcall_value (machine_mode mode
, const_rtx
)
980 if (cfun
->machine
->start_call
== NULL_RTX
)
981 /* Pretend to return in a hard reg for early uses before pseudos can be
983 return gen_rtx_REG (mode
, NVPTX_RETURN_REGNUM
);
984 return gen_reg_rtx (mode
);
987 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
990 nvptx_function_value_regno_p (const unsigned int regno
)
992 return regno
== NVPTX_RETURN_REGNUM
;
995 /* Types with a mode other than those supported by the machine are passed by
996 reference in memory. */
999 nvptx_pass_by_reference (cumulative_args_t
, machine_mode mode
,
1000 const_tree type
, bool)
1002 return !PASS_IN_REG_P (mode
, type
);
1005 /* Implement TARGET_RETURN_IN_MEMORY. */
1008 nvptx_return_in_memory (const_tree type
, const_tree
)
1010 machine_mode mode
= TYPE_MODE (type
);
1011 if (!RETURN_IN_REG_P (mode
))
1016 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
1019 nvptx_promote_function_mode (const_tree type
, machine_mode mode
,
1021 const_tree funtype
, int for_return
)
1023 if (type
== NULL_TREE
)
1026 return promote_mode (type
, mode
, punsignedp
);
1027 /* For K&R-style functions, try to match the language promotion rules to
1028 minimize type mismatches at assembly time. */
1029 if (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1030 && type
!= NULL_TREE
1031 && !AGGREGATE_TYPE_P (type
))
1035 mode
= arg_promotion (mode
);
1041 /* Implement TARGET_STATIC_CHAIN. */
1044 nvptx_static_chain (const_tree fndecl
, bool incoming_p
)
1046 if (!DECL_STATIC_CHAIN (fndecl
))
1050 return gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
1052 return gen_rtx_REG (Pmode
, OUTGOING_STATIC_CHAIN_REGNUM
);
1055 /* Emit a comparison COMPARE, and return the new test to be used in the
1059 nvptx_expand_compare (rtx compare
)
1061 rtx pred
= gen_reg_rtx (BImode
);
1062 rtx cmp
= gen_rtx_fmt_ee (GET_CODE (compare
), BImode
,
1063 XEXP (compare
, 0), XEXP (compare
, 1));
1064 emit_insn (gen_rtx_SET (VOIDmode
, pred
, cmp
));
1065 return gen_rtx_NE (BImode
, pred
, const0_rtx
);
1068 /* When loading an operand ORIG_OP, verify whether an address space
1069 conversion to generic is required, and if so, perform it. Also
1070 check for SYMBOL_REFs for function decls and call
1071 nvptx_record_needed_fndecl as needed.
1072 Return either the original operand, or the converted one. */
1075 nvptx_maybe_convert_symbolic_operand (rtx orig_op
)
1077 if (GET_MODE (orig_op
) != Pmode
)
1081 while (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == CONST
)
1083 if (GET_CODE (op
) != SYMBOL_REF
)
1086 tree decl
= SYMBOL_REF_DECL (op
);
1087 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
)
1089 nvptx_record_needed_fndecl (decl
);
1093 addr_space_t as
= nvptx_addr_space_from_address (op
);
1094 if (as
== ADDR_SPACE_GENERIC
)
1098 code
= (as
== ADDR_SPACE_GLOBAL
? UNSPEC_FROM_GLOBAL
1099 : as
== ADDR_SPACE_LOCAL
? UNSPEC_FROM_LOCAL
1100 : as
== ADDR_SPACE_SHARED
? UNSPEC_FROM_SHARED
1101 : as
== ADDR_SPACE_CONST
? UNSPEC_FROM_CONST
1102 : UNSPEC_FROM_PARAM
);
1103 rtx dest
= gen_reg_rtx (Pmode
);
1104 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1105 gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, orig_op
),
1110 /* Returns true if X is a valid address for use in a memory reference. */
1113 nvptx_legitimate_address_p (machine_mode
, rtx x
, bool)
1115 enum rtx_code code
= GET_CODE (x
);
1123 if (REG_P (XEXP (x
, 0)) && CONST_INT_P (XEXP (x
, 1)))
1137 /* Implement HARD_REGNO_MODE_OK. We barely use hard regs, but we want
1138 to ensure that the return register's mode isn't changed. */
1141 nvptx_hard_regno_mode_ok (int regno
, machine_mode mode
)
1143 if (regno
!= NVPTX_RETURN_REGNUM
1144 || cfun
== NULL
|| cfun
->machine
->ret_reg_mode
== VOIDmode
)
1146 return mode
== cfun
->machine
->ret_reg_mode
;
1149 /* Convert an address space AS to the corresponding ptx string. */
1152 nvptx_section_from_addr_space (addr_space_t as
)
1156 case ADDR_SPACE_CONST
:
1159 case ADDR_SPACE_GLOBAL
:
1162 case ADDR_SPACE_SHARED
:
1165 case ADDR_SPACE_GENERIC
:
1173 /* Determine whether DECL goes into .const or .global. */
1176 nvptx_section_for_decl (const_tree decl
)
1178 bool is_const
= (CONSTANT_CLASS_P (decl
)
1179 || TREE_CODE (decl
) == CONST_DECL
1180 || TREE_READONLY (decl
));
1187 /* Look for a SYMBOL_REF in ADDR and return the address space to be used
1188 for the insn referencing this address. */
1191 nvptx_addr_space_from_address (rtx addr
)
1193 while (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == CONST
)
1194 addr
= XEXP (addr
, 0);
1195 if (GET_CODE (addr
) != SYMBOL_REF
)
1196 return ADDR_SPACE_GENERIC
;
1198 tree decl
= SYMBOL_REF_DECL (addr
);
1199 if (decl
== NULL_TREE
|| TREE_CODE (decl
) == FUNCTION_DECL
)
1200 return ADDR_SPACE_GENERIC
;
1202 bool is_const
= (CONSTANT_CLASS_P (decl
)
1203 || TREE_CODE (decl
) == CONST_DECL
1204 || TREE_READONLY (decl
));
1206 return ADDR_SPACE_CONST
;
1208 return ADDR_SPACE_GLOBAL
;
1211 /* Machinery to output constant initializers. */
1213 /* Used when assembling integers to ensure data is emitted in
1214 pieces whose size matches the declaration we printed. */
/* decl_chunk_size is a byte count: init_output_initializer sets it from
   int_size_in_bytes.  */
1215 static unsigned int decl_chunk_size
;
/* Integer mode chosen alongside decl_chunk_size (via int_mode_for_mode);
   output_decl_chunk uses it to build the constant for a finished chunk.  */
1216 static machine_mode decl_chunk_mode
;
1217 /* Used in the same situation, to keep track of the byte offset
1218 into the initializer. */
1219 static unsigned HOST_WIDE_INT decl_offset
;
1220 /* The initializer part we are currently processing. */
/* nvptx_assemble_value ORs each piece into this value, shifted to its byte
   position within the current chunk.  */
1221 static HOST_WIDE_INT init_part
;
1222 /* The total size of the object. */
/* nvptx_output_skip compares decl_offset + size against this value.  */
1223 static unsigned HOST_WIDE_INT object_size
;
1224 /* True if we found a skip extending to the end of the object. Used to
1225 assert that no data follows. */
/* Set in nvptx_output_skip and reset in init_output_initializer;
   nvptx_assemble_value asserts that it is false.  */
1226 static bool object_finished
;
1228 /* Write the necessary separator string to begin a new initializer value. */
1231 begin_decl_field (void)
1233 /* We never see decl_offset at zero by the time we get here. */
1234 if (decl_offset
== decl_chunk_size
)
1235 fprintf (asm_out_file
, " = { ");
1237 fprintf (asm_out_file
, ", ");
1240 /* Output the currently stored chunk as an initializer value. */
1243 output_decl_chunk (void)
1245 begin_decl_field ();
1246 output_address (gen_int_mode (init_part
, decl_chunk_mode
));
1250 /* Add value VAL sized SIZE to the data we're emitting, and keep writing
1251 out chunks as they fill up. */
1254 nvptx_assemble_value (HOST_WIDE_INT val
, unsigned int size
)
1256 unsigned HOST_WIDE_INT chunk_offset
= decl_offset
% decl_chunk_size
;
1257 gcc_assert (!object_finished
);
1260 int this_part
= size
;
1261 if (chunk_offset
+ this_part
> decl_chunk_size
)
1262 this_part
= decl_chunk_size
- chunk_offset
;
1263 HOST_WIDE_INT val_part
;
/* Build a mask covering the low THIS_PART bytes of VAL.  The mask is
   unsigned so that shifting out the top bit (when THIS_PART spans the
   whole HOST_WIDE_INT) wraps to zero, making mask - 1 all ones, rather
   than overflowing a signed type, which is undefined behaviour.  */
1264 unsigned HOST_WIDE_INT mask = 2;
1265 mask <<= this_part * BITS_PER_UNIT - 1;
1266 val_part = val & (mask - 1);
1267 init_part
|= val_part
<< (BITS_PER_UNIT
* chunk_offset
);
1268 val
>>= BITS_PER_UNIT
* this_part
;
1270 decl_offset
+= this_part
;
1271 if (decl_offset
% decl_chunk_size
== 0)
1272 output_decl_chunk ();
1278 /* Target hook for assembling integer object X of size SIZE. */
1281 nvptx_assemble_integer (rtx x
, unsigned int size
, int ARG_UNUSED (aligned_p
))
1283 if (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == CONST
)
/* A symbolic value must occupy exactly one chunk.  This has to be a
   comparison: the former `size = decl_chunk_size' overwrote SIZE and made
   the assertion pass for any nonzero chunk size.  */
1285 gcc_assert (size == decl_chunk_size);
1286 if (decl_offset
% decl_chunk_size
!= 0)
1287 sorry ("cannot emit unaligned pointers in ptx assembly");
1288 decl_offset
+= size
;
1289 begin_decl_field ();
1291 HOST_WIDE_INT off
= 0;
1292 if (GET_CODE (x
) == CONST
)
1294 if (GET_CODE (x
) == PLUS
)
1296 off
= INTVAL (XEXP (x
, 1));
1299 if (GET_CODE (x
) == SYMBOL_REF
)
1301 nvptx_record_needed_fndecl (SYMBOL_REF_DECL (x
));
1302 fprintf (asm_out_file
, "generic(");
1304 fprintf (asm_out_file
, ")");
1307 fprintf (asm_out_file
, " + " HOST_WIDE_INT_PRINT_DEC
, off
);
1312 switch (GET_CODE (x
))
1324 nvptx_assemble_value (val
, size
);
1328 /* Output SIZE zero bytes. We ignore the FILE argument since the
1329 functions we're calling to perform the output just use
1333 nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT size
)
1335 if (decl_offset
+ size
>= object_size
)
1337 if (decl_offset
% decl_chunk_size
!= 0)
1338 nvptx_assemble_value (0, decl_chunk_size
);
1339 object_finished
= true;
1343 while (size
> decl_chunk_size
)
1345 nvptx_assemble_value (0, decl_chunk_size
);
1346 size
-= decl_chunk_size
;
1349 nvptx_assemble_value (0, 1);
1352 /* Output a string STR with length SIZE. As in nvptx_output_skip we
1353 ignore the FILE arg. */
1356 nvptx_output_ascii (FILE *, const char *str
, unsigned HOST_WIDE_INT size
)
1358 for (unsigned HOST_WIDE_INT i
= 0; i
< size
; i
++)
1359 nvptx_assemble_value (str
[i
], 1);
1362 /* Called when the initializer for a decl has been completely output through
1363 combinations of the three functions above. */
1366 nvptx_assemble_decl_end (void)
1368 if (decl_offset
!= 0)
1370 if (!object_finished
&& decl_offset
% decl_chunk_size
!= 0)
1371 nvptx_assemble_value (0, decl_chunk_size
);
1373 fprintf (asm_out_file
, " }");
1375 fprintf (asm_out_file
, ";\n");
1378 /* Start a declaration of a variable of TYPE with NAME to
1379 FILE. IS_PUBLIC says whether this will be externally visible.
1380 Here we just write the linker hint and decide on the chunk size
1384 init_output_initializer (FILE *file
, const char *name
, const_tree type
,
1387 fprintf (file
, "// BEGIN%s VAR DEF: ", is_public
? " GLOBAL" : "");
1388 assemble_name_raw (file
, name
);
1391 if (TREE_CODE (type
) == ARRAY_TYPE
)
1392 type
= TREE_TYPE (type
);
1393 int sz
= int_size_in_bytes (type
);
1394 if ((TREE_CODE (type
) != INTEGER_TYPE
1395 && TREE_CODE (type
) != ENUMERAL_TYPE
1396 && TREE_CODE (type
) != REAL_TYPE
)
/* SZ is a byte count (from int_size_in_bytes), so compare it with the
   number of bytes a HOST_WIDE_INT can hold, not its width in bits;
   init_part can only buffer that many bytes per chunk.  */
1398 || sz > HOST_BITS_PER_WIDE_INT / BITS_PER_UNIT)
1399 type
= ptr_type_node
;
1400 decl_chunk_size
= int_size_in_bytes (type
);
1401 decl_chunk_mode
= int_mode_for_mode (TYPE_MODE (type
));
1404 object_finished
= false;
1407 /* Implement TARGET_ASM_DECLARE_CONSTANT_NAME. Begin the process of
1408 writing a constant variable EXP with NAME and SIZE and its
1409 initializer to FILE. */
/* Start the definition of the constant EXP named NAME on FILE.
   Visible steps: run init_output_initializer on the type of EXP with the
   public flag false, then print a .const declaration whose alignment is
   TYPE_ALIGN of that type in bytes and whose element type is an unsigned
   integer of decl_chunk_size bytes, then NAME, then the element count,
   which is SIZE divided by decl_chunk_size rounded up.
   NOTE(review): lines following the last fprintf are missing from this
   listing; confirm against the original file.  */
1412 nvptx_asm_declare_constant_name (FILE *file
, const char *name
,
1413 const_tree exp
, HOST_WIDE_INT size
)
1415 tree type
= TREE_TYPE (exp
);
1416 init_output_initializer (file
, name
, type
, false);
1417 fprintf (file
, "\t.const .align %d .u%d ",
1418 TYPE_ALIGN (TREE_TYPE (exp
)) / BITS_PER_UNIT
,
1419 decl_chunk_size
* BITS_PER_UNIT
);
1420 assemble_name (file
, name
);
/* Array length in chunks, rounding SIZE up to a whole chunk.  */
1421 fprintf (file
, "[" HOST_WIDE_INT_PRINT_DEC
"]",
1422 (size
+ decl_chunk_size
- 1) / decl_chunk_size
);
1426 /* Implement the ASM_DECLARE_OBJECT_NAME macro. Used to start writing
1427 a variable DECL with NAME to FILE. */
/* Start the definition of variable DECL named NAME on FILE.
   Visible steps, all under the test that DECL is non-null and has a
   DECL_SIZE: run init_output_initializer with TREE_PUBLIC (decl) as the
   public flag; read the size from DECL_SIZE_UNIT; print the optional
   .visible marker, the section from nvptx_section_for_decl, the alignment
   DECL_ALIGN in bytes and an unsigned element type of decl_chunk_size
   bytes; print NAME and the element count (size divided by
   decl_chunk_size, rounded up); set object_finished.
   NOTE(review): several lines between these statements are missing from
   this listing, so the conditions guarding the element count and the
   assignment to object_finished are not visible; confirm against the
   original file.  */
1430 nvptx_declare_object_name (FILE *file
, const char *name
, const_tree decl
)
1432 if (decl
&& DECL_SIZE (decl
))
1434 tree type
= TREE_TYPE (decl
);
1435 unsigned HOST_WIDE_INT size
;
1437 init_output_initializer (file
, name
, type
, TREE_PUBLIC (decl
));
1438 size
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
1439 const char *section
= nvptx_section_for_decl (decl
);
1440 fprintf (file
, "\t%s%s .align %d .u%d ",
1441 TREE_PUBLIC (decl
) ? " .visible" : "", section
,
1442 DECL_ALIGN (decl
) / BITS_PER_UNIT
,
1443 decl_chunk_size
* BITS_PER_UNIT
);
1444 assemble_name (file
, name
);
/* Array length in chunks, rounding the size up to a whole chunk.  */
1446 fprintf (file
, "[" HOST_WIDE_INT_PRINT_DEC
"]",
1447 (size
+ decl_chunk_size
- 1) / decl_chunk_size
);
1449 object_finished
= true;
/* Implement TARGET_ASM_GLOBALIZE_LABEL by doing nothing.  Both arguments
   (the output stream and the label name) are intentionally unnamed and
   ignored.  */

static void
nvptx_globalize_label (FILE *, const char *)
{
}
1461 /* Implement TARGET_ASM_ASSEMBLE_UNDEFINED_DECL. Write an extern
1462 declaration only for variable DECL with NAME to FILE. */
/* Write an extern declaration for DECL named NAME to FILE.
   Visible steps: test whether DECL is a VAR_DECL; look up the section
   with nvptx_section_for_decl; print the BEGIN VAR DECL marker (with the
   GLOBAL tag when DECL is TREE_PUBLIC) and the raw NAME; compute the size
   in bytes of the type of DECL; print an .extern declaration of .b8
   elements in that section, the raw NAME, the size in brackets, and the
   terminating semicolon.
   NOTE(review): lines are missing from this listing (what happens for a
   non VAR_DECL, and anything between the name and the bracketed size);
   confirm against the original file.  */
1464 nvptx_assemble_undefined_decl (FILE *file
, const char *name
, const_tree decl
)
1466 if (TREE_CODE (decl
) != VAR_DECL
)
1468 const char *section
= nvptx_section_for_decl (decl
);
1469 fprintf (file
, "// BEGIN%s VAR DECL: ", TREE_PUBLIC (decl
) ? " GLOBAL" : "");
1470 assemble_name_raw (file
, name
);
1472 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
1473 fprintf (file
, ".extern %s .b8 ", section
);
1474 assemble_name_raw (file
, name
);
/* NOTE(review): the format below has no space between the opening
   bracket literal and HOST_WIDE_INT_PRINT_DEC, unlike the other uses in
   this file.  C++11 lexes an identifier that directly follows a string
   literal as a literal suffix, so a space should probably be inserted
   here; verify with the compilers in use.  */
1476 fprintf (file
, "["HOST_WIDE_INT_PRINT_DEC
"]", size
);
1477 fprintf (file
, ";\n\n");
1480 /* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this
1481 involves writing .param declarations and in/out copies into them. */
/* Emit the PTX text for call INSN to CALLEE, with RESULT as the value
   destination, on asm_out_file.
   NOTE(review): many lines are missing from this listing (the return
   type, braces, several if and while headers, local declarations such as
   buf, labelno, i and argno, and the final return); the comments below
   describe only the statements that are visible, and the exact conditions
   must be confirmed against the original file.
   Visible sequence:
   - open a brace scope in the output;
   - declare the %retval_in parameter, typed from the promoted mode of
     RESULT;
   - for a SYMBOL_REF callee, fetch its decl and record external decls
     with nvptx_record_fndecl;
   - generate an internal LCT label and write a prototype produced by
     write_func_decl_from_insn;
   - declare one %out_arg parameter per argument piece, then store the
     argument registers into them with st.param;
   - print the call itself: optional return parameter, the callee name or
     address, the argument list, the static chain register when the decl
     has DECL_STATIC_CHAIN, and the label;
   - when RESULT is non-null, return a template that loads %retval_in
     into operand 0 and closes the brace scope.  */
1484 nvptx_output_call_insn (rtx_insn
*insn
, rtx result
, rtx callee
)
/* needs_tgt is set when CALLEE satisfies register_operand in Pmode.  */
1488 bool needs_tgt
= register_operand (callee
, Pmode
);
1489 rtx pat
= PATTERN (insn
);
/* Element 0 of the pattern vector is not an argument.  */
1490 int nargs
= XVECLEN (pat
, 0) - 1;
1491 tree decl
= NULL_TREE
;
1493 fprintf (asm_out_file
, "\t{\n");
1496 fprintf (asm_out_file
, "\t\t.param%s %%retval_in;\n",
1497 nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result
)),
1501 if (GET_CODE (callee
) == SYMBOL_REF
)
1503 decl
= SYMBOL_REF_DECL (callee
);
1504 if (decl
&& DECL_EXTERNAL (decl
))
1505 nvptx_record_fndecl (decl
);
/* Label plus prototype text generated from the call pattern.  */
1510 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCT", labelno
);
1512 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
1513 std::stringstream s
;
1514 write_func_decl_from_insn (s
, result
, pat
, callee
);
1515 fputs (s
.str().c_str(), asm_out_file
);
/* First pass over the arguments: declare the outgoing parameters.
   QImode and HImode parameters get a one element array suffix.  */
1518 for (int i
= 0, argno
= 0; i
< nargs
; i
++)
1520 rtx t
= XEXP (XVECEXP (pat
, 0, i
+ 1), 0);
1521 machine_mode mode
= GET_MODE (t
);
1522 int count
= maybe_split_mode (&mode
);
1525 fprintf (asm_out_file
, "\t\t.param%s %%out_arg%d%s;\n",
1526 nvptx_ptx_type_from_mode (mode
, false), argno
++,
1527 mode
== QImode
|| mode
== HImode
? "[1]" : "");
/* Second pass: store each argument register into its parameter.  The
   second format carries an extra numeric suffix after the register
   number, introduced by a dollar sign.  */
1529 for (int i
= 0, argno
= 0; i
< nargs
; i
++)
1531 rtx t
= XEXP (XVECEXP (pat
, 0, i
+ 1), 0);
1532 gcc_assert (REG_P (t
));
1533 machine_mode mode
= GET_MODE (t
);
1534 int count
= maybe_split_mode (&mode
);
1537 fprintf (asm_out_file
, "\t\tst.param%s [%%out_arg%d], %%r%d;\n",
1538 nvptx_ptx_type_from_mode (mode
, false), argno
++,
1544 fprintf (asm_out_file
, "\t\tst.param%s [%%out_arg%d], %%r%d$%d;\n",
1545 nvptx_ptx_type_from_mode (mode
, false), argno
++,
/* The call instruction itself.  */
1550 fprintf (asm_out_file
, "\t\tcall ");
1551 if (result
!= NULL_RTX
)
1552 fprintf (asm_out_file
, "(%%retval_in), ");
/* Callee printed by name (after nvptx_name_replacement) or as an
   address.  */
1556 const char *name
= get_fnname_from_decl (decl
);
1557 name
= nvptx_name_replacement (name
);
1558 assemble_name (asm_out_file
, name
);
1561 output_address (callee
);
1563 if (nargs
> 0 || (decl
&& DECL_STATIC_CHAIN (decl
)))
1565 fprintf (asm_out_file
, ", (");
/* Third pass: list the outgoing parameters, comma separated.  */
1567 for (i
= 0, argno
= 0; i
< nargs
; i
++)
1569 rtx t
= XEXP (XVECEXP (pat
, 0, i
+ 1), 0);
1570 machine_mode mode
= GET_MODE (t
);
1571 int count
= maybe_split_mode (&mode
);
1575 fprintf (asm_out_file
, "%%out_arg%d", argno
++);
1576 if (i
+ 1 < nargs
|| count
> 0)
1577 fprintf (asm_out_file
, ", ");
1580 if (decl
&& DECL_STATIC_CHAIN (decl
))
1583 fprintf (asm_out_file
, ", ");
1584 fprintf (asm_out_file
, "%s",
1585 reg_names
[OUTGOING_STATIC_CHAIN_REGNUM
]);
1588 fprintf (asm_out_file
, ")");
1592 fprintf (asm_out_file
, ", ");
1593 assemble_name (asm_out_file
, buf
);
1595 fprintf (asm_out_file
, ";\n");
/* With a result, the returned template loads it and closes the scope.  */
1596 if (result
!= NULL_RTX
)
1597 return "ld.param%t0\t%0, [%%retval_in];\n\t}";
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true iff C is one
   of the punctuation characters accepted as an operand code by this
   backend: '.' or '#'.  */

static bool
nvptx_print_operand_punct_valid_p (unsigned char c)
{
  return c == '.' || c == '#';
}
1610 static void nvptx_print_operand (FILE *, rtx
, int);
1612 /* Subroutine of nvptx_print_operand; used to print a memory reference X to FILE. */
/* Print address X to FILE; the machine_mode argument is unnamed and
   unused.
   NOTE(review): the case labels of the switch, the declaration of off
   and the statement guarded by the CONST test are missing from this
   listing; confirm against the original file.
   Visible alternatives: print operand 0 of X, a plus sign and off, both
   through output_address; print X with output_addr_const; or, after
   asserting that X is not a MEM, print X with nvptx_print_operand.  */
1615 nvptx_print_address_operand (FILE *file
, rtx x
, machine_mode
)
1618 if (GET_CODE (x
) == CONST
)
1620 switch (GET_CODE (x
))
1624 output_address (XEXP (x
, 0));
1625 fprintf (file
, "+");
1626 output_address (off
);
1631 output_addr_const (file
, x
);
1635 gcc_assert (GET_CODE (x
) != MEM
);
1636 nvptx_print_operand (file
, x
, 0);
1641 /* Write assembly language output for the address ADDR to FILE. */
1644 nvptx_print_operand_address (FILE *file
, rtx addr
)
1646 nvptx_print_address_operand (file
, addr
, VOIDmode
);
1649 /* Print an operand, X, to FILE, with an optional modifier in CODE.
1652 . -- print the predicate for the instruction or an empty string for an unconditional one
1654 # -- print a rounding mode for the instruction
1656 A -- print an address space identifier for a MEM
1657 c -- print an opcode suffix for a comparison operator, including a type code
1658 d -- print a CONST_INT as a vector dimension (x, y, or z)
1659 f -- print a full reg even for something that must always be split
1660 t -- print a type opcode suffix, promoting QImode to 32 bits
1661 T -- print a type size in bits
1662 u -- print a type opcode suffix without promotions. */
/* Print operand X to FILE according to the modifier CODE.
   NOTE(review): every case label, most else and break lines, and some
   declarations (orig_x, vals) are missing from this listing, so the
   mapping from modifier or rtx code to the statements below is not
   visible here; confirm against the original file.
   Visible behaviour, in order of appearance:
   - the dot modifier reads current_insn_predicate and prints the name of
     the register in its operand 0, with a test for an EQ predicate;
   - the hash modifier prints the .rn suffix;
   - an address space name is printed from the address in operand 0 of X
     via nvptx_addr_space_from_address and nvptx_section_from_addr_space;
   - a CONST_INT is asserted and its value compared with 0, 1 and 2;
   - the type suffix of nvptx_underlying_object_mode (x) is printed, once
     with the promotion flag true and once with it false;
   - the bit size of the mode of X is printed;
   - an at sign, optionally followed by an exclamation mark, is printed;
   - a comparison suffix is printed, with .neu instead of .ne for float
     modes, followed by either the ptx type of the compared mode (float
     modes, EQ, NE and the unsigned comparisons) or a signed integer
     suffix with the mode bit size;
   - registers are printed by name when hard registers and as percent r
     plus the register number otherwise; for split-register modes, unless
     CODE is f, a dollar sign and the SUBREG_BYTE of the original operand
     divided by UNITS_PER_WORD are appended;
   - memory operands are printed with nvptx_print_address_operand;
   - constants are printed with output_addr_const or, for the case
     described in the comment near the end, nvptx_print_address_operand;
   - floating constants are converted with real_to_target and printed as
     0f plus 8 hex digits for SFmode and 0d plus 16 hex digits otherwise.
   No further review comments are added below this header because the
   original comment near the end of the function is cut off in this
   listing.  */
1665 nvptx_print_operand (FILE *file
, rtx x
, int code
)
1668 machine_mode op_mode
;
1672 x
= current_insn_predicate
;
1675 unsigned int regno
= REGNO (XEXP (x
, 0));
1677 if (GET_CODE (x
) == EQ
)
1679 fputs (reg_names
[regno
], file
);
1684 else if (code
== '#')
1686 fputs (".rn", file
);
1690 enum rtx_code x_code
= GET_CODE (x
);
1696 addr_space_t as
= nvptx_addr_space_from_address (XEXP (x
, 0));
1697 fputs (nvptx_section_from_addr_space (as
), file
);
1702 gcc_assert (x_code
== CONST_INT
);
1703 if (INTVAL (x
) == 0)
1705 else if (INTVAL (x
) == 1)
1707 else if (INTVAL (x
) == 2)
1714 op_mode
= nvptx_underlying_object_mode (x
);
1715 fprintf (file
, "%s", nvptx_ptx_type_from_mode (op_mode
, true));
1719 op_mode
= nvptx_underlying_object_mode (x
);
1720 fprintf (file
, "%s", nvptx_ptx_type_from_mode (op_mode
, false));
1724 fprintf (file
, "%d", GET_MODE_BITSIZE (GET_MODE (x
)));
1728 fprintf (file
, "@");
1732 fprintf (file
, "@!");
1736 op_mode
= GET_MODE (XEXP (x
, 0));
1740 fputs (".eq", file
);
1743 if (FLOAT_MODE_P (op_mode
))
1744 fputs (".neu", file
);
1746 fputs (".ne", file
);
1749 fputs (".le", file
);
1752 fputs (".ge", file
);
1755 fputs (".lt", file
);
1758 fputs (".gt", file
);
1761 fputs (".ls", file
);
1764 fputs (".hs", file
);
1767 fputs (".lo", file
);
1770 fputs (".hi", file
);
1773 fputs (".ne", file
);
1776 fputs (".equ", file
);
1779 fputs (".leu", file
);
1782 fputs (".geu", file
);
1785 fputs (".ltu", file
);
1788 fputs (".gtu", file
);
1791 fputs (".nan", file
);
1794 fputs (".num", file
);
1799 if (FLOAT_MODE_P (op_mode
)
1800 || x_code
== EQ
|| x_code
== NE
1801 || x_code
== GEU
|| x_code
== GTU
1802 || x_code
== LEU
|| x_code
== LTU
)
1803 fputs (nvptx_ptx_type_from_mode (op_mode
, true), file
);
1805 fprintf (file
, ".s%d", GET_MODE_BITSIZE (op_mode
));
1816 if (HARD_REGISTER_P (x
))
1817 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1819 fprintf (file
, "%%r%d", REGNO (x
));
1820 if (code
!= 'f' && nvptx_split_reg_p (GET_MODE (x
)))
1822 gcc_assert (GET_CODE (orig_x
) == SUBREG
1823 && !nvptx_split_reg_p (GET_MODE (orig_x
)));
1824 fprintf (file
, "$%d", SUBREG_BYTE (orig_x
) / UNITS_PER_WORD
);
1830 nvptx_print_address_operand (file
, XEXP (x
, 0), GET_MODE (x
));
1835 output_addr_const (file
, x
);
1841 /* We could use output_addr_const, but that can print things like
1842 "x-8", which breaks ptxas. Need to ensure it is output as
1844 nvptx_print_address_operand (file
, x
, VOIDmode
);
1849 REAL_VALUE_TYPE real
;
1850 REAL_VALUE_FROM_CONST_DOUBLE (real
, x
);
1851 real_to_target (vals
, &real
, GET_MODE (x
));
1852 vals
[0] &= 0xffffffff;
1853 vals
[1] &= 0xffffffff;
1854 if (GET_MODE (x
) == SFmode
)
1855 fprintf (file
, "0f%08lx", vals
[0]);
1857 fprintf (file
, "0d%08lx%08lx", vals
[1], vals
[0]);
1861 output_addr_const (file
, x
);
1866 /* Record replacement regs used to deal with subreg operands. */
/* Pool of replacement registers, one slot per possible insn operand.
   NOTE(review): this is the only member of struct reg_replace visible in
   this listing; get_replacement and the reorg code also use members
   named mode, n_allocated and n_in_use, whose declarations are on lost
   lines.  */
1869 rtx replacement
[MAX_RECOG_OPERANDS
];
1875 /* Allocate or reuse a replacement in R and return the rtx. */
1878 get_replacement (struct reg_replace
*r
)
1880 if (r
->n_allocated
== r
->n_in_use
)
1881 r
->replacement
[r
->n_allocated
++] = gen_reg_rtx (r
->mode
);
1882 return r
->replacement
[r
->n_in_use
++];
1885 /* Clean up subreg operands. In ptx assembly, everything is typed, and
1886 the presence of subregs would break the rules for most instructions.
1887 Replace them with a suitable new register of the right size, plus
1888 conversion copyin/copyout instructions. */
/* Body of the subreg cleanup pass described in the comment above.
   NOTE(review): the function header is on a lost line (the hook table at
   the end of the file names nvptx_reorg as TARGET_MACHINE_DEPENDENT_REORG,
   which is presumably this function), and braces, continue statements,
   the start of one condition and the declarations of code are missing as
   well; confirm against the original file.
   Visible structure:
   - recompute block information, clear DF_LR_RUN_DCE and thread the
     prologue and epilogue;
   - set up four replacement pools, one each for QImode, HImode, SImode
     and DImode;
   - walk all insns; the test on NONDEBUG_INSN_P, asm_noperands, USE and
     CLOBBER selects insns that are presumably skipped;
   - per insn, reset the in-use counts, extract the operands, and for each
     SUBREG operand take a replacement register of the outer mode, emit a
     conversion into it before the insn unless the operand is OP_OUT, emit
     a conversion back to the inner register after the insn unless the
     operand is OP_IN, and substitute the new register with
     validate_change;
   - finally, for every pseudo above LAST_VIRTUAL_REGISTER with no sets
     and no references, store const0_rtx in regno_reg_rtx.  */
1893 struct reg_replace qiregs
, hiregs
, siregs
, diregs
;
1894 rtx_insn
*insn
, *next
;
1896 /* We are freeing block_for_insn in the toplev to keep compatibility
1897 with old MDEP_REORGS that are not CFG based. Recompute it now. */
1898 compute_bb_for_insn ();
1900 df_clear_flags (DF_LR_RUN_DCE
);
1903 thread_prologue_and_epilogue_insns ();
/* No replacement registers exist yet in any pool.  */
1905 qiregs
.n_allocated
= 0;
1906 hiregs
.n_allocated
= 0;
1907 siregs
.n_allocated
= 0;
1908 diregs
.n_allocated
= 0;
1909 qiregs
.mode
= QImode
;
1910 hiregs
.mode
= HImode
;
1911 siregs
.mode
= SImode
;
1912 diregs
.mode
= DImode
;
/* NEXT is fetched before the insn is processed, so insns emitted after
   the current one by this loop are not visited.  */
1914 for (insn
= get_insns (); insn
; insn
= next
)
1916 next
= NEXT_INSN (insn
);
1917 if (!NONDEBUG_INSN_P (insn
)
1918 || asm_noperands (insn
) >= 0
1919 || GET_CODE (PATTERN (insn
)) == USE
1920 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
/* Replacement registers are reused from one insn to the next.  */
1922 qiregs
.n_in_use
= 0;
1923 hiregs
.n_in_use
= 0;
1924 siregs
.n_in_use
= 0;
1925 diregs
.n_in_use
= 0;
1926 extract_insn (insn
);
1927 enum attr_subregs_ok s_ok
= get_attr_subregs_ok (insn
);
1928 for (int i
= 0; i
< recog_data
.n_operands
; i
++)
1930 rtx op
= recog_data
.operand
[i
];
1931 if (GET_CODE (op
) != SUBREG
)
1934 rtx inner
= SUBREG_REG (op
);
1936 machine_mode outer_mode
= GET_MODE (op
);
1937 machine_mode inner_mode
= GET_MODE (inner
);
1940 && (GET_MODE_PRECISION (inner_mode
)
1941 >= GET_MODE_PRECISION (outer_mode
)))
1943 gcc_assert (SCALAR_INT_MODE_P (outer_mode
));
/* Pick the pool that matches the outer mode of the subreg.  */
1944 struct reg_replace
*r
= (outer_mode
== QImode
? &qiregs
1945 : outer_mode
== HImode
? &hiregs
1946 : outer_mode
== SImode
? &siregs
1948 rtx new_reg
= get_replacement (r
);
/* Copy in: convert the inner register into the replacement before the
   insn.  */
1950 if (recog_data
.operand_type
[i
] != OP_OUT
)
1953 if (GET_MODE_PRECISION (inner_mode
)
1954 < GET_MODE_PRECISION (outer_mode
))
1959 rtx pat
= gen_rtx_SET (VOIDmode
, new_reg
,
1960 gen_rtx_fmt_e (code
, outer_mode
, inner
));
1961 emit_insn_before (pat
, insn
);
/* Copy out: convert the replacement back into the inner register after
   the insn.  */
1964 if (recog_data
.operand_type
[i
] != OP_IN
)
1967 if (GET_MODE_PRECISION (inner_mode
)
1968 < GET_MODE_PRECISION (outer_mode
))
1973 rtx pat
= gen_rtx_SET (VOIDmode
, inner
,
1974 gen_rtx_fmt_e (code
, inner_mode
, new_reg
));
1975 emit_insn_after (pat
, insn
);
1977 validate_change (insn
, recog_data
.operand_loc
[i
], new_reg
, false);
/* Mark pseudos that are never set and never referenced.  */
1981 int maxregs
= max_reg_num ();
1982 regstat_init_n_sets_and_refs ();
1984 for (int i
= LAST_VIRTUAL_REGISTER
+ 1; i
< maxregs
; i
++)
1985 if (REG_N_SETS (i
) == 0 && REG_N_REFS (i
) == 0)
1986 regno_reg_rtx
[i
] = const0_rtx
;
1987 regstat_free_n_sets_and_refs ();
1990 /* Handle a "kernel" attribute; arguments as in
1991 struct attribute_spec.handler. */
/* Validate a use of the kernel attribute NAME.  ARGS and FLAGS are
   marked unused.
   NOTE(review): the return type, the declaration of decl (presumably
   derived from NODE), the braces and the return statement are missing
   from this listing; confirm against the original file.
   Visible checks: when decl is not a FUNCTION_DECL, or when the return
   type of its function type is not void_type_node, an error is reported
   and *NO_ADD_ATTRS is set to true.  */
1994 nvptx_handle_kernel_attribute (tree
*node
, tree name
, tree
ARG_UNUSED (args
),
1995 int ARG_UNUSED (flags
), bool *no_add_attrs
)
1999 if (TREE_CODE (decl
) != FUNCTION_DECL
)
2001 error ("%qE attribute only applies to functions", name
);
2002 *no_add_attrs
= true;
/* TREE_TYPE of the decl is the function type; TREE_TYPE of that is the
   return type.  */
2005 else if (TREE_TYPE (TREE_TYPE (decl
)) != void_type_node
)
2007 error ("%qE attribute requires a void return type", name
);
2008 *no_add_attrs
= true;
2014 /* Table of valid machine attributes. */
2015 static const struct attribute_spec nvptx_attribute_table
[] =
2017 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
2018 affects_type_identity } */
2019 { "kernel", 0, 0, true, false, false, nvptx_handle_kernel_attribute
, false },
2020 { NULL
, 0, 0, false, false, false, NULL
, false }
2023 /* Limit vector alignments to BIGGEST_ALIGNMENT. */
2025 static HOST_WIDE_INT
2026 nvptx_vector_alignment (const_tree type
)
2028 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
2030 return MIN (align
, BIGGEST_ALIGNMENT
);
2033 /* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects
2034 at the start of a file. */
2037 nvptx_file_start (void)
2039 fputs ("// BEGIN PREAMBLE\n", asm_out_file
);
2040 fputs ("\t.version\t3.1\n", asm_out_file
);
2041 fputs ("\t.target\tsm_30\n", asm_out_file
);
2042 fprintf (asm_out_file
, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode
));
2043 fputs ("// END PREAMBLE\n", asm_out_file
);
2046 /* Write out the function declarations we've collected. */
2049 nvptx_file_end (void)
2051 hash_table
<tree_hasher
>::iterator iter
;
2053 FOR_EACH_HASH_TABLE_ELEMENT (*needed_fndecls_htab
, decl
, tree
, iter
)
2054 nvptx_record_fndecl (decl
, true);
2055 fputs (func_decls
.str().c_str(), asm_out_file
);
2058 #undef TARGET_OPTION_OVERRIDE
2059 #define TARGET_OPTION_OVERRIDE nvptx_option_override
2061 #undef TARGET_ATTRIBUTE_TABLE
2062 #define TARGET_ATTRIBUTE_TABLE nvptx_attribute_table
2064 #undef TARGET_LEGITIMATE_ADDRESS_P
2065 #define TARGET_LEGITIMATE_ADDRESS_P nvptx_legitimate_address_p
2067 #undef TARGET_PROMOTE_FUNCTION_MODE
2068 #define TARGET_PROMOTE_FUNCTION_MODE nvptx_promote_function_mode
2070 #undef TARGET_FUNCTION_ARG
2071 #define TARGET_FUNCTION_ARG nvptx_function_arg
2072 #undef TARGET_FUNCTION_INCOMING_ARG
2073 #define TARGET_FUNCTION_INCOMING_ARG nvptx_function_incoming_arg
2074 #undef TARGET_FUNCTION_ARG_ADVANCE
2075 #define TARGET_FUNCTION_ARG_ADVANCE nvptx_function_arg_advance
2076 #undef TARGET_FUNCTION_ARG_BOUNDARY
2077 #define TARGET_FUNCTION_ARG_BOUNDARY nvptx_function_arg_boundary
2078 #undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
2079 #define TARGET_FUNCTION_ARG_ROUND_BOUNDARY nvptx_function_arg_boundary
2080 #undef TARGET_PASS_BY_REFERENCE
2081 #define TARGET_PASS_BY_REFERENCE nvptx_pass_by_reference
2082 #undef TARGET_FUNCTION_VALUE_REGNO_P
2083 #define TARGET_FUNCTION_VALUE_REGNO_P nvptx_function_value_regno_p
2084 #undef TARGET_FUNCTION_VALUE
2085 #define TARGET_FUNCTION_VALUE nvptx_function_value
2086 #undef TARGET_LIBCALL_VALUE
2087 #define TARGET_LIBCALL_VALUE nvptx_libcall_value
2088 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
2089 #define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall
2090 #undef TARGET_SPLIT_COMPLEX_ARG
2091 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
2092 #undef TARGET_RETURN_IN_MEMORY
2093 #define TARGET_RETURN_IN_MEMORY nvptx_return_in_memory
2094 #undef TARGET_OMIT_STRUCT_RETURN_REG
2095 #define TARGET_OMIT_STRUCT_RETURN_REG true
2096 #undef TARGET_STRICT_ARGUMENT_NAMING
2097 #define TARGET_STRICT_ARGUMENT_NAMING nvptx_strict_argument_naming
2098 #undef TARGET_STATIC_CHAIN
2099 #define TARGET_STATIC_CHAIN nvptx_static_chain
2101 #undef TARGET_CALL_ARGS
2102 #define TARGET_CALL_ARGS nvptx_call_args
2103 #undef TARGET_END_CALL_ARGS
2104 #define TARGET_END_CALL_ARGS nvptx_end_call_args
2106 #undef TARGET_ASM_FILE_START
2107 #define TARGET_ASM_FILE_START nvptx_file_start
2108 #undef TARGET_ASM_FILE_END
2109 #define TARGET_ASM_FILE_END nvptx_file_end
2110 #undef TARGET_ASM_GLOBALIZE_LABEL
2111 #define TARGET_ASM_GLOBALIZE_LABEL nvptx_globalize_label
2112 #undef TARGET_ASM_ASSEMBLE_UNDEFINED_DECL
2113 #define TARGET_ASM_ASSEMBLE_UNDEFINED_DECL nvptx_assemble_undefined_decl
2114 #undef TARGET_PRINT_OPERAND
2115 #define TARGET_PRINT_OPERAND nvptx_print_operand
2116 #undef TARGET_PRINT_OPERAND_ADDRESS
2117 #define TARGET_PRINT_OPERAND_ADDRESS nvptx_print_operand_address
2118 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
2119 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P nvptx_print_operand_punct_valid_p
2120 #undef TARGET_ASM_INTEGER
2121 #define TARGET_ASM_INTEGER nvptx_assemble_integer
2122 #undef TARGET_ASM_DECL_END
2123 #define TARGET_ASM_DECL_END nvptx_assemble_decl_end
2124 #undef TARGET_ASM_DECLARE_CONSTANT_NAME
2125 #define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name
2126 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
2127 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
2128 #undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE
2129 #define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true
2131 #undef TARGET_MACHINE_DEPENDENT_REORG
2132 #define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg
2133 #undef TARGET_NO_REGISTER_ALLOCATION
2134 #define TARGET_NO_REGISTER_ALLOCATION true
2136 #undef TARGET_VECTOR_ALIGNMENT
2137 #define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment
2139 struct gcc_target targetm
= TARGET_INITIALIZER
;
2141 #include "gt-nvptx.h"