]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/nvptx/nvptx.c
Move sqrt and cbrt simplifications to match.pd
[thirdparty/gcc.git] / gcc / config / nvptx / nvptx.c
CommitLineData
738f2522 1/* Target code for NVPTX.
5624e564 2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
738f2522
BS
3 Contributed by Bernd Schmidt <bernds@codesourcery.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
3a4d1cb1 22#include <sstream>
738f2522
BS
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
9fdcd34e 26#include "cfghooks.h"
c7131fb2 27#include "tree.h"
738f2522 28#include "rtl.h"
c7131fb2 29#include "df.h"
40e23961 30#include "alias.h"
738f2522
BS
31#include "insn-flags.h"
32#include "output.h"
33#include "insn-attr.h"
34#include "insn-codes.h"
36566b39 35#include "flags.h"
36566b39
PK
36#include "insn-config.h"
37#include "expmed.h"
38#include "dojump.h"
39#include "explow.h"
40#include "calls.h"
41#include "emit-rtl.h"
42#include "varasm.h"
43#include "stmt.h"
738f2522
BS
44#include "expr.h"
45#include "regs.h"
46#include "optabs.h"
47#include "recog.h"
738f2522
BS
48#include "timevar.h"
49#include "tm_p.h"
50#include "tm-preds.h"
51#include "tm-constrs.h"
738f2522
BS
52#include "langhooks.h"
53#include "dbxout.h"
54#include "target.h"
738f2522 55#include "diagnostic.h"
738f2522
BS
56#include "cfgrtl.h"
57#include "stor-layout.h"
738f2522 58#include "builtins.h"
3e32ee19
NS
59#include "omp-low.h"
60#include "gomp-constants.h"
738f2522 61
994c5d85 62/* This file should be included last. */
d58627a0
RS
63#include "target-def.h"
64
738f2522
BS
65/* Record the function decls we've written, and the libfuncs and function
66 decls corresponding to them. */
67static std::stringstream func_decls;
f3dba894 68
6c907cff 69struct declared_libfunc_hasher : ggc_cache_ptr_hash<rtx_def>
f3dba894
TS
70{
71 static hashval_t hash (rtx x) { return htab_hash_pointer (x); }
72 static bool equal (rtx a, rtx b) { return a == b; }
73};
74
75static GTY((cache))
76 hash_table<declared_libfunc_hasher> *declared_libfuncs_htab;
77
6c907cff 78struct tree_hasher : ggc_cache_ptr_hash<tree_node>
f3dba894
TS
79{
80 static hashval_t hash (tree t) { return htab_hash_pointer (t); }
81 static bool equal (tree a, tree b) { return a == b; }
82};
83
84static GTY((cache)) hash_table<tree_hasher> *declared_fndecls_htab;
85static GTY((cache)) hash_table<tree_hasher> *needed_fndecls_htab;
738f2522
BS
86
87/* Allocate a new, cleared machine_function structure. */
88
89static struct machine_function *
90nvptx_init_machine_status (void)
91{
92 struct machine_function *p = ggc_cleared_alloc<machine_function> ();
93 p->ret_reg_mode = VOIDmode;
94 return p;
95}
96
97/* Implement TARGET_OPTION_OVERRIDE. */
98
99static void
100nvptx_option_override (void)
101{
102 init_machine_status = nvptx_init_machine_status;
103 /* Gives us a predictable order, which we need especially for variables. */
104 flag_toplevel_reorder = 1;
105 /* Assumes that it will see only hard registers. */
106 flag_var_tracking = 0;
f324806d
NS
107 write_symbols = NO_DEBUG;
108 debug_info_level = DINFO_LEVEL_NONE;
738f2522 109
f3dba894
TS
110 declared_fndecls_htab = hash_table<tree_hasher>::create_ggc (17);
111 needed_fndecls_htab = hash_table<tree_hasher>::create_ggc (17);
738f2522 112 declared_libfuncs_htab
f3dba894 113 = hash_table<declared_libfunc_hasher>::create_ggc (17);
738f2522
BS
114}
115
116/* Return the mode to be used when declaring a ptx object for OBJ.
117 For objects with subparts such as complex modes this is the mode
118 of the subpart. */
119
120machine_mode
121nvptx_underlying_object_mode (rtx obj)
122{
123 if (GET_CODE (obj) == SUBREG)
124 obj = SUBREG_REG (obj);
125 machine_mode mode = GET_MODE (obj);
126 if (mode == TImode)
127 return DImode;
128 if (COMPLEX_MODE_P (mode))
129 return GET_MODE_INNER (mode);
130 return mode;
131}
132
133/* Return a ptx type for MODE. If PROMOTE, then use .u32 for QImode to
134 deal with ptx ideosyncracies. */
135
136const char *
137nvptx_ptx_type_from_mode (machine_mode mode, bool promote)
138{
139 switch (mode)
140 {
141 case BLKmode:
142 return ".b8";
143 case BImode:
144 return ".pred";
145 case QImode:
146 if (promote)
147 return ".u32";
148 else
149 return ".u8";
150 case HImode:
151 return ".u16";
152 case SImode:
153 return ".u32";
154 case DImode:
155 return ".u64";
156
157 case SFmode:
158 return ".f32";
159 case DFmode:
160 return ".f64";
161
162 default:
163 gcc_unreachable ();
164 }
165}
166
167/* Return the number of pieces to use when dealing with a pseudo of *PMODE.
168 Alter *PMODE if we return a number greater than one. */
169
170static int
171maybe_split_mode (machine_mode *pmode)
172{
173 machine_mode mode = *pmode;
174
175 if (COMPLEX_MODE_P (mode))
176 {
177 *pmode = GET_MODE_INNER (mode);
178 return 2;
179 }
180 else if (mode == TImode)
181 {
182 *pmode = DImode;
183 return 2;
184 }
185 return 1;
186}
187
188/* Like maybe_split_mode, but only return whether or not the mode
189 needs to be split. */
190static bool
191nvptx_split_reg_p (machine_mode mode)
192{
193 if (COMPLEX_MODE_P (mode))
194 return true;
195 if (mode == TImode)
196 return true;
197 return false;
198}
199
/* True if MODE is an integer or float mode, or a complex integer/float
   mode whose TYPE is not an aggregate, and MODE is not TImode.  TYPE is
   only inspected for the complex classes.  */
#define PASS_IN_REG_P(MODE, TYPE)                               \
  ((GET_MODE_CLASS (MODE) == MODE_INT                           \
    || GET_MODE_CLASS (MODE) == MODE_FLOAT                      \
    || ((GET_MODE_CLASS (MODE) == MODE_COMPLEX_INT              \
         || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)        \
        && !AGGREGATE_TYPE_P (TYPE)))                           \
   && (MODE) != TImode)

/* True if MODE is an integer or float mode no larger than 8 bytes.  */
#define RETURN_IN_REG_P(MODE)                   \
  ((GET_MODE_CLASS (MODE) == MODE_INT           \
    || GET_MODE_CLASS (MODE) == MODE_FLOAT)     \
   && GET_MODE_SIZE (MODE) <= 8)
212\f
213/* Perform a mode promotion for a function argument with MODE. Return
214 the promoted mode. */
215
216static machine_mode
217arg_promotion (machine_mode mode)
218{
219 if (mode == QImode || mode == HImode)
220 return SImode;
221 return mode;
222}
223
224/* Write the declaration of a function arg of TYPE to S. I is the index
225 of the argument, MODE its mode. NO_ARG_TYPES is true if this is for
226 a decl with zero TYPE_ARG_TYPES, i.e. an old-style C decl. */
227
228static int
229write_one_arg (std::stringstream &s, tree type, int i, machine_mode mode,
230 bool no_arg_types)
231{
232 if (!PASS_IN_REG_P (mode, type))
233 mode = Pmode;
234
235 int count = maybe_split_mode (&mode);
236
237 if (count == 2)
238 {
239 write_one_arg (s, NULL_TREE, i, mode, false);
240 write_one_arg (s, NULL_TREE, i + 1, mode, false);
241 return i + 1;
242 }
243
244 if (no_arg_types && !AGGREGATE_TYPE_P (type))
245 {
246 if (mode == SFmode)
247 mode = DFmode;
248 mode = arg_promotion (mode);
249 }
250
251 if (i > 0)
252 s << ", ";
253 s << ".param" << nvptx_ptx_type_from_mode (mode, false) << " %in_ar"
254 << (i + 1) << (mode == QImode || mode == HImode ? "[1]" : "");
255 if (mode == BLKmode)
256 s << "[" << int_size_in_bytes (type) << "]";
257 return i;
258}
259
260/* Look for attributes in ATTRS that would indicate we must write a function
261 as a .entry kernel rather than a .func. Return true if one is found. */
262
263static bool
264write_as_kernel (tree attrs)
265{
266 return (lookup_attribute ("kernel", attrs) != NULL_TREE
267 || lookup_attribute ("omp target entrypoint", attrs) != NULL_TREE);
268}
269
ecf6e535
BS
270/* Write a function decl for DECL to S, where NAME is the name to be used.
271 This includes ptx .visible or .extern specifiers, .func or .kernel, and
272 argument and return types. */
738f2522
BS
273
274static void
275nvptx_write_function_decl (std::stringstream &s, const char *name, const_tree decl)
276{
277 tree fntype = TREE_TYPE (decl);
278 tree result_type = TREE_TYPE (fntype);
279 tree args = TYPE_ARG_TYPES (fntype);
280 tree attrs = DECL_ATTRIBUTES (decl);
281 bool kernel = write_as_kernel (attrs);
282 bool is_main = strcmp (name, "main") == 0;
283 bool args_from_decl = false;
284
285 /* We get:
286 NULL in TYPE_ARG_TYPES, for old-style functions
287 NULL in DECL_ARGUMENTS, for builtin functions without another
288 declaration.
289 So we have to pick the best one we have. */
290 if (args == 0)
291 {
292 args = DECL_ARGUMENTS (decl);
293 args_from_decl = true;
294 }
295
296 if (DECL_EXTERNAL (decl))
297 s << ".extern ";
298 else if (TREE_PUBLIC (decl))
299 s << ".visible ";
300
301 if (kernel)
302 s << ".entry ";
303 else
304 s << ".func ";
305
306 /* Declare the result. */
307 bool return_in_mem = false;
308 if (TYPE_MODE (result_type) != VOIDmode)
309 {
310 machine_mode mode = TYPE_MODE (result_type);
311 if (!RETURN_IN_REG_P (mode))
312 return_in_mem = true;
313 else
314 {
315 mode = arg_promotion (mode);
316 s << "(.param" << nvptx_ptx_type_from_mode (mode, false)
317 << " %out_retval)";
318 }
319 }
320
321 if (name[0] == '*')
322 s << (name + 1);
323 else
324 s << name;
325
326 /* Declare argument types. */
327 if ((args != NULL_TREE
1fe6befc
NS
328 && !(TREE_CODE (args) == TREE_LIST
329 && TREE_VALUE (args) == void_type_node))
738f2522
BS
330 || is_main
331 || return_in_mem
332 || DECL_STATIC_CHAIN (decl))
333 {
334 s << "(";
335 int i = 0;
336 bool any_args = false;
337 if (return_in_mem)
338 {
339 s << ".param.u" << GET_MODE_BITSIZE (Pmode) << " %in_ar1";
340 i++;
341 }
342 while (args != NULL_TREE)
343 {
344 tree type = args_from_decl ? TREE_TYPE (args) : TREE_VALUE (args);
345 machine_mode mode = TYPE_MODE (type);
346
347 if (mode != VOIDmode)
348 {
349 i = write_one_arg (s, type, i, mode,
350 TYPE_ARG_TYPES (fntype) == 0);
351 any_args = true;
352 i++;
353 }
354 args = TREE_CHAIN (args);
355 }
356 if (stdarg_p (fntype))
357 {
358 gcc_assert (i > 0);
359 s << ", .param.u" << GET_MODE_BITSIZE (Pmode) << " %in_argp";
360 }
361 if (DECL_STATIC_CHAIN (decl))
362 {
363 if (i > 0)
364 s << ", ";
365 s << ".reg.u" << GET_MODE_BITSIZE (Pmode)
366 << reg_names [STATIC_CHAIN_REGNUM];
367 }
368 if (!any_args && is_main)
369 s << ".param.u32 %argc, .param.u" << GET_MODE_BITSIZE (Pmode)
370 << " %argv";
371 s << ")";
372 }
373}
374
375/* Walk either ARGTYPES or ARGS if the former is null, and write out part of
376 the function header to FILE. If WRITE_COPY is false, write reg
377 declarations, otherwise write the copy from the incoming argument to that
378 reg. RETURN_IN_MEM indicates whether to start counting arg numbers at 1
379 instead of 0. */
380
381static void
382walk_args_for_param (FILE *file, tree argtypes, tree args, bool write_copy,
383 bool return_in_mem)
384{
385 int i;
386
387 bool args_from_decl = false;
388 if (argtypes == 0)
389 args_from_decl = true;
390 else
391 args = argtypes;
392
393 for (i = return_in_mem ? 1 : 0; args != NULL_TREE; args = TREE_CHAIN (args))
394 {
395 tree type = args_from_decl ? TREE_TYPE (args) : TREE_VALUE (args);
396 machine_mode mode = TYPE_MODE (type);
397
398 if (mode == VOIDmode)
399 break;
400
401 if (!PASS_IN_REG_P (mode, type))
402 mode = Pmode;
403
404 int count = maybe_split_mode (&mode);
405 if (count == 1)
406 {
407 if (argtypes == NULL && !AGGREGATE_TYPE_P (type))
408 {
409 if (mode == SFmode)
410 mode = DFmode;
411
412 }
738f2522 413 }
7373d132 414 mode = arg_promotion (mode);
738f2522
BS
415 while (count-- > 0)
416 {
417 i++;
418 if (write_copy)
419 fprintf (file, "\tld.param%s %%ar%d, [%%in_ar%d];\n",
7373d132 420 nvptx_ptx_type_from_mode (mode, false), i, i);
738f2522
BS
421 else
422 fprintf (file, "\t.reg%s %%ar%d;\n",
7373d132 423 nvptx_ptx_type_from_mode (mode, false), i);
738f2522
BS
424 }
425 }
426}
427
428/* Write a .func or .kernel declaration (not a definition) along with
429 a helper comment for use by ld. S is the stream to write to, DECL
430 the decl for the function with name NAME. */
431
432static void
433write_function_decl_and_comment (std::stringstream &s, const char *name, const_tree decl)
434{
435 s << "// BEGIN";
436 if (TREE_PUBLIC (decl))
437 s << " GLOBAL";
438 s << " FUNCTION DECL: ";
439 if (name[0] == '*')
440 s << (name + 1);
441 else
442 s << name;
443 s << "\n";
444 nvptx_write_function_decl (s, name, decl);
445 s << ";\n";
446}
447
/* Check NAME for special function names and redirect them by returning a
   replacement.  This applies to malloc, free and realloc, for which we
   want to use libgcc wrappers, and call, which triggers a bug in ptxas.
   Any other NAME is returned as is.  */

static const char *
nvptx_name_replacement (const char *name)
{
  static const struct
  {
    const char *from;
    const char *to;
  } redirects[] = {
    { "call", "__nvptx_call" },
    { "malloc", "__nvptx_malloc" },
    { "free", "__nvptx_free" },
    { "realloc", "__nvptx_realloc" }
  };

  for (unsigned int k = 0; k < sizeof redirects / sizeof redirects[0]; k++)
    if (strcmp (name, redirects[k].from) == 0)
      return redirects[k].to;

  return name;
}
465
466/* If DECL is a FUNCTION_DECL, check the hash table to see if we
467 already encountered it, and if not, insert it and write a ptx
468 declarations that will be output at the end of compilation. */
469
470static bool
471nvptx_record_fndecl (tree decl, bool force = false)
472{
473 if (decl == NULL_TREE || TREE_CODE (decl) != FUNCTION_DECL
474 || !DECL_EXTERNAL (decl))
475 return true;
476
477 if (!force && TYPE_ARG_TYPES (TREE_TYPE (decl)) == NULL_TREE)
478 return false;
479
f3dba894 480 tree *slot = declared_fndecls_htab->find_slot (decl, INSERT);
738f2522
BS
481 if (*slot == NULL)
482 {
483 *slot = decl;
484 const char *name = get_fnname_from_decl (decl);
485 name = nvptx_name_replacement (name);
486 write_function_decl_and_comment (func_decls, name, decl);
487 }
488 return true;
489}
490
491/* Record that we need to emit a ptx decl for DECL. Either do it now, or
492 record it for later in case we have no argument information at this
493 point. */
494
495void
496nvptx_record_needed_fndecl (tree decl)
497{
498 if (nvptx_record_fndecl (decl))
499 return;
500
f3dba894 501 tree *slot = needed_fndecls_htab->find_slot (decl, INSERT);
738f2522
BS
502 if (*slot == NULL)
503 *slot = decl;
504}
505
506/* Implement ASM_DECLARE_FUNCTION_NAME. Writes the start of a ptx
507 function, including local var decls and copies from the arguments to
508 local regs. */
509
510void
511nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
512{
513 tree fntype = TREE_TYPE (decl);
514 tree result_type = TREE_TYPE (fntype);
515
516 name = nvptx_name_replacement (name);
517
518 std::stringstream s;
519 write_function_decl_and_comment (s, name, decl);
520 s << "// BEGIN";
521 if (TREE_PUBLIC (decl))
522 s << " GLOBAL";
523 s << " FUNCTION DEF: ";
524
525 if (name[0] == '*')
526 s << (name + 1);
527 else
528 s << name;
529 s << "\n";
530
531 nvptx_write_function_decl (s, name, decl);
532 fprintf (file, "%s", s.str().c_str());
533
534 bool return_in_mem = false;
535 if (TYPE_MODE (result_type) != VOIDmode)
536 {
537 machine_mode mode = TYPE_MODE (result_type);
538 if (!RETURN_IN_REG_P (mode))
539 return_in_mem = true;
540 }
541
542 fprintf (file, "\n{\n");
543
544 /* Ensure all arguments that should live in a register have one
545 declared. We'll emit the copies below. */
546 walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl),
547 false, return_in_mem);
548 if (return_in_mem)
549 fprintf (file, "\t.reg.u%d %%ar1;\n", GET_MODE_BITSIZE (Pmode));
550 else if (TYPE_MODE (result_type) != VOIDmode)
551 {
552 machine_mode mode = arg_promotion (TYPE_MODE (result_type));
ac952181 553 fprintf (file, "\t.reg%s %%retval;\n",
738f2522
BS
554 nvptx_ptx_type_from_mode (mode, false));
555 }
556
557 if (stdarg_p (fntype))
558 fprintf (file, "\t.reg.u%d %%argp;\n", GET_MODE_BITSIZE (Pmode));
559
560 fprintf (file, "\t.reg.u%d %s;\n", GET_MODE_BITSIZE (Pmode),
561 reg_names[OUTGOING_STATIC_CHAIN_REGNUM]);
562
563 /* Declare the pseudos we have as ptx registers. */
564 int maxregs = max_reg_num ();
565 for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
566 {
567 if (regno_reg_rtx[i] != const0_rtx)
568 {
569 machine_mode mode = PSEUDO_REGNO_MODE (i);
570 int count = maybe_split_mode (&mode);
571 if (count > 1)
572 {
573 while (count-- > 0)
574 fprintf (file, "\t.reg%s %%r%d$%d;\n",
575 nvptx_ptx_type_from_mode (mode, true),
576 i, count);
577 }
578 else
579 fprintf (file, "\t.reg%s %%r%d;\n",
580 nvptx_ptx_type_from_mode (mode, true),
581 i);
582 }
583 }
584
585 /* The only reason we might be using outgoing args is if we call a stdargs
586 function. Allocate the space for this. If we called varargs functions
587 without passing any variadic arguments, we'll see a reference to outargs
588 even with a zero outgoing_args_size. */
589 HOST_WIDE_INT sz = crtl->outgoing_args_size;
590 if (sz == 0)
591 sz = 1;
592 if (cfun->machine->has_call_with_varargs)
593 fprintf (file, "\t.reg.u%d %%outargs;\n"
16998094 594 "\t.local.align 8 .b8 %%outargs_ar[" HOST_WIDE_INT_PRINT_DEC"];\n",
738f2522
BS
595 BITS_PER_WORD, sz);
596 if (cfun->machine->punning_buffer_size > 0)
597 fprintf (file, "\t.reg.u%d %%punbuffer;\n"
598 "\t.local.align 8 .b8 %%punbuffer_ar[%d];\n",
599 BITS_PER_WORD, cfun->machine->punning_buffer_size);
600
601 /* Declare a local variable for the frame. */
602 sz = get_frame_size ();
603 if (sz > 0 || cfun->machine->has_call_with_sc)
604 {
18c05628
NS
605 int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
606
738f2522 607 fprintf (file, "\t.reg.u%d %%frame;\n"
18c05628
NS
608 "\t.local.align %d .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n",
609 BITS_PER_WORD, alignment, sz == 0 ? 1 : sz);
738f2522
BS
610 fprintf (file, "\tcvta.local.u%d %%frame, %%farray;\n",
611 BITS_PER_WORD);
612 }
613
614 if (cfun->machine->has_call_with_varargs)
615 fprintf (file, "\tcvta.local.u%d %%outargs, %%outargs_ar;\n",
616 BITS_PER_WORD);
617 if (cfun->machine->punning_buffer_size > 0)
618 fprintf (file, "\tcvta.local.u%d %%punbuffer, %%punbuffer_ar;\n",
619 BITS_PER_WORD);
620
621 /* Now emit any copies necessary for arguments. */
622 walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl),
623 true, return_in_mem);
624 if (return_in_mem)
ac952181 625 fprintf (file, "\tld.param.u%d %%ar1, [%%in_ar1];\n",
738f2522
BS
626 GET_MODE_BITSIZE (Pmode));
627 if (stdarg_p (fntype))
ac952181 628 fprintf (file, "\tld.param.u%d %%argp, [%%in_argp];\n",
738f2522
BS
629 GET_MODE_BITSIZE (Pmode));
630}
631
632/* Output a return instruction. Also copy the return value to its outgoing
633 location. */
634
635const char *
636nvptx_output_return (void)
637{
638 tree fntype = TREE_TYPE (current_function_decl);
639 tree result_type = TREE_TYPE (fntype);
640 if (TYPE_MODE (result_type) != VOIDmode)
641 {
642 machine_mode mode = TYPE_MODE (result_type);
643 if (RETURN_IN_REG_P (mode))
644 {
645 mode = arg_promotion (mode);
646 fprintf (asm_out_file, "\tst.param%s\t[%%out_retval], %%retval;\n",
647 nvptx_ptx_type_from_mode (mode, false));
648 }
649 }
650
651 return "ret;";
652}
653
654/* Construct a function declaration from a call insn. This can be
655 necessary for two reasons - either we have an indirect call which
656 requires a .callprototype declaration, or we have a libcall
657 generated by emit_library_call for which no decl exists. */
658
659static void
660write_func_decl_from_insn (std::stringstream &s, rtx result, rtx pat,
661 rtx callee)
662{
663 bool callprototype = register_operand (callee, Pmode);
664 const char *name = "_";
665 if (!callprototype)
666 {
667 name = XSTR (callee, 0);
668 name = nvptx_name_replacement (name);
669 s << "// BEGIN GLOBAL FUNCTION DECL: " << name << "\n";
670 }
671 s << (callprototype ? "\t.callprototype\t" : "\t.extern .func ");
672
673 if (result != NULL_RTX)
674 {
675 s << "(.param";
676 s << nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)),
677 false);
678 s << " ";
679 if (callprototype)
680 s << "_";
681 else
682 s << "%out_retval";
683 s << ")";
684 }
685
686 s << name;
687
f324806d
NS
688 int arg_end = XVECLEN (pat, 0);
689
690 if (1 < arg_end)
738f2522 691 {
f324806d 692 const char *comma = "";
738f2522 693 s << " (";
f324806d 694 for (int i = 1; i < arg_end; i++)
738f2522 695 {
f324806d 696 rtx t = XEXP (XVECEXP (pat, 0, i), 0);
738f2522
BS
697 machine_mode mode = GET_MODE (t);
698 int count = maybe_split_mode (&mode);
699
f324806d 700 while (count--)
738f2522 701 {
f324806d 702 s << comma << ".param";
738f2522
BS
703 s << nvptx_ptx_type_from_mode (mode, false);
704 s << " ";
705 if (callprototype)
706 s << "_";
707 else
f324806d 708 s << "%arg" << i - 1;
738f2522
BS
709 if (mode == QImode || mode == HImode)
710 s << "[1]";
f324806d 711 comma = ", ";
738f2522
BS
712 }
713 }
714 s << ")";
715 }
716 s << ";\n";
717}
718
/* Finish a function by writing its closing brace, preceded by a tab, to
   FILE.  */

void
nvptx_function_end (FILE *file)
{
  fputs ("\t}\n", file);
}
726\f
727/* Decide whether we can make a sibling call to a function. For ptx, we
728 can't. */
729
730static bool
731nvptx_function_ok_for_sibcall (tree, tree)
732{
733 return false;
734}
735
18c05628
NS
736/* Return Dynamic ReAlignment Pointer RTX. For PTX there isn't any. */
737
738static rtx
739nvptx_get_drap_rtx (void)
740{
741 return NULL_RTX;
742}
743
738f2522
BS
744/* Implement the TARGET_CALL_ARGS hook. Record information about one
745 argument to the next call. */
746
747static void
748nvptx_call_args (rtx arg, tree funtype)
749{
750 if (cfun->machine->start_call == NULL_RTX)
751 {
752 cfun->machine->call_args = NULL;
753 cfun->machine->funtype = funtype;
754 cfun->machine->start_call = const0_rtx;
755 }
756 if (arg == pc_rtx)
757 return;
758
759 rtx_expr_list *args_so_far = cfun->machine->call_args;
760 if (REG_P (arg))
761 cfun->machine->call_args = alloc_EXPR_LIST (VOIDmode, arg, args_so_far);
762}
763
764/* Implement the corresponding END_CALL_ARGS hook. Clear and free the
765 information we recorded. */
766
767static void
768nvptx_end_call_args (void)
769{
770 cfun->machine->start_call = NULL_RTX;
771 free_EXPR_LIST_list (&cfun->machine->call_args);
772}
773
ecf6e535
BS
774/* Emit the sequence for a call to ADDRESS, setting RETVAL. Keep
775 track of whether calls involving static chains or varargs were seen
776 in the current function.
777 For libcalls, maintain a hash table of decls we have seen, and
778 record a function decl for later when encountering a new one. */
738f2522
BS
779
780void
781nvptx_expand_call (rtx retval, rtx address)
782{
f324806d 783 int nargs = 0;
738f2522
BS
784 rtx callee = XEXP (address, 0);
785 rtx pat, t;
786 rtvec vec;
787 bool external_decl = false;
f324806d
NS
788 rtx varargs = NULL_RTX;
789 tree decl_type = NULL_TREE;
738f2522 790
738f2522
BS
791 for (t = cfun->machine->call_args; t; t = XEXP (t, 1))
792 nargs++;
793
738f2522
BS
794 if (!call_insn_operand (callee, Pmode))
795 {
796 callee = force_reg (Pmode, callee);
797 address = change_address (address, QImode, callee);
798 }
799
800 if (GET_CODE (callee) == SYMBOL_REF)
801 {
802 tree decl = SYMBOL_REF_DECL (callee);
803 if (decl != NULL_TREE)
804 {
805 decl_type = TREE_TYPE (decl);
806 if (DECL_STATIC_CHAIN (decl))
807 cfun->machine->has_call_with_sc = true;
808 if (DECL_EXTERNAL (decl))
809 external_decl = true;
810 }
811 }
c38f0d8c 812
738f2522
BS
813 if (cfun->machine->funtype
814 /* It's possible to construct testcases where we call a variable.
815 See compile/20020129-1.c. stdarg_p will crash so avoid calling it
816 in such a case. */
817 && (TREE_CODE (cfun->machine->funtype) == FUNCTION_TYPE
818 || TREE_CODE (cfun->machine->funtype) == METHOD_TYPE)
819 && stdarg_p (cfun->machine->funtype))
820 {
f324806d 821 varargs = gen_reg_rtx (Pmode);
738f2522 822 if (Pmode == DImode)
f324806d 823 emit_move_insn (varargs, stack_pointer_rtx);
738f2522 824 else
f324806d
NS
825 emit_move_insn (varargs, stack_pointer_rtx);
826 cfun->machine->has_call_with_varargs = true;
738f2522 827 }
f324806d
NS
828 vec = rtvec_alloc (nargs + 1 + (varargs ? 1 : 0));
829 pat = gen_rtx_PARALLEL (VOIDmode, vec);
738f2522 830
f324806d
NS
831 int vec_pos = 0;
832
738f2522
BS
833 rtx tmp_retval = retval;
834 t = gen_rtx_CALL (VOIDmode, address, const0_rtx);
835 if (retval != NULL_RTX)
836 {
837 if (!nvptx_register_operand (retval, GET_MODE (retval)))
838 tmp_retval = gen_reg_rtx (GET_MODE (retval));
f7df4a84 839 t = gen_rtx_SET (tmp_retval, t);
738f2522 840 }
f324806d
NS
841 XVECEXP (pat, 0, vec_pos++) = t;
842
843 /* Construct the call insn, including a USE for each argument pseudo
844 register. These will be used when printing the insn. */
845 for (rtx arg = cfun->machine->call_args; arg; arg = XEXP (arg, 1))
846 {
847 rtx this_arg = XEXP (arg, 0);
848 XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, this_arg);
849 }
850
851 if (varargs)
852 XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, varargs);
853
854 gcc_assert (vec_pos = XVECLEN (pat, 0));
ecf6e535
BS
855
856 /* If this is a libcall, decl_type is NULL. For a call to a non-libcall
857 undeclared function, we'll have an external decl without arg types.
858 In either case we have to try to construct a ptx declaration from one of
859 the calls to the function. */
738f2522
BS
860 if (!REG_P (callee)
861 && (decl_type == NULL_TREE
862 || (external_decl && TYPE_ARG_TYPES (decl_type) == NULL_TREE)))
863 {
f3dba894 864 rtx *slot = declared_libfuncs_htab->find_slot (callee, INSERT);
738f2522
BS
865 if (*slot == NULL)
866 {
867 *slot = callee;
868 write_func_decl_from_insn (func_decls, retval, pat, callee);
869 }
870 }
871 emit_call_insn (pat);
872 if (tmp_retval != retval)
873 emit_move_insn (retval, tmp_retval);
874}
875
876/* Implement TARGET_FUNCTION_ARG. */
877
878static rtx
879nvptx_function_arg (cumulative_args_t, machine_mode mode,
880 const_tree, bool named)
881{
882 if (mode == VOIDmode)
883 return NULL_RTX;
884
885 if (named)
886 return gen_reg_rtx (mode);
887 return NULL_RTX;
888}
889
890/* Implement TARGET_FUNCTION_INCOMING_ARG. */
891
892static rtx
893nvptx_function_incoming_arg (cumulative_args_t cum_v, machine_mode mode,
894 const_tree, bool named)
895{
896 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
897 if (mode == VOIDmode)
898 return NULL_RTX;
899
900 if (!named)
901 return NULL_RTX;
902
903 /* No need to deal with split modes here, the only case that can
904 happen is complex modes and those are dealt with by
905 TARGET_SPLIT_COMPLEX_ARG. */
906 return gen_rtx_UNSPEC (mode,
907 gen_rtvec (1, GEN_INT (1 + cum->count)),
908 UNSPEC_ARG_REG);
909}
910
911/* Implement TARGET_FUNCTION_ARG_ADVANCE. */
912
913static void
914nvptx_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
915 const_tree type ATTRIBUTE_UNUSED,
916 bool named ATTRIBUTE_UNUSED)
917{
918 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
919 if (mode == TImode)
920 cum->count += 2;
921 else
922 cum->count++;
923}
924
925/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.
926
927 For nvptx, we know how to handle functions declared as stdarg: by
928 passing an extra pointer to the unnamed arguments. However, the
929 Fortran frontend can produce a different situation, where a
930 function pointer is declared with no arguments, but the actual
931 function and calls to it take more arguments. In that case, we
932 want to ensure the call matches the definition of the function. */
933
934static bool
935nvptx_strict_argument_naming (cumulative_args_t cum_v)
936{
937 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
938 return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
939}
940
941/* Implement TARGET_FUNCTION_ARG_BOUNDARY. */
942
943static unsigned int
944nvptx_function_arg_boundary (machine_mode mode, const_tree type)
945{
946 unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);
947
948 if (boundary > BITS_PER_WORD)
949 return 2 * BITS_PER_WORD;
950
951 if (mode == BLKmode)
952 {
953 HOST_WIDE_INT size = int_size_in_bytes (type);
954 if (size > 4)
955 return 2 * BITS_PER_WORD;
956 if (boundary < BITS_PER_WORD)
957 {
958 if (size >= 3)
959 return BITS_PER_WORD;
960 if (size >= 2)
961 return 2 * BITS_PER_UNIT;
962 }
963 }
964 return boundary;
965}
966
967/* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place
968 where function FUNC returns or receives a value of data type TYPE. */
969
970static rtx
971nvptx_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
972 bool outgoing)
973{
974 int unsignedp = TYPE_UNSIGNED (type);
975 machine_mode orig_mode = TYPE_MODE (type);
976 machine_mode mode = promote_function_mode (type, orig_mode,
977 &unsignedp, NULL_TREE, 1);
978 if (outgoing)
979 return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
980 if (cfun->machine->start_call == NULL_RTX)
981 /* Pretend to return in a hard reg for early uses before pseudos can be
982 generated. */
983 return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
984 return gen_reg_rtx (mode);
985}
986
987/* Implement TARGET_LIBCALL_VALUE. */
988
989static rtx
990nvptx_libcall_value (machine_mode mode, const_rtx)
991{
992 if (cfun->machine->start_call == NULL_RTX)
993 /* Pretend to return in a hard reg for early uses before pseudos can be
994 generated. */
995 return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
996 return gen_reg_rtx (mode);
997}
998
999/* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
1000
1001static bool
1002nvptx_function_value_regno_p (const unsigned int regno)
1003{
1004 return regno == NVPTX_RETURN_REGNUM;
1005}
1006
1007/* Types with a mode other than those supported by the machine are passed by
1008 reference in memory. */
1009
1010static bool
1011nvptx_pass_by_reference (cumulative_args_t, machine_mode mode,
1012 const_tree type, bool)
1013{
1014 return !PASS_IN_REG_P (mode, type);
1015}
1016
1017/* Implement TARGET_RETURN_IN_MEMORY. */
1018
1019static bool
1020nvptx_return_in_memory (const_tree type, const_tree)
1021{
1022 machine_mode mode = TYPE_MODE (type);
1023 if (!RETURN_IN_REG_P (mode))
1024 return true;
1025 return false;
1026}
1027
1028/* Implement TARGET_PROMOTE_FUNCTION_MODE. */
1029
1030static machine_mode
1031nvptx_promote_function_mode (const_tree type, machine_mode mode,
1032 int *punsignedp,
1033 const_tree funtype, int for_return)
1034{
1035 if (type == NULL_TREE)
1036 return mode;
1037 if (for_return)
1038 return promote_mode (type, mode, punsignedp);
1039 /* For K&R-style functions, try to match the language promotion rules to
1040 minimize type mismatches at assembly time. */
1041 if (TYPE_ARG_TYPES (funtype) == NULL_TREE
1042 && type != NULL_TREE
1043 && !AGGREGATE_TYPE_P (type))
1044 {
1045 if (mode == SFmode)
1046 mode = DFmode;
1047 mode = arg_promotion (mode);
1048 }
1049
1050 return mode;
1051}
1052
1053/* Implement TARGET_STATIC_CHAIN. */
1054
1055static rtx
1056nvptx_static_chain (const_tree fndecl, bool incoming_p)
1057{
1058 if (!DECL_STATIC_CHAIN (fndecl))
1059 return NULL;
1060
1061 if (incoming_p)
1062 return gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
1063 else
1064 return gen_rtx_REG (Pmode, OUTGOING_STATIC_CHAIN_REGNUM);
1065}
1066\f
1067/* Emit a comparison COMPARE, and return the new test to be used in the
1068 jump. */
1069
1070rtx
1071nvptx_expand_compare (rtx compare)
1072{
1073 rtx pred = gen_reg_rtx (BImode);
1074 rtx cmp = gen_rtx_fmt_ee (GET_CODE (compare), BImode,
1075 XEXP (compare, 0), XEXP (compare, 1));
f7df4a84 1076 emit_insn (gen_rtx_SET (pred, cmp));
738f2522
BS
1077 return gen_rtx_NE (BImode, pred, const0_rtx);
1078}
1079
1080/* When loading an operand ORIG_OP, verify whether an address space
1081 conversion to generic is required, and if so, perform it. Also
1082 check for SYMBOL_REFs for function decls and call
1083 nvptx_record_needed_fndecl as needed.
1084 Return either the original operand, or the converted one. */
1085
1086rtx
1087nvptx_maybe_convert_symbolic_operand (rtx orig_op)
1088{
1089 if (GET_MODE (orig_op) != Pmode)
1090 return orig_op;
1091
1092 rtx op = orig_op;
1093 while (GET_CODE (op) == PLUS || GET_CODE (op) == CONST)
1094 op = XEXP (op, 0);
1095 if (GET_CODE (op) != SYMBOL_REF)
1096 return orig_op;
1097
1098 tree decl = SYMBOL_REF_DECL (op);
1099 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
1100 {
1101 nvptx_record_needed_fndecl (decl);
1102 return orig_op;
1103 }
1104
1105 addr_space_t as = nvptx_addr_space_from_address (op);
1106 if (as == ADDR_SPACE_GENERIC)
1107 return orig_op;
1108
1109 enum unspec code;
1110 code = (as == ADDR_SPACE_GLOBAL ? UNSPEC_FROM_GLOBAL
1111 : as == ADDR_SPACE_LOCAL ? UNSPEC_FROM_LOCAL
1112 : as == ADDR_SPACE_SHARED ? UNSPEC_FROM_SHARED
1113 : as == ADDR_SPACE_CONST ? UNSPEC_FROM_CONST
1114 : UNSPEC_FROM_PARAM);
1115 rtx dest = gen_reg_rtx (Pmode);
f7df4a84
RS
1116 emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig_op),
1117 code)));
738f2522
BS
1118 return dest;
1119}
1120\f
1121/* Returns true if X is a valid address for use in a memory reference. */
1122
1123static bool
1124nvptx_legitimate_address_p (machine_mode, rtx x, bool)
1125{
1126 enum rtx_code code = GET_CODE (x);
1127
1128 switch (code)
1129 {
1130 case REG:
1131 return true;
1132
1133 case PLUS:
1134 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1135 return true;
1136 return false;
1137
1138 case CONST:
1139 case SYMBOL_REF:
1140 case LABEL_REF:
1141 return true;
1142
1143 default:
1144 return false;
1145 }
1146}
1147
1148/* Implement HARD_REGNO_MODE_OK. We barely use hard regs, but we want
1149 to ensure that the return register's mode isn't changed. */
1150
1151bool
1152nvptx_hard_regno_mode_ok (int regno, machine_mode mode)
1153{
1154 if (regno != NVPTX_RETURN_REGNUM
1155 || cfun == NULL || cfun->machine->ret_reg_mode == VOIDmode)
1156 return true;
1157 return mode == cfun->machine->ret_reg_mode;
1158}
1159\f
1160/* Convert an address space AS to the corresponding ptx string. */
1161
1162const char *
1163nvptx_section_from_addr_space (addr_space_t as)
1164{
1165 switch (as)
1166 {
1167 case ADDR_SPACE_CONST:
1168 return ".const";
1169
1170 case ADDR_SPACE_GLOBAL:
1171 return ".global";
1172
1173 case ADDR_SPACE_SHARED:
1174 return ".shared";
1175
1176 case ADDR_SPACE_GENERIC:
1177 return "";
1178
1179 default:
1180 gcc_unreachable ();
1181 }
1182}
1183
1184/* Determine whether DECL goes into .const or .global. */
1185
1186const char *
1187nvptx_section_for_decl (const_tree decl)
1188{
1189 bool is_const = (CONSTANT_CLASS_P (decl)
1190 || TREE_CODE (decl) == CONST_DECL
1191 || TREE_READONLY (decl));
1192 if (is_const)
1193 return ".const";
1194
1195 return ".global";
1196}
1197
1198/* Look for a SYMBOL_REF in ADDR and return the address space to be used
1199 for the insn referencing this address. */
1200
1201addr_space_t
1202nvptx_addr_space_from_address (rtx addr)
1203{
1204 while (GET_CODE (addr) == PLUS || GET_CODE (addr) == CONST)
1205 addr = XEXP (addr, 0);
1206 if (GET_CODE (addr) != SYMBOL_REF)
1207 return ADDR_SPACE_GENERIC;
1208
1209 tree decl = SYMBOL_REF_DECL (addr);
1210 if (decl == NULL_TREE || TREE_CODE (decl) == FUNCTION_DECL)
1211 return ADDR_SPACE_GENERIC;
1212
1213 bool is_const = (CONSTANT_CLASS_P (decl)
1214 || TREE_CODE (decl) == CONST_DECL
1215 || TREE_READONLY (decl));
1216 if (is_const)
1217 return ADDR_SPACE_CONST;
1218
1219 return ADDR_SPACE_GLOBAL;
1220}
1221\f
ecf6e535
BS
1222/* Machinery to output constant initializers. When beginning an initializer,
1223 we decide on a chunk size (which is visible in ptx in the type used), and
1224 then all initializer data is buffered until a chunk is filled and ready to
1225 be written out. */
738f2522
BS
1226
1227/* Used when assembling integers to ensure data is emitted in
1228 pieces whose size matches the declaration we printed. */
1229static unsigned int decl_chunk_size;
1230static machine_mode decl_chunk_mode;
1231/* Used in the same situation, to keep track of the byte offset
1232 into the initializer. */
1233static unsigned HOST_WIDE_INT decl_offset;
1234/* The initializer part we are currently processing. */
1235static HOST_WIDE_INT init_part;
1236/* The total size of the object. */
1237static unsigned HOST_WIDE_INT object_size;
1238/* True if we found a skip extending to the end of the object. Used to
1239 assert that no data follows. */
1240static bool object_finished;
1241
1242/* Write the necessary separator string to begin a new initializer value. */
1243
1244static void
1245begin_decl_field (void)
1246{
1247 /* We never see decl_offset at zero by the time we get here. */
1248 if (decl_offset == decl_chunk_size)
1249 fprintf (asm_out_file, " = { ");
1250 else
1251 fprintf (asm_out_file, ", ");
1252}
1253
1254/* Output the currently stored chunk as an initializer value. */
1255
1256static void
1257output_decl_chunk (void)
1258{
1259 begin_decl_field ();
1260 output_address (gen_int_mode (init_part, decl_chunk_mode));
1261 init_part = 0;
1262}
1263
1264/* Add value VAL sized SIZE to the data we're emitting, and keep writing
1265 out chunks as they fill up. */
1266
1267static void
1268nvptx_assemble_value (HOST_WIDE_INT val, unsigned int size)
1269{
1270 unsigned HOST_WIDE_INT chunk_offset = decl_offset % decl_chunk_size;
1271 gcc_assert (!object_finished);
1272 while (size > 0)
1273 {
1274 int this_part = size;
1275 if (chunk_offset + this_part > decl_chunk_size)
1276 this_part = decl_chunk_size - chunk_offset;
1277 HOST_WIDE_INT val_part;
1278 HOST_WIDE_INT mask = 2;
1279 mask <<= this_part * BITS_PER_UNIT - 1;
1280 val_part = val & (mask - 1);
1281 init_part |= val_part << (BITS_PER_UNIT * chunk_offset);
1282 val >>= BITS_PER_UNIT * this_part;
1283 size -= this_part;
1284 decl_offset += this_part;
1285 if (decl_offset % decl_chunk_size == 0)
1286 output_decl_chunk ();
1287
1288 chunk_offset = 0;
1289 }
1290}
1291
1292/* Target hook for assembling integer object X of size SIZE. */
1293
1294static bool
1295nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p))
1296{
1297 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
1298 {
1299 gcc_assert (size = decl_chunk_size);
1300 if (decl_offset % decl_chunk_size != 0)
1301 sorry ("cannot emit unaligned pointers in ptx assembly");
1302 decl_offset += size;
1303 begin_decl_field ();
1304
1305 HOST_WIDE_INT off = 0;
1306 if (GET_CODE (x) == CONST)
1307 x = XEXP (x, 0);
1308 if (GET_CODE (x) == PLUS)
1309 {
1310 off = INTVAL (XEXP (x, 1));
1311 x = XEXP (x, 0);
1312 }
1313 if (GET_CODE (x) == SYMBOL_REF)
1314 {
1315 nvptx_record_needed_fndecl (SYMBOL_REF_DECL (x));
1316 fprintf (asm_out_file, "generic(");
1317 output_address (x);
1318 fprintf (asm_out_file, ")");
1319 }
1320 if (off != 0)
1321 fprintf (asm_out_file, " + " HOST_WIDE_INT_PRINT_DEC, off);
1322 return true;
1323 }
1324
1325 HOST_WIDE_INT val;
1326 switch (GET_CODE (x))
1327 {
1328 case CONST_INT:
1329 val = INTVAL (x);
1330 break;
1331 case CONST_DOUBLE:
1332 gcc_unreachable ();
1333 break;
1334 default:
1335 gcc_unreachable ();
1336 }
1337
1338 nvptx_assemble_value (val, size);
1339 return true;
1340}
1341
1342/* Output SIZE zero bytes. We ignore the FILE argument since the
1343 functions we're calling to perform the output just use
1344 asm_out_file. */
1345
1346void
1347nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT size)
1348{
1349 if (decl_offset + size >= object_size)
1350 {
1351 if (decl_offset % decl_chunk_size != 0)
1352 nvptx_assemble_value (0, decl_chunk_size);
1353 object_finished = true;
1354 return;
1355 }
1356
1357 while (size > decl_chunk_size)
1358 {
1359 nvptx_assemble_value (0, decl_chunk_size);
1360 size -= decl_chunk_size;
1361 }
1362 while (size-- > 0)
1363 nvptx_assemble_value (0, 1);
1364}
1365
1366/* Output a string STR with length SIZE. As in nvptx_output_skip we
1367 ignore the FILE arg. */
1368
1369void
1370nvptx_output_ascii (FILE *, const char *str, unsigned HOST_WIDE_INT size)
1371{
1372 for (unsigned HOST_WIDE_INT i = 0; i < size; i++)
1373 nvptx_assemble_value (str[i], 1);
1374}
1375
1376/* Called when the initializer for a decl has been completely output through
1377 combinations of the three functions above. */
1378
1379static void
1380nvptx_assemble_decl_end (void)
1381{
1382 if (decl_offset != 0)
1383 {
1384 if (!object_finished && decl_offset % decl_chunk_size != 0)
1385 nvptx_assemble_value (0, decl_chunk_size);
1386
1387 fprintf (asm_out_file, " }");
1388 }
1389 fprintf (asm_out_file, ";\n");
1390}
1391
1392/* Start a declaration of a variable of TYPE with NAME to
1393 FILE. IS_PUBLIC says whether this will be externally visible.
1394 Here we just write the linker hint and decide on the chunk size
1395 to use. */
1396
1397static void
1398init_output_initializer (FILE *file, const char *name, const_tree type,
1399 bool is_public)
1400{
1401 fprintf (file, "// BEGIN%s VAR DEF: ", is_public ? " GLOBAL" : "");
1402 assemble_name_raw (file, name);
1403 fputc ('\n', file);
1404
1405 if (TREE_CODE (type) == ARRAY_TYPE)
1406 type = TREE_TYPE (type);
1407 int sz = int_size_in_bytes (type);
1408 if ((TREE_CODE (type) != INTEGER_TYPE
1409 && TREE_CODE (type) != ENUMERAL_TYPE
1410 && TREE_CODE (type) != REAL_TYPE)
1411 || sz < 0
1412 || sz > HOST_BITS_PER_WIDE_INT)
1413 type = ptr_type_node;
1414 decl_chunk_size = int_size_in_bytes (type);
1415 decl_chunk_mode = int_mode_for_mode (TYPE_MODE (type));
1416 decl_offset = 0;
1417 init_part = 0;
1418 object_finished = false;
1419}
1420
1421/* Implement TARGET_ASM_DECLARE_CONSTANT_NAME. Begin the process of
1422 writing a constant variable EXP with NAME and SIZE and its
1423 initializer to FILE. */
1424
1425static void
1426nvptx_asm_declare_constant_name (FILE *file, const char *name,
1427 const_tree exp, HOST_WIDE_INT size)
1428{
1429 tree type = TREE_TYPE (exp);
1430 init_output_initializer (file, name, type, false);
1431 fprintf (file, "\t.const .align %d .u%d ",
1432 TYPE_ALIGN (TREE_TYPE (exp)) / BITS_PER_UNIT,
1433 decl_chunk_size * BITS_PER_UNIT);
1434 assemble_name (file, name);
1435 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1436 (size + decl_chunk_size - 1) / decl_chunk_size);
1437 object_size = size;
1438}
1439
1440/* Implement the ASM_DECLARE_OBJECT_NAME macro. Used to start writing
1441 a variable DECL with NAME to FILE. */
1442
1443void
1444nvptx_declare_object_name (FILE *file, const char *name, const_tree decl)
1445{
1446 if (decl && DECL_SIZE (decl))
1447 {
1448 tree type = TREE_TYPE (decl);
1449 unsigned HOST_WIDE_INT size;
1450
1451 init_output_initializer (file, name, type, TREE_PUBLIC (decl));
1452 size = tree_to_uhwi (DECL_SIZE_UNIT (decl));
1453 const char *section = nvptx_section_for_decl (decl);
1454 fprintf (file, "\t%s%s .align %d .u%d ",
1455 TREE_PUBLIC (decl) ? " .visible" : "", section,
1456 DECL_ALIGN (decl) / BITS_PER_UNIT,
1457 decl_chunk_size * BITS_PER_UNIT);
1458 assemble_name (file, name);
1459 if (size > 0)
1460 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1461 (size + decl_chunk_size - 1) / decl_chunk_size);
1462 else
1463 object_finished = true;
1464 object_size = size;
1465 }
1466}
1467
1468/* Implement TARGET_ASM_GLOBALIZE_LABEL by doing nothing. */
1469
1470static void
1471nvptx_globalize_label (FILE *, const char *)
1472{
1473}
1474
1475/* Implement TARGET_ASM_ASSEMBLE_UNDEFINED_DECL. Write an extern
1476 declaration only for variable DECL with NAME to FILE. */
1477static void
1478nvptx_assemble_undefined_decl (FILE *file, const char *name, const_tree decl)
1479{
1480 if (TREE_CODE (decl) != VAR_DECL)
1481 return;
1482 const char *section = nvptx_section_for_decl (decl);
1483 fprintf (file, "// BEGIN%s VAR DECL: ", TREE_PUBLIC (decl) ? " GLOBAL" : "");
1484 assemble_name_raw (file, name);
1485 fputs ("\n", file);
1486 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
1487 fprintf (file, ".extern %s .b8 ", section);
1488 assemble_name_raw (file, name);
1489 if (size > 0)
16998094 1490 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC"]", size);
738f2522
BS
1491 fprintf (file, ";\n\n");
1492}
1493
1494/* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this
ecf6e535
BS
1495 involves writing .param declarations and in/out copies into them. For
1496 indirect calls, also write the .callprototype. */
738f2522
BS
1497
1498const char *
1499nvptx_output_call_insn (rtx_insn *insn, rtx result, rtx callee)
1500{
1501 char buf[256];
1502 static int labelno;
1503 bool needs_tgt = register_operand (callee, Pmode);
1504 rtx pat = PATTERN (insn);
f324806d 1505 int arg_end = XVECLEN (pat, 0);
738f2522
BS
1506 tree decl = NULL_TREE;
1507
1508 fprintf (asm_out_file, "\t{\n");
1509 if (result != NULL)
f324806d
NS
1510 fprintf (asm_out_file, "\t\t.param%s %%retval_in;\n",
1511 nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)),
1512 false));
738f2522 1513
ecf6e535 1514 /* Ensure we have a ptx declaration in the output if necessary. */
738f2522
BS
1515 if (GET_CODE (callee) == SYMBOL_REF)
1516 {
1517 decl = SYMBOL_REF_DECL (callee);
1518 if (decl && DECL_EXTERNAL (decl))
1519 nvptx_record_fndecl (decl);
1520 }
1521
1522 if (needs_tgt)
1523 {
1524 ASM_GENERATE_INTERNAL_LABEL (buf, "LCT", labelno);
1525 labelno++;
1526 ASM_OUTPUT_LABEL (asm_out_file, buf);
1527 std::stringstream s;
1528 write_func_decl_from_insn (s, result, pat, callee);
1529 fputs (s.str().c_str(), asm_out_file);
1530 }
1531
f324806d 1532 for (int i = 1, argno = 0; i < arg_end; i++)
738f2522 1533 {
f324806d 1534 rtx t = XEXP (XVECEXP (pat, 0, i), 0);
738f2522
BS
1535 machine_mode mode = GET_MODE (t);
1536 int count = maybe_split_mode (&mode);
1537
f324806d 1538 while (count--)
738f2522
BS
1539 fprintf (asm_out_file, "\t\t.param%s %%out_arg%d%s;\n",
1540 nvptx_ptx_type_from_mode (mode, false), argno++,
1541 mode == QImode || mode == HImode ? "[1]" : "");
1542 }
f324806d 1543 for (int i = 1, argno = 0; i < arg_end; i++)
738f2522 1544 {
f324806d 1545 rtx t = XEXP (XVECEXP (pat, 0, i), 0);
738f2522
BS
1546 gcc_assert (REG_P (t));
1547 machine_mode mode = GET_MODE (t);
1548 int count = maybe_split_mode (&mode);
1549
1550 if (count == 1)
1551 fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d;\n",
1552 nvptx_ptx_type_from_mode (mode, false), argno++,
1553 REGNO (t));
1554 else
1555 {
1556 int n = 0;
f324806d 1557 while (count--)
738f2522
BS
1558 fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d$%d;\n",
1559 nvptx_ptx_type_from_mode (mode, false), argno++,
1560 REGNO (t), n++);
1561 }
1562 }
1563
1564 fprintf (asm_out_file, "\t\tcall ");
1565 if (result != NULL_RTX)
1566 fprintf (asm_out_file, "(%%retval_in), ");
1567
1568 if (decl)
1569 {
1570 const char *name = get_fnname_from_decl (decl);
1571 name = nvptx_name_replacement (name);
1572 assemble_name (asm_out_file, name);
1573 }
1574 else
1575 output_address (callee);
1576
f324806d 1577 if (arg_end > 1 || (decl && DECL_STATIC_CHAIN (decl)))
738f2522 1578 {
f324806d
NS
1579 const char *comma = "";
1580
738f2522 1581 fprintf (asm_out_file, ", (");
f324806d 1582 for (int i = 1, argno = 0; i < arg_end; i++)
738f2522 1583 {
f324806d 1584 rtx t = XEXP (XVECEXP (pat, 0, i), 0);
738f2522
BS
1585 machine_mode mode = GET_MODE (t);
1586 int count = maybe_split_mode (&mode);
1587
f324806d 1588 while (count--)
738f2522 1589 {
f324806d
NS
1590 fprintf (asm_out_file, "%s%%out_arg%d", comma, argno++);
1591 comma = ", ";
738f2522
BS
1592 }
1593 }
1594 if (decl && DECL_STATIC_CHAIN (decl))
f324806d
NS
1595 fprintf (asm_out_file, "%s%s", comma,
1596 reg_names [OUTGOING_STATIC_CHAIN_REGNUM]);
738f2522
BS
1597
1598 fprintf (asm_out_file, ")");
1599 }
f324806d 1600
738f2522
BS
1601 if (needs_tgt)
1602 {
1603 fprintf (asm_out_file, ", ");
1604 assemble_name (asm_out_file, buf);
1605 }
1606 fprintf (asm_out_file, ";\n");
1607 if (result != NULL_RTX)
1608 return "ld.param%t0\t%0, [%%retval_in];\n\t}";
1609
1610 return "}";
1611}
1612
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  The only punctuation
   codes accepted are '.' and '#'.  */

static bool
nvptx_print_operand_punct_valid_p (unsigned char c)
{
  switch (c)
    {
    case '.':
    case '#':
      return true;

    default:
      return false;
    }
}
1620
1621static void nvptx_print_operand (FILE *, rtx, int);
1622
1623/* Subroutine of nvptx_print_operand; used to print a memory reference X to FILE. */
1624
1625static void
1626nvptx_print_address_operand (FILE *file, rtx x, machine_mode)
1627{
1628 rtx off;
1629 if (GET_CODE (x) == CONST)
1630 x = XEXP (x, 0);
1631 switch (GET_CODE (x))
1632 {
1633 case PLUS:
1634 off = XEXP (x, 1);
1635 output_address (XEXP (x, 0));
1636 fprintf (file, "+");
1637 output_address (off);
1638 break;
1639
1640 case SYMBOL_REF:
1641 case LABEL_REF:
1642 output_addr_const (file, x);
1643 break;
1644
1645 default:
1646 gcc_assert (GET_CODE (x) != MEM);
1647 nvptx_print_operand (file, x, 0);
1648 break;
1649 }
1650}
1651
1652/* Write assembly language output for the address ADDR to FILE. */
1653
1654static void
1655nvptx_print_operand_address (FILE *file, rtx addr)
1656{
1657 nvptx_print_address_operand (file, addr, VOIDmode);
1658}
1659
1660/* Print an operand, X, to FILE, with an optional modifier in CODE.
1661
1662 Meaning of CODE:
1663 . -- print the predicate for the instruction or an emptry string for an
1664 unconditional one.
1665 # -- print a rounding mode for the instruction
1666
1667 A -- print an address space identifier for a MEM
1668 c -- print an opcode suffix for a comparison operator, including a type code
1669 d -- print a CONST_INT as a vector dimension (x, y, or z)
1670 f -- print a full reg even for something that must always be split
1671 t -- print a type opcode suffix, promoting QImode to 32 bits
1672 T -- print a type size in bits
1673 u -- print a type opcode suffix without promotions. */
1674
1675static void
1676nvptx_print_operand (FILE *file, rtx x, int code)
1677{
1678 rtx orig_x = x;
1679 machine_mode op_mode;
1680
1681 if (code == '.')
1682 {
1683 x = current_insn_predicate;
1684 if (x)
1685 {
1686 unsigned int regno = REGNO (XEXP (x, 0));
1687 fputs ("[", file);
1688 if (GET_CODE (x) == EQ)
1689 fputs ("!", file);
1690 fputs (reg_names [regno], file);
1691 fputs ("]", file);
1692 }
1693 return;
1694 }
1695 else if (code == '#')
1696 {
1697 fputs (".rn", file);
1698 return;
1699 }
1700
1701 enum rtx_code x_code = GET_CODE (x);
1702
1703 switch (code)
1704 {
1705 case 'A':
1706 {
1707 addr_space_t as = nvptx_addr_space_from_address (XEXP (x, 0));
1708 fputs (nvptx_section_from_addr_space (as), file);
1709 }
1710 break;
1711
1712 case 'd':
1713 gcc_assert (x_code == CONST_INT);
1714 if (INTVAL (x) == 0)
1715 fputs (".x", file);
1716 else if (INTVAL (x) == 1)
1717 fputs (".y", file);
1718 else if (INTVAL (x) == 2)
1719 fputs (".z", file);
1720 else
1721 gcc_unreachable ();
1722 break;
1723
1724 case 't':
1725 op_mode = nvptx_underlying_object_mode (x);
1726 fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, true));
1727 break;
1728
1729 case 'u':
1730 op_mode = nvptx_underlying_object_mode (x);
1731 fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, false));
1732 break;
1733
1734 case 'T':
1735 fprintf (file, "%d", GET_MODE_BITSIZE (GET_MODE (x)));
1736 break;
1737
1738 case 'j':
1739 fprintf (file, "@");
1740 goto common;
1741
1742 case 'J':
1743 fprintf (file, "@!");
1744 goto common;
1745
1746 case 'c':
1747 op_mode = GET_MODE (XEXP (x, 0));
1748 switch (x_code)
1749 {
1750 case EQ:
1751 fputs (".eq", file);
1752 break;
1753 case NE:
1754 if (FLOAT_MODE_P (op_mode))
1755 fputs (".neu", file);
1756 else
1757 fputs (".ne", file);
1758 break;
1759 case LE:
1760 fputs (".le", file);
1761 break;
1762 case GE:
1763 fputs (".ge", file);
1764 break;
1765 case LT:
1766 fputs (".lt", file);
1767 break;
1768 case GT:
1769 fputs (".gt", file);
1770 break;
1771 case LEU:
1772 fputs (".ls", file);
1773 break;
1774 case GEU:
1775 fputs (".hs", file);
1776 break;
1777 case LTU:
1778 fputs (".lo", file);
1779 break;
1780 case GTU:
1781 fputs (".hi", file);
1782 break;
1783 case LTGT:
1784 fputs (".ne", file);
1785 break;
1786 case UNEQ:
1787 fputs (".equ", file);
1788 break;
1789 case UNLE:
1790 fputs (".leu", file);
1791 break;
1792 case UNGE:
1793 fputs (".geu", file);
1794 break;
1795 case UNLT:
1796 fputs (".ltu", file);
1797 break;
1798 case UNGT:
1799 fputs (".gtu", file);
1800 break;
1801 case UNORDERED:
1802 fputs (".nan", file);
1803 break;
1804 case ORDERED:
1805 fputs (".num", file);
1806 break;
1807 default:
1808 gcc_unreachable ();
1809 }
1810 if (FLOAT_MODE_P (op_mode)
1811 || x_code == EQ || x_code == NE
1812 || x_code == GEU || x_code == GTU
1813 || x_code == LEU || x_code == LTU)
1814 fputs (nvptx_ptx_type_from_mode (op_mode, true), file);
1815 else
1816 fprintf (file, ".s%d", GET_MODE_BITSIZE (op_mode));
1817 break;
1818 default:
1819 common:
1820 switch (x_code)
1821 {
1822 case SUBREG:
1823 x = SUBREG_REG (x);
1824 /* fall through */
1825
1826 case REG:
1827 if (HARD_REGISTER_P (x))
1828 fprintf (file, "%s", reg_names[REGNO (x)]);
1829 else
1830 fprintf (file, "%%r%d", REGNO (x));
1831 if (code != 'f' && nvptx_split_reg_p (GET_MODE (x)))
1832 {
1833 gcc_assert (GET_CODE (orig_x) == SUBREG
1834 && !nvptx_split_reg_p (GET_MODE (orig_x)));
1835 fprintf (file, "$%d", SUBREG_BYTE (orig_x) / UNITS_PER_WORD);
1836 }
1837 break;
1838
1839 case MEM:
1840 fputc ('[', file);
1841 nvptx_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
1842 fputc (']', file);
1843 break;
1844
1845 case CONST_INT:
1846 output_addr_const (file, x);
1847 break;
1848
1849 case CONST:
1850 case SYMBOL_REF:
1851 case LABEL_REF:
1852 /* We could use output_addr_const, but that can print things like
1853 "x-8", which breaks ptxas. Need to ensure it is output as
1854 "x+-8". */
1855 nvptx_print_address_operand (file, x, VOIDmode);
1856 break;
1857
1858 case CONST_DOUBLE:
1859 long vals[2];
34a72c33 1860 real_to_target (vals, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
738f2522
BS
1861 vals[0] &= 0xffffffff;
1862 vals[1] &= 0xffffffff;
1863 if (GET_MODE (x) == SFmode)
1864 fprintf (file, "0f%08lx", vals[0]);
1865 else
1866 fprintf (file, "0d%08lx%08lx", vals[1], vals[0]);
1867 break;
1868
1869 default:
1870 output_addr_const (file, x);
1871 }
1872 }
1873}
1874\f
1875/* Record replacement regs used to deal with subreg operands. */
1876struct reg_replace
1877{
1878 rtx replacement[MAX_RECOG_OPERANDS];
1879 machine_mode mode;
1880 int n_allocated;
1881 int n_in_use;
1882};
1883
1884/* Allocate or reuse a replacement in R and return the rtx. */
1885
1886static rtx
1887get_replacement (struct reg_replace *r)
1888{
1889 if (r->n_allocated == r->n_in_use)
1890 r->replacement[r->n_allocated++] = gen_reg_rtx (r->mode);
1891 return r->replacement[r->n_in_use++];
1892}
1893
1894/* Clean up subreg operands. In ptx assembly, everything is typed, and
1895 the presence of subregs would break the rules for most instructions.
1896 Replace them with a suitable new register of the right size, plus
1897 conversion copyin/copyout instructions. */
1898
1899static void
517665b3 1900nvptx_reorg_subreg (void)
738f2522
BS
1901{
1902 struct reg_replace qiregs, hiregs, siregs, diregs;
1903 rtx_insn *insn, *next;
1904
738f2522
BS
1905 qiregs.n_allocated = 0;
1906 hiregs.n_allocated = 0;
1907 siregs.n_allocated = 0;
1908 diregs.n_allocated = 0;
1909 qiregs.mode = QImode;
1910 hiregs.mode = HImode;
1911 siregs.mode = SImode;
1912 diregs.mode = DImode;
1913
1914 for (insn = get_insns (); insn; insn = next)
1915 {
1916 next = NEXT_INSN (insn);
1917 if (!NONDEBUG_INSN_P (insn)
1fe6befc 1918 || asm_noperands (PATTERN (insn)) >= 0
738f2522
BS
1919 || GET_CODE (PATTERN (insn)) == USE
1920 || GET_CODE (PATTERN (insn)) == CLOBBER)
1921 continue;
f324806d 1922
738f2522
BS
1923 qiregs.n_in_use = 0;
1924 hiregs.n_in_use = 0;
1925 siregs.n_in_use = 0;
1926 diregs.n_in_use = 0;
1927 extract_insn (insn);
1928 enum attr_subregs_ok s_ok = get_attr_subregs_ok (insn);
f324806d 1929
738f2522
BS
1930 for (int i = 0; i < recog_data.n_operands; i++)
1931 {
1932 rtx op = recog_data.operand[i];
1933 if (GET_CODE (op) != SUBREG)
1934 continue;
1935
1936 rtx inner = SUBREG_REG (op);
1937
1938 machine_mode outer_mode = GET_MODE (op);
1939 machine_mode inner_mode = GET_MODE (inner);
1940 gcc_assert (s_ok);
1941 if (s_ok
1942 && (GET_MODE_PRECISION (inner_mode)
1943 >= GET_MODE_PRECISION (outer_mode)))
1944 continue;
1945 gcc_assert (SCALAR_INT_MODE_P (outer_mode));
1946 struct reg_replace *r = (outer_mode == QImode ? &qiregs
1947 : outer_mode == HImode ? &hiregs
1948 : outer_mode == SImode ? &siregs
1949 : &diregs);
1950 rtx new_reg = get_replacement (r);
1951
1952 if (recog_data.operand_type[i] != OP_OUT)
1953 {
1954 enum rtx_code code;
1955 if (GET_MODE_PRECISION (inner_mode)
1956 < GET_MODE_PRECISION (outer_mode))
1957 code = ZERO_EXTEND;
1958 else
1959 code = TRUNCATE;
1960
f7df4a84 1961 rtx pat = gen_rtx_SET (new_reg,
738f2522
BS
1962 gen_rtx_fmt_e (code, outer_mode, inner));
1963 emit_insn_before (pat, insn);
1964 }
1965
1966 if (recog_data.operand_type[i] != OP_IN)
1967 {
1968 enum rtx_code code;
1969 if (GET_MODE_PRECISION (inner_mode)
1970 < GET_MODE_PRECISION (outer_mode))
1971 code = TRUNCATE;
1972 else
1973 code = ZERO_EXTEND;
1974
f7df4a84 1975 rtx pat = gen_rtx_SET (inner,
738f2522
BS
1976 gen_rtx_fmt_e (code, inner_mode, new_reg));
1977 emit_insn_after (pat, insn);
1978 }
1979 validate_change (insn, recog_data.operand_loc[i], new_reg, false);
1980 }
1981 }
517665b3 1982}
738f2522 1983
517665b3 1984/* PTX-specific reorganization
c38f0d8c
NS
1985 - Compute live registers
1986 - Mark now-unused registers, so function begin doesn't declare
517665b3 1987 unused registers.
c38f0d8c 1988 - Replace subregs with suitable sequences.
517665b3
NS
1989*/
1990
1991static void
1992nvptx_reorg (void)
1993{
517665b3
NS
1994 /* We are freeing block_for_insn in the toplev to keep compatibility
1995 with old MDEP_REORGS that are not CFG based. Recompute it now. */
1996 compute_bb_for_insn ();
1997
1998 thread_prologue_and_epilogue_insns ();
1999
c38f0d8c 2000 /* Compute live regs */
517665b3
NS
2001 df_clear_flags (DF_LR_RUN_DCE);
2002 df_set_flags (DF_NO_INSN_RESCAN | DF_NO_HARD_REGS);
2003 df_analyze ();
738f2522
BS
2004 regstat_init_n_sets_and_refs ();
2005
517665b3
NS
2006 int max_regs = max_reg_num ();
2007
2008 /* Mark unused regs as unused. */
2009 for (int i = LAST_VIRTUAL_REGISTER + 1; i < max_regs; i++)
738f2522
BS
2010 if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0)
2011 regno_reg_rtx[i] = const0_rtx;
517665b3
NS
2012
2013 /* Replace subregs. */
c03b0416 2014 nvptx_reorg_subreg ();
517665b3 2015
738f2522 2016 regstat_free_n_sets_and_refs ();
517665b3
NS
2017
2018 df_finish_pass (true);
738f2522
BS
2019}
2020\f
2021/* Handle a "kernel" attribute; arguments as in
2022 struct attribute_spec.handler. */
2023
2024static tree
2025nvptx_handle_kernel_attribute (tree *node, tree name, tree ARG_UNUSED (args),
2026 int ARG_UNUSED (flags), bool *no_add_attrs)
2027{
2028 tree decl = *node;
2029
2030 if (TREE_CODE (decl) != FUNCTION_DECL)
2031 {
2032 error ("%qE attribute only applies to functions", name);
2033 *no_add_attrs = true;
2034 }
2035
2036 else if (TREE_TYPE (TREE_TYPE (decl)) != void_type_node)
2037 {
2038 error ("%qE attribute requires a void return type", name);
2039 *no_add_attrs = true;
2040 }
2041
2042 return NULL_TREE;
2043}
2044
2045/* Table of valid machine attributes. */
2046static const struct attribute_spec nvptx_attribute_table[] =
2047{
2048 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
2049 affects_type_identity } */
2050 { "kernel", 0, 0, true, false, false, nvptx_handle_kernel_attribute, false },
2051 { NULL, 0, 0, false, false, false, NULL, false }
2052};
2053\f
2054/* Limit vector alignments to BIGGEST_ALIGNMENT. */
2055
2056static HOST_WIDE_INT
2057nvptx_vector_alignment (const_tree type)
2058{
2059 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
2060
2061 return MIN (align, BIGGEST_ALIGNMENT);
2062}
2063\f
1f83528e
TS
2064/* Record a symbol for mkoffload to enter into the mapping table. */
2065
2066static void
2067nvptx_record_offload_symbol (tree decl)
2068{
3e32ee19
NS
2069 switch (TREE_CODE (decl))
2070 {
2071 case VAR_DECL:
2072 fprintf (asm_out_file, "//:VAR_MAP \"%s\"\n",
2073 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
2074 break;
2075
2076 case FUNCTION_DECL:
2077 {
2078 tree attr = get_oacc_fn_attrib (decl);
2079 tree dims = NULL_TREE;
2080 unsigned ix;
2081
2082 if (attr)
2083 dims = TREE_VALUE (attr);
2084 fprintf (asm_out_file, "//:FUNC_MAP \"%s\"",
2085 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
2086
2087 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
2088 {
2089 int size = 1;
2090
2091 /* TODO: This check can go away once the dimension default
2092 machinery is merged to trunk. */
2093 if (dims)
2094 {
2095 tree dim = TREE_VALUE (dims);
2096
2097 if (dim)
2098 size = TREE_INT_CST_LOW (dim);
2099
2100 gcc_assert (!TREE_PURPOSE (dims));
2101 dims = TREE_CHAIN (dims);
2102 }
2103
2104 fprintf (asm_out_file, ", %#x", size);
2105 }
2106
2107 fprintf (asm_out_file, "\n");
2108 }
2109 break;
2110
2111 default:
2112 gcc_unreachable ();
2113 }
1f83528e
TS
2114}
2115
738f2522
BS
2116/* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects
2117 at the start of a file. */
2118
2119static void
2120nvptx_file_start (void)
2121{
2122 fputs ("// BEGIN PREAMBLE\n", asm_out_file);
2123 fputs ("\t.version\t3.1\n", asm_out_file);
2124 fputs ("\t.target\tsm_30\n", asm_out_file);
2125 fprintf (asm_out_file, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode));
2126 fputs ("// END PREAMBLE\n", asm_out_file);
2127}
2128
ecf6e535
BS
2129/* Write out the function declarations we've collected and declare storage
2130 for the broadcast buffer. */
738f2522
BS
2131
2132static void
2133nvptx_file_end (void)
2134{
f3dba894
TS
2135 hash_table<tree_hasher>::iterator iter;
2136 tree decl;
2137 FOR_EACH_HASH_TABLE_ELEMENT (*needed_fndecls_htab, decl, tree, iter)
2138 nvptx_record_fndecl (decl, true);
738f2522
BS
2139 fputs (func_decls.str().c_str(), asm_out_file);
2140}
2141\f
94829f87
NS
2142/* Validate compute dimensions of an OpenACC offload or routine, fill
2143 in non-unity defaults. FN_LEVEL indicates the level at which a
2144 routine might spawn a loop. It is negative for non-routines. */
2145
2146static bool
2147nvptx_goacc_validate_dims (tree ARG_UNUSED (decl), int *ARG_UNUSED (dims),
2148 int ARG_UNUSED (fn_level))
2149{
2150 bool changed = false;
2151
2152 /* TODO: Leave dimensions unaltered. Partitioned execution needs
2153 porting before filtering dimensions makes sense. */
2154
2155 return changed;
2156}
2157\f
738f2522
BS
/* Target hook overrides for nvptx.  Every hook is #undef'd first and
   then redefined to the nvptx implementation, generic hook, or
   constant listed beside it.  */

/* Option processing and attributes.  */
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE nvptx_option_override
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE nvptx_attribute_table

/* Addressing and mode promotion.  */
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P nvptx_legitimate_address_p
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE nvptx_promote_function_mode

/* Argument passing.  */
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG nvptx_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG nvptx_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE nvptx_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY nvptx_function_arg_boundary
#undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
#define TARGET_FUNCTION_ARG_ROUND_BOUNDARY nvptx_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE nvptx_pass_by_reference
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING nvptx_strict_argument_naming

/* Return values.  */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE nvptx_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE nvptx_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P nvptx_function_value_regno_p
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY nvptx_return_in_memory
#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

/* Call-related hooks.  */
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX nvptx_get_drap_rtx
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN nvptx_static_chain
#undef TARGET_CALL_ARGS
#define TARGET_CALL_ARGS nvptx_call_args
#undef TARGET_END_CALL_ARGS
#define TARGET_END_CALL_ARGS nvptx_end_call_args

/* Assembly output and constant placement.  */
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START nvptx_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END nvptx_file_end
#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL nvptx_globalize_label
#undef TARGET_ASM_ASSEMBLE_UNDEFINED_DECL
#define TARGET_ASM_ASSEMBLE_UNDEFINED_DECL nvptx_assemble_undefined_decl
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND nvptx_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS nvptx_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P nvptx_print_operand_punct_valid_p
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER nvptx_assemble_integer
#undef TARGET_ASM_DECL_END
#define TARGET_ASM_DECL_END nvptx_assemble_decl_end
#undef TARGET_ASM_DECLARE_CONSTANT_NAME
#define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
#undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE
#define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true

/* Machine-dependent reorg and register allocation.  */
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg
#undef TARGET_NO_REGISTER_ALLOCATION
#define TARGET_NO_REGISTER_ALLOCATION true

/* Vector alignment.  */
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment

/* Offloading and OpenACC.  */
#undef TARGET_RECORD_OFFLOAD_SYMBOL
#define TARGET_RECORD_OFFLOAD_SYMBOL nvptx_record_offload_symbol
#undef TARGET_GOACC_VALIDATE_DIMS
#define TARGET_GOACC_VALIDATE_DIMS nvptx_goacc_validate_dims
2246
738f2522
BS
/* The target hook vector for this backend.  NOTE(review): presumably
   TARGET_INITIALIZER expands using the TARGET_* macros redefined
   earlier in this file; its definition is not visible here, so confirm
   against the header that provides it.  */
2247struct gcc_target targetm = TARGET_INITIALIZER;
2248
2249#include "gt-nvptx.h"