1 /* Offload image generation tool for AMD GCN.
3 Copyright (C) 2014-2024 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Munges GCN assembly into a C source file that embeds the GCN code as a
24 byte array. This is not a complete assembler. We presume the source is
25 well formed from the compiler and can die horribly if it is not. */
29 #include "coretypes.h"
31 #include "diagnostic.h"
34 #include "collect-utils.h"
35 #include "gomp-constants.h"
36 #include "simple-object.h"
/* ELF identification constants for AMD GPU objects.  These probably won't
   (all) be in elf.h for a while, so every name is dropped and redefined
   here to get a known value whatever the host headers provide.  */

/* Value stored in the ELF header's e_machine field.  The macro must expand
   to a bare constant (no trailing semicolon) so that it is usable inside
   expressions as well as on the right-hand side of an assignment.  */
#undef  EM_AMDGPU
#define EM_AMDGPU			0xe0

/* Values written to e_ident[7] (OS ABI) and e_ident[8] (ABI version).  */
#undef  ELFOSABI_AMDGPU_HSA
#define ELFOSABI_AMDGPU_HSA		64
#undef  ELFABIVERSION_AMDGPU_HSA_V3
#define ELFABIVERSION_AMDGPU_HSA_V3	1
#undef  ELFABIVERSION_AMDGPU_HSA_V4
#define ELFABIVERSION_AMDGPU_HSA_V4	2

/* Architecture codes; one of these is held in elf_arch and OR-ed into
   e_flags.  */
#undef  EF_AMDGPU_MACH_AMDGCN_GFX803
#define EF_AMDGPU_MACH_AMDGCN_GFX803	0x2a
#undef  EF_AMDGPU_MACH_AMDGCN_GFX900
#define EF_AMDGPU_MACH_AMDGCN_GFX900	0x2c
#undef  EF_AMDGPU_MACH_AMDGCN_GFX906
#define EF_AMDGPU_MACH_AMDGCN_GFX906	0x2f
#undef  EF_AMDGPU_MACH_AMDGCN_GFX908
#define EF_AMDGPU_MACH_AMDGCN_GFX908	0x30
#undef  EF_AMDGPU_MACH_AMDGCN_GFX90a
#define EF_AMDGPU_MACH_AMDGCN_GFX90a	0x3f
#undef  EF_AMDGPU_MACH_AMDGCN_GFX1030
#define EF_AMDGPU_MACH_AMDGCN_GFX1030	0x36
#undef  EF_AMDGPU_MACH_AMDGCN_GFX1100
#define EF_AMDGPU_MACH_AMDGCN_GFX1100	0x41
#undef  EF_AMDGPU_MACH_AMDGCN_GFX1103
#define EF_AMDGPU_MACH_AMDGCN_GFX1103	0x44
/* XNACK and SRAM-ECC selection fields of the flags word (the global
   elf_flags is what main passes to the SET_ and TEST_ macros below).
   Each field is two bits wide; the name without a state suffix is the
   field mask and the other four names are the states it can hold.  */
#define EF_AMDGPU_FEATURE_XNACK_V4		0x300  /* Mask.  */
#define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4	0x000
#define EF_AMDGPU_FEATURE_XNACK_ANY_V4		0x100
#define EF_AMDGPU_FEATURE_XNACK_OFF_V4		0x200
#define EF_AMDGPU_FEATURE_XNACK_ON_V4		0x300

#define EF_AMDGPU_FEATURE_SRAMECC_V4		0xc00  /* Mask.  */
#define EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 0x000
#define EF_AMDGPU_FEATURE_SRAMECC_ANY_V4	0x400
#define EF_AMDGPU_FEATURE_SRAMECC_OFF_V4	0x800
#define EF_AMDGPU_FEATURE_SRAMECC_ON_V4		0xc00

/* SET_<field>_<state> (VAR) replaces one field of the lvalue VAR and
   leaves every other bit alone.  VAR is evaluated twice, so it must not
   have side effects.  Each macro is a single parenthesised assignment
   expression, so it still works as the body of an unbraced if/else.  */
#define SET_XNACK_ON(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
	    | EF_AMDGPU_FEATURE_XNACK_ON_V4))
#define SET_XNACK_ANY(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
	    | EF_AMDGPU_FEATURE_XNACK_ANY_V4))
#define SET_XNACK_OFF(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
	    | EF_AMDGPU_FEATURE_XNACK_OFF_V4))
/* Uses the XNACK "unsupported" state; the SRAM-ECC constant that used to
   be named here has the same value (zero) but belongs to the other
   field.  */
#define SET_XNACK_UNSET(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
	    | EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4))

/* TEST_<field>_<state> (VAR) is nonzero when the field holds that state.
   The argument is parenthesised so that an expression such as A | B is
   masked as a whole.  */
#define TEST_XNACK_ANY(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_XNACK_V4) == EF_AMDGPU_FEATURE_XNACK_ANY_V4)
#define TEST_XNACK_ON(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_XNACK_V4) == EF_AMDGPU_FEATURE_XNACK_ON_V4)
#define TEST_XNACK_OFF(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_XNACK_V4) == EF_AMDGPU_FEATURE_XNACK_OFF_V4)
#define TEST_XNACK_UNSET(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_XNACK_V4) == 0)

#define SET_SRAM_ECC_ON(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
	    | EF_AMDGPU_FEATURE_SRAMECC_ON_V4))
#define SET_SRAM_ECC_ANY(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
	    | EF_AMDGPU_FEATURE_SRAMECC_ANY_V4))
#define SET_SRAM_ECC_OFF(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
	    | EF_AMDGPU_FEATURE_SRAMECC_OFF_V4))
#define SET_SRAM_ECC_UNSET(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
	    | EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4))

#define TEST_SRAM_ECC_ANY(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_SRAMECC_V4) == EF_AMDGPU_FEATURE_SRAMECC_ANY_V4)
#define TEST_SRAM_ECC_ON(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_SRAMECC_V4) == EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
#define TEST_SRAM_ECC_UNSET(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_SRAMECC_V4) == 0)
/* AMDGPU relocation numbers, supplied only when the headers seen so far
   have not already defined them; R_AMDGPU_NONE is the probe for that.
   The trailing comment on each line is the relocation's formula as given
   in the original table.  Number 12 is not defined here.  */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE		0
#define R_AMDGPU_ABS32_LO	1	/* (S + A) & 0xFFFFFFFF  */
#define R_AMDGPU_ABS32_HI	2	/* (S + A) >> 32  */
#define R_AMDGPU_ABS64		3	/* S + A  */
#define R_AMDGPU_REL32		4	/* S + A - P  */
#define R_AMDGPU_REL64		5	/* S + A - P  */
#define R_AMDGPU_ABS32		6	/* S + A  */
#define R_AMDGPU_GOTPCREL	7	/* G + GOT + A - P  */
#define R_AMDGPU_GOTPCREL32_LO	8	/* (G + GOT + A - P) & 0xFFFFFFFF  */
#define R_AMDGPU_GOTPCREL32_HI	9	/* (G + GOT + A - P) >> 32  */
#define R_AMDGPU_REL32_LO	10	/* (S + A - P) & 0xFFFFFFFF  */
#define R_AMDGPU_REL32_HI	11	/* (S + A - P) >> 32  */
#define R_AMDGPU_RELATIVE64	13	/* B + A  */
#endif /* !R_AMDGPU_NONE */
126 const char tool_name
[] = "gcn mkoffload";
128 static const char *gcn_dumpbase
;
129 static struct obstack files_to_cleanup
;
131 enum offload_abi offload_abi
= OFFLOAD_ABI_UNSET
;
132 uint32_t elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX900
; // Default GPU architecture.
133 uint32_t elf_flags
= EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
;
135 static int gcn_stack_size
= 0; /* Zero means use default. */
137 /* Delete tempfiles. */
140 tool_cleanup (bool from_signal ATTRIBUTE_UNUSED
)
142 obstack_ptr_grow (&files_to_cleanup
, NULL
);
143 const char **files
= XOBFINISH (&files_to_cleanup
, const char **);
144 for (int i
= 0; files
[i
]; i
++)
145 maybe_unlink (files
[i
]);
149 mkoffload_cleanup (void)
151 tool_cleanup (false);
154 /* Unlink FILE unless requested otherwise. */
157 maybe_unlink (const char *file
)
161 if (unlink_if_ordinary (file
) && errno
!= ENOENT
)
162 fatal_error (input_location
, "deleting file %qs: %m", file
);
165 fprintf (stderr
, "[Leaving %s]\n", file
);
168 /* Add or change the value of an environment variable, outputting the
169 change to standard error if in verbose mode. */
172 xputenv (const char *string
)
175 fprintf (stderr
, "%s\n", string
);
176 putenv (CONST_CAST (char *, string
));
179 /* Read the whole input file. It will be NUL terminated (but
180 remember, there could be a NUL in the file itself). */
183 read_file (FILE *stream
, size_t *plen
)
185 size_t alloc
= 16384;
189 if (!fseek (stream
, 0, SEEK_END
))
191 /* Get the file size. */
192 long s
= ftell (stream
);
195 fseek (stream
, 0, SEEK_SET
);
197 buffer
= XNEWVEC (char, alloc
);
201 size_t n
= fread (buffer
+ base
, 1, alloc
- base
- 1, stream
);
206 if (base
+ 1 == alloc
)
209 buffer
= XRESIZEVEC (char, buffer
, alloc
);
217 /* Parse STR, saving found tokens into PVALUES and return their number.
218 Tokens are assumed to be delimited by ':'. */
221 parse_env_var (const char *str
, char ***pvalues
)
223 const char *curval
, *nextval
;
227 curval
= strchr (str
, ':');
231 curval
= strchr (curval
+ 1, ':');
234 values
= (char **) xmalloc (num
* sizeof (char *));
236 nextval
= strchr (curval
, ':');
238 nextval
= strchr (curval
, '\0');
240 for (i
= 0; i
< num
; i
++)
242 int l
= nextval
- curval
;
243 values
[i
] = (char *) xmalloc (l
+ 1);
244 memcpy (values
[i
], curval
, l
);
246 curval
= nextval
+ 1;
247 nextval
= strchr (curval
, ':');
249 nextval
= strchr (curval
, '\0');
255 /* Auxiliary function that frees elements of PTR and PTR itself.
256 N is number of elements to be freed. If PTR is NULL, nothing is freed.
257 If an element is NULL, subsequent elements are not freed. */
260 free_array_of_ptrs (void **ptr
, unsigned n
)
265 for (i
= 0; i
< n
; i
++)
275 /* Check whether NAME can be accessed in MODE. This is like access,
276 except that it never considers directories to be executable. */
279 access_check (const char *name
, int mode
)
285 if (stat (name
, &st
) < 0 || S_ISDIR (st
.st_mode
))
289 return access (name
, mode
);
292 /* Copy the early-debug-info from the incoming LTO object to a new object
293 that will be linked into the output HSACO file. The host relocations
294 must be translated into GCN relocations, and any global undefined symbols
295 must be weakened (so that the debug info does not try to pull in host objects).
298 Returns true if the file was created, false otherwise. */
301 copy_early_debug_info (const char *infile
, const char *outfile
)
306 /* The simple_object code can handle extracting the debug sections.
307 This code is based on that in lto-wrapper.cc. */
308 int infd
= open (infile
, O_RDONLY
| O_BINARY
);
311 simple_object_read
*inobj
= simple_object_start_read (infd
, 0,
318 if (simple_object_find_section (inobj
, ".gnu.debuglto_.debug_info",
319 &off
, &len
, &errmsg
, &err
) != 1)
321 simple_object_release_read (inobj
);
326 errmsg
= simple_object_copy_lto_debug_sections (inobj
, outfile
, &err
, true);
330 simple_object_release_read (inobj
);
333 /* Open the file we just created for some adjustments.
334 The simple_object code can't do this, so we do it manually. */
335 FILE *outfd
= fopen (outfile
, "r+b");
340 if (fread (&ehdr
, sizeof (ehdr
), 1, outfd
) != 1)
346 /* We only support host relocations of x86_64, for now. */
347 gcc_assert (ehdr
.e_machine
== EM_X86_64
);
349 /* Fiji devices use HSACOv3 regardless of the assembler. */
350 uint32_t elf_flags_actual
= (elf_arch
== EF_AMDGPU_MACH_AMDGCN_GFX803
353 /* Patch the correct elf architecture flag into the file. */
354 ehdr
.e_ident
[7] = ELFOSABI_AMDGPU_HSA
;
355 ehdr
.e_ident
[8] = (elf_arch
== EF_AMDGPU_MACH_AMDGCN_GFX803
356 ? ELFABIVERSION_AMDGPU_HSA_V3
357 : ELFABIVERSION_AMDGPU_HSA_V4
);
358 ehdr
.e_type
= ET_REL
;
359 ehdr
.e_machine
= EM_AMDGPU
;
360 ehdr
.e_flags
= elf_arch
| elf_flags_actual
;
362 /* Load the section headers so we can walk them later. */
363 Elf64_Shdr
*sections
= (Elf64_Shdr
*)xmalloc (sizeof (Elf64_Shdr
)
365 if (fseek (outfd
, ehdr
.e_shoff
, SEEK_SET
) == -1
366 || fread (sections
, sizeof (Elf64_Shdr
), ehdr
.e_shnum
,
367 outfd
) != ehdr
.e_shnum
)
374 /* Convert the host relocations to target relocations. */
375 for (int i
= 0; i
< ehdr
.e_shnum
; i
++)
377 if (sections
[i
].sh_type
!= SHT_RELA
)
380 char *data
= (char *)xmalloc (sections
[i
].sh_size
);
381 if (fseek (outfd
, sections
[i
].sh_offset
, SEEK_SET
) == -1
382 || fread (data
, sections
[i
].sh_size
, 1, outfd
) != 1)
388 for (size_t offset
= 0;
389 offset
< sections
[i
].sh_size
;
390 offset
+= sections
[i
].sh_entsize
)
392 Elf64_Rela
*reloc
= (Elf64_Rela
*) (data
+ offset
);
394 /* Map the host relocations to GCN relocations.
395 Only relocations that can appear in DWARF need be handled. */
396 switch (ELF64_R_TYPE (reloc
->r_info
))
400 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
404 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
408 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
412 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
415 case R_X86_64_RELATIVE
:
416 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
417 R_AMDGPU_RELATIVE64
);
424 /* Write back our relocation changes. */
425 if (fseek (outfd
, sections
[i
].sh_offset
, SEEK_SET
) != -1)
426 fwrite (data
, sections
[i
].sh_size
, 1, outfd
);
431 /* Weaken any global undefined symbols that would pull in unwanted objects. */
433 for (int i
= 0; i
< ehdr
.e_shnum
; i
++)
435 if (sections
[i
].sh_type
!= SHT_SYMTAB
)
438 char *data
= (char *)xmalloc (sections
[i
].sh_size
);
439 if (fseek (outfd
, sections
[i
].sh_offset
, SEEK_SET
) == -1
440 || fread (data
, sections
[i
].sh_size
, 1, outfd
) != 1)
446 for (size_t offset
= 0;
447 offset
< sections
[i
].sh_size
;
448 offset
+= sections
[i
].sh_entsize
)
450 Elf64_Sym
*sym
= (Elf64_Sym
*) (data
+ offset
);
451 int type
= ELF64_ST_TYPE (sym
->st_info
);
452 int bind
= ELF64_ST_BIND (sym
->st_info
);
454 if (bind
== STB_GLOBAL
&& sym
->st_shndx
== 0)
455 sym
->st_info
= ELF64_ST_INFO (STB_WEAK
, type
);
458 /* Write back our symbol changes. */
459 if (fseek (outfd
, sections
[i
].sh_offset
, SEEK_SET
) != -1)
460 fwrite (data
, sections
[i
].sh_size
, 1, outfd
);
466 /* Write back our header changes. */
468 fwrite (&ehdr
, sizeof (ehdr
), 1, outfd
);
474 /* Parse an input assembler file, extract the offload tables etc.,
475 and output (1) the assembler code, minus the tables (which can contain
476 problematic relocations), and (2) a C file with the offload tables
477 encoded as structured data. */
480 process_asm (FILE *in
, FILE *out
, FILE *cfile
)
482 int fn_count
= 0, var_count
= 0, ind_fn_count
= 0;
483 int dims_count
= 0, regcount_count
= 0;
484 struct obstack fns_os
, dims_os
, regcounts_os
;
485 obstack_init (&fns_os
);
486 obstack_init (&dims_os
);
487 obstack_init (®counts_os
);
500 } regcount
= { -1, -1, NULL
};
502 /* Always add _init_array and _fini_array as kernels. */
503 obstack_ptr_grow (&fns_os
, xstrdup ("_init_array"));
504 obstack_ptr_grow (&fns_os
, xstrdup ("_fini_array"));
515 while (fgets (buf
, sizeof (buf
), in
))
521 if (sscanf (buf
, " ;; OPENACC-DIMS: %d, %d, %d : %ms\n",
522 &dim
.d
[0], &dim
.d
[1], &dim
.d
[2], &dim
.name
) == 4)
524 obstack_grow (&dims_os
, &dim
, sizeof (dim
));
532 if (sscanf (buf
, " - .name: %ms\n", ®count
.kernel_name
) == 1)
534 else if (sscanf (buf
, " .sgpr_count: %d\n",
535 ®count
.sgpr_count
) == 1)
537 gcc_assert (regcount
.kernel_name
);
540 else if (sscanf (buf
, " .vgpr_count: %d\n",
541 ®count
.vgpr_count
) == 1)
543 gcc_assert (regcount
.kernel_name
);
553 if (sscanf (buf
, " .8byte %ms\n", &varname
))
556 fgets (buf
, sizeof (buf
), in
);
557 if (!sscanf (buf
, " .8byte %u\n", &varsize
))
566 if (sscanf (buf
, "\t.8byte\t%ms\n", &funcname
))
569 obstack_ptr_grow (&fns_os
, funcname
);
578 if (sscanf (buf
, "\t.8byte\t%ms\n", &funcname
))
589 if (sscanf (buf
, " .section .gnu.offload_vars%c", &dummy
) > 0)
593 /* Add a global symbol to allow plugin-gcn.c to locate the table
594 at runtime. It can't use the "offload_var_table.N" emitted by
595 the compiler because a) they're not global, and b) there's one
596 for each input file combined into the binary. */
598 fputs ("\t.global .offload_var_table\n"
599 "\t.type .offload_var_table, @object\n"
600 ".offload_var_table:\n",
603 else if (sscanf (buf
, " .section .gnu.offload_funcs%c", &dummy
) > 0)
606 /* Likewise for .gnu.offload_vars; used for reverse offload. */
608 fputs ("\t.global .offload_func_table\n"
609 "\t.type .offload_func_table, @object\n"
610 ".offload_func_table:\n",
613 else if (sscanf (buf
, " .section .gnu.offload_ind_funcs%c", &dummy
) > 0)
615 state
= IN_IND_FUNCS
;
617 fputs ("\t.global .offload_ind_func_table\n"
618 "\t.type .offload_ind_func_table, @object\n"
619 ".offload_ind_func_table:\n",
622 else if (sscanf (buf
, " .amdgpu_metadata%c", &dummy
) > 0)
625 regcount
.kernel_name
= NULL
;
626 regcount
.sgpr_count
= regcount
.vgpr_count
= -1;
628 else if (sscanf (buf
, " .section %c", &dummy
) > 0
629 || sscanf (buf
, " .text%c", &dummy
) > 0
630 || sscanf (buf
, " .bss%c", &dummy
) > 0
631 || sscanf (buf
, " .data%c", &dummy
) > 0
632 || sscanf (buf
, " .ident %c", &dummy
) > 0)
634 else if (sscanf (buf
, " .end_amdgpu_metadata%c", &dummy
) > 0)
637 gcc_assert (regcount
.kernel_name
!= NULL
638 && regcount
.sgpr_count
>= 0
639 && regcount
.vgpr_count
>= 0);
640 obstack_grow (®counts_os
, ®count
, sizeof (regcount
));
642 regcount
.kernel_name
= NULL
;
643 regcount
.sgpr_count
= regcount
.vgpr_count
= -1;
646 if (state
== IN_CODE
|| state
== IN_METADATA
|| state
== IN_VARS
)
650 char **fns
= XOBFINISH (&fns_os
, char **);
651 struct oaccdims
*dims
= XOBFINISH (&dims_os
, struct oaccdims
*);
652 struct regcount
*regcounts
= XOBFINISH (®counts_os
, struct regcount
*);
654 fprintf (cfile
, "#include <stdlib.h>\n");
655 fprintf (cfile
, "#include <stdint.h>\n");
656 fprintf (cfile
, "#include <stdbool.h>\n\n");
658 fprintf (cfile
, "static const int gcn_num_vars = %d;\n\n", var_count
);
659 fprintf (cfile
, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count
);
661 /* Dump out function idents. */
662 fprintf (cfile
, "static const struct hsa_kernel_description {\n"
663 " const char *name;\n"
664 " int oacc_dims[3];\n"
667 "} gcn_kernels[] = {\n ");
668 dim
.d
[0] = dim
.d
[1] = dim
.d
[2] = 0;
671 for (comma
= "", i
= 0; i
< fn_count
; comma
= ",\n ", i
++)
673 /* Find if we recorded dimensions for this function. */
674 int *d
= dim
.d
; /* Previously zeroed. */
677 for (int j
= 0; j
< dims_count
; j
++)
678 if (strcmp (fns
[i
], dims
[j
].name
) == 0)
683 for (int j
= 0; j
< regcount_count
; j
++)
684 if (strcmp (fns
[i
], regcounts
[j
].kernel_name
) == 0)
686 sgpr_count
= regcounts
[j
].sgpr_count
;
687 vgpr_count
= regcounts
[j
].vgpr_count
;
691 fprintf (cfile
, "%s{\"%s\", {%d, %d, %d}, %d, %d}", comma
,
692 fns
[i
], d
[0], d
[1], d
[2], sgpr_count
, vgpr_count
);
696 fprintf (cfile
, "\n};\n\n");
698 /* Set the stack size if the user configured a value. */
701 "static __attribute__((constructor))\n"
702 "void configure_stack_size (void)\n"
704 " const char *val = getenv (\"GCN_STACK_SIZE\");\n"
705 " if (!val || val[0] == '\\0')\n"
706 " setenv (\"GCN_STACK_SIZE\", \"%d\", true);\n"
710 obstack_free (&fns_os
, NULL
);
711 for (i
= 0; i
< dims_count
; i
++)
713 for (i
= 0; i
< regcount_count
; i
++)
714 free (regcounts
[i
].kernel_name
);
715 obstack_free (&dims_os
, NULL
);
716 obstack_free (®counts_os
, NULL
);
719 /* Embed an object file into a C source file. */
722 process_obj (FILE *in
, FILE *cfile
, uint32_t omp_requires
)
725 const char *input
= read_file (in
, &len
);
727 /* Dump out an array containing the binary.
728 FIXME: do this with objcopy. */
729 fprintf (cfile
, "static unsigned char gcn_code[] = {");
730 for (size_t i
= 0; i
< len
; i
+= 17)
732 fprintf (cfile
, "\n\t");
733 for (size_t j
= i
; j
< i
+ 17 && j
< len
; j
++)
734 fprintf (cfile
, "%3u,", (unsigned char) input
[j
]);
736 fprintf (cfile
, "\n};\n\n");
739 "static const struct gcn_image {\n"
749 "static const struct gcn_data {\n"
750 " uintptr_t omp_requires_mask;\n"
751 " const struct gcn_image *gcn_image;\n"
752 " unsigned kernel_count;\n"
753 " const struct hsa_kernel_description *kernel_infos;\n"
754 " unsigned ind_func_count;\n"
755 " unsigned global_variable_count;\n"
759 " sizeof (gcn_kernels) / sizeof (gcn_kernels[0]),\n"
761 " gcn_num_ind_funcs,\n"
763 "};\n\n", omp_requires
);
766 "#ifdef __cplusplus\n"
769 "extern void GOMP_offload_register_ver"
770 " (unsigned, const void *, int, const void *);\n"
771 "extern void GOMP_offload_unregister_ver"
772 " (unsigned, const void *, int, const void *);\n"
773 "#ifdef __cplusplus\n"
777 fprintf (cfile
, "extern const void *const __OFFLOAD_TABLE__[];\n\n");
779 fprintf (cfile
, "static __attribute__((constructor)) void init (void)\n"
781 " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__,"
782 " %d/*GCN*/, &gcn_data);\n"
784 GOMP_VERSION_PACK (GOMP_VERSION
, GOMP_VERSION_GCN
),
787 fprintf (cfile
, "static __attribute__((destructor)) void fini (void)\n"
789 " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__,"
790 " %d/*GCN*/, &gcn_data);\n"
792 GOMP_VERSION_PACK (GOMP_VERSION
, GOMP_VERSION_GCN
),
796 /* Compile a C file using the host compiler. */
799 compile_native (const char *infile
, const char *outfile
, const char *compiler
,
800 bool fPIC
, bool fpic
)
802 const char *collect_gcc_options
= getenv ("COLLECT_GCC_OPTIONS");
803 if (!collect_gcc_options
)
804 fatal_error (input_location
,
805 "environment variable %<COLLECT_GCC_OPTIONS%> must be set");
807 struct obstack argv_obstack
;
808 obstack_init (&argv_obstack
);
809 obstack_ptr_grow (&argv_obstack
, compiler
);
811 obstack_ptr_grow (&argv_obstack
, "-fPIC");
813 obstack_ptr_grow (&argv_obstack
, "-fpic");
815 obstack_ptr_grow (&argv_obstack
, "-save-temps");
817 obstack_ptr_grow (&argv_obstack
, "-v");
818 obstack_ptr_grow (&argv_obstack
, "-dumpdir");
819 obstack_ptr_grow (&argv_obstack
, "");
820 obstack_ptr_grow (&argv_obstack
, "-dumpbase");
821 obstack_ptr_grow (&argv_obstack
, gcn_dumpbase
);
822 obstack_ptr_grow (&argv_obstack
, "-dumpbase-ext");
823 obstack_ptr_grow (&argv_obstack
, ".c");
826 case OFFLOAD_ABI_LP64
:
827 obstack_ptr_grow (&argv_obstack
, "-m64");
829 case OFFLOAD_ABI_ILP32
:
830 obstack_ptr_grow (&argv_obstack
, "-m32");
835 obstack_ptr_grow (&argv_obstack
, infile
);
836 obstack_ptr_grow (&argv_obstack
, "-c");
837 obstack_ptr_grow (&argv_obstack
, "-o");
838 obstack_ptr_grow (&argv_obstack
, outfile
);
839 obstack_ptr_grow (&argv_obstack
, NULL
);
841 const char **new_argv
= XOBFINISH (&argv_obstack
, const char **);
842 fork_execute (new_argv
[0], CONST_CAST (char **, new_argv
), true,
844 obstack_free (&argv_obstack
, NULL
);
848 main (int argc
, char **argv
)
852 FILE *cfile
= stdout
;
853 const char *outname
= 0;
855 progname
= tool_name
;
856 diagnostic_initialize (global_dc
, 0);
858 obstack_init (&files_to_cleanup
);
859 if (atexit (mkoffload_cleanup
) != 0)
860 fatal_error (input_location
, "%<atexit%> failed");
862 char *collect_gcc
= getenv ("COLLECT_GCC");
863 if (collect_gcc
== NULL
)
864 fatal_error (input_location
, "%<COLLECT_GCC%> must be set");
865 const char *gcc_path
= dirname (ASTRDUP (collect_gcc
));
866 const char *gcc_exec
= basename (ASTRDUP (collect_gcc
));
868 size_t len
= (strlen (gcc_path
) + 1 + strlen (GCC_INSTALL_NAME
) + 1);
869 char *driver
= XALLOCAVEC (char, len
);
871 if (strcmp (gcc_exec
, collect_gcc
) == 0)
872 /* collect_gcc has no path, so it was found in PATH. Make sure we also
873 find accel-gcc in PATH. */
877 if (gcc_path
!= NULL
)
878 driver_used
= sprintf (driver
, "%s/", gcc_path
);
879 sprintf (driver
+ driver_used
, "%s", GCC_INSTALL_NAME
);
882 if (gcc_path
== NULL
)
884 else if (access_check (driver
, X_OK
) == 0)
888 /* Don't use alloca pointer with XRESIZEVEC. */
890 /* Look in all COMPILER_PATHs for GCC_INSTALL_NAME. */
893 n_paths
= parse_env_var (getenv ("COMPILER_PATH"), &paths
);
894 for (unsigned i
= 0; i
< n_paths
; i
++)
896 len
= strlen (paths
[i
]) + 1 + strlen (GCC_INSTALL_NAME
) + 1;
897 driver
= XRESIZEVEC (char, driver
, len
);
898 sprintf (driver
, "%s/%s", paths
[i
], GCC_INSTALL_NAME
);
899 if (access_check (driver
, X_OK
) == 0)
905 free_array_of_ptrs ((void **) paths
, n_paths
);
909 fatal_error (input_location
,
910 "offload compiler %qs not found", GCC_INSTALL_NAME
);
912 /* We may be called with all the arguments stored in some file and
913 passed with @file. Expand them into argv before processing. */
914 expandargv (&argc
, &argv
);
916 /* Scan the argument vector. */
917 bool fopenmp
= false;
918 bool fopenacc
= false;
921 for (int i
= 1; i
< argc
; i
++)
923 #define STR "-foffload-abi="
924 if (startswith (argv
[i
], STR
))
926 if (strcmp (argv
[i
] + strlen (STR
), "lp64") == 0)
927 offload_abi
= OFFLOAD_ABI_LP64
;
928 else if (strcmp (argv
[i
] + strlen (STR
), "ilp32") == 0)
929 offload_abi
= OFFLOAD_ABI_ILP32
;
931 fatal_error (input_location
,
932 "unrecognizable argument of option %<" STR
"%>");
935 else if (strcmp (argv
[i
], "-fopenmp") == 0)
937 else if (strcmp (argv
[i
], "-fopenacc") == 0)
939 else if (strcmp (argv
[i
], "-fPIC") == 0)
941 else if (strcmp (argv
[i
], "-fpic") == 0)
943 else if (strcmp (argv
[i
], "-mxnack=on") == 0)
944 SET_XNACK_ON (elf_flags
);
945 else if (strcmp (argv
[i
], "-mxnack=any") == 0)
946 SET_XNACK_ANY (elf_flags
);
947 else if (strcmp (argv
[i
], "-mxnack=off") == 0)
948 SET_XNACK_OFF (elf_flags
);
949 else if (strcmp (argv
[i
], "-msram-ecc=on") == 0)
950 SET_SRAM_ECC_ON (elf_flags
);
951 else if (strcmp (argv
[i
], "-msram-ecc=any") == 0)
952 SET_SRAM_ECC_ANY (elf_flags
);
953 else if (strcmp (argv
[i
], "-msram-ecc=off") == 0)
954 SET_SRAM_ECC_OFF (elf_flags
);
955 else if (strcmp (argv
[i
], "-save-temps") == 0)
957 else if (strcmp (argv
[i
], "-v") == 0)
959 else if (strcmp (argv
[i
], "-dumpbase") == 0
962 else if (strcmp (argv
[i
], "-march=fiji") == 0)
963 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX803
;
964 else if (strcmp (argv
[i
], "-march=gfx900") == 0)
965 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX900
;
966 else if (strcmp (argv
[i
], "-march=gfx906") == 0)
967 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX906
;
968 else if (strcmp (argv
[i
], "-march=gfx908") == 0)
969 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX908
;
970 else if (strcmp (argv
[i
], "-march=gfx90a") == 0)
971 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX90a
;
972 else if (strcmp (argv
[i
], "-march=gfx1030") == 0)
973 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX1030
;
974 else if (strcmp (argv
[i
], "-march=gfx1100") == 0)
975 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX1100
;
976 else if (strcmp (argv
[i
], "-march=gfx1103") == 0)
977 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX1103
;
978 #define STR "-mstack-size="
979 else if (startswith (argv
[i
], STR
))
980 gcn_stack_size
= atoi (argv
[i
] + strlen (STR
));
982 /* Translate host into offloading libraries. */
983 else if (strcmp (argv
[i
], "-l_GCC_gfortran") == 0
984 || strcmp (argv
[i
], "-l_GCC_m") == 0)
987 size_t i_dst
= strlen ("-l");
988 size_t i_src
= strlen ("-l_GCC_");
991 c
= argv
[i
][i_dst
++] = argv
[i
][i_src
++];
996 if (!(fopenacc
^ fopenmp
))
997 fatal_error (input_location
,
998 "either %<-fopenacc%> or %<-fopenmp%> must be set");
1001 switch (offload_abi
)
1003 case OFFLOAD_ABI_LP64
:
1006 case OFFLOAD_ABI_ILP32
:
1013 /* This must match gcn-hsa.h's settings for NO_XNACK and NO_SRAM_ECC. */
1017 case EF_AMDGPU_MACH_AMDGCN_GFX803
:
1018 case EF_AMDGPU_MACH_AMDGCN_GFX1030
:
1019 case EF_AMDGPU_MACH_AMDGCN_GFX1100
:
1020 case EF_AMDGPU_MACH_AMDGCN_GFX1103
:
1021 SET_XNACK_UNSET (elf_flags
);
1022 SET_SRAM_ECC_UNSET (elf_flags
);
1024 case EF_AMDGPU_MACH_AMDGCN_GFX900
:
1025 SET_XNACK_OFF (elf_flags
);
1026 SET_SRAM_ECC_UNSET (elf_flags
);
1028 case EF_AMDGPU_MACH_AMDGCN_GFX906
:
1029 SET_XNACK_OFF (elf_flags
);
1030 SET_SRAM_ECC_ANY (elf_flags
);
1032 case EF_AMDGPU_MACH_AMDGCN_GFX908
:
1033 SET_XNACK_OFF (elf_flags
);
1034 if (TEST_SRAM_ECC_UNSET (elf_flags
))
1035 SET_SRAM_ECC_ANY (elf_flags
);
1037 case EF_AMDGPU_MACH_AMDGCN_GFX90a
:
1038 if (TEST_XNACK_UNSET (elf_flags
))
1039 SET_XNACK_ANY (elf_flags
);
1040 if (TEST_SRAM_ECC_UNSET (elf_flags
))
1041 SET_SRAM_ECC_ANY (elf_flags
);
1044 fatal_error (input_location
, "unhandled architecture");
1047 /* Build arguments for compiler pass. */
1048 struct obstack cc_argv_obstack
;
1049 obstack_init (&cc_argv_obstack
);
1050 obstack_ptr_grow (&cc_argv_obstack
, driver
);
1051 obstack_ptr_grow (&cc_argv_obstack
, "-S");
1054 obstack_ptr_grow (&cc_argv_obstack
, "-save-temps");
1056 obstack_ptr_grow (&cc_argv_obstack
, "-v");
1057 obstack_ptr_grow (&cc_argv_obstack
, abi
);
1058 obstack_ptr_grow (&cc_argv_obstack
, "-xlto");
1060 obstack_ptr_grow (&cc_argv_obstack
, "-mgomp");
1062 for (int ix
= 1; ix
!= argc
; ix
++)
1064 if (!strcmp (argv
[ix
], "-o") && ix
+ 1 != argc
)
1065 outname
= argv
[++ix
];
1067 obstack_ptr_grow (&cc_argv_obstack
, argv
[ix
]);
1073 gcn_dumpbase
= concat (dumppfx
, ".c", NULL
);
1075 const char *gcn_cfile_name
;
1077 gcn_cfile_name
= gcn_dumpbase
;
1079 gcn_cfile_name
= make_temp_file (".c");
1080 obstack_ptr_grow (&files_to_cleanup
, gcn_cfile_name
);
1082 cfile
= fopen (gcn_cfile_name
, "w");
1084 fatal_error (input_location
, "cannot open %qs", gcn_cfile_name
);
1086 /* Currently, we only support offloading in 64-bit configurations. */
1087 if (offload_abi
== OFFLOAD_ABI_LP64
)
1089 const char *mko_dumpbase
= concat (dumppfx
, ".mkoffload", NULL
);
1090 const char *hsaco_dumpbase
= concat (dumppfx
, ".mkoffload.hsaco", NULL
);
1092 const char *gcn_s1_name
;
1093 const char *gcn_s2_name
;
1094 const char *gcn_o_name
;
1097 gcn_s1_name
= concat (mko_dumpbase
, ".1.s", NULL
);
1098 gcn_s2_name
= concat (mko_dumpbase
, ".2.s", NULL
);
1099 gcn_o_name
= hsaco_dumpbase
;
1103 gcn_s1_name
= make_temp_file (".mkoffload.1.s");
1104 gcn_s2_name
= make_temp_file (".mkoffload.2.s");
1105 gcn_o_name
= make_temp_file (".mkoffload.hsaco");
1107 obstack_ptr_grow (&files_to_cleanup
, gcn_s1_name
);
1108 obstack_ptr_grow (&files_to_cleanup
, gcn_s2_name
);
1109 obstack_ptr_grow (&files_to_cleanup
, gcn_o_name
);
1111 obstack_ptr_grow (&cc_argv_obstack
, "-dumpdir");
1112 obstack_ptr_grow (&cc_argv_obstack
, "");
1113 obstack_ptr_grow (&cc_argv_obstack
, "-dumpbase");
1114 obstack_ptr_grow (&cc_argv_obstack
, mko_dumpbase
);
1115 obstack_ptr_grow (&cc_argv_obstack
, "-dumpbase-ext");
1116 obstack_ptr_grow (&cc_argv_obstack
, "");
1118 obstack_ptr_grow (&cc_argv_obstack
, "-o");
1119 obstack_ptr_grow (&cc_argv_obstack
, gcn_s1_name
);
1120 obstack_ptr_grow (&cc_argv_obstack
, NULL
);
1121 const char **cc_argv
= XOBFINISH (&cc_argv_obstack
, const char **);
1123 /* Build arguments for assemble/link pass. */
1124 struct obstack ld_argv_obstack
;
1125 obstack_init (&ld_argv_obstack
);
1126 obstack_ptr_grow (&ld_argv_obstack
, driver
);
1128 /* Extract early-debug information from the input objects.
1129 This loop finds all the inputs that end ".o" and aren't the output. */
1131 for (int ix
= 1; ix
!= argc
; ix
++)
1133 if (!strcmp (argv
[ix
], "-o") && ix
+ 1 != argc
)
1137 if (strcmp (argv
[ix
] + strlen(argv
[ix
]) - 2, ".o") == 0)
1143 sprintf (buf
, "%d", dbgcount
++);
1144 dbgobj
= concat (dumppfx
, ".mkoffload.dbg", buf
, ".o", NULL
);
1147 dbgobj
= make_temp_file (".mkoffload.dbg.o");
1149 /* If the copy fails then just ignore it. */
1150 if (copy_early_debug_info (argv
[ix
], dbgobj
))
1152 obstack_ptr_grow (&ld_argv_obstack
, dbgobj
);
1153 obstack_ptr_grow (&files_to_cleanup
, dbgobj
);
1157 maybe_unlink (dbgobj
);
1163 obstack_ptr_grow (&ld_argv_obstack
, gcn_s2_name
);
1164 obstack_ptr_grow (&ld_argv_obstack
, "-lgomp");
1165 if (!TEST_XNACK_UNSET (elf_flags
))
1166 obstack_ptr_grow (&ld_argv_obstack
,
1167 (TEST_XNACK_ON (elf_flags
) ? "-mxnack=on"
1168 : TEST_XNACK_ANY (elf_flags
) ? "-mxnack=any"
1170 if (!TEST_SRAM_ECC_UNSET (elf_flags
))
1171 obstack_ptr_grow (&ld_argv_obstack
,
1172 (TEST_SRAM_ECC_ON (elf_flags
) ? "-msram-ecc=on"
1173 : TEST_SRAM_ECC_ANY (elf_flags
) ? "-msram-ecc=any"
1174 : "-msram-ecc=off"));
1176 obstack_ptr_grow (&ld_argv_obstack
, "-v");
1179 obstack_ptr_grow (&ld_argv_obstack
, "-save-temps");
1181 for (int i
= 1; i
< argc
; i
++)
1182 if (startswith (argv
[i
], "-l")
1183 || startswith (argv
[i
], "-Wl")
1184 || startswith (argv
[i
], "-march"))
1185 obstack_ptr_grow (&ld_argv_obstack
, argv
[i
]);
1187 obstack_ptr_grow (&cc_argv_obstack
, "-dumpdir");
1188 obstack_ptr_grow (&cc_argv_obstack
, "");
1189 obstack_ptr_grow (&cc_argv_obstack
, "-dumpbase");
1190 obstack_ptr_grow (&cc_argv_obstack
, hsaco_dumpbase
);
1191 obstack_ptr_grow (&cc_argv_obstack
, "-dumpbase-ext");
1192 obstack_ptr_grow (&cc_argv_obstack
, "");
1194 obstack_ptr_grow (&ld_argv_obstack
, "-o");
1195 obstack_ptr_grow (&ld_argv_obstack
, gcn_o_name
);
1196 obstack_ptr_grow (&ld_argv_obstack
, NULL
);
1197 const char **ld_argv
= XOBFINISH (&ld_argv_obstack
, const char **);
1199 /* Clean up unhelpful environment variables. */
1200 char *execpath
= getenv ("GCC_EXEC_PREFIX");
1201 char *cpath
= getenv ("COMPILER_PATH");
1202 char *lpath
= getenv ("LIBRARY_PATH");
1203 unsetenv ("GCC_EXEC_PREFIX");
1204 unsetenv ("COMPILER_PATH");
1205 unsetenv ("LIBRARY_PATH");
1207 char *omp_requires_file
;
1209 omp_requires_file
= concat (dumppfx
, ".mkoffload.omp_requires", NULL
);
1211 omp_requires_file
= make_temp_file (".mkoffload.omp_requires");
1212 obstack_ptr_grow (&files_to_cleanup
, omp_requires_file
);
1214 /* Run the compiler pass. */
1215 xputenv (concat ("GCC_OFFLOAD_OMP_REQUIRES_FILE=", omp_requires_file
, NULL
));
1216 fork_execute (cc_argv
[0], CONST_CAST (char **, cc_argv
), true, ".gcc_args");
1217 obstack_free (&cc_argv_obstack
, NULL
);
1218 unsetenv("GCC_OFFLOAD_OMP_REQUIRES_FILE");
1220 in
= fopen (omp_requires_file
, "rb");
1222 fatal_error (input_location
, "cannot open omp_requires file %qs",
1224 uint32_t omp_requires
;
1225 if (fread (&omp_requires
, sizeof (omp_requires
), 1, in
) != 1)
1226 fatal_error (input_location
, "cannot read omp_requires file %qs",
1230 in
= fopen (gcn_s1_name
, "r");
1232 fatal_error (input_location
, "cannot open intermediate gcn asm file");
1234 out
= fopen (gcn_s2_name
, "w");
1236 fatal_error (input_location
, "cannot open %qs", gcn_s2_name
);
1238 process_asm (in
, out
, cfile
);
1243 /* Run the assemble/link pass. */
1244 fork_execute (ld_argv
[0], CONST_CAST (char **, ld_argv
), true, ".ld_args");
1245 obstack_free (&ld_argv_obstack
, NULL
);
1247 in
= fopen (gcn_o_name
, "r");
1249 fatal_error (input_location
, "cannot open intermediate gcn obj file");
1251 process_obj (in
, cfile
, omp_requires
);
1255 xputenv (concat ("GCC_EXEC_PREFIX=", execpath
, NULL
));
1256 xputenv (concat ("COMPILER_PATH=", cpath
, NULL
));
1257 xputenv (concat ("LIBRARY_PATH=", lpath
, NULL
));
1262 compile_native (gcn_cfile_name
, outname
, collect_gcc
, fPIC
, fpic
);