1 /* Offload image generation tool for AMD GCN.
3 Copyright (C) 2014-2024 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
/* Munges GCN assembly into a C source file defining the GCN code as a
   static byte array (gcn_code).

   This is not a complete assembler.  We presume the source is well
   formed from the compiler and can die horribly if it is not.  */
29 #include "coretypes.h"
31 #include "diagnostic.h"
34 #include "collect-utils.h"
35 #include "gomp-constants.h"
36 #include "simple-object.h"
39 /* These probably won't (all) be in elf.h for a while. */
/* ELF e_machine value for AMD GPUs.  Deliberately has no trailing
   semicolon, so that the macro can be used inside any expression and not
   only as the right-hand side of a complete assignment statement.  */
#undef EM_AMDGPU
#define EM_AMDGPU 0xe0

/* Values stored in e_ident[7] (OS ABI) and e_ident[8] (ABI version) when
   the ELF header is patched.  */
#undef ELFOSABI_AMDGPU_HSA
#define ELFOSABI_AMDGPU_HSA 64
#undef ELFABIVERSION_AMDGPU_HSA_V3
#define ELFABIVERSION_AMDGPU_HSA_V3 1
#undef ELFABIVERSION_AMDGPU_HSA_V4
#define ELFABIVERSION_AMDGPU_HSA_V4 2

/* Machine identifiers; one of these is OR-ed into e_flags together with
   the feature flags.  */
#undef EF_AMDGPU_MACH_AMDGCN_GFX803
#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a
#undef EF_AMDGPU_MACH_AMDGCN_GFX900
#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c
#undef EF_AMDGPU_MACH_AMDGCN_GFX906
#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
#undef EF_AMDGPU_MACH_AMDGCN_GFX908
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
#undef EF_AMDGPU_MACH_AMDGCN_GFX90a
#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
#undef EF_AMDGPU_MACH_AMDGCN_GFX1030
#define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
/* XNACK setting as encoded in the ELF e_flags word (the "_V4" layout).
   The field is two bits wide; the first value is the mask selecting it.  */
#define EF_AMDGPU_FEATURE_XNACK_V4              0x300  /* Mask.  */
#define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4  0x000
#define EF_AMDGPU_FEATURE_XNACK_ANY_V4          0x100
#define EF_AMDGPU_FEATURE_XNACK_OFF_V4          0x200
#define EF_AMDGPU_FEATURE_XNACK_ON_V4           0x300

/* SRAM-ECC setting, encoded the same way in the next two bits.  */
#define EF_AMDGPU_FEATURE_SRAMECC_V4              0xc00  /* Mask.  */
#define EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4  0x000
#define EF_AMDGPU_FEATURE_SRAMECC_ANY_V4          0x400
#define EF_AMDGPU_FEATURE_SRAMECC_OFF_V4          0x800
#define EF_AMDGPU_FEATURE_SRAMECC_ON_V4           0xc00

/* SET_*: replace one feature field of the flags lvalue VAR, leaving all
   other bits untouched.  TEST_*: nonzero if the field of VAR holds the
   named setting.

   VAR and every expansion are fully parenthesized so that an argument
   such as "a | b" is masked as a whole, and so that the result can be
   used inside a larger expression.  The SET_* macros evaluate VAR twice;
   do not pass an argument with side effects.  */
#define SET_XNACK_ON(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
            | EF_AMDGPU_FEATURE_XNACK_ON_V4))
#define SET_XNACK_ANY(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
            | EF_AMDGPU_FEATURE_XNACK_ANY_V4))
#define SET_XNACK_OFF(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
            | EF_AMDGPU_FEATURE_XNACK_OFF_V4))
#define TEST_XNACK_ANY(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_XNACK_V4) == EF_AMDGPU_FEATURE_XNACK_ANY_V4)
#define TEST_XNACK_ON(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_XNACK_V4) == EF_AMDGPU_FEATURE_XNACK_ON_V4)
#define TEST_XNACK_OFF(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_XNACK_V4) == EF_AMDGPU_FEATURE_XNACK_OFF_V4)

#define SET_SRAM_ECC_ON(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_ON_V4))
#define SET_SRAM_ECC_ANY(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_ANY_V4))
#define SET_SRAM_ECC_OFF(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_OFF_V4))
#define SET_SRAM_ECC_UNSUPPORTED(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4))
#define TEST_SRAM_ECC_ANY(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_SRAMECC_V4) == EF_AMDGPU_FEATURE_SRAMECC_ANY_V4)
#define TEST_SRAM_ECC_ON(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_SRAMECC_V4) == EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
/* AMDGPU relocation numbers.  The whole group is guarded by
   R_AMDGPU_NONE: if that name is already defined, none of the fallbacks
   below are used.  The trailing comments give each relocation's
   calculation.  */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE            0
#define R_AMDGPU_ABS32_LO        1   /* (S + A) & 0xFFFFFFFF  */
#define R_AMDGPU_ABS32_HI        2   /* (S + A) >> 32  */
#define R_AMDGPU_ABS64           3   /* S + A  */
#define R_AMDGPU_REL32           4   /* S + A - P  */
#define R_AMDGPU_REL64           5   /* S + A - P  */
#define R_AMDGPU_ABS32           6   /* S + A  */
#define R_AMDGPU_GOTPCREL        7   /* G + GOT + A - P  */
#define R_AMDGPU_GOTPCREL32_LO   8   /* (G + GOT + A - P) & 0xFFFFFFFF  */
#define R_AMDGPU_GOTPCREL32_HI   9   /* (G + GOT + A - P) >> 32  */
#define R_AMDGPU_REL32_LO        10  /* (S + A - P) & 0xFFFFFFFF  */
#define R_AMDGPU_REL32_HI        11  /* (S + A - P) >> 32  */
#define R_AMDGPU_RELATIVE64      13  /* B + A  */
#endif
118 const char tool_name
[] = "gcn mkoffload";
120 static const char *gcn_dumpbase
;
121 static struct obstack files_to_cleanup
;
123 enum offload_abi offload_abi
= OFFLOAD_ABI_UNSET
;
124 uint32_t elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX803
; // Default GPU architecture.
126 (EF_AMDGPU_FEATURE_XNACK_ANY_V4
| EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
);
128 static int gcn_stack_size
= 0; /* Zero means use default. */
130 /* Delete tempfiles. */
133 tool_cleanup (bool from_signal ATTRIBUTE_UNUSED
)
135 obstack_ptr_grow (&files_to_cleanup
, NULL
);
136 const char **files
= XOBFINISH (&files_to_cleanup
, const char **);
137 for (int i
= 0; files
[i
]; i
++)
138 maybe_unlink (files
[i
]);
/* Exit handler registered with atexit in main: run the normal
   (non-signal) temporary-file cleanup.  */

static void
mkoffload_cleanup (void)
{
  tool_cleanup (false);
}
147 /* Unlink FILE unless requested otherwise. */
150 maybe_unlink (const char *file
)
154 if (unlink_if_ordinary (file
) && errno
!= ENOENT
)
155 fatal_error (input_location
, "deleting file %s: %m", file
);
158 fprintf (stderr
, "[Leaving %s]\n", file
);
161 /* Add or change the value of an environment variable, outputting the
162 change to standard error if in verbose mode. */
165 xputenv (const char *string
)
168 fprintf (stderr
, "%s\n", string
);
169 putenv (CONST_CAST (char *, string
));
/* Read the whole input file.  It will be NUL terminated (but
   remember, there could be a NUL in the file itself).  */
176 read_file (FILE *stream
, size_t *plen
)
178 size_t alloc
= 16384;
182 if (!fseek (stream
, 0, SEEK_END
))
184 /* Get the file size. */
185 long s
= ftell (stream
);
188 fseek (stream
, 0, SEEK_SET
);
190 buffer
= XNEWVEC (char, alloc
);
194 size_t n
= fread (buffer
+ base
, 1, alloc
- base
- 1, stream
);
199 if (base
+ 1 == alloc
)
202 buffer
= XRESIZEVEC (char, buffer
, alloc
);
210 /* Parse STR, saving found tokens into PVALUES and return their number.
211 Tokens are assumed to be delimited by ':'. */
214 parse_env_var (const char *str
, char ***pvalues
)
216 const char *curval
, *nextval
;
220 curval
= strchr (str
, ':');
224 curval
= strchr (curval
+ 1, ':');
227 values
= (char **) xmalloc (num
* sizeof (char *));
229 nextval
= strchr (curval
, ':');
231 nextval
= strchr (curval
, '\0');
233 for (i
= 0; i
< num
; i
++)
235 int l
= nextval
- curval
;
236 values
[i
] = (char *) xmalloc (l
+ 1);
237 memcpy (values
[i
], curval
, l
);
239 curval
= nextval
+ 1;
240 nextval
= strchr (curval
, ':');
242 nextval
= strchr (curval
, '\0');
248 /* Auxiliary function that frees elements of PTR and PTR itself.
249 N is number of elements to be freed. If PTR is NULL, nothing is freed.
250 If an element is NULL, subsequent elements are not freed. */
253 free_array_of_ptrs (void **ptr
, unsigned n
)
258 for (i
= 0; i
< n
; i
++)
268 /* Check whether NAME can be accessed in MODE. This is like access,
269 except that it never considers directories to be executable. */
272 access_check (const char *name
, int mode
)
278 if (stat (name
, &st
) < 0 || S_ISDIR (st
.st_mode
))
282 return access (name
, mode
);
/* Copy the early-debug-info from the incoming LTO object to a new object
   that will be linked into the output HSACO file.  The host relocations
   must be translated into GCN relocations, and any global undefined symbols
   must be weakened (so as not to have the debug info try to pull in host
   symbols).

   Returns true if the file was created, false otherwise.  */
294 copy_early_debug_info (const char *infile
, const char *outfile
)
299 /* The simple_object code can handle extracting the debug sections.
300 This code is based on that in lto-wrapper.cc. */
301 int infd
= open (infile
, O_RDONLY
| O_BINARY
);
304 simple_object_read
*inobj
= simple_object_start_read (infd
, 0,
311 if (simple_object_find_section (inobj
, ".gnu.debuglto_.debug_info",
312 &off
, &len
, &errmsg
, &err
) != 1)
314 simple_object_release_read (inobj
);
319 errmsg
= simple_object_copy_lto_debug_sections (inobj
, outfile
, &err
, true);
322 unlink_if_ordinary (outfile
);
326 simple_object_release_read (inobj
);
329 /* Open the file we just created for some adjustments.
330 The simple_object code can't do this, so we do it manually. */
331 FILE *outfd
= fopen (outfile
, "r+b");
336 if (fread (&ehdr
, sizeof (ehdr
), 1, outfd
) != 1)
342 /* We only support host relocations of x86_64, for now. */
343 gcc_assert (ehdr
.e_machine
== EM_X86_64
);
345 /* Fiji devices use HSACOv3 regardless of the assembler. */
346 uint32_t elf_flags_actual
= (elf_arch
== EF_AMDGPU_MACH_AMDGCN_GFX803
348 /* GFX900 devices don't support the sramecc attribute even if
349 a buggy assembler thinks it does. This must match gcn-hsa.h */
350 if (elf_arch
== EF_AMDGPU_MACH_AMDGCN_GFX900
)
351 SET_SRAM_ECC_UNSUPPORTED (elf_flags_actual
);
353 /* Patch the correct elf architecture flag into the file. */
354 ehdr
.e_ident
[7] = ELFOSABI_AMDGPU_HSA
;
355 ehdr
.e_ident
[8] = (elf_arch
== EF_AMDGPU_MACH_AMDGCN_GFX803
356 ? ELFABIVERSION_AMDGPU_HSA_V3
357 : ELFABIVERSION_AMDGPU_HSA_V4
);
358 ehdr
.e_type
= ET_REL
;
359 ehdr
.e_machine
= EM_AMDGPU
;
360 ehdr
.e_flags
= elf_arch
| elf_flags_actual
;
362 /* Load the section headers so we can walk them later. */
363 Elf64_Shdr
*sections
= (Elf64_Shdr
*)xmalloc (sizeof (Elf64_Shdr
)
365 if (fseek (outfd
, ehdr
.e_shoff
, SEEK_SET
) == -1
366 || fread (sections
, sizeof (Elf64_Shdr
), ehdr
.e_shnum
,
367 outfd
) != ehdr
.e_shnum
)
374 /* Convert the host relocations to target relocations. */
375 for (int i
= 0; i
< ehdr
.e_shnum
; i
++)
377 if (sections
[i
].sh_type
!= SHT_RELA
)
380 char *data
= (char *)xmalloc (sections
[i
].sh_size
);
381 if (fseek (outfd
, sections
[i
].sh_offset
, SEEK_SET
) == -1
382 || fread (data
, sections
[i
].sh_size
, 1, outfd
) != 1)
388 for (size_t offset
= 0;
389 offset
< sections
[i
].sh_size
;
390 offset
+= sections
[i
].sh_entsize
)
392 Elf64_Rela
*reloc
= (Elf64_Rela
*) (data
+ offset
);
394 /* Map the host relocations to GCN relocations.
395 Only relocations that can appear in DWARF need be handled. */
396 switch (ELF64_R_TYPE (reloc
->r_info
))
400 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
404 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
408 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
412 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
415 case R_X86_64_RELATIVE
:
416 reloc
->r_info
= ELF32_R_INFO(ELF32_R_SYM(reloc
->r_info
),
417 R_AMDGPU_RELATIVE64
);
424 /* Write back our relocation changes. */
425 if (fseek (outfd
, sections
[i
].sh_offset
, SEEK_SET
) != -1)
426 fwrite (data
, sections
[i
].sh_size
, 1, outfd
);
/* Weaken any global undefined symbols that would pull in unwanted
   objects.  */
433 for (int i
= 0; i
< ehdr
.e_shnum
; i
++)
435 if (sections
[i
].sh_type
!= SHT_SYMTAB
)
438 char *data
= (char *)xmalloc (sections
[i
].sh_size
);
439 if (fseek (outfd
, sections
[i
].sh_offset
, SEEK_SET
) == -1
440 || fread (data
, sections
[i
].sh_size
, 1, outfd
) != 1)
446 for (size_t offset
= 0;
447 offset
< sections
[i
].sh_size
;
448 offset
+= sections
[i
].sh_entsize
)
450 Elf64_Sym
*sym
= (Elf64_Sym
*) (data
+ offset
);
451 int type
= ELF64_ST_TYPE (sym
->st_info
);
452 int bind
= ELF64_ST_BIND (sym
->st_info
);
454 if (bind
== STB_GLOBAL
&& sym
->st_shndx
== 0)
455 sym
->st_info
= ELF64_ST_INFO (STB_WEAK
, type
);
458 /* Write back our symbol changes. */
459 if (fseek (outfd
, sections
[i
].sh_offset
, SEEK_SET
) != -1)
460 fwrite (data
, sections
[i
].sh_size
, 1, outfd
);
466 /* Write back our header changes. */
468 fwrite (&ehdr
, sizeof (ehdr
), 1, outfd
);
474 /* CDNA2 devices have twice as many VGPRs compared to older devices,
475 but the AVGPRS are allocated from the same pool. */
478 isa_has_combined_avgprs (int isa
)
482 case EF_AMDGPU_MACH_AMDGCN_GFX803
:
483 case EF_AMDGPU_MACH_AMDGCN_GFX900
:
484 case EF_AMDGPU_MACH_AMDGCN_GFX906
:
485 case EF_AMDGPU_MACH_AMDGCN_GFX908
:
486 case EF_AMDGPU_MACH_AMDGCN_GFX1030
:
488 case EF_AMDGPU_MACH_AMDGCN_GFX90a
:
491 fatal_error (input_location
, "unhandled ISA in isa_has_combined_avgprs");
494 /* Parse an input assembler file, extract the offload tables etc.,
495 and output (1) the assembler code, minus the tables (which can contain
496 problematic relocations), and (2) a C file with the offload tables
497 encoded as structured data. */
500 process_asm (FILE *in
, FILE *out
, FILE *cfile
)
502 int fn_count
= 0, var_count
= 0, ind_fn_count
= 0;
503 int dims_count
= 0, regcount_count
= 0;
504 struct obstack fns_os
, dims_os
, regcounts_os
;
505 obstack_init (&fns_os
);
506 obstack_init (&dims_os
);
507 obstack_init (®counts_os
);
521 } regcount
= { -1, -1, NULL
};
523 /* Always add _init_array and _fini_array as kernels. */
524 obstack_ptr_grow (&fns_os
, xstrdup ("_init_array"));
525 obstack_ptr_grow (&fns_os
, xstrdup ("_fini_array"));
536 while (fgets (buf
, sizeof (buf
), in
))
542 if (sscanf (buf
, " ;; OPENACC-DIMS: %d, %d, %d : %ms\n",
543 &dim
.d
[0], &dim
.d
[1], &dim
.d
[2], &dim
.name
) == 4)
545 obstack_grow (&dims_os
, &dim
, sizeof (dim
));
553 if (sscanf (buf
, " - .name: %ms\n", ®count
.kernel_name
) == 1)
555 else if (sscanf (buf
, " .sgpr_count: %d\n",
556 ®count
.sgpr_count
) == 1)
558 gcc_assert (regcount
.kernel_name
);
561 else if (sscanf (buf
, " .vgpr_count: %d\n",
562 ®count
.vgpr_count
) == 1)
564 gcc_assert (regcount
.kernel_name
);
567 else if (sscanf (buf
, " .agpr_count: %d\n",
568 ®count
.avgpr_count
) == 1)
570 gcc_assert (regcount
.kernel_name
);
580 if (sscanf (buf
, " .8byte %ms\n", &varname
))
583 fgets (buf
, sizeof (buf
), in
);
584 if (!sscanf (buf
, " .8byte %u\n", &varsize
))
593 if (sscanf (buf
, "\t.8byte\t%ms\n", &funcname
))
596 obstack_ptr_grow (&fns_os
, funcname
);
605 if (sscanf (buf
, "\t.8byte\t%ms\n", &funcname
))
616 if (sscanf (buf
, " .section .gnu.offload_vars%c", &dummy
) > 0)
620 /* Add a global symbol to allow plugin-gcn.c to locate the table
621 at runtime. It can't use the "offload_var_table.N" emitted by
622 the compiler because a) they're not global, and b) there's one
623 for each input file combined into the binary. */
625 fputs ("\t.global .offload_var_table\n"
626 "\t.type .offload_var_table, @object\n"
627 ".offload_var_table:\n",
630 else if (sscanf (buf
, " .section .gnu.offload_funcs%c", &dummy
) > 0)
/* As for .gnu.offload_vars: add a global symbol so that the function
   table can be located at runtime; used for reverse offload.  */
635 fputs ("\t.global .offload_func_table\n"
636 "\t.type .offload_func_table, @object\n"
637 ".offload_func_table:\n",
640 else if (sscanf (buf
, " .section .gnu.offload_ind_funcs%c", &dummy
) > 0)
642 state
= IN_IND_FUNCS
;
644 fputs ("\t.global .offload_ind_func_table\n"
645 "\t.type .offload_ind_func_table, @object\n"
646 ".offload_ind_func_table:\n",
649 else if (sscanf (buf
, " .amdgpu_metadata%c", &dummy
) > 0)
652 regcount
.kernel_name
= NULL
;
653 regcount
.sgpr_count
= regcount
.vgpr_count
= -1;
655 else if (sscanf (buf
, " .section %c", &dummy
) > 0
656 || sscanf (buf
, " .text%c", &dummy
) > 0
657 || sscanf (buf
, " .bss%c", &dummy
) > 0
658 || sscanf (buf
, " .data%c", &dummy
) > 0
659 || sscanf (buf
, " .ident %c", &dummy
) > 0)
661 else if (sscanf (buf
, " .end_amdgpu_metadata%c", &dummy
) > 0)
664 gcc_assert (regcount
.kernel_name
!= NULL
665 && regcount
.sgpr_count
>= 0
666 && regcount
.vgpr_count
>= 0);
667 obstack_grow (®counts_os
, ®count
, sizeof (regcount
));
669 regcount
.kernel_name
= NULL
;
670 regcount
.sgpr_count
= regcount
.vgpr_count
= -1;
673 if (state
== IN_CODE
|| state
== IN_METADATA
|| state
== IN_VARS
)
677 char **fns
= XOBFINISH (&fns_os
, char **);
678 struct oaccdims
*dims
= XOBFINISH (&dims_os
, struct oaccdims
*);
679 struct regcount
*regcounts
= XOBFINISH (®counts_os
, struct regcount
*);
681 fprintf (cfile
, "#include <stdlib.h>\n");
682 fprintf (cfile
, "#include <stdint.h>\n");
683 fprintf (cfile
, "#include <stdbool.h>\n\n");
685 fprintf (cfile
, "static const int gcn_num_vars = %d;\n\n", var_count
);
686 fprintf (cfile
, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count
);
688 /* Dump out function idents. */
689 fprintf (cfile
, "static const struct hsa_kernel_description {\n"
690 " const char *name;\n"
691 " int oacc_dims[3];\n"
694 "} gcn_kernels[] = {\n ");
695 dim
.d
[0] = dim
.d
[1] = dim
.d
[2] = 0;
698 for (comma
= "", i
= 0; i
< fn_count
; comma
= ",\n ", i
++)
700 /* Find if we recorded dimensions for this function. */
701 int *d
= dim
.d
; /* Previously zeroed. */
704 for (int j
= 0; j
< dims_count
; j
++)
705 if (strcmp (fns
[i
], dims
[j
].name
) == 0)
710 for (int j
= 0; j
< regcount_count
; j
++)
711 if (strcmp (fns
[i
], regcounts
[j
].kernel_name
) == 0)
713 sgpr_count
= regcounts
[j
].sgpr_count
;
714 vgpr_count
= regcounts
[j
].vgpr_count
;
715 if (isa_has_combined_avgprs (elf_arch
))
716 vgpr_count
+= regcounts
[j
].avgpr_count
;
720 fprintf (cfile
, "%s{\"%s\", {%d, %d, %d}, %d, %d}", comma
,
721 fns
[i
], d
[0], d
[1], d
[2], sgpr_count
, vgpr_count
);
725 fprintf (cfile
, "\n};\n\n");
727 /* Set the stack size if the user configured a value. */
730 "static __attribute__((constructor))\n"
731 "void configure_stack_size (void)\n"
733 " const char *val = getenv (\"GCN_STACK_SIZE\");\n"
734 " if (!val || val[0] == '\\0')\n"
735 " setenv (\"GCN_STACK_SIZE\", \"%d\", true);\n"
739 obstack_free (&fns_os
, NULL
);
740 for (i
= 0; i
< dims_count
; i
++)
742 for (i
= 0; i
< regcount_count
; i
++)
743 free (regcounts
[i
].kernel_name
);
744 obstack_free (&dims_os
, NULL
);
745 obstack_free (®counts_os
, NULL
);
748 /* Embed an object file into a C source file. */
751 process_obj (FILE *in
, FILE *cfile
, uint32_t omp_requires
)
754 const char *input
= read_file (in
, &len
);
756 /* Dump out an array containing the binary.
757 FIXME: do this with objcopy. */
758 fprintf (cfile
, "static unsigned char gcn_code[] = {");
759 for (size_t i
= 0; i
< len
; i
+= 17)
761 fprintf (cfile
, "\n\t");
762 for (size_t j
= i
; j
< i
+ 17 && j
< len
; j
++)
763 fprintf (cfile
, "%3u,", (unsigned char) input
[j
]);
765 fprintf (cfile
, "\n};\n\n");
768 "static const struct gcn_image {\n"
778 "static const struct gcn_data {\n"
779 " uintptr_t omp_requires_mask;\n"
780 " const struct gcn_image *gcn_image;\n"
781 " unsigned kernel_count;\n"
782 " const struct hsa_kernel_description *kernel_infos;\n"
783 " unsigned ind_func_count;\n"
784 " unsigned global_variable_count;\n"
788 " sizeof (gcn_kernels) / sizeof (gcn_kernels[0]),\n"
790 " gcn_num_ind_funcs,\n"
792 "};\n\n", omp_requires
);
795 "#ifdef __cplusplus\n"
798 "extern void GOMP_offload_register_ver"
799 " (unsigned, const void *, int, const void *);\n"
800 "extern void GOMP_offload_unregister_ver"
801 " (unsigned, const void *, int, const void *);\n"
802 "#ifdef __cplusplus\n"
806 fprintf (cfile
, "extern const void *const __OFFLOAD_TABLE__[];\n\n");
808 fprintf (cfile
, "static __attribute__((constructor)) void init (void)\n"
810 " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__,"
811 " %d/*GCN*/, &gcn_data);\n"
813 GOMP_VERSION_PACK (GOMP_VERSION
, GOMP_VERSION_GCN
),
816 fprintf (cfile
, "static __attribute__((destructor)) void fini (void)\n"
818 " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__,"
819 " %d/*GCN*/, &gcn_data);\n"
821 GOMP_VERSION_PACK (GOMP_VERSION
, GOMP_VERSION_GCN
),
825 /* Compile a C file using the host compiler. */
828 compile_native (const char *infile
, const char *outfile
, const char *compiler
,
829 bool fPIC
, bool fpic
)
831 const char *collect_gcc_options
= getenv ("COLLECT_GCC_OPTIONS");
832 if (!collect_gcc_options
)
833 fatal_error (input_location
,
834 "environment variable COLLECT_GCC_OPTIONS must be set");
836 struct obstack argv_obstack
;
837 obstack_init (&argv_obstack
);
838 obstack_ptr_grow (&argv_obstack
, compiler
);
840 obstack_ptr_grow (&argv_obstack
, "-fPIC");
842 obstack_ptr_grow (&argv_obstack
, "-fpic");
844 obstack_ptr_grow (&argv_obstack
, "-save-temps");
846 obstack_ptr_grow (&argv_obstack
, "-v");
847 obstack_ptr_grow (&argv_obstack
, "-dumpdir");
848 obstack_ptr_grow (&argv_obstack
, "");
849 obstack_ptr_grow (&argv_obstack
, "-dumpbase");
850 obstack_ptr_grow (&argv_obstack
, gcn_dumpbase
);
851 obstack_ptr_grow (&argv_obstack
, "-dumpbase-ext");
852 obstack_ptr_grow (&argv_obstack
, ".c");
855 case OFFLOAD_ABI_LP64
:
856 obstack_ptr_grow (&argv_obstack
, "-m64");
858 case OFFLOAD_ABI_ILP32
:
859 obstack_ptr_grow (&argv_obstack
, "-m32");
864 obstack_ptr_grow (&argv_obstack
, infile
);
865 obstack_ptr_grow (&argv_obstack
, "-c");
866 obstack_ptr_grow (&argv_obstack
, "-o");
867 obstack_ptr_grow (&argv_obstack
, outfile
);
868 obstack_ptr_grow (&argv_obstack
, NULL
);
870 const char **new_argv
= XOBFINISH (&argv_obstack
, const char **);
871 fork_execute (new_argv
[0], CONST_CAST (char **, new_argv
), true,
873 obstack_free (&argv_obstack
, NULL
);
877 main (int argc
, char **argv
)
881 FILE *cfile
= stdout
;
882 const char *outname
= 0;
884 progname
= tool_name
;
885 diagnostic_initialize (global_dc
, 0);
887 obstack_init (&files_to_cleanup
);
888 if (atexit (mkoffload_cleanup
) != 0)
889 fatal_error (input_location
, "atexit failed");
891 char *collect_gcc
= getenv ("COLLECT_GCC");
892 if (collect_gcc
== NULL
)
893 fatal_error (input_location
, "COLLECT_GCC must be set.");
894 const char *gcc_path
= dirname (ASTRDUP (collect_gcc
));
895 const char *gcc_exec
= basename (ASTRDUP (collect_gcc
));
897 size_t len
= (strlen (gcc_path
) + 1 + strlen (GCC_INSTALL_NAME
) + 1);
898 char *driver
= XALLOCAVEC (char, len
);
900 if (strcmp (gcc_exec
, collect_gcc
) == 0)
901 /* collect_gcc has no path, so it was found in PATH. Make sure we also
902 find accel-gcc in PATH. */
906 if (gcc_path
!= NULL
)
907 driver_used
= sprintf (driver
, "%s/", gcc_path
);
908 sprintf (driver
+ driver_used
, "%s", GCC_INSTALL_NAME
);
911 if (gcc_path
== NULL
)
913 else if (access_check (driver
, X_OK
) == 0)
917 /* Don't use alloca pointer with XRESIZEVEC. */
919 /* Look in all COMPILER_PATHs for GCC_INSTALL_NAME. */
922 n_paths
= parse_env_var (getenv ("COMPILER_PATH"), &paths
);
923 for (unsigned i
= 0; i
< n_paths
; i
++)
925 len
= strlen (paths
[i
]) + 1 + strlen (GCC_INSTALL_NAME
) + 1;
926 driver
= XRESIZEVEC (char, driver
, len
);
927 sprintf (driver
, "%s/%s", paths
[i
], GCC_INSTALL_NAME
);
928 if (access_check (driver
, X_OK
) == 0)
934 free_array_of_ptrs ((void **) paths
, n_paths
);
938 fatal_error (input_location
,
939 "offload compiler %s not found", GCC_INSTALL_NAME
);
941 /* We may be called with all the arguments stored in some file and
942 passed with @file. Expand them into argv before processing. */
943 expandargv (&argc
, &argv
);
945 /* Scan the argument vector. */
946 bool fopenmp
= false;
947 bool fopenacc
= false;
950 for (int i
= 1; i
< argc
; i
++)
952 #define STR "-foffload-abi="
953 if (startswith (argv
[i
], STR
))
955 if (strcmp (argv
[i
] + strlen (STR
), "lp64") == 0)
956 offload_abi
= OFFLOAD_ABI_LP64
;
957 else if (strcmp (argv
[i
] + strlen (STR
), "ilp32") == 0)
958 offload_abi
= OFFLOAD_ABI_ILP32
;
960 fatal_error (input_location
,
961 "unrecognizable argument of option " STR
);
964 else if (strcmp (argv
[i
], "-fopenmp") == 0)
966 else if (strcmp (argv
[i
], "-fopenacc") == 0)
968 else if (strcmp (argv
[i
], "-fPIC") == 0)
970 else if (strcmp (argv
[i
], "-fpic") == 0)
972 else if (strcmp (argv
[i
], "-mxnack=on") == 0)
973 SET_XNACK_ON (elf_flags
);
974 else if (strcmp (argv
[i
], "-mxnack=any") == 0)
975 SET_XNACK_ANY (elf_flags
);
976 else if (strcmp (argv
[i
], "-mxnack=off") == 0)
977 SET_XNACK_OFF (elf_flags
);
978 else if (strcmp (argv
[i
], "-msram-ecc=on") == 0)
979 SET_SRAM_ECC_ON (elf_flags
);
980 else if (strcmp (argv
[i
], "-msram-ecc=any") == 0)
981 SET_SRAM_ECC_ANY (elf_flags
);
982 else if (strcmp (argv
[i
], "-msram-ecc=off") == 0)
983 SET_SRAM_ECC_OFF (elf_flags
);
984 else if (strcmp (argv
[i
], "-save-temps") == 0)
986 else if (strcmp (argv
[i
], "-v") == 0)
988 else if (strcmp (argv
[i
], "-dumpbase") == 0
991 else if (strcmp (argv
[i
], "-march=fiji") == 0)
992 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX803
;
993 else if (strcmp (argv
[i
], "-march=gfx900") == 0)
994 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX900
;
995 else if (strcmp (argv
[i
], "-march=gfx906") == 0)
996 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX906
;
997 else if (strcmp (argv
[i
], "-march=gfx908") == 0)
998 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX908
;
999 else if (strcmp (argv
[i
], "-march=gfx90a") == 0)
1000 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX90a
;
1001 else if (strcmp (argv
[i
], "-march=gfx1030") == 0)
1002 elf_arch
= EF_AMDGPU_MACH_AMDGCN_GFX1030
;
1003 #define STR "-mstack-size="
1004 else if (startswith (argv
[i
], STR
))
1005 gcn_stack_size
= atoi (argv
[i
] + strlen (STR
));
1007 /* Translate host into offloading libraries. */
1008 else if (strcmp (argv
[i
], "-l_GCC_gfortran") == 0
1009 || strcmp (argv
[i
], "-l_GCC_m") == 0)
1011 /* Elide '_GCC_'. */
1012 size_t i_dst
= strlen ("-l");
1013 size_t i_src
= strlen ("-l_GCC_");
1016 c
= argv
[i
][i_dst
++] = argv
[i
][i_src
++];
1021 if (!(fopenacc
^ fopenmp
))
1022 fatal_error (input_location
, "either -fopenacc or -fopenmp must be set");
1025 switch (offload_abi
)
1027 case OFFLOAD_ABI_LP64
:
1030 case OFFLOAD_ABI_ILP32
:
1037 /* Build arguments for compiler pass. */
1038 struct obstack cc_argv_obstack
;
1039 obstack_init (&cc_argv_obstack
);
1040 obstack_ptr_grow (&cc_argv_obstack
, driver
);
1041 obstack_ptr_grow (&cc_argv_obstack
, "-S");
1044 obstack_ptr_grow (&cc_argv_obstack
, "-save-temps");
1046 obstack_ptr_grow (&cc_argv_obstack
, "-v");
1047 obstack_ptr_grow (&cc_argv_obstack
, abi
);
1048 obstack_ptr_grow (&cc_argv_obstack
, "-xlto");
1050 obstack_ptr_grow (&cc_argv_obstack
, "-mgomp");
1052 for (int ix
= 1; ix
!= argc
; ix
++)
1054 if (!strcmp (argv
[ix
], "-o") && ix
+ 1 != argc
)
1055 outname
= argv
[++ix
];
1057 obstack_ptr_grow (&cc_argv_obstack
, argv
[ix
]);
1063 gcn_dumpbase
= concat (dumppfx
, ".c", NULL
);
1065 const char *gcn_cfile_name
;
1067 gcn_cfile_name
= gcn_dumpbase
;
1069 gcn_cfile_name
= make_temp_file (".c");
1070 obstack_ptr_grow (&files_to_cleanup
, gcn_cfile_name
);
1072 cfile
= fopen (gcn_cfile_name
, "w");
1074 fatal_error (input_location
, "cannot open '%s'", gcn_cfile_name
);
1076 /* Currently, we only support offloading in 64-bit configurations. */
1077 if (offload_abi
== OFFLOAD_ABI_LP64
)
1079 const char *mko_dumpbase
= concat (dumppfx
, ".mkoffload", NULL
);
1080 const char *hsaco_dumpbase
= concat (dumppfx
, ".mkoffload.hsaco", NULL
);
1082 const char *gcn_s1_name
;
1083 const char *gcn_s2_name
;
1084 const char *gcn_o_name
;
1087 gcn_s1_name
= concat (mko_dumpbase
, ".1.s", NULL
);
1088 gcn_s2_name
= concat (mko_dumpbase
, ".2.s", NULL
);
1089 gcn_o_name
= hsaco_dumpbase
;
1093 gcn_s1_name
= make_temp_file (".mkoffload.1.s");
1094 gcn_s2_name
= make_temp_file (".mkoffload.2.s");
1095 gcn_o_name
= make_temp_file (".mkoffload.hsaco");
1097 obstack_ptr_grow (&files_to_cleanup
, gcn_s1_name
);
1098 obstack_ptr_grow (&files_to_cleanup
, gcn_s2_name
);
1099 obstack_ptr_grow (&files_to_cleanup
, gcn_o_name
);
1101 obstack_ptr_grow (&cc_argv_obstack
, "-dumpdir");
1102 obstack_ptr_grow (&cc_argv_obstack
, "");
1103 obstack_ptr_grow (&cc_argv_obstack
, "-dumpbase");
1104 obstack_ptr_grow (&cc_argv_obstack
, mko_dumpbase
);
1105 obstack_ptr_grow (&cc_argv_obstack
, "-dumpbase-ext");
1106 obstack_ptr_grow (&cc_argv_obstack
, "");
1108 obstack_ptr_grow (&cc_argv_obstack
, "-o");
1109 obstack_ptr_grow (&cc_argv_obstack
, gcn_s1_name
);
1110 obstack_ptr_grow (&cc_argv_obstack
, NULL
);
1111 const char **cc_argv
= XOBFINISH (&cc_argv_obstack
, const char **);
1113 /* Build arguments for assemble/link pass. */
1114 struct obstack ld_argv_obstack
;
1115 obstack_init (&ld_argv_obstack
);
1116 obstack_ptr_grow (&ld_argv_obstack
, driver
);
1118 /* Extract early-debug information from the input objects.
1119 This loop finds all the inputs that end ".o" and aren't the output. */
1121 for (int ix
= 1; ix
!= argc
; ix
++)
1123 if (!strcmp (argv
[ix
], "-o") && ix
+ 1 != argc
)
1127 if (strcmp (argv
[ix
] + strlen(argv
[ix
]) - 2, ".o") == 0)
1133 sprintf (buf
, "%d", dbgcount
++);
1134 dbgobj
= concat (dumppfx
, ".mkoffload.dbg", buf
, ".o", NULL
);
1137 dbgobj
= make_temp_file (".mkoffload.dbg.o");
1138 obstack_ptr_grow (&files_to_cleanup
, dbgobj
);
1140 /* If the copy fails then just ignore it. */
1141 if (copy_early_debug_info (argv
[ix
], dbgobj
))
1143 obstack_ptr_grow (&ld_argv_obstack
, dbgobj
);
1144 obstack_ptr_grow (&files_to_cleanup
, dbgobj
);
1151 obstack_ptr_grow (&ld_argv_obstack
, gcn_s2_name
);
1152 obstack_ptr_grow (&ld_argv_obstack
, "-lgomp");
1153 obstack_ptr_grow (&ld_argv_obstack
,
1154 (TEST_XNACK_ON (elf_flags
) ? "-mxnack=on"
1155 : TEST_XNACK_ANY (elf_flags
) ? "-mxnack=any"
1157 obstack_ptr_grow (&ld_argv_obstack
,
1158 (TEST_SRAM_ECC_ON (elf_flags
) ? "-msram-ecc=on"
1159 : TEST_SRAM_ECC_ANY (elf_flags
) ? "-msram-ecc=any"
1160 : "-msram-ecc=off"));
1162 obstack_ptr_grow (&ld_argv_obstack
, "-v");
1165 obstack_ptr_grow (&ld_argv_obstack
, "-save-temps");
1167 for (int i
= 1; i
< argc
; i
++)
1168 if (startswith (argv
[i
], "-l")
1169 || startswith (argv
[i
], "-Wl")
1170 || startswith (argv
[i
], "-march"))
1171 obstack_ptr_grow (&ld_argv_obstack
, argv
[i
]);
1173 obstack_ptr_grow (&cc_argv_obstack
, "-dumpdir");
1174 obstack_ptr_grow (&cc_argv_obstack
, "");
1175 obstack_ptr_grow (&cc_argv_obstack
, "-dumpbase");
1176 obstack_ptr_grow (&cc_argv_obstack
, hsaco_dumpbase
);
1177 obstack_ptr_grow (&cc_argv_obstack
, "-dumpbase-ext");
1178 obstack_ptr_grow (&cc_argv_obstack
, "");
1180 obstack_ptr_grow (&ld_argv_obstack
, "-o");
1181 obstack_ptr_grow (&ld_argv_obstack
, gcn_o_name
);
1182 obstack_ptr_grow (&ld_argv_obstack
, NULL
);
1183 const char **ld_argv
= XOBFINISH (&ld_argv_obstack
, const char **);
1185 /* Clean up unhelpful environment variables. */
1186 char *execpath
= getenv ("GCC_EXEC_PREFIX");
1187 char *cpath
= getenv ("COMPILER_PATH");
1188 char *lpath
= getenv ("LIBRARY_PATH");
1189 unsetenv ("GCC_EXEC_PREFIX");
1190 unsetenv ("COMPILER_PATH");
1191 unsetenv ("LIBRARY_PATH");
1193 char *omp_requires_file
;
1195 omp_requires_file
= concat (dumppfx
, ".mkoffload.omp_requires", NULL
);
1197 omp_requires_file
= make_temp_file (".mkoffload.omp_requires");
1198 obstack_ptr_grow (&files_to_cleanup
, omp_requires_file
);
1200 /* Run the compiler pass. */
1201 xputenv (concat ("GCC_OFFLOAD_OMP_REQUIRES_FILE=", omp_requires_file
, NULL
));
1202 fork_execute (cc_argv
[0], CONST_CAST (char **, cc_argv
), true, ".gcc_args");
1203 obstack_free (&cc_argv_obstack
, NULL
);
1204 unsetenv("GCC_OFFLOAD_OMP_REQUIRES_FILE");
1206 in
= fopen (omp_requires_file
, "rb");
1208 fatal_error (input_location
, "cannot open omp_requires file %qs",
1210 uint32_t omp_requires
;
1211 if (fread (&omp_requires
, sizeof (omp_requires
), 1, in
) != 1)
1212 fatal_error (input_location
, "cannot read omp_requires file %qs",
1216 in
= fopen (gcn_s1_name
, "r");
1218 fatal_error (input_location
, "cannot open intermediate gcn asm file");
1220 out
= fopen (gcn_s2_name
, "w");
1222 fatal_error (input_location
, "cannot open '%s'", gcn_s2_name
);
1224 process_asm (in
, out
, cfile
);
1229 /* Run the assemble/link pass. */
1230 fork_execute (ld_argv
[0], CONST_CAST (char **, ld_argv
), true, ".ld_args");
1231 obstack_free (&ld_argv_obstack
, NULL
);
1233 in
= fopen (gcn_o_name
, "r");
1235 fatal_error (input_location
, "cannot open intermediate gcn obj file");
1237 process_obj (in
, cfile
, omp_requires
);
1241 xputenv (concat ("GCC_EXEC_PREFIX=", execpath
, NULL
));
1242 xputenv (concat ("COMPILER_PATH=", cpath
, NULL
));
1243 xputenv (concat ("LIBRARY_PATH=", lpath
, NULL
));
1248 compile_native (gcn_cfile_name
, outname
, collect_gcc
, fPIC
, fpic
);