83ffe9cd | 1 | /* Copyright (C) 2017-2023 Free Software Foundation, Inc. |
fa499995 AS |
2 | Contributed by Mentor Embedded. |
3 | ||
4 | This file is part of the GNU Offloading and Multi Processing Library | |
5 | (libgomp). | |
6 | ||
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
93d90219 | 26 | /* This file handles maintenance of threads on AMD GCN. */ |
fa499995 AS |
27 | |
28 | #include "libgomp.h" | |
29 | #include <stdlib.h> | |
30 | #include <string.h> | |
31 | ||
e7d6c277 AS |
32 | #define LITTLEENDIAN_CPU |
33 | #include "hsa.h" | |
34 | ||
35 | /* Defined in basic-allocator.c via config/amdgcn/allocator.c. */ | |
36 | void __gcn_lowlat_init (void *heap, size_t size); | |
37 | ||
fa499995 | 38 | static void gomp_thread_start (struct gomp_thread_pool *); |
a49c7d31 | 39 | extern void build_indirect_map (void); |
fa499995 AS |
40 | |
41 | /* This externally visible function handles target region entry. It | |
42 | sets up a per-team thread pool and transfers control by returning to | |
43 | the kernel in the master thread or gomp_thread_start in other threads. | |
44 | ||
45 | The name of this function is part of the interface with the compiler: for | |
46 | each OpenMP kernel the compiler configures the stack, then calls here. | |
47 | ||
48 | Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue. */ | |
49 | ||
void
gomp_gcn_enter_kernel (void)
{
  /* Workitem position in dimension 1 identifies this thread within the
     team: thread 0 acts as the team master and performs all one-time
     setup; the others wait for it at the s_barrier below.  */
  int threadid = __builtin_gcn_dim_pos (1);

  /* Initialize indirect function support.  */
  build_indirect_map ();

  if (threadid == 0)
    {
      int numthreads = __builtin_gcn_dim_size (1);
      int teamid = __builtin_gcn_dim_pos(0);

      /* Set up the global state.
	 Every team will do this, but that should be harmless.  */
      gomp_global_icv.nthreads_var = 16;
      gomp_global_icv.thread_limit_var = numthreads;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      /* Initialize the team arena for optimized memory allocation.
	 The arena has been allocated on the host side, and the address
	 passed in via the kernargs.  Each team takes a small slice of it.  */
      struct kernargs_abi *kernargs =
	(struct kernargs_abi*) __builtin_gcn_kernarg_ptr ();
      void *team_arena = ((void*)kernargs->arena_ptr
			  + kernargs->arena_size_per_team * teamid);
      /* The arena bookkeeping pointers live at fixed LDS addresses
	 (TEAM_ARENA_*) so that every thread in the team can reach them.  */
      void * __lds *arena_start = (void * __lds *)TEAM_ARENA_START;
      void * __lds *arena_free = (void * __lds *)TEAM_ARENA_FREE;
      void * __lds *arena_end = (void * __lds *)TEAM_ARENA_END;
      *arena_start = team_arena;
      *arena_free = team_arena;
      *arena_end = team_arena + kernargs->arena_size_per_team;

      /* Initialize the low-latency heap.  The header is the size.
	 The heap occupies the LDS space between GCN_LOWLAT_HEAP and the
	 end of the group segment reported by the dispatch packet.  */
      void __lds *lowlat = (void __lds *)GCN_LOWLAT_HEAP;
      hsa_kernel_dispatch_packet_t *queue_ptr = __builtin_gcn_dispatch_ptr ();
      __gcn_lowlat_init ((void*)(uintptr_t)(void __flat*)lowlat,
			 queue_ptr->group_segment_size - GCN_LOWLAT_HEAP);

      /* Allocate and initialize the team-local-storage data.  */
      struct gomp_thread *thrs = team_malloc_cleared (sizeof (*thrs)
						      * numthreads);
      set_gcn_thrs (thrs);

      /* Allocate and initialize a pool of threads in the team.
	 The threads are already running, of course, we just need to manage
	 the communication between them.  */
      struct gomp_thread_pool *pool = team_malloc (sizeof (*pool));
      pool->threads = team_malloc (sizeof (void *) * numthreads);
      for (int tid = 0; tid < numthreads; tid++)
	pool->threads[tid] = &thrs[tid];
      pool->threads_size = numthreads;
      pool->threads_used = numthreads;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, numthreads);
      thrs->thread_pool = pool;

      /* Release the other threads, which have been parked at the matching
	 s_barrier in the else-branch until this setup is complete.  */
      asm ("s_barrier" ::: "memory");
      return; /* Return to kernel.  */
    }
  else
    {
      /* Wait until the master thread has finished initializing the pool
	 and team-local storage above.  */
      asm ("s_barrier" ::: "memory");
      gomp_thread_start (gcn_thrs ()[0].thread_pool);
      /* gomp_thread_start does not return.  */
    }
}
119 | ||
/* Kernel epilogue hook: tear down the thread pool created by
   gomp_gcn_enter_kernel, then release the team-local thread storage.  */

void
gomp_gcn_exit_kernel (void)
{
  struct gomp_thread *thrs = gcn_thrs ();

  gomp_free_thread (thrs);
  team_free (thrs);
}
126 | ||
127 | /* This function contains the idle loop in which a thread waits | |
128 | to be called up to become part of a team. */ | |
129 | ||
130 | static void | |
131 | gomp_thread_start (struct gomp_thread_pool *pool) | |
132 | { | |
133 | struct gomp_thread *thr = gomp_thread (); | |
134 | ||
135 | gomp_sem_init (&thr->release, 0); | |
136 | thr->thread_pool = pool; | |
137 | ||
138 | /* The loop exits only when "fn" is assigned "gomp_free_pool_helper", | |
139 | which contains "s_endpgm", or an infinite no-op loop is | |
140 | suspected (this happens when the thread master crashes). */ | |
141 | int nul_limit = 99; | |
142 | do | |
143 | { | |
144 | gomp_simple_barrier_wait (&pool->threads_dock); | |
145 | if (!thr->fn) | |
146 | { | |
147 | if (nul_limit-- > 0) | |
148 | continue; | |
149 | else | |
150 | { | |
151 | const char msg[] = ("team master not responding;" | |
152 | " slave thread aborting"); | |
153 | write (2, msg, sizeof (msg)-1); | |
154 | abort(); | |
155 | } | |
156 | } | |
157 | thr->fn (thr->data); | |
158 | thr->fn = NULL; | |
159 | ||
160 | struct gomp_task *task = thr->task; | |
161 | gomp_team_barrier_wait_final (&thr->ts.team->barrier); | |
162 | gomp_finish_task (task); | |
163 | } | |
164 | while (1); | |
165 | } | |
166 | ||
167 | /* Launch a team. */ | |
168 | ||
169 | void | |
170 | gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, | |
171 | unsigned flags, struct gomp_team *team, | |
172 | struct gomp_taskgroup *taskgroup) | |
173 | { | |
174 | struct gomp_thread *thr, *nthr; | |
175 | struct gomp_task *task; | |
176 | struct gomp_task_icv *icv; | |
177 | struct gomp_thread_pool *pool; | |
178 | unsigned long nthreads_var; | |
179 | ||
180 | thr = gomp_thread (); | |
181 | pool = thr->thread_pool; | |
182 | task = thr->task; | |
183 | icv = task ? &task->icv : &gomp_global_icv; | |
184 | ||
185 | /* Always save the previous state, even if this isn't a nested team. | |
186 | In particular, we should save any work share state from an outer | |
187 | orphaned work share construct. */ | |
188 | team->prev_ts = thr->ts; | |
189 | ||
190 | thr->ts.team = team; | |
191 | thr->ts.team_id = 0; | |
192 | ++thr->ts.level; | |
193 | if (nthreads > 1) | |
194 | ++thr->ts.active_level; | |
195 | thr->ts.work_share = &team->work_shares[0]; | |
196 | thr->ts.last_work_share = NULL; | |
197 | thr->ts.single_count = 0; | |
198 | thr->ts.static_trip = 0; | |
199 | thr->task = &team->implicit_task[0]; | |
200 | nthreads_var = icv->nthreads_var; | |
201 | gomp_init_task (thr->task, task, icv); | |
202 | team->implicit_task[0].icv.nthreads_var = nthreads_var; | |
203 | team->implicit_task[0].taskgroup = taskgroup; | |
204 | ||
205 | if (nthreads == 1) | |
206 | return; | |
207 | ||
208 | /* Release existing idle threads. */ | |
209 | for (unsigned i = 1; i < nthreads; ++i) | |
210 | { | |
211 | nthr = pool->threads[i]; | |
212 | nthr->ts.team = team; | |
213 | nthr->ts.work_share = &team->work_shares[0]; | |
214 | nthr->ts.last_work_share = NULL; | |
215 | nthr->ts.team_id = i; | |
216 | nthr->ts.level = team->prev_ts.level + 1; | |
217 | nthr->ts.active_level = thr->ts.active_level; | |
218 | nthr->ts.single_count = 0; | |
219 | nthr->ts.static_trip = 0; | |
220 | nthr->task = &team->implicit_task[i]; | |
221 | gomp_init_task (nthr->task, task, icv); | |
222 | team->implicit_task[i].icv.nthreads_var = nthreads_var; | |
223 | team->implicit_task[i].taskgroup = taskgroup; | |
224 | nthr->fn = fn; | |
225 | nthr->data = data; | |
226 | team->ordered_release[i] = &nthr->release; | |
227 | } | |
228 | ||
229 | gomp_simple_barrier_wait (&pool->threads_dock); | |
230 | } | |
231 | ||
232 | #include "../../team.c" |