]>
Commit | Line | Data |
---|---|---|
a945c346 | 1 | /* Copyright (C) 2015-2024 Free Software Foundation, Inc. |
6103184e AM |
2 | Contributed by Alexander Monakov <amonakov@ispras.ru> |
3 | ||
4 | This file is part of the GNU Offloading and Multi Processing Library | |
5 | (libgomp). | |
6 | ||
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
93d90219 | 26 | /* This file handles maintenance of threads on NVPTX. */ |
6103184e AM |
27 | |
28 | #if defined __nvptx_softstack__ && defined __nvptx_unisimt__ | |
29 | ||
30 | #include "libgomp.h" | |
31 | #include <stdlib.h> | |
32 | #include <string.h> | |
33 | ||
34 | struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon)); | |
9fa72756 | 35 | int __gomp_team_num __attribute__((shared,nocommon)); |
6103184e AM |
36 | |
37 | static void gomp_thread_start (struct gomp_thread_pool *); | |
a49c7d31 | 38 | extern void build_indirect_map (void); |
6103184e | 39 | |
30486fab AS |
40 | /* There should be some .shared space reserved for us. There's no way to |
41 | express this magic extern sizeless array in C so use asm. */ | |
42 | asm (".extern .shared .u8 __nvptx_lowlat_pool[];\n"); | |
43 | ||
44 | /* Defined in basic-allocator.c via config/nvptx/allocator.c. */ | |
45 | void __nvptx_lowlat_init (void *heap, size_t size); | |
6103184e AM |
46 | |
47 | /* This externally visible function handles target region entry. It | |
48 | sets up a per-team thread pool and transfers control by calling FN (FN_DATA) | |
49 | in the master thread or gomp_thread_start in other threads. | |
50 | ||
51 | The name of this function is part of the interface with the compiler: for | |
52 | each target region, GCC emits a PTX .kernel function that sets up soft-stack | |
53 | and uniform-simt state and calls this function, passing in FN the original | |
54 | function outlined for the target region. */ | |
55 | ||
56 | void | |
57 | gomp_nvptx_main (void (*fn) (void *), void *fn_data) | |
58 | { | |
59 | int tid, ntids; | |
60 | asm ("mov.u32 %0, %%tid.y;" : "=r" (tid)); | |
61 | asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids)); | |
a49c7d31 KCY |
62 | |
63 | /* Initialize indirect function support. */ | |
64 | build_indirect_map (); | |
65 | ||
6103184e AM |
66 | if (tid == 0) |
67 | { | |
68 | gomp_global_icv.nthreads_var = ntids; | |
aea72386 | 69 | gomp_global_icv.thread_limit_var = ntids; |
6103184e AM |
70 | /* Starting additional threads is not supported. */ |
71 | gomp_global_icv.dyn_var = true; | |
72 | ||
9fa72756 | 73 | __gomp_team_num = 0; |
6103184e AM |
74 | nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs)); |
75 | memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs)); | |
76 | ||
30486fab AS |
77 | /* Find the low-latency heap details .... */ |
78 | uint32_t *shared_pool; | |
79 | uint32_t shared_pool_size = 0; | |
80 | asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r"(shared_pool)); | |
81 | #if __PTX_ISA_VERSION_MAJOR__ > 4 \ | |
82 | || (__PTX_ISA_VERSION_MAJOR__ == 4 && __PTX_ISA_VERSION_MINOR__ >= 1) | |
83 | asm ("mov.u32\t%0, %%dynamic_smem_size;\n" | |
84 | : "=r"(shared_pool_size)); | |
85 | #endif | |
86 | __nvptx_lowlat_init (shared_pool, shared_pool_size); | |
87 | ||
88 | /* Initialize the thread pool. */ | |
6103184e AM |
89 | struct gomp_thread_pool *pool = alloca (sizeof (*pool)); |
90 | pool->threads = alloca (ntids * sizeof (*pool->threads)); | |
91 | for (tid = 0; tid < ntids; tid++) | |
92 | pool->threads[tid] = nvptx_thrs + tid; | |
93 | pool->threads_size = ntids; | |
94 | pool->threads_used = ntids; | |
95 | pool->threads_busy = 1; | |
96 | pool->last_team = NULL; | |
97 | gomp_simple_barrier_init (&pool->threads_dock, ntids); | |
98 | ||
99 | nvptx_thrs[0].thread_pool = pool; | |
100 | asm ("bar.sync 0;"); | |
101 | fn (fn_data); | |
102 | ||
103 | gomp_free_thread (nvptx_thrs); | |
104 | } | |
105 | else | |
106 | { | |
107 | asm ("bar.sync 0;"); | |
108 | gomp_thread_start (nvptx_thrs[0].thread_pool); | |
109 | } | |
110 | } | |
111 | ||
112 | /* This function contains the idle loop in which a thread waits | |
113 | to be called up to become part of a team. */ | |
114 | ||
115 | static void | |
116 | gomp_thread_start (struct gomp_thread_pool *pool) | |
117 | { | |
118 | struct gomp_thread *thr = gomp_thread (); | |
119 | ||
120 | gomp_sem_init (&thr->release, 0); | |
121 | thr->thread_pool = pool; | |
122 | ||
123 | do | |
124 | { | |
125 | gomp_simple_barrier_wait (&pool->threads_dock); | |
126 | if (!thr->fn) | |
127 | continue; | |
128 | thr->fn (thr->data); | |
129 | thr->fn = NULL; | |
130 | ||
131 | struct gomp_task *task = thr->task; | |
132 | gomp_team_barrier_wait_final (&thr->ts.team->barrier); | |
133 | gomp_finish_task (task); | |
134 | } | |
135 | /* Work around an NVIDIA driver bug: when generating sm_50 machine code, | |
136 | it can trash stack pointer R1 in loops lacking exit edges. Add a cheap | |
137 | artificial exit that the driver would not be able to optimize out. */ | |
138 | while (nvptx_thrs); | |
139 | } | |
140 | ||
141 | /* Launch a team. */ | |
142 | ||
143 | void | |
144 | gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, | |
28567c40 JJ |
145 | unsigned flags, struct gomp_team *team, |
146 | struct gomp_taskgroup *taskgroup) | |
6103184e AM |
147 | { |
148 | struct gomp_thread *thr, *nthr; | |
149 | struct gomp_task *task; | |
150 | struct gomp_task_icv *icv; | |
151 | struct gomp_thread_pool *pool; | |
152 | unsigned long nthreads_var; | |
153 | ||
154 | thr = gomp_thread (); | |
155 | pool = thr->thread_pool; | |
156 | task = thr->task; | |
157 | icv = task ? &task->icv : &gomp_global_icv; | |
158 | ||
159 | /* Always save the previous state, even if this isn't a nested team. | |
160 | In particular, we should save any work share state from an outer | |
161 | orphaned work share construct. */ | |
162 | team->prev_ts = thr->ts; | |
163 | ||
164 | thr->ts.team = team; | |
165 | thr->ts.team_id = 0; | |
166 | ++thr->ts.level; | |
167 | if (nthreads > 1) | |
168 | ++thr->ts.active_level; | |
169 | thr->ts.work_share = &team->work_shares[0]; | |
170 | thr->ts.last_work_share = NULL; | |
171 | thr->ts.single_count = 0; | |
172 | thr->ts.static_trip = 0; | |
173 | thr->task = &team->implicit_task[0]; | |
174 | nthreads_var = icv->nthreads_var; | |
175 | gomp_init_task (thr->task, task, icv); | |
176 | team->implicit_task[0].icv.nthreads_var = nthreads_var; | |
28567c40 | 177 | team->implicit_task[0].taskgroup = taskgroup; |
6103184e AM |
178 | |
179 | if (nthreads == 1) | |
180 | return; | |
181 | ||
182 | /* Release existing idle threads. */ | |
183 | for (unsigned i = 1; i < nthreads; ++i) | |
184 | { | |
185 | nthr = pool->threads[i]; | |
186 | nthr->ts.team = team; | |
187 | nthr->ts.work_share = &team->work_shares[0]; | |
188 | nthr->ts.last_work_share = NULL; | |
189 | nthr->ts.team_id = i; | |
190 | nthr->ts.level = team->prev_ts.level + 1; | |
191 | nthr->ts.active_level = thr->ts.active_level; | |
192 | nthr->ts.single_count = 0; | |
193 | nthr->ts.static_trip = 0; | |
194 | nthr->task = &team->implicit_task[i]; | |
195 | gomp_init_task (nthr->task, task, icv); | |
196 | team->implicit_task[i].icv.nthreads_var = nthreads_var; | |
28567c40 | 197 | team->implicit_task[i].taskgroup = taskgroup; |
6103184e AM |
198 | nthr->fn = fn; |
199 | nthr->data = data; | |
200 | team->ordered_release[i] = &nthr->release; | |
201 | } | |
202 | ||
203 | gomp_simple_barrier_wait (&pool->threads_dock); | |
204 | } | |
205 | ||
28567c40 JJ |
/* Always returns -1 and has no other effect.
   NOTE(review): presumably -1 tells the caller that pausing is not
   available in this configuration -- confirm against the callers.  */

int
gomp_pause_host (void)
{
  enum { PAUSE_RESULT = -1 };
  return PAUSE_RESULT;
}
211 | ||
6103184e AM |
212 | #include "../../team.c" |
213 | #endif |