]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgomp/config/nvptx/team.c
Update copyright years.
[thirdparty/gcc.git] / libgomp / config / nvptx / team.c
CommitLineData
a945c346 1/* Copyright (C) 2015-2024 Free Software Foundation, Inc.
6103184e
AM
2 Contributed by Alexander Monakov <amonakov@ispras.ru>
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
93d90219 26/* This file handles maintenance of threads on NVPTX. */
6103184e
AM
27
28#if defined __nvptx_softstack__ && defined __nvptx_unisimt__
29
30#include "libgomp.h"
31#include <stdlib.h>
32#include <string.h>
33
34struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
9fa72756 35int __gomp_team_num __attribute__((shared,nocommon));
6103184e
AM
36
37static void gomp_thread_start (struct gomp_thread_pool *);
a49c7d31 38extern void build_indirect_map (void);
6103184e 39
30486fab
AS
40/* There should be some .shared space reserved for us. There's no way to
41 express this magic extern sizeless array in C so use asm. */
42asm (".extern .shared .u8 __nvptx_lowlat_pool[];\n");
43
44/* Defined in basic-allocator.c via config/nvptx/allocator.c. */
45void __nvptx_lowlat_init (void *heap, size_t size);
6103184e
AM
46
47/* This externally visible function handles target region entry. It
48 sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
49 in the master thread or gomp_thread_start in other threads.
50
51 The name of this function is part of the interface with the compiler: for
52 each target region, GCC emits a PTX .kernel function that sets up soft-stack
53 and uniform-simt state and calls this function, passing in FN the original
54 function outlined for the target region. */
55
56void
57gomp_nvptx_main (void (*fn) (void *), void *fn_data)
58{
59 int tid, ntids;
60 asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
61 asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
a49c7d31
KCY
62
63 /* Initialize indirect function support. */
64 build_indirect_map ();
65
6103184e
AM
66 if (tid == 0)
67 {
68 gomp_global_icv.nthreads_var = ntids;
aea72386 69 gomp_global_icv.thread_limit_var = ntids;
6103184e
AM
70 /* Starting additional threads is not supported. */
71 gomp_global_icv.dyn_var = true;
72
9fa72756 73 __gomp_team_num = 0;
6103184e
AM
74 nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
75 memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
76
30486fab
AS
77 /* Find the low-latency heap details .... */
78 uint32_t *shared_pool;
79 uint32_t shared_pool_size = 0;
80 asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r"(shared_pool));
81#if __PTX_ISA_VERSION_MAJOR__ > 4 \
82 || (__PTX_ISA_VERSION_MAJOR__ == 4 && __PTX_ISA_VERSION_MINOR__ >= 1)
83 asm ("mov.u32\t%0, %%dynamic_smem_size;\n"
84 : "=r"(shared_pool_size));
85#endif
86 __nvptx_lowlat_init (shared_pool, shared_pool_size);
87
88 /* Initialize the thread pool. */
6103184e
AM
89 struct gomp_thread_pool *pool = alloca (sizeof (*pool));
90 pool->threads = alloca (ntids * sizeof (*pool->threads));
91 for (tid = 0; tid < ntids; tid++)
92 pool->threads[tid] = nvptx_thrs + tid;
93 pool->threads_size = ntids;
94 pool->threads_used = ntids;
95 pool->threads_busy = 1;
96 pool->last_team = NULL;
97 gomp_simple_barrier_init (&pool->threads_dock, ntids);
98
99 nvptx_thrs[0].thread_pool = pool;
100 asm ("bar.sync 0;");
101 fn (fn_data);
102
103 gomp_free_thread (nvptx_thrs);
104 }
105 else
106 {
107 asm ("bar.sync 0;");
108 gomp_thread_start (nvptx_thrs[0].thread_pool);
109 }
110}
111
112/* This function contains the idle loop in which a thread waits
113 to be called up to become part of a team. */
114
115static void
116gomp_thread_start (struct gomp_thread_pool *pool)
117{
118 struct gomp_thread *thr = gomp_thread ();
119
120 gomp_sem_init (&thr->release, 0);
121 thr->thread_pool = pool;
122
123 do
124 {
125 gomp_simple_barrier_wait (&pool->threads_dock);
126 if (!thr->fn)
127 continue;
128 thr->fn (thr->data);
129 thr->fn = NULL;
130
131 struct gomp_task *task = thr->task;
132 gomp_team_barrier_wait_final (&thr->ts.team->barrier);
133 gomp_finish_task (task);
134 }
135 /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
136 it can trash stack pointer R1 in loops lacking exit edges. Add a cheap
137 artificial exit that the driver would not be able to optimize out. */
138 while (nvptx_thrs);
139}
140
141/* Launch a team. */
142
143void
144gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
28567c40
JJ
145 unsigned flags, struct gomp_team *team,
146 struct gomp_taskgroup *taskgroup)
6103184e
AM
147{
148 struct gomp_thread *thr, *nthr;
149 struct gomp_task *task;
150 struct gomp_task_icv *icv;
151 struct gomp_thread_pool *pool;
152 unsigned long nthreads_var;
153
154 thr = gomp_thread ();
155 pool = thr->thread_pool;
156 task = thr->task;
157 icv = task ? &task->icv : &gomp_global_icv;
158
159 /* Always save the previous state, even if this isn't a nested team.
160 In particular, we should save any work share state from an outer
161 orphaned work share construct. */
162 team->prev_ts = thr->ts;
163
164 thr->ts.team = team;
165 thr->ts.team_id = 0;
166 ++thr->ts.level;
167 if (nthreads > 1)
168 ++thr->ts.active_level;
169 thr->ts.work_share = &team->work_shares[0];
170 thr->ts.last_work_share = NULL;
171 thr->ts.single_count = 0;
172 thr->ts.static_trip = 0;
173 thr->task = &team->implicit_task[0];
174 nthreads_var = icv->nthreads_var;
175 gomp_init_task (thr->task, task, icv);
176 team->implicit_task[0].icv.nthreads_var = nthreads_var;
28567c40 177 team->implicit_task[0].taskgroup = taskgroup;
6103184e
AM
178
179 if (nthreads == 1)
180 return;
181
182 /* Release existing idle threads. */
183 for (unsigned i = 1; i < nthreads; ++i)
184 {
185 nthr = pool->threads[i];
186 nthr->ts.team = team;
187 nthr->ts.work_share = &team->work_shares[0];
188 nthr->ts.last_work_share = NULL;
189 nthr->ts.team_id = i;
190 nthr->ts.level = team->prev_ts.level + 1;
191 nthr->ts.active_level = thr->ts.active_level;
192 nthr->ts.single_count = 0;
193 nthr->ts.static_trip = 0;
194 nthr->task = &team->implicit_task[i];
195 gomp_init_task (nthr->task, task, icv);
196 team->implicit_task[i].icv.nthreads_var = nthreads_var;
28567c40 197 team->implicit_task[i].taskgroup = taskgroup;
6103184e
AM
198 nthr->fn = fn;
199 nthr->data = data;
200 team->ordered_release[i] = &nthr->release;
201 }
202
203 gomp_simple_barrier_wait (&pool->threads_dock);
204}
205
28567c40
JJ
/* omp_pause_resource support is not available on this target; report
   failure unconditionally.  */

int
gomp_pause_host (void)
{
  return -1;
}
211
6103184e
AM
212#include "../../team.c"
213#endif