]>
Commit | Line | Data |
---|---|---|
8d9254fc | 1 | /* Copyright (C) 2015-2020 Free Software Foundation, Inc. |
6103184e AM |
2 | Contributed by Alexander Monakov <amonakov@ispras.ru> |
3 | ||
4 | This file is part of the GNU Offloading and Multi Processing Library | |
5 | (libgomp). | |
6 | ||
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
93d90219 | 26 | /* This file handles maintenance of threads on NVPTX. */ |
6103184e AM |
27 | |
28 | #if defined __nvptx_softstack__ && defined __nvptx_unisimt__ | |
29 | ||
30 | #include "libgomp.h" | |
31 | #include <stdlib.h> | |
32 | #include <string.h> | |
33 | ||
34 | struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon)); | |
35 | ||
36 | static void gomp_thread_start (struct gomp_thread_pool *); | |
37 | ||
38 | ||
39 | /* This externally visible function handles target region entry. It | |
40 | sets up a per-team thread pool and transfers control by calling FN (FN_DATA) | |
41 | in the master thread or gomp_thread_start in other threads. | |
42 | ||
43 | The name of this function is part of the interface with the compiler: for | |
44 | each target region, GCC emits a PTX .kernel function that sets up soft-stack | |
45 | and uniform-simt state and calls this function, passing in FN the original | |
46 | function outlined for the target region. */ | |
47 | ||
48 | void | |
49 | gomp_nvptx_main (void (*fn) (void *), void *fn_data) | |
50 | { | |
51 | int tid, ntids; | |
52 | asm ("mov.u32 %0, %%tid.y;" : "=r" (tid)); | |
53 | asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids)); | |
54 | if (tid == 0) | |
55 | { | |
56 | gomp_global_icv.nthreads_var = ntids; | |
57 | /* Starting additional threads is not supported. */ | |
58 | gomp_global_icv.dyn_var = true; | |
59 | ||
60 | nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs)); | |
61 | memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs)); | |
62 | ||
63 | struct gomp_thread_pool *pool = alloca (sizeof (*pool)); | |
64 | pool->threads = alloca (ntids * sizeof (*pool->threads)); | |
65 | for (tid = 0; tid < ntids; tid++) | |
66 | pool->threads[tid] = nvptx_thrs + tid; | |
67 | pool->threads_size = ntids; | |
68 | pool->threads_used = ntids; | |
69 | pool->threads_busy = 1; | |
70 | pool->last_team = NULL; | |
71 | gomp_simple_barrier_init (&pool->threads_dock, ntids); | |
72 | ||
73 | nvptx_thrs[0].thread_pool = pool; | |
74 | asm ("bar.sync 0;"); | |
75 | fn (fn_data); | |
76 | ||
77 | gomp_free_thread (nvptx_thrs); | |
78 | } | |
79 | else | |
80 | { | |
81 | asm ("bar.sync 0;"); | |
82 | gomp_thread_start (nvptx_thrs[0].thread_pool); | |
83 | } | |
84 | } | |
85 | ||
86 | /* This function contains the idle loop in which a thread waits | |
87 | to be called up to become part of a team. */ | |
88 | ||
89 | static void | |
90 | gomp_thread_start (struct gomp_thread_pool *pool) | |
91 | { | |
92 | struct gomp_thread *thr = gomp_thread (); | |
93 | ||
94 | gomp_sem_init (&thr->release, 0); | |
95 | thr->thread_pool = pool; | |
96 | ||
97 | do | |
98 | { | |
99 | gomp_simple_barrier_wait (&pool->threads_dock); | |
100 | if (!thr->fn) | |
101 | continue; | |
102 | thr->fn (thr->data); | |
103 | thr->fn = NULL; | |
104 | ||
105 | struct gomp_task *task = thr->task; | |
106 | gomp_team_barrier_wait_final (&thr->ts.team->barrier); | |
107 | gomp_finish_task (task); | |
108 | } | |
109 | /* Work around an NVIDIA driver bug: when generating sm_50 machine code, | |
110 | it can trash stack pointer R1 in loops lacking exit edges. Add a cheap | |
111 | artificial exit that the driver would not be able to optimize out. */ | |
112 | while (nvptx_thrs); | |
113 | } | |
114 | ||
115 | /* Launch a team. */ | |
116 | ||
117 | void | |
118 | gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, | |
28567c40 JJ |
119 | unsigned flags, struct gomp_team *team, |
120 | struct gomp_taskgroup *taskgroup) | |
6103184e AM |
121 | { |
122 | struct gomp_thread *thr, *nthr; | |
123 | struct gomp_task *task; | |
124 | struct gomp_task_icv *icv; | |
125 | struct gomp_thread_pool *pool; | |
126 | unsigned long nthreads_var; | |
127 | ||
128 | thr = gomp_thread (); | |
129 | pool = thr->thread_pool; | |
130 | task = thr->task; | |
131 | icv = task ? &task->icv : &gomp_global_icv; | |
132 | ||
133 | /* Always save the previous state, even if this isn't a nested team. | |
134 | In particular, we should save any work share state from an outer | |
135 | orphaned work share construct. */ | |
136 | team->prev_ts = thr->ts; | |
137 | ||
138 | thr->ts.team = team; | |
139 | thr->ts.team_id = 0; | |
140 | ++thr->ts.level; | |
141 | if (nthreads > 1) | |
142 | ++thr->ts.active_level; | |
143 | thr->ts.work_share = &team->work_shares[0]; | |
144 | thr->ts.last_work_share = NULL; | |
145 | thr->ts.single_count = 0; | |
146 | thr->ts.static_trip = 0; | |
147 | thr->task = &team->implicit_task[0]; | |
148 | nthreads_var = icv->nthreads_var; | |
149 | gomp_init_task (thr->task, task, icv); | |
150 | team->implicit_task[0].icv.nthreads_var = nthreads_var; | |
28567c40 | 151 | team->implicit_task[0].taskgroup = taskgroup; |
6103184e AM |
152 | |
153 | if (nthreads == 1) | |
154 | return; | |
155 | ||
156 | /* Release existing idle threads. */ | |
157 | for (unsigned i = 1; i < nthreads; ++i) | |
158 | { | |
159 | nthr = pool->threads[i]; | |
160 | nthr->ts.team = team; | |
161 | nthr->ts.work_share = &team->work_shares[0]; | |
162 | nthr->ts.last_work_share = NULL; | |
163 | nthr->ts.team_id = i; | |
164 | nthr->ts.level = team->prev_ts.level + 1; | |
165 | nthr->ts.active_level = thr->ts.active_level; | |
166 | nthr->ts.single_count = 0; | |
167 | nthr->ts.static_trip = 0; | |
168 | nthr->task = &team->implicit_task[i]; | |
169 | gomp_init_task (nthr->task, task, icv); | |
170 | team->implicit_task[i].icv.nthreads_var = nthreads_var; | |
28567c40 | 171 | team->implicit_task[i].taskgroup = taskgroup; |
6103184e AM |
172 | nthr->fn = fn; |
173 | nthr->data = data; | |
174 | team->ordered_release[i] = &nthr->release; | |
175 | } | |
176 | ||
177 | gomp_simple_barrier_wait (&pool->threads_dock); | |
178 | } | |
179 | ||
28567c40 JJ |
/* Always return -1; this variant does no other work.
   NOTE(review): -1 presumably tells the caller that pausing is not
   available here -- confirm against the generic implementation.  */

int
gomp_pause_host (void)
{
  const int result = -1;

  return result;
}
185 | ||
6103184e AM |
186 | #include "../../team.c" |
187 | #endif |