2 * SPDX-License-Identifier: MIT
4 * Copyright © 2019 Intel Corporation
7 #ifndef _I915_ACTIVE_H_
8 #define _I915_ACTIVE_H_
10 #include <linux/lockdep.h>
12 #include "i915_active_types.h"
13 #include "i915_request.h"
16 * We treat requests as fences. This is not to be confused with our
17 * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
18 * We use the fences to synchronize access from the CPU with activity on the
19 * GPU, for example, we should not rewrite an object's PTE whilst the GPU
20 * is reading them. We also track fences at a higher level to provide
21 * implicit synchronisation around GEM objects, e.g. set-domain will wait
22 * for outstanding GPU rendering before marking the object ready for CPU
23 * access, or a pageflip will wait until the GPU is complete before showing
24 * the frame on the scanout.
26 * In order to use a fence, the object must track the fence it needs to
27 * serialise with. For example, GEM objects want to track both read and
28 * write access so that we can perform concurrent read operations between
29 * the CPU and GPU engines, as well as waiting for all rendering to
30 * complete, or waiting for the last GPU user of a "fence register". The
31 * object then embeds a #i915_active_request to track the most recent (in
32 * retirement order) request relevant for the desired mode of access.
33 * The #i915_active_request is updated with i915_active_request_set() to
34 * track the most recent fence request, typically this is done as part of
35 * i915_vma_move_to_active().
37 * When the #i915_active_request completes (is retired), it will
38 * signal its completion to the owner through a callback as well as mark
39 * itself as idle (i915_active_request.request == NULL). The owner
40 * can then perform any action, such as delayed freeing of an active
41 * resource including itself.
/*
 * i915_active_retire_noop - fallback retirement callback
 * @active: the tracker whose request has been retired
 * @request: the request that was being tracked
 *
 * Installed by i915_active_request_init() and
 * i915_active_request_set_retire_fn() whenever their callback argument is
 * NULL, so that ->retire can always be invoked without a NULL check.
 * Only declared here; presumably it does nothing, as the name suggests --
 * confirm against the definition.
 */
void i915_active_retire_noop(struct i915_active_request *active,
			     struct i915_request *request);
48 * i915_active_request_init - prepares the activity tracker for use
49 * @active - the active tracker
50 * @rq - initial request to track, can be NULL
51 * @retire - a callback when the tracker is retired (becomes idle),
54 * i915_active_request_init() prepares the embedded @active struct for use as
55 * an activity tracker, that is for tracking the last known active request
56 * associated with it. When the last request becomes idle, when it is retired
57 * after completion, the optional callback @retire is invoked.
/*
 * Set up @active so that it tracks @rq and reports retirement through
 * @retire.
 * NOTE(review): the return type and the braces of this definition are not
 * visible in this view -- confirm against the full file.
 */
60 i915_active_request_init(struct i915_active_request
*active
,
61 struct i915_request
*rq
,
62 i915_active_retire_fn retire
)
/* Store the initial request (which may be NULL) in the RCU-managed pointer. */
64 RCU_INIT_POINTER(active
->request
, rq
);
/* Initialise the list node used to hang the tracker off a request. */
65 INIT_LIST_HEAD(&active
->link
);
/* A NULL @retire falls back to the no-op, so ->retire is never left NULL. */
66 active
->retire
= retire
?: i915_active_retire_noop
;
/*
 * INIT_ACTIVE_REQUEST - initialise a tracker with nothing to track
 * @name: pointer to the struct i915_active_request to set up
 *
 * Shorthand for i915_active_request_init() with a NULL initial request and
 * a NULL retirement callback.
 */
#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
72 * __i915_active_request_set - updates the tracker to watch the current request
73 * @active - the active tracker
74 * @request - the request to watch
76 * __i915_active_request_set() watches the given @request for completion. Whilst
77 * that @request is busy, the @active reports busy. When that @request is
78 * retired, the @active tracker is updated to report idle.
/*
 * Point @active at @request: relink the tracker onto the request's
 * active_list, then publish the new pointer.
 * NOTE(review): the return type and the braces of this definition are not
 * visible in this view -- confirm against the full file.
 */
81 __i915_active_request_set(struct i915_active_request
*active
,
82 struct i915_request
*request
)
/* list_move() takes ->link off whatever list it is on and adds it to request->active_list. */
84 list_move(&active
->link
, &request
->active_list
);
/* rcu_assign_pointer() supplies the write-side ordering that lockless readers of ->request pair with. */
85 rcu_assign_pointer(active
->request
, request
);
/*
 * Declaration only (no body follows): takes the tracker @active and the
 * request @rq.
 * NOTE(review): the return-type line is not visible in this view, and the
 * relationship to __i915_active_request_set() cannot be read from here --
 * confirm against the definition.
 */
89 i915_active_request_set(struct i915_active_request
*active
,
90 struct i915_request
*rq
);
93 * i915_active_request_set_retire_fn - updates the retirement callback
94 * @active - the active tracker
95 * @fn - the routine called when the request is retired
96 * @mutex - struct_mutex used to guard retirements
98 * i915_active_request_set_retire_fn() updates the function pointer that
99 * is called when the final request associated with the @active tracker is retired.
/*
 * Replace the retirement callback stored in @active with @fn.
 * NOTE(review): the return type, the declaration of the `mutex` parameter
 * used below, and the braces are not visible in this view -- confirm
 * against the full file.
 */
103 i915_active_request_set_retire_fn(struct i915_active_request
*active
,
104 i915_active_retire_fn fn
,
/* Lockdep check that the caller holds @mutex while the callback is swapped. */
107 lockdep_assert_held(mutex
);
/* A NULL @fn selects the no-op callback, matching i915_active_request_init(). */
108 active
->retire
= fn
?: i915_active_retire_noop
;
/*
 * Return the pointer currently stored in @active->request without any
 * lock check and without taking a reference.
 */
111 static inline struct i915_request
*
112 __i915_active_request_peek(const struct i915_active_request
*active
)
115 * Inside the error capture (running with the driver in an unknown
116 * state), we want to bend the rules slightly (a lot).
118 * Work is in progress to make it safer, in the meantime this keeps
119 * the known issue from spamming the logs.
/* The constant 1 is the protection condition: the access is declared always permitted, so no lockdep complaint is raised. */
121 return rcu_dereference_protected(active
->request
, 1);
125 * i915_active_request_raw - return the active request
126 * @active - the active tracker
128 * i915_active_request_raw() returns the current request being tracked, or NULL.
129 * It does not obtain a reference on the request for the caller, so the caller
130 * must hold struct_mutex.
/*
 * Return the pointer currently stored in @active->request; no reference
 * is taken and completion of the request is not tested.
 * NOTE(review): the declaration of the `mutex` parameter used below is
 * not visible in this view -- confirm against the full file.
 */
132 static inline struct i915_request
*
133 i915_active_request_raw(const struct i915_active_request
*active
,
/* lockdep_is_held(mutex) is the condition that justifies reading the RCU pointer outside a read-side section. */
136 return rcu_dereference_protected(active
->request
,
137 lockdep_is_held(mutex
));
141 * i915_active_request_peek - report the active request being monitored
142 * @active - the active tracker
144 * i915_active_request_peek() returns the current request being tracked if
145 * still active, or NULL. It does not obtain a reference on the request
146 * for the caller, so the caller must hold struct_mutex.
/*
 * Like i915_active_request_raw(), but additionally tests the fetched
 * request with i915_request_completed().
 * NOTE(review): the declaration of the `mutex` parameter and the
 * statements that follow the test are not visible in this view; going by
 * the description above, NULL is presumably returned for an idle or
 * completed tracker and the request otherwise -- confirm.
 */
148 static inline struct i915_request
*
149 i915_active_request_peek(const struct i915_active_request
*active
,
152 struct i915_request
*request
;
/* Raw fetch: no reference is taken on the result. */
154 request
= i915_active_request_raw(active
, mutex
);
/* Either nothing is tracked, or the tracked request has already completed but not yet been retired. */
155 if (!request
|| i915_request_completed(request
))
162 * i915_active_request_get - return a reference to the active request
163 * @active - the active tracker
165 * i915_active_request_get() returns a reference to the active request, or NULL
166 * if the active tracker is idle. The caller must hold struct_mutex.
/*
 * Take a reference on whatever i915_active_request_peek() reports and
 * return it.
 * NOTE(review): peek is documented as possibly returning NULL, so
 * i915_request_get() is presumably NULL-tolerant; the declaration of the
 * `mutex` parameter is not visible in this view -- confirm both.
 */
168 static inline struct i915_request
*
169 i915_active_request_get(const struct i915_active_request
*active
,
172 return i915_request_get(i915_active_request_peek(active
, mutex
));
176 * __i915_active_request_get_rcu - return a reference to the active request
177 * @active - the active tracker
179 * __i915_active_request_get_rcu() returns a reference to the active request,
180 * or NULL if the active tracker is idle. The caller must hold the RCU read
181 * lock, but the returned pointer is safe to use outside of RCU.
/*
 * Lockless lookup: read the tracked pointer under RCU, try to take a
 * reference on it, then re-check that the tracker still points at the same
 * request before handing the reference to the caller.
 * NOTE(review): the braces, the statement guarded by the first test, the
 * compiler barrier mentioned in the text, and whatever follows the final
 * i915_request_put() are not visible in this view -- confirm against the
 * full file before changing anything here.
 */
183 static inline struct i915_request
*
184 __i915_active_request_get_rcu(const struct i915_active_request
*active
)
187 * Performing a lockless retrieval of the active request is super
188 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
189 * slab of request objects will not be freed whilst we hold the
190 * RCU read lock. It does not guarantee that the request itself
191 * will not be freed and then *reused*. Viz,
195 * rq = active.request
196 * retire(rq) -> free(rq);
197 * (rq is now first on the slab freelist)
198 * active.request = NULL
200 * rq = new submission on a new object
203 * To prevent the request from being reused whilst the caller
204 * uses it, we take a reference like normal. Whilst acquiring
205 * the reference we check that it is not in a destroyed state
206 * (refcnt == 0). That prevents the request being reallocated
207 * whilst the caller holds on to it. To check that the request
208 * was not reallocated as we acquired the reference we have to
209 * check that our request remains the active request across
210 * the lookup, in the same manner as a seqlock. The visibility
211 * of the pointer versus the reference counting is controlled
212 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
214 * In the middle of all that, we inspect whether the request is
215 * complete. Retiring is lazy so the request may be completed long
216 * before the active tracker is updated. Querying whether the
217 * request is complete is far cheaper (as it involves no locked
218 * instructions setting cachelines to exclusive) than acquiring
219 * the reference, so we do it first. The RCU read lock ensures the
220 * pointer dereference is valid, but does not ensure that the
221 * seqno nor HWS is the right one! However, if the request was
222 * reallocated, that means the active tracker's request was complete.
223 * If the new request is also complete, then both are and we can
224 * just report the active tracker is idle. If the new request is
225 * incomplete, then we acquire a reference on it and check that
226 * it remained the active request.
228 * It is then imperative that we do not zero the request on
229 * reallocation, so that we can chase the dangling pointers!
230 * See i915_request_alloc().
233 struct i915_request
*request
;
/* Snapshot of the tracked pointer; it may already be stale by the time it is used. */
235 request
= rcu_dereference(active
->request
);
/* Cheap test first: nothing tracked, or the snapshot has already completed. */
236 if (!request
|| i915_request_completed(request
))
240 * An especially silly compiler could decide to recompute the
241 * result of i915_request_completed, more specifically
242 * re-emit the load for request->fence.seqno. A race would catch
243 * a later seqno value, which could flip the result from true to
244 * false. Which means part of the instructions below might not
245 * be executed, while later on instructions are executed. Due to
246 * barriers within the refcounting the inconsistency can't reach
247 * past the call to i915_request_get_rcu, but not executing
248 * that while still executing i915_request_put() creates
249 * havoc enough. Prevent this with a compiler barrier.
/* May come back NULL (the test below checks for that), i.e. no reference was obtained. */
253 request
= i915_request_get_rcu(request
);
256 * What stops the following rcu_access_pointer() from occurring
257 * before the above i915_request_get_rcu()? If we were
258 * to read the value before pausing to get the reference to
259 * the request, we may not notice a change in the active
262 * The rcu_access_pointer() is a mere compiler barrier, which
263 * means both the CPU and compiler are free to perform the
264 * memory read without constraint. The compiler only has to
265 * ensure that any operations after the rcu_access_pointer()
266 * occur afterwards in program order. This means the read may
267 * be performed earlier by an out-of-order CPU, or adventurous
270 * The atomic operation at the heart of
271 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
272 * atomic_inc_not_zero() which is only a full memory barrier
273 * when successful. That is, if i915_request_get_rcu()
274 * returns the request (and so with the reference counted
275 * incremented) then the following read for rcu_access_pointer()
276 * must occur after the atomic operation and so confirm
277 * that this request is the one currently being tracked.
279 * The corresponding write barrier is part of
280 * rcu_assign_pointer().
/* Done if no reference was obtained (NULL is handed back) or the tracker still points at the request we now hold. */
282 if (!request
|| request
== rcu_access_pointer(active
->request
))
283 return rcu_pointer_handoff(request
);
/* The tracker changed while the reference was being taken: drop the reference just acquired. */
285 i915_request_put(request
);
290 * i915_active_request_get_unlocked - return a reference to the active request
291 * @active - the active tracker
293 * i915_active_request_get_unlocked() returns a reference to the active request,
294 * or NULL if the active tracker is idle. The reference is obtained under RCU,
295 * so no locking is required by the caller.
297 * The reference should be freed with i915_request_put().
/*
 * Wrapper around __i915_active_request_get_rcu() for callers that hold no
 * lock of their own.
 * NOTE(review): __i915_active_request_get_rcu() is documented as requiring
 * the RCU read lock, but the lock/unlock calls and the return statement
 * are not visible in this view -- confirm against the full file.
 */
299 static inline struct i915_request
*
300 i915_active_request_get_unlocked(const struct i915_active_request
*active
)
302 struct i915_request
*request
;
/* On success this yields a referenced request, otherwise NULL. */
305 request
= __i915_active_request_get_rcu(active
);
312 * i915_active_request_isset - report whether the active tracker is assigned
313 * @active - the active tracker
315 * i915_active_request_isset() returns true if the active tracker is currently
316 * assigned to a request. Due to the lazy retiring, that request may be idle
317 * and this may report stale information.
/*
 * Report whether @active->request is currently non-NULL.
 * NOTE(review): the return type and the braces of this definition are not
 * visible in this view -- confirm against the full file.
 */
320 i915_active_request_isset(const struct i915_active_request
*active
)
/* Only the pointer value is tested; the request itself is never dereferenced here. */
322 return rcu_access_pointer(active
->request
);
326 * i915_active_request_retire - waits until the request is retired
327 * @active - the active request on which to wait
329 * i915_active_request_retire() waits until the request is completed,
330 * and then ensures that at least the retirement handler for this
331 * @active tracker is called before returning. If the @active
332 * tracker is idle, the function returns immediately.
/*
 * Wait for the request tracked by @active, then unlink the tracker and
 * invoke its retirement callback directly.
 * NOTE(review): the declaration of the `mutex` parameter, the declaration
 * of `ret`, any handling of an idle tracker or a failed wait, and the
 * final return are not visible in this view -- confirm against the full
 * file.
 */
334 static inline int __must_check
335 i915_active_request_retire(struct i915_active_request
*active
,
338 struct i915_request
*request
;
/* Fetched without taking a reference. */
341 request
= i915_active_request_raw(active
, mutex
);
/* Interruptible wait bounded only by MAX_SCHEDULE_TIMEOUT; I915_WAIT_LOCKED presumably signals that the caller holds struct_mutex -- confirm. */
345 ret
= i915_request_wait(request
,
346 I915_WAIT_INTERRUPTIBLE
| I915_WAIT_LOCKED
,
347 MAX_SCHEDULE_TIMEOUT
);
/* Detach from the request's list and clear the tracked pointer before the callback runs. */
351 list_del_init(&active
->link
);
352 RCU_INIT_POINTER(active
->request
, NULL
);
/* Called without a NULL check: init and set_retire_fn always store a callback, falling back to the no-op. */
354 active
->retire(active
, request
);
360 * GPU activity tracking
362 * Each set of commands submitted to the GPU comprises a single request that
363 * signals a fence upon completion. struct i915_request combines the
364 * command submission, scheduling and fence signaling roles. If we want to see
365 * if a particular task is complete, we need to grab the fence (struct
366 * i915_request) for that task and check or wait for it to be signaled. More
367 * often though we want to track the status of a bunch of tasks, for example
368 * to wait for the GPU to finish accessing some memory across a variety of
369 * different command pipelines from different clients. We could choose to
370 * track every single request associated with the task, but knowing that
371 * each request belongs to an ordered timeline (later requests within a
372 * timeline must wait for earlier requests), we need only track the
373 * latest request in each timeline to determine the overall status of the task.
376 * struct i915_active provides this tracking across timelines. It builds a
377 * composite shared-fence, and is updated as new work is submitted to the task,
378 * forming a snapshot of the current status. It should be embedded into the
379 * different resources that need to track their associated GPU activity to
380 * provide a callback when that GPU activity has ceased, or otherwise to
381 * provide a serialisation point either for request submission or for CPU
/*
 * i915_active_init - set up a struct i915_active
 * @i915: the struct drm_i915_private handed to the tracker
 * @ref: the tracker to initialise
 * @retire: callback that receives @ref
 *
 * Only declared here. Presumably @retire is run once all activity tracked
 * by @ref has ceased -- confirm against the definition.
 */
void i915_active_init(struct drm_i915_private *i915,
		      struct i915_active *ref,
		      void (*retire)(struct i915_active *ref));
/*
 * Declaration only: takes the tracker @ref and a request @rq and returns
 * an int.
 * NOTE(review): the embedded line numbering jumps from 389 to 391, so a
 * parameter line may be missing from this view -- confirm the full
 * signature before relying on it.
 */
389 int i915_active_ref(struct i915_active
*ref
,
391 struct i915_request
*rq
);
/*
 * i915_active_wait - wait on a struct i915_active
 * @ref: the tracker to wait on
 *
 * Only declared here. Presumably blocks until the activity tracked by @ref
 * has finished and returns 0 or a negative error code -- confirm against
 * the definition.
 */
int i915_active_wait(struct i915_active *ref);
/*
 * i915_request_await_active - order a request after a struct i915_active
 * @rq: the request that should wait
 * @ref: the tracker to wait for
 *
 * Only declared here. Presumably makes @rq wait for everything tracked by
 * @ref and returns 0 or a negative error code -- confirm against the
 * definition.
 */
int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref);
/*
 * i915_request_await_active_request - order a request after a tracked request
 * @rq: the request that should wait
 * @active: the single-request tracker to wait for
 *
 * Only declared here. Presumably makes @rq wait for the request currently
 * tracked by @active and returns 0 or a negative error code -- confirm
 * against the definition.
 */
int i915_request_await_active_request(struct i915_request *rq,
				      struct i915_active_request *active);
/*
 * i915_active_acquire - acquire a struct i915_active
 * @ref: the tracker
 *
 * Only declared here. The meaning of the bool result is not visible in
 * this header -- confirm against the definition. Counterpart of
 * i915_active_release() by name.
 */
bool i915_active_acquire(struct i915_active *ref);
/*
 * Cancel use of @ref.
 * NOTE(review): only the assertion below is visible in this view; the
 * braces and the rest of the body are missing -- confirm against the full
 * file.
 */
402 static inline void i915_active_cancel(struct i915_active
*ref
)
/* Debug-build assertion: cancelling is only expected while ref->count is exactly 1. */
404 GEM_BUG_ON(ref
->count
!= 1);
/*
 * i915_active_release - release a struct i915_active
 * @ref: the tracker
 *
 * Only declared here. Counterpart of i915_active_acquire() by name; the
 * exact pairing rules are not visible in this header -- confirm against
 * the definition.
 */
void i915_active_release(struct i915_active *ref);
/*
 * Takes a const tracker @ref.
 * NOTE(review): neither the return type nor the body is visible in this
 * view; by its name it presumably reports whether @ref has no outstanding
 * activity -- confirm against the full file.
 */
411 i915_active_is_idle(const struct i915_active
*ref
)
/*
 * i915_active_fini() comes in two forms: an out-of-line function declared
 * under CONFIG_DRM_I915_DEBUG_GEM, and an empty inline stub.
 * NOTE(review): the #else / #endif lines that must separate the two forms
 * are not visible in this view -- confirm against the full file.
 */
416 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
/* Debug build: the real implementation is defined elsewhere. */
417 void i915_active_fini(struct i915_active
*ref
);
/* Empty stub: calls compile away to nothing. */
419 static inline void i915_active_fini(struct i915_active
*ref
) { }
/*
 * Module-wide setup and teardown for the active-tracking code. Only
 * declared here; presumably i915_global_active_init() returns 0 or a
 * negative error code -- confirm against the definitions.
 */
int i915_global_active_init(void);
void i915_global_active_exit(void);
425 #endif /* _I915_ACTIVE_H_ */