drivers/gpu/drm/i915/i915_active.h

   1 /*
   2  * SPDX-License-Identifier: MIT
   3  *
   4  * Copyright © 2019 Intel Corporation
   5  */
   6
   7 #ifndef _I915_ACTIVE_H_
   8 #define _I915_ACTIVE_H_
   9
  10 #include <linux/lockdep.h>
  11
  12 #include "i915_active_types.h"
  13 #include "i915_request.h"
  14
  15 /*
  16  * We treat requests as fences. This is not to be confused with our
  17  * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
  18  * We use the fences to synchronize access from the CPU with activity on the
  19  * GPU, for example, we should not rewrite an object's PTE whilst the GPU
  20  * is reading them. We also track fences at a higher level to provide
  21  * implicit synchronisation around GEM objects, e.g. set-domain will wait
  22  * for outstanding GPU rendering before marking the object ready for CPU
  23  * access, or a pageflip will wait until the GPU is complete before showing
  24  * the frame on the scanout.
  25  *
  26  * In order to use a fence, the object must track the fence it needs to
  27  * serialise with. For example, GEM objects want to track both read and
  28  * write access so that we can perform concurrent read operations between
  29  * the CPU and GPU engines, as well as waiting for all rendering to
  30  * complete, or waiting for the last GPU user of a "fence register". The
  31  * object then embeds a #i915_active_request to track the most recent (in
  32  * retirement order) request relevant for the desired mode of access.
  33  * The #i915_active_request is updated with i915_active_request_set() to
  34  * track the most recent fence request, typically this is done as part of
  35  * i915_vma_move_to_active().
  36  *
  37  * When the #i915_active_request completes (is retired), it will
  38  * signal its completion to the owner through a callback as well as mark
  39  * itself as idle (i915_active_request.request == NULL). The owner
  40  * can then perform any action, such as delayed freeing of an active
  41  * resource including itself.
  42  */
  43
  44 void i915_active_retire_noop(struct i915_active_request *active,
  45                              struct i915_request *request); /* default retire */
  46
  47 /**
  48  * i915_active_request_init - prepares the activity tracker for use
  49  * @active: the active tracker
  50  * @rq: initial request to track, can be NULL
  51  * @retire: a callback for when the tracker is retired (becomes idle),
  52  *          can be NULL, in which case i915_active_retire_noop() is used
  53  *
  54  * i915_active_request_init() prepares the embedded @active struct for use as
  55  * an activity tracker, that is for tracking the last known active request
  56  * associated with it. When that request becomes idle, i.e. when it is retired
  57  * after completion, the optional callback @retire is invoked.
  58  */
  59 static inline void
  60 i915_active_request_init(struct i915_active_request *active,
  61                          struct i915_request *rq,
  62                          i915_active_retire_fn retire)
  63 {
  64         active->retire = retire ? retire : i915_active_retire_noop;
  65         INIT_LIST_HEAD(&active->link);
  66         RCU_INIT_POINTER(active->request, rq);
  67 }
  68
  69 #define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
  70
  71 /**
  72  * __i915_active_request_set - updates the tracker to watch the current request
  73  * @active: the active tracker
  74  * @request: the request to watch
  75  *
  76  * Moves @active onto the active_list of @request and then publishes @request
  77  * as the tracked request. Whilst that @request is busy, the @active reports
  78  * busy. When that @request is retired, the @active is updated to report idle.
  79  */
  80 static inline void
  81 __i915_active_request_set(struct i915_active_request *active,
  82                           struct i915_request *request)
  83 {
  84         list_move(&active->link, &request->active_list);
  85         rcu_assign_pointer(active->request, request);
  86 }
  87
  88 int __must_check
  89 i915_active_request_set(struct i915_active_request *active,
  90                         struct i915_request *rq);
  91
  92 /**
  93  * i915_active_request_set_retire_fn - updates the retirement callback
  94  * @active: the active tracker
  95  * @fn: the routine called on retirement, NULL for i915_active_retire_noop()
  96  * @mutex: struct_mutex used to guard retirements, the caller must hold it
  97  *
  98  * Replaces the callback run when the last request tracked by @active retires.
  99  */
 100 static inline void
 101 i915_active_request_set_retire_fn(struct i915_active_request *active,
 102                                   i915_active_retire_fn fn,
 103                                   struct mutex *mutex)
 104 {
 105         lockdep_assert_held(mutex);
 106         if (!fn)
 107                 fn = i915_active_retire_noop;
 108         active->retire = fn;
 109 }
 110
 111 static inline struct i915_request *
 112 __i915_active_request_peek(const struct i915_active_request *active)
 113 {
 114         struct i915_request *rq;
 115
 116         /*
 117          * Error capture (driver in an unknown state) bends the rules a lot;
 118          * until made safer, this keeps the known issue out of the logs.
 119          */
 120         rq = rcu_dereference_protected(active->request, 1);
 121         return rq;
 122 }
 123
 124 /**
 125  * i915_active_request_raw - return the request being tracked, or NULL
 126  * @active: the active tracker
 127  * @mutex: struct_mutex, which the caller must hold
 128  */
 129 static inline struct i915_request *
 130 i915_active_request_raw(const struct i915_active_request *active,
 131                         struct mutex *mutex)
 132 {
 133         struct i915_request *rq;
 134
 135         /* No reference is obtained for the caller, hence the lock. */
 136         rq = rcu_dereference_protected(active->request, lockdep_is_held(mutex));
 137         return rq;
 138 }
 139
 140 /**
 141  * i915_active_request_peek - report the active request being monitored
 142  * @active: the active tracker
 143  * @mutex: struct_mutex, which the caller must hold
 144  *
 145  * i915_active_request_peek() returns the current request being tracked if
 146  * still active, or NULL. A request that has already completed but has not
 147  * yet been retired from the tracker is reported as NULL as well.
 148  *
 149  * No reference is obtained on the request for the caller, which is why the
 150  * caller must hold struct_mutex.
 151  */
 152 static inline struct i915_request *
 153 i915_active_request_peek(const struct i915_active_request *active,
 154                          struct mutex *mutex)
 155 {
 156         struct i915_request *rq = i915_active_request_raw(active, mutex);
 157
 158         return rq && !i915_request_completed(rq) ? rq : NULL;
 159 }
 160
 161 /**
 162  * i915_active_request_get - return a reference to the active request, or NULL
 163  * @active: the active tracker, NULL is returned if it is idle
 164  * @mutex: struct_mutex, which the caller must hold
 165  */
 166 static inline struct i915_request *
 167 i915_active_request_get(const struct i915_active_request *active,
 168                         struct mutex *mutex)
 169 {
 170         struct i915_request *rq = i915_active_request_peek(active, mutex);
 171
 172         return i915_request_get(rq);
 173 }
 174
 175 /**
 176  * __i915_active_request_get_rcu - return a reference to the active request
 177  * @active: the active tracker
 178  *
 179  * __i915_active_request_get_rcu() returns a reference to the active request,
 180  * or NULL if the active tracker is idle. The caller must hold the RCU read
 181  * lock, but the returned pointer is safe to use outside of RCU.
 182  */
 183 static inline struct i915_request *
 184 __i915_active_request_get_rcu(const struct i915_active_request *active)
 185 {
 186         /*
 187          * Performing a lockless retrieval of the active request is super
 188          * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
 189          * slab of request objects will not be freed whilst we hold the
 190          * RCU read lock. It does not guarantee that the request itself
 191          * will not be freed and then *reused*. Viz,
 192          *
 193          * Thread A                     Thread B
 194          *
 195          * rq = active.request
 196          *                              retire(rq) -> free(rq);
 197          *                              (rq is now first on the slab freelist)
 198          *                              active.request = NULL
 199          *
 200          *                              rq = new submission on a new object
 201          * ref(rq)
 202          *
 203          * To prevent the request from being reused whilst the caller
 204          * uses it, we take a reference like normal. Whilst acquiring
 205          * the reference we check that it is not in a destroyed state
 206          * (refcnt == 0). That prevents the request being reallocated
 207          * whilst the caller holds on to it. To check that the request
 208          * was not reallocated as we acquired the reference we have to
 209          * check that our request remains the active request across
 210          * the lookup, in the same manner as a seqlock. The visibility
 211          * of the pointer versus the reference counting is controlled
 212          * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
 213          *
 214          * In the middle of all that, we inspect whether the request is
 215          * complete. Retiring is lazy so the request may be completed long
 216          * before the active tracker is updated. Querying whether the
 217          * request is complete is far cheaper (as it involves no locked
 218          * instructions setting cachelines to exclusive) than acquiring
 219          * the reference, so we do it first. The RCU read lock ensures the
 220          * pointer dereference is valid, but does not ensure that the
 221          * seqno nor HWS is the right one! However, if the request was
 222          * reallocated, that means the active tracker's request was complete.
 223          * If the new request is also complete, then both are and we can
 224          * just report the active tracker is idle. If the new request is
 225          * incomplete, then we acquire a reference on it and check that
 226          * it remained the active request.
 227          *
 228          * It is then imperative that we do not zero the request on
 229          * reallocation, so that we can chase the dangling pointers!
 230          * See i915_request_alloc().
 231          */
 232         do {
 233                 struct i915_request *request;
 234
 235                 request = rcu_dereference(active->request);
 236                 if (!request || i915_request_completed(request))
 237                         return NULL;
 238
 239                 /*
 240                  * An especially silly compiler could decide to recompute the
 241                  * result of i915_request_completed, more specifically
 242                  * re-emit the load for request->fence.seqno. A race would catch
 243                  * a later seqno value, which could flip the result from true to
 244                  * false. Which means part of the instructions below might not
 245                  * be executed, while later on instructions are executed. Due to
 246                  * barriers within the refcounting the inconsistency can't reach
 247                  * past the call to i915_request_get_rcu, but not executing
 248                  * that while still executing i915_request_put() creates
 249                  * havoc enough.  Prevent this with a compiler barrier.
 250                  */
 251                 barrier();
 252
 253                 request = i915_request_get_rcu(request);
 254
 255                 /*
 256                  * What stops the following rcu_access_pointer() from occurring
 257                  * before the above i915_request_get_rcu()? If we were
 258                  * to read the value before pausing to get the reference to
 259                  * the request, we may not notice a change in the active
 260                  * tracker.
 261                  *
 262                  * The rcu_access_pointer() is a mere compiler barrier, which
 263                  * means both the CPU and compiler are free to perform the
 264                  * memory read without constraint. The compiler only has to
 265                  * ensure that any operations after the rcu_access_pointer()
 266                  * occur afterwards in program order. This means the read may
 267                  * be performed earlier by an out-of-order CPU, or adventurous
 268                  * compiler.
 269                  *
 270                  * The atomic operation at the heart of
 271                  * i915_request_get_rcu(), see dma_fence_get_rcu(), is
 272                  * atomic_inc_not_zero() which is only a full memory barrier
 273                  * when successful. That is, if i915_request_get_rcu()
 274                  * returns the request (and so with the reference count
 275                  * incremented) then the following read for rcu_access_pointer()
 276                  * must occur after the atomic operation and so confirm
 277                  * that this request is the one currently being tracked.
 278                  *
 279                  * The corresponding write barrier is part of
 280                  * rcu_assign_pointer().
 281                  */
 282                 if (!request || request == rcu_access_pointer(active->request))
 283                         return rcu_pointer_handoff(request);
 284
 285                 i915_request_put(request); /* tracker changed, retry */
 286         } while (1);
 287 }
 288
 289 /**
 290  * i915_active_request_get_unlocked - return a reference to the active request
 291  * @active: the active tracker
 292  *
 293  * Wraps __i915_active_request_get_rcu() in its own RCU read-side critical
 294  * section, so no locking is required by the caller. Returns a reference to
 295  * the active request, or NULL if the active tracker is idle.
 296  *
 297  * The reference should be freed with i915_request_put().
 298  */
 299 static inline struct i915_request *
 300 i915_active_request_get_unlocked(const struct i915_active_request *active)
 301 {
 302         struct i915_request *rq;
 303
 304         rcu_read_lock();
 305         rq = __i915_active_request_get_rcu(active);
 306         rcu_read_unlock();
 307
 308         return rq;
 309 }
 310
 311 /**
 312  * i915_active_request_isset - report whether the active tracker is assigned
 313  * @active: the active tracker
 314  *
 315  * Returns true if the active tracker currently has a request assigned to it.
 316  * Retiring is lazy, so that request may already be idle and the answer may
 317  * be stale.
 318  */
 319 static inline bool
 320 i915_active_request_isset(const struct i915_active_request *active)
 321 {
 322         return rcu_access_pointer(active->request) != NULL;
 323 }
 324
 325 /**
 326  * i915_active_request_retire - waits until the request is retired
 327  * @active: the active request on which to wait
 328  * @mutex: struct_mutex, which the caller must hold
 329  *
 330  * Waits (interruptibly) until the tracked request is completed, then ensures
 331  * that at least the retirement handler for this @active tracker is called
 332  * before returning. If the @active tracker is idle, returns immediately.
 333  *
 334  * Returns 0 on success, or the negative error code from i915_request_wait().
 335  */
 336 static inline int __must_check
 337 i915_active_request_retire(struct i915_active_request *active,
 338                            struct mutex *mutex)
 339 {
 340         const unsigned int flags = I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED;
 341         struct i915_request *rq = i915_active_request_raw(active, mutex);
 342         long timeout;
 343
 344         if (!rq)
 345                 return 0;
 346
 347         timeout = i915_request_wait(rq, flags, MAX_SCHEDULE_TIMEOUT);
 348         if (timeout < 0)
 349                 return timeout;
 350
 351         /* Mark the tracker idle before invoking the retirement handler. */
 352         list_del_init(&active->link);
 353         RCU_INIT_POINTER(active->request, NULL);
 354         active->retire(active, rq);
 355
 356         return 0;
 357 }
 358
 359 /*
 360  * GPU activity tracking
 361  *
 362  * Each set of commands submitted to the GPU comprises a single request that
 363  * signals a fence upon completion. struct i915_request combines the
 364  * command submission, scheduling and fence signaling roles. If we want to see
 365  * if a particular task is complete, we need to grab the fence (struct
 366  * i915_request) for that task and check or wait for it to be signaled. More
 367  * often though we want to track the status of a bunch of tasks, for example
 368  * to wait for the GPU to finish accessing some memory across a variety of
 369  * different command pipelines from different clients. We could choose to
 370  * track every single request associated with the task, but knowing that
 371  * each request belongs to an ordered timeline (later requests within a
 372  * timeline must wait for earlier requests), we need only track the
 373  * latest request in each timeline to determine the overall status of the
 374  * task.
 375  *
 376  * struct i915_active provides this tracking across timelines. It builds a
 377  * composite shared-fence, and is updated as new work is submitted to the task,
 378  * forming a snapshot of the current status. It should be embedded into the
 379  * different resources that need to track their associated GPU activity to
 380  * provide a callback when that GPU activity has ceased, or otherwise to
 381  * provide a serialisation point either for request submission or for CPU
 382  * synchronisation.
 383  */
 384
 385 void i915_active_init(struct drm_i915_private *i915,
 386                       struct i915_active *ref,
 387                       void (*retire)(struct i915_active *ref));
 388
 389 int i915_active_ref(struct i915_active *ref,
 390                     u64 timeline,
 391                     struct i915_request *rq);
 392
 393 int i915_active_wait(struct i915_active *ref);
 394
 395 int i915_request_await_active(struct i915_request *rq,
 396                               struct i915_active *ref);
 397 int i915_request_await_active_request(struct i915_request *rq,
 398                                       struct i915_active_request *active);
 399
 400 bool i915_active_acquire(struct i915_active *ref);
 401
 402 static inline void i915_active_cancel(struct i915_active *ref)
 403 {
 404         GEM_BUG_ON(ref->count != 1); /* expects count == 1 on entry */
 405         ref->count = 0; /* zero is what i915_active_is_idle() reports as idle */
 406 }
 407
 408 void i915_active_release(struct i915_active *ref);
 409
 410 /* An i915_active is reported idle when its count is zero. */
 411 static inline bool i915_active_is_idle(const struct i915_active *ref)
 412 {
 413         return ref->count == 0;
 414 }
 415
 416 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 417 void i915_active_fini(struct i915_active *ref);
 418 #else /* without DEBUG_GEM, i915_active_fini() is an empty inline stub */
 419 static inline void i915_active_fini(struct i915_active *ref) { }
 420 #endif /* IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) */
 421
 422 int i915_global_active_init(void);
 423 void i915_global_active_exit(void);
 424
 425 #endif /* _I915_ACTIVE_H_ */