]>
git.ipfire.org Git - thirdparty/linux.git/blob - drivers/gpu/drm/i915/gt/intel_timeline.c
2 * SPDX-License-Identifier: MIT
4 * Copyright © 2016-2018 Intel Corporation
9 #include "i915_active.h"
10 #include "i915_syncmap.h"
12 #include "intel_ring.h"
13 #include "intel_timeline.h"
/* Yield ptr with bit number `bit` OR-ed into its integer value, cast back to ptr's own type. */
15 #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
/* Non-zero when bit number `bit` is set in the integer value of ptr. */
16 #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
/* Number of low bits of cl->vaddr that carry the packed cacheline index (index must be < BIT(CACHELINE_BITS)). */
18 #define CACHELINE_BITS 6
/* Bit index used with ptr_set_bit()/ptr_test_bit() on cl->vaddr to flag a cacheline as pending free. */
19 #define CACHELINE_FREE CACHELINE_BITS
21 struct intel_timeline_hwsp
{
23 struct intel_gt_timelines
*gt_timelines
;
24 struct list_head free_link
;
29 static struct i915_vma
*__hwsp_alloc(struct intel_gt
*gt
)
31 struct drm_i915_private
*i915
= gt
->i915
;
32 struct drm_i915_gem_object
*obj
;
35 obj
= i915_gem_object_create_internal(i915
, PAGE_SIZE
);
39 i915_gem_object_set_cache_coherency(obj
, I915_CACHE_LLC
);
41 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
43 i915_gem_object_put(obj
);
48 static struct i915_vma
*
49 hwsp_alloc(struct intel_timeline
*timeline
, unsigned int *cacheline
)
51 struct intel_gt_timelines
*gt
= &timeline
->gt
->timelines
;
52 struct intel_timeline_hwsp
*hwsp
;
54 BUILD_BUG_ON(BITS_PER_TYPE(u64
) * CACHELINE_BYTES
> PAGE_SIZE
);
56 spin_lock_irq(>
->hwsp_lock
);
58 /* hwsp_free_list only contains HWSP that have available cachelines */
59 hwsp
= list_first_entry_or_null(>
->hwsp_free_list
,
60 typeof(*hwsp
), free_link
);
64 spin_unlock_irq(>
->hwsp_lock
);
66 hwsp
= kmalloc(sizeof(*hwsp
), GFP_KERNEL
);
68 return ERR_PTR(-ENOMEM
);
70 vma
= __hwsp_alloc(timeline
->gt
);
77 hwsp
->gt
= timeline
->gt
;
79 hwsp
->free_bitmap
= ~0ull;
80 hwsp
->gt_timelines
= gt
;
82 spin_lock_irq(>
->hwsp_lock
);
83 list_add(&hwsp
->free_link
, >
->hwsp_free_list
);
86 GEM_BUG_ON(!hwsp
->free_bitmap
);
87 *cacheline
= __ffs64(hwsp
->free_bitmap
);
88 hwsp
->free_bitmap
&= ~BIT_ULL(*cacheline
);
89 if (!hwsp
->free_bitmap
)
90 list_del(&hwsp
->free_link
);
92 spin_unlock_irq(>
->hwsp_lock
);
94 GEM_BUG_ON(hwsp
->vma
->private != hwsp
);
98 static void __idle_hwsp_free(struct intel_timeline_hwsp
*hwsp
, int cacheline
)
100 struct intel_gt_timelines
*gt
= hwsp
->gt_timelines
;
103 spin_lock_irqsave(>
->hwsp_lock
, flags
);
105 /* As a cacheline becomes available, publish the HWSP on the freelist */
106 if (!hwsp
->free_bitmap
)
107 list_add_tail(&hwsp
->free_link
, >
->hwsp_free_list
);
109 GEM_BUG_ON(cacheline
>= BITS_PER_TYPE(hwsp
->free_bitmap
));
110 hwsp
->free_bitmap
|= BIT_ULL(cacheline
);
112 /* And if no one is left using it, give the page back to the system */
113 if (hwsp
->free_bitmap
== ~0ull) {
114 i915_vma_put(hwsp
->vma
);
115 list_del(&hwsp
->free_link
);
119 spin_unlock_irqrestore(>
->hwsp_lock
, flags
);
122 static void __idle_cacheline_free(struct intel_timeline_cacheline
*cl
)
124 GEM_BUG_ON(!i915_active_is_idle(&cl
->active
));
126 i915_gem_object_unpin_map(cl
->hwsp
->vma
->obj
);
127 i915_vma_put(cl
->hwsp
->vma
);
128 __idle_hwsp_free(cl
->hwsp
, ptr_unmask_bits(cl
->vaddr
, CACHELINE_BITS
));
130 i915_active_fini(&cl
->active
);
135 static void __cacheline_retire(struct i915_active
*active
)
137 struct intel_timeline_cacheline
*cl
=
138 container_of(active
, typeof(*cl
), active
);
140 i915_vma_unpin(cl
->hwsp
->vma
);
141 if (ptr_test_bit(cl
->vaddr
, CACHELINE_FREE
))
142 __idle_cacheline_free(cl
);
145 static int __cacheline_active(struct i915_active
*active
)
147 struct intel_timeline_cacheline
*cl
=
148 container_of(active
, typeof(*cl
), active
);
150 __i915_vma_pin(cl
->hwsp
->vma
);
154 static struct intel_timeline_cacheline
*
155 cacheline_alloc(struct intel_timeline_hwsp
*hwsp
, unsigned int cacheline
)
157 struct intel_timeline_cacheline
*cl
;
160 GEM_BUG_ON(cacheline
>= BIT(CACHELINE_BITS
));
162 cl
= kmalloc(sizeof(*cl
), GFP_KERNEL
);
164 return ERR_PTR(-ENOMEM
);
166 vaddr
= i915_gem_object_pin_map(hwsp
->vma
->obj
, I915_MAP_WB
);
169 return ERR_CAST(vaddr
);
172 i915_vma_get(hwsp
->vma
);
174 cl
->vaddr
= page_pack_bits(vaddr
, cacheline
);
176 i915_active_init(&cl
->active
, __cacheline_active
, __cacheline_retire
);
181 static void cacheline_acquire(struct intel_timeline_cacheline
*cl
)
184 i915_active_acquire(&cl
->active
);
187 static void cacheline_release(struct intel_timeline_cacheline
*cl
)
190 i915_active_release(&cl
->active
);
193 static void cacheline_free(struct intel_timeline_cacheline
*cl
)
195 if (!i915_active_acquire_if_busy(&cl
->active
)) {
196 __idle_cacheline_free(cl
);
200 GEM_BUG_ON(ptr_test_bit(cl
->vaddr
, CACHELINE_FREE
));
201 cl
->vaddr
= ptr_set_bit(cl
->vaddr
, CACHELINE_FREE
);
203 i915_active_release(&cl
->active
);
206 int intel_timeline_init(struct intel_timeline
*timeline
,
208 struct i915_vma
*hwsp
)
212 kref_init(&timeline
->kref
);
213 atomic_set(&timeline
->pin_count
, 0);
217 timeline
->has_initial_breadcrumb
= !hwsp
;
218 timeline
->hwsp_cacheline
= NULL
;
221 struct intel_timeline_cacheline
*cl
;
222 unsigned int cacheline
;
224 hwsp
= hwsp_alloc(timeline
, &cacheline
);
226 return PTR_ERR(hwsp
);
228 cl
= cacheline_alloc(hwsp
->private, cacheline
);
230 __idle_hwsp_free(hwsp
->private, cacheline
);
234 timeline
->hwsp_cacheline
= cl
;
235 timeline
->hwsp_offset
= cacheline
* CACHELINE_BYTES
;
237 vaddr
= page_mask_bits(cl
->vaddr
);
239 timeline
->hwsp_offset
= I915_GEM_HWS_SEQNO_ADDR
;
241 vaddr
= i915_gem_object_pin_map(hwsp
->obj
, I915_MAP_WB
);
243 return PTR_ERR(vaddr
);
246 timeline
->hwsp_seqno
=
247 memset(vaddr
+ timeline
->hwsp_offset
, 0, CACHELINE_BYTES
);
249 timeline
->hwsp_ggtt
= i915_vma_get(hwsp
);
250 GEM_BUG_ON(timeline
->hwsp_offset
>= hwsp
->size
);
252 timeline
->fence_context
= dma_fence_context_alloc(1);
254 mutex_init(&timeline
->mutex
);
256 INIT_ACTIVE_FENCE(&timeline
->last_request
);
257 INIT_LIST_HEAD(&timeline
->requests
);
259 i915_syncmap_init(&timeline
->sync
);
264 void intel_gt_init_timelines(struct intel_gt
*gt
)
266 struct intel_gt_timelines
*timelines
= >
->timelines
;
268 spin_lock_init(&timelines
->lock
);
269 INIT_LIST_HEAD(&timelines
->active_list
);
271 spin_lock_init(&timelines
->hwsp_lock
);
272 INIT_LIST_HEAD(&timelines
->hwsp_free_list
);
275 void intel_timeline_fini(struct intel_timeline
*timeline
)
277 GEM_BUG_ON(atomic_read(&timeline
->pin_count
));
278 GEM_BUG_ON(!list_empty(&timeline
->requests
));
279 GEM_BUG_ON(timeline
->retire
);
281 if (timeline
->hwsp_cacheline
)
282 cacheline_free(timeline
->hwsp_cacheline
);
284 i915_gem_object_unpin_map(timeline
->hwsp_ggtt
->obj
);
286 i915_vma_put(timeline
->hwsp_ggtt
);
289 struct intel_timeline
*
290 intel_timeline_create(struct intel_gt
*gt
, struct i915_vma
*global_hwsp
)
292 struct intel_timeline
*timeline
;
295 timeline
= kzalloc(sizeof(*timeline
), GFP_KERNEL
);
297 return ERR_PTR(-ENOMEM
);
299 err
= intel_timeline_init(timeline
, gt
, global_hwsp
);
308 int intel_timeline_pin(struct intel_timeline
*tl
)
312 if (atomic_add_unless(&tl
->pin_count
, 1, 0))
315 err
= i915_ggtt_pin(tl
->hwsp_ggtt
, 0, PIN_HIGH
);
320 i915_ggtt_offset(tl
->hwsp_ggtt
) +
321 offset_in_page(tl
->hwsp_offset
);
323 cacheline_acquire(tl
->hwsp_cacheline
);
324 if (atomic_fetch_inc(&tl
->pin_count
)) {
325 cacheline_release(tl
->hwsp_cacheline
);
326 __i915_vma_unpin(tl
->hwsp_ggtt
);
332 void intel_timeline_enter(struct intel_timeline
*tl
)
334 struct intel_gt_timelines
*timelines
= &tl
->gt
->timelines
;
337 * Pretend we are serialised by the timeline->mutex.
339 * While generally true, there are a few exceptions to the rule
340 * for the engine->kernel_context being used to manage power
341 * transitions. As the engine_park may be called from under any
342 * timeline, it uses the power mutex as a global serialisation
343 * lock to prevent any other request entering its timeline.
345 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
347 * However, intel_gt_retire_request() does not know which engine
348 * it is retiring along and so cannot partake in the engine-pm
349 * barrier, and there we use the tl->active_count as a means to
350 * pin the timeline in the active_list while the locks are dropped.
351 * Ergo, as that is outside of the engine-pm barrier, we need to
352 * use atomic to manipulate tl->active_count.
354 lockdep_assert_held(&tl
->mutex
);
356 if (atomic_add_unless(&tl
->active_count
, 1, 0))
359 spin_lock(&timelines
->lock
);
360 if (!atomic_fetch_inc(&tl
->active_count
))
361 list_add_tail(&tl
->link
, &timelines
->active_list
);
362 spin_unlock(&timelines
->lock
);
365 void intel_timeline_exit(struct intel_timeline
*tl
)
367 struct intel_gt_timelines
*timelines
= &tl
->gt
->timelines
;
369 /* See intel_timeline_enter() */
370 lockdep_assert_held(&tl
->mutex
);
372 GEM_BUG_ON(!atomic_read(&tl
->active_count
));
373 if (atomic_add_unless(&tl
->active_count
, -1, 1))
376 spin_lock(&timelines
->lock
);
377 if (atomic_dec_and_test(&tl
->active_count
))
379 spin_unlock(&timelines
->lock
);
382 * Since this timeline is idle, all barriers upon which we were waiting
383 * must also be complete and so we can discard the last used barriers
384 * without loss of information.
386 i915_syncmap_free(&tl
->sync
);
389 static u32
timeline_advance(struct intel_timeline
*tl
)
391 GEM_BUG_ON(!atomic_read(&tl
->pin_count
));
392 GEM_BUG_ON(tl
->seqno
& tl
->has_initial_breadcrumb
);
394 return tl
->seqno
+= 1 + tl
->has_initial_breadcrumb
;
397 static void timeline_rollback(struct intel_timeline
*tl
)
399 tl
->seqno
-= 1 + tl
->has_initial_breadcrumb
;
403 __intel_timeline_get_seqno(struct intel_timeline
*tl
,
404 struct i915_request
*rq
,
407 struct intel_timeline_cacheline
*cl
;
408 unsigned int cacheline
;
409 struct i915_vma
*vma
;
413 might_lock(&tl
->gt
->ggtt
->vm
.mutex
);
416 * If there is an outstanding GPU reference to this cacheline,
417 * such as it being sampled by a HW semaphore on another timeline,
418 * we cannot wraparound our seqno value (the HW semaphore does
419 * a strict greater-than-or-equals compare, not i915_seqno_passed).
420 * So if the cacheline is still busy, we must detach ourselves
421 * from it and leave it inflight alongside its users.
423 * However, if nobody is watching and we can guarantee that nobody
424 * will, we could simply reuse the same cacheline.
426 * if (i915_active_request_is_signaled(&tl->last_request) &&
427 * i915_active_is_signaled(&tl->hwsp_cacheline->active))
430 * That seems unlikely for a busy timeline that needed to wrap in
431 * the first place, so just replace the cacheline.
434 vma
= hwsp_alloc(tl
, &cacheline
);
440 err
= i915_ggtt_pin(vma
, 0, PIN_HIGH
);
442 __idle_hwsp_free(vma
->private, cacheline
);
446 cl
= cacheline_alloc(vma
->private, cacheline
);
449 __idle_hwsp_free(vma
->private, cacheline
);
452 GEM_BUG_ON(cl
->hwsp
->vma
!= vma
);
455 * Attach the old cacheline to the current request, so that we only
456 * free it after the current request is retired, which ensures that
457 * all writes into the cacheline from previous requests are complete.
459 err
= i915_active_ref(&tl
->hwsp_cacheline
->active
, tl
, &rq
->fence
);
463 cacheline_release(tl
->hwsp_cacheline
); /* ownership now xfered to rq */
464 cacheline_free(tl
->hwsp_cacheline
);
466 i915_vma_unpin(tl
->hwsp_ggtt
); /* binding kept alive by old cacheline */
467 i915_vma_put(tl
->hwsp_ggtt
);
469 tl
->hwsp_ggtt
= i915_vma_get(vma
);
471 vaddr
= page_mask_bits(cl
->vaddr
);
472 tl
->hwsp_offset
= cacheline
* CACHELINE_BYTES
;
474 memset(vaddr
+ tl
->hwsp_offset
, 0, CACHELINE_BYTES
);
476 tl
->hwsp_offset
+= i915_ggtt_offset(vma
);
478 cacheline_acquire(cl
);
479 tl
->hwsp_cacheline
= cl
;
481 *seqno
= timeline_advance(tl
);
482 GEM_BUG_ON(i915_seqno_passed(*tl
->hwsp_seqno
, *seqno
));
490 timeline_rollback(tl
);
494 int intel_timeline_get_seqno(struct intel_timeline
*tl
,
495 struct i915_request
*rq
,
498 *seqno
= timeline_advance(tl
);
500 /* Replace the HWSP on wraparound for HW semaphores */
501 if (unlikely(!*seqno
&& tl
->hwsp_cacheline
))
502 return __intel_timeline_get_seqno(tl
, rq
, seqno
);
507 static int cacheline_ref(struct intel_timeline_cacheline
*cl
,
508 struct i915_request
*rq
)
510 return i915_active_add_request(&cl
->active
, rq
);
513 int intel_timeline_read_hwsp(struct i915_request
*from
,
514 struct i915_request
*to
,
517 struct intel_timeline_cacheline
*cl
;
520 GEM_BUG_ON(!rcu_access_pointer(from
->hwsp_cacheline
));
523 cl
= rcu_dereference(from
->hwsp_cacheline
);
524 if (i915_request_completed(from
)) /* confirm cacheline is valid */
526 if (unlikely(!i915_active_acquire_if_busy(&cl
->active
)))
527 goto unlock
; /* seqno wrapped and completed! */
528 if (unlikely(i915_request_completed(from
)))
532 err
= cacheline_ref(cl
, to
);
536 *hwsp
= i915_ggtt_offset(cl
->hwsp
->vma
) +
537 ptr_unmask_bits(cl
->vaddr
, CACHELINE_BITS
) * CACHELINE_BYTES
;
540 i915_active_release(&cl
->active
);
544 i915_active_release(&cl
->active
);
550 void intel_timeline_unpin(struct intel_timeline
*tl
)
552 GEM_BUG_ON(!atomic_read(&tl
->pin_count
));
553 if (!atomic_dec_and_test(&tl
->pin_count
))
556 cacheline_release(tl
->hwsp_cacheline
);
558 __i915_vma_unpin(tl
->hwsp_ggtt
);
561 void __intel_timeline_free(struct kref
*kref
)
563 struct intel_timeline
*timeline
=
564 container_of(kref
, typeof(*timeline
), kref
);
566 intel_timeline_fini(timeline
);
567 kfree_rcu(timeline
, rcu
);
570 void intel_gt_fini_timelines(struct intel_gt
*gt
)
572 struct intel_gt_timelines
*timelines
= >
->timelines
;
574 GEM_BUG_ON(!list_empty(&timelines
->active_list
));
575 GEM_BUG_ON(!list_empty(&timelines
->hwsp_free_list
));
578 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
579 #include "gt/selftests/mock_timeline.c"
580 #include "gt/selftest_timeline.c"