// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"

#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
 * firmware is moving to an updated submission interface and we plan to
 * turn submission back on when that lands. The documentation below (and the
 * related code) matches the old submission model and will be updated as part
 * of the upgrade to the new flow.
 *
 * GuC stage descriptor:
 * During initialization, the driver allocates a static pool of 1024 such
 * descriptors and shares them with the GuC. Currently, we only use one
 * descriptor. This stage descriptor lets the GuC know about the workqueue and
 * process descriptor. Theoretically, it also lets the GuC know about our HW
 * contexts (context ID, etc...), but we actually employ a kind of submission
 * where the GuC uses the LRCA sent via the work item instead. This is called
 * a "proxy" submission.
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * The firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Work Items:
 * There are several types of work items that the host may place into a
 * workqueue, each with its own requirements and limitations. Currently only
 * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
 * represents an in-order queue. The kernel driver packs the ring tail pointer
 * and an ELSP context descriptor dword into the work item.
 * See guc_add_request().
 */

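/*
 * Rough shape of the legacy submission path implemented below (reader's
 * sketch; see the individual functions for the authoritative details):
 *
 *   guc_submission_tasklet()
 *     schedule_out()          drop completed requests from execlists->inflight
 *     __guc_dequeue()         fill the inflight ports from the priority queue
 *       guc_submit()          under guc->wq_lock
 *         guc_add_request()
 *           guc_wq_item_append()   write a 4-dword work item, bump desc->tail
 *
 * The GuC firmware consumes work items from the shared workqueue and submits
 * the referenced contexts to the hardware on our behalf (proxy submission).
 */
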
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
        return rb_entry(rb, struct i915_priolist, node);
}

static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
{
        struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;

        return &base[id];
}

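/* Allocate and CPU-map the workqueue buffer shared with the GuC (in GGTT). */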
static int guc_workqueue_create(struct intel_guc *guc)
{
        return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue,
                                              &guc->workqueue_vaddr);
}

static void guc_workqueue_destroy(struct intel_guc *guc)
{
        i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP);
}

/*
 * Initialise the process descriptor shared with the GuC firmware.
 */
static int guc_proc_desc_create(struct intel_guc *guc)
{
        const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc));

        return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc,
                                              &guc->proc_desc_vaddr);
}

static void guc_proc_desc_destroy(struct intel_guc *guc)
{
        i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP);
}

static void guc_proc_desc_init(struct intel_guc *guc)
{
        struct guc_process_desc *desc;

        desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc));

        /*
         * XXX: pDoorbell and WQVBaseAddress are pointers in process address
         * space for ring3 clients (set them as in mmap_ioctl) or kernel
         * space for kernel clients (map on demand instead? May make debug
         * easier to have it mapped).
         */
        desc->wq_base_addr = 0;
        desc->db_base_addr = 0;

        desc->wq_size_bytes = GUC_WQ_SIZE;
        desc->wq_status = WQ_STATUS_ACTIVE;
        desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
}

static void guc_proc_desc_fini(struct intel_guc *guc)
{
        memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc));
}

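/*
 * The stage descriptor pool is a single GGTT allocation sized for
 * GUC_MAX_STAGE_DESCRIPTORS entries, although only descriptor 0 is used
 * (see guc_stage_desc_init()).
 */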
static int guc_stage_desc_pool_create(struct intel_guc *guc)
{
        u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
                              GUC_MAX_STAGE_DESCRIPTORS);

        return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool,
                                              &guc->stage_desc_pool_vaddr);
}

static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
{
        i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP);
}

/*
 * Initialise/clear the stage descriptor shared with the GuC firmware.
 *
 * This descriptor tells the GuC where (in GGTT space) to find the important
 * data structures related to work submission (process descriptor, write queue,
 * etc).
 */
static void guc_stage_desc_init(struct intel_guc *guc)
{
        struct guc_stage_desc *desc;

        /* we only use 1 stage desc, so hardcode it to 0 */
        desc = __get_stage_desc(guc, 0);
        memset(desc, 0, sizeof(*desc));

        desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
                          GUC_STAGE_DESC_ATTR_KERNEL;

        desc->stage_id = 0;
        desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;

        desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc);
        desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue);
        desc->wq_size = GUC_WQ_SIZE;
}

static void guc_stage_desc_fini(struct intel_guc *guc)
{
        struct guc_stage_desc *desc;

        desc = __get_stage_desc(guc, 0);
        memset(desc, 0, sizeof(*desc));
}

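/*
 * Sketch of the 4-dword work item built by guc_wq_item_append() below
 * (field names follow struct guc_wq_item):
 *
 *   dw0  header                type (WQ_TYPE_INORDER), length, target engine,
 *                              WQ_NO_WCFLUSH_WAIT
 *   dw1  context_desc          context descriptor dword (lrc.ccid)
 *   dw2  submit_element_info   ring tail, in qwords, at WQ_RING_TAIL_SHIFT
 *   dw3  fence_id              the request's fence seqno
 */
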
/* Construct a Work Item and append it to the GuC's Work Queue */
static void guc_wq_item_append(struct intel_guc *guc,
                               u32 target_engine, u32 context_desc,
                               u32 ring_tail, u32 fence_id)
{
        /* wqi_len is in DWords, and does not include the one-word header */
        const size_t wqi_size = sizeof(struct guc_wq_item);
        const u32 wqi_len = wqi_size / sizeof(u32) - 1;
        struct guc_process_desc *desc = guc->proc_desc_vaddr;
        struct guc_wq_item *wqi;
        u32 wq_off;

        lockdep_assert_held(&guc->wq_lock);

        /*
         * For now the workqueue item is 4 DWs and the workqueue buffer is
         * 2 pages, so a wqi can never straddle a page boundary nor wrap
         * around to the beginning. This simplifies the implementation below.
         *
         * XXX: if that ever changes, we would need to build the item in a
         * temporary wqi and copy it into the workqueue buffer dw by dw.
         */
        BUILD_BUG_ON(wqi_size != 16);

        /* We expect the WQ to be active if we're appending items to it */
        GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE);

        /* Free space is guaranteed. */
        wq_off = READ_ONCE(desc->tail);
        GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head),
                              GUC_WQ_SIZE) < wqi_size);
        GEM_BUG_ON(wq_off & (wqi_size - 1));

        wqi = guc->workqueue_vaddr + wq_off;

        /* Now fill in the 4-word work queue item */
        wqi->header = WQ_TYPE_INORDER |
                      (wqi_len << WQ_LEN_SHIFT) |
                      (target_engine << WQ_TARGET_SHIFT) |
                      WQ_NO_WCFLUSH_WAIT;
        wqi->context_desc = context_desc;
        wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
        GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
        wqi->fence_id = fence_id;

        /* Make the update visible to GuC */
        WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1));
}

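/*
 * Turn a request into a work item: the context descriptor dword plus the new
 * ring tail (in qwords), tagged with the request's fence seqno.
 */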
static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
        struct intel_engine_cs *engine = rq->engine;
        u32 ctx_desc = rq->context->lrc.ccid;
        u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);

        guc_wq_item_append(guc, engine->guc_id, ctx_desc,
                           ring_tail, rq->fence.seqno);
}

/*
 * When submitting via the regular execlists backend, a write to ELSP from the
 * CPU side is enough to make sure that writes to ringbuffer pages pinned in
 * the mappable aperture portion of the GGTT are visible to the command
 * streamer. Writes done by the GuC on our behalf do not guarantee such
 * ordering, so to ensure the flush we issue a POSTING READ.
 */
static void flush_ggtt_writes(struct i915_vma *vma)
{
        if (i915_vma_is_map_and_fenceable(vma))
                intel_uncore_posting_read_fw(vma->vm->gt->uncore,
                                             GUC_STATUS);
}

static void guc_submit(struct intel_engine_cs *engine,
                       struct i915_request **out,
                       struct i915_request **end)
{
        struct intel_guc *guc = &engine->gt->uc.guc;

        spin_lock(&guc->wq_lock);

        do {
                struct i915_request *rq = *out++;

                flush_ggtt_writes(rq->ring->vma);
                guc_add_request(guc, rq);
        } while (out != end);

        spin_unlock(&guc->wq_lock);
}

static inline int rq_prio(const struct i915_request *rq)
{
        return rq->sched.attr.priority | __NO_PREEMPTION;
}

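/*
 * schedule_in/schedule_out bracket a request's residency in the inflight
 * ports: take a GT PM wakeref and a request reference when it is handed to
 * the GuC, and release both when it leaves the ports (completion or reset).
 */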
static struct i915_request *schedule_in(struct i915_request *rq, int idx)
{
        trace_i915_request_in(rq, idx);

        /*
         * Currently we are not tracking the rq->context being inflight
         * (ce->inflight = rq->engine). It is only used by the execlists
         * backend at the moment, a similar counting strategy would be
         * required if we generalise the inflight tracking.
         */

        __intel_gt_pm_get(rq->engine->gt);
        return i915_request_get(rq);
}

static void schedule_out(struct i915_request *rq)
{
        trace_i915_request_out(rq);

        intel_gt_pm_put_async(rq->engine->gt);
        i915_request_put(rq);
}

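/*
 * Fill the execlists->inflight ports from the scheduler's priority queue,
 * coalescing consecutive requests from the same context into a single port,
 * and hand anything newly submitted to guc_submit().
 */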
static void __guc_dequeue(struct intel_engine_cs *engine)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_request **first = execlists->inflight;
        struct i915_request ** const last_port = first + execlists->port_mask;
        struct i915_request *last = first[0];
        struct i915_request **port;
        bool submit = false;
        struct rb_node *rb;

        lockdep_assert_held(&engine->active.lock);

        if (last) {
                if (*++first)
                        return;

                last = NULL;
        }

        /*
         * We write directly into the execlists->inflight queue and don't use
         * the execlists->pending queue, as we don't have a distinct switch
         * event.
         */
        port = first;
        while ((rb = rb_first_cached(&execlists->queue))) {
                struct i915_priolist *p = to_priolist(rb);
                struct i915_request *rq, *rn;
                int i;

                priolist_for_each_request_consume(rq, rn, p, i) {
                        if (last && rq->context != last->context) {
                                if (port == last_port)
                                        goto done;

                                *port = schedule_in(last,
                                                    port - execlists->inflight);
                                port++;
                        }

                        list_del_init(&rq->sched.link);
                        __i915_request_submit(rq);
                        submit = true;
                        last = rq;
                }

                rb_erase_cached(&p->node, &execlists->queue);
                i915_priolist_free(p);
        }
done:
        execlists->queue_priority_hint =
                rb ? to_priolist(rb)->priority : INT_MIN;
        if (submit) {
                *port = schedule_in(last, port - execlists->inflight);
                *++port = NULL;
                guc_submit(engine, first, port);
        }
        execlists->active = execlists->inflight;
}

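/*
 * Submission tasklet: drop completed requests from the head of the inflight
 * array (compacting what remains), then dequeue more work. Runs under
 * engine->active.lock.
 */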
static void guc_submission_tasklet(unsigned long data)
{
        struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_request **port, *rq;
        unsigned long flags;

        spin_lock_irqsave(&engine->active.lock, flags);

        for (port = execlists->inflight; (rq = *port); port++) {
                if (!i915_request_completed(rq))
                        break;

                schedule_out(rq);
        }
        if (port != execlists->inflight) {
                int idx = port - execlists->inflight;
                int rem = ARRAY_SIZE(execlists->inflight) - idx;
                memmove(execlists->inflight, port, rem * sizeof(*port));
        }

        __guc_dequeue(engine);

        spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void guc_reset_prepare(struct intel_engine_cs *engine)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;

        ENGINE_TRACE(engine, "\n");

        /*
         * Prevent request submission to the hardware until we have
         * completed the reset in i915_gem_reset_finish(). If a request
         * is completed by one engine, it may then queue a request
         * to a second via its execlists->tasklet *just* as we are
         * calling engine->init_hw() and also writing the ELSP.
         * Turning off the execlists->tasklet until the reset is over
         * prevents the race.
         */
        __tasklet_disable_sync_once(&execlists->tasklet);
}

static void
cancel_port_requests(struct intel_engine_execlists * const execlists)
{
        struct i915_request * const *port, *rq;

        /* Note we are only using the inflight and not the pending queue */

        for (port = execlists->active; (rq = *port); port++)
                schedule_out(rq);
        execlists->active =
                memset(execlists->inflight, 0, sizeof(execlists->inflight));
}

static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_request *rq;
        unsigned long flags;

        spin_lock_irqsave(&engine->active.lock, flags);

        cancel_port_requests(execlists);

        /* Push back any incomplete requests for replay after the reset. */
        rq = execlists_unwind_incomplete_requests(execlists);
        if (!rq)
                goto out_unlock;

        if (!i915_request_started(rq))
                stalled = false;

        __i915_request_reset(rq, stalled);
        intel_lr_context_reset(engine, rq->context, rq->head, stalled);

out_unlock:
        spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void guc_reset_cancel(struct intel_engine_cs *engine)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_request *rq, *rn;
        struct rb_node *rb;
        unsigned long flags;

        ENGINE_TRACE(engine, "\n");

        /*
         * Before we call engine->cancel_requests(), we should have exclusive
         * access to the submission state. This is arranged for us by the
         * caller disabling the interrupt generation, the tasklet and other
         * threads that may then access the same state, giving us a free hand
         * to reset state. However, we still need to let lockdep be aware that
         * we know this state may be accessed in hardirq context, so we
         * disable the irq around this manipulation and we want to keep
         * the spinlock focused on its duties and not accidentally conflate
         * coverage to the submission's irq state. (Similarly, although we
         * shouldn't need to disable irq around the manipulation of the
         * submission's irq state, we also wish to remind ourselves that
         * it is irq state.)
         */
        spin_lock_irqsave(&engine->active.lock, flags);

        /* Cancel the requests on the HW and clear the ELSP tracker. */
        cancel_port_requests(execlists);

        /* Mark all executing requests as skipped. */
        list_for_each_entry(rq, &engine->active.requests, sched.link) {
                i915_request_set_error_once(rq, -EIO);
                i915_request_mark_complete(rq);
        }

        /* Flush the queued requests to the timeline list (for retiring). */
        while ((rb = rb_first_cached(&execlists->queue))) {
                struct i915_priolist *p = to_priolist(rb);
                int i;

                priolist_for_each_request_consume(rq, rn, p, i) {
                        list_del_init(&rq->sched.link);
                        __i915_request_submit(rq);
                        dma_fence_set_error(&rq->fence, -EIO);
                        i915_request_mark_complete(rq);
                }

                rb_erase_cached(&p->node, &execlists->queue);
                i915_priolist_free(p);
        }

        /* Remaining _unready_ requests will be nop'ed when submitted */

        execlists->queue_priority_hint = INT_MIN;
        execlists->queue = RB_ROOT_CACHED;

        spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void guc_reset_finish(struct intel_engine_cs *engine)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;

        if (__tasklet_enable(&execlists->tasklet))
                /* And kick in case we missed a new request submission. */
                tasklet_hi_schedule(&execlists->tasklet);

        ENGINE_TRACE(engine, "depth->%d\n",
                     atomic_read(&execlists->tasklet.count));
}

/*
 * Everything below here is concerned with setup & teardown, and is
 * therefore not part of the somewhat time-critical batch-submission
 * path of guc_submit() above.
 */

/*
 * Set up the memory resources to be shared with the GuC (via the GGTT)
 * at firmware loading time.
 */
int intel_guc_submission_init(struct intel_guc *guc)
{
        int ret;

        if (guc->stage_desc_pool)
                return 0;

        ret = guc_stage_desc_pool_create(guc);
        if (ret)
                return ret;
        /*
         * Keep static analysers happy, let them know that we allocated the
         * vma after testing that it didn't exist earlier.
         */
        GEM_BUG_ON(!guc->stage_desc_pool);

        ret = guc_workqueue_create(guc);
        if (ret)
                goto err_pool;

        ret = guc_proc_desc_create(guc);
        if (ret)
                goto err_workqueue;

        spin_lock_init(&guc->wq_lock);

        return 0;

err_workqueue:
        guc_workqueue_destroy(guc);
err_pool:
        guc_stage_desc_pool_destroy(guc);
        return ret;
}

void intel_guc_submission_fini(struct intel_guc *guc)
{
        if (guc->stage_desc_pool) {
                guc_proc_desc_destroy(guc);
                guc_workqueue_destroy(guc);
                guc_stage_desc_pool_destroy(guc);
        }
}

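/*
 * guc_interrupts_capture/guc_interrupts_release mask and unmask the context
 * switch interrupts (render/copy and VCS/VECS engines) while GuC submission
 * is in use; the host does not handle context-switch interrupts in that mode.
 */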
static void guc_interrupts_capture(struct intel_gt *gt)
{
        struct intel_uncore *uncore = gt->uncore;
        u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
        u32 dmask = irqs << 16 | irqs;

        GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);

        /* Don't handle the ctx switch interrupt in GuC submission mode */
        intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0);
        intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0);
}

static void guc_interrupts_release(struct intel_gt *gt)
{
        struct intel_uncore *uncore = gt->uncore;
        u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
        u32 dmask = irqs << 16 | irqs;

        GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);

        /* Handle ctx switch interrupts again */
        intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask);
        intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask);
}

static void guc_set_default_submission(struct intel_engine_cs *engine)
{
        /*
         * We inherit a bunch of functions from execlists that we'd like
         * to keep using:
         *
         *    engine->submit_request = execlists_submit_request;
         *    engine->cancel_requests = execlists_cancel_requests;
         *    engine->schedule = execlists_schedule;
         *
         * But we need to override the actual submission backend in order
         * to talk to the GuC.
         */
        intel_execlists_set_default_submission(engine);

        engine->execlists.tasklet.func = guc_submission_tasklet;

        /* do not use execlists park/unpark */
        engine->park = engine->unpark = NULL;

        engine->reset.prepare = guc_reset_prepare;
        engine->reset.rewind = guc_reset_rewind;
        engine->reset.cancel = guc_reset_cancel;
        engine->reset.finish = guc_reset_finish;

        engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
        engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;

        /*
         * For the breadcrumb irq to work we need the interrupts to stay
         * enabled. However, on all platforms on which we'll have support for
         * GuC submission we don't allow disabling the interrupts at runtime,
         * so we're always safe with the current flow.
         */
        GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
}

void intel_guc_submission_enable(struct intel_guc *guc)
{
        struct intel_gt *gt = guc_to_gt(guc);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /*
         * We're using GuC work items for submitting work through the GuC.
         * Since we're coalescing multiple requests from a single context into
         * a single work item prior to assigning it to an execlist port, we can
         * never have more work items than the total number of ports (for all
         * engines). The GuC firmware controls the HEAD of the work queue, and
         * it is guaranteed that it will remove the work item from the queue
         * before our request is completed.
         */
        BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
                     sizeof(struct guc_wq_item) *
                     I915_NUM_ENGINES > GUC_WQ_SIZE);

        guc_proc_desc_init(guc);
        guc_stage_desc_init(guc);

        /* Take over from manual control of ELSP (execlists) */
        guc_interrupts_capture(gt);

        for_each_engine(engine, gt, id) {
                engine->set_default_submission = guc_set_default_submission;
                engine->set_default_submission(engine);
        }
}

void intel_guc_submission_disable(struct intel_guc *guc)
{
        struct intel_gt *gt = guc_to_gt(guc);

        GEM_BUG_ON(gt->awake); /* GT should be parked first */

        /* Note: By the time we're here, GuC may have already been reset */

        guc_interrupts_release(gt);

        guc_stage_desc_fini(guc);
        guc_proc_desc_fini(guc);
}

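/*
 * GuC submission is selected only when the platform and firmware support it
 * and the enable_guc modparam requests it.
 */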
static bool __guc_submission_selected(struct intel_guc *guc)
{
        if (!intel_guc_submission_is_supported(guc))
                return false;

        return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION;
}

void intel_guc_submission_init_early(struct intel_guc *guc)
{
        guc->submission_selected = __guc_submission_selected(guc);
}

bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
{
        return engine->set_default_submission == guc_set_default_submission;
}