/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drm_print.h>

#include "gem/i915_gem_context.h"

#include "i915_drv.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_engine_user.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_gt_pm.h"
#include "intel_lrc.h"
#include "intel_reset.h"
#include "intel_ring.h"

/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)

#define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)

#define MAX_MMIO_BASES 3
struct engine_info {
	unsigned int hw_id;
	u8 class;
	u8 instance;
	/* mmio bases table *must* be sorted in reverse gen order */
	struct engine_mmio_base {
		u32 gen : 8;
		u32 base : 24;
	} mmio_bases[MAX_MMIO_BASES];
};

static const struct engine_info intel_engines[] = {
	[RCS0] = {
		.hw_id = RCS0_HW,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 1, .base = RENDER_RING_BASE }
		},
	},
	[BCS0] = {
		.hw_id = BCS0_HW,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 6, .base = BLT_RING_BASE }
		},
	},
	[VCS0] = {
		.hw_id = VCS0_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
			{ .gen = 4, .base = BSD_RING_BASE }
		},
	},
	[VCS1] = {
		.hw_id = VCS1_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[VCS3] = {
		.hw_id = VCS3_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[VECS0] = {
		.hw_id = VECS0_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .gen = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS1] = {
		.hw_id = VECS1_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
};

/**
 * intel_engine_context_size() - return the size of the context for an engine
 * @gt: the gt
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
{
	struct intel_uncore *uncore = gt->uncore;
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case RENDER_CLASS:
		switch (INTEL_GEN(gt->i915)) {
		default:
			MISSING_CASE(INTEL_GEN(gt->i915));
			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
		case 12:
		case 11:
			return GEN11_LR_CONTEXT_RENDER_SIZE;
		case 10:
			return GEN10_LR_CONTEXT_RENDER_SIZE;
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return GEN8_LR_CONTEXT_RENDER_SIZE;
		case 7:
			if (IS_HASWELL(gt->i915))
				return HSW_CXT_TOTAL_SIZE;

			cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = intel_uncore_read(uncore, CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
			/*
			 * There is a discrepancy here between the size reported
			 * by the register and the size of the context layout
			 * in the docs. Both are described as authoritative!
			 *
			 * The discrepancy is on the order of a few cachelines,
			 * but the total is under one page (4k), which is our
			 * minimum allocation anyway so it should all come
			 * out in the wash.
			 */
			cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
			drm_dbg(&gt->i915->drm,
				"gen%d CXT_SIZE = %d bytes [0x%08x]\n",
				INTEL_GEN(gt->i915), cxt_size * 64,
				cxt_size - 1);
			return round_up(cxt_size * 64, PAGE_SIZE);
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
		/* fall through */
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		if (INTEL_GEN(gt->i915) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

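/*
 * A rough usage sketch (the real callers are the submission backends,
 * which size the per-context state object from this value; the shmem
 * allocation below is only illustrative):
 *
 *	u32 size = intel_engine_context_size(engine->gt, engine->class);
 *
 *	if (size)
 *		obj = i915_gem_object_create_shmem(engine->i915, size);
 */
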
static u32 __engine_mmio_base(struct drm_i915_private *i915,
			      const struct engine_mmio_base *bases)
{
	int i;

	for (i = 0; i < MAX_MMIO_BASES; i++)
		if (INTEL_GEN(i915) >= bases[i].gen)
			break;

	GEM_BUG_ON(i == MAX_MMIO_BASES);
	GEM_BUG_ON(!bases[i].base);

	return bases[i].base;
}

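/*
 * Worked example of the reverse-gen lookup above: VCS0 lists
 * { gen 11, gen 6, gen 4 }, so on a gen9 part the first entry satisfying
 * INTEL_GEN() >= gen is the gen 6 one and the engine uses
 * GEN6_BSD_RING_BASE, while on gen11+ the same walk stops at
 * GEN11_BSD_RING_BASE.
 */
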
static void __sprint_engine_name(struct intel_engine_cs *engine)
{
	/*
	 * Before we know what the uABI name for this engine will be,
	 * we still would like to keep track of this engine in the debug logs.
	 * We throw in a ' here as a reminder that this isn't its final name.
	 */
	GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
			     intel_engine_class_repr(engine->class),
			     engine->instance) >= sizeof(engine->name));
}

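/*
 * For example, the render engine is tracked as "rcs'0" until the uABI
 * registration path (intel_engines_driver_register()) assigns the final
 * name, typically "rcs0"; the apostrophe marks the provisional name.
 */
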
void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Though they added more rings on g4x/ilk, they did not add
	 * per-engine HWSTAM until gen6.
	 */
	if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
		return;

	if (INTEL_GEN(engine->i915) >= 3)
		ENGINE_WRITE(engine, RING_HWSTAM, mask);
	else
		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
}

static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}

static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	struct drm_i915_private *i915 = gt->i915;
	struct intel_engine_cs *engine;

	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));

	if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
		return -EINVAL;

	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);

	engine->id = id;
	engine->legacy_idx = INVALID_ENGINE;
	engine->mask = BIT(id);
	engine->i915 = i915;
	engine->gt = gt;
	engine->uncore = gt->uncore;
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);

	engine->class = info->class;
	engine->instance = info->instance;
	__sprint_engine_name(engine);

	engine->props.heartbeat_interval_ms =
		CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
	engine->props.max_busywait_duration_ns =
		CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
	engine->props.preempt_timeout_ms =
		CONFIG_DRM_I915_PREEMPT_TIMEOUT;
	engine->props.stop_timeout_ms =
		CONFIG_DRM_I915_STOP_TIMEOUT;
	engine->props.timeslice_duration_ms =
		CONFIG_DRM_I915_TIMESLICE_DURATION;

	/* Override to uninterruptible for OpenCL workloads. */
	if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
		engine->props.preempt_timeout_ms = 0;

	engine->context_size = intel_engine_context_size(gt, engine->class);
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
	if (engine->context_size)
		DRIVER_CAPS(i915)->has_logical_contexts = true;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	ewma__engine_latency_init(&engine->latency);
	seqlock_init(&engine->stats.lock);

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	/* Scrub mmio state on takeover */
	intel_engine_sanitize_mmio(engine);

	gt->engine_class[info->class][info->instance] = engine;
	gt->engine[id] = engine;

	i915->engine[id] = engine;

	return 0;
}

static void __setup_engine_capabilities(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class == VIDEO_DECODE_CLASS) {
		/*
		 * HEVC support is present on first engine instance
		 * before Gen11 and on all instances afterwards.
		 */
		if (INTEL_GEN(i915) >= 11 ||
		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_CLASS_CAPABILITY_HEVC;

		/*
		 * SFC block is present only on even logical engine
		 * instances.
		 */
		if ((INTEL_GEN(i915) >= 11 &&
		     RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) ||
		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
		if (INTEL_GEN(i915) >= 9)
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	}
}

static void intel_setup_engine_capabilities(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id)
		__setup_engine_capabilities(engine);
}

/**
 * intel_engines_release() - free the resources allocated for Command Streamers
 * @gt: pointer to struct intel_gt
 */
void intel_engines_release(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Before we release the resources held by engine, we must be certain
	 * that the HW is no longer accessing them -- having the GPU scribble
	 * to or read from a page being used for something else causes no end
	 * of fun.
	 *
	 * The GPU should be reset by this point, but assume the worst just
	 * in case we aborted before completely initialising the engines.
	 */
	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		__intel_gt_reset(gt, ALL_ENGINES);

	/* Decouple the backend; but keep the layout for late GPU resets */
	for_each_engine(engine, gt, id) {
		intel_wakeref_wait_for_idle(&engine->wakeref);
		GEM_BUG_ON(intel_engine_pm_is_awake(engine));

		if (!engine->release)
			continue;

		engine->release(engine);
		engine->release = NULL;

		memset(&engine->reset, 0, sizeof(engine->reset));

		gt->i915->engine[id] = NULL;
	}
}

void intel_engines_free(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id) {
		kfree(engine);
		gt->engine[id] = NULL;
	}
}

/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 * @gt: pointer to struct intel_gt
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init_mmio(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *device_info = mkwrite_device_info(i915);
	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
	unsigned int mask = 0;
	unsigned int i;
	int err;

	drm_WARN_ON(&i915->drm, engine_mask == 0);
	drm_WARN_ON(&i915->drm, engine_mask &
		    GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));

	if (i915_inject_probe_failure(i915))
		return -ENODEV;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(i915, i))
			continue;

		err = intel_engine_setup(gt, i);
		if (err)
			goto cleanup;

		mask |= BIT(i);
	}

	/*
	 * Catch failures to update intel_engines table when the new engines
	 * are added to the driver by a warning and disabling the forgotten
	 * engines.
	 */
	if (drm_WARN_ON(&i915->drm, mask != engine_mask))
		device_info->engine_mask = mask;

	RUNTIME_INFO(i915)->num_engines = hweight32(mask);

	intel_gt_check_and_clear_faults(gt);

	intel_setup_engine_capabilities(gt);

	return 0;

cleanup:
	intel_engines_free(gt);
	return err;
}

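/*
 * A sketch of the intended life cycle (the exact call sites live in the
 * driver probe/remove paths and may differ between kernel versions):
 *
 *	intel_engines_init_mmio(gt);	allocate the intel_engine_cs array
 *	intel_engines_init(gt);		attach a submission backend per engine
 *	...
 *	intel_engines_release(gt);	decouple the backends on teardown
 *	intel_engines_free(gt);		finally kfree() the engines
 */
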
void intel_engine_init_execlists(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	execlists->port_mask = 1;
	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);

	memset(execlists->pending, 0, sizeof(execlists->pending));
	execlists->active =
		memset(execlists->inflight, 0, sizeof(execlists->inflight));

	execlists->queue_priority_hint = INT_MIN;
	execlists->queue = RB_ROOT_CACHED;
}

static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;

	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
	i915_gem_object_put(vma->obj);
}

static int pin_ggtt_status_page(struct intel_engine_cs *engine,
				struct i915_vma *vma)
{
	unsigned int flags;

	if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
		/*
		 * On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags = PIN_MAPPABLE;
	else
		flags = PIN_HIGH;

	return i915_ggtt_pin(vma, 0, flags);
}

static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int ret;

	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G. We only allow objects to be allocated
	 * in GFP_DMA32 for i965, and no earlier physical address users had
	 * access to more than 4G.
	 */
	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		drm_err(&engine->i915->drm,
			"Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err;
	}

	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
	engine->status_page.vma = vma;

	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
		ret = pin_ggtt_status_page(engine, vma);
		if (ret)
			goto err_unpin;
	}

	return 0;

err_unpin:
	i915_gem_object_unpin_map(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static int engine_setup_common(struct intel_engine_cs *engine)
{
	int err;

	init_llist_head(&engine->barrier_tasks);

	err = init_status_page(engine);
	if (err)
		return err;

	intel_engine_init_active(engine, ENGINE_PHYSICAL);
	intel_engine_init_breadcrumbs(engine);
	intel_engine_init_execlists(engine);
	intel_engine_init_cmd_parser(engine);
	intel_engine_init__pm(engine);
	intel_engine_init_retire(engine);

	intel_engine_pool_init(&engine->pool);

	/* Use the whole device by default */
	engine->sseu =
		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);

	intel_engine_init_workarounds(engine);
	intel_engine_init_whitelist(engine);
	intel_engine_init_ctx_wa(engine);

	return 0;
}

struct measure_breadcrumb {
	struct i915_request rq;
	struct intel_ring ring;
	u32 cs[1024];
};

static int measure_breadcrumb_dw(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	struct measure_breadcrumb *frame;
	int dw;

	GEM_BUG_ON(!engine->gt->scratch);

	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

	frame->rq.i915 = engine->i915;
	frame->rq.engine = engine;
	frame->rq.context = ce;
	rcu_assign_pointer(frame->rq.timeline, ce->timeline);

	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);
	frame->rq.ring = &frame->ring;

	mutex_lock(&ce->timeline->mutex);
	spin_lock_irq(&engine->active.lock);

	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;

	spin_unlock_irq(&engine->active.lock);
	mutex_unlock(&ce->timeline->mutex);

	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */

	kfree(frame);
	return dw;
}

void
intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
{
	INIT_LIST_HEAD(&engine->active.requests);
	INIT_LIST_HEAD(&engine->active.hold);

	spin_lock_init(&engine->active.lock);
	lockdep_set_subclass(&engine->active.lock, subclass);

	/*
	 * Due to an interesting quirk in lockdep's internal debug tracking,
	 * after setting a subclass we must ensure the lock is used. Otherwise,
	 * nr_unused_locks is incremented once too often.
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	local_irq_disable();
	lock_map_acquire(&engine->active.lock.dep_map);
	lock_map_release(&engine->active.lock.dep_map);
	local_irq_enable();
#endif
}

static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
	static struct lock_class_key kernel;
	struct intel_context *ce;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return ce;

	__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);

	err = intel_context_pin(ce); /* perma-pin so it is always available */
	if (err) {
		intel_context_put(ce);
		return ERR_PTR(err);
	}

	/*
	 * Give our perma-pinned kernel timelines a separate lockdep class,
	 * so that we can use them from within the normal user timelines
	 * should we need to inject GPU operations during their request
	 * construction.
	 */
	lockdep_set_class(&ce->timeline->mutex, &kernel);

	return ce;
}

/**
 * engine_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
static int engine_init_common(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	int ret;

	engine->set_default_submission(engine);

	/*
	 * We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ce = create_kernel_context(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ret = measure_breadcrumb_dw(ce);
	if (ret < 0)
		goto err_context;

	engine->emit_fini_breadcrumb_dw = ret;
	engine->kernel_context = ce;

	return 0;

err_context:
	intel_context_put(ce);
	return ret;
}

int intel_engines_init(struct intel_gt *gt)
{
	int (*setup)(struct intel_engine_cs *engine);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (HAS_EXECLISTS(gt->i915))
		setup = intel_execlists_submission_setup;
	else
		setup = intel_ring_submission_setup;

	for_each_engine(engine, gt, id) {
		err = engine_setup_common(engine);
		if (err)
			return err;

		err = setup(engine);
		if (err)
			return err;

		err = engine_init_common(engine);
		if (err)
			return err;

		intel_engine_add_user(engine);
	}

	return 0;
}

/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 * the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!list_empty(&engine->active.requests));
	tasklet_kill(&engine->execlists.tasklet); /* flush the callback */

	cleanup_status_page(engine);

	intel_engine_fini_retire(engine);
	intel_engine_pool_fini(&engine->pool);
	intel_engine_fini_breadcrumbs(engine);
	intel_engine_cleanup_cmd_parser(engine);

	if (engine->default_state)
		i915_gem_object_put(engine->default_state);

	if (engine->kernel_context) {
		intel_context_unpin(engine->kernel_context);
		intel_context_put(engine->kernel_context);
	}
	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));

	intel_wa_list_free(&engine->ctx_wa_list);
	intel_wa_list_free(&engine->wa_list);
	intel_wa_list_free(&engine->whitelist);
}

/**
 * intel_engine_resume - re-initializes the HW state of the engine
 * @engine: Engine to resume.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_resume(struct intel_engine_cs *engine)
{
	intel_engine_apply_workarounds(engine);
	intel_engine_apply_whitelist(engine);

	return engine->resume(engine);
}

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	u64 acthd;

	if (INTEL_GEN(i915) >= 8)
		acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
	else if (INTEL_GEN(i915) >= 4)
		acthd = ENGINE_READ(engine, RING_ACTHD);
	else
		acthd = ENGINE_READ(engine, ACTHD);

	return acthd;
}

u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
{
	u64 bbaddr;

	if (INTEL_GEN(engine->i915) >= 8)
		bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
	else
		bbaddr = ENGINE_READ(engine, RING_BBADDR);

	return bbaddr;
}

static unsigned long stop_timeout(const struct intel_engine_cs *engine)
{
	if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
		return 0;

	/*
	 * If we are doing a normal GPU reset, we can take our time and allow
	 * the engine to quiesce. We've stopped submission to the engine, and
	 * if we wait long enough an innocent context should complete and
	 * leave the engine idle. So they should not be caught unaware by
	 * the forthcoming GPU reset (which usually follows the stop_cs)!
	 */
	return READ_ONCE(engine->props.stop_timeout_ms);
}

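/*
 * For example: when called from the atomic preempt-reset path this
 * returns 0, so intel_engine_stop_cs() below relies only on its initial
 * 1ms busy-wait; for an ordinary reset it returns
 * engine->props.stop_timeout_ms (CONFIG_DRM_I915_STOP_TIMEOUT), giving
 * innocent contexts a chance to drain first.
 */
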
int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	const i915_reg_t mode = RING_MI_MODE(base);
	int err;

	if (INTEL_GEN(engine->i915) < 3)
		return -ENODEV;

	ENGINE_TRACE(engine, "\n");

	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));

	err = 0;
	if (__intel_wait_for_register_fw(uncore,
					 mode, MODE_IDLE, MODE_IDLE,
					 1000, stop_timeout(engine),
					 NULL)) {
		ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n");
		err = -ETIMEDOUT;
	}

	/* A final mmio read to let GPU writes be hopefully flushed to memory */
	intel_uncore_posting_read_fw(uncore, mode);

	return err;
}

void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
	ENGINE_TRACE(engine, "\n");

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}

const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
{
	switch (type) {
	case I915_CACHE_NONE: return " uncached";
	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
	case I915_CACHE_L3_LLC: return " L3+LLC";
	case I915_CACHE_WT: return " WT";
	default: return "";
	}
}

static u32
read_subslice_reg(const struct intel_engine_cs *engine,
		  int slice, int subslice, i915_reg_t reg)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
	enum forcewake_domains fw_domains;

	if (INTEL_GEN(i915) >= 11) {
		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
		mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
	} else {
		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
		mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	}

	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw_domains);

	old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);

	mcr &= ~mcr_mask;
	mcr |= mcr_ss;
	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	val = intel_uncore_read_fw(uncore, reg);

	mcr &= ~mcr_mask;
	mcr |= old_mcr & mcr_mask;

	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(uncore, fw_domains);
	spin_unlock_irq(&uncore->lock);

	return val;
}

/* NB: please notice the memset */
void intel_engine_get_instdone(const struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *i915 = engine->i915;
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	struct intel_uncore *uncore = engine->uncore;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(i915)) {
	default:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		if (INTEL_GEN(i915) >= 12) {
			instdone->slice_common_extra[0] =
				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA);
			instdone->slice_common_extra[1] =
				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
		}
		for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		instdone->sampler[0][0] =
			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] =
			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
		if (engine->id == RCS0)
			/* HACK: Using the wrong struct member */
			instdone->slice_common =
				intel_uncore_read(uncore, GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
		break;
	}
}

static bool ring_is_idle(struct intel_engine_cs *engine)
{
	bool idle = true;

	if (I915_SELFTEST_ONLY(!engine->mmio_base))
		return true;

	if (!intel_engine_pm_get_if_awake(engine))
		return true;

	/* First check that no commands are left in the ring */
	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(engine->i915) > 2 &&
	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
		idle = false;

	intel_engine_pm_put(engine);

	return idle;
}

void intel_engine_flush_submission(struct intel_engine_cs *engine)
{
	struct tasklet_struct *t = &engine->execlists.tasklet;

	if (__tasklet_is_scheduled(t)) {
		local_bh_disable();
		if (tasklet_trylock(t)) {
			/* Must wait for any GPU reset in progress. */
			if (__tasklet_is_enabled(t))
				t->func(t->data);
			tasklet_unlock(t);
		}
		local_bh_enable();
	}

	/* Otherwise flush the tasklet if it was running on another cpu */
	tasklet_unlock_wait(t);
}

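/*
 * Callers typically flush before re-checking engine state, as
 * intel_engine_is_idle() below does:
 *
 *	if (execlists_active(&engine->execlists)) {
 *		intel_engine_flush_submission(engine);
 *		if (execlists_active(&engine->execlists))
 *			return false;
 *	}
 */
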
/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and that the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	/* More white lies, if wedged, hw state is inconsistent */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	if (!intel_engine_pm_is_awake(engine))
		return true;

	/* Waiting to drain ELSP? */
	if (execlists_active(&engine->execlists)) {
		synchronize_hardirq(engine->i915->drm.pdev->irq);

		intel_engine_flush_submission(engine);

		if (execlists_active(&engine->execlists))
			return false;
	}

	/* ELSP is empty, but there are ready requests? E.g. after reset */
	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
		return false;

	/* Ring stopped? */
	return ring_is_idle(engine);
}

bool intel_engines_are_idle(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
	 */
	if (intel_gt_is_wedged(gt))
		return true;

	/* Already parked (and passed an idleness test); must still be idle */
	if (!READ_ONCE(gt->awake))
		return true;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

void intel_engines_reset_default_submission(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id)
		engine->set_default_submission(engine);
}

bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
	switch (INTEL_GEN(engine->i915)) {
	case 2:
		return false; /* uses physical not virtual addresses */
	case 3:
		/* maybe only uses physical not virtual addresses */
		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
	case 4:
		return !IS_I965G(engine->i915); /* who knows! */
	case 6:
		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
	default:
		return true;
	}
}

static int print_sched_attr(struct drm_i915_private *i915,
			    const struct i915_sched_attr *attr,
			    char *buf, int x, int len)
{
	if (attr->priority == I915_PRIORITY_INVALID)
		return x;

	x += snprintf(buf + x, len - x,
		      " prio=%d", attr->priority);

	return x;
}

static void print_request(struct drm_printer *m,
			  struct i915_request *rq,
			  const char *prefix)
{
	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
	char buf[80] = "";
	int x = 0;

	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));

	drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
		   prefix,
		   rq->fence.context, rq->fence.seqno,
		   i915_request_completed(rq) ? "!" :
		   i915_request_started(rq) ? "*" :
		   "",
		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
			    &rq->fence.flags) ? "+" :
		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			    &rq->fence.flags) ? "-" :
		   "",
		   buf,
		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
		   name);
}

static void hexdump(struct drm_printer *m, const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				drm_printf(m, "*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		drm_printf(m, "[%04zx] %s\n", pos, line);

		prev = buf + pos;
		skip = false;
	}
}

static struct intel_timeline *get_timeline(struct i915_request *rq)
{
	struct intel_timeline *tl;

	/*
	 * Even though we are holding the engine->active.lock here, there
	 * is no control over the submission queue per-se and we are
	 * inspecting the active state at a random point in time, with an
	 * unknown queue. Play safe and make sure the timeline remains valid.
	 * (Only being used for pretty printing, one extra kref shouldn't
	 * cause a camel stampede!)
	 */
	rcu_read_lock();
	tl = rcu_dereference(rq->timeline);
	if (!kref_get_unless_zero(&tl->kref))
		tl = NULL;
	rcu_read_unlock();

	return tl;
}

1277
2229adc8
CW
1278static const char *repr_timer(const struct timer_list *t)
1279{
1280 if (!READ_ONCE(t->expires))
1281 return "inactive";
1282
1283 if (timer_pending(t))
1284 return "active";
1285
1286 return "expired";
1287}
1288
eca15360 1289static void intel_engine_print_registers(struct intel_engine_cs *engine,
3ceda3a4 1290 struct drm_printer *m)
f636edb2 1291{
f636edb2 1292 struct drm_i915_private *dev_priv = engine->i915;
c36eebd9 1293 struct intel_engine_execlists * const execlists = &engine->execlists;
f636edb2
CW
1294 u64 addr;
1295
b26496ae 1296 if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
baba6e57 1297 drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
220dcfc1
CW
1298 if (HAS_EXECLISTS(dev_priv)) {
1299 drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
1300 ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
1301 drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
1302 ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
1303 }
3ceda3a4 1304 drm_printf(m, "\tRING_START: 0x%08x\n",
baba6e57 1305 ENGINE_READ(engine, RING_START));
3ceda3a4 1306 drm_printf(m, "\tRING_HEAD: 0x%08x\n",
baba6e57 1307 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
3ceda3a4 1308 drm_printf(m, "\tRING_TAIL: 0x%08x\n",
baba6e57 1309 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
3c75de5b 1310 drm_printf(m, "\tRING_CTL: 0x%08x%s\n",
baba6e57
DCS
1311 ENGINE_READ(engine, RING_CTL),
1312 ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
3c75de5b
CW
1313 if (INTEL_GEN(engine->i915) > 2) {
1314 drm_printf(m, "\tRING_MODE: 0x%08x%s\n",
baba6e57
DCS
1315 ENGINE_READ(engine, RING_MI_MODE),
1316 ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
3c75de5b 1317 }
3ceda3a4
CW
1318
1319 if (INTEL_GEN(dev_priv) >= 6) {
70a76a9b 1320 drm_printf(m, "\tRING_IMR: 0x%08x\n",
baba6e57 1321 ENGINE_READ(engine, RING_IMR));
70a76a9b
CW
1322 drm_printf(m, "\tRING_ESR: 0x%08x\n",
1323 ENGINE_READ(engine, RING_ESR));
1324 drm_printf(m, "\tRING_EMR: 0x%08x\n",
1325 ENGINE_READ(engine, RING_EMR));
1326 drm_printf(m, "\tRING_EIR: 0x%08x\n",
1327 ENGINE_READ(engine, RING_EIR));
3ceda3a4
CW
1328 }
1329
f636edb2
CW
1330 addr = intel_engine_get_active_head(engine);
1331 drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
1332 upper_32_bits(addr), lower_32_bits(addr));
1333 addr = intel_engine_get_last_batch_head(engine);
1334 drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
1335 upper_32_bits(addr), lower_32_bits(addr));
a0cf5790 1336 if (INTEL_GEN(dev_priv) >= 8)
baba6e57 1337 addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
a0cf5790 1338 else if (INTEL_GEN(dev_priv) >= 4)
baba6e57 1339 addr = ENGINE_READ(engine, RING_DMA_FADD);
a0cf5790 1340 else
baba6e57 1341 addr = ENGINE_READ(engine, DMA_FADD_I8XX);
a0cf5790
CW
1342 drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
1343 upper_32_bits(addr), lower_32_bits(addr));
1344 if (INTEL_GEN(dev_priv) >= 4) {
1345 drm_printf(m, "\tIPEIR: 0x%08x\n",
baba6e57 1346 ENGINE_READ(engine, RING_IPEIR));
a0cf5790 1347 drm_printf(m, "\tIPEHR: 0x%08x\n",
baba6e57 1348 ENGINE_READ(engine, RING_IPEHR));
a0cf5790 1349 } else {
baba6e57
DCS
1350 drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
1351 drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
a0cf5790 1352 }
f636edb2 1353
fb5c551a 1354 if (HAS_EXECLISTS(dev_priv)) {
22b7a426 1355 struct i915_request * const *port, *rq;
0ca88ba0
CW
1356 const u32 *hws =
1357 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
7d4c75d9 1358 const u8 num_entries = execlists->csb_size;
f636edb2 1359 unsigned int idx;
df4f94e8 1360 u8 read, write;
f636edb2 1361
3a7a92ab 1362 drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
2229adc8
CW
1363 yesno(test_bit(TASKLET_STATE_SCHED,
1364 &engine->execlists.tasklet.state)),
1365 enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
3a7a92ab 1366 repr_timer(&engine->execlists.preempt),
2229adc8 1367 repr_timer(&engine->execlists.timer));
f636edb2 1368
df4f94e8
CW
1369 read = execlists->csb_head;
1370 write = READ_ONCE(*execlists->csb_write);
1371
2229adc8
CW
1372 drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
1373 ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
1374 ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
1375 read, write, num_entries);
1376
7d4c75d9 1377 if (read >= num_entries)
f636edb2 1378 read = 0;
7d4c75d9 1379 if (write >= num_entries)
f636edb2
CW
1380 write = 0;
1381 if (read > write)
7d4c75d9 1382 write += num_entries;
f636edb2 1383 while (read < write) {
7d4c75d9
MK
1384 idx = ++read % num_entries;
1385 drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
1386 idx, hws[idx * 2], hws[idx * 2 + 1]);
f636edb2
CW
1387 }
1388
c36eebd9 1389 execlists_active_lock_bh(execlists);
fecffa46 1390 rcu_read_lock();
22b7a426 1391 for (port = execlists->active; (rq = *port); port++) {
489645d5 1392 char hdr[160];
22b7a426
CW
1393 int len;
1394
61f874d6
TI
1395 len = scnprintf(hdr, sizeof(hdr),
1396 "\t\tActive[%d]: ",
1397 (int)(port - execlists->active));
d19d71fc
CW
1398 if (!i915_request_signaled(rq)) {
1399 struct intel_timeline *tl = get_timeline(rq);
1400
61f874d6
TI
1401 len += scnprintf(hdr + len, sizeof(hdr) - len,
1402 "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
1403 i915_ggtt_offset(rq->ring->vma),
1404 tl ? tl->hwsp_offset : 0,
1405 hwsp_seqno(rq),
1406 DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
1407 1000 * 1000));
d19d71fc
CW
1408
1409 if (tl)
1410 intel_timeline_put(tl);
1411 }
61f874d6 1412 scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
22b7a426
CW
1413 print_request(m, rq, hdr);
1414 }
1415 for (port = execlists->pending; (rq = *port); port++) {
d19d71fc 1416 struct intel_timeline *tl = get_timeline(rq);
eca15360 1417 char hdr[80];
f636edb2 1418
22b7a426
CW
1419 snprintf(hdr, sizeof(hdr),
1420 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
1421 (int)(port - execlists->pending),
1422 i915_ggtt_offset(rq->ring->vma),
d19d71fc 1423 tl ? tl->hwsp_offset : 0,
22b7a426
CW
1424 hwsp_seqno(rq));
1425 print_request(m, rq, hdr);
d19d71fc
CW
1426
1427 if (tl)
1428 intel_timeline_put(tl);
f636edb2 1429 }
fecffa46 1430 rcu_read_unlock();
c36eebd9 1431 execlists_active_unlock_bh(execlists);
f636edb2
CW
1432 } else if (INTEL_GEN(dev_priv) > 6) {
1433 drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
baba6e57 1434 ENGINE_READ(engine, RING_PP_DIR_BASE));
f636edb2 1435 drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
baba6e57 1436 ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
f636edb2 1437 drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
baba6e57 1438 ENGINE_READ(engine, RING_PP_DIR_DCLV));
f636edb2 1439 }
3ceda3a4
CW
1440}
1441
static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
	void *ring;
	int size;

	drm_printf(m,
		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
		   rq->head, rq->postfix, rq->tail,
		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);

	size = rq->tail - rq->head;
	if (rq->tail < rq->head)
		size += rq->ring->size;

	ring = kmalloc(size, GFP_ATOMIC);
	if (ring) {
		const void *vaddr = rq->ring->vaddr;
		unsigned int head = rq->head;
		unsigned int len = 0;

		if (rq->tail < head) {
			len = rq->ring->size - head;
			memcpy(ring, vaddr + head, len);
			head = 0;
		}
		memcpy(ring + len, vaddr + head, size - len);

		hexdump(m, ring, size);
		kfree(ring);
	}
}

static unsigned long list_count(struct list_head *list)
{
	struct list_head *pos;
	unsigned long count = 0;

	list_for_each(pos, list)
		count++;

	return count;
}

void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...)
{
	struct i915_gpu_error * const error = &engine->i915->gpu_error;
	struct i915_request *rq;
	intel_wakeref_t wakeref;
	unsigned long flags;

	if (header) {
		va_list ap;

		va_start(ap, header);
		drm_vprintf(m, header, &ap);
		va_end(ap);
	}

	if (intel_gt_is_wedged(engine->gt))
		drm_printf(m, "*** WEDGED ***\n");

	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
	drm_printf(m, "\tBarriers?: %s\n",
		   yesno(!llist_empty(&engine->barrier_tasks)));
	drm_printf(m, "\tLatency: %luus\n",
		   ewma__engine_latency_read(&engine->latency));

	rcu_read_lock();
	rq = READ_ONCE(engine->heartbeat.systole);
	if (rq)
		drm_printf(m, "\tHeartbeat: %d ms ago\n",
			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	rcu_read_unlock();
	drm_printf(m, "\tReset count: %d (global %d)\n",
		   i915_reset_engine_count(error, engine),
		   i915_reset_count(error));

	drm_printf(m, "\tRequests:\n");

	spin_lock_irqsave(&engine->active.lock, flags);
	rq = intel_engine_find_active_request(engine);
	if (rq) {
		struct intel_timeline *tl = get_timeline(rq);

		print_request(m, rq, "\t\tactive ");

		drm_printf(m, "\t\tring->start: 0x%08x\n",
			   i915_ggtt_offset(rq->ring->vma));
		drm_printf(m, "\t\tring->head: 0x%08x\n",
			   rq->ring->head);
		drm_printf(m, "\t\tring->tail: 0x%08x\n",
			   rq->ring->tail);
		drm_printf(m, "\t\tring->emit: 0x%08x\n",
			   rq->ring->emit);
		drm_printf(m, "\t\tring->space: 0x%08x\n",
			   rq->ring->space);

		if (tl) {
			drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
				   tl->hwsp_offset);
			intel_timeline_put(tl);
		}

		print_request_ring(m, rq);

		if (rq->context->lrc_reg_state) {
			drm_printf(m, "Logical Ring Context:\n");
			hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
		}
	}
	drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold));
	spin_unlock_irqrestore(&engine->active.lock, flags);

	drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
	if (wakeref) {
		intel_engine_print_registers(engine, m);
		intel_runtime_pm_put(engine->uncore->rpm, wakeref);
	} else {
		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
	}

	intel_execlists_show_requests(engine, m, print_request, 8);

	drm_printf(m, "HWSP:\n");
	hexdump(m, engine->status_page.addr, PAGE_SIZE);

	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));

	intel_engine_print_breadcrumbs(engine, m);
}

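/*
 * A typical caller wraps a drm_printer around its sink and dumps the
 * engine into it, e.g. (sketch of the pattern used by debugfs and error
 * capture):
 *
 *	struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
 *
 *	intel_engine_dump(engine, &p, "%s\n", engine->name);
 */
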
/**
 * intel_enable_engine_stats() - Enable engine busy tracking on engine
 * @engine: engine to enable stats collection
 *
 * Start collecting the engine busyness data for @engine.
 *
 * Returns 0 on success or a negative error code.
 */
int intel_enable_engine_stats(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *execlists = &engine->execlists;
	unsigned long flags;
	int err = 0;

	if (!intel_engine_supports_stats(engine))
		return -ENODEV;

	execlists_active_lock_bh(execlists);
	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (unlikely(engine->stats.enabled == ~0)) {
		err = -EBUSY;
		goto unlock;
	}

	if (engine->stats.enabled++ == 0) {
		struct i915_request * const *port;
		struct i915_request *rq;

		engine->stats.enabled_at = ktime_get();

		/* XXX submission method oblivious? */
		for (port = execlists->active; (rq = *port); port++)
			engine->stats.active++;

		for (port = execlists->pending; (rq = *port); port++) {
			/* Exclude any contexts already counted in active */
			if (!intel_context_inflight_count(rq->context))
				engine->stats.active++;
		}

		if (engine->stats.active)
			engine->stats.start = engine->stats.enabled_at;
	}

unlock:
	write_sequnlock_irqrestore(&engine->stats.lock, flags);
	execlists_active_unlock_bh(execlists);

	return err;
}

static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
	ktime_t total = engine->stats.total;

	/*
	 * If the engine is executing something at the moment
	 * add it to the total.
	 */
	if (engine->stats.active)
		total = ktime_add(total,
				  ktime_sub(ktime_get(), engine->stats.start));

	return total;
}

/**
 * intel_engine_get_busy_time() - Return current accumulated engine busyness
 * @engine: engine to report on
 *
 * Returns accumulated time @engine was busy since engine stats were enabled.
 */
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	do {
		seq = read_seqbegin(&engine->stats.lock);
		total = __intel_engine_get_busy_time(engine);
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}

/**
 * intel_disable_engine_stats() - Disable engine busy tracking on engine
 * @engine: engine to disable stats collection
 *
 * Stops collecting the engine busyness data for @engine.
 */
void intel_disable_engine_stats(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (!intel_engine_supports_stats(engine))
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);
	WARN_ON_ONCE(engine->stats.enabled == 0);
	if (--engine->stats.enabled == 0) {
		engine->stats.total = __intel_engine_get_busy_time(engine);
		engine->stats.active = 0;
	}
	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

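/*
 * The PMU consumes these helpers roughly as follows (sketch only; the
 * real code in i915_pmu.c also handles reference counting and errors):
 *
 *	intel_enable_engine_stats(engine);
 *	t0 = intel_engine_get_busy_time(engine);
 *	...
 *	t1 = intel_engine_get_busy_time(engine);
 *	busy_ns = ktime_to_ns(ktime_sub(t1, t0));
 *	intel_disable_engine_stats(engine);
 */
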
static bool match_ring(struct i915_request *rq)
{
	u32 ring = ENGINE_READ(rq->engine, RING_START);

	return ring == i915_ggtt_offset(rq->ring->vma);
}

struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine)
{
	struct i915_request *request, *active = NULL;

	/*
	 * We are called by the error capture, reset and to dump engine
	 * state at random points in time. In particular, note that neither is
	 * crucially ordered with an interrupt. After a hang, the GPU is dead
	 * and we assume that no more writes can happen (we waited long enough
	 * for all writes that were in transaction to be flushed) - adding an
	 * extra delay for a recent interrupt is pointless. Hence, we do
	 * not need an engine->irq_seqno_barrier() before the seqno reads.
	 * At all other times, we must assume the GPU is still running, but
	 * we only care about the snapshot of this moment.
	 */
	lockdep_assert_held(&engine->active.lock);

	rcu_read_lock();
	request = execlists_active(&engine->execlists);
	if (request) {
		struct intel_timeline *tl = request->context->timeline;

		list_for_each_entry_from_reverse(request, &tl->requests, link) {
			if (i915_request_completed(request))
				break;

			active = request;
		}
	}
	rcu_read_unlock();
	if (active)
		return active;

	list_for_each_entry(request, &engine->active.requests, sched.link) {
		if (i915_request_completed(request))
			continue;

		if (!i915_request_started(request))
			continue;

		/* More than one preemptible request may match! */
		if (!match_ring(request))
			continue;

		active = request;
		break;
	}

	return active;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "mock_engine.c"
#include "selftest_engine.c"
#include "selftest_engine_cs.c"
#endif