]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - drivers/gpu/drm/i915/i915_gpu_error.h
Merge tag 'pwm/for-5.2-rc1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git...
[thirdparty/kernel/stable.git] / drivers / gpu / drm / i915 / i915_gpu_error.h
CommitLineData
d897a111
MW
1/*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2008-2018 Intel Corporation
5 */
6
7#ifndef _I915_GPU_ERROR_H_
8#define _I915_GPU_ERROR_H_
9
10#include <linux/kref.h>
11#include <linux/ktime.h>
12#include <linux/sched.h>
13
14#include <drm/drm_mm.h>
15
16#include "intel_device_info.h"
17#include "intel_ringbuffer.h"
18#include "intel_uc_fw.h"
19
20#include "i915_gem.h"
21#include "i915_gem_gtt.h"
22#include "i915_params.h"
b7268c5e 23#include "i915_scheduler.h"
d897a111
MW
24
25struct drm_i915_private;
26struct intel_overlay_error_state;
27struct intel_display_error_state;
28
29struct i915_gpu_state {
30 struct kref ref;
31 ktime_t time;
32 ktime_t boottime;
33 ktime_t uptime;
043477b0
MK
34 unsigned long capture;
35 unsigned long epoch;
d897a111
MW
36
37 struct drm_i915_private *i915;
38
39 char error_msg[128];
40 bool simulated;
41 bool awake;
42 bool wakelock;
43 bool suspended;
44 int iommu;
45 u32 reset_count;
46 u32 suspend_count;
47 struct intel_device_info device_info;
0258404f 48 struct intel_runtime_info runtime_info;
d897a111
MW
49 struct intel_driver_caps driver_caps;
50 struct i915_params params;
51
52 struct i915_error_uc {
53 struct intel_uc_fw guc_fw;
54 struct intel_uc_fw huc_fw;
55 struct drm_i915_error_object *guc_log;
56 } uc;
57
58 /* Generic register state */
59 u32 eir;
60 u32 pgtbl_er;
61 u32 ier;
6b7a6a7b 62 u32 gtier[6], ngtier;
d897a111
MW
63 u32 ccid;
64 u32 derrmr;
65 u32 forcewake;
66 u32 error; /* gen6+ */
67 u32 err_int; /* gen7 */
68 u32 fault_data0; /* gen8, gen9 */
69 u32 fault_data1; /* gen8, gen9 */
70 u32 done_reg;
71 u32 gac_eco;
72 u32 gam_ecochk;
73 u32 gab_ctl;
74 u32 gfx_mode;
75
76 u32 nfence;
77 u64 fence[I915_MAX_NUM_FENCES];
78 struct intel_overlay_error_state *overlay;
79 struct intel_display_error_state *display;
80
81 struct drm_i915_error_engine {
82 int engine_id;
83 /* Software tracked state */
84 bool idle;
d897a111 85 unsigned long hangcheck_timestamp;
d897a111
MW
86 struct i915_address_space *vm;
87 int num_requests;
88 u32 reset_count;
89
90 /* position of active request inside the ring */
91 u32 rq_head, rq_post, rq_tail;
92
93 /* our own tracking of ring head and tail */
94 u32 cpu_ring_head;
95 u32 cpu_ring_tail;
96
d897a111
MW
97 /* Register state */
98 u32 start;
99 u32 tail;
100 u32 head;
101 u32 ctl;
102 u32 mode;
103 u32 hws;
104 u32 ipeir;
105 u32 ipehr;
106 u32 bbstate;
107 u32 instpm;
108 u32 instps;
d897a111
MW
109 u64 bbaddr;
110 u64 acthd;
111 u32 fault_reg;
112 u64 faddr;
113 u32 rc_psmi; /* sleep state */
d897a111
MW
114 struct intel_instdone instdone;
115
116 struct drm_i915_error_context {
117 char comm[TASK_COMM_LEN];
118 pid_t pid;
d897a111 119 u32 hw_id;
d897a111
MW
120 int active;
121 int guilty;
b7268c5e 122 struct i915_sched_attr sched_attr;
d897a111
MW
123 } context;
124
125 struct drm_i915_error_object {
126 u64 gtt_offset;
127 u64 gtt_size;
83bc0f5b 128 int num_pages;
d897a111
MW
129 int page_count;
130 int unused;
131 u32 *pages[0];
132 } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
133
134 struct drm_i915_error_object **user_bo;
135 long user_bo_count;
136
137 struct drm_i915_error_object *wa_ctx;
138 struct drm_i915_error_object *default_state;
139
140 struct drm_i915_error_request {
52c0fdb2 141 unsigned long flags;
d897a111
MW
142 long jiffies;
143 pid_t pid;
144 u32 context;
d897a111 145 u32 seqno;
3a068721 146 u32 start;
d897a111
MW
147 u32 head;
148 u32 tail;
b7268c5e 149 struct i915_sched_attr sched_attr;
d897a111
MW
150 } *requests, execlist[EXECLIST_MAX_PORTS];
151 unsigned int num_ports;
152
d897a111
MW
153 struct {
154 u32 gfx_mode;
155 union {
156 u64 pdp[4];
157 u32 pp_dir_base;
158 };
159 } vm_info;
160 } engine[I915_NUM_ENGINES];
161
162 struct drm_i915_error_buffer {
163 u32 size;
164 u32 name;
d897a111
MW
165 u64 gtt_offset;
166 u32 read_domains;
167 u32 write_domain;
168 s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
169 u32 tiling:2;
170 u32 dirty:1;
171 u32 purgeable:1;
172 u32 userptr:1;
d897a111
MW
173 u32 cache_level:3;
174 } *active_bo[I915_NUM_ENGINES], *pinned_bo;
175 u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
176 struct i915_address_space *active_vm[I915_NUM_ENGINES];
0e39037b
CW
177
178 struct scatterlist *sgl, *fit;
d897a111
MW
179};
180
eb8d0f5a
CW
181struct i915_gpu_restart;
182
d897a111
MW
183struct i915_gpu_error {
184 /* For hangcheck timer */
185#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
186#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
187
188 struct delayed_work hangcheck_work;
189
190 /* For reset and error_state handling. */
191 spinlock_t lock;
192 /* Protected by the above dev->gpu_error.lock. */
193 struct i915_gpu_state *first_error;
194
195 atomic_t pending_fb_pin;
196
d897a111
MW
197 /**
198 * flags: Control various stages of the GPU reset
199 *
2caffbf1
CW
200 * #I915_RESET_BACKOFF - When we start a global reset, we need to
201 * serialise with any other users attempting to do the same, and
202 * any global resources that may be clobber by the reset (such as
203 * FENCE registers).
d897a111 204 *
d897a111
MW
205 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
206 * acquire the struct_mutex to reset an engine, we need an explicit
207 * flag to prevent two concurrent reset attempts in the same engine.
208 * As the number of engines continues to grow, allocate the flags from
209 * the most significant bits.
210 *
211 * #I915_WEDGED - If reset fails and we can no longer use the GPU,
212 * we set the #I915_WEDGED bit. Prior to command submission, e.g.
213 * i915_request_alloc(), this bit is checked and the sequence
214 * aborted (with -EIO reported to userspace) if set.
215 */
216 unsigned long flags;
217#define I915_RESET_BACKOFF 0
eb8d0f5a
CW
218#define I915_RESET_MODESET 1
219#define I915_RESET_ENGINE 2
d897a111 220#define I915_WEDGED (BITS_PER_LONG - 1)
d897a111 221
2caffbf1
CW
222 /** Number of times the device has been reset (global) */
223 u32 reset_count;
224
d897a111
MW
225 /** Number of times an engine has been reset */
226 u32 reset_engine_count[I915_NUM_ENGINES];
227
18bb2bcc
CW
228 struct mutex wedge_mutex; /* serialises wedging/unwedging */
229
d897a111
MW
230 /**
231 * Waitqueue to signal when a hang is detected. Used to for waiters
232 * to release the struct_mutex for the reset to procede.
233 */
234 wait_queue_head_t wait_queue;
235
236 /**
237 * Waitqueue to signal when the reset has completed. Used by clients
238 * that wait for dev_priv->mm.wedged to settle.
239 */
240 wait_queue_head_t reset_queue;
241
2caffbf1
CW
242 struct srcu_struct reset_backoff_srcu;
243
eb8d0f5a 244 struct i915_gpu_restart *restart;
d897a111
MW
245};
246
247struct drm_i915_error_state_buf {
248 struct drm_i915_private *i915;
0e39037b
CW
249 struct scatterlist *sgl, *cur, *end;
250
251 char *buf;
252 size_t bytes;
253 size_t size;
254 loff_t iter;
255
d897a111 256 int err;
d897a111
MW
257};
258
259#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
260
261__printf(2, 3)
262void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
d897a111
MW
263
264struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
265void i915_capture_error_state(struct drm_i915_private *dev_priv,
3a891a62 266 intel_engine_mask_t engine_mask,
d897a111
MW
267 const char *error_msg);
268
269static inline struct i915_gpu_state *
270i915_gpu_state_get(struct i915_gpu_state *gpu)
271{
272 kref_get(&gpu->ref);
273 return gpu;
274}
275
0e39037b
CW
276ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
277 char *buf, loff_t offset, size_t count);
278
d897a111
MW
279void __i915_gpu_state_free(struct kref *kref);
280static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
281{
282 if (gpu)
283 kref_put(&gpu->ref, __i915_gpu_state_free);
284}
285
286struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
287void i915_reset_error_state(struct drm_i915_private *i915);
fb6f0b64 288void i915_disable_error_state(struct drm_i915_private *i915, int err);
d897a111
MW
289
290#else
291
292static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
293 u32 engine_mask,
294 const char *error_msg)
295{
296}
297
298static inline struct i915_gpu_state *
299i915_first_error_state(struct drm_i915_private *i915)
300{
fb6f0b64 301 return ERR_PTR(-ENODEV);
d897a111
MW
302}
303
304static inline void i915_reset_error_state(struct drm_i915_private *i915)
305{
306}
307
fb6f0b64
CW
308static inline void i915_disable_error_state(struct drm_i915_private *i915,
309 int err)
310{
311}
312
d897a111
MW
313#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
314
315#endif /* _I915_GPU_ERROR_H_ */