]>
Commit | Line | Data |
---|---|---|
d897a111 MW |
1 | /* |
2 | * SPDX-License-Identifier: MIT | |
3 | * | |
4 | * Copyright © 2008-2018 Intel Corporation | |
5 | */ | |
6 | ||
7 | #ifndef _I915_GPU_ERROR_H_ | |
8 | #define _I915_GPU_ERROR_H_ | |
9 | ||
10 | #include <linux/kref.h> | |
11 | #include <linux/ktime.h> | |
12 | #include <linux/sched.h> | |
13 | ||
14 | #include <drm/drm_mm.h> | |
15 | ||
16 | #include "intel_device_info.h" | |
17 | #include "intel_ringbuffer.h" | |
18 | #include "intel_uc_fw.h" | |
19 | ||
20 | #include "i915_gem.h" | |
21 | #include "i915_gem_gtt.h" | |
22 | #include "i915_params.h" | |
b7268c5e | 23 | #include "i915_scheduler.h" |
d897a111 MW |
24 | |
25 | struct drm_i915_private; | |
26 | struct intel_overlay_error_state; | |
27 | struct intel_display_error_state; | |
28 | ||
29 | struct i915_gpu_state { /* GPU state snapshot; the type returned by i915_capture_gpu_state() and i915_first_error_state() */ | |
30 | struct kref ref; /* refcount: taken by i915_gpu_state_get(), dropped by i915_gpu_state_put(), released through __i915_gpu_state_free() */ | |
31 | ktime_t time; | |
32 | ktime_t boottime; | |
33 | ktime_t uptime; | |
043477b0 MK |
34 | unsigned long capture; |
35 | unsigned long epoch; | |
d897a111 MW |
36 | |
37 | struct drm_i915_private *i915; | |
38 | ||
39 | char error_msg[128]; /* NOTE(review): presumably derived from the error_msg passed to i915_capture_error_state() - confirm */ | |
40 | bool simulated; | |
41 | bool awake; | |
42 | bool wakelock; | |
43 | bool suspended; | |
44 | int iommu; | |
45 | u32 reset_count; | |
46 | u32 suspend_count; | |
47 | struct intel_device_info device_info; | |
0258404f | 48 | struct intel_runtime_info runtime_info; |
d897a111 MW |
49 | struct intel_driver_caps driver_caps; |
50 | struct i915_params params; | |
51 | ||
52 | struct i915_error_uc { | |
53 | struct intel_uc_fw guc_fw; | |
54 | struct intel_uc_fw huc_fw; | |
55 | struct drm_i915_error_object *guc_log; | |
56 | } uc; | |
57 | ||
58 | /* Generic register state */ | |
59 | u32 eir; | |
60 | u32 pgtbl_er; | |
61 | u32 ier; | |
6b7a6a7b | 62 | u32 gtier[6], ngtier; /* NOTE(review): ngtier presumably counts the valid gtier[] entries - confirm */ |
d897a111 MW |
63 | u32 ccid; |
64 | u32 derrmr; | |
65 | u32 forcewake; | |
66 | u32 error; /* gen6+ */ | |
67 | u32 err_int; /* gen7 */ | |
68 | u32 fault_data0; /* gen8, gen9 */ | |
69 | u32 fault_data1; /* gen8, gen9 */ | |
70 | u32 done_reg; | |
71 | u32 gac_eco; | |
72 | u32 gam_ecochk; | |
73 | u32 gab_ctl; | |
74 | u32 gfx_mode; | |
75 | ||
76 | u32 nfence; /* NOTE(review): presumably the number of valid fence[] entries - confirm */ | |
77 | u64 fence[I915_MAX_NUM_FENCES]; | |
78 | struct intel_overlay_error_state *overlay; | |
79 | struct intel_display_error_state *display; | |
80 | ||
81 | struct drm_i915_error_engine { /* per-engine record; instantiated as engine[I915_NUM_ENGINES] below */ | |
82 | int engine_id; | |
83 | /* Software tracked state */ | |
84 | bool idle; | |
d897a111 | 85 | unsigned long hangcheck_timestamp; |
d897a111 MW |
86 | struct i915_address_space *vm; |
87 | int num_requests; /* NOTE(review): presumably the length of the requests array declared further down - confirm */ | |
88 | u32 reset_count; | |
89 | ||
90 | /* position of active request inside the ring */ | |
91 | u32 rq_head, rq_post, rq_tail; | |
92 | ||
93 | /* our own tracking of ring head and tail */ | |
94 | u32 cpu_ring_head; | |
95 | u32 cpu_ring_tail; | |
96 | ||
d897a111 MW |
97 | /* Register state */ |
98 | u32 start; | |
99 | u32 tail; | |
100 | u32 head; | |
101 | u32 ctl; | |
102 | u32 mode; | |
103 | u32 hws; | |
104 | u32 ipeir; | |
105 | u32 ipehr; | |
106 | u32 bbstate; | |
107 | u32 instpm; | |
108 | u32 instps; | |
d897a111 MW |
109 | u64 bbaddr; |
110 | u64 acthd; | |
111 | u32 fault_reg; | |
112 | u64 faddr; | |
113 | u32 rc_psmi; /* sleep state */ | |
d897a111 MW |
114 | struct intel_instdone instdone; |
115 | ||
116 | struct drm_i915_error_context { | |
117 | char comm[TASK_COMM_LEN]; | |
118 | pid_t pid; | |
d897a111 | 119 | u32 hw_id; |
d897a111 MW |
120 | int active; |
121 | int guilty; | |
b7268c5e | 122 | struct i915_sched_attr sched_attr; |
d897a111 MW |
123 | } context; |
124 | ||
125 | struct drm_i915_error_object { /* object type shared by the pointers below and by uc.guc_log above */ | |
126 | u64 gtt_offset; | |
127 | u64 gtt_size; | |
83bc0f5b | 128 | int num_pages; |
d897a111 MW |
129 | int page_count; |
130 | int unused; | |
131 | u32 *pages[0]; /* zero-length trailing array (GNU C extension); NOTE(review): the C99 flexible array member spelling would be pages[] */ | |
132 | } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; /* the type definition and five per-engine pointers to it, in one declaration */ | |
133 | ||
134 | struct drm_i915_error_object **user_bo; | |
135 | long user_bo_count; /* NOTE(review): presumably the number of user_bo[] entries - confirm */ | |
136 | ||
137 | struct drm_i915_error_object *wa_ctx; | |
138 | struct drm_i915_error_object *default_state; | |
139 | ||
140 | struct drm_i915_error_request { | |
52c0fdb2 | 141 | unsigned long flags; |
d897a111 MW |
142 | long jiffies; |
143 | pid_t pid; | |
144 | u32 context; | |
d897a111 | 145 | u32 seqno; |
3a068721 | 146 | u32 start; |
d897a111 MW |
147 | u32 head; |
148 | u32 tail; | |
b7268c5e | 149 | struct i915_sched_attr sched_attr; |
d897a111 MW |
150 | } *requests, execlist[EXECLIST_MAX_PORTS]; /* requests is a pointer; execlist is a fixed-size array embedded in this record */ |
151 | unsigned int num_ports; /* NOTE(review): presumably the number of execlist[] entries in use - confirm */ | |
152 | ||
d897a111 MW |
153 | struct { |
154 | u32 gfx_mode; | |
155 | union { /* anonymous union: pdp[] and pp_dir_base share the same storage */ | |
156 | u64 pdp[4]; | |
157 | u32 pp_dir_base; | |
158 | }; | |
159 | } vm_info; | |
160 | } engine[I915_NUM_ENGINES]; /* array of I915_NUM_ENGINES per-engine records */ | |
161 | ||
162 | struct drm_i915_error_buffer { | |
163 | u32 size; | |
164 | u32 name; | |
d897a111 MW |
165 | u64 gtt_offset; |
166 | u32 read_domains; | |
167 | u32 write_domain; | |
168 | s32 fence_reg:I915_MAX_NUM_FENCE_BITS; /* signed bitfield, unlike the unsigned flag bitfields that follow */ | |
169 | u32 tiling:2; | |
170 | u32 dirty:1; | |
171 | u32 purgeable:1; | |
172 | u32 userptr:1; | |
d897a111 MW |
173 | u32 cache_level:3; |
174 | } *active_bo[I915_NUM_ENGINES], *pinned_bo; /* active_bo is an array of I915_NUM_ENGINES pointers; pinned_bo is a single pointer */ | |
175 | u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; /* NOTE(review): presumably the lengths of active_bo[i] and pinned_bo - confirm */ | |
176 | struct i915_address_space *active_vm[I915_NUM_ENGINES]; | |
0e39037b CW |
177 | |
178 | struct scatterlist *sgl, *fit; | |
d897a111 MW |
179 | }; |
180 | ||
eb8d0f5a CW |
181 | struct i915_gpu_restart; |
182 | ||
d897a111 MW |
183 | struct i915_gpu_error { /* hangcheck, reset and error-state bookkeeping */ | |
184 | /* For hangcheck timer */ | |
185 | #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ | |
186 | #define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) /* the same period converted to jiffies */ | |
187 | ||
188 | struct delayed_work hangcheck_work; | |
189 | ||
190 | /* For reset and error_state handling. */ | |
191 | spinlock_t lock; | |
192 | /* Protected by the above dev->gpu_error.lock. */ | |
193 | struct i915_gpu_state *first_error; | |
194 | ||
195 | atomic_t pending_fb_pin; | |
196 | ||
d897a111 MW |
197 | /** |
198 | * flags: Control various stages of the GPU reset | |
199 | * | |
2caffbf1 CW |
200 | * #I915_RESET_BACKOFF - When we start a global reset, we need to |
201 | * serialise with any other users attempting to do the same, and | |
202 | * any global resources that may be clobbered by the reset (such as | |
203 | * FENCE registers). | |
d897a111 | 204 | * |
d897a111 MW |
205 | * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to |
206 | * acquire the struct_mutex to reset an engine, we need an explicit | |
207 | * flag to prevent two concurrent reset attempts in the same engine. | |
208 | * As the number of engines continues to grow, allocate the flags from | |
209 | * the most significant bits. | |
210 | * | |
211 | * #I915_WEDGED - If reset fails and we can no longer use the GPU, | |
212 | * we set the #I915_WEDGED bit. Prior to command submission, e.g. | |
213 | * i915_request_alloc(), this bit is checked and the sequence | |
214 | * aborted (with -EIO reported to userspace) if set. | |
215 | */ | |
216 | unsigned long flags; | |
217 | #define I915_RESET_BACKOFF 0 | |
eb8d0f5a CW |
218 | #define I915_RESET_MODESET 1 /* NOTE(review): this bit is not described in the flags comment above - document its meaning */ |
219 | #define I915_RESET_ENGINE 2 /* NOTE(review): the comment above says engine flags are allocated from the most significant bits, yet this value is 2 - confirm how per-engine bits are derived */ | |
d897a111 | 220 | #define I915_WEDGED (BITS_PER_LONG - 1) /* the top bit of the unsigned long flags word */ |
d897a111 | 221 | |
2caffbf1 CW |
222 | /** Number of times the device has been reset (global) */ |
223 | u32 reset_count; | |
224 | ||
d897a111 MW |
225 | /** Number of times an engine has been reset */ |
226 | u32 reset_engine_count[I915_NUM_ENGINES]; | |
227 | ||
18bb2bcc CW |
228 | struct mutex wedge_mutex; /* serialises wedging/unwedging */ |
229 | ||
d897a111 MW |
230 | /** |
231 | * Waitqueue to signal when a hang is detected. Used for waiters | |
232 | * to release the struct_mutex for the reset to proceed. | |
233 | */ | |
234 | wait_queue_head_t wait_queue; | |
235 | ||
236 | /** | |
237 | * Waitqueue to signal when the reset has completed. Used by clients | |
238 | * that wait for dev_priv->mm.wedged to settle. NOTE(review): this struct tracks wedging through #I915_WEDGED in flags - confirm the mm.wedged reference is still current. | |
239 | */ | |
240 | wait_queue_head_t reset_queue; | |
241 | ||
2caffbf1 CW |
242 | struct srcu_struct reset_backoff_srcu; |
243 | ||
eb8d0f5a | 244 | struct i915_gpu_restart *restart; |
d897a111 MW |
245 | }; |
246 | ||
247 | struct drm_i915_error_state_buf { /* output state handed to i915_error_printf() */ | |
248 | struct drm_i915_private *i915; | |
0e39037b CW |
249 | struct scatterlist *sgl, *cur, *end; |
250 | |
251 | char *buf; | |
252 | size_t bytes; /* NOTE(review): presumably the number of bytes currently held in buf - confirm */ | |
253 | size_t size; /* NOTE(review): presumably the capacity of buf - confirm */ | |
254 | loff_t iter; | |
255 | ||
d897a111 | 256 | int err; /* NOTE(review): presumably a negative errno recorded while writing; 0 when no error - confirm */ |
d897a111 MW |
257 | }; |
258 | ||
259 | #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) | |
260 | ||
261 | __printf(2, 3) | |
262 | void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); | |
d897a111 MW |
263 | |
264 | struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); | |
265 | void i915_capture_error_state(struct drm_i915_private *dev_priv, | |
3a891a62 | 266 | intel_engine_mask_t engine_mask, |
d897a111 MW |
267 | const char *error_msg); |
268 | ||
269 | static inline struct i915_gpu_state * | |
270 | i915_gpu_state_get(struct i915_gpu_state *gpu) /* take an additional reference on @gpu and return @gpu */ | |
271 | { | |
272 | kref_get(&gpu->ref); /* dereferences @gpu without a NULL check, so @gpu must be a valid pointer */ | |
273 | return gpu; /* the same pointer that was passed in */ | |
274 | } | |
275 | ||
0e39037b CW |
276 | ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, |
277 | char *buf, loff_t offset, size_t count); | |
278 | ||
d897a111 MW |
279 | void __i915_gpu_state_free(struct kref *kref); |
280 | static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) /* drop one reference on @gpu */ | |
281 | { | |
282 | if (gpu) /* a NULL @gpu is accepted and ignored */ | |
283 | kref_put(&gpu->ref, __i915_gpu_state_free); /* __i915_gpu_state_free() is the kref release callback */ | |
284 | } | |
285 | ||
286 | struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); | |
287 | void i915_reset_error_state(struct drm_i915_private *i915); | |
fb6f0b64 | 288 | void i915_disable_error_state(struct drm_i915_private *i915, int err); |
d897a111 MW |
289 | |
290 | #else | |
291 | ||
292 | static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, /* no-op stub used when CONFIG_DRM_I915_CAPTURE_ERROR is not enabled */ | |
293 | intel_engine_mask_t engine_mask, /* same type as the prototype in the enabled branch, so both configurations share one signature */ | |
294 | const char *error_msg) | |
295 | { | |
296 | } | |
297 | ||
298 | static inline struct i915_gpu_state * | |
299 | i915_first_error_state(struct drm_i915_private *i915) /* stub used when CONFIG_DRM_I915_CAPTURE_ERROR is not enabled; @i915 is unused */ | |
300 | { | |
fb6f0b64 | 301 | return ERR_PTR(-ENODEV); /* always an error pointer, never NULL */ |
d897a111 MW |
302 | } |
303 | ||
304 | static inline void i915_reset_error_state(struct drm_i915_private *i915) /* no-op stub used when CONFIG_DRM_I915_CAPTURE_ERROR is not enabled */ | |
305 | { | |
306 | } | |
307 | ||
fb6f0b64 CW |
308 | static inline void i915_disable_error_state(struct drm_i915_private *i915, /* no-op stub used when CONFIG_DRM_I915_CAPTURE_ERROR is not enabled */ |
309 | int err) /* ignored by this stub */ | |
310 | { | |
311 | } | |
312 | ||
d897a111 MW |
313 | #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ |
314 | ||
315 | #endif /* _I915_GPU_ERROR_H_ */ |