1 /* SPDX-License-Identifier: MIT */
3 * Copyright © 2019 Intel Corporation
6 #ifndef _I915_PERF_TYPES_H_
7 #define _I915_PERF_TYPES_H_
9 #include <linux/atomic.h>
10 #include <linux/device.h>
11 #include <linux/hrtimer.h>
12 #include <linux/llist.h>
13 #include <linux/poll.h>
14 #include <linux/sysfs.h>
15 #include <linux/types.h>
16 #include <linux/uuid.h>
17 #include <linux/wait.h>
18 #include <uapi/drm/i915_drm.h>
20 #include "gt/intel_engine_types.h"
21 #include "gt/intel_sseu.h"
22 #include "i915_reg_defs.h"
23 #include "intel_uncore.h"
24 #include "intel_wakeref.h"
26 struct drm_i915_private
;
29 struct i915_gem_context
;
33 struct intel_engine_cs
;
37 PERF_GROUP_OAM_SAMEDIA_0
= 0,
40 PERF_GROUP_INVALID
= U32_MAX
,
48 struct i915_perf_regs
{
50 i915_reg_t oa_head_ptr
;
51 i915_reg_t oa_tail_ptr
;
53 i915_reg_t oa_ctx_ctrl
;
57 u32 oa_ctrl_counter_format_shift
;
65 struct i915_oa_format
{
69 enum report_header header
;
77 struct i915_oa_config
{
78 struct i915_perf
*perf
;
80 char uuid
[UUID_STRING_LEN
+ 1];
83 const struct i915_oa_reg
*mux_regs
;
85 const struct i915_oa_reg
*b_counter_regs
;
86 u32 b_counter_regs_len
;
87 const struct i915_oa_reg
*flex_regs
;
90 struct attribute_group sysfs_metric
;
91 struct attribute
*attrs
[2];
92 struct kobj_attribute sysfs_metric_id
;
98 struct i915_perf_stream
;
101 * struct i915_perf_stream_ops - the OPs to support a specific stream type
103 struct i915_perf_stream_ops
{
105 * @enable: Enables the collection of HW samples, either in response to
106 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
107 * without `I915_PERF_FLAG_DISABLED`.
109 void (*enable
)(struct i915_perf_stream
*stream
);
112 * @disable: Disables the collection of HW samples, either in response
113 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
116 void (*disable
)(struct i915_perf_stream
*stream
);
119 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
120 * once there is something ready to read() for the stream
122 void (*poll_wait
)(struct i915_perf_stream
*stream
,
127 * @wait_unlocked: For handling a blocking read, wait until there is
128 * something ready to read() for the stream. E.g. wait on the same
129 * wait queue that would be passed to poll_wait().
131 int (*wait_unlocked
)(struct i915_perf_stream
*stream
);
134 * @read: Copy buffered metrics as records to userspace
135 * **buf**: the userspace, destination buffer
136 * **count**: the number of bytes to copy, requested by userspace
137 * **offset**: zero at the start of the read, updated as the read
138 * proceeds, it represents how many bytes have been copied so far and
139 * the buffer offset for copying the next record.
141 * Copy as many buffered i915 perf samples and records for this stream
142 * to userspace as will fit in the given buffer.
144 * Only write complete records; returning -%ENOSPC if there isn't room
145 * for a complete record.
147 * Return any error condition that results in a short read such as
148 * -%ENOSPC or -%EFAULT, even though these may be squashed before
149 * returning to userspace.
151 int (*read
)(struct i915_perf_stream
*stream
,
157 * @destroy: Cleanup any stream specific resources.
159 * The stream will always be disabled before this is called.
161 void (*destroy
)(struct i915_perf_stream
*stream
);
165 * struct i915_perf_stream - state for a single open stream FD
167 struct i915_perf_stream
{
169 * @perf: i915_perf backpointer
171 struct i915_perf
*perf
;
174 * @uncore: mmio access path
176 struct intel_uncore
*uncore
;
179 * @engine: Engine associated with this performance stream.
181 struct intel_engine_cs
*engine
;
184 * @lock: Lock associated with operations on stream
189 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
190 * properties given when opening a stream, representing the contents
191 * of a single sample as read() by userspace.
196 * @sample_size: Considering the configured contents of a sample
197 * combined with the required header size, this is the total size
198 * of a single sample record.
203 * @ctx: %NULL if measuring system-wide across all contexts or a
204 * specific context that is being monitored.
206 struct i915_gem_context
*ctx
;
209 * @enabled: Whether the stream is currently enabled, considering
210 * whether the stream was opened in a disabled state and based
211 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
216 * @hold_preemption: Whether preemption is put on hold for command
217 * submissions done on the @ctx. This is useful for some drivers that
218 * cannot easily post process the OA buffer context to subtract delta
219 * of performance counters not associated with @ctx.
221 bool hold_preemption
;
224 * @ops: The callbacks providing the implementation of this specific
225 * type of configured stream.
227 const struct i915_perf_stream_ops
*ops
;
230 * @oa_config: The OA configuration used by the stream.
232 struct i915_oa_config
*oa_config
;
235 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
236 * each time @oa_config changes.
238 struct llist_head oa_config_bos
;
241 * @pinned_ctx: The OA context specific information.
243 struct intel_context
*pinned_ctx
;
246 * @specific_ctx_id: The id of the specific context.
251 * @specific_ctx_id_mask: The mask used for masking specific_ctx_id bits.
253 u32 specific_ctx_id_mask
;
256 * @poll_check_timer: High resolution timer that will periodically
257 * check for data in the circular OA buffer for notifying userspace
258 * (e.g. during a read() or poll()).
260 struct hrtimer poll_check_timer
;
263 * @poll_wq: The wait queue that hrtimer callback wakes when it
264 * sees data ready to read in the circular OA buffer.
266 wait_queue_head_t poll_wq
;
269 * @pollin: Whether there is data available to read.
274 * @periodic: Whether periodic sampling is currently enabled.
279 * @period_exponent: The OA unit sampling frequency is derived from this.
284 * @oa_buffer: State of the OA buffer.
287 const struct i915_oa_format
*format
;
288 struct i915_vma
*vma
;
294 * @oa_buffer.ptr_lock: Locks reads and writes to all
297 * Consider: the head and tail pointer state needs to be read
298 * consistently from a hrtimer callback (atomic context) and
299 * read() fop (user context) with tail pointer updates happening
300 * in atomic context and head updates in user context and the
301 * (unlikely) possibility of read() errors needing to reset all
304 * Note: Contention/performance aren't currently a significant
305 * concern here considering the relatively low frequency of
306 * hrtimer callbacks (5ms period) and that reads typically only
307 * happen in response to a hrtimer event and likely complete
308 * before the next callback.
310 * Note: This lock is not held *while* reading and copying data
311 * to userspace so the value of head observed in hrtimer
312 * callbacks won't represent any partial consumption of data.
317 * @oa_buffer.head: Although we can always read back
318 * the head pointer register,
319 * we prefer to avoid trusting the HW state, just to avoid any
320 * risk that some hardware condition could somehow bump the
321 * head pointer unpredictably and cause us to forward the wrong
322 * OA buffer data to userspace.
327 * @oa_buffer.tail: The last verified tail that can be
334 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
337 struct i915_vma
*noa_wait
;
340 * @poll_oa_period: The period in nanoseconds at which the OA
341 * buffer should be checked for available data.
347 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
351 * @is_valid_b_counter_reg: Validates register's address for
352 * programming boolean counters for a particular platform.
354 bool (*is_valid_b_counter_reg
)(struct i915_perf
*perf
, u32 addr
);
357 * @is_valid_mux_reg: Validates register's address for programming mux
358 * for a particular platform.
360 bool (*is_valid_mux_reg
)(struct i915_perf
*perf
, u32 addr
);
363 * @is_valid_flex_reg: Validates register's address for programming
364 * flex EU filtering for a particular platform.
366 bool (*is_valid_flex_reg
)(struct i915_perf
*perf
, u32 addr
);
369 * @enable_metric_set: Selects and applies any MUX configuration to set
370 * up the Boolean and Custom (B/C) counters that are part of the
371 * counter reports being sampled. May apply system constraints such as
372 * disabling EU clock gating as required.
374 int (*enable_metric_set
)(struct i915_perf_stream
*stream
,
375 struct i915_active
*active
);
378 * @disable_metric_set: Remove system constraints associated with using
381 void (*disable_metric_set
)(struct i915_perf_stream
*stream
);
384 * @oa_enable: Enable periodic sampling
386 void (*oa_enable
)(struct i915_perf_stream
*stream
);
389 * @oa_disable: Disable periodic sampling
391 void (*oa_disable
)(struct i915_perf_stream
*stream
);
394 * @read: Copy data from the circular OA buffer into a given userspace
397 int (*read
)(struct i915_perf_stream
*stream
,
403 * @oa_hw_tail_read: read the OA tail pointer register
405 * In particular this enables us to share all the fiddly code for
406 * handling the OA unit tail pointer race that affects multiple
409 u32 (*oa_hw_tail_read
)(struct i915_perf_stream
*stream
);
412 struct i915_perf_group
{
414 * @exclusive_stream: The stream currently using the OA unit. This is
415 * sometimes accessed outside a syscall associated with its file
418 struct i915_perf_stream
*exclusive_stream
;
421 * @num_engines: The number of engines using this OA unit.
426 * @regs: OA buffer register group for programming the OA unit.
428 struct i915_perf_regs regs
;
431 * @type: Type of OA unit - OAM, OAG etc.
436 struct i915_perf_gt
{
438 * Lock associated with anything below within this structure.
443 * @sseu: sseu configuration selected to run while perf is active,
444 * applies to all contexts.
446 struct intel_sseu sseu
;
449 * @num_perf_groups: number of perf groups per gt.
454 * @group: list of OA groups - one for each OA buffer.
456 struct i915_perf_group
*group
;
460 struct drm_i915_private
*i915
;
462 struct kobject
*metrics_kobj
;
465 * Lock associated with adding/modifying/removing OA configs
466 * in perf->metrics_idr.
468 struct mutex metrics_lock
;
471 * List of dynamic configurations (struct i915_oa_config), you
472 * need to hold perf->metrics_lock to access it.
474 struct idr metrics_idr
;
477 * For rate limiting any notifications of spurious
480 struct ratelimit_state spurious_report_rs
;
483 * For rate limiting any notifications of tail pointer
486 struct ratelimit_state tail_pointer_race
;
488 u32 gen7_latched_oastatus1
;
489 u32 ctx_oactxctrl_offset
;
490 u32 ctx_flexeu0_offset
;
493 * The RPT_ID/reason field for Gen8+ includes a bit
494 * to determine if the CTX ID in the report is valid
495 * but the specific bit differs between Gen 8 and 9
497 u32 gen8_valid_ctx_bit
;
499 struct i915_oa_ops ops
;
500 const struct i915_oa_format
*oa_formats
;
503 * Use a format mask to store the supported formats
506 #define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG)
507 unsigned long format_mask
[FORMAT_MASK_SIZE
];
509 atomic64_t noa_programming_delay
;
512 #endif /* _I915_PERF_TYPES_H_ */