/*
 * Copyright © 2015-2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *   Robert Bragg <robert@sixbynine.org>
 */

/**
 * DOC: i915 Perf Overview
 *
 * Gen graphics supports a large number of performance counters that can help
 * driver and application developers understand and optimize their use of the
 * GPU.
 *
 * This i915 perf interface enables userspace to configure and open a file
 * descriptor representing a stream of GPU metrics which can then be read() as
 * a stream of sample records.
 *
 * The interface is particularly suited to exposing buffered metrics that are
 * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
 *
 * Streams representing a single context are accessible to applications with a
 * corresponding drm file descriptor, such that OpenGL can use the interface
 * without special privileges. Access to system-wide metrics requires root
 * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
 * sysctl option.
 */
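
/*
 * A minimal userspace sketch of opening a stream via the interface described
 * above (the metric set ID and exponent are illustrative values only; real
 * set IDs are advertised via sysfs):
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, true,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, 1,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = 4, the number of (key, value) pairs above
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *
 * Sample records are then read() from stream_fd as a stream of
 * struct drm_i915_perf_record_header + payload records.
 */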

/**
 * DOC: i915 Perf History and Comparison with Core Perf
 *
 * The interface was initially inspired by the core Perf infrastructure but
 * some notable differences are:
 *
 * i915 perf file descriptors represent a "stream" instead of an "event"; where
 * a perf event primarily corresponds to a single 64bit value, while a stream
 * might sample sets of tightly-coupled counters, depending on the
 * configuration. For example the Gen OA unit isn't designed to support
 * orthogonal configurations of individual counters; it's configured for a set
 * of related counters. Samples for an i915 perf stream capturing OA metrics
 * will include a set of counter values packed in a compact HW specific format.
 * The OA unit supports a number of different packing formats which can be
 * selected by the user opening the stream. Perf has support for grouping
 * events, but each event in the group is configured, validated and
 * authenticated individually with separate system calls.
 *
 * i915 perf stream configurations are provided as an array of u64 (key,value)
 * pairs, instead of a fixed struct with multiple miscellaneous config members,
 * interleaved with event-type specific members.
 *
 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
 * The supported metrics are being written to memory by the GPU unsynchronized
 * with the CPU, using HW specific packing formats for counter sets. Sometimes
 * the constraints on HW configuration require reports to be filtered before it
 * would be acceptable to expose them to unprivileged applications - to hide
 * the metrics of other processes/contexts. For these use cases a read() based
 * interface is a good fit, and provides an opportunity to filter data as it
 * gets copied from the GPU mapped buffers to userspace buffers.
 *
 *
 * Issues hit with first prototype based on Core Perf
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The first prototype of this driver was based on the core perf
 * infrastructure, and while we did make that mostly work, with some changes to
 * perf, we found we were breaking or working around too many assumptions baked
 * into perf's currently cpu centric design.
 *
 * In the end we didn't see a clear benefit to making perf's implementation and
 * interface more complex by changing design assumptions while we knew we still
 * wouldn't be able to use any existing perf based userspace tools.
 *
 * Also considering the Gen specific nature of the Observability hardware and
 * how userspace will sometimes need to combine i915 perf OA metrics with
 * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
 * expecting the interface to be used by a platform specific userspace such as
 * OpenGL or tools. This is to say; we aren't inherently missing out on having
 * a standard vendor/architecture agnostic interface by not using perf.
 *
 *
 * For posterity, in case we might re-visit trying to adapt core perf to be
 * better suited to exposing i915 metrics these were the main pain points we
 * hit:
 *
 * - The perf based OA PMU driver broke some significant design assumptions:
 *
 *   Existing perf pmus are used for profiling work on a cpu and we were
 *   introducing the idea of _IS_DEVICE pmus with different security
 *   implications, the need to fake cpu-related data (such as user/kernel
 *   registers) to fit with perf's current design, and adding _DEVICE records
 *   as a way to forward device-specific status records.
 *
 *   The OA unit writes reports of counters into a circular buffer, without
 *   involvement from the CPU, making our PMU driver the first of a kind.
 *
 *   Given the way we were periodically forwarding data from the GPU-mapped OA
 *   buffer to perf's buffer, those bursts of sample writes looked to perf like
 *   we were sampling too fast and so we had to subvert its throttling checks.
 *
 *   Perf supports groups of counters and allows those to be read via
 *   transactions internally but transactions currently seem designed to be
 *   explicitly initiated from the cpu (say in response to a userspace read())
 *   and while we could pull a report out of the OA buffer we can't
 *   trigger a report from the cpu on demand.
 *
 *   Related to being report based; the OA counters are configured in HW as a
 *   set while perf generally expects counter configurations to be orthogonal.
 *   Although counters can be associated with a group leader as they are
 *   opened, there's no clear precedent for being able to provide group-wide
 *   configuration attributes (for example we want to let userspace choose the
 *   OA unit report format used to capture all counters in a set, or specify a
 *   GPU context to filter metrics on). We avoided using perf's grouping
 *   feature and forwarded OA reports to userspace via perf's 'raw' sample
 *   field. This suited our userspace well considering how coupled the counters
 *   are when dealing with normalizing. It would be inconvenient to split
 *   counters up into separate events, only to require userspace to recombine
 *   them. For Mesa it's also convenient to be forwarded raw, periodic reports
 *   for combining with the side-band raw reports it captures using
 *   MI_REPORT_PERF_COUNT commands.
 *
 * - As a side note on perf's grouping feature; there was also some concern
 *   that using PERF_FORMAT_GROUP as a way to pack together counter values
 *   would quite drastically inflate our sample sizes, which would likely
 *   lower the effective sampling resolutions we could use when the available
 *   memory bandwidth is limited.
 *
 *   With the OA unit's report formats, counters are packed together as 32
 *   or 40bit values, with the largest report size being 256 bytes.
 *
 *   PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
 *   documented ordering to the values, implying PERF_FORMAT_ID must also be
 *   used to add a 64bit ID before each value; giving 16 bytes per counter.
 *
 *   Related to counter orthogonality; we can't time share the OA unit, while
 *   event scheduling is a central design idea within perf for allowing
 *   userspace to open + enable more events than can be configured in HW at any
 *   one time. The OA unit is not designed to allow re-configuration while in
 *   use. We can't reconfigure the OA unit without losing internal OA unit
 *   state which we can't access explicitly to save and restore. Reconfiguring
 *   the OA unit is also relatively slow, involving ~100 register writes. From
 *   userspace Mesa also depends on a stable OA configuration when emitting
 *   MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
 *   disabled while there are outstanding MI_RPC commands lest we hang the
 *   command streamer.
 *
 *   The contents of sample records aren't extensible by device drivers (i.e.
 *   the sample_type bits). As an example; Sourab Gupta had been looking to
 *   attach GPU timestamps to our OA samples. We were shoehorning OA reports
 *   into sample records by using the 'raw' field, but it's tricky to pack more
 *   than one thing into this field because events/core.c currently only lets a
 *   pmu give a single raw data pointer plus len which will be copied into the
 *   ring buffer. To include more than the OA report we'd have to copy the
 *   report into an intermediate larger buffer. I'd been considering allowing a
 *   vector of data+len values to be specified for copying the raw data, but
 *   it felt like a kludge to be using the raw field for this purpose.
 *
 * - It felt like our perf based PMU was making some technical compromises
 *   just for the sake of using perf:
 *
 *   perf_event_open() requires events to either relate to a pid or a specific
 *   cpu core, while our device pmu related to neither. Events opened with a
 *   pid will be automatically enabled/disabled according to the scheduling of
 *   that process - so not appropriate for us. When an event is related to a
 *   cpu id, perf ensures pmu methods will be invoked via an inter-processor
 *   interrupt on that core. To avoid invasive changes our userspace opened OA
 *   perf events for a specific cpu. This was workable but it meant the
 *   majority of the OA driver ran in atomic context, including all OA report
 *   forwarding, which wasn't really necessary in our case and seemed to make
 *   our locking requirements somewhat complex as we handled the interaction
 *   with the rest of the i915 driver.
 */

#include <linux/anon_inodes.h>
#include <linux/sizes.h>
#include <linux/uuid.h>

#include "gem/i915_gem_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_perf.h"
#include "oa/i915_oa_hsw.h"
#include "oa/i915_oa_bdw.h"
#include "oa/i915_oa_chv.h"
#include "oa/i915_oa_sklgt2.h"
#include "oa/i915_oa_sklgt3.h"
#include "oa/i915_oa_sklgt4.h"
#include "oa/i915_oa_bxt.h"
#include "oa/i915_oa_kblgt2.h"
#include "oa/i915_oa_kblgt3.h"
#include "oa/i915_oa_glk.h"
#include "oa/i915_oa_cflgt2.h"
#include "oa/i915_oa_cflgt3.h"
#include "oa/i915_oa_cnl.h"
#include "oa/i915_oa_icl.h"
#include "oa/i915_oa_tgl.h"

/* HW requires this to be a power of two, between 128k and 16M, though driver
 * is currently generally designed assuming the largest 16M size is used such
 * that the overflow cases are unlikely in normal operation.
 */
#define OA_BUFFER_SIZE SZ_16M

#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
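
/*
 * Worked example of the wraparound arithmetic above (illustrative values):
 * with the 16M buffer, OA_TAKEN(64, SZ_16M - 64) == 128, i.e. the 128 bytes
 * of data spanning the wrap point are still counted correctly because the
 * subtraction is masked to the power-of-two buffer size.
 */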

/**
 * DOC: OA Tail Pointer Race
 *
 * There's a HW race condition between OA unit tail pointer register updates and
 * writes to memory whereby the tail pointer can sometimes get ahead of what's
 * been written out to the OA buffer so far (in terms of what's visible to the
 * CPU).
 *
 * Although this can be observed explicitly while copying reports to userspace
 * by checking for a zeroed report-id field in tail reports, we want to account
 * for this earlier, as part of the oa_buffer_check_unlocked() to avoid lots of
 * redundant read() attempts.
 *
 * In effect we define a tail pointer for reading that lags the real tail
 * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
 * time for the corresponding reports to become visible to the CPU.
 *
 * To manage this we actually track two tail pointers:
 *  1) An 'aging' tail with an associated timestamp that is tracked until we
 *     can trust the corresponding data is visible to the CPU; at which point
 *     it is considered 'aged'.
 *  2) An 'aged' tail that can be used for read()ing.
 *
 * The two separate pointers let us decouple read()s from tail pointer aging.
 *
 * The tail pointers are checked and updated at a limited rate within a hrtimer
 * callback (the same callback that is used for delivering EPOLLIN events).
 *
 * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
 * indicates that an updated tail pointer is needed.
 *
 * Most of the implementation details for this workaround are in
 * oa_buffer_check_unlocked() and _append_oa_reports()
 *
 * Note for posterity: previously the driver used to define an effective tail
 * pointer that lagged the real pointer by a 'tail margin' measured in bytes
 * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
 * This was flawed considering that the OA unit may also automatically generate
 * non-periodic reports (such as on context switch) or the OA unit may be
 * enabled without any periodic sampling.
 */
#define OA_TAIL_MARGIN_NSEC 100000ULL
#define INVALID_TAIL_PTR 0xffffffff
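
/*
 * Illustrative timeline of the two-tail scheme described above (t0, t1 are
 * successive hrtimer callback invocations, margin is OA_TAIL_MARGIN_NSEC):
 *
 *	t0: HW tail has advanced -> tails[!aged_idx] = hw_tail, aging starts
 *	t1: t1 - t0 > margin     -> aged_idx flips; that tail is now safe for
 *	                            read() and a new aging cycle may begin
 */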

/* frequency for checking whether the OA unit has written new reports to the
 * circular OA buffer...
 */
#define POLL_FREQUENCY 200
#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)

/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
static u32 i915_perf_stream_paranoid = true;

/* The maximum exponent the hardware accepts is 63 (essentially it selects one
 * of the 64bit timestamp bits to trigger reports from) but there's currently
 * no known use case for sampling as infrequently as once per 47 thousand years.
 *
 * Since the timestamps included in OA reports are only 32bits it seems
 * reasonable to limit the OA exponent where it's still possible to account for
 * overflow in OA report timestamps.
 */
#define OA_EXPONENT_MAX 31
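
/*
 * Worked example (illustrative): the sampling period selected by an exponent
 * is roughly 2^(exponent + 1) timestamp periods, so on Haswell's 12.5MHz
 * (80ns) timestamp clock, exponent 0 gives the minimum ~160ns period while
 * OA_EXPONENT_MAX (31) gives 2^32 * 80ns, i.e. roughly 344 seconds.
 */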

#define INVALID_CTX_ID 0xffffffff

/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
#define OAREPORT_REASON_MASK 0x3f
#define OAREPORT_REASON_MASK_EXTENDED 0x7f
#define OAREPORT_REASON_SHIFT 19
#define OAREPORT_REASON_TIMER (1<<0)
#define OAREPORT_REASON_CTX_SWITCH (1<<3)
#define OAREPORT_REASON_CLK_RATIO (1<<5)


/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
 *
 * The highest sampling frequency we can theoretically program the OA unit
 * with is always half the timestamp frequency: e.g. 6.25MHz for Haswell.
 *
 * Initialized just before we register the sysctl parameter.
 */
static int oa_sample_rate_hard_limit;

/* Theoretically we can program the OA unit to sample every 160ns but don't
 * allow that by default unless root...
 *
 * The default threshold of 100000Hz is based on perf's similar
 * kernel.perf_event_max_sample_rate sysctl parameter.
 */
static u32 i915_oa_max_sample_rate = 100000;
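
/*
 * Example (illustrative) of how a privileged user can relax the two limits
 * above at runtime via the sysctls registered by this file:
 *
 *	# sysctl dev.i915.perf_stream_paranoid=0
 *	# sysctl dev.i915.oa_max_sample_rate=200000
 */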

/* XXX: beware if future OA HW adds new report formats that the current
 * code assumes all reports have a power-of-two size and ~(size - 1) can
 * be used as a mask to align the OA tail pointer.
 */
static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]	    = { 0, 64 },
	[I915_OA_FORMAT_A29]	    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
};

static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12]		    = { 0, 64 },
	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
};

static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
};

#define SAMPLE_OA_REPORT (1<<0)

/**
 * struct perf_open_properties - for validated properties given to open a stream
 * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
 * @single_context: Whether a single or all gpu contexts should be monitored
 * @hold_preemption: Whether the preemption is disabled for the filtered
 *                   context
 * @ctx_handle: A gem ctx handle for use with @single_context
 * @metrics_set: An ID for an OA unit metric set advertised via sysfs
 * @oa_format: An OA unit HW report format
 * @oa_periodic: Whether to enable periodic OA unit sampling
 * @oa_period_exponent: The OA unit sampling period is derived from this
 * @engine: The engine (typically rcs0) being monitored by the OA unit
 *
 * As read_properties_unlocked() enumerates and validates the properties given
 * to open a stream of metrics the configuration is built up in the structure
 * which starts out zero initialized.
 */
struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 hold_preemption:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;

	struct intel_engine_cs *engine;
};

struct i915_oa_config_bo {
	struct llist_node node;

	struct i915_oa_config *oa_config;
	struct i915_vma *vma;
};

static struct ctl_table_header *sysctl_header;

static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);

void i915_oa_config_release(struct kref *ref)
{
	struct i915_oa_config *oa_config =
		container_of(ref, typeof(*oa_config), ref);

	kfree(oa_config->flex_regs);
	kfree(oa_config->b_counter_regs);
	kfree(oa_config->mux_regs);

	kfree_rcu(oa_config, rcu);
}

struct i915_oa_config *
i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
{
	struct i915_oa_config *oa_config;

	rcu_read_lock();
	if (metrics_set == 1)
		oa_config = &perf->test_config;
	else
		oa_config = idr_find(&perf->metrics_idr, metrics_set);
	if (oa_config)
		oa_config = i915_oa_config_get(oa_config);
	rcu_read_unlock();

	return oa_config;
}

static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
{
	i915_oa_config_put(oa_bo->oa_config);
	i915_vma_put(oa_bo->vma);
	kfree(oa_bo);
}

static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
	       GEN12_OAG_OATAILPTR_MASK;
}

static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}

static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);

	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}

/**
 * oa_buffer_check_unlocked - check for data and update tail ptr state
 * @stream: i915 stream instance
 *
 * This is either called via fops (for blocking reads in user ctx) or the poll
 * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
 * if there is data available for userspace to read.
 *
 * This function is central to providing a workaround for the OA unit tail
 * pointer having a race with respect to what data is visible to the CPU.
 * It is responsible for reading tail pointers from the hardware and giving
 * the pointers time to 'age' before they are made available for reading.
 * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
 *
 * Besides returning true when there is data available to read() this function
 * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
 * and .aged_tail_idx state used for reading.
 *
 * Note: It's safe to read OA config state here unlocked, assuming that this is
 * only called while the stream is enabled, while the global OA configuration
 * can't be modified.
 *
 * Returns: %true if the OA buffer contains data, else %false
 */
static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
	int report_size = stream->oa_buffer.format_size;
	unsigned long flags;
	unsigned int aged_idx;
	u32 head, hw_tail, aged_tail, aging_tail;
	u64 now;

	/* We have to consider the (unlikely) possibility that read() errors
	 * could result in an OA buffer reset which might reset the head,
	 * tails[] and aged_tail state.
	 */
	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	/* NB: The head we observe here might effectively be a little out of
	 * date (between head and tails[aged_idx].offset if there is currently
	 * a read() in progress).
	 */
	head = stream->oa_buffer.head;

	aged_idx = stream->oa_buffer.aged_tail_idx;
	aged_tail = stream->oa_buffer.tails[aged_idx].offset;
	aging_tail = stream->oa_buffer.tails[!aged_idx].offset;

	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);

	/* The tail pointer increases in 64 byte increments,
	 * not in report_size steps...
	 */
	hw_tail &= ~(report_size - 1);

	now = ktime_get_mono_fast_ns();

	/* Update the aged tail
	 *
	 * Flip the tail pointer available for read()s once the aging tail is
	 * old enough to trust that the corresponding data will be visible to
	 * the CPU...
	 *
	 * Do this before updating the aging pointer in case we may be able to
	 * immediately start aging a new pointer too (if new data has become
	 * available) without needing to wait for a later hrtimer callback.
	 */
	if (aging_tail != INVALID_TAIL_PTR &&
	    ((now - stream->oa_buffer.aging_timestamp) >
	     OA_TAIL_MARGIN_NSEC)) {

		aged_idx ^= 1;
		stream->oa_buffer.aged_tail_idx = aged_idx;

		aged_tail = aging_tail;

		/* Mark that we need a new pointer to start aging... */
		stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
		aging_tail = INVALID_TAIL_PTR;
	}

	/* Update the aging tail
	 *
	 * We throttle aging tail updates until we have a new tail that
	 * represents >= one report more data than is already available for
	 * reading. This ensures there will be enough data for a successful
	 * read once this new pointer has aged and ensures we will give the new
	 * pointer time to age.
	 */
	if (aging_tail == INVALID_TAIL_PTR &&
	    (aged_tail == INVALID_TAIL_PTR ||
	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
		struct i915_vma *vma = stream->oa_buffer.vma;
		u32 gtt_offset = i915_ggtt_offset(vma);

		/* Be paranoid and do a bounds check on the pointer read back
		 * from hardware, just in case some spurious hardware condition
		 * could put the tail out of bounds...
		 */
		if (hw_tail >= gtt_offset &&
		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
			stream->oa_buffer.tails[!aged_idx].offset =
				aging_tail = hw_tail;
			stream->oa_buffer.aging_timestamp = now;
		} else {
			drm_err(&stream->perf->i915->drm,
				"Ignoring spurious out of range OA buffer tail pointer = %x\n",
				hw_tail);
		}
	}

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	return aged_tail == INVALID_TAIL_PTR ?
	       false : OA_TAKEN(aged_tail, head) >= report_size;
}

/**
 * append_oa_status - Appends a status record to a userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @type: The kind of status to report to userspace
 *
 * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
 * into the userspace read() buffer.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}

/**
 * append_oa_sample - Copies single OA report into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @report: A single OA report to (optionally) include as part of the sample
 *
 * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
 * properties when opening a stream, tracked as `stream->sample_flags`. This
 * function copies the requested components of a single sample to the given
 * read() @buf.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	int report_size = stream->oa_buffer.format_size;
	struct drm_i915_perf_record_header header;
	u32 sample_flags = stream->sample_flags;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (sample_flags & SAMPLE_OA_REPORT) {
		if (copy_to_user(buf, report, report_size))
			return -EFAULT;
	}

	(*offset) += header.size;

	return 0;
}

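/*
 * A minimal userspace sketch of consuming the record stream produced by the
 * append_* helpers above (error handling elided; buf/len come from a
 * successful read() on the stream fd, and process_oa_report() is a
 * hypothetical, format-specific callback):
 *
 *	for (size_t pos = 0; pos < len; ) {
 *		const struct drm_i915_perf_record_header *header =
 *			(const void *)(buf + pos);
 *
 *		if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
 *			process_oa_report((const uint8_t *)(header + 1));
 *
 *		pos += header->size;
 *	}
 */
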
/**
 * gen8_append_oa_reports - Copies all buffered OA reports into
 *			    userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	int report_size = stream->oa_buffer.format_size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
		return -EIO;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	head = stream->oa_buffer.head;
	aged_tail_idx = stream->oa_buffer.aged_tail_idx;
	tail = stream->oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (drm_WARN_ONCE(&uncore->i915->drm,
			  head > OA_BUFFER_SIZE || head % report_size ||
			  tail > OA_BUFFER_SIZE || tail % report_size,
			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
			  head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;
		u32 ctx_id;
		u32 reason;

		/*
		 * All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (drm_WARN_ON(&uncore->i915->drm,
				(OA_BUFFER_SIZE - head) < report_size)) {
			drm_err(&uncore->i915->drm,
				"Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The reason field includes flags identifying what
		 * triggered this specific report (mostly timer
		 * triggered or e.g. due to a context switch).
		 *
		 * This field is never expected to be zero so we can
		 * check that the report isn't invalid before copying
		 * it to userspace...
		 */
		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
			  (IS_GEN(stream->perf->i915, 12) ?
			   OAREPORT_REASON_MASK_EXTENDED :
			   OAREPORT_REASON_MASK));
		if (reason == 0) {
			if (__ratelimit(&stream->perf->spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ctx_id = report32[2] & stream->specific_ctx_id_mask;

		/*
		 * Squash whatever is in the CTX_ID field if it's marked as
		 * invalid to be sure we avoid false-positive, single-context
		 * filtering below...
		 *
		 * Note that we don't clear the valid_ctx_bit so userspace can
		 * understand that the ID has been squashed by the kernel.
		 */
		if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
		    INTEL_GEN(stream->perf->i915) <= 11)
			ctx_id = report32[2] = INVALID_CTX_ID;

		/*
		 * NB: For Gen 8 the OA unit no longer supports clock gating
		 * off for a specific context and the kernel can't securely
		 * stop the counters from updating as system-wide / global
		 * values.
		 *
		 * Automatic reports now include a context ID so reports can be
		 * filtered on the cpu but it's not worth trying to
		 * automatically subtract/hide counter progress for other
		 * contexts while filtering since we can't stop userspace
		 * issuing MI_REPORT_PERF_COUNT commands which would still
		 * provide a side-band view of the real values.
		 *
		 * To allow userspace (such as Mesa/GL_INTEL_performance_query)
		 * to normalize counters for a single filtered context then it
		 * needs to be forwarded bookend context-switch reports so that
		 * it can track switches in between MI_REPORT_PERF_COUNT
		 * commands and can itself subtract/ignore the progress of
		 * counters associated with other contexts. Note that the
		 * hardware automatically triggers reports when switching to a
		 * new context which are tagged with the ID of the newly active
		 * context. To avoid the complexity (and likely fragility) of
		 * reading ahead while parsing reports to try and minimize
		 * forwarding redundant context switch reports (i.e. between
		 * other, unrelated contexts) we simply elect to forward them
		 * all.
		 *
		 * We don't rely solely on the reason field to identify context
		 * switches since it's not uncommon for periodic samples to
		 * identify a switch before any 'context switch' report.
		 */
		if (!stream->perf->exclusive_stream->ctx ||
		    stream->specific_ctx_id == ctx_id ||
		    stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
		    reason & OAREPORT_REASON_CTX_SWITCH) {

			/*
			 * While filtering for a single context we avoid
			 * leaking the IDs of other contexts.
			 */
			if (stream->perf->exclusive_stream->ctx &&
			    stream->specific_ctx_id != ctx_id) {
				report32[2] = INVALID_CTX_ID;
			}

			ret = append_oa_sample(stream, buf, count, offset,
					       report);
			if (ret)
				break;

			stream->oa_buffer.last_ctx_id = ctx_id;
		}

		/*
		 * The above reason field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		i915_reg_t oaheadptr;

		oaheadptr = IS_GEN(stream->perf->i915, 12) ?
			    GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;

		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;
		intel_uncore_write(uncore, oaheadptr,
				   head & GEN12_OAG_OAHEADPTR_MASK);
		stream->oa_buffer.head = head;

		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen8_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks OA unit status registers and if necessary appends corresponding
 * status records for userspace (such as for a buffer full condition) and then
 * initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * NB: some data may be successfully copied to the userspace buffer
 * even if an error is returned, and this is reflected in the
 * updated @offset.
 *
 * Returns: zero on success or a negative error code
 */
static int gen8_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 oastatus;
	i915_reg_t oastatus_reg;
	int ret;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
		return -EIO;

	oastatus_reg = IS_GEN(stream->perf->i915, 12) ?
		       GEN12_OAG_OASTATUS : GEN8_OASTATUS;

	oastatus = intel_uncore_read(uncore, oastatus_reg);

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * Although theoretically we could handle this more gracefully
	 * sometimes, some Gens don't correctly suppress certain
	 * automatically triggered reports in this condition and so we
	 * have to assume that old reports are now being trampled
	 * over.
	 *
	 * Considering how we don't currently give userspace control
	 * over the OA buffer size and always configure a large 16MB
	 * buffer, then a buffer overflow does anyway likely indicate
	 * that something has gone quite badly wrong.
	 */
	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  stream->period_exponent);

		stream->perf->ops.oa_disable(stream);
		stream->perf->ops.oa_enable(stream);

		/*
		 * Note: .oa_enable() is expected to re-init the oabuffer and
		 * reset GEN8_OASTATUS for us
		 */
		oastatus = intel_uncore_read(uncore, oastatus_reg);
	}

	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		intel_uncore_write(uncore, oastatus_reg,
				   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
	}

	return gen8_append_oa_reports(stream, buf, count, offset);
}

/**
 * gen7_append_oa_reports - Copies all buffered OA reports into
 *			    userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	int report_size = stream->oa_buffer.format_size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
		return -EIO;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	head = stream->oa_buffer.head;
	aged_tail_idx = stream->oa_buffer.aged_tail_idx;
	tail = stream->oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/* An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/* An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (drm_WARN_ONCE(&uncore->i915->drm,
			  head > OA_BUFFER_SIZE || head % report_size ||
			  tail > OA_BUFFER_SIZE || tail % report_size,
			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
			  head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/* All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (drm_WARN_ON(&uncore->i915->drm,
				(OA_BUFFER_SIZE - head) < report_size)) {
			drm_err(&uncore->i915->drm,
				"Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/* The report-ID field for periodic samples includes
		 * some undocumented flags related to what triggered
		 * the report and is never expected to be zero so we
		 * can check that the report isn't invalid before
		 * copying it to userspace...
		 */
		if (report32[0] == 0) {
			if (__ratelimit(&stream->perf->spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/* The above report-id field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

		/* We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		intel_uncore_write(uncore, GEN7_OASTATUS2,
				   (head & GEN7_OASTATUS2_HEAD_MASK) |
				   GEN7_OASTATUS2_MEM_SELECT_GGTT);
		stream->oa_buffer.head = head;

		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen7_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks Gen 7 specific OA unit status registers and if necessary appends
 * corresponding status records for userspace (such as for a buffer full
 * condition) and then initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 oastatus1;
	int ret;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
		return -EIO;

	oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);

	/* XXX: On Haswell we don't have a safe way to clear oastatus1
	 * bits while the OA unit is enabled (while the tail pointer
	 * may be updated asynchronously) so we ignore status bits
	 * that have already been reported to userspace.
	 */
	oastatus1 &= ~stream->perf->gen7_latched_oastatus1;

	/* We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * - The status can be interpreted to mean that the buffer is
	 *   currently full (with a higher precedence than OA_TAKEN()
	 *   which will start to report a near-empty buffer after an
	 *   overflow) but it's awkward that we can't clear the status
	 *   on Haswell, so without a reset we won't be able to catch
	 *   the state again.
	 *
	 * - Since it also implies the HW has started overwriting old
	 *   reports it may also affect our sanity checks for invalid
	 *   reports when copying to userspace that assume new reports
	 *   are being written to cleared memory.
	 *
	 * - In the future we may want to introduce a flight recorder
	 *   mode where the driver will automatically maintain a safe
	 *   guard band between head/tail, avoiding this overflow
	 *   condition, but we avoid the added driver complexity for
	 *   now.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  stream->period_exponent);

		stream->perf->ops.oa_disable(stream);
		stream->perf->ops.oa_enable(stream);

		oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		stream->perf->gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	return gen7_append_oa_reports(stream, buf, count, offset);
}

/**
 * i915_oa_wait_unlocked - handles blocking IO until OA data available
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Called when userspace tries to read() from a blocking stream FD opened
 * for OA metrics. It waits until the hrtimer callback finds a non-empty
 * OA buffer and wakes us.
 *
 * Note: it's acceptable to have this return with some false positives
 * since any subsequent read handling will return -EAGAIN if there isn't
 * really data ready for userspace yet.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	/* We would wait indefinitely if periodic sampling is not enabled */
	if (!stream->periodic)
		return -EIO;

	return wait_event_interruptible(stream->poll_wq,
					oa_buffer_check_unlocked(stream));
}

/**
 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
 * @stream: An i915-perf stream opened for OA metrics
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream opened for OA metrics,
 * this starts a poll_wait with the wait queue that our hrtimer callback wakes
 * when it sees data ready to read in the circular OA buffer.
 */
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	poll_wait(file, &stream->poll_wq, wait);
}

/**
 * i915_oa_read - just calls through to &i915_oa_ops->read
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	return stream->perf->ops.read(stream, buf, count, offset);
}

static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
{
	struct i915_gem_engines_iter it;
	struct i915_gem_context *ctx = stream->ctx;
	struct intel_context *ce;
	int err;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (ce->engine != stream->engine) /* first match! */
			continue;

		/*
		 * As the ID is the gtt offset of the context's vma we
		 * pin the vma to ensure the ID remains fixed.
		 */
		err = intel_context_pin(ce);
		if (err == 0) {
			stream->pinned_ctx = ce;
			break;
		}
	}
	i915_gem_context_unlock_engines(ctx);

	return stream->pinned_ctx;
}

/**
 * oa_get_render_ctx_id - determine and hold ctx hw id
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This ensures that we don't have to worry about
 * updating the context ID in OACONTROL on the fly.
 *
 * Returns: zero on success or a negative error code
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct intel_context *ce;

	ce = oa_pin_context(stream);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	switch (INTEL_GEN(ce->engine->i915)) {
	case 7: {
		/*
		 * On Haswell we don't do any post processing of the reports
		 * and don't need to use the mask.
		 */
		stream->specific_ctx_id = i915_ggtt_offset(ce->state);
		stream->specific_ctx_id_mask = 0;
		break;
	}

	case 8:
	case 9:
	case 10:
		if (intel_engine_in_execlists_submission_mode(ce->engine)) {
			stream->specific_ctx_id_mask =
				(1U << GEN8_CTX_ID_WIDTH) - 1;
			stream->specific_ctx_id = stream->specific_ctx_id_mask;
		} else {
			/*
			 * When using GuC, the context descriptor we write in
			 * i915 is read by GuC and rewritten before it's
			 * actually written into the hardware. The LRCA is
			 * what is put into the context id field of the
			 * context descriptor by GuC. Because it's aligned to
			 * a page, the lower 12bits are always at 0 and
			 * dropped by GuC. They won't be part of the context
			 * ID in the OA reports, so squash those lower bits.
			 */
			stream->specific_ctx_id = ce->lrc.lrca >> 12;

			/*
			 * GuC uses the top bit to signal proxy submission, so
			 * ignore that bit.
			 */
			stream->specific_ctx_id_mask =
				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
		}
		break;

	case 11:
	case 12: {
		stream->specific_ctx_id_mask =
			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
		/*
		 * Pick an unused context id
		 * 0 - BITS_PER_LONG are used by other contexts
		 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
		 */
		stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
		break;
	}

	default:
		MISSING_CASE(INTEL_GEN(ce->engine->i915));
	}

	ce->tag = stream->specific_ctx_id;

	drm_dbg(&stream->perf->i915->drm,
		"filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
		stream->specific_ctx_id,
		stream->specific_ctx_id_mask);

	return 0;
}

/**
 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
 * @stream: An i915-perf stream opened for OA metrics
 *
 * In case anything needed doing to ensure the context HW ID would remain valid
 * for the lifetime of the stream, then that can be undone here.
 */
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct intel_context *ce;

	ce = fetch_and_zero(&stream->pinned_ctx);
	if (ce) {
		ce->tag = 0; /* recomputed on next submission after parking */
		intel_context_unpin(ce);
	}

	stream->specific_ctx_id = INVALID_CTX_ID;
	stream->specific_ctx_id_mask = 0;
}

static void
free_oa_buffer(struct i915_perf_stream *stream)
{
	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
				   I915_VMA_RELEASE_MAP);

	stream->oa_buffer.vaddr = NULL;
}

static void
free_oa_configs(struct i915_perf_stream *stream)
{
	struct i915_oa_config_bo *oa_bo, *tmp;

	i915_oa_config_put(stream->oa_config);
	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
		free_oa_config_bo(oa_bo);
}

static void
free_noa_wait(struct i915_perf_stream *stream)
{
	i915_vma_unpin_and_release(&stream->noa_wait, 0);
}

d7965152
RB
1397static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
1398{
8f8b1171 1399 struct i915_perf *perf = stream->perf;
d7965152 1400
8f8b1171 1401 BUG_ON(stream != perf->exclusive_stream);
d7965152 1402
19f81df2 1403 /*
f89823c2
LL
1404 * Unset exclusive_stream first, it will be checked while disabling
1405 * the metric set on gen8+.
a5af081d
CW
1406 *
1407 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
19f81df2 1408 */
a5af081d 1409 WRITE_ONCE(perf->exclusive_stream, NULL);
8f8b1171 1410 perf->ops.disable_metric_set(stream);
d7965152 1411
a37f08a8 1412 free_oa_buffer(stream);
d7965152 1413
52111c46 1414 intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
a5efcde6 1415 intel_engine_pm_put(stream->engine);
d7965152
RB
1416
1417 if (stream->ctx)
1418 oa_put_render_ctx_id(stream);
1419
6a45008a 1420 free_oa_configs(stream);
daed3e44 1421 free_noa_wait(stream);
f89823c2 1422
8f8b1171 1423 if (perf->spurious_report_rs.missed) {
712122ea 1424 DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
8f8b1171 1425 perf->spurious_report_rs.missed);
712122ea 1426 }
d7965152
RB
1427}
1428
a37f08a8 1429static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
d7965152 1430{
52111c46 1431 struct intel_uncore *uncore = stream->uncore;
a37f08a8 1432 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
0dd860cf
RB
1433 unsigned long flags;
1434
a37f08a8 1435 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
d7965152
RB
1436
1437 /* Pre-DevBDW: OABUFFER must be set with counters off,
1438 * before OASTATUS1, but after OASTATUS2
1439 */
8f8b1171
CW
1440 intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */
1441 gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
a37f08a8 1442 stream->oa_buffer.head = gtt_offset;
f279020a 1443
8f8b1171 1444 intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset);
f279020a 1445
8f8b1171
CW
1446 intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */
1447 gtt_offset | OABUFFER_SIZE_16M);
d7965152 1448
0dd860cf 1449 /* Mark that we need updated tail pointers to read from... */
a37f08a8
UNR
1450 stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
1451 stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
0dd860cf 1452
a37f08a8 1453 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
0dd860cf 1454
d7965152
RB
1455 /* On Haswell we have to track which OASTATUS1 flags we've
1456 * already seen since they can't be cleared while periodic
1457 * sampling is enabled.
1458 */
8f8b1171 1459 stream->perf->gen7_latched_oastatus1 = 0;
d7965152
RB
1460
1461 /* NB: although the OA buffer will initially be allocated
1462 * zeroed via shmfs (and so this memset is redundant when
1463 * first allocating), we may re-init the OA buffer, either
1464 * when re-enabling a stream or in error/reset paths.
1465 *
1466 * The reason we clear the buffer for each re-init is for the
1467 * sanity check in gen7_append_oa_reports() that looks at the
1468 * report-id field to make sure it's non-zero, which relies on
1469 * the assumption that new reports are being written to zeroed
1470 * memory...
1471 */
a37f08a8 1472 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
d7965152 1473
a37f08a8 1474 stream->pollin = false;
d7965152
RB
1475}
1476
a37f08a8 1477static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
19f81df2 1478{
52111c46 1479 struct intel_uncore *uncore = stream->uncore;
a37f08a8 1480 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
19f81df2
RB
1481 unsigned long flags;
1482
a37f08a8 1483 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
19f81df2 1484
8f8b1171
CW
1485 intel_uncore_write(uncore, GEN8_OASTATUS, 0);
1486 intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset);
a37f08a8 1487 stream->oa_buffer.head = gtt_offset;
19f81df2 1488
8f8b1171 1489 intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0);
19f81df2
RB
1490
1491 /*
1492 * PRM says:
1493 *
1494 * "This MMIO must be set before the OATAILPTR
1495 * register and after the OAHEADPTR register. This is
1496 * to enable proper functionality of the overflow
1497 * bit."
1498 */
8f8b1171 1499 intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
fe841686 1500 OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
8f8b1171 1501 intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
19f81df2
RB
1502
1503 /* Mark that we need updated tail pointers to read from... */
a37f08a8
UNR
1504 stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
1505 stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
19f81df2
RB
1506
1507 /*
1508 * Reset state used to recognise context switches, affecting which
1509 * reports we will forward to userspace while filtering for a single
1510 * context.
1511 */
a37f08a8 1512 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
19f81df2 1513
a37f08a8 1514 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
19f81df2
RB
1515
1516 /*
1517 * NB: although the OA buffer will initially be allocated
1518 * zeroed via shmfs (and so this memset is redundant when
1519 * first allocating), we may re-init the OA buffer, either
1520 * when re-enabling a stream or in error/reset paths.
1521 *
1522 * The reason we clear the buffer for each re-init is for the
1523 * sanity check in gen8_append_oa_reports() that looks at the
1524 * reason field to make sure it's non-zero, which relies on
1525 * the assumption that new reports are being written to zeroed
1526 * memory...
1527 */
a37f08a8 1528 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
19f81df2 1529
a37f08a8 1530 stream->pollin = false;
19f81df2
RB
1531}
1532
00a7f0d7
LL
1533static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
1534{
1535 struct intel_uncore *uncore = stream->uncore;
1536 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
1537 unsigned long flags;
1538
1539 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
1540
1541 intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
1542 intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
1543 gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
1544 stream->oa_buffer.head = gtt_offset;
1545
1546 /*
1547 * PRM says:
1548 *
1549 * "This MMIO must be set before the OATAILPTR
1550 * register and after the OAHEADPTR register. This is
1551 * to enable proper functionality of the overflow
1552 * bit."
1553 */
1554 intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
1555 OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
1556 intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
1557 gtt_offset & GEN12_OAG_OATAILPTR_MASK);
1558
1559 /* Mark that we need updated tail pointers to read from... */
1560 stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
1561 stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
1562
1563 /*
1564 * Reset state used to recognise context switches, affecting which
1565 * reports we will forward to userspace while filtering for a single
1566 * context.
1567 */
1568 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
1569
1570 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
1571
1572 /*
1573 * NB: although the OA buffer will initially be allocated
1574 * zeroed via shmfs (and so this memset is redundant when
1575 * first allocating), we may re-init the OA buffer, either
1576 * when re-enabling a stream or in error/reset paths.
1577 *
1578 * The reason we clear the buffer for each re-init is for the
1579 * sanity check in gen8_append_oa_reports() that looks at the
1580 * reason field to make sure it's non-zero, which relies on
1581 * the assumption that new reports are being written to zeroed
1582 * memory...
1583 */
1584 memset(stream->oa_buffer.vaddr, 0,
1585 stream->oa_buffer.vma->size);
1586
1587 stream->pollin = false;
1588}
1589
a37f08a8 1590static int alloc_oa_buffer(struct i915_perf_stream *stream)
d7965152 1591{
a9f236d1 1592 struct drm_i915_private *i915 = stream->perf->i915;
d7965152
RB
1593 struct drm_i915_gem_object *bo;
1594 struct i915_vma *vma;
1595 int ret;
1596
a9f236d1 1597 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.vma))
d7965152
RB
1598 return -ENODEV;
1599
fe841686
JL
1600 BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
1601 BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
1602
8f8b1171 1603 bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
d7965152 1604 if (IS_ERR(bo)) {
00376ccf 1605 drm_err(&i915->drm, "Failed to allocate OA buffer\n");
2850748e 1606 return PTR_ERR(bo);
d7965152
RB
1607 }
1608
a679f58d 1609 i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
d7965152
RB
1610
1611 /* PreHSW required 512K alignment, HSW requires 16M */
1612 vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
1613 if (IS_ERR(vma)) {
1614 ret = PTR_ERR(vma);
1615 goto err_unref;
1616 }
a37f08a8 1617 stream->oa_buffer.vma = vma;
d7965152 1618
a37f08a8 1619 stream->oa_buffer.vaddr =
d7965152 1620 i915_gem_object_pin_map(bo, I915_MAP_WB);
a37f08a8
UNR
1621 if (IS_ERR(stream->oa_buffer.vaddr)) {
1622 ret = PTR_ERR(stream->oa_buffer.vaddr);
d7965152
RB
1623 goto err_unpin;
1624 }
1625
2850748e 1626 return 0;
d7965152
RB
1627
1628err_unpin:
1629 __i915_vma_unpin(vma);
1630
1631err_unref:
1632 i915_gem_object_put(bo);
1633
a37f08a8
UNR
1634 stream->oa_buffer.vaddr = NULL;
1635 stream->oa_buffer.vma = NULL;
d7965152 1636
d7965152
RB
1637 return ret;
1638}
1639
daed3e44
LL
1640static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
1641 bool save, i915_reg_t reg, u32 offset,
1642 u32 dword_count)
1643{
1644 u32 cmd;
1645 u32 d;
1646
1647 cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
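	/*
	 * Note (added, illustrative): on gen8+ these MI_SRM/MI_LRM
	 * packets take a 64-bit address, so their DWord length field is
	 * one larger, hence the cmd++ below.
	 */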
1648 if (INTEL_GEN(stream->perf->i915) >= 8)
1649 cmd++;
1650
1651 for (d = 0; d < dword_count; d++) {
1652 *cs++ = cmd;
1653 *cs++ = i915_mmio_reg_offset(reg) + 4 * d;
1654 *cs++ = intel_gt_scratch_offset(stream->engine->gt,
1655 offset) + 4 * d;
1656 *cs++ = 0;
1657 }
1658
1659 return cs;
1660}
1661
1662static int alloc_noa_wait(struct i915_perf_stream *stream)
1663{
1664 struct drm_i915_private *i915 = stream->perf->i915;
1665 struct drm_i915_gem_object *bo;
1666 struct i915_vma *vma;
1667 const u64 delay_ticks = 0xffffffffffffffff -
1668 DIV64_U64_ROUND_UP(
1669 atomic64_read(&stream->perf->noa_programming_delay) *
1670 RUNTIME_INFO(i915)->cs_timestamp_frequency_khz,
1671 1000000ull);
1672 const u32 base = stream->engine->mmio_base;
1673#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
1674 u32 *batch, *ts0, *cs, *jump;
1675 int ret, i;
1676 enum {
1677 START_TS,
1678 NOW_TS,
1679 DELTA_TS,
1680 JUMP_PREDICATE,
1681 DELTA_TARGET,
1682 N_CS_GPR
1683 };
1684
1685 bo = i915_gem_object_create_internal(i915, 4096);
1686 if (IS_ERR(bo)) {
00376ccf
WK
1687 drm_err(&i915->drm,
1688 "Failed to allocate NOA wait batchbuffer\n");
daed3e44
LL
1689 return PTR_ERR(bo);
1690 }
1691
1692 /*
1693 * We pin in GGTT because we jump into this buffer: multiple OA
1694 * config BOs will have a jump to this address, and it needs to
1695 * stay fixed for the lifetime of the i915/perf stream.
1696 */
1697 vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH);
1698 if (IS_ERR(vma)) {
1699 ret = PTR_ERR(vma);
1700 goto err_unref;
1701 }
1702
1703 batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
1704 if (IS_ERR(batch)) {
1705 ret = PTR_ERR(batch);
1706 goto err_unpin;
1707 }
1708
1709 /* Save registers. */
1710 for (i = 0; i < N_CS_GPR; i++)
1711 cs = save_restore_register(
1712 stream, cs, true /* save */, CS_GPR(i),
1713 INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
1714 cs = save_restore_register(
1715 stream, cs, true /* save */, MI_PREDICATE_RESULT_1,
1716 INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
1717
1718 /* First timestamp snapshot location. */
1719 ts0 = cs;
1720
1721 /*
1722 * Initial snapshot of the timestamp register to implement the wait.
1723 * We work with 32-bit values, so clear out the top 32 bits of the
1724 * register because the ALU operates on 64 bits.
1725 */
1726 *cs++ = MI_LOAD_REGISTER_IMM(1);
1727 *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4;
1728 *cs++ = 0;
1729 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
1730 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
1731 *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS));
1732
1733 /*
1734 * This is the location we're going to jump back into until the
1735 * required amount of time has passed.
1736 */
1737 jump = cs;
1738
1739 /*
1740 * Take another snapshot of the timestamp register. Take care to clear
1741 * out the top 32 bits of CS_GPR(1) as we're using it for other
1742 * operations below.
1743 */
1744 *cs++ = MI_LOAD_REGISTER_IMM(1);
1745 *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4;
1746 *cs++ = 0;
1747 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
1748 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
1749 *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS));
1750
1751 /*
1752 * Do a diff between the 2 timestamps and store the result back into
1753 * CS_GPR(1).
1754 */
1755 *cs++ = MI_MATH(5);
1756 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
1757 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
1758 *cs++ = MI_MATH_SUB;
1759 *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
1760 *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
1761
1762 /*
1763 * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
1764 * timestamp has rolled over the 32 bits) into the predicate register
1765 * to be used for the predicated jump.
1766 */
1767 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
1768 *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
1769 *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
1770
1771 /* Restart from the beginning if we had timestamps roll over. */
1772 *cs++ = (INTEL_GEN(i915) < 8 ?
1773 MI_BATCH_BUFFER_START :
1774 MI_BATCH_BUFFER_START_GEN8) |
1775 MI_BATCH_PREDICATE;
1776 *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
1777 *cs++ = 0;
1778
1779 /*
1780 * Now take the diff between the two previous timestamps and add it to:
1781 * ((1 << 64) - 1) - delay (expressed in timestamp ticks)
1782 *
1783 * When the Carry Flag contains 1 this means the elapsed time is
1784 * longer than the expected delay, and we can exit the wait loop.
1785 */
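	/*
	 * Worked example (illustrative, hypothetical numbers): with a
	 * 12 MHz CS timestamp and a 500us delay, delay_ticks ==
	 * (2^64 - 1) - 6000. Once the elapsed delta reaches ~6000 ticks
	 * the MI_MATH ADD below carries out of 64 bits; STOREINV then
	 * clears the predicate so the jump back is no longer taken.
	 */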
1786 *cs++ = MI_LOAD_REGISTER_IMM(2);
1787 *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET));
1788 *cs++ = lower_32_bits(delay_ticks);
1789 *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4;
1790 *cs++ = upper_32_bits(delay_ticks);
1791
1792 *cs++ = MI_MATH(4);
1793 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
1794 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
1795 *cs++ = MI_MATH_ADD;
1796 *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
1797
dd590f68
LL
1798 *cs++ = MI_ARB_CHECK;
1799
daed3e44
LL
1800 /*
1801 * Transfer the result into the predicate register to be used for the
1802 * predicated jump.
1803 */
1804 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
1805 *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
1806 *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
1807
1808 /* Predicate the jump. */
1809 *cs++ = (INTEL_GEN(i915) < 8 ?
1810 MI_BATCH_BUFFER_START :
1811 MI_BATCH_BUFFER_START_GEN8) |
1812 MI_BATCH_PREDICATE;
1813 *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
1814 *cs++ = 0;
1815
1816 /* Restore registers. */
1817 for (i = 0; i < N_CS_GPR; i++)
1818 cs = save_restore_register(
1819 stream, cs, false /* restore */, CS_GPR(i),
1820 INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
1821 cs = save_restore_register(
1822 stream, cs, false /* restore */, MI_PREDICATE_RESULT_1,
1823 INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
1824
1825 /* And return to the ring. */
1826 *cs++ = MI_BATCH_BUFFER_END;
1827
1828 GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch));
1829
1830 i915_gem_object_flush_map(bo);
1831 i915_gem_object_unpin_map(bo);
1832
1833 stream->noa_wait = vma;
1834 return 0;
1835
1836err_unpin:
15d0ace1 1837 i915_vma_unpin_and_release(&vma, 0);
daed3e44
LL
1838err_unref:
1839 i915_gem_object_put(bo);
1840 return ret;
1841}
1842
15d0ace1
LL
1843static u32 *write_cs_mi_lri(u32 *cs,
1844 const struct i915_oa_reg *reg_data,
1845 u32 n_regs)
d7965152 1846{
701f8231 1847 u32 i;
d7965152
RB
1848
1849 for (i = 0; i < n_regs; i++) {
15d0ace1
LL
1850 if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
1851 u32 n_lri = min_t(u32,
1852 n_regs - i,
1853 MI_LOAD_REGISTER_IMM_MAX_REGS);
d7965152 1854
15d0ace1
LL
1855 *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
1856 }
1857 *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
1858 *cs++ = reg_data[i].value;
d7965152 1859 }
15d0ace1
LL
1860
1861 return cs;
d7965152
RB
1862}
1863
15d0ace1 1864static int num_lri_dwords(int num_regs)
d7965152 1865{
15d0ace1
LL
1866 int count = 0;
1867
1868 if (num_regs > 0) {
1869 count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
1870 count += num_regs * 2;
1871 }
1872
1873 return count;
1874}
1875
1876static struct i915_oa_config_bo *
1877alloc_oa_config_buffer(struct i915_perf_stream *stream,
1878 struct i915_oa_config *oa_config)
1879{
1880 struct drm_i915_gem_object *obj;
1881 struct i915_oa_config_bo *oa_bo;
1882 size_t config_length = 0;
1883 u32 *cs;
1884 int err;
1885
1886 oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
1887 if (!oa_bo)
1888 return ERR_PTR(-ENOMEM);
1889
1890 config_length += num_lri_dwords(oa_config->mux_regs_len);
1891 config_length += num_lri_dwords(oa_config->b_counter_regs_len);
1892 config_length += num_lri_dwords(oa_config->flex_regs_len);
93937659 1893 config_length += 3; /* MI_BATCH_BUFFER_START */
15d0ace1
LL
1894 config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
1895
1896 obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
1897 if (IS_ERR(obj)) {
1898 err = PTR_ERR(obj);
1899 goto err_free;
1900 }
1901
1902 cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
1903 if (IS_ERR(cs)) {
1904 err = PTR_ERR(cs);
1905 goto err_oa_bo;
1906 }
1907
1908 cs = write_cs_mi_lri(cs,
1909 oa_config->mux_regs,
1910 oa_config->mux_regs_len);
1911 cs = write_cs_mi_lri(cs,
1912 oa_config->b_counter_regs,
1913 oa_config->b_counter_regs_len);
1914 cs = write_cs_mi_lri(cs,
1915 oa_config->flex_regs,
1916 oa_config->flex_regs_len);
1917
93937659
LL
1918 /* Jump into the active wait. */
1919 *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ?
1920 MI_BATCH_BUFFER_START :
1921 MI_BATCH_BUFFER_START_GEN8);
1922 *cs++ = i915_ggtt_offset(stream->noa_wait);
1923 *cs++ = 0;
15d0ace1
LL
1924
1925 i915_gem_object_flush_map(obj);
1926 i915_gem_object_unpin_map(obj);
1927
1928 oa_bo->vma = i915_vma_instance(obj,
1929 &stream->engine->gt->ggtt->vm,
1930 NULL);
1931 if (IS_ERR(oa_bo->vma)) {
1932 err = PTR_ERR(oa_bo->vma);
1933 goto err_oa_bo;
1934 }
1935
1936 oa_bo->oa_config = i915_oa_config_get(oa_config);
1937 llist_add(&oa_bo->node, &stream->oa_config_bos);
1938
1939 return oa_bo;
1940
1941err_oa_bo:
1942 i915_gem_object_put(obj);
1943err_free:
1944 kfree(oa_bo);
1945 return ERR_PTR(err);
1946}
1947
1948static struct i915_vma *
1949get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
1950{
1951 struct i915_oa_config_bo *oa_bo;
1952
14bfcd3e 1953 /*
15d0ace1
LL
1954 * Look for the buffer in the already allocated BOs attached
1955 * to the stream.
d7965152 1956 */
15d0ace1
LL
1957 llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
1958 if (oa_bo->oa_config == oa_config &&
1959 memcmp(oa_bo->oa_config->uuid,
1960 oa_config->uuid,
1961 sizeof(oa_config->uuid)) == 0)
1962 goto out;
1963 }
1964
1965 oa_bo = alloc_oa_config_buffer(stream, oa_config);
1966 if (IS_ERR(oa_bo))
1967 return ERR_CAST(oa_bo);
1968
1969out:
1970 return i915_vma_get(oa_bo->vma);
1971}
1972
4b4e973d
CW
1973static struct i915_request *
1974emit_oa_config(struct i915_perf_stream *stream,
1975 struct i915_oa_config *oa_config,
1976 struct intel_context *ce)
15d0ace1
LL
1977{
1978 struct i915_request *rq;
1979 struct i915_vma *vma;
1980 int err;
1981
8814c6d0 1982 vma = get_oa_vma(stream, oa_config);
15d0ace1 1983 if (IS_ERR(vma))
4b4e973d 1984 return ERR_CAST(vma);
15d0ace1
LL
1985
1986 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
1987 if (err)
1988 goto err_vma_put;
1989
de5825be 1990 intel_engine_pm_get(ce->engine);
15d0ace1 1991 rq = i915_request_create(ce);
de5825be 1992 intel_engine_pm_put(ce->engine);
15d0ace1
LL
1993 if (IS_ERR(rq)) {
1994 err = PTR_ERR(rq);
1995 goto err_vma_unpin;
1996 }
1997
1998 i915_vma_lock(vma);
1999 err = i915_request_await_object(rq, vma->obj, 0);
2000 if (!err)
2001 err = i915_vma_move_to_active(vma, rq, 0);
2002 i915_vma_unlock(vma);
2003 if (err)
2004 goto err_add_request;
2005
2006 err = rq->engine->emit_bb_start(rq,
2007 vma->node.start, 0,
2008 I915_DISPATCH_SECURE);
4b4e973d
CW
2009 if (err)
2010 goto err_add_request;
2011
2012 i915_request_get(rq);
15d0ace1
LL
2013err_add_request:
2014 i915_request_add(rq);
2015err_vma_unpin:
2016 i915_vma_unpin(vma);
2017err_vma_put:
2018 i915_vma_put(vma);
4b4e973d 2019 return err ? ERR_PTR(err) : rq;
14bfcd3e
LL
2020}
2021
5f5c382e
CW
2022static struct intel_context *oa_context(struct i915_perf_stream *stream)
2023{
2024 return stream->pinned_ctx ?: stream->engine->kernel_context;
2025}
2026
4b4e973d
CW
2027static struct i915_request *
2028hsw_enable_metric_set(struct i915_perf_stream *stream)
14bfcd3e 2029{
52111c46 2030 struct intel_uncore *uncore = stream->uncore;
14bfcd3e
LL
2031
2032 /*
2033 * PRM:
2034 *
2035 * OA unit is using “crclk” for its functionality. When trunk
2036 * level clock gating takes place, OA clock would be gated,
2037 * unable to count the events from non-render clock domain.
2038 * Render clock gating must be disabled when OA is enabled to
2039 * count the events from non-render domain. Unit level clock
2040 * gating for RCS should also be disabled.
2041 */
8f8b1171
CW
2042 intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
2043 GEN7_DOP_CLOCK_GATE_ENABLE, 0);
2044 intel_uncore_rmw(uncore, GEN6_UCGCTL1,
2045 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
14bfcd3e 2046
8814c6d0 2047 return emit_oa_config(stream, stream->oa_config, oa_context(stream));
d7965152
RB
2048}
2049
a37f08a8 2050static void hsw_disable_metric_set(struct i915_perf_stream *stream)
d7965152 2051{
52111c46 2052 struct intel_uncore *uncore = stream->uncore;
a37f08a8 2053
8f8b1171
CW
2054 intel_uncore_rmw(uncore, GEN6_UCGCTL1,
2055 GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0);
2056 intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
2057 0, GEN7_DOP_CLOCK_GATE_ENABLE);
d7965152 2058
8f8b1171 2059 intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
d7965152
RB
2060}
2061
a9877da2
CW
2062static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
2063 i915_reg_t reg)
2064{
2065 u32 mmio = i915_mmio_reg_offset(reg);
2066 int i;
2067
2068 /*
2069 * This arbitrary default will select the 'EU FPU0 Pipeline
2070 * Active' event. In the future it's anticipated that there
2071 * will be an explicit 'No Event' we can select, but not yet...
2072 */
2073 if (!oa_config)
2074 return 0;
2075
2076 for (i = 0; i < oa_config->flex_regs_len; i++) {
2077 if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
2078 return oa_config->flex_regs[i].value;
2079 }
2080
2081 return 0;
2082}
19f81df2
RB
2083/*
2084 * NB: It must always remain pointer safe to run this even if the OA unit
2085 * has been disabled.
2086 *
2087 * It's fine to put out-of-date values into these per-context registers
2088 * in the case that the OA unit has been disabled.
2089 */
b146e5ef 2090static void
7dc56af5
CW
2091gen8_update_reg_state_unlocked(const struct intel_context *ce,
2092 const struct i915_perf_stream *stream)
19f81df2 2093{
8f8b1171
CW
2094 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2095 u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
19f81df2 2096 /* The MMIO offsets for Flex EU registers aren't contiguous */
35ab4fd2
LL
2097 i915_reg_t flex_regs[] = {
2098 EU_PERF_CNTL0,
2099 EU_PERF_CNTL1,
2100 EU_PERF_CNTL2,
2101 EU_PERF_CNTL3,
2102 EU_PERF_CNTL4,
2103 EU_PERF_CNTL5,
2104 EU_PERF_CNTL6,
19f81df2 2105 };
7dc56af5 2106 u32 *reg_state = ce->lrc_reg_state;
19f81df2
RB
2107 int i;
2108
ccdeed49
UNR
2109 reg_state[ctx_oactxctrl + 1] =
2110 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2111 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2112 GEN8_OA_COUNTER_RESUME;
19f81df2 2113
ccdeed49 2114 for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
7dc56af5
CW
2115 reg_state[ctx_flexeu0 + i * 2 + 1] =
2116 oa_config_flex_reg(stream->oa_config, flex_regs[i]);
ec431eae 2117
8f8b1171
CW
2118 reg_state[CTX_R_PWR_CLK_STATE] =
2119 intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu);
19f81df2
RB
2120}
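/*
 * Note (added, illustrative): the logical ring context image stores
 * register state as MI_LRI-style (offset, value) dword pairs, which is
 * why the values above land at ctx_oactxctrl + 1 and
 * ctx_flexeu0 + i * 2 + 1 rather than at the offsets themselves.
 */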
2121
a9877da2
CW
2122struct flex {
2123 i915_reg_t reg;
2124 u32 offset;
2125 u32 value;
2126};
2127
2128static int
2129gen8_store_flex(struct i915_request *rq,
2130 struct intel_context *ce,
2131 const struct flex *flex, unsigned int count)
2132{
2133 u32 offset;
2134 u32 *cs;
2135
2136 cs = intel_ring_begin(rq, 4 * count);
2137 if (IS_ERR(cs))
2138 return PTR_ERR(cs);
2139
2140 offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
2141 do {
2142 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
7dc56af5 2143 *cs++ = offset + flex->offset * sizeof(u32);
a9877da2
CW
2144 *cs++ = 0;
2145 *cs++ = flex->value;
2146 } while (flex++, --count);
2147
2148 intel_ring_advance(rq, cs);
2149
2150 return 0;
2151}
2152
2153static int
2154gen8_load_flex(struct i915_request *rq,
2155 struct intel_context *ce,
2156 const struct flex *flex, unsigned int count)
2157{
2158 u32 *cs;
2159
2160 GEM_BUG_ON(!count || count > 63);
2161
2162 cs = intel_ring_begin(rq, 2 * count + 2);
2163 if (IS_ERR(cs))
2164 return PTR_ERR(cs);
2165
2166 *cs++ = MI_LOAD_REGISTER_IMM(count);
2167 do {
2168 *cs++ = i915_mmio_reg_offset(flex->reg);
2169 *cs++ = flex->value;
2170 } while (flex++, --count);
2171 *cs++ = MI_NOOP;
2172
2173 intel_ring_advance(rq, cs);
2174
2175 return 0;
2176}
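/*
 * Note (added, illustrative): gen8_store_flex() patches another
 * context's saved image in memory via MI_STORE_DWORD_IMM through the
 * GGTT, whereas gen8_load_flex() emits MI_LOAD_REGISTER_IMM on the
 * target context itself, so the live register values change
 * immediately.
 */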
2177
2178static int gen8_modify_context(struct intel_context *ce,
2179 const struct flex *flex, unsigned int count)
2180{
2181 struct i915_request *rq;
2182 int err;
2183
de5825be 2184 rq = intel_engine_create_kernel_request(ce->engine);
a9877da2
CW
2185 if (IS_ERR(rq))
2186 return PTR_ERR(rq);
2187
2188 /* Serialise with the remote context */
2189 err = intel_context_prepare_remote_request(ce, rq);
2190 if (err == 0)
2191 err = gen8_store_flex(rq, ce, flex, count);
2192
2193 i915_request_add(rq);
2194 return err;
2195}
2196
2197static int gen8_modify_self(struct intel_context *ce,
2198 const struct flex *flex, unsigned int count)
2199{
2200 struct i915_request *rq;
2201 int err;
2202
d236e2ac 2203 intel_engine_pm_get(ce->engine);
a9877da2 2204 rq = i915_request_create(ce);
d236e2ac 2205 intel_engine_pm_put(ce->engine);
a9877da2
CW
2206 if (IS_ERR(rq))
2207 return PTR_ERR(rq);
2208
2209 err = gen8_load_flex(rq, ce, flex, count);
2210
2211 i915_request_add(rq);
2212 return err;
2213}
2214
5cca5038
CW
2215static int gen8_configure_context(struct i915_gem_context *ctx,
2216 struct flex *flex, unsigned int count)
2217{
2218 struct i915_gem_engines_iter it;
2219 struct intel_context *ce;
2220 int err = 0;
2221
2222 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
2223 GEM_BUG_ON(ce == ce->engine->kernel_context);
2224
2225 if (ce->engine->class != RENDER_CLASS)
2226 continue;
2227
feed5c7b
CW
2228 /* Otherwise OA settings will be set upon first use */
2229 if (!intel_context_pin_if_active(ce))
2230 continue;
5cca5038
CW
2231
2232 flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu);
feed5c7b 2233 err = gen8_modify_context(ce, flex, count);
5cca5038 2234
feed5c7b 2235 intel_context_unpin(ce);
5cca5038
CW
2236 if (err)
2237 break;
2238 }
2239 i915_gem_context_unlock_engines(ctx);
2240
2241 return err;
2242}
2243
ccdeed49 2244static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
00a7f0d7 2245{
ccdeed49
UNR
2246 int err;
2247 struct intel_context *ce = stream->pinned_ctx;
2248 u32 format = stream->oa_buffer.format;
2249 struct flex regs_context[] = {
2250 {
2251 GEN8_OACTXCONTROL,
2252 stream->perf->ctx_oactxctrl_offset + 1,
2253 enable ? GEN8_OA_COUNTER_RESUME : 0,
2254 },
2255 };
2256 /* Offsets in regs_lri are not used since this configuration is only
2257 * applied using LRI. Initialize the correct offsets for posterity.
2258 */
2259#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
2260 struct flex regs_lri[] = {
2261 {
2262 GEN12_OAR_OACONTROL,
2263 GEN12_OAR_OACONTROL_OFFSET + 1,
2264 (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
2265 (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
2266 },
2267 {
2268 RING_CONTEXT_CONTROL(ce->engine->mmio_base),
2269 CTX_CONTEXT_CONTROL,
2270 _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
2271 enable ?
2272 GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
2273 0)
2274 },
2275 };
00a7f0d7 2276
ccdeed49
UNR
2277 /* Modify the context image of the pinned context with regs_context */
2278 err = intel_context_lock_pinned(ce);
2279 if (err)
2280 return err;
00a7f0d7 2281
ccdeed49
UNR
2282 err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
2283 intel_context_unlock_pinned(ce);
2284 if (err)
2285 return err;
00a7f0d7 2286
ccdeed49
UNR
2287 /* Apply regs_lri using LRI with pinned context */
2288 return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
00a7f0d7
LL
2289}
2290
19f81df2
RB
2291/*
2292 * Manages updating the per-context aspects of the OA stream
2293 * configuration across all contexts.
2294 *
2295 * The awkward consideration here is that OACTXCONTROL controls the
2296 * exponent for periodic sampling which is primarily used for system
2297 * wide profiling where we'd like a consistent sampling period even in
2298 * the face of context switches.
2299 *
2300 * Our approach of updating the register state context (as opposed to
2301 * say using a workaround batch buffer) ensures that the hardware
2302 * won't automatically reload an out-of-date timer exponent even
2303 * transiently before a WA BB could be parsed.
2304 *
2305 * This function needs to:
2306 * - Ensure the currently running context's per-context OA state is
2307 * updated
2308 * - Ensure that all existing contexts will have the correct per-context
2309 * OA state if they are scheduled for use.
2310 * - Ensure any new contexts will be initialized with the correct
2311 * per-context OA state.
2312 *
2313 * Note: it's only the RCS/Render context that has any OA state.
ccdeed49 2314 * Note: the first flex register passed must always be R_PWR_CLK_STATE
19f81df2 2315 */
ccdeed49
UNR
2316static int oa_configure_all_contexts(struct i915_perf_stream *stream,
2317 struct flex *regs,
2318 size_t num_regs)
19f81df2 2319{
8f8b1171 2320 struct drm_i915_private *i915 = stream->perf->i915;
a9877da2 2321 struct intel_engine_cs *engine;
a4e7ccda 2322 struct i915_gem_context *ctx, *cn;
ccdeed49 2323 int err;
a9877da2 2324
a4c969d1 2325 lockdep_assert_held(&stream->perf->lock);
19f81df2 2326
19f81df2
RB
2327 /*
2328 * The OA register config is set up through the context image. This image
2329 * might be written to by the GPU on context switch (in particular on
2330 * lite-restore). This means we can't safely update a context's image,
2331 * if this context is scheduled/submitted to run on the GPU.
2332 *
2333 * We could emit the OA register config through the batch buffer but
2334 * this might leave a small interval of time where the OA unit is
2335 * configured at an invalid sampling period.
2336 *
a9877da2
CW
2337 * Note that since we emit all requests from a single ring, there
2338 * is still an implicit global barrier here that may cause a high
2339 * priority context to wait for an otherwise independent low priority
2340 * context. Contexts idle at the time of reconfiguration are not
2341 * trapped behind the barrier.
19f81df2 2342 */
a4e7ccda
CW
2343 spin_lock(&i915->gem.contexts.lock);
2344 list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
a4e7ccda
CW
2345 if (!kref_get_unless_zero(&ctx->ref))
2346 continue;
2347
2348 spin_unlock(&i915->gem.contexts.lock);
2349
ccdeed49 2350 err = gen8_configure_context(ctx, regs, num_regs);
a4e7ccda
CW
2351 if (err) {
2352 i915_gem_context_put(ctx);
a9877da2 2353 return err;
a4e7ccda
CW
2354 }
2355
2356 spin_lock(&i915->gem.contexts.lock);
2357 list_safe_reset_next(ctx, cn, link);
2358 i915_gem_context_put(ctx);
19f81df2 2359 }
a4e7ccda 2360 spin_unlock(&i915->gem.contexts.lock);
19f81df2 2361
722f3de3 2362 /*
a9877da2
CW
2363 * After updating all other contexts, we need to modify ourselves.
2364 * If we don't modify the kernel_context, we do not get events while
2365 * idle.
722f3de3 2366 */
750e76b4 2367 for_each_uabi_engine(engine, i915) {
a9877da2 2368 struct intel_context *ce = engine->kernel_context;
722f3de3 2369
a9877da2
CW
2370 if (engine->class != RENDER_CLASS)
2371 continue;
2372
2373 regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
2374
ccdeed49 2375 err = gen8_modify_self(ce, regs, num_regs);
a9877da2
CW
2376 if (err)
2377 return err;
2378 }
722f3de3
TU
2379
2380 return 0;
19f81df2
RB
2381}
2382
ccdeed49
UNR
2383static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
2384 const struct i915_oa_config *oa_config)
2385{
2386 struct flex regs[] = {
2387 {
2388 GEN8_R_PWR_CLK_STATE,
2389 CTX_R_PWR_CLK_STATE,
2390 },
2391 };
2392
2393 return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
2394}
2395
2396static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
2397 const struct i915_oa_config *oa_config)
2398{
2399 /* The MMIO offsets for Flex EU registers aren't contiguous */
2400 const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2401#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
2402 struct flex regs[] = {
2403 {
2404 GEN8_R_PWR_CLK_STATE,
2405 CTX_R_PWR_CLK_STATE,
2406 },
2407 {
2408 GEN8_OACTXCONTROL,
2409 stream->perf->ctx_oactxctrl_offset + 1,
2410 },
2411 { EU_PERF_CNTL0, ctx_flexeuN(0) },
2412 { EU_PERF_CNTL1, ctx_flexeuN(1) },
2413 { EU_PERF_CNTL2, ctx_flexeuN(2) },
2414 { EU_PERF_CNTL3, ctx_flexeuN(3) },
2415 { EU_PERF_CNTL4, ctx_flexeuN(4) },
2416 { EU_PERF_CNTL5, ctx_flexeuN(5) },
2417 { EU_PERF_CNTL6, ctx_flexeuN(6) },
2418 };
2419#undef ctx_flexeuN
2420 int i;
2421
2422 regs[1].value =
2423 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2424 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2425 GEN8_OA_COUNTER_RESUME;
2426
2427 for (i = 2; i < ARRAY_SIZE(regs); i++)
2428 regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
2429
2430 return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
2431}
2432
4b4e973d
CW
2433static struct i915_request *
2434gen8_enable_metric_set(struct i915_perf_stream *stream)
19f81df2 2435{
52111c46 2436 struct intel_uncore *uncore = stream->uncore;
8814c6d0 2437 struct i915_oa_config *oa_config = stream->oa_config;
701f8231 2438 int ret;
19f81df2
RB
2439
2440 /*
2441 * We disable slice/unslice clock ratio change reports on SKL since
2442 * they are too noisy. The HW generates a lot of redundant reports
2443 * where the ratio hasn't really changed causing a lot of redundant
2444 * work to processes and increasing the chances we'll hit buffer
2445 * overruns.
2446 *
2447 * Although we don't currently use the 'disable overrun' OABUFFER
2448 * feature it's worth noting that clock ratio reports have to be
2449 * disabled before considering to use that feature since the HW doesn't
2450 * correctly block these reports.
2451 *
2452 * Currently none of the high-level metrics we have depend on knowing
2453 * this ratio to normalize.
2454 *
2455 * Note: This register is not power context saved and restored, but
2456 * that's OK considering that we disable RC6 while the OA unit is
2457 * enabled.
2458 *
2459 * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to
2460 * be read back from automatically triggered reports, as part of the
2461 * RPT_ID field.
2462 */
8f8b1171
CW
2463 if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) {
2464 intel_uncore_write(uncore, GEN8_OA_DEBUG,
2465 _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
2466 GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
19f81df2
RB
2467 }
2468
2469 /*
2470 * Update all contexts prior writing the mux configurations as we need
2471 * to make sure all slices/subslices are ON before writing to NOA
2472 * registers.
2473 */
00a7f0d7
LL
2474 ret = lrc_configure_all_contexts(stream, oa_config);
2475 if (ret)
4b4e973d 2476 return ERR_PTR(ret);
00a7f0d7
LL
2477
2478 return emit_oa_config(stream, oa_config, oa_context(stream));
2479}
2480
9278bbb6
CW
2481static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
2482{
2483 return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
2484 (stream->sample_flags & SAMPLE_OA_REPORT) ?
2485 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
2486}
2487
4b4e973d
CW
2488static struct i915_request *
2489gen12_enable_metric_set(struct i915_perf_stream *stream)
00a7f0d7
LL
2490{
2491 struct intel_uncore *uncore = stream->uncore;
2492 struct i915_oa_config *oa_config = stream->oa_config;
2493 bool periodic = stream->periodic;
2494 u32 period_exponent = stream->period_exponent;
2495 int ret;
2496
2497 intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
2498 /* Disable clk ratio reports, like previous Gens. */
2499 _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
2500 GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
2501 /*
9278bbb6
CW
2502 * If the user didn't require OA reports, instruct
2503 * the hardware not to emit ctx switch reports.
00a7f0d7 2504 */
9278bbb6 2505 oag_report_ctx_switches(stream));
00a7f0d7
LL
2506
2507 intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
2508 (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
2509 GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
2510 (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
2511 : 0);
2512
2513 /*
2514 * Update all contexts prior writing the mux configurations as we need
2515 * to make sure all slices/subslices are ON before writing to NOA
2516 * registers.
2517 */
ccdeed49 2518 ret = gen12_configure_all_contexts(stream, oa_config);
19f81df2 2519 if (ret)
4b4e973d 2520 return ERR_PTR(ret);
19f81df2 2521
00a7f0d7
LL
2522 /*
2523 * For Gen12, performance counters are context
2524 * saved/restored. Only enable it for the context that
2525 * requested this.
2526 */
2527 if (stream->ctx) {
ccdeed49 2528 ret = gen12_configure_oar_context(stream, true);
00a7f0d7 2529 if (ret)
4b4e973d 2530 return ERR_PTR(ret);
00a7f0d7
LL
2531 }
2532
8814c6d0 2533 return emit_oa_config(stream, oa_config, oa_context(stream));
19f81df2
RB
2534}
2535
a37f08a8 2536static void gen8_disable_metric_set(struct i915_perf_stream *stream)
19f81df2 2537{
52111c46 2538 struct intel_uncore *uncore = stream->uncore;
a37f08a8 2539
19f81df2 2540 /* Reset all contexts' slices/subslices configurations. */
00a7f0d7 2541 lrc_configure_all_contexts(stream, NULL);
28964cf2 2542
8f8b1171 2543 intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
19f81df2
RB
2544}
2545
a37f08a8 2546static void gen10_disable_metric_set(struct i915_perf_stream *stream)
95690a02 2547{
52111c46 2548 struct intel_uncore *uncore = stream->uncore;
a37f08a8 2549
95690a02 2550 /* Reset all contexts' slices/subslices configurations. */
00a7f0d7
LL
2551 lrc_configure_all_contexts(stream, NULL);
2552
2553 /* Make sure we disable noa to save power. */
2554 intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
2555}
2556
2557static void gen12_disable_metric_set(struct i915_perf_stream *stream)
2558{
2559 struct intel_uncore *uncore = stream->uncore;
2560
2561 /* Reset all contexts' slices/subslices configurations. */
ccdeed49 2562 gen12_configure_all_contexts(stream, NULL);
00a7f0d7
LL
2563
2564 /* disable the context save/restore or OAR counters */
2565 if (stream->ctx)
ccdeed49 2566 gen12_configure_oar_context(stream, false);
95690a02
LL
2567
2568 /* Make sure we disable noa to save power. */
8f8b1171 2569 intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
95690a02
LL
2570}
2571
5728de2f 2572static void gen7_oa_enable(struct i915_perf_stream *stream)
d7965152 2573{
52111c46 2574 struct intel_uncore *uncore = stream->uncore;
5728de2f 2575 struct i915_gem_context *ctx = stream->ctx;
a37f08a8
UNR
2576 u32 ctx_id = stream->specific_ctx_id;
2577 bool periodic = stream->periodic;
2578 u32 period_exponent = stream->period_exponent;
2579 u32 report_format = stream->oa_buffer.format;
11051303 2580
1bef3409
RB
2581 /*
2582 * Reset buf pointers so we don't forward reports from before now.
2583 *
2584 * Think carefully if considering trying to avoid this, since it
2585 * also ensures status flags and the buffer itself are cleared
2586 * in error paths, and we have checks for invalid reports based
2587 * on the assumption that certain fields are written to zeroed
2588 * memory which this helps maintains.
2589 */
a37f08a8 2590 gen7_init_oa_buffer(stream);
d7965152 2591
8f8b1171
CW
2592 intel_uncore_write(uncore, GEN7_OACONTROL,
2593 (ctx_id & GEN7_OACONTROL_CTX_MASK) |
2594 (period_exponent <<
2595 GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
2596 (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
2597 (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
2598 (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
2599 GEN7_OACONTROL_ENABLE);
d7965152
RB
2600}
2601
5728de2f 2602static void gen8_oa_enable(struct i915_perf_stream *stream)
19f81df2 2603{
52111c46 2604 struct intel_uncore *uncore = stream->uncore;
a37f08a8 2605 u32 report_format = stream->oa_buffer.format;
19f81df2
RB
2606
2607 /*
2608 * Reset buf pointers so we don't forward reports from before now.
2609 *
2610 * Think carefully if considering trying to avoid this, since it
2611 * also ensures status flags and the buffer itself are cleared
2612 * in error paths, and we have checks for invalid reports based
2613 * on the assumption that certain fields are written to zeroed
2614 * memory, which this helps maintain.
2615 */
a37f08a8 2616 gen8_init_oa_buffer(stream);
19f81df2
RB
2617
2618 /*
2619 * Note: we don't rely on the hardware to perform single context
2620 * filtering and instead filter on the cpu based on the context-id
2621 * field of reports
2622 */
8f8b1171
CW
2623 intel_uncore_write(uncore, GEN8_OACONTROL,
2624 (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
2625 GEN8_OA_COUNTER_ENABLE);
19f81df2
RB
2626}
2627
00a7f0d7
LL
2628static void gen12_oa_enable(struct i915_perf_stream *stream)
2629{
2630 struct intel_uncore *uncore = stream->uncore;
2631 u32 report_format = stream->oa_buffer.format;
2632
2633 /*
2634 * If we don't want OA reports from the OA buffer, then we don't even
2635 * need to program the OAG unit.
2636 */
2637 if (!(stream->sample_flags & SAMPLE_OA_REPORT))
2638 return;
2639
2640 gen12_init_oa_buffer(stream);
2641
2642 intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
2643 (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
2644 GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
2645}
2646
16d98b31
RB
2647/**
2648 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
2649 * @stream: An i915 perf stream opened for OA metrics
2650 *
2651 * [Re]enables hardware periodic sampling according to the period configured
2652 * when opening the stream. This also starts a hrtimer that will periodically
2653 * check for data in the circular OA buffer for notifying userspace (e.g.
2654 * during a read() or poll()).
2655 */
d7965152
RB
2656static void i915_oa_stream_enable(struct i915_perf_stream *stream)
2657{
8f8b1171 2658 stream->perf->ops.oa_enable(stream);
d7965152 2659
a37f08a8
UNR
2660 if (stream->periodic)
2661 hrtimer_start(&stream->poll_check_timer,
d7965152
RB
2662 ns_to_ktime(POLL_PERIOD),
2663 HRTIMER_MODE_REL_PINNED);
2664}
2665
5728de2f 2666static void gen7_oa_disable(struct i915_perf_stream *stream)
d7965152 2667{
52111c46 2668 struct intel_uncore *uncore = stream->uncore;
5728de2f 2669
97a04e0d
DCS
2670 intel_uncore_write(uncore, GEN7_OACONTROL, 0);
2671 if (intel_wait_for_register(uncore,
e896d29a
CW
2672 GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
2673 50))
0bf85735
WK
2674 drm_err(&stream->perf->i915->drm,
2675 "wait for OA to be disabled timed out\n");
d7965152
RB
2676}
2677
5728de2f 2678static void gen8_oa_disable(struct i915_perf_stream *stream)
19f81df2 2679{
52111c46 2680 struct intel_uncore *uncore = stream->uncore;
5728de2f 2681
97a04e0d
DCS
2682 intel_uncore_write(uncore, GEN8_OACONTROL, 0);
2683 if (intel_wait_for_register(uncore,
e896d29a
CW
2684 GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
2685 50))
0bf85735
WK
2686 drm_err(&stream->perf->i915->drm,
2687 "wait for OA to be disabled timed out\n");
19f81df2
RB
2688}
2689
00a7f0d7
LL
2690static void gen12_oa_disable(struct i915_perf_stream *stream)
2691{
2692 struct intel_uncore *uncore = stream->uncore;
2693
2694 intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
2695 if (intel_wait_for_register(uncore,
2696 GEN12_OAG_OACONTROL,
2697 GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
2698 50))
0bf85735
WK
2699 drm_err(&stream->perf->i915->drm,
2700 "wait for OA to be disabled timed out\n");
c06aa1b4
UNR
2701
2702 intel_uncore_write(uncore, GEN12_OA_TLB_INV_CR, 1);
2703 if (intel_wait_for_register(uncore,
2704 GEN12_OA_TLB_INV_CR,
2705 1, 0,
2706 50))
2707 drm_err(&stream->perf->i915->drm,
2708 "wait for OA tlb invalidate timed out\n");
00a7f0d7
LL
2709}
2710
16d98b31
RB
2711/**
2712 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
2713 * @stream: An i915 perf stream opened for OA metrics
2714 *
2715 * Stops the OA unit from periodically writing counter reports into the
2716 * circular OA buffer. This also stops the hrtimer that periodically checks for
2717 * data in the circular OA buffer, for notifying userspace.
2718 */
d7965152
RB
2719static void i915_oa_stream_disable(struct i915_perf_stream *stream)
2720{
8f8b1171 2721 stream->perf->ops.oa_disable(stream);
d7965152 2722
a37f08a8
UNR
2723 if (stream->periodic)
2724 hrtimer_cancel(&stream->poll_check_timer);
d7965152
RB
2725}
2726
d7965152
RB
2727static const struct i915_perf_stream_ops i915_oa_stream_ops = {
2728 .destroy = i915_oa_stream_destroy,
2729 .enable = i915_oa_stream_enable,
2730 .disable = i915_oa_stream_disable,
2731 .wait_unlocked = i915_oa_wait_unlocked,
2732 .poll_wait = i915_oa_poll_wait,
2733 .read = i915_oa_read,
eec688e1
RB
2734};
2735
4b4e973d
CW
2736static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
2737{
2738 struct i915_request *rq;
2739
2740 rq = stream->perf->ops.enable_metric_set(stream);
2741 if (IS_ERR(rq))
2742 return PTR_ERR(rq);
2743
2744 i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
2745 i915_request_put(rq);
2746
2747 return 0;
2748}
2749
16d98b31
RB
2750/**
2751 * i915_oa_stream_init - validate combined props for OA stream and init
2752 * @stream: An i915 perf stream
2753 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
2754 * @props: The property state that configures stream (individually validated)
2755 *
2756 * While read_properties_unlocked() validates properties in isolation it
2757 * doesn't ensure that the combination necessarily makes sense.
2758 *
2759 * At this point it has been determined that userspace wants a stream of
2760 * OA metrics, but we still need to validate that the combined
2761 * properties are OK.
2762 *
2763 * If the configuration makes sense then we can allocate memory for
2764 * a circular OA buffer and apply the requested metric set configuration.
2765 *
2766 * Returns: zero on success or a negative error code.
2767 */
d7965152
RB
2768static int i915_oa_stream_init(struct i915_perf_stream *stream,
2769 struct drm_i915_perf_open_param *param,
2770 struct perf_open_properties *props)
2771{
a9f236d1 2772 struct drm_i915_private *i915 = stream->perf->i915;
8f8b1171 2773 struct i915_perf *perf = stream->perf;
d7965152
RB
2774 int format_size;
2775 int ret;
2776
9a61363a
LL
2777 if (!props->engine) {
2778 DRM_DEBUG("OA engine not specified\n");
2779 return -EINVAL;
2780 }
2781
2782 /*
2783 * If the sysfs metrics/ directory wasn't registered for some
442b8c06
RB
2784 * reason then don't let userspace try their luck with config
2785 * IDs
2786 */
8f8b1171 2787 if (!perf->metrics_kobj) {
7708550c 2788 DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
442b8c06
RB
2789 return -EINVAL;
2790 }
2791
322d56aa
UNR
2792 if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
2793 (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
7708550c 2794 DRM_DEBUG("Only OA report sampling supported\n");
d7965152
RB
2795 return -EINVAL;
2796 }
2797
8f8b1171 2798 if (!perf->ops.enable_metric_set) {
7708550c 2799 DRM_DEBUG("OA unit not supported\n");
d7965152
RB
2800 return -ENODEV;
2801 }
2802
9a61363a
LL
2803 /*
2804 * To avoid the complexity of having to accurately filter
d7965152
RB
2805 * counter reports and marshal to the appropriate client
2806 * we currently only allow exclusive access
2807 */
8f8b1171 2808 if (perf->exclusive_stream) {
7708550c 2809 DRM_DEBUG("OA unit already in use\n");
d7965152
RB
2810 return -EBUSY;
2811 }
2812
d7965152 2813 if (!props->oa_format) {
7708550c 2814 DRM_DEBUG("OA report format not specified\n");
d7965152
RB
2815 return -EINVAL;
2816 }
2817
9a61363a 2818 stream->engine = props->engine;
52111c46 2819 stream->uncore = stream->engine->gt->uncore;
9a61363a 2820
d7965152
RB
2821 stream->sample_size = sizeof(struct drm_i915_perf_record_header);
2822
8f8b1171 2823 format_size = perf->oa_formats[props->oa_format].size;
d7965152 2824
322d56aa 2825 stream->sample_flags = props->sample_flags;
d7965152
RB
2826 stream->sample_size += format_size;
2827
a37f08a8 2828 stream->oa_buffer.format_size = format_size;
a9f236d1 2829 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0))
d7965152
RB
2830 return -EINVAL;
2831
9cd20ef7
LL
2832 stream->hold_preemption = props->hold_preemption;
2833
a37f08a8 2834 stream->oa_buffer.format =
8f8b1171 2835 perf->oa_formats[props->oa_format].format;
d7965152 2836
a37f08a8
UNR
2837 stream->periodic = props->oa_periodic;
2838 if (stream->periodic)
2839 stream->period_exponent = props->oa_period_exponent;
d7965152 2840
d7965152
RB
2841 if (stream->ctx) {
2842 ret = oa_get_render_ctx_id(stream);
9bd9be66
LL
2843 if (ret) {
2844 DRM_DEBUG("Invalid context id to filter with\n");
d7965152 2845 return ret;
9bd9be66 2846 }
d7965152
RB
2847 }
2848
daed3e44
LL
2849 ret = alloc_noa_wait(stream);
2850 if (ret) {
2851 DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
2852 goto err_noa_wait_alloc;
2853 }
2854
6a45008a
LL
2855 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
2856 if (!stream->oa_config) {
9bd9be66 2857 DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
6a45008a 2858 ret = -EINVAL;
f89823c2 2859 goto err_config;
9bd9be66 2860 }
701f8231 2861
d7965152
RB
2862 /* PRM - observability performance counters:
2863 *
2864 * OACONTROL, performance counter enable, note:
2865 *
2866 * "When this bit is set, in order to have coherent counts,
2867 * RC6 power state and trunk clock gating must be disabled.
2868 * This can be achieved by programming MMIO registers as
2869 * 0xA094=0 and 0xA090[31]=1"
2870 *
2871 * In our case we are expecting that taking pm + FORCEWAKE
2872 * references will effectively disable RC6.
2873 */
a5efcde6 2874 intel_engine_pm_get(stream->engine);
52111c46 2875 intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
d7965152 2876
a37f08a8 2877 ret = alloc_oa_buffer(stream);
987f8c44 2878 if (ret)
2879 goto err_oa_buf_alloc;
2880
ec431eae 2881 stream->ops = &i915_oa_stream_ops;
a5af081d 2882 WRITE_ONCE(perf->exclusive_stream, stream);
ec431eae 2883
4b4e973d 2884 ret = i915_perf_stream_enable_sync(stream);
9bd9be66
LL
2885 if (ret) {
2886 DRM_DEBUG("Unable to enable metric set\n");
d7965152 2887 goto err_enable;
9bd9be66 2888 }
d7965152 2889
6a45008a
LL
2890 DRM_DEBUG("opening stream oa config uuid=%s\n",
2891 stream->oa_config->uuid);
2892
a37f08a8
UNR
2893 hrtimer_init(&stream->poll_check_timer,
2894 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2895 stream->poll_check_timer.function = oa_poll_check_timer_cb;
2896 init_waitqueue_head(&stream->poll_wq);
2897 spin_lock_init(&stream->oa_buffer.ptr_lock);
2898
d7965152
RB
2899 return 0;
2900
41d3fdcd 2901err_enable:
a5af081d 2902 WRITE_ONCE(perf->exclusive_stream, NULL);
8f8b1171 2903 perf->ops.disable_metric_set(stream);
701f8231 2904
a37f08a8 2905 free_oa_buffer(stream);
d7965152
RB
2906
2907err_oa_buf_alloc:
6a45008a 2908 free_oa_configs(stream);
f89823c2 2909
52111c46 2910 intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
a5efcde6 2911 intel_engine_pm_put(stream->engine);
f89823c2
LL
2912
2913err_config:
daed3e44
LL
2914 free_noa_wait(stream);
2915
2916err_noa_wait_alloc:
d7965152
RB
2917 if (stream->ctx)
2918 oa_put_render_ctx_id(stream);
2919
2920 return ret;
2921}
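/*
 * Illustrative userspace sketch (not part of the original file):
 * opening an OA stream with the kind of property list that
 * i915_oa_stream_init() validates above. Error handling is trimmed and
 * metrics_set_id is assumed to come from sysfs (metrics/<uuid>/id); the
 * helper name is hypothetical.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int open_oa_stream(int drm_fd, uint64_t metrics_set_id)
{
	uint64_t props[] = {
		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
		DRM_I915_PERF_PROP_OA_EXPONENT, 16,	/* periodic sampling exponent */
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = sizeof(props) / (2 * sizeof(uint64_t)),
		.properties_ptr = (uintptr_t)props,
	};

	/* Returns the stream fd on success, or a negative error. */
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
}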
2922
7dc56af5
CW
2923void i915_oa_init_reg_state(const struct intel_context *ce,
2924 const struct intel_engine_cs *engine)
19f81df2 2925{
28b6cb08 2926 struct i915_perf_stream *stream;
19f81df2 2927
8a68d464 2928 if (engine->class != RENDER_CLASS)
19f81df2
RB
2929 return;
2930
a5af081d
CW
2931 /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
2932 stream = READ_ONCE(engine->i915->perf.exclusive_stream);
ccdeed49
UNR
2933 /*
2934 * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
2935 * is already doing that, so nothing to be done for gen12 here.
2936 */
2937 if (stream && INTEL_GEN(stream->perf->i915) < 12)
7dc56af5 2938 gen8_update_reg_state_unlocked(ce, stream);
19f81df2
RB
2939}
2940
16d98b31
RB
2941/**
2942 * i915_perf_read - handles read() FOP for i915 perf stream FDs
2943 * @file: An i915 perf stream file
2944 * @buf: destination buffer given by userspace
2945 * @count: the number of bytes userspace wants to read
2946 * @ppos: (inout) file seek position (unused)
2947 *
2948 * The entry point for handling a read() on a stream file descriptor from
2949 * userspace. Most of the work is left to i915_perf_read_locked() and
2950 * &i915_perf_stream_ops->read but to save having stream implementations (of
2951 * which we might have multiple later) we handle blocking read here.
2952 *
2953 * We can also consistently treat trying to read from a disabled stream
2954 * as an IO error so implementations can assume the stream is enabled
2955 * while reading.
2956 *
2957 * Returns: The number of bytes copied or a negative error code on failure.
2958 */
eec688e1
RB
2959static ssize_t i915_perf_read(struct file *file,
2960 char __user *buf,
2961 size_t count,
2962 loff_t *ppos)
2963{
2964 struct i915_perf_stream *stream = file->private_data;
8f8b1171 2965 struct i915_perf *perf = stream->perf;
bcad588d
AD
2966 size_t offset = 0;
2967 int ret;
eec688e1 2968
d7965152
RB
2969 /* To ensure it's handled consistently we simply treat all reads of a
2970 * disabled stream as an error. In particular it might otherwise lead
2971 * to a deadlock for blocking file descriptors...
2972 */
2973 if (!stream->enabled)
2974 return -EIO;
2975
eec688e1 2976 if (!(file->f_flags & O_NONBLOCK)) {
d7965152
RB
2977 /* There's the small chance of false positives from
2978 * stream->ops->wait_unlocked.
2979 *
2980 * E.g. with single context filtering since we only wait until
2981 * oabuffer has >= 1 report we don't immediately know whether
2982 * any reports really belong to the current context
eec688e1
RB
2983 */
2984 do {
2985 ret = stream->ops->wait_unlocked(stream);
2986 if (ret)
2987 return ret;
2988
8f8b1171 2989 mutex_lock(&perf->lock);
bcad588d 2990 ret = stream->ops->read(stream, buf, count, &offset);
8f8b1171 2991 mutex_unlock(&perf->lock);
bcad588d 2992 } while (!offset && !ret);
eec688e1 2993 } else {
8f8b1171 2994 mutex_lock(&perf->lock);
bcad588d 2995 ret = stream->ops->read(stream, buf, count, &offset);
8f8b1171 2996 mutex_unlock(&perf->lock);
eec688e1
RB
2997 }
2998
a9a08845 2999 /* We allow the poll checking to sometimes report false positive EPOLLIN
26ebd9c7
RB
3000 * events where we might actually report EAGAIN on read() if there's
3001 * not really any data available. In this situation though we don't
a9a08845 3002 * want to enter a busy loop between poll() reporting a EPOLLIN event
26ebd9c7
RB
3003 * and read() returning -EAGAIN. Clearing the oa.pollin state here
3004 * effectively ensures we back off until the next hrtimer callback
a9a08845 3005 * before reporting another EPOLLIN event.
bcad588d
AD
3006 * The exception to this is if ops->read() returned -ENOSPC which means
3007 * that more OA data is available than could fit in the user provided
3008 * buffer. In this case we want the next poll() call to not block.
26ebd9c7 3009 */
bcad588d 3010 if (ret != -ENOSPC)
a37f08a8 3011 stream->pollin = false;
d7965152 3012
bcad588d
AD
3013 /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
3014 return offset ?: (ret ?: -EAGAIN);
eec688e1
RB
3015}
3016
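/*
 * Example (illustrative sketch, not part of the driver): a minimal
 * userspace consumer of the read() FOP described above. The record layout
 * and constants come from the <drm/i915_drm.h> uapi; the buffer size is an
 * arbitrary assumption.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <drm/i915_drm.h>

static int read_some_samples(int stream_fd)
{
	uint8_t buf[128 * 1024];
	ssize_t len = read(stream_fd, buf, sizeof(buf));
	const uint8_t *p = buf;

	if (len < 0)
		return -errno; /* EAGAIN on an empty non-blocking fd */

	/* A successful read() returns a sequence of variable sized records,
	 * each led by a struct drm_i915_perf_record_header.
	 */
	while (p < buf + len) {
		const struct drm_i915_perf_record_header *hdr =
			(const void *)p;

		if (hdr->type == DRM_I915_PERF_RECORD_SAMPLE)
			printf("sample record, %u bytes\n", hdr->size);

		p += hdr->size;
	}

	return 0;
}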
d7965152
RB
3017static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
3018{
a37f08a8
UNR
3019 struct i915_perf_stream *stream =
3020 container_of(hrtimer, typeof(*stream), poll_check_timer);
d7965152 3021
a37f08a8
UNR
3022 if (oa_buffer_check_unlocked(stream)) {
3023 stream->pollin = true;
3024 wake_up(&stream->poll_wq);
d7965152
RB
3025 }
3026
3027 hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
3028
3029 return HRTIMER_RESTART;
3030}
3031
16d98b31
RB
3032/**
3033 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
16d98b31
RB
3034 * @stream: An i915 perf stream
3035 * @file: An i915 perf stream file
3036 * @wait: poll() state table
3037 *
3038 * For handling userspace polling on an i915 perf stream, this calls through to
3039 * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
3040 * will be woken for new stream data.
3041 *
8f8b1171 3042 * Note: The &perf->lock mutex has been taken to serialize
16d98b31
RB
3043 * with any non-file-operation driver hooks.
3044 *
3045 * Returns: any poll events that are ready without sleeping
3046 */
8f8b1171
CW
3047static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
3048 struct file *file,
3049 poll_table *wait)
eec688e1 3050{
afc9a42b 3051 __poll_t events = 0;
eec688e1
RB
3052
3053 stream->ops->poll_wait(stream, file, wait);
3054
d7965152
RB
3055 /* Note: we don't explicitly check whether there's something to read
3056 * here since this path may be very hot depending on what else
3057 * userspace is polling, or on the timeout in use. We rely solely on
3058 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
3059 * samples to read.
3060 */
a37f08a8 3061 if (stream->pollin)
a9a08845 3062 events |= EPOLLIN;
eec688e1 3063
d7965152 3064 return events;
eec688e1
RB
3065}
3066
16d98b31
RB
3067/**
3068 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream
3069 * @file: An i915 perf stream file
3070 * @wait: poll() state table
3071 *
3072 * For handling userspace polling on an i915 perf stream, this ensures
3073 * poll_wait() gets called with a wait queue that will be woken for new stream
3074 * data.
3075 *
3076 * Note: Implementation deferred to i915_perf_poll_locked()
3077 *
3078 * Returns: any poll events that are ready without sleeping
3079 */
afc9a42b 3080static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
eec688e1
RB
3081{
3082 struct i915_perf_stream *stream = file->private_data;
8f8b1171 3083 struct i915_perf *perf = stream->perf;
afc9a42b 3084 __poll_t ret;
eec688e1 3085
8f8b1171
CW
3086 mutex_lock(&perf->lock);
3087 ret = i915_perf_poll_locked(stream, file, wait);
3088 mutex_unlock(&perf->lock);
eec688e1
RB
3089
3090 return ret;
3091}
3092
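/*
 * Example (illustrative sketch, not part of the driver): pairing a
 * non-blocking stream fd with poll(2). Since pollin is only set from the
 * hrtimer callback above, wakeups arrive at POLL_PERIOD granularity rather
 * than per report.
 */
#include <poll.h>

static int wait_for_samples(int stream_fd, int timeout_ms)
{
	struct pollfd pfd = {
		.fd = stream_fd,
		.events = POLLIN,
	};
	int ret = poll(&pfd, 1, timeout_ms);

	if (ret <= 0)
		return ret; /* 0 on timeout, -1 on error */

	/* POLLIN can be a false positive (see i915_perf_read() above): a
	 * subsequent read() may still return EAGAIN, so callers should loop
	 * rather than assume data is guaranteed.
	 */
	return pfd.revents & POLLIN;
}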
16d98b31
RB
3093/**
3094 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
3095 * @stream: A disabled i915 perf stream
3096 *
3097 * [Re]enables the associated capture of data for this stream.
3098 *
3099 * If a stream was previously enabled then there's currently no intention
3100 * to provide userspace any guarantee about the preservation of previously
3101 * buffered data.
3102 */
eec688e1
RB
3103static void i915_perf_enable_locked(struct i915_perf_stream *stream)
3104{
3105 if (stream->enabled)
3106 return;
3107
3108 /* Allow stream->ops->enable() to refer to this */
3109 stream->enabled = true;
3110
3111 if (stream->ops->enable)
3112 stream->ops->enable(stream);
9cd20ef7
LL
3113
3114 if (stream->hold_preemption)
9f3ccd40 3115 intel_context_set_nopreempt(stream->pinned_ctx);
eec688e1
RB
3116}
3117
16d98b31
RB
3118/**
3119 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
3120 * @stream: An enabled i915 perf stream
3121 *
3122 * Disables the associated capture of data for this stream.
3123 *
 3124 * The intention is that disabling and re-enabling a stream will ideally be
3125 * cheaper than destroying and re-opening a stream with the same configuration,
3126 * though there are no formal guarantees about what state or buffered data
3127 * must be retained between disabling and re-enabling a stream.
3128 *
3129 * Note: while a stream is disabled it's considered an error for userspace
3130 * to attempt to read from the stream (-EIO).
3131 */
eec688e1
RB
3132static void i915_perf_disable_locked(struct i915_perf_stream *stream)
3133{
3134 if (!stream->enabled)
3135 return;
3136
3137 /* Allow stream->ops->disable() to refer to this */
3138 stream->enabled = false;
3139
9cd20ef7 3140 if (stream->hold_preemption)
9f3ccd40 3141 intel_context_clear_nopreempt(stream->pinned_ctx);
9cd20ef7 3142
eec688e1
RB
3143 if (stream->ops->disable)
3144 stream->ops->disable(stream);
3145}
3146
7831e9a9
CW
3147static long i915_perf_config_locked(struct i915_perf_stream *stream,
3148 unsigned long metrics_set)
3149{
3150 struct i915_oa_config *config;
3151 long ret = stream->oa_config->id;
3152
3153 config = i915_perf_get_oa_config(stream->perf, metrics_set);
3154 if (!config)
3155 return -EINVAL;
3156
3157 if (config != stream->oa_config) {
4b4e973d 3158 struct i915_request *rq;
7831e9a9
CW
3159
3160 /*
3161 * If OA is bound to a specific context, emit the
3162 * reconfiguration inline from that context. The update
3163 * will then be ordered with respect to submission on that
3164 * context.
3165 *
3166 * When set globally, we use a low priority kernel context,
3167 * so it will effectively take effect when idle.
3168 */
4b4e973d
CW
3169 rq = emit_oa_config(stream, config, oa_context(stream));
3170 if (!IS_ERR(rq)) {
7831e9a9 3171 config = xchg(&stream->oa_config, config);
4b4e973d
CW
3172 i915_request_put(rq);
3173 } else {
3174 ret = PTR_ERR(rq);
3175 }
7831e9a9
CW
3176 }
3177
3178 i915_oa_config_put(config);
3179
3180 return ret;
3181}
3182
16d98b31
RB
3183/**
 3184 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
3185 * @stream: An i915 perf stream
3186 * @cmd: the ioctl request
3187 * @arg: the ioctl data
3188 *
8f8b1171 3189 * Note: The &perf->lock mutex has been taken to serialize
16d98b31
RB
3190 * with any non-file-operation driver hooks.
3191 *
3192 * Returns: zero on success or a negative error code. Returns -EINVAL for
3193 * an unknown ioctl request.
3194 */
eec688e1
RB
3195static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
3196 unsigned int cmd,
3197 unsigned long arg)
3198{
3199 switch (cmd) {
3200 case I915_PERF_IOCTL_ENABLE:
3201 i915_perf_enable_locked(stream);
3202 return 0;
3203 case I915_PERF_IOCTL_DISABLE:
3204 i915_perf_disable_locked(stream);
3205 return 0;
7831e9a9
CW
3206 case I915_PERF_IOCTL_CONFIG:
3207 return i915_perf_config_locked(stream, arg);
eec688e1
RB
3208 }
3209
3210 return -EINVAL;
3211}
3212
16d98b31
RB
3213/**
3214 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
3215 * @file: An i915 perf stream file
3216 * @cmd: the ioctl request
3217 * @arg: the ioctl data
3218 *
3219 * Implementation deferred to i915_perf_ioctl_locked().
3220 *
3221 * Returns: zero on success or a negative error code. Returns -EINVAL for
3222 * an unknown ioctl request.
3223 */
eec688e1
RB
3224static long i915_perf_ioctl(struct file *file,
3225 unsigned int cmd,
3226 unsigned long arg)
3227{
3228 struct i915_perf_stream *stream = file->private_data;
8f8b1171 3229 struct i915_perf *perf = stream->perf;
eec688e1
RB
3230 long ret;
3231
8f8b1171 3232 mutex_lock(&perf->lock);
eec688e1 3233 ret = i915_perf_ioctl_locked(stream, cmd, arg);
8f8b1171 3234 mutex_unlock(&perf->lock);
eec688e1
RB
3235
3236 return ret;
3237}
3238
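/*
 * Example (illustrative sketch, not part of the driver): driving a stream
 * opened with I915_PERF_FLAG_DISABLED through the ioctls handled above.
 * The metrics_set id is a placeholder obtained from sysfs.
 */
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int capture_with_config(int stream_fd, unsigned long metrics_set)
{
	/* Swap in a different OA config; per i915_perf_config_locked() above,
	 * success returns the id of the previously installed config.
	 */
	if (ioctl(stream_fd, I915_PERF_IOCTL_CONFIG, metrics_set) < 0)
		return -1;

	if (ioctl(stream_fd, I915_PERF_IOCTL_ENABLE) < 0)
		return -1;

	/* ... read() sample records here ... */

	/* Reading a disabled stream returns EIO, so stop readers first. */
	return ioctl(stream_fd, I915_PERF_IOCTL_DISABLE);
}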
16d98b31
RB
3239/**
3240 * i915_perf_destroy_locked - destroy an i915 perf stream
3241 * @stream: An i915 perf stream
3242 *
3243 * Frees all resources associated with the given i915 perf @stream, disabling
3244 * any associated data capture in the process.
3245 *
8f8b1171 3246 * Note: The &perf->lock mutex has been taken to serialize
16d98b31
RB
3247 * with any non-file-operation driver hooks.
3248 */
eec688e1
RB
3249static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
3250{
eec688e1
RB
3251 if (stream->enabled)
3252 i915_perf_disable_locked(stream);
3253
3254 if (stream->ops->destroy)
3255 stream->ops->destroy(stream);
3256
69df05e1 3257 if (stream->ctx)
5f09a9c8 3258 i915_gem_context_put(stream->ctx);
eec688e1
RB
3259
3260 kfree(stream);
3261}
3262
16d98b31
RB
3263/**
3264 * i915_perf_release - handles userspace close() of a stream file
3265 * @inode: anonymous inode associated with file
3266 * @file: An i915 perf stream file
3267 *
3268 * Cleans up any resources associated with an open i915 perf stream file.
3269 *
3270 * NB: close() can't really fail from the userspace point of view.
3271 *
3272 * Returns: zero on success or a negative error code.
3273 */
eec688e1
RB
3274static int i915_perf_release(struct inode *inode, struct file *file)
3275{
3276 struct i915_perf_stream *stream = file->private_data;
8f8b1171 3277 struct i915_perf *perf = stream->perf;
eec688e1 3278
8f8b1171 3279 mutex_lock(&perf->lock);
eec688e1 3280 i915_perf_destroy_locked(stream);
8f8b1171 3281 mutex_unlock(&perf->lock);
eec688e1 3282
a5af1df7 3283 /* Release the reference the perf stream kept on the driver. */
8f8b1171 3284 drm_dev_put(&perf->i915->drm);
a5af1df7 3285
eec688e1
RB
3286 return 0;
3287}
3288
3289
3290static const struct file_operations fops = {
3291 .owner = THIS_MODULE,
3292 .llseek = no_llseek,
3293 .release = i915_perf_release,
3294 .poll = i915_perf_poll,
3295 .read = i915_perf_read,
3296 .unlocked_ioctl = i915_perf_ioctl,
191f8960
LL
 3297	/* Our ioctls have no arguments, so it's safe to use the same function
 3298	 * to handle 32-bit compatibility.
3299 */
3300 .compat_ioctl = i915_perf_ioctl,
eec688e1
RB
3301};
3302
3303
16d98b31
RB
3304/**
3305 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
8f8b1171 3306 * @perf: i915 perf instance
16d98b31
RB
3307 * @param: The open parameters passed to 'DRM_I915_PERF_OPEN`
3308 * @props: individually validated u64 property value pairs
3309 * @file: drm file
3310 *
3311 * See i915_perf_ioctl_open() for interface details.
3312 *
3313 * Implements further stream config validation and stream initialization on
8f8b1171 3314 * behalf of i915_perf_open_ioctl() with the &perf->lock mutex
16d98b31
RB
3315 * taken to serialize with any non-file-operation driver hooks.
3316 *
3317 * Note: at this point the @props have only been validated in isolation and
3318 * it's still necessary to validate that the combination of properties makes
3319 * sense.
3320 *
3321 * In the case where userspace is interested in OA unit metrics then further
3322 * config validation and stream initialization details will be handled by
3323 * i915_oa_stream_init(). The code here should only validate config state that
3324 * will be relevant to all stream types / backends.
3325 *
3326 * Returns: zero on success or a negative error code.
3327 */
eec688e1 3328static int
8f8b1171 3329i915_perf_open_ioctl_locked(struct i915_perf *perf,
eec688e1
RB
3330 struct drm_i915_perf_open_param *param,
3331 struct perf_open_properties *props,
3332 struct drm_file *file)
3333{
3334 struct i915_gem_context *specific_ctx = NULL;
3335 struct i915_perf_stream *stream = NULL;
3336 unsigned long f_flags = 0;
19f81df2 3337 bool privileged_op = true;
eec688e1
RB
3338 int stream_fd;
3339 int ret;
3340
3341 if (props->single_context) {
3342 u32 ctx_handle = props->ctx_handle;
3343 struct drm_i915_file_private *file_priv = file->driver_priv;
3344
635f56c3
ID
3345 specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
3346 if (!specific_ctx) {
3347 DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
3348 ctx_handle);
3349 ret = -ENOENT;
eec688e1
RB
3350 goto err;
3351 }
3352 }
3353
19f81df2
RB
3354 /*
3355 * On Haswell the OA unit supports clock gating off for a specific
3356 * context and in this mode there's no visibility of metrics for the
3357 * rest of the system, which we consider acceptable for a
3358 * non-privileged client.
3359 *
00a7f0d7 3360 * For Gen8->11 the OA unit no longer supports clock gating off for a
19f81df2
RB
3361 * specific context and the kernel can't securely stop the counters
3362 * from updating as system-wide / global values. Even though we can
3363 * filter reports based on the included context ID we can't block
3364 * clients from seeing the raw / global counter values via
3365 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
3366 * enable the OA unit by default.
00a7f0d7
LL
3367 *
3368 * For Gen12+ we gain a new OAR unit that only monitors the RCS on a
3369 * per context basis. So we can relax requirements there if the user
 3370	 * doesn't request global stream access (i.e. query based sampling
 3371	 * using MI_REPORT_PERF_COUNT).
19f81df2 3372 */
0b0120d4 3373 if (IS_HASWELL(perf->i915) && specific_ctx)
19f81df2 3374 privileged_op = false;
00a7f0d7
LL
3375 else if (IS_GEN(perf->i915, 12) && specific_ctx &&
3376 (props->sample_flags & SAMPLE_OA_REPORT) == 0)
3377 privileged_op = false;
19f81df2 3378
0b0120d4
LL
3379 if (props->hold_preemption) {
3380 if (!props->single_context) {
3381 DRM_DEBUG("preemption disable with no context\n");
3382 ret = -EINVAL;
3383 goto err;
3384 }
3385 privileged_op = true;
3386 }
3387
ccdf6341
RB
3388 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option
3389 * we check a dev.i915.perf_stream_paranoid sysctl option
3390 * to determine if it's ok to access system wide OA counters
3391 * without CAP_SYS_ADMIN privileges.
3392 */
19f81df2 3393 if (privileged_op &&
ccdf6341 3394 i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
9cd20ef7 3395 DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
eec688e1
RB
3396 ret = -EACCES;
3397 goto err_ctx;
3398 }
3399
3400 stream = kzalloc(sizeof(*stream), GFP_KERNEL);
3401 if (!stream) {
3402 ret = -ENOMEM;
3403 goto err_ctx;
3404 }
3405
8f8b1171 3406 stream->perf = perf;
eec688e1
RB
3407 stream->ctx = specific_ctx;
3408
d7965152
RB
3409 ret = i915_oa_stream_init(stream, param, props);
3410 if (ret)
3411 goto err_alloc;
3412
 3413	/* We avoid simply assigning stream->sample_flags = props->sample_flags
 3414	 * so that _stream_init can check the combination of sample flags more
 3415	 * thoroughly; still, this is the expected result at this point.
eec688e1 3416 */
d7965152
RB
3417 if (WARN_ON(stream->sample_flags != props->sample_flags)) {
3418 ret = -ENODEV;
22f880ca 3419 goto err_flags;
d7965152 3420 }
eec688e1 3421
eec688e1
RB
3422 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
3423 f_flags |= O_CLOEXEC;
3424 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
3425 f_flags |= O_NONBLOCK;
3426
3427 stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
3428 if (stream_fd < 0) {
3429 ret = stream_fd;
23b9e41a 3430 goto err_flags;
eec688e1
RB
3431 }
3432
3433 if (!(param->flags & I915_PERF_FLAG_DISABLED))
3434 i915_perf_enable_locked(stream);
3435
a5af1df7
LL
3436 /* Take a reference on the driver that will be kept with stream_fd
3437 * until its release.
3438 */
8f8b1171 3439 drm_dev_get(&perf->i915->drm);
a5af1df7 3440
eec688e1
RB
3441 return stream_fd;
3442
22f880ca 3443err_flags:
eec688e1
RB
3444 if (stream->ops->destroy)
3445 stream->ops->destroy(stream);
3446err_alloc:
3447 kfree(stream);
3448err_ctx:
69df05e1 3449 if (specific_ctx)
5f09a9c8 3450 i915_gem_context_put(specific_ctx);
eec688e1
RB
3451err:
3452 return ret;
3453}
3454
8f8b1171 3455static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
155e941f 3456{
9f9b2792 3457 return div64_u64(1000000000ULL * (2ULL << exponent),
8f8b1171 3458 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz);
155e941f
RB
3459}
3460
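/*
 * Worked example (illustrative): with a 12000 kHz command streamer
 * timestamp, an exponent of 16 selects a sampling period of
 *
 *	1000000000 * (2 << 16) / (1000 * 12000) ns = 131072 / 12e6 s ~= 10.9 ms
 *
 * i.e. the OA unit samples every 2^(exponent + 1) timestamp ticks.
 */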
16d98b31
RB
3461/**
3462 * read_properties_unlocked - validate + copy userspace stream open properties
8f8b1171 3463 * @perf: i915 perf instance
16d98b31
RB
3464 * @uprops: The array of u64 key value pairs given by userspace
3465 * @n_props: The number of key value pairs expected in @uprops
3466 * @props: The stream configuration built up while validating properties
eec688e1
RB
3467 *
 3468 * Note this function only validates properties in isolation; it doesn't
3469 * validate that the combination of properties makes sense or that all
3470 * properties necessary for a particular kind of stream have been set.
16d98b31
RB
3471 *
3472 * Note that there currently aren't any ordering requirements for properties so
3473 * we shouldn't validate or assume anything about ordering here. This doesn't
3474 * rule out defining new properties with ordering requirements in the future.
eec688e1 3475 */
8f8b1171 3476static int read_properties_unlocked(struct i915_perf *perf,
eec688e1
RB
3477 u64 __user *uprops,
3478 u32 n_props,
3479 struct perf_open_properties *props)
3480{
3481 u64 __user *uprop = uprops;
701f8231 3482 u32 i;
eec688e1
RB
3483
3484 memset(props, 0, sizeof(struct perf_open_properties));
3485
3486 if (!n_props) {
7708550c 3487 DRM_DEBUG("No i915 perf properties given\n");
eec688e1
RB
3488 return -EINVAL;
3489 }
3490
9a61363a
LL
3491 /* At the moment we only support using i915-perf on the RCS. */
3492 props->engine = intel_engine_lookup_user(perf->i915,
3493 I915_ENGINE_CLASS_RENDER,
3494 0);
3495 if (!props->engine) {
3496 DRM_DEBUG("No RENDER-capable engines\n");
3497 return -EINVAL;
3498 }
3499
eec688e1
RB
3500 /* Considering that ID = 0 is reserved and assuming that we don't
3501 * (currently) expect any configurations to ever specify duplicate
3502 * values for a particular property ID then the last _PROP_MAX value is
3503 * one greater than the maximum number of properties we expect to get
3504 * from userspace.
3505 */
3506 if (n_props >= DRM_I915_PERF_PROP_MAX) {
7708550c 3507 DRM_DEBUG("More i915 perf properties specified than exist\n");
eec688e1
RB
3508 return -EINVAL;
3509 }
3510
3511 for (i = 0; i < n_props; i++) {
00319ba0 3512 u64 oa_period, oa_freq_hz;
eec688e1
RB
3513 u64 id, value;
3514 int ret;
3515
3516 ret = get_user(id, uprop);
3517 if (ret)
3518 return ret;
3519
3520 ret = get_user(value, uprop + 1);
3521 if (ret)
3522 return ret;
3523
0a309f9e
MA
3524 if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
3525 DRM_DEBUG("Unknown i915 perf property ID\n");
3526 return -EINVAL;
3527 }
3528
eec688e1
RB
3529 switch ((enum drm_i915_perf_property_id)id) {
3530 case DRM_I915_PERF_PROP_CTX_HANDLE:
3531 props->single_context = 1;
3532 props->ctx_handle = value;
3533 break;
d7965152 3534 case DRM_I915_PERF_PROP_SAMPLE_OA:
b6dd47b9
LL
3535 if (value)
3536 props->sample_flags |= SAMPLE_OA_REPORT;
d7965152
RB
3537 break;
3538 case DRM_I915_PERF_PROP_OA_METRICS_SET:
701f8231 3539 if (value == 0) {
7708550c 3540 DRM_DEBUG("Unknown OA metric set ID\n");
d7965152
RB
3541 return -EINVAL;
3542 }
3543 props->metrics_set = value;
3544 break;
3545 case DRM_I915_PERF_PROP_OA_FORMAT:
3546 if (value == 0 || value >= I915_OA_FORMAT_MAX) {
52c57c26
RB
3547 DRM_DEBUG("Out-of-range OA report format %llu\n",
3548 value);
d7965152
RB
3549 return -EINVAL;
3550 }
8f8b1171 3551 if (!perf->oa_formats[value].size) {
52c57c26
RB
3552 DRM_DEBUG("Unsupported OA report format %llu\n",
3553 value);
d7965152
RB
3554 return -EINVAL;
3555 }
3556 props->oa_format = value;
3557 break;
3558 case DRM_I915_PERF_PROP_OA_EXPONENT:
3559 if (value > OA_EXPONENT_MAX) {
7708550c
RB
3560 DRM_DEBUG("OA timer exponent too high (> %u)\n",
3561 OA_EXPONENT_MAX);
d7965152
RB
3562 return -EINVAL;
3563 }
3564
00319ba0 3565 /* Theoretically we can program the OA unit to sample
155e941f
RB
3566 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns
3567 * for BXT. We don't allow such high sampling
3568 * frequencies by default unless root.
00319ba0 3569 */
155e941f 3570
00319ba0 3571 BUILD_BUG_ON(sizeof(oa_period) != 8);
8f8b1171 3572 oa_period = oa_exponent_to_ns(perf, value);
00319ba0
RB
3573
3574 /* This check is primarily to ensure that oa_period <=
3575 * UINT32_MAX (before passing to do_div which only
3576 * accepts a u32 denominator), but we can also skip
3577 * checking anything < 1Hz which implicitly can't be
3578 * limited via an integer oa_max_sample_rate.
d7965152 3579 */
00319ba0
RB
3580 if (oa_period <= NSEC_PER_SEC) {
3581 u64 tmp = NSEC_PER_SEC;
3582 do_div(tmp, oa_period);
3583 oa_freq_hz = tmp;
3584 } else
3585 oa_freq_hz = 0;
3586
3587 if (oa_freq_hz > i915_oa_max_sample_rate &&
3588 !capable(CAP_SYS_ADMIN)) {
7708550c 3589 DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
00319ba0 3590 i915_oa_max_sample_rate);
d7965152
RB
3591 return -EACCES;
3592 }
3593
3594 props->oa_periodic = true;
3595 props->oa_period_exponent = value;
3596 break;
9cd20ef7
LL
3597 case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
3598 props->hold_preemption = !!value;
3599 break;
0a309f9e 3600 case DRM_I915_PERF_PROP_MAX:
eec688e1 3601 MISSING_CASE(id);
eec688e1
RB
3602 return -EINVAL;
3603 }
3604
3605 uprop += 2;
3606 }
3607
3608 return 0;
3609}
3610
16d98b31
RB
3611/**
3612 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
3613 * @dev: drm device
3614 * @data: ioctl data copied from userspace (unvalidated)
3615 * @file: drm file
3616 *
3617 * Validates the stream open parameters given by userspace including flags
3618 * and an array of u64 key, value pair properties.
3619 *
3620 * Very little is assumed up front about the nature of the stream being
3621 * opened (for instance we don't assume it's for periodic OA unit metrics). An
3622 * i915-perf stream is expected to be a suitable interface for other forms of
3623 * buffered data written by the GPU besides periodic OA metrics.
3624 *
3625 * Note we copy the properties from userspace outside of the i915 perf
3626 * mutex to avoid an awkward lockdep with mmap_sem.
3627 *
3628 * Most of the implementation details are handled by
8f8b1171 3629 * i915_perf_open_ioctl_locked() after taking the &perf->lock
16d98b31
RB
3630 * mutex for serializing with any non-file-operation driver hooks.
3631 *
3632 * Return: A newly opened i915 Perf stream file descriptor or negative
3633 * error code on failure.
3634 */
eec688e1
RB
3635int i915_perf_open_ioctl(struct drm_device *dev, void *data,
3636 struct drm_file *file)
3637{
8f8b1171 3638 struct i915_perf *perf = &to_i915(dev)->perf;
eec688e1
RB
3639 struct drm_i915_perf_open_param *param = data;
3640 struct perf_open_properties props;
3641 u32 known_open_flags;
3642 int ret;
3643
8f8b1171 3644 if (!perf->i915) {
7708550c 3645 DRM_DEBUG("i915 perf interface not available for this system\n");
eec688e1
RB
3646 return -ENOTSUPP;
3647 }
3648
3649 known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
3650 I915_PERF_FLAG_FD_NONBLOCK |
3651 I915_PERF_FLAG_DISABLED;
3652 if (param->flags & ~known_open_flags) {
7708550c 3653 DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n");
eec688e1
RB
3654 return -EINVAL;
3655 }
3656
8f8b1171 3657 ret = read_properties_unlocked(perf,
eec688e1
RB
3658 u64_to_user_ptr(param->properties_ptr),
3659 param->num_properties,
3660 &props);
3661 if (ret)
3662 return ret;
3663
8f8b1171
CW
3664 mutex_lock(&perf->lock);
3665 ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
3666 mutex_unlock(&perf->lock);
eec688e1
RB
3667
3668 return ret;
3669}
3670
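/*
 * Example (illustrative sketch, not part of the driver): opening a
 * periodic, system-wide OA stream on the render engine through the ioctl
 * above. The metrics_set id is a placeholder (see the sysfs lookup further
 * below); this needs root unless dev.i915.perf_stream_paranoid is 0.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int open_oa_stream(int drm_fd, uint64_t metrics_set)
{
	uint64_t props[] = {
		/* (key, value) pairs, as parsed by read_properties_unlocked() */
		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set,
		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC |
			 I915_PERF_FLAG_FD_NONBLOCK,
		.num_properties = sizeof(props) / (2 * sizeof(uint64_t)),
		.properties_ptr = (uintptr_t)props,
	};

	/* On success the ioctl returns a new stream file descriptor. */
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
}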
16d98b31
RB
3671/**
3672 * i915_perf_register - exposes i915-perf to userspace
8f8b1171 3673 * @i915: i915 device instance
16d98b31
RB
3674 *
3675 * In particular OA metric sets are advertised under a sysfs metrics/
3676 * directory allowing userspace to enumerate valid IDs that can be
3677 * used to open an i915-perf stream.
3678 */
8f8b1171 3679void i915_perf_register(struct drm_i915_private *i915)
442b8c06 3680{
8f8b1171 3681 struct i915_perf *perf = &i915->perf;
701f8231
LL
3682 int ret;
3683
8f8b1171 3684 if (!perf->i915)
442b8c06
RB
3685 return;
3686
 3687	/* To be sure we're synchronized with any attempted
 3688	 * i915_perf_open_ioctl(), considering that we register after
 3689	 * being exposed to userspace.
3690 */
8f8b1171 3691 mutex_lock(&perf->lock);
442b8c06 3692
8f8b1171 3693 perf->metrics_kobj =
442b8c06 3694 kobject_create_and_add("metrics",
8f8b1171
CW
3695 &i915->drm.primary->kdev->kobj);
3696 if (!perf->metrics_kobj)
442b8c06
RB
3697 goto exit;
3698
8f8b1171
CW
3699 sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr);
3700
00a7f0d7
LL
3701 if (IS_TIGERLAKE(i915)) {
3702 i915_perf_load_test_config_tgl(i915);
3703 } else if (INTEL_GEN(i915) >= 11) {
8f8b1171
CW
3704 i915_perf_load_test_config_icl(i915);
3705 } else if (IS_CANNONLAKE(i915)) {
3706 i915_perf_load_test_config_cnl(i915);
3707 } else if (IS_COFFEELAKE(i915)) {
3708 if (IS_CFL_GT2(i915))
3709 i915_perf_load_test_config_cflgt2(i915);
3710 if (IS_CFL_GT3(i915))
3711 i915_perf_load_test_config_cflgt3(i915);
3712 } else if (IS_GEMINILAKE(i915)) {
3713 i915_perf_load_test_config_glk(i915);
3714 } else if (IS_KABYLAKE(i915)) {
3715 if (IS_KBL_GT2(i915))
3716 i915_perf_load_test_config_kblgt2(i915);
3717 else if (IS_KBL_GT3(i915))
3718 i915_perf_load_test_config_kblgt3(i915);
3719 } else if (IS_BROXTON(i915)) {
3720 i915_perf_load_test_config_bxt(i915);
3721 } else if (IS_SKYLAKE(i915)) {
3722 if (IS_SKL_GT2(i915))
3723 i915_perf_load_test_config_sklgt2(i915);
3724 else if (IS_SKL_GT3(i915))
3725 i915_perf_load_test_config_sklgt3(i915);
3726 else if (IS_SKL_GT4(i915))
3727 i915_perf_load_test_config_sklgt4(i915);
3728 } else if (IS_CHERRYVIEW(i915)) {
3729 i915_perf_load_test_config_chv(i915);
3730 } else if (IS_BROADWELL(i915)) {
3731 i915_perf_load_test_config_bdw(i915);
3732 } else if (IS_HASWELL(i915)) {
3733 i915_perf_load_test_config_hsw(i915);
3734 }
3735
3736 if (perf->test_config.id == 0)
701f8231
LL
3737 goto sysfs_error;
3738
8f8b1171
CW
3739 ret = sysfs_create_group(perf->metrics_kobj,
3740 &perf->test_config.sysfs_metric);
701f8231
LL
3741 if (ret)
3742 goto sysfs_error;
f89823c2 3743
6a45008a
LL
3744 perf->test_config.perf = perf;
3745 kref_init(&perf->test_config.ref);
f89823c2 3746
19f81df2
RB
3747 goto exit;
3748
3749sysfs_error:
8f8b1171
CW
3750 kobject_put(perf->metrics_kobj);
3751 perf->metrics_kobj = NULL;
19f81df2 3752
442b8c06 3753exit:
8f8b1171 3754 mutex_unlock(&perf->lock);
442b8c06
RB
3755}
3756
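/*
 * Example (illustrative sketch, not part of the driver): resolving a
 * metric set uuid to the id used with DRM_I915_PERF_PROP_OA_METRICS_SET,
 * via the sysfs metrics/ directory registered above. "card0" is a
 * placeholder for the right DRM card.
 */
#include <inttypes.h>
#include <stdio.h>

static int metrics_set_id(const char *uuid, uint64_t *id)
{
	char path[256];
	FILE *f;
	int ok;

	snprintf(path, sizeof(path),
		 "/sys/class/drm/card0/metrics/%s/id", uuid);
	f = fopen(path, "r");
	if (!f)
		return -1;

	ok = fscanf(f, "%" SCNu64, id) == 1;
	fclose(f);
	return ok ? 0 : -1;
}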
16d98b31
RB
3757/**
3758 * i915_perf_unregister - hide i915-perf from userspace
8f8b1171 3759 * @i915: i915 device instance
16d98b31
RB
3760 *
3761 * i915-perf state cleanup is split up into an 'unregister' and
3762 * 'deinit' phase where the interface is first hidden from
3763 * userspace by i915_perf_unregister() before cleaning up
3764 * remaining state in i915_perf_fini().
3765 */
8f8b1171 3766void i915_perf_unregister(struct drm_i915_private *i915)
442b8c06 3767{
8f8b1171
CW
3768 struct i915_perf *perf = &i915->perf;
3769
3770 if (!perf->metrics_kobj)
442b8c06
RB
3771 return;
3772
8f8b1171
CW
3773 sysfs_remove_group(perf->metrics_kobj,
3774 &perf->test_config.sysfs_metric);
442b8c06 3775
8f8b1171
CW
3776 kobject_put(perf->metrics_kobj);
3777 perf->metrics_kobj = NULL;
442b8c06
RB
3778}
3779
8f8b1171 3780static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
f89823c2
LL
3781{
3782 static const i915_reg_t flex_eu_regs[] = {
3783 EU_PERF_CNTL0,
3784 EU_PERF_CNTL1,
3785 EU_PERF_CNTL2,
3786 EU_PERF_CNTL3,
3787 EU_PERF_CNTL4,
3788 EU_PERF_CNTL5,
3789 EU_PERF_CNTL6,
3790 };
3791 int i;
3792
3793 for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
7c52a221 3794 if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
f89823c2
LL
3795 return true;
3796 }
3797 return false;
3798}
3799
fc215230
UNR
3800#define ADDR_IN_RANGE(addr, start, end) \
3801 ((addr) >= (start) && \
3802 (addr) <= (end))
3803
3804#define REG_IN_RANGE(addr, start, end) \
3805 ((addr) >= i915_mmio_reg_offset(start) && \
3806 (addr) <= i915_mmio_reg_offset(end))
3807
3808#define REG_EQUAL(addr, mmio) \
3809 ((addr) == i915_mmio_reg_offset(mmio))
3810
8f8b1171 3811static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
f89823c2 3812{
fc215230
UNR
3813 return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) ||
3814 REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) ||
3815 REG_IN_RANGE(addr, OACEC0_0, OACEC7_1);
f89823c2
LL
3816}
3817
8f8b1171 3818static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
f89823c2 3819{
fc215230
UNR
3820 return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) ||
3821 REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) ||
3822 REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) ||
3823 REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI);
f89823c2
LL
3824}
3825
8f8b1171 3826static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
f89823c2 3827{
8f8b1171 3828 return gen7_is_valid_mux_addr(perf, addr) ||
fc215230
UNR
3829 REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
3830 REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8));
f89823c2
LL
3831}
3832
8f8b1171 3833static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
95690a02 3834{
8f8b1171 3835 return gen8_is_valid_mux_addr(perf, addr) ||
fc215230
UNR
3836 REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
3837 REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI);
95690a02
LL
3838}
3839
8f8b1171 3840static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
f89823c2 3841{
8f8b1171 3842 return gen7_is_valid_mux_addr(perf, addr) ||
fc215230
UNR
3843 ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) ||
3844 REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) ||
3845 REG_EQUAL(addr, HSW_MBVID2_MISR0);
f89823c2
LL
3846}
3847
8f8b1171 3848static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
f89823c2 3849{
8f8b1171 3850 return gen7_is_valid_mux_addr(perf, addr) ||
fc215230 3851 ADDR_IN_RANGE(addr, 0x182300, 0x1823A4);
f89823c2
LL
3852}
3853
00a7f0d7
LL
3854static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
3855{
3856 return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) ||
3857 REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) ||
3858 REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) ||
3859 REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) ||
3860 REG_EQUAL(addr, GEN12_OAA_DBG_REG) ||
3861 REG_EQUAL(addr, GEN12_OAG_OA_PESS) ||
3862 REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF);
3863}
3864
3865static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3866{
3867 return REG_EQUAL(addr, NOA_WRITE) ||
3868 REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
3869 REG_EQUAL(addr, GDT_CHICKEN_BITS) ||
3870 REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
3871 REG_EQUAL(addr, RPM_CONFIG0) ||
3872 REG_EQUAL(addr, RPM_CONFIG1) ||
3873 REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8));
3874}
3875
739f3abd 3876static u32 mask_reg_value(u32 reg, u32 val)
f89823c2
LL
3877{
 3878	/* HALF_SLICE_CHICKEN2 is programmed with the
3879 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
3880 * programmed by userspace doesn't change this.
3881 */
fc215230 3882 if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2))
f89823c2
LL
3883 val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
3884
 3885	/* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
3886 * indicated by its name and a bunch of selection fields used by OA
3887 * configs.
3888 */
fc215230 3889 if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT))
f89823c2
LL
3890 val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);
3891
3892 return val;
3893}
3894
8f8b1171
CW
3895static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
3896 bool (*is_valid)(struct i915_perf *perf, u32 addr),
f89823c2
LL
3897 u32 __user *regs,
3898 u32 n_regs)
3899{
3900 struct i915_oa_reg *oa_regs;
3901 int err;
3902 u32 i;
3903
3904 if (!n_regs)
3905 return NULL;
3906
96d4f267 3907 if (!access_ok(regs, n_regs * sizeof(u32) * 2))
f89823c2
LL
3908 return ERR_PTR(-EFAULT);
3909
3910 /* No is_valid function means we're not allowing any register to be programmed. */
3911 GEM_BUG_ON(!is_valid);
3912 if (!is_valid)
3913 return ERR_PTR(-EINVAL);
3914
3915 oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
3916 if (!oa_regs)
3917 return ERR_PTR(-ENOMEM);
3918
3919 for (i = 0; i < n_regs; i++) {
3920 u32 addr, value;
3921
3922 err = get_user(addr, regs);
3923 if (err)
3924 goto addr_err;
3925
8f8b1171 3926 if (!is_valid(perf, addr)) {
f89823c2
LL
3927 DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
3928 err = -EINVAL;
3929 goto addr_err;
3930 }
3931
3932 err = get_user(value, regs + 1);
3933 if (err)
3934 goto addr_err;
3935
3936 oa_regs[i].addr = _MMIO(addr);
3937 oa_regs[i].value = mask_reg_value(addr, value);
3938
3939 regs += 2;
3940 }
3941
3942 return oa_regs;
3943
3944addr_err:
3945 kfree(oa_regs);
3946 return ERR_PTR(err);
3947}
3948
3949static ssize_t show_dynamic_id(struct device *dev,
3950 struct device_attribute *attr,
3951 char *buf)
3952{
3953 struct i915_oa_config *oa_config =
3954 container_of(attr, typeof(*oa_config), sysfs_metric_id);
3955
3956 return sprintf(buf, "%d\n", oa_config->id);
3957}
3958
8f8b1171 3959static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
f89823c2
LL
3960 struct i915_oa_config *oa_config)
3961{
28152a23 3962 sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
f89823c2
LL
3963 oa_config->sysfs_metric_id.attr.name = "id";
3964 oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
3965 oa_config->sysfs_metric_id.show = show_dynamic_id;
3966 oa_config->sysfs_metric_id.store = NULL;
3967
3968 oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
3969 oa_config->attrs[1] = NULL;
3970
3971 oa_config->sysfs_metric.name = oa_config->uuid;
3972 oa_config->sysfs_metric.attrs = oa_config->attrs;
3973
8f8b1171 3974 return sysfs_create_group(perf->metrics_kobj,
f89823c2
LL
3975 &oa_config->sysfs_metric);
3976}
3977
3978/**
3979 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
3980 * @dev: drm device
3981 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from
3982 * userspace (unvalidated)
3983 * @file: drm file
3984 *
3985 * Validates the submitted OA register to be saved into a new OA config that
3986 * can then be used for programming the OA unit and its NOA network.
3987 *
3988 * Returns: A new allocated config number to be used with the perf open ioctl
3989 * or a negative error code on failure.
3990 */
3991int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
3992 struct drm_file *file)
3993{
8f8b1171 3994 struct i915_perf *perf = &to_i915(dev)->perf;
f89823c2
LL
3995 struct drm_i915_perf_oa_config *args = data;
3996 struct i915_oa_config *oa_config, *tmp;
c415ef2a 3997 struct i915_oa_reg *regs;
f89823c2
LL
3998 int err, id;
3999
8f8b1171 4000 if (!perf->i915) {
f89823c2
LL
4001 DRM_DEBUG("i915 perf interface not available for this system\n");
4002 return -ENOTSUPP;
4003 }
4004
8f8b1171 4005 if (!perf->metrics_kobj) {
f89823c2
LL
4006 DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
4007 return -EINVAL;
4008 }
4009
4010 if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
4011 DRM_DEBUG("Insufficient privileges to add i915 OA config\n");
4012 return -EACCES;
4013 }
4014
4015 if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
4016 (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
4017 (!args->flex_regs_ptr || !args->n_flex_regs)) {
4018 DRM_DEBUG("No OA registers given\n");
4019 return -EINVAL;
4020 }
4021
4022 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
4023 if (!oa_config) {
4024 DRM_DEBUG("Failed to allocate memory for the OA config\n");
4025 return -ENOMEM;
4026 }
4027
6a45008a
LL
4028 oa_config->perf = perf;
4029 kref_init(&oa_config->ref);
f89823c2
LL
4030
4031 if (!uuid_is_valid(args->uuid)) {
4032 DRM_DEBUG("Invalid uuid format for OA config\n");
4033 err = -EINVAL;
4034 goto reg_err;
4035 }
4036
 4037	/* Last character in oa_config->uuid will be 0 because oa_config was
 4038	 * allocated with kzalloc().
4039 */
4040 memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));
4041
4042 oa_config->mux_regs_len = args->n_mux_regs;
c2fba936
CW
4043 regs = alloc_oa_regs(perf,
4044 perf->ops.is_valid_mux_reg,
4045 u64_to_user_ptr(args->mux_regs_ptr),
4046 args->n_mux_regs);
f89823c2 4047
c2fba936 4048 if (IS_ERR(regs)) {
f89823c2 4049 DRM_DEBUG("Failed to create OA config for mux_regs\n");
c2fba936 4050 err = PTR_ERR(regs);
f89823c2
LL
4051 goto reg_err;
4052 }
c2fba936 4053 oa_config->mux_regs = regs;
f89823c2
LL
4054
4055 oa_config->b_counter_regs_len = args->n_boolean_regs;
c2fba936
CW
4056 regs = alloc_oa_regs(perf,
4057 perf->ops.is_valid_b_counter_reg,
4058 u64_to_user_ptr(args->boolean_regs_ptr),
4059 args->n_boolean_regs);
f89823c2 4060
c2fba936 4061 if (IS_ERR(regs)) {
f89823c2 4062 DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
c2fba936 4063 err = PTR_ERR(regs);
f89823c2
LL
4064 goto reg_err;
4065 }
c2fba936 4066 oa_config->b_counter_regs = regs;
f89823c2 4067
8f8b1171 4068 if (INTEL_GEN(perf->i915) < 8) {
f89823c2
LL
4069 if (args->n_flex_regs != 0) {
4070 err = -EINVAL;
4071 goto reg_err;
4072 }
4073 } else {
4074 oa_config->flex_regs_len = args->n_flex_regs;
c2fba936
CW
4075 regs = alloc_oa_regs(perf,
4076 perf->ops.is_valid_flex_reg,
4077 u64_to_user_ptr(args->flex_regs_ptr),
4078 args->n_flex_regs);
f89823c2 4079
c2fba936 4080 if (IS_ERR(regs)) {
f89823c2 4081 DRM_DEBUG("Failed to create OA config for flex_regs\n");
c2fba936 4082 err = PTR_ERR(regs);
f89823c2
LL
4083 goto reg_err;
4084 }
c2fba936 4085 oa_config->flex_regs = regs;
f89823c2
LL
4086 }
4087
8f8b1171 4088 err = mutex_lock_interruptible(&perf->metrics_lock);
f89823c2
LL
4089 if (err)
4090 goto reg_err;
4091
4092 /* We shouldn't have too many configs, so this iteration shouldn't be
4093 * too costly.
4094 */
8f8b1171 4095 idr_for_each_entry(&perf->metrics_idr, tmp, id) {
f89823c2
LL
4096 if (!strcmp(tmp->uuid, oa_config->uuid)) {
4097 DRM_DEBUG("OA config already exists with this uuid\n");
4098 err = -EADDRINUSE;
4099 goto sysfs_err;
4100 }
4101 }
4102
8f8b1171 4103 err = create_dynamic_oa_sysfs_entry(perf, oa_config);
f89823c2
LL
4104 if (err) {
4105 DRM_DEBUG("Failed to create sysfs entry for OA config\n");
4106 goto sysfs_err;
4107 }
4108
4109 /* Config id 0 is invalid, id 1 for kernel stored test config. */
8f8b1171 4110 oa_config->id = idr_alloc(&perf->metrics_idr,
f89823c2
LL
4111 oa_config, 2,
4112 0, GFP_KERNEL);
4113 if (oa_config->id < 0) {
4114 DRM_DEBUG("Failed to create sysfs entry for OA config\n");
4115 err = oa_config->id;
4116 goto sysfs_err;
4117 }
4118
8f8b1171 4119 mutex_unlock(&perf->metrics_lock);
f89823c2 4120
9bd9be66
LL
4121 DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);
4122
f89823c2
LL
4123 return oa_config->id;
4124
4125sysfs_err:
8f8b1171 4126 mutex_unlock(&perf->metrics_lock);
f89823c2 4127reg_err:
6a45008a 4128 i915_oa_config_put(oa_config);
f89823c2
LL
4129 DRM_DEBUG("Failed to add new OA config\n");
4130 return err;
4131}
4132
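/*
 * Example (illustrative sketch, not part of the driver): registering and
 * removing a dynamic OA config via the ioctls above. The uuid and the
 * single mux register write are placeholders; addresses must satisfy the
 * per-platform is_valid_* callbacks, and CAP_SYS_ADMIN is required by
 * default.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int add_then_remove_config(int drm_fd)
{
	uint32_t mux_regs[] = { 0x9888, 0x10800000 }; /* one (addr, value) pair */
	struct drm_i915_perf_oa_config config;
	uint64_t config_id;
	int ret;

	memset(&config, 0, sizeof(config));
	memcpy(config.uuid, "01234567-0123-0123-0123-0123456789ab",
	       sizeof(config.uuid));
	config.n_mux_regs = 1;
	config.mux_regs_ptr = (uintptr_t)mux_regs;

	ret = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
	if (ret < 0)
		return ret;

	/* The new id can now be passed as DRM_I915_PERF_PROP_OA_METRICS_SET. */
	config_id = ret;

	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id);
}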
4133/**
4134 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
4135 * @dev: drm device
4136 * @data: ioctl data (pointer to u64 integer) copied from userspace
4137 * @file: drm file
4138 *
 4139 * Configs can be removed while being used; they will stop appearing in sysfs
4140 * and their content will be freed when the stream using the config is closed.
4141 *
4142 * Returns: 0 on success or a negative error code on failure.
4143 */
4144int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
4145 struct drm_file *file)
4146{
8f8b1171 4147 struct i915_perf *perf = &to_i915(dev)->perf;
f89823c2
LL
4148 u64 *arg = data;
4149 struct i915_oa_config *oa_config;
4150 int ret;
4151
8f8b1171 4152 if (!perf->i915) {
f89823c2
LL
4153 DRM_DEBUG("i915 perf interface not available for this system\n");
4154 return -ENOTSUPP;
4155 }
4156
4157 if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
4158 DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
4159 return -EACCES;
4160 }
4161
8f8b1171 4162 ret = mutex_lock_interruptible(&perf->metrics_lock);
f89823c2 4163 if (ret)
6a45008a 4164 return ret;
f89823c2 4165
8f8b1171 4166 oa_config = idr_find(&perf->metrics_idr, *arg);
f89823c2
LL
4167 if (!oa_config) {
4168 DRM_DEBUG("Failed to remove unknown OA config\n");
4169 ret = -ENOENT;
6a45008a 4170 goto err_unlock;
f89823c2
LL
4171 }
4172
4173 GEM_BUG_ON(*arg != oa_config->id);
4174
4f6ccc74 4175 sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
f89823c2 4176
8f8b1171 4177 idr_remove(&perf->metrics_idr, *arg);
9bd9be66 4178
6a45008a
LL
4179 mutex_unlock(&perf->metrics_lock);
4180
9bd9be66
LL
4181 DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
4182
6a45008a
LL
4183 i915_oa_config_put(oa_config);
4184
4185 return 0;
f89823c2 4186
6a45008a 4187err_unlock:
8f8b1171 4188 mutex_unlock(&perf->metrics_lock);
f89823c2
LL
4189 return ret;
4190}
4191
ccdf6341
RB
4192static struct ctl_table oa_table[] = {
4193 {
4194 .procname = "perf_stream_paranoid",
4195 .data = &i915_perf_stream_paranoid,
4196 .maxlen = sizeof(i915_perf_stream_paranoid),
4197 .mode = 0644,
4198 .proc_handler = proc_dointvec_minmax,
eec4844f
MC
4199 .extra1 = SYSCTL_ZERO,
4200 .extra2 = SYSCTL_ONE,
ccdf6341 4201 },
00319ba0
RB
4202 {
4203 .procname = "oa_max_sample_rate",
4204 .data = &i915_oa_max_sample_rate,
4205 .maxlen = sizeof(i915_oa_max_sample_rate),
4206 .mode = 0644,
4207 .proc_handler = proc_dointvec_minmax,
eec4844f 4208 .extra1 = SYSCTL_ZERO,
00319ba0
RB
4209 .extra2 = &oa_sample_rate_hard_limit,
4210 },
ccdf6341
RB
4211 {}
4212};
4213
4214static struct ctl_table i915_root[] = {
4215 {
4216 .procname = "i915",
4217 .maxlen = 0,
4218 .mode = 0555,
4219 .child = oa_table,
4220 },
4221 {}
4222};
4223
4224static struct ctl_table dev_root[] = {
4225 {
4226 .procname = "dev",
4227 .maxlen = 0,
4228 .mode = 0555,
4229 .child = i915_root,
4230 },
4231 {}
4232};
4233
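/*
 * The tables above surface as /proc/sys/dev/i915/perf_stream_paranoid and
 * /proc/sys/dev/i915/oa_max_sample_rate; e.g. (as root, illustrative)
 * "sysctl dev.i915.perf_stream_paranoid=0" relaxes the CAP_SYS_ADMIN
 * check made in i915_perf_open_ioctl_locked().
 */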
16d98b31 4234/**
3dc716fd 4235 * i915_perf_init - initialize i915-perf state on module bind
8f8b1171 4236 * @i915: i915 device instance
16d98b31
RB
4237 *
4238 * Initializes i915-perf state without exposing anything to userspace.
4239 *
4240 * Note: i915-perf initialization is split into an 'init' and 'register'
4241 * phase with the i915_perf_register() exposing state to userspace.
4242 */
8f8b1171
CW
4243void i915_perf_init(struct drm_i915_private *i915)
4244{
4245 struct i915_perf *perf = &i915->perf;
4246
4247 /* XXX const struct i915_perf_ops! */
4248
4249 if (IS_HASWELL(i915)) {
4250 perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
4251 perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
4252 perf->ops.is_valid_flex_reg = NULL;
4253 perf->ops.enable_metric_set = hsw_enable_metric_set;
4254 perf->ops.disable_metric_set = hsw_disable_metric_set;
4255 perf->ops.oa_enable = gen7_oa_enable;
4256 perf->ops.oa_disable = gen7_oa_disable;
4257 perf->ops.read = gen7_oa_read;
4258 perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
4259
4260 perf->oa_formats = hsw_oa_formats;
4261 } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
19f81df2
RB
 4262		/* Note that although we could theoretically also support the
4263 * legacy ringbuffer mode on BDW (and earlier iterations of
4264 * this driver, before upstreaming did this) it didn't seem
4265 * worth the complexity to maintain now that BDW+ enable
4266 * execlist mode by default.
4267 */
8f8b1171 4268 perf->ops.read = gen8_oa_read;
701f8231 4269
8f8b1171 4270 if (IS_GEN_RANGE(i915, 8, 9)) {
00a7f0d7
LL
4271 perf->oa_formats = gen8_plus_oa_formats;
4272
8f8b1171 4273 perf->ops.is_valid_b_counter_reg =
ba6b7c1a 4274 gen7_is_valid_b_counter_addr;
8f8b1171 4275 perf->ops.is_valid_mux_reg =
ba6b7c1a 4276 gen8_is_valid_mux_addr;
8f8b1171 4277 perf->ops.is_valid_flex_reg =
ba6b7c1a 4278 gen8_is_valid_flex_addr;
155e941f 4279
8f8b1171
CW
4280 if (IS_CHERRYVIEW(i915)) {
4281 perf->ops.is_valid_mux_reg =
f89823c2
LL
4282 chv_is_valid_mux_addr;
4283 }
155e941f 4284
00a7f0d7
LL
4285 perf->ops.oa_enable = gen8_oa_enable;
4286 perf->ops.oa_disable = gen8_oa_disable;
8f8b1171
CW
4287 perf->ops.enable_metric_set = gen8_enable_metric_set;
4288 perf->ops.disable_metric_set = gen8_disable_metric_set;
00a7f0d7 4289 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
ba6b7c1a 4290
8f8b1171
CW
4291 if (IS_GEN(i915, 8)) {
4292 perf->ctx_oactxctrl_offset = 0x120;
4293 perf->ctx_flexeu0_offset = 0x2ce;
ba6b7c1a 4294
8f8b1171 4295 perf->gen8_valid_ctx_bit = BIT(25);
ba6b7c1a 4296 } else {
8f8b1171
CW
4297 perf->ctx_oactxctrl_offset = 0x128;
4298 perf->ctx_flexeu0_offset = 0x3de;
ba6b7c1a 4299
8f8b1171 4300 perf->gen8_valid_ctx_bit = BIT(16);
ba6b7c1a 4301 }
8f8b1171 4302 } else if (IS_GEN_RANGE(i915, 10, 11)) {
00a7f0d7
LL
4303 perf->oa_formats = gen8_plus_oa_formats;
4304
8f8b1171 4305 perf->ops.is_valid_b_counter_reg =
95690a02 4306 gen7_is_valid_b_counter_addr;
8f8b1171 4307 perf->ops.is_valid_mux_reg =
95690a02 4308 gen10_is_valid_mux_addr;
8f8b1171 4309 perf->ops.is_valid_flex_reg =
95690a02
LL
4310 gen8_is_valid_flex_addr;
4311
00a7f0d7
LL
4312 perf->ops.oa_enable = gen8_oa_enable;
4313 perf->ops.oa_disable = gen8_oa_disable;
8f8b1171
CW
4314 perf->ops.enable_metric_set = gen8_enable_metric_set;
4315 perf->ops.disable_metric_set = gen10_disable_metric_set;
00a7f0d7 4316 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
95690a02 4317
8f8b1171
CW
4318 if (IS_GEN(i915, 10)) {
4319 perf->ctx_oactxctrl_offset = 0x128;
4320 perf->ctx_flexeu0_offset = 0x3de;
8dcfdfb4 4321 } else {
8f8b1171
CW
4322 perf->ctx_oactxctrl_offset = 0x124;
4323 perf->ctx_flexeu0_offset = 0x78e;
8dcfdfb4 4324 }
8f8b1171 4325 perf->gen8_valid_ctx_bit = BIT(16);
00a7f0d7
LL
4326 } else if (IS_GEN(i915, 12)) {
4327 perf->oa_formats = gen12_oa_formats;
4328
4329 perf->ops.is_valid_b_counter_reg =
4330 gen12_is_valid_b_counter_addr;
4331 perf->ops.is_valid_mux_reg =
4332 gen12_is_valid_mux_addr;
4333 perf->ops.is_valid_flex_reg =
4334 gen8_is_valid_flex_addr;
4335
4336 perf->ops.oa_enable = gen12_oa_enable;
4337 perf->ops.oa_disable = gen12_oa_disable;
4338 perf->ops.enable_metric_set = gen12_enable_metric_set;
4339 perf->ops.disable_metric_set = gen12_disable_metric_set;
4340 perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
4341
4342 perf->ctx_flexeu0_offset = 0;
4343 perf->ctx_oactxctrl_offset = 0x144;
19f81df2 4344 }
19f81df2 4345 }
d7965152 4346
8f8b1171 4347 if (perf->ops.enable_metric_set) {
8f8b1171 4348 mutex_init(&perf->lock);
eec688e1 4349
9f9b2792 4350 oa_sample_rate_hard_limit = 1000 *
8f8b1171 4351 (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2);
ccdf6341 4352
8f8b1171
CW
4353 mutex_init(&perf->metrics_lock);
4354 idr_init(&perf->metrics_idr);
f89823c2 4355
a37f08a8
UNR
4356 /* We set up some ratelimit state to potentially throttle any
4357 * _NOTES about spurious, invalid OA reports which we don't
4358 * forward to userspace.
4359 *
4360 * We print a _NOTE about any throttling when closing the
4361 * stream instead of waiting until driver _fini which no one
4362 * would ever see.
4363 *
4364 * Using the same limiting factors as printk_ratelimit()
4365 */
8f8b1171 4366 ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
a37f08a8
UNR
4367 /* Since we use a DRM_NOTE for spurious reports it would be
4368 * inconsistent to let __ratelimit() automatically print a
4369 * warning for throttling.
4370 */
8f8b1171 4371 ratelimit_set_flags(&perf->spurious_report_rs,
a37f08a8
UNR
4372 RATELIMIT_MSG_ON_RELEASE);
4373
daed3e44
LL
4374 atomic64_set(&perf->noa_programming_delay,
4375 500 * 1000 /* 500us */);
4376
8f8b1171 4377 perf->i915 = i915;
19f81df2 4378 }
eec688e1
RB
4379}
4380
f89823c2
LL
4381static int destroy_config(int id, void *p, void *data)
4382{
6a45008a 4383 i915_oa_config_put(p);
f89823c2
LL
4384 return 0;
4385}
4386
3dc716fd
VSD
4387void i915_perf_sysctl_register(void)
4388{
4389 sysctl_header = register_sysctl_table(dev_root);
4390}
4391
4392void i915_perf_sysctl_unregister(void)
4393{
4394 unregister_sysctl_table(sysctl_header);
4395}
4396
16d98b31
RB
4397/**
4398 * i915_perf_fini - Counter part to i915_perf_init()
8f8b1171 4399 * @i915: i915 device instance
16d98b31 4400 */
8f8b1171 4401void i915_perf_fini(struct drm_i915_private *i915)
eec688e1 4402{
8f8b1171 4403 struct i915_perf *perf = &i915->perf;
eec688e1 4404
8f8b1171
CW
4405 if (!perf->i915)
4406 return;
f89823c2 4407
8f8b1171
CW
4408 idr_for_each(&perf->metrics_idr, destroy_config, perf);
4409 idr_destroy(&perf->metrics_idr);
ccdf6341 4410
8f8b1171
CW
4411 memset(&perf->ops, 0, sizeof(perf->ops));
4412 perf->i915 = NULL;
eec688e1 4413}
daed3e44 4414
b8d49f28
LL
4415/**
4416 * i915_perf_ioctl_version - Version of the i915-perf subsystem
4417 *
4418 * This version number is used by userspace to detect available features.
4419 */
4420int i915_perf_ioctl_version(void)
4421{
7831e9a9
CW
4422 /*
4423 * 1: Initial version
4424 * I915_PERF_IOCTL_ENABLE
4425 * I915_PERF_IOCTL_DISABLE
4426 *
4427 * 2: Added runtime modification of OA config.
4428 * I915_PERF_IOCTL_CONFIG
9cd20ef7
LL
4429 *
4430 * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
4431 * preemption on a particular context so that performance data is
4432 * accessible from a delta of MI_RPC reports without looking at the
4433 * OA buffer.
7831e9a9 4434 */
9cd20ef7 4435 return 3;
b8d49f28
LL
4436}
4437
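/*
 * Example (illustrative sketch, not part of the driver): userspace reads
 * this version through the I915_PARAM_PERF_REVISION getparam.
 */
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int perf_revision(int drm_fd)
{
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_PERF_REVISION,
		.value = &value,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) < 0)
		return -1;

	return value; /* >= 3 implies DRM_I915_PERF_PROP_HOLD_PREEMPTION */
}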
daed3e44
LL
4438#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4439#include "selftests/i915_perf.c"
4440#endif