2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eugeni Dodonov <eugeni.dodonov@intel.com>
28 #include <linux/cpufreq.h>
29 #include <linux/pm_runtime.h>
30 #include <drm/drm_plane_helper.h>
32 #include "intel_drv.h"
33 #include "../../../platform/x86/intel_ips.h"
34 #include <linux/module.h>
35 #include <drm/drm_atomic_helper.h>
/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage. This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as new workload arises GPU wakes up automatically as well.
 *
 * There are different RC6 modes available in Intel GPU, which differentiate
 * among each other with the latency required to enter and leave RC6 and
 * voltage consumed by the GPU in different states.
 *
 * The combination of the following flags define which states GPU is allowed
 * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is deepest RC6. Their support by hardware varies according to the
 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
 * which brings the most power savings; deeper states save more power, but
 * require higher latency to switch to and wake up.
 */
57 static void gen9_init_clock_gating(struct drm_i915_private
*dev_priv
)
59 if (HAS_LLC(dev_priv
)) {
61 * WaCompressedResourceDisplayNewHashMode:skl,kbl
62 * Display WA #0390: skl,kbl
64 * Must match Sampler, Pixel Back End, and Media. See
65 * WaCompressedResourceSamplerPbeMediaNewHashMode.
67 I915_WRITE(CHICKEN_PAR1_1
,
68 I915_READ(CHICKEN_PAR1_1
) |
69 SKL_DE_COMPRESSED_HASH_MODE
);
72 /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
73 I915_WRITE(CHICKEN_PAR1_1
,
74 I915_READ(CHICKEN_PAR1_1
) | SKL_EDP_PSR_FIX_RDWRAP
);
76 /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
77 I915_WRITE(GEN8_CHICKEN_DCPR_1
,
78 I915_READ(GEN8_CHICKEN_DCPR_1
) | MASK_WAKEMEM
);
80 /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
81 /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
82 I915_WRITE(DISP_ARB_CTL
, I915_READ(DISP_ARB_CTL
) |
84 DISP_FBC_MEMORY_WAKE
);
86 /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
87 I915_WRITE(ILK_DPFC_CHICKEN
, I915_READ(ILK_DPFC_CHICKEN
) |
88 ILK_DPFC_DISABLE_DUMMY0
);
90 if (IS_SKYLAKE(dev_priv
)) {
91 /* WaDisableDopClockGating */
92 I915_WRITE(GEN7_MISCCPCTL
, I915_READ(GEN7_MISCCPCTL
)
93 & ~GEN7_DOP_CLOCK_GATE_ENABLE
);
97 static void bxt_init_clock_gating(struct drm_i915_private
*dev_priv
)
99 gen9_init_clock_gating(dev_priv
);
101 /* WaDisableSDEUnitClockGating:bxt */
102 I915_WRITE(GEN8_UCGCTL6
, I915_READ(GEN8_UCGCTL6
) |
103 GEN8_SDEUNIT_CLOCK_GATE_DISABLE
);
107 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
109 I915_WRITE(GEN8_UCGCTL6
, I915_READ(GEN8_UCGCTL6
) |
110 GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ
);
113 * Wa: Backlight PWM may stop in the asserted state, causing backlight
116 I915_WRITE(GEN9_CLKGATE_DIS_0
, I915_READ(GEN9_CLKGATE_DIS_0
) |
117 PWM1_GATING_DIS
| PWM2_GATING_DIS
);
120 static void glk_init_clock_gating(struct drm_i915_private
*dev_priv
)
122 gen9_init_clock_gating(dev_priv
);
125 * WaDisablePWMClockGating:glk
126 * Backlight PWM may stop in the asserted state, causing backlight
129 I915_WRITE(GEN9_CLKGATE_DIS_0
, I915_READ(GEN9_CLKGATE_DIS_0
) |
130 PWM1_GATING_DIS
| PWM2_GATING_DIS
);
132 /* WaDDIIOTimeout:glk */
133 if (IS_GLK_REVID(dev_priv
, 0, GLK_REVID_A1
)) {
134 u32 val
= I915_READ(CHICKEN_MISC_2
);
135 val
&= ~(GLK_CL0_PWR_DOWN
|
138 I915_WRITE(CHICKEN_MISC_2
, val
);
143 static void i915_pineview_get_mem_freq(struct drm_i915_private
*dev_priv
)
147 tmp
= I915_READ(CLKCFG
);
149 switch (tmp
& CLKCFG_FSB_MASK
) {
151 dev_priv
->fsb_freq
= 533; /* 133*4 */
154 dev_priv
->fsb_freq
= 800; /* 200*4 */
157 dev_priv
->fsb_freq
= 667; /* 167*4 */
160 dev_priv
->fsb_freq
= 400; /* 100*4 */
164 switch (tmp
& CLKCFG_MEM_MASK
) {
166 dev_priv
->mem_freq
= 533;
169 dev_priv
->mem_freq
= 667;
172 dev_priv
->mem_freq
= 800;
176 /* detect pineview DDR3 setting */
177 tmp
= I915_READ(CSHRDDR3CTL
);
178 dev_priv
->is_ddr3
= (tmp
& CSHRDDR3CTL_DDR3
) ? 1 : 0;
181 static void i915_ironlake_get_mem_freq(struct drm_i915_private
*dev_priv
)
185 ddrpll
= I915_READ16(DDRMPLL1
);
186 csipll
= I915_READ16(CSIPLL0
);
188 switch (ddrpll
& 0xff) {
190 dev_priv
->mem_freq
= 800;
193 dev_priv
->mem_freq
= 1066;
196 dev_priv
->mem_freq
= 1333;
199 dev_priv
->mem_freq
= 1600;
202 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
204 dev_priv
->mem_freq
= 0;
208 dev_priv
->ips
.r_t
= dev_priv
->mem_freq
;
210 switch (csipll
& 0x3ff) {
212 dev_priv
->fsb_freq
= 3200;
215 dev_priv
->fsb_freq
= 3733;
218 dev_priv
->fsb_freq
= 4266;
221 dev_priv
->fsb_freq
= 4800;
224 dev_priv
->fsb_freq
= 5333;
227 dev_priv
->fsb_freq
= 5866;
230 dev_priv
->fsb_freq
= 6400;
233 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
235 dev_priv
->fsb_freq
= 0;
239 if (dev_priv
->fsb_freq
== 3200) {
240 dev_priv
->ips
.c_m
= 0;
241 } else if (dev_priv
->fsb_freq
> 3200 && dev_priv
->fsb_freq
<= 4800) {
242 dev_priv
->ips
.c_m
= 1;
244 dev_priv
->ips
.c_m
= 2;
248 static const struct cxsr_latency cxsr_latency_table
[] = {
249 {1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */
250 {1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */
251 {1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */
252 {1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */
253 {1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */
255 {1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */
256 {1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */
257 {1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */
258 {1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */
259 {1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */
261 {1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */
262 {1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */
263 {1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */
264 {1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */
265 {1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */
267 {0, 0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */
268 {0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */
269 {0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */
270 {0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */
271 {0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */
273 {0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */
274 {0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */
275 {0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */
276 {0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */
277 {0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */
279 {0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */
280 {0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */
281 {0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */
282 {0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */
283 {0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */
286 static const struct cxsr_latency
*intel_get_cxsr_latency(bool is_desktop
,
291 const struct cxsr_latency
*latency
;
294 if (fsb
== 0 || mem
== 0)
297 for (i
= 0; i
< ARRAY_SIZE(cxsr_latency_table
); i
++) {
298 latency
= &cxsr_latency_table
[i
];
299 if (is_desktop
== latency
->is_desktop
&&
300 is_ddr3
== latency
->is_ddr3
&&
301 fsb
== latency
->fsb_freq
&& mem
== latency
->mem_freq
)
305 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
310 static void chv_set_memory_dvfs(struct drm_i915_private
*dev_priv
, bool enable
)
314 mutex_lock(&dev_priv
->pcu_lock
);
316 val
= vlv_punit_read(dev_priv
, PUNIT_REG_DDR_SETUP2
);
318 val
&= ~FORCE_DDR_HIGH_FREQ
;
320 val
|= FORCE_DDR_HIGH_FREQ
;
321 val
&= ~FORCE_DDR_LOW_FREQ
;
322 val
|= FORCE_DDR_FREQ_REQ_ACK
;
323 vlv_punit_write(dev_priv
, PUNIT_REG_DDR_SETUP2
, val
);
325 if (wait_for((vlv_punit_read(dev_priv
, PUNIT_REG_DDR_SETUP2
) &
326 FORCE_DDR_FREQ_REQ_ACK
) == 0, 3))
327 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
329 mutex_unlock(&dev_priv
->pcu_lock
);
332 static void chv_set_memory_pm5(struct drm_i915_private
*dev_priv
, bool enable
)
336 mutex_lock(&dev_priv
->pcu_lock
);
338 val
= vlv_punit_read(dev_priv
, PUNIT_REG_DSPFREQ
);
340 val
|= DSP_MAXFIFO_PM5_ENABLE
;
342 val
&= ~DSP_MAXFIFO_PM5_ENABLE
;
343 vlv_punit_write(dev_priv
, PUNIT_REG_DSPFREQ
, val
);
345 mutex_unlock(&dev_priv
->pcu_lock
);
/* Pack a watermark value into the DSPFW_<plane> register field. */
#define FW_WM(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
351 static bool _intel_set_memory_cxsr(struct drm_i915_private
*dev_priv
, bool enable
)
356 if (IS_VALLEYVIEW(dev_priv
) || IS_CHERRYVIEW(dev_priv
)) {
357 was_enabled
= I915_READ(FW_BLC_SELF_VLV
) & FW_CSPWRDWNEN
;
358 I915_WRITE(FW_BLC_SELF_VLV
, enable
? FW_CSPWRDWNEN
: 0);
359 POSTING_READ(FW_BLC_SELF_VLV
);
360 } else if (IS_G4X(dev_priv
) || IS_I965GM(dev_priv
)) {
361 was_enabled
= I915_READ(FW_BLC_SELF
) & FW_BLC_SELF_EN
;
362 I915_WRITE(FW_BLC_SELF
, enable
? FW_BLC_SELF_EN
: 0);
363 POSTING_READ(FW_BLC_SELF
);
364 } else if (IS_PINEVIEW(dev_priv
)) {
365 val
= I915_READ(DSPFW3
);
366 was_enabled
= val
& PINEVIEW_SELF_REFRESH_EN
;
368 val
|= PINEVIEW_SELF_REFRESH_EN
;
370 val
&= ~PINEVIEW_SELF_REFRESH_EN
;
371 I915_WRITE(DSPFW3
, val
);
372 POSTING_READ(DSPFW3
);
373 } else if (IS_I945G(dev_priv
) || IS_I945GM(dev_priv
)) {
374 was_enabled
= I915_READ(FW_BLC_SELF
) & FW_BLC_SELF_EN
;
375 val
= enable
? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN
) :
376 _MASKED_BIT_DISABLE(FW_BLC_SELF_EN
);
377 I915_WRITE(FW_BLC_SELF
, val
);
378 POSTING_READ(FW_BLC_SELF
);
379 } else if (IS_I915GM(dev_priv
)) {
381 * FIXME can't find a bit like this for 915G, and
382 * and yet it does have the related watermark in
383 * FW_BLC_SELF. What's going on?
385 was_enabled
= I915_READ(INSTPM
) & INSTPM_SELF_EN
;
386 val
= enable
? _MASKED_BIT_ENABLE(INSTPM_SELF_EN
) :
387 _MASKED_BIT_DISABLE(INSTPM_SELF_EN
);
388 I915_WRITE(INSTPM
, val
);
389 POSTING_READ(INSTPM
);
394 trace_intel_memory_cxsr(dev_priv
, was_enabled
, enable
);
396 DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
397 enableddisabled(enable
),
398 enableddisabled(was_enabled
));
404 * intel_set_memory_cxsr - Configure CxSR state
405 * @dev_priv: i915 device
406 * @enable: Allow vs. disallow CxSR
408 * Allow or disallow the system to enter a special CxSR
409 * (C-state self refresh) state. What typically happens in CxSR mode
410 * is that several display FIFOs may get combined into a single larger
411 * FIFO for a particular plane (so called max FIFO mode) to allow the
412 * system to defer memory fetches longer, and the memory will enter
415 * Note that enabling CxSR does not guarantee that the system enter
416 * this special mode, nor does it guarantee that the system stays
417 * in that mode once entered. So this just allows/disallows the system
418 * to autonomously utilize the CxSR mode. Other factors such as core
419 * C-states will affect when/if the system actually enters/exits the
422 * Note that on VLV/CHV this actually only controls the max FIFO mode,
423 * and the system is free to enter/exit memory self refresh at any time
424 * even when the use of CxSR has been disallowed.
426 * While the system is actually in the CxSR/max FIFO mode, some plane
427 * control registers will not get latched on vblank. Thus in order to
428 * guarantee the system will respond to changes in the plane registers
429 * we must always disallow CxSR prior to making changes to those registers.
430 * Unfortunately the system will re-evaluate the CxSR conditions at
431 * frame start which happens after vblank start (which is when the plane
432 * registers would get latched), so we can't proceed with the plane update
433 * during the same frame where we disallowed CxSR.
435 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
436 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
437 * the hardware w.r.t. HPLL SR when writing to plane registers.
438 * Disallowing just CxSR is sufficient.
440 bool intel_set_memory_cxsr(struct drm_i915_private
*dev_priv
, bool enable
)
444 mutex_lock(&dev_priv
->wm
.wm_mutex
);
445 ret
= _intel_set_memory_cxsr(dev_priv
, enable
);
446 if (IS_VALLEYVIEW(dev_priv
) || IS_CHERRYVIEW(dev_priv
))
447 dev_priv
->wm
.vlv
.cxsr
= enable
;
448 else if (IS_G4X(dev_priv
))
449 dev_priv
->wm
.g4x
.cxsr
= enable
;
450 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value. It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;
/*
 * Reassemble a 9-bit FIFO start position from its low byte in DSPARB and
 * its high bit in DSPARB2.
 */
#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
	((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
474 static void vlv_get_fifo_size(struct intel_crtc_state
*crtc_state
)
476 struct intel_crtc
*crtc
= to_intel_crtc(crtc_state
->base
.crtc
);
477 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
478 struct vlv_fifo_state
*fifo_state
= &crtc_state
->wm
.vlv
.fifo_state
;
479 enum pipe pipe
= crtc
->pipe
;
480 int sprite0_start
, sprite1_start
;
483 u32 dsparb
, dsparb2
, dsparb3
;
485 dsparb
= I915_READ(DSPARB
);
486 dsparb2
= I915_READ(DSPARB2
);
487 sprite0_start
= VLV_FIFO_START(dsparb
, dsparb2
, 0, 0);
488 sprite1_start
= VLV_FIFO_START(dsparb
, dsparb2
, 8, 4);
491 dsparb
= I915_READ(DSPARB
);
492 dsparb2
= I915_READ(DSPARB2
);
493 sprite0_start
= VLV_FIFO_START(dsparb
, dsparb2
, 16, 8);
494 sprite1_start
= VLV_FIFO_START(dsparb
, dsparb2
, 24, 12);
497 dsparb2
= I915_READ(DSPARB2
);
498 dsparb3
= I915_READ(DSPARB3
);
499 sprite0_start
= VLV_FIFO_START(dsparb3
, dsparb2
, 0, 16);
500 sprite1_start
= VLV_FIFO_START(dsparb3
, dsparb2
, 8, 20);
507 fifo_state
->plane
[PLANE_PRIMARY
] = sprite0_start
;
508 fifo_state
->plane
[PLANE_SPRITE0
] = sprite1_start
- sprite0_start
;
509 fifo_state
->plane
[PLANE_SPRITE1
] = 511 - sprite1_start
;
510 fifo_state
->plane
[PLANE_CURSOR
] = 63;
513 static int i9xx_get_fifo_size(struct drm_i915_private
*dev_priv
,
514 enum i9xx_plane_id i9xx_plane
)
516 u32 dsparb
= I915_READ(DSPARB
);
519 size
= dsparb
& 0x7f;
520 if (i9xx_plane
== PLANE_B
)
521 size
= ((dsparb
>> DSPARB_CSTART_SHIFT
) & 0x7f) - size
;
523 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
524 dsparb
, plane_name(i9xx_plane
), size
);
529 static int i830_get_fifo_size(struct drm_i915_private
*dev_priv
,
530 enum i9xx_plane_id i9xx_plane
)
532 u32 dsparb
= I915_READ(DSPARB
);
535 size
= dsparb
& 0x1ff;
536 if (i9xx_plane
== PLANE_B
)
537 size
= ((dsparb
>> DSPARB_BEND_SHIFT
) & 0x1ff) - size
;
538 size
>>= 1; /* Convert to cachelines */
540 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
541 dsparb
, plane_name(i9xx_plane
), size
);
546 static int i845_get_fifo_size(struct drm_i915_private
*dev_priv
,
547 enum i9xx_plane_id i9xx_plane
)
549 u32 dsparb
= I915_READ(DSPARB
);
552 size
= dsparb
& 0x7f;
553 size
>>= 2; /* Convert to cachelines */
555 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
556 dsparb
, plane_name(i9xx_plane
), size
);
561 /* Pineview has different values for various configs */
562 static const struct intel_watermark_params pineview_display_wm
= {
563 .fifo_size
= PINEVIEW_DISPLAY_FIFO
,
564 .max_wm
= PINEVIEW_MAX_WM
,
565 .default_wm
= PINEVIEW_DFT_WM
,
566 .guard_size
= PINEVIEW_GUARD_WM
,
567 .cacheline_size
= PINEVIEW_FIFO_LINE_SIZE
,
569 static const struct intel_watermark_params pineview_display_hplloff_wm
= {
570 .fifo_size
= PINEVIEW_DISPLAY_FIFO
,
571 .max_wm
= PINEVIEW_MAX_WM
,
572 .default_wm
= PINEVIEW_DFT_HPLLOFF_WM
,
573 .guard_size
= PINEVIEW_GUARD_WM
,
574 .cacheline_size
= PINEVIEW_FIFO_LINE_SIZE
,
576 static const struct intel_watermark_params pineview_cursor_wm
= {
577 .fifo_size
= PINEVIEW_CURSOR_FIFO
,
578 .max_wm
= PINEVIEW_CURSOR_MAX_WM
,
579 .default_wm
= PINEVIEW_CURSOR_DFT_WM
,
580 .guard_size
= PINEVIEW_CURSOR_GUARD_WM
,
581 .cacheline_size
= PINEVIEW_FIFO_LINE_SIZE
,
583 static const struct intel_watermark_params pineview_cursor_hplloff_wm
= {
584 .fifo_size
= PINEVIEW_CURSOR_FIFO
,
585 .max_wm
= PINEVIEW_CURSOR_MAX_WM
,
586 .default_wm
= PINEVIEW_CURSOR_DFT_WM
,
587 .guard_size
= PINEVIEW_CURSOR_GUARD_WM
,
588 .cacheline_size
= PINEVIEW_FIFO_LINE_SIZE
,
590 static const struct intel_watermark_params i965_cursor_wm_info
= {
591 .fifo_size
= I965_CURSOR_FIFO
,
592 .max_wm
= I965_CURSOR_MAX_WM
,
593 .default_wm
= I965_CURSOR_DFT_WM
,
595 .cacheline_size
= I915_FIFO_LINE_SIZE
,
597 static const struct intel_watermark_params i945_wm_info
= {
598 .fifo_size
= I945_FIFO_SIZE
,
599 .max_wm
= I915_MAX_WM
,
602 .cacheline_size
= I915_FIFO_LINE_SIZE
,
604 static const struct intel_watermark_params i915_wm_info
= {
605 .fifo_size
= I915_FIFO_SIZE
,
606 .max_wm
= I915_MAX_WM
,
609 .cacheline_size
= I915_FIFO_LINE_SIZE
,
611 static const struct intel_watermark_params i830_a_wm_info
= {
612 .fifo_size
= I855GM_FIFO_SIZE
,
613 .max_wm
= I915_MAX_WM
,
616 .cacheline_size
= I830_FIFO_LINE_SIZE
,
618 static const struct intel_watermark_params i830_bc_wm_info
= {
619 .fifo_size
= I855GM_FIFO_SIZE
,
620 .max_wm
= I915_MAX_WM
/2,
623 .cacheline_size
= I830_FIFO_LINE_SIZE
,
625 static const struct intel_watermark_params i845_wm_info
= {
626 .fifo_size
= I830_FIFO_SIZE
,
627 .max_wm
= I915_MAX_WM
,
630 .cacheline_size
= I830_FIFO_LINE_SIZE
,
634 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
635 * @pixel_rate: Pipe pixel rate in kHz
636 * @cpp: Plane bytes per pixel
637 * @latency: Memory wakeup latency in 0.1us units
639 * Compute the watermark using the method 1 or "small buffer"
640 * formula. The caller may additonally add extra cachelines
641 * to account for TLB misses and clock crossings.
643 * This method is concerned with the short term drain rate
644 * of the FIFO, ie. it does not account for blanking periods
645 * which would effectively reduce the average drain rate across
646 * a longer period. The name "small" refers to the fact the
647 * FIFO is relatively small compared to the amount of data
650 * The FIFO level vs. time graph might look something like:
654 * __---__---__ (- plane active, _ blanking)
657 * or perhaps like this:
660 * __----__----__ (- plane active, _ blanking)
664 * The watermark in bytes
666 static unsigned int intel_wm_method1(unsigned int pixel_rate
,
668 unsigned int latency
)
672 ret
= (u64
)pixel_rate
* cpp
* latency
;
673 ret
= DIV_ROUND_UP_ULL(ret
, 10000);
/**
 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @htotal: Pipe horizontal total
 * @width: Plane width in pixels
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 2 or "large buffer"
 * formula. The caller may additonally add extra cachelines
 * to account for TLB misses and clock crossings.
 *
 * This method is concerned with the long term drain rate
 * of the FIFO, ie. it does account for blanking periods
 * which effectively reduce the average drain rate across
 * a longer period. The name "large" refers to the fact the
 * FIFO is relatively large compared to the amount of data
 * fetched.
 *
 * The FIFO level vs. time graph might look something like:
 *
 *    |\___       |\___
 *    |    \___   |    \___
 *    |        \  |        \
 *    __ --__--__--__--__--__--__ (- plane active, _ blanking)
 *    -> time
 *
 * Returns:
 * The watermark in bytes
 */
static unsigned int intel_wm_method2(unsigned int pixel_rate,
				     unsigned int htotal,
				     unsigned int width,
				     unsigned int cpp,
				     unsigned int latency)
{
	unsigned int ret;

	/*
	 * FIXME remove once all users are computing
	 * watermarks in the correct place.
	 */
	if (WARN_ON_ONCE(htotal == 0))
		htotal = 1;

	ret = (latency * pixel_rate) / (htotal * 10000);
	ret = (ret + 1) * width * cpp;

	return ret;
}
730 * intel_calculate_wm - calculate watermark level
731 * @pixel_rate: pixel clock
732 * @wm: chip FIFO params
733 * @fifo_size: size of the FIFO buffer
734 * @cpp: bytes per pixel
735 * @latency_ns: memory latency for the platform
737 * Calculate the watermark level (the level at which the display plane will
738 * start fetching from memory again). Each chip has a different display
739 * FIFO size and allocation, so the caller needs to figure that out and pass
740 * in the correct intel_watermark_params structure.
742 * As the pixel clock runs, the FIFO will be drained at a rate that depends
743 * on the pixel size. When it reaches the watermark level, it'll start
744 * fetching FIFO line sized based chunks from memory until the FIFO fills
745 * past the watermark point. If the FIFO drains completely, a FIFO underrun
746 * will occur, and a display engine hang could result.
748 static unsigned int intel_calculate_wm(int pixel_rate
,
749 const struct intel_watermark_params
*wm
,
750 int fifo_size
, int cpp
,
751 unsigned int latency_ns
)
753 int entries
, wm_size
;
756 * Note: we need to make sure we don't overflow for various clock &
758 * clocks go from a few thousand to several hundred thousand.
759 * latency is usually a few thousand
761 entries
= intel_wm_method1(pixel_rate
, cpp
,
763 entries
= DIV_ROUND_UP(entries
, wm
->cacheline_size
) +
765 DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries
);
767 wm_size
= fifo_size
- entries
;
768 DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size
);
770 /* Don't promote wm_size to unsigned... */
771 if (wm_size
> wm
->max_wm
)
772 wm_size
= wm
->max_wm
;
774 wm_size
= wm
->default_wm
;
777 * Bspec seems to indicate that the value shouldn't be lower than
778 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
779 * Lets go for 8 which is the burst size since certain platforms
780 * already use a hardcoded 8 (which is what the spec says should be
/* True if the value dropped from at/above the threshold to below it. */
static bool is_disabling(int old, int new, int threshold)
{
	return old >= threshold && new < threshold;
}
/* True if the value rose from below the threshold to at/above it. */
static bool is_enabling(int old, int new, int threshold)
{
	return old < threshold && new >= threshold;
}
799 static int intel_wm_num_levels(struct drm_i915_private
*dev_priv
)
801 return dev_priv
->wm
.max_level
+ 1;
804 static bool intel_wm_plane_visible(const struct intel_crtc_state
*crtc_state
,
805 const struct intel_plane_state
*plane_state
)
807 struct intel_plane
*plane
= to_intel_plane(plane_state
->base
.plane
);
809 /* FIXME check the 'enable' instead */
810 if (!crtc_state
->base
.active
)
814 * Treat cursor with fb as always visible since cursor updates
815 * can happen faster than the vrefresh rate, and the current
816 * watermark code doesn't handle that correctly. Cursor updates
817 * which set/clear the fb or change the cursor size are going
818 * to get throttled by intel_legacy_cursor_update() to work
819 * around this problem with the watermark code.
821 if (plane
->id
== PLANE_CURSOR
)
822 return plane_state
->base
.fb
!= NULL
;
824 return plane_state
->base
.visible
;
827 static struct intel_crtc
*single_enabled_crtc(struct drm_i915_private
*dev_priv
)
829 struct intel_crtc
*crtc
, *enabled
= NULL
;
831 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
832 if (intel_crtc_active(crtc
)) {
842 static void pineview_update_wm(struct intel_crtc
*unused_crtc
)
844 struct drm_i915_private
*dev_priv
= to_i915(unused_crtc
->base
.dev
);
845 struct intel_crtc
*crtc
;
846 const struct cxsr_latency
*latency
;
850 latency
= intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv
),
855 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
856 intel_set_memory_cxsr(dev_priv
, false);
860 crtc
= single_enabled_crtc(dev_priv
);
862 const struct drm_display_mode
*adjusted_mode
=
863 &crtc
->config
->base
.adjusted_mode
;
864 const struct drm_framebuffer
*fb
=
865 crtc
->base
.primary
->state
->fb
;
866 int cpp
= fb
->format
->cpp
[0];
867 int clock
= adjusted_mode
->crtc_clock
;
870 wm
= intel_calculate_wm(clock
, &pineview_display_wm
,
871 pineview_display_wm
.fifo_size
,
872 cpp
, latency
->display_sr
);
873 reg
= I915_READ(DSPFW1
);
874 reg
&= ~DSPFW_SR_MASK
;
875 reg
|= FW_WM(wm
, SR
);
876 I915_WRITE(DSPFW1
, reg
);
877 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg
);
880 wm
= intel_calculate_wm(clock
, &pineview_cursor_wm
,
881 pineview_display_wm
.fifo_size
,
882 4, latency
->cursor_sr
);
883 reg
= I915_READ(DSPFW3
);
884 reg
&= ~DSPFW_CURSOR_SR_MASK
;
885 reg
|= FW_WM(wm
, CURSOR_SR
);
886 I915_WRITE(DSPFW3
, reg
);
888 /* Display HPLL off SR */
889 wm
= intel_calculate_wm(clock
, &pineview_display_hplloff_wm
,
890 pineview_display_hplloff_wm
.fifo_size
,
891 cpp
, latency
->display_hpll_disable
);
892 reg
= I915_READ(DSPFW3
);
893 reg
&= ~DSPFW_HPLL_SR_MASK
;
894 reg
|= FW_WM(wm
, HPLL_SR
);
895 I915_WRITE(DSPFW3
, reg
);
897 /* cursor HPLL off SR */
898 wm
= intel_calculate_wm(clock
, &pineview_cursor_hplloff_wm
,
899 pineview_display_hplloff_wm
.fifo_size
,
900 4, latency
->cursor_hpll_disable
);
901 reg
= I915_READ(DSPFW3
);
902 reg
&= ~DSPFW_HPLL_CURSOR_MASK
;
903 reg
|= FW_WM(wm
, HPLL_CURSOR
);
904 I915_WRITE(DSPFW3
, reg
);
905 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg
);
907 intel_set_memory_cxsr(dev_priv
, true);
909 intel_set_memory_cxsr(dev_priv
, false);
/*
 * Documentation says:
 * "If the line size is small, the TLB fetches can get in the way of the
 *  data fetches, causing some lag in the pixel data return which is not
 *  accounted for in the above formulas. The following adjustment only
 *  needs to be applied if eight whole lines fit in the buffer at once.
 *  The WM is adjusted upwards by the difference between the FIFO size
 *  and the size of 8 whole lines. This adjustment is always performed
 *  in the actual pixel depth regardless of whether FBC is enabled or not."
 */
static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
{
	int tlb_miss = fifo_size * 64 - width * cpp * 8;

	return max(0, tlb_miss);
}
930 static void g4x_write_wm_values(struct drm_i915_private
*dev_priv
,
931 const struct g4x_wm_values
*wm
)
935 for_each_pipe(dev_priv
, pipe
)
936 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv
, pipe
), wm
);
939 FW_WM(wm
->sr
.plane
, SR
) |
940 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_CURSOR
], CURSORB
) |
941 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_PRIMARY
], PLANEB
) |
942 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_PRIMARY
], PLANEA
));
944 (wm
->fbc_en
? DSPFW_FBC_SR_EN
: 0) |
945 FW_WM(wm
->sr
.fbc
, FBC_SR
) |
946 FW_WM(wm
->hpll
.fbc
, FBC_HPLL_SR
) |
947 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
], SPRITEB
) |
948 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_CURSOR
], CURSORA
) |
949 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE0
], SPRITEA
));
951 (wm
->hpll_en
? DSPFW_HPLL_SR_EN
: 0) |
952 FW_WM(wm
->sr
.cursor
, CURSOR_SR
) |
953 FW_WM(wm
->hpll
.cursor
, HPLL_CURSOR
) |
954 FW_WM(wm
->hpll
.plane
, HPLL_SR
));
956 POSTING_READ(DSPFW1
);
/* Pack a watermark value into the wider VLV DSPFW_<plane> field. */
#define FW_WM_VLV(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
962 static void vlv_write_wm_values(struct drm_i915_private
*dev_priv
,
963 const struct vlv_wm_values
*wm
)
967 for_each_pipe(dev_priv
, pipe
) {
968 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv
, pipe
), wm
);
970 I915_WRITE(VLV_DDL(pipe
),
971 (wm
->ddl
[pipe
].plane
[PLANE_CURSOR
] << DDL_CURSOR_SHIFT
) |
972 (wm
->ddl
[pipe
].plane
[PLANE_SPRITE1
] << DDL_SPRITE_SHIFT(1)) |
973 (wm
->ddl
[pipe
].plane
[PLANE_SPRITE0
] << DDL_SPRITE_SHIFT(0)) |
974 (wm
->ddl
[pipe
].plane
[PLANE_PRIMARY
] << DDL_PLANE_SHIFT
));
978 * Zero the (unused) WM1 watermarks, and also clear all the
979 * high order bits so that there are no out of bounds values
980 * present in the registers during the reprogramming.
982 I915_WRITE(DSPHOWM
, 0);
983 I915_WRITE(DSPHOWM1
, 0);
984 I915_WRITE(DSPFW4
, 0);
985 I915_WRITE(DSPFW5
, 0);
986 I915_WRITE(DSPFW6
, 0);
989 FW_WM(wm
->sr
.plane
, SR
) |
990 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_CURSOR
], CURSORB
) |
991 FW_WM_VLV(wm
->pipe
[PIPE_B
].plane
[PLANE_PRIMARY
], PLANEB
) |
992 FW_WM_VLV(wm
->pipe
[PIPE_A
].plane
[PLANE_PRIMARY
], PLANEA
));
994 FW_WM_VLV(wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE1
], SPRITEB
) |
995 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_CURSOR
], CURSORA
) |
996 FW_WM_VLV(wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE0
], SPRITEA
));
998 FW_WM(wm
->sr
.cursor
, CURSOR_SR
));
1000 if (IS_CHERRYVIEW(dev_priv
)) {
1001 I915_WRITE(DSPFW7_CHV
,
1002 FW_WM_VLV(wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE1
], SPRITED
) |
1003 FW_WM_VLV(wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
], SPRITEC
));
1004 I915_WRITE(DSPFW8_CHV
,
1005 FW_WM_VLV(wm
->pipe
[PIPE_C
].plane
[PLANE_SPRITE1
], SPRITEF
) |
1006 FW_WM_VLV(wm
->pipe
[PIPE_C
].plane
[PLANE_SPRITE0
], SPRITEE
));
1007 I915_WRITE(DSPFW9_CHV
,
1008 FW_WM_VLV(wm
->pipe
[PIPE_C
].plane
[PLANE_PRIMARY
], PLANEC
) |
1009 FW_WM(wm
->pipe
[PIPE_C
].plane
[PLANE_CURSOR
], CURSORC
));
1011 FW_WM(wm
->sr
.plane
>> 9, SR_HI
) |
1012 FW_WM(wm
->pipe
[PIPE_C
].plane
[PLANE_SPRITE1
] >> 8, SPRITEF_HI
) |
1013 FW_WM(wm
->pipe
[PIPE_C
].plane
[PLANE_SPRITE0
] >> 8, SPRITEE_HI
) |
1014 FW_WM(wm
->pipe
[PIPE_C
].plane
[PLANE_PRIMARY
] >> 8, PLANEC_HI
) |
1015 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE1
] >> 8, SPRITED_HI
) |
1016 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
] >> 8, SPRITEC_HI
) |
1017 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_PRIMARY
] >> 8, PLANEB_HI
) |
1018 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE1
] >> 8, SPRITEB_HI
) |
1019 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE0
] >> 8, SPRITEA_HI
) |
1020 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_PRIMARY
] >> 8, PLANEA_HI
));
1023 FW_WM_VLV(wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE1
], SPRITED
) |
1024 FW_WM_VLV(wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
], SPRITEC
));
1026 FW_WM(wm
->sr
.plane
>> 9, SR_HI
) |
1027 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE1
] >> 8, SPRITED_HI
) |
1028 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
] >> 8, SPRITEC_HI
) |
1029 FW_WM(wm
->pipe
[PIPE_B
].plane
[PLANE_PRIMARY
] >> 8, PLANEB_HI
) |
1030 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE1
] >> 8, SPRITEB_HI
) |
1031 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE0
] >> 8, SPRITEA_HI
) |
1032 FW_WM(wm
->pipe
[PIPE_A
].plane
[PLANE_PRIMARY
] >> 8, PLANEA_HI
));
1035 POSTING_READ(DSPFW1
);
1040 static void g4x_setup_wm_latency(struct drm_i915_private
*dev_priv
)
1042 /* all latencies in usec */
1043 dev_priv
->wm
.pri_latency
[G4X_WM_LEVEL_NORMAL
] = 5;
1044 dev_priv
->wm
.pri_latency
[G4X_WM_LEVEL_SR
] = 12;
1045 dev_priv
->wm
.pri_latency
[G4X_WM_LEVEL_HPLL
] = 35;
1047 dev_priv
->wm
.max_level
= G4X_WM_LEVEL_HPLL
;
1050 static int g4x_plane_fifo_size(enum plane_id plane_id
, int level
)
1053 * DSPCNTR[13] supposedly controls whether the
1054 * primary plane can use the FIFO space otherwise
1055 * reserved for the sprite plane. It's not 100% clear
1056 * what the actual FIFO size is, but it looks like we
1057 * can happily set both primary and sprite watermarks
1058 * up to 127 cachelines. So that would seem to mean
1059 * that either DSPCNTR[13] doesn't do anything, or that
1060 * the total FIFO is >= 256 cachelines in size. Either
1061 * way, we don't seem to have to worry about this
1062 * repartitioning as the maximum watermark value the
1063 * register can hold for each plane is lower than the
1064 * minimum FIFO size.
1070 return level
== G4X_WM_LEVEL_NORMAL
? 127 : 511;
1072 return level
== G4X_WM_LEVEL_NORMAL
? 127 : 0;
1074 MISSING_CASE(plane_id
);
1079 static int g4x_fbc_fifo_size(int level
)
1082 case G4X_WM_LEVEL_SR
:
1084 case G4X_WM_LEVEL_HPLL
:
1087 MISSING_CASE(level
);
1092 static u16
g4x_compute_wm(const struct intel_crtc_state
*crtc_state
,
1093 const struct intel_plane_state
*plane_state
,
1096 struct intel_plane
*plane
= to_intel_plane(plane_state
->base
.plane
);
1097 struct drm_i915_private
*dev_priv
= to_i915(plane
->base
.dev
);
1098 const struct drm_display_mode
*adjusted_mode
=
1099 &crtc_state
->base
.adjusted_mode
;
1100 unsigned int latency
= dev_priv
->wm
.pri_latency
[level
] * 10;
1101 unsigned int clock
, htotal
, cpp
, width
, wm
;
1106 if (!intel_wm_plane_visible(crtc_state
, plane_state
))
1110 * Not 100% sure which way ELK should go here as the
1111 * spec only says CL/CTG should assume 32bpp and BW
1112 * doesn't need to. But as these things followed the
1113 * mobile vs. desktop lines on gen3 as well, let's
1114 * assume ELK doesn't need this.
1116 * The spec also fails to list such a restriction for
1117 * the HPLL watermark, which seems a little strange.
1118 * Let's use 32bpp for the HPLL watermark as well.
1120 if (IS_GM45(dev_priv
) && plane
->id
== PLANE_PRIMARY
&&
1121 level
!= G4X_WM_LEVEL_NORMAL
)
1124 cpp
= plane_state
->base
.fb
->format
->cpp
[0];
1126 clock
= adjusted_mode
->crtc_clock
;
1127 htotal
= adjusted_mode
->crtc_htotal
;
1129 if (plane
->id
== PLANE_CURSOR
)
1130 width
= plane_state
->base
.crtc_w
;
1132 width
= drm_rect_width(&plane_state
->base
.dst
);
1134 if (plane
->id
== PLANE_CURSOR
) {
1135 wm
= intel_wm_method2(clock
, htotal
, width
, cpp
, latency
);
1136 } else if (plane
->id
== PLANE_PRIMARY
&&
1137 level
== G4X_WM_LEVEL_NORMAL
) {
1138 wm
= intel_wm_method1(clock
, cpp
, latency
);
1140 unsigned int small
, large
;
1142 small
= intel_wm_method1(clock
, cpp
, latency
);
1143 large
= intel_wm_method2(clock
, htotal
, width
, cpp
, latency
);
1145 wm
= min(small
, large
);
1148 wm
+= g4x_tlb_miss_wa(g4x_plane_fifo_size(plane
->id
, level
),
1151 wm
= DIV_ROUND_UP(wm
, 64) + 2;
1153 return min_t(unsigned int, wm
, USHRT_MAX
);
1156 static bool g4x_raw_plane_wm_set(struct intel_crtc_state
*crtc_state
,
1157 int level
, enum plane_id plane_id
, u16 value
)
1159 struct drm_i915_private
*dev_priv
= to_i915(crtc_state
->base
.crtc
->dev
);
1162 for (; level
< intel_wm_num_levels(dev_priv
); level
++) {
1163 struct g4x_pipe_wm
*raw
= &crtc_state
->wm
.g4x
.raw
[level
];
1165 dirty
|= raw
->plane
[plane_id
] != value
;
1166 raw
->plane
[plane_id
] = value
;
1172 static bool g4x_raw_fbc_wm_set(struct intel_crtc_state
*crtc_state
,
1173 int level
, u16 value
)
1175 struct drm_i915_private
*dev_priv
= to_i915(crtc_state
->base
.crtc
->dev
);
1178 /* NORMAL level doesn't have an FBC watermark */
1179 level
= max(level
, G4X_WM_LEVEL_SR
);
1181 for (; level
< intel_wm_num_levels(dev_priv
); level
++) {
1182 struct g4x_pipe_wm
*raw
= &crtc_state
->wm
.g4x
.raw
[level
];
1184 dirty
|= raw
->fbc
!= value
;
1191 static u32
ilk_compute_fbc_wm(const struct intel_crtc_state
*cstate
,
1192 const struct intel_plane_state
*pstate
,
1195 static bool g4x_raw_plane_wm_compute(struct intel_crtc_state
*crtc_state
,
1196 const struct intel_plane_state
*plane_state
)
1198 struct intel_plane
*plane
= to_intel_plane(plane_state
->base
.plane
);
1199 int num_levels
= intel_wm_num_levels(to_i915(plane
->base
.dev
));
1200 enum plane_id plane_id
= plane
->id
;
1204 if (!intel_wm_plane_visible(crtc_state
, plane_state
)) {
1205 dirty
|= g4x_raw_plane_wm_set(crtc_state
, 0, plane_id
, 0);
1206 if (plane_id
== PLANE_PRIMARY
)
1207 dirty
|= g4x_raw_fbc_wm_set(crtc_state
, 0, 0);
1211 for (level
= 0; level
< num_levels
; level
++) {
1212 struct g4x_pipe_wm
*raw
= &crtc_state
->wm
.g4x
.raw
[level
];
1215 wm
= g4x_compute_wm(crtc_state
, plane_state
, level
);
1216 max_wm
= g4x_plane_fifo_size(plane_id
, level
);
1221 dirty
|= raw
->plane
[plane_id
] != wm
;
1222 raw
->plane
[plane_id
] = wm
;
1224 if (plane_id
!= PLANE_PRIMARY
||
1225 level
== G4X_WM_LEVEL_NORMAL
)
1228 wm
= ilk_compute_fbc_wm(crtc_state
, plane_state
,
1229 raw
->plane
[plane_id
]);
1230 max_wm
= g4x_fbc_fifo_size(level
);
1233 * FBC wm is not mandatory as we
1234 * can always just disable its use.
1239 dirty
|= raw
->fbc
!= wm
;
1243 /* mark watermarks as invalid */
1244 dirty
|= g4x_raw_plane_wm_set(crtc_state
, level
, plane_id
, USHRT_MAX
);
1246 if (plane_id
== PLANE_PRIMARY
)
1247 dirty
|= g4x_raw_fbc_wm_set(crtc_state
, level
, USHRT_MAX
);
1251 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1253 crtc_state
->wm
.g4x
.raw
[G4X_WM_LEVEL_NORMAL
].plane
[plane_id
],
1254 crtc_state
->wm
.g4x
.raw
[G4X_WM_LEVEL_SR
].plane
[plane_id
],
1255 crtc_state
->wm
.g4x
.raw
[G4X_WM_LEVEL_HPLL
].plane
[plane_id
]);
1257 if (plane_id
== PLANE_PRIMARY
)
1258 DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1259 crtc_state
->wm
.g4x
.raw
[G4X_WM_LEVEL_SR
].fbc
,
1260 crtc_state
->wm
.g4x
.raw
[G4X_WM_LEVEL_HPLL
].fbc
);
1266 static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state
*crtc_state
,
1267 enum plane_id plane_id
, int level
)
1269 const struct g4x_pipe_wm
*raw
= &crtc_state
->wm
.g4x
.raw
[level
];
1271 return raw
->plane
[plane_id
] <= g4x_plane_fifo_size(plane_id
, level
);
1274 static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state
*crtc_state
,
1277 struct drm_i915_private
*dev_priv
= to_i915(crtc_state
->base
.crtc
->dev
);
1279 if (level
> dev_priv
->wm
.max_level
)
1282 return g4x_raw_plane_wm_is_valid(crtc_state
, PLANE_PRIMARY
, level
) &&
1283 g4x_raw_plane_wm_is_valid(crtc_state
, PLANE_SPRITE0
, level
) &&
1284 g4x_raw_plane_wm_is_valid(crtc_state
, PLANE_CURSOR
, level
);
1287 /* mark all levels starting from 'level' as invalid */
1288 static void g4x_invalidate_wms(struct intel_crtc
*crtc
,
1289 struct g4x_wm_state
*wm_state
, int level
)
1291 if (level
<= G4X_WM_LEVEL_NORMAL
) {
1292 enum plane_id plane_id
;
1294 for_each_plane_id_on_crtc(crtc
, plane_id
)
1295 wm_state
->wm
.plane
[plane_id
] = USHRT_MAX
;
1298 if (level
<= G4X_WM_LEVEL_SR
) {
1299 wm_state
->cxsr
= false;
1300 wm_state
->sr
.cursor
= USHRT_MAX
;
1301 wm_state
->sr
.plane
= USHRT_MAX
;
1302 wm_state
->sr
.fbc
= USHRT_MAX
;
1305 if (level
<= G4X_WM_LEVEL_HPLL
) {
1306 wm_state
->hpll_en
= false;
1307 wm_state
->hpll
.cursor
= USHRT_MAX
;
1308 wm_state
->hpll
.plane
= USHRT_MAX
;
1309 wm_state
->hpll
.fbc
= USHRT_MAX
;
1313 static int g4x_compute_pipe_wm(struct intel_crtc_state
*crtc_state
)
1315 struct intel_crtc
*crtc
= to_intel_crtc(crtc_state
->base
.crtc
);
1316 struct intel_atomic_state
*state
=
1317 to_intel_atomic_state(crtc_state
->base
.state
);
1318 struct g4x_wm_state
*wm_state
= &crtc_state
->wm
.g4x
.optimal
;
1319 int num_active_planes
= hweight32(crtc_state
->active_planes
&
1320 ~BIT(PLANE_CURSOR
));
1321 const struct g4x_pipe_wm
*raw
;
1322 const struct intel_plane_state
*old_plane_state
;
1323 const struct intel_plane_state
*new_plane_state
;
1324 struct intel_plane
*plane
;
1325 enum plane_id plane_id
;
1327 unsigned int dirty
= 0;
1329 for_each_oldnew_intel_plane_in_state(state
, plane
,
1331 new_plane_state
, i
) {
1332 if (new_plane_state
->base
.crtc
!= &crtc
->base
&&
1333 old_plane_state
->base
.crtc
!= &crtc
->base
)
1336 if (g4x_raw_plane_wm_compute(crtc_state
, new_plane_state
))
1337 dirty
|= BIT(plane
->id
);
1343 level
= G4X_WM_LEVEL_NORMAL
;
1344 if (!g4x_raw_crtc_wm_is_valid(crtc_state
, level
))
1347 raw
= &crtc_state
->wm
.g4x
.raw
[level
];
1348 for_each_plane_id_on_crtc(crtc
, plane_id
)
1349 wm_state
->wm
.plane
[plane_id
] = raw
->plane
[plane_id
];
1351 level
= G4X_WM_LEVEL_SR
;
1353 if (!g4x_raw_crtc_wm_is_valid(crtc_state
, level
))
1356 raw
= &crtc_state
->wm
.g4x
.raw
[level
];
1357 wm_state
->sr
.plane
= raw
->plane
[PLANE_PRIMARY
];
1358 wm_state
->sr
.cursor
= raw
->plane
[PLANE_CURSOR
];
1359 wm_state
->sr
.fbc
= raw
->fbc
;
1361 wm_state
->cxsr
= num_active_planes
== BIT(PLANE_PRIMARY
);
1363 level
= G4X_WM_LEVEL_HPLL
;
1365 if (!g4x_raw_crtc_wm_is_valid(crtc_state
, level
))
1368 raw
= &crtc_state
->wm
.g4x
.raw
[level
];
1369 wm_state
->hpll
.plane
= raw
->plane
[PLANE_PRIMARY
];
1370 wm_state
->hpll
.cursor
= raw
->plane
[PLANE_CURSOR
];
1371 wm_state
->hpll
.fbc
= raw
->fbc
;
1373 wm_state
->hpll_en
= wm_state
->cxsr
;
1378 if (level
== G4X_WM_LEVEL_NORMAL
)
1381 /* invalidate the higher levels */
1382 g4x_invalidate_wms(crtc
, wm_state
, level
);
1385 * Determine if the FBC watermark(s) can be used. IF
1386 * this isn't the case we prefer to disable the FBC
1387 ( watermark(s) rather than disable the SR/HPLL
1388 * level(s) entirely.
1390 wm_state
->fbc_en
= level
> G4X_WM_LEVEL_NORMAL
;
1392 if (level
>= G4X_WM_LEVEL_SR
&&
1393 wm_state
->sr
.fbc
> g4x_fbc_fifo_size(G4X_WM_LEVEL_SR
))
1394 wm_state
->fbc_en
= false;
1395 else if (level
>= G4X_WM_LEVEL_HPLL
&&
1396 wm_state
->hpll
.fbc
> g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL
))
1397 wm_state
->fbc_en
= false;
1402 static int g4x_compute_intermediate_wm(struct intel_crtc_state
*new_crtc_state
)
1404 struct intel_crtc
*crtc
= to_intel_crtc(new_crtc_state
->base
.crtc
);
1405 struct g4x_wm_state
*intermediate
= &new_crtc_state
->wm
.g4x
.intermediate
;
1406 const struct g4x_wm_state
*optimal
= &new_crtc_state
->wm
.g4x
.optimal
;
1407 struct intel_atomic_state
*intel_state
=
1408 to_intel_atomic_state(new_crtc_state
->base
.state
);
1409 const struct intel_crtc_state
*old_crtc_state
=
1410 intel_atomic_get_old_crtc_state(intel_state
, crtc
);
1411 const struct g4x_wm_state
*active
= &old_crtc_state
->wm
.g4x
.optimal
;
1412 enum plane_id plane_id
;
1414 if (!new_crtc_state
->base
.active
|| drm_atomic_crtc_needs_modeset(&new_crtc_state
->base
)) {
1415 *intermediate
= *optimal
;
1417 intermediate
->cxsr
= false;
1418 intermediate
->hpll_en
= false;
1422 intermediate
->cxsr
= optimal
->cxsr
&& active
->cxsr
&&
1423 !new_crtc_state
->disable_cxsr
;
1424 intermediate
->hpll_en
= optimal
->hpll_en
&& active
->hpll_en
&&
1425 !new_crtc_state
->disable_cxsr
;
1426 intermediate
->fbc_en
= optimal
->fbc_en
&& active
->fbc_en
;
1428 for_each_plane_id_on_crtc(crtc
, plane_id
) {
1429 intermediate
->wm
.plane
[plane_id
] =
1430 max(optimal
->wm
.plane
[plane_id
],
1431 active
->wm
.plane
[plane_id
]);
1433 WARN_ON(intermediate
->wm
.plane
[plane_id
] >
1434 g4x_plane_fifo_size(plane_id
, G4X_WM_LEVEL_NORMAL
));
1437 intermediate
->sr
.plane
= max(optimal
->sr
.plane
,
1439 intermediate
->sr
.cursor
= max(optimal
->sr
.cursor
,
1441 intermediate
->sr
.fbc
= max(optimal
->sr
.fbc
,
1444 intermediate
->hpll
.plane
= max(optimal
->hpll
.plane
,
1445 active
->hpll
.plane
);
1446 intermediate
->hpll
.cursor
= max(optimal
->hpll
.cursor
,
1447 active
->hpll
.cursor
);
1448 intermediate
->hpll
.fbc
= max(optimal
->hpll
.fbc
,
1451 WARN_ON((intermediate
->sr
.plane
>
1452 g4x_plane_fifo_size(PLANE_PRIMARY
, G4X_WM_LEVEL_SR
) ||
1453 intermediate
->sr
.cursor
>
1454 g4x_plane_fifo_size(PLANE_CURSOR
, G4X_WM_LEVEL_SR
)) &&
1455 intermediate
->cxsr
);
1456 WARN_ON((intermediate
->sr
.plane
>
1457 g4x_plane_fifo_size(PLANE_PRIMARY
, G4X_WM_LEVEL_HPLL
) ||
1458 intermediate
->sr
.cursor
>
1459 g4x_plane_fifo_size(PLANE_CURSOR
, G4X_WM_LEVEL_HPLL
)) &&
1460 intermediate
->hpll_en
);
1462 WARN_ON(intermediate
->sr
.fbc
> g4x_fbc_fifo_size(1) &&
1463 intermediate
->fbc_en
&& intermediate
->cxsr
);
1464 WARN_ON(intermediate
->hpll
.fbc
> g4x_fbc_fifo_size(2) &&
1465 intermediate
->fbc_en
&& intermediate
->hpll_en
);
1469 * If our intermediate WM are identical to the final WM, then we can
1470 * omit the post-vblank programming; only update if it's different.
1472 if (memcmp(intermediate
, optimal
, sizeof(*intermediate
)) != 0)
1473 new_crtc_state
->wm
.need_postvbl_update
= true;
1478 static void g4x_merge_wm(struct drm_i915_private
*dev_priv
,
1479 struct g4x_wm_values
*wm
)
1481 struct intel_crtc
*crtc
;
1482 int num_active_crtcs
= 0;
1488 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
1489 const struct g4x_wm_state
*wm_state
= &crtc
->wm
.active
.g4x
;
1494 if (!wm_state
->cxsr
)
1496 if (!wm_state
->hpll_en
)
1497 wm
->hpll_en
= false;
1498 if (!wm_state
->fbc_en
)
1504 if (num_active_crtcs
!= 1) {
1506 wm
->hpll_en
= false;
1510 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
1511 const struct g4x_wm_state
*wm_state
= &crtc
->wm
.active
.g4x
;
1512 enum pipe pipe
= crtc
->pipe
;
1514 wm
->pipe
[pipe
] = wm_state
->wm
;
1515 if (crtc
->active
&& wm
->cxsr
)
1516 wm
->sr
= wm_state
->sr
;
1517 if (crtc
->active
&& wm
->hpll_en
)
1518 wm
->hpll
= wm_state
->hpll
;
1522 static void g4x_program_watermarks(struct drm_i915_private
*dev_priv
)
1524 struct g4x_wm_values
*old_wm
= &dev_priv
->wm
.g4x
;
1525 struct g4x_wm_values new_wm
= {};
1527 g4x_merge_wm(dev_priv
, &new_wm
);
1529 if (memcmp(old_wm
, &new_wm
, sizeof(new_wm
)) == 0)
1532 if (is_disabling(old_wm
->cxsr
, new_wm
.cxsr
, true))
1533 _intel_set_memory_cxsr(dev_priv
, false);
1535 g4x_write_wm_values(dev_priv
, &new_wm
);
1537 if (is_enabling(old_wm
->cxsr
, new_wm
.cxsr
, true))
1538 _intel_set_memory_cxsr(dev_priv
, true);
1543 static void g4x_initial_watermarks(struct intel_atomic_state
*state
,
1544 struct intel_crtc_state
*crtc_state
)
1546 struct drm_i915_private
*dev_priv
= to_i915(crtc_state
->base
.crtc
->dev
);
1547 struct intel_crtc
*crtc
= to_intel_crtc(crtc_state
->base
.crtc
);
1549 mutex_lock(&dev_priv
->wm
.wm_mutex
);
1550 crtc
->wm
.active
.g4x
= crtc_state
->wm
.g4x
.intermediate
;
1551 g4x_program_watermarks(dev_priv
);
1552 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
1555 static void g4x_optimize_watermarks(struct intel_atomic_state
*state
,
1556 struct intel_crtc_state
*crtc_state
)
1558 struct drm_i915_private
*dev_priv
= to_i915(crtc_state
->base
.crtc
->dev
);
1559 struct intel_crtc
*intel_crtc
= to_intel_crtc(crtc_state
->base
.crtc
);
1561 if (!crtc_state
->wm
.need_postvbl_update
)
1564 mutex_lock(&dev_priv
->wm
.wm_mutex
);
1565 intel_crtc
->wm
.active
.g4x
= crtc_state
->wm
.g4x
.optimal
;
1566 g4x_program_watermarks(dev_priv
);
1567 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = intel_wm_method2(pixel_rate, htotal,
			       width, cpp, latency);
	/* bytes -> cachelines (64B) */
	ret = DIV_ROUND_UP(ret, 64);

	return ret;
}
1586 static void vlv_setup_wm_latency(struct drm_i915_private
*dev_priv
)
1588 /* all latencies in usec */
1589 dev_priv
->wm
.pri_latency
[VLV_WM_LEVEL_PM2
] = 3;
1591 dev_priv
->wm
.max_level
= VLV_WM_LEVEL_PM2
;
1593 if (IS_CHERRYVIEW(dev_priv
)) {
1594 dev_priv
->wm
.pri_latency
[VLV_WM_LEVEL_PM5
] = 12;
1595 dev_priv
->wm
.pri_latency
[VLV_WM_LEVEL_DDR_DVFS
] = 33;
1597 dev_priv
->wm
.max_level
= VLV_WM_LEVEL_DDR_DVFS
;
1601 static u16
vlv_compute_wm_level(const struct intel_crtc_state
*crtc_state
,
1602 const struct intel_plane_state
*plane_state
,
1605 struct intel_plane
*plane
= to_intel_plane(plane_state
->base
.plane
);
1606 struct drm_i915_private
*dev_priv
= to_i915(plane
->base
.dev
);
1607 const struct drm_display_mode
*adjusted_mode
=
1608 &crtc_state
->base
.adjusted_mode
;
1609 unsigned int clock
, htotal
, cpp
, width
, wm
;
1611 if (dev_priv
->wm
.pri_latency
[level
] == 0)
1614 if (!intel_wm_plane_visible(crtc_state
, plane_state
))
1617 cpp
= plane_state
->base
.fb
->format
->cpp
[0];
1618 clock
= adjusted_mode
->crtc_clock
;
1619 htotal
= adjusted_mode
->crtc_htotal
;
1620 width
= crtc_state
->pipe_src_w
;
1622 if (plane
->id
== PLANE_CURSOR
) {
1624 * FIXME the formula gives values that are
1625 * too big for the cursor FIFO, and hence we
1626 * would never be able to use cursors. For
1627 * now just hardcode the watermark.
1631 wm
= vlv_wm_method2(clock
, htotal
, width
, cpp
,
1632 dev_priv
->wm
.pri_latency
[level
] * 10);
1635 return min_t(unsigned int, wm
, USHRT_MAX
);
1638 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes
)
1640 return (active_planes
& (BIT(PLANE_SPRITE0
) |
1641 BIT(PLANE_SPRITE1
))) == BIT(PLANE_SPRITE1
);
1644 static int vlv_compute_fifo(struct intel_crtc_state
*crtc_state
)
1646 struct intel_crtc
*crtc
= to_intel_crtc(crtc_state
->base
.crtc
);
1647 const struct g4x_pipe_wm
*raw
=
1648 &crtc_state
->wm
.vlv
.raw
[VLV_WM_LEVEL_PM2
];
1649 struct vlv_fifo_state
*fifo_state
= &crtc_state
->wm
.vlv
.fifo_state
;
1650 unsigned int active_planes
= crtc_state
->active_planes
& ~BIT(PLANE_CURSOR
);
1651 int num_active_planes
= hweight32(active_planes
);
1652 const int fifo_size
= 511;
1653 int fifo_extra
, fifo_left
= fifo_size
;
1654 int sprite0_fifo_extra
= 0;
1655 unsigned int total_rate
;
1656 enum plane_id plane_id
;
1659 * When enabling sprite0 after sprite1 has already been enabled
1660 * we tend to get an underrun unless sprite0 already has some
1661 * FIFO space allcoated. Hence we always allocate at least one
1662 * cacheline for sprite0 whenever sprite1 is enabled.
1664 * All other plane enable sequences appear immune to this problem.
1666 if (vlv_need_sprite0_fifo_workaround(active_planes
))
1667 sprite0_fifo_extra
= 1;
1669 total_rate
= raw
->plane
[PLANE_PRIMARY
] +
1670 raw
->plane
[PLANE_SPRITE0
] +
1671 raw
->plane
[PLANE_SPRITE1
] +
1674 if (total_rate
> fifo_size
)
1677 if (total_rate
== 0)
1680 for_each_plane_id_on_crtc(crtc
, plane_id
) {
1683 if ((active_planes
& BIT(plane_id
)) == 0) {
1684 fifo_state
->plane
[plane_id
] = 0;
1688 rate
= raw
->plane
[plane_id
];
1689 fifo_state
->plane
[plane_id
] = fifo_size
* rate
/ total_rate
;
1690 fifo_left
-= fifo_state
->plane
[plane_id
];
1693 fifo_state
->plane
[PLANE_SPRITE0
] += sprite0_fifo_extra
;
1694 fifo_left
-= sprite0_fifo_extra
;
1696 fifo_state
->plane
[PLANE_CURSOR
] = 63;
1698 fifo_extra
= DIV_ROUND_UP(fifo_left
, num_active_planes
?: 1);
1700 /* spread the remainder evenly */
1701 for_each_plane_id_on_crtc(crtc
, plane_id
) {
1707 if ((active_planes
& BIT(plane_id
)) == 0)
1710 plane_extra
= min(fifo_extra
, fifo_left
);
1711 fifo_state
->plane
[plane_id
] += plane_extra
;
1712 fifo_left
-= plane_extra
;
1715 WARN_ON(active_planes
!= 0 && fifo_left
!= 0);
1717 /* give it all to the first plane if none are active */
1718 if (active_planes
== 0) {
1719 WARN_ON(fifo_left
!= fifo_size
);
1720 fifo_state
->plane
[PLANE_PRIMARY
] = fifo_left
;
1726 /* mark all levels starting from 'level' as invalid */
1727 static void vlv_invalidate_wms(struct intel_crtc
*crtc
,
1728 struct vlv_wm_state
*wm_state
, int level
)
1730 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
1732 for (; level
< intel_wm_num_levels(dev_priv
); level
++) {
1733 enum plane_id plane_id
;
1735 for_each_plane_id_on_crtc(crtc
, plane_id
)
1736 wm_state
->wm
[level
].plane
[plane_id
] = USHRT_MAX
;
1738 wm_state
->sr
[level
].cursor
= USHRT_MAX
;
1739 wm_state
->sr
[level
].plane
= USHRT_MAX
;
1743 static u16
vlv_invert_wm_value(u16 wm
, u16 fifo_size
)
1748 return fifo_size
- wm
;
1752 * Starting from 'level' set all higher
1753 * levels to 'value' in the "raw" watermarks.
1755 static bool vlv_raw_plane_wm_set(struct intel_crtc_state
*crtc_state
,
1756 int level
, enum plane_id plane_id
, u16 value
)
1758 struct drm_i915_private
*dev_priv
= to_i915(crtc_state
->base
.crtc
->dev
);
1759 int num_levels
= intel_wm_num_levels(dev_priv
);
1762 for (; level
< num_levels
; level
++) {
1763 struct g4x_pipe_wm
*raw
= &crtc_state
->wm
.vlv
.raw
[level
];
1765 dirty
|= raw
->plane
[plane_id
] != value
;
1766 raw
->plane
[plane_id
] = value
;
1772 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state
*crtc_state
,
1773 const struct intel_plane_state
*plane_state
)
1775 struct intel_plane
*plane
= to_intel_plane(plane_state
->base
.plane
);
1776 enum plane_id plane_id
= plane
->id
;
1777 int num_levels
= intel_wm_num_levels(to_i915(plane
->base
.dev
));
1781 if (!intel_wm_plane_visible(crtc_state
, plane_state
)) {
1782 dirty
|= vlv_raw_plane_wm_set(crtc_state
, 0, plane_id
, 0);
1786 for (level
= 0; level
< num_levels
; level
++) {
1787 struct g4x_pipe_wm
*raw
= &crtc_state
->wm
.vlv
.raw
[level
];
1788 int wm
= vlv_compute_wm_level(crtc_state
, plane_state
, level
);
1789 int max_wm
= plane_id
== PLANE_CURSOR
? 63 : 511;
1794 dirty
|= raw
->plane
[plane_id
] != wm
;
1795 raw
->plane
[plane_id
] = wm
;
1798 /* mark all higher levels as invalid */
1799 dirty
|= vlv_raw_plane_wm_set(crtc_state
, level
, plane_id
, USHRT_MAX
);
1803 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1805 crtc_state
->wm
.vlv
.raw
[VLV_WM_LEVEL_PM2
].plane
[plane_id
],
1806 crtc_state
->wm
.vlv
.raw
[VLV_WM_LEVEL_PM5
].plane
[plane_id
],
1807 crtc_state
->wm
.vlv
.raw
[VLV_WM_LEVEL_DDR_DVFS
].plane
[plane_id
]);
1812 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state
*crtc_state
,
1813 enum plane_id plane_id
, int level
)
1815 const struct g4x_pipe_wm
*raw
=
1816 &crtc_state
->wm
.vlv
.raw
[level
];
1817 const struct vlv_fifo_state
*fifo_state
=
1818 &crtc_state
->wm
.vlv
.fifo_state
;
1820 return raw
->plane
[plane_id
] <= fifo_state
->plane
[plane_id
];
1823 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state
*crtc_state
, int level
)
1825 return vlv_raw_plane_wm_is_valid(crtc_state
, PLANE_PRIMARY
, level
) &&
1826 vlv_raw_plane_wm_is_valid(crtc_state
, PLANE_SPRITE0
, level
) &&
1827 vlv_raw_plane_wm_is_valid(crtc_state
, PLANE_SPRITE1
, level
) &&
1828 vlv_raw_plane_wm_is_valid(crtc_state
, PLANE_CURSOR
, level
);
1831 static int vlv_compute_pipe_wm(struct intel_crtc_state
*crtc_state
)
1833 struct intel_crtc
*crtc
= to_intel_crtc(crtc_state
->base
.crtc
);
1834 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
1835 struct intel_atomic_state
*state
=
1836 to_intel_atomic_state(crtc_state
->base
.state
);
1837 struct vlv_wm_state
*wm_state
= &crtc_state
->wm
.vlv
.optimal
;
1838 const struct vlv_fifo_state
*fifo_state
=
1839 &crtc_state
->wm
.vlv
.fifo_state
;
1840 int num_active_planes
= hweight32(crtc_state
->active_planes
&
1841 ~BIT(PLANE_CURSOR
));
1842 bool needs_modeset
= drm_atomic_crtc_needs_modeset(&crtc_state
->base
);
1843 const struct intel_plane_state
*old_plane_state
;
1844 const struct intel_plane_state
*new_plane_state
;
1845 struct intel_plane
*plane
;
1846 enum plane_id plane_id
;
1848 unsigned int dirty
= 0;
1850 for_each_oldnew_intel_plane_in_state(state
, plane
,
1852 new_plane_state
, i
) {
1853 if (new_plane_state
->base
.crtc
!= &crtc
->base
&&
1854 old_plane_state
->base
.crtc
!= &crtc
->base
)
1857 if (vlv_raw_plane_wm_compute(crtc_state
, new_plane_state
))
1858 dirty
|= BIT(plane
->id
);
1862 * DSPARB registers may have been reset due to the
1863 * power well being turned off. Make sure we restore
1864 * them to a consistent state even if no primary/sprite
1865 * planes are initially active.
1868 crtc_state
->fifo_changed
= true;
1873 /* cursor changes don't warrant a FIFO recompute */
1874 if (dirty
& ~BIT(PLANE_CURSOR
)) {
1875 const struct intel_crtc_state
*old_crtc_state
=
1876 intel_atomic_get_old_crtc_state(state
, crtc
);
1877 const struct vlv_fifo_state
*old_fifo_state
=
1878 &old_crtc_state
->wm
.vlv
.fifo_state
;
1880 ret
= vlv_compute_fifo(crtc_state
);
1884 if (needs_modeset
||
1885 memcmp(old_fifo_state
, fifo_state
,
1886 sizeof(*fifo_state
)) != 0)
1887 crtc_state
->fifo_changed
= true;
1890 /* initially allow all levels */
1891 wm_state
->num_levels
= intel_wm_num_levels(dev_priv
);
1893 * Note that enabling cxsr with no primary/sprite planes
1894 * enabled can wedge the pipe. Hence we only allow cxsr
1895 * with exactly one enabled primary/sprite plane.
1897 wm_state
->cxsr
= crtc
->pipe
!= PIPE_C
&& num_active_planes
== 1;
1899 for (level
= 0; level
< wm_state
->num_levels
; level
++) {
1900 const struct g4x_pipe_wm
*raw
= &crtc_state
->wm
.vlv
.raw
[level
];
1901 const int sr_fifo_size
= INTEL_INFO(dev_priv
)->num_pipes
* 512 - 1;
1903 if (!vlv_raw_crtc_wm_is_valid(crtc_state
, level
))
1906 for_each_plane_id_on_crtc(crtc
, plane_id
) {
1907 wm_state
->wm
[level
].plane
[plane_id
] =
1908 vlv_invert_wm_value(raw
->plane
[plane_id
],
1909 fifo_state
->plane
[plane_id
]);
1912 wm_state
->sr
[level
].plane
=
1913 vlv_invert_wm_value(max3(raw
->plane
[PLANE_PRIMARY
],
1914 raw
->plane
[PLANE_SPRITE0
],
1915 raw
->plane
[PLANE_SPRITE1
]),
1918 wm_state
->sr
[level
].cursor
=
1919 vlv_invert_wm_value(raw
->plane
[PLANE_CURSOR
],
1926 /* limit to only levels we can actually handle */
1927 wm_state
->num_levels
= level
;
1929 /* invalidate the higher levels */
1930 vlv_invalidate_wms(crtc
, wm_state
, level
);
1935 #define VLV_FIFO(plane, value) \
1936 (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1938 static void vlv_atomic_update_fifo(struct intel_atomic_state
*state
,
1939 struct intel_crtc_state
*crtc_state
)
1941 struct intel_crtc
*crtc
= to_intel_crtc(crtc_state
->base
.crtc
);
1942 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
1943 const struct vlv_fifo_state
*fifo_state
=
1944 &crtc_state
->wm
.vlv
.fifo_state
;
1945 int sprite0_start
, sprite1_start
, fifo_size
;
1947 if (!crtc_state
->fifo_changed
)
1950 sprite0_start
= fifo_state
->plane
[PLANE_PRIMARY
];
1951 sprite1_start
= fifo_state
->plane
[PLANE_SPRITE0
] + sprite0_start
;
1952 fifo_size
= fifo_state
->plane
[PLANE_SPRITE1
] + sprite1_start
;
1954 WARN_ON(fifo_state
->plane
[PLANE_CURSOR
] != 63);
1955 WARN_ON(fifo_size
!= 511);
1957 trace_vlv_fifo_size(crtc
, sprite0_start
, sprite1_start
, fifo_size
);
1960 * uncore.lock serves a double purpose here. It allows us to
1961 * use the less expensive I915_{READ,WRITE}_FW() functions, and
1962 * it protects the DSPARB registers from getting clobbered by
1963 * parallel updates from multiple pipes.
1965 * intel_pipe_update_start() has already disabled interrupts
1966 * for us, so a plain spin_lock() is sufficient here.
1968 spin_lock(&dev_priv
->uncore
.lock
);
1970 switch (crtc
->pipe
) {
1971 u32 dsparb
, dsparb2
, dsparb3
;
1973 dsparb
= I915_READ_FW(DSPARB
);
1974 dsparb2
= I915_READ_FW(DSPARB2
);
1976 dsparb
&= ~(VLV_FIFO(SPRITEA
, 0xff) |
1977 VLV_FIFO(SPRITEB
, 0xff));
1978 dsparb
|= (VLV_FIFO(SPRITEA
, sprite0_start
) |
1979 VLV_FIFO(SPRITEB
, sprite1_start
));
1981 dsparb2
&= ~(VLV_FIFO(SPRITEA_HI
, 0x1) |
1982 VLV_FIFO(SPRITEB_HI
, 0x1));
1983 dsparb2
|= (VLV_FIFO(SPRITEA_HI
, sprite0_start
>> 8) |
1984 VLV_FIFO(SPRITEB_HI
, sprite1_start
>> 8));
1986 I915_WRITE_FW(DSPARB
, dsparb
);
1987 I915_WRITE_FW(DSPARB2
, dsparb2
);
1990 dsparb
= I915_READ_FW(DSPARB
);
1991 dsparb2
= I915_READ_FW(DSPARB2
);
1993 dsparb
&= ~(VLV_FIFO(SPRITEC
, 0xff) |
1994 VLV_FIFO(SPRITED
, 0xff));
1995 dsparb
|= (VLV_FIFO(SPRITEC
, sprite0_start
) |
1996 VLV_FIFO(SPRITED
, sprite1_start
));
1998 dsparb2
&= ~(VLV_FIFO(SPRITEC_HI
, 0xff) |
1999 VLV_FIFO(SPRITED_HI
, 0xff));
2000 dsparb2
|= (VLV_FIFO(SPRITEC_HI
, sprite0_start
>> 8) |
2001 VLV_FIFO(SPRITED_HI
, sprite1_start
>> 8));
2003 I915_WRITE_FW(DSPARB
, dsparb
);
2004 I915_WRITE_FW(DSPARB2
, dsparb2
);
2007 dsparb3
= I915_READ_FW(DSPARB3
);
2008 dsparb2
= I915_READ_FW(DSPARB2
);
2010 dsparb3
&= ~(VLV_FIFO(SPRITEE
, 0xff) |
2011 VLV_FIFO(SPRITEF
, 0xff));
2012 dsparb3
|= (VLV_FIFO(SPRITEE
, sprite0_start
) |
2013 VLV_FIFO(SPRITEF
, sprite1_start
));
2015 dsparb2
&= ~(VLV_FIFO(SPRITEE_HI
, 0xff) |
2016 VLV_FIFO(SPRITEF_HI
, 0xff));
2017 dsparb2
|= (VLV_FIFO(SPRITEE_HI
, sprite0_start
>> 8) |
2018 VLV_FIFO(SPRITEF_HI
, sprite1_start
>> 8));
2020 I915_WRITE_FW(DSPARB3
, dsparb3
);
2021 I915_WRITE_FW(DSPARB2
, dsparb2
);
2027 POSTING_READ_FW(DSPARB
);
2029 spin_unlock(&dev_priv
->uncore
.lock
);
2034 static int vlv_compute_intermediate_wm(struct intel_crtc_state
*new_crtc_state
)
2036 struct intel_crtc
*crtc
= to_intel_crtc(new_crtc_state
->base
.crtc
);
2037 struct vlv_wm_state
*intermediate
= &new_crtc_state
->wm
.vlv
.intermediate
;
2038 const struct vlv_wm_state
*optimal
= &new_crtc_state
->wm
.vlv
.optimal
;
2039 struct intel_atomic_state
*intel_state
=
2040 to_intel_atomic_state(new_crtc_state
->base
.state
);
2041 const struct intel_crtc_state
*old_crtc_state
=
2042 intel_atomic_get_old_crtc_state(intel_state
, crtc
);
2043 const struct vlv_wm_state
*active
= &old_crtc_state
->wm
.vlv
.optimal
;
2046 if (!new_crtc_state
->base
.active
|| drm_atomic_crtc_needs_modeset(&new_crtc_state
->base
)) {
2047 *intermediate
= *optimal
;
2049 intermediate
->cxsr
= false;
2053 intermediate
->num_levels
= min(optimal
->num_levels
, active
->num_levels
);
2054 intermediate
->cxsr
= optimal
->cxsr
&& active
->cxsr
&&
2055 !new_crtc_state
->disable_cxsr
;
2057 for (level
= 0; level
< intermediate
->num_levels
; level
++) {
2058 enum plane_id plane_id
;
2060 for_each_plane_id_on_crtc(crtc
, plane_id
) {
2061 intermediate
->wm
[level
].plane
[plane_id
] =
2062 min(optimal
->wm
[level
].plane
[plane_id
],
2063 active
->wm
[level
].plane
[plane_id
]);
2066 intermediate
->sr
[level
].plane
= min(optimal
->sr
[level
].plane
,
2067 active
->sr
[level
].plane
);
2068 intermediate
->sr
[level
].cursor
= min(optimal
->sr
[level
].cursor
,
2069 active
->sr
[level
].cursor
);
2072 vlv_invalidate_wms(crtc
, intermediate
, level
);
2076 * If our intermediate WM are identical to the final WM, then we can
2077 * omit the post-vblank programming; only update if it's different.
2079 if (memcmp(intermediate
, optimal
, sizeof(*intermediate
)) != 0)
2080 new_crtc_state
->wm
.need_postvbl_update
= true;
2085 static void vlv_merge_wm(struct drm_i915_private
*dev_priv
,
2086 struct vlv_wm_values
*wm
)
2088 struct intel_crtc
*crtc
;
2089 int num_active_crtcs
= 0;
2091 wm
->level
= dev_priv
->wm
.max_level
;
2094 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
2095 const struct vlv_wm_state
*wm_state
= &crtc
->wm
.active
.vlv
;
2100 if (!wm_state
->cxsr
)
2104 wm
->level
= min_t(int, wm
->level
, wm_state
->num_levels
- 1);
2107 if (num_active_crtcs
!= 1)
2110 if (num_active_crtcs
> 1)
2111 wm
->level
= VLV_WM_LEVEL_PM2
;
2113 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
2114 const struct vlv_wm_state
*wm_state
= &crtc
->wm
.active
.vlv
;
2115 enum pipe pipe
= crtc
->pipe
;
2117 wm
->pipe
[pipe
] = wm_state
->wm
[wm
->level
];
2118 if (crtc
->active
&& wm
->cxsr
)
2119 wm
->sr
= wm_state
->sr
[wm
->level
];
2121 wm
->ddl
[pipe
].plane
[PLANE_PRIMARY
] = DDL_PRECISION_HIGH
| 2;
2122 wm
->ddl
[pipe
].plane
[PLANE_SPRITE0
] = DDL_PRECISION_HIGH
| 2;
2123 wm
->ddl
[pipe
].plane
[PLANE_SPRITE1
] = DDL_PRECISION_HIGH
| 2;
2124 wm
->ddl
[pipe
].plane
[PLANE_CURSOR
] = DDL_PRECISION_HIGH
| 2;
2128 static void vlv_program_watermarks(struct drm_i915_private
*dev_priv
)
2130 struct vlv_wm_values
*old_wm
= &dev_priv
->wm
.vlv
;
2131 struct vlv_wm_values new_wm
= {};
2133 vlv_merge_wm(dev_priv
, &new_wm
);
2135 if (memcmp(old_wm
, &new_wm
, sizeof(new_wm
)) == 0)
2138 if (is_disabling(old_wm
->level
, new_wm
.level
, VLV_WM_LEVEL_DDR_DVFS
))
2139 chv_set_memory_dvfs(dev_priv
, false);
2141 if (is_disabling(old_wm
->level
, new_wm
.level
, VLV_WM_LEVEL_PM5
))
2142 chv_set_memory_pm5(dev_priv
, false);
2144 if (is_disabling(old_wm
->cxsr
, new_wm
.cxsr
, true))
2145 _intel_set_memory_cxsr(dev_priv
, false);
2147 vlv_write_wm_values(dev_priv
, &new_wm
);
2149 if (is_enabling(old_wm
->cxsr
, new_wm
.cxsr
, true))
2150 _intel_set_memory_cxsr(dev_priv
, true);
2152 if (is_enabling(old_wm
->level
, new_wm
.level
, VLV_WM_LEVEL_PM5
))
2153 chv_set_memory_pm5(dev_priv
, true);
2155 if (is_enabling(old_wm
->level
, new_wm
.level
, VLV_WM_LEVEL_DDR_DVFS
))
2156 chv_set_memory_dvfs(dev_priv
, true);
2161 static void vlv_initial_watermarks(struct intel_atomic_state
*state
,
2162 struct intel_crtc_state
*crtc_state
)
2164 struct drm_i915_private
*dev_priv
= to_i915(crtc_state
->base
.crtc
->dev
);
2165 struct intel_crtc
*crtc
= to_intel_crtc(crtc_state
->base
.crtc
);
2167 mutex_lock(&dev_priv
->wm
.wm_mutex
);
2168 crtc
->wm
.active
.vlv
= crtc_state
->wm
.vlv
.intermediate
;
2169 vlv_program_watermarks(dev_priv
);
2170 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
2173 static void vlv_optimize_watermarks(struct intel_atomic_state
*state
,
2174 struct intel_crtc_state
*crtc_state
)
2176 struct drm_i915_private
*dev_priv
= to_i915(crtc_state
->base
.crtc
->dev
);
2177 struct intel_crtc
*intel_crtc
= to_intel_crtc(crtc_state
->base
.crtc
);
2179 if (!crtc_state
->wm
.need_postvbl_update
)
2182 mutex_lock(&dev_priv
->wm
.wm_mutex
);
2183 intel_crtc
->wm
.active
.vlv
= crtc_state
->wm
.vlv
.optimal
;
2184 vlv_program_watermarks(dev_priv
);
2185 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
2188 static void i965_update_wm(struct intel_crtc
*unused_crtc
)
2190 struct drm_i915_private
*dev_priv
= to_i915(unused_crtc
->base
.dev
);
2191 struct intel_crtc
*crtc
;
2196 /* Calc sr entries for one plane configs */
2197 crtc
= single_enabled_crtc(dev_priv
);
2199 /* self-refresh has much higher latency */
2200 static const int sr_latency_ns
= 12000;
2201 const struct drm_display_mode
*adjusted_mode
=
2202 &crtc
->config
->base
.adjusted_mode
;
2203 const struct drm_framebuffer
*fb
=
2204 crtc
->base
.primary
->state
->fb
;
2205 int clock
= adjusted_mode
->crtc_clock
;
2206 int htotal
= adjusted_mode
->crtc_htotal
;
2207 int hdisplay
= crtc
->config
->pipe_src_w
;
2208 int cpp
= fb
->format
->cpp
[0];
2211 entries
= intel_wm_method2(clock
, htotal
,
2212 hdisplay
, cpp
, sr_latency_ns
/ 100);
2213 entries
= DIV_ROUND_UP(entries
, I915_FIFO_LINE_SIZE
);
2214 srwm
= I965_FIFO_SIZE
- entries
;
2218 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2221 entries
= intel_wm_method2(clock
, htotal
,
2222 crtc
->base
.cursor
->state
->crtc_w
, 4,
2223 sr_latency_ns
/ 100);
2224 entries
= DIV_ROUND_UP(entries
,
2225 i965_cursor_wm_info
.cacheline_size
) +
2226 i965_cursor_wm_info
.guard_size
;
2228 cursor_sr
= i965_cursor_wm_info
.fifo_size
- entries
;
2229 if (cursor_sr
> i965_cursor_wm_info
.max_wm
)
2230 cursor_sr
= i965_cursor_wm_info
.max_wm
;
2232 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2233 "cursor %d\n", srwm
, cursor_sr
);
2235 cxsr_enabled
= true;
2237 cxsr_enabled
= false;
2238 /* Turn off self refresh if both pipes are enabled */
2239 intel_set_memory_cxsr(dev_priv
, false);
2242 DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2245 /* 965 has limitations... */
2246 I915_WRITE(DSPFW1
, FW_WM(srwm
, SR
) |
2250 I915_WRITE(DSPFW2
, FW_WM(8, CURSORA
) |
2251 FW_WM(8, PLANEC_OLD
));
2252 /* update cursor SR watermark */
2253 I915_WRITE(DSPFW3
, FW_WM(cursor_sr
, CURSOR_SR
));
2256 intel_set_memory_cxsr(dev_priv
, true);
2261 static void i9xx_update_wm(struct intel_crtc
*unused_crtc
)
2263 struct drm_i915_private
*dev_priv
= to_i915(unused_crtc
->base
.dev
);
2264 const struct intel_watermark_params
*wm_info
;
2269 int planea_wm
, planeb_wm
;
2270 struct intel_crtc
*crtc
, *enabled
= NULL
;
2272 if (IS_I945GM(dev_priv
))
2273 wm_info
= &i945_wm_info
;
2274 else if (!IS_GEN(dev_priv
, 2))
2275 wm_info
= &i915_wm_info
;
2277 wm_info
= &i830_a_wm_info
;
2279 fifo_size
= dev_priv
->display
.get_fifo_size(dev_priv
, PLANE_A
);
2280 crtc
= intel_get_crtc_for_plane(dev_priv
, PLANE_A
);
2281 if (intel_crtc_active(crtc
)) {
2282 const struct drm_display_mode
*adjusted_mode
=
2283 &crtc
->config
->base
.adjusted_mode
;
2284 const struct drm_framebuffer
*fb
=
2285 crtc
->base
.primary
->state
->fb
;
2288 if (IS_GEN(dev_priv
, 2))
2291 cpp
= fb
->format
->cpp
[0];
2293 planea_wm
= intel_calculate_wm(adjusted_mode
->crtc_clock
,
2294 wm_info
, fifo_size
, cpp
,
2295 pessimal_latency_ns
);
2298 planea_wm
= fifo_size
- wm_info
->guard_size
;
2299 if (planea_wm
> (long)wm_info
->max_wm
)
2300 planea_wm
= wm_info
->max_wm
;
2303 if (IS_GEN(dev_priv
, 2))
2304 wm_info
= &i830_bc_wm_info
;
2306 fifo_size
= dev_priv
->display
.get_fifo_size(dev_priv
, PLANE_B
);
2307 crtc
= intel_get_crtc_for_plane(dev_priv
, PLANE_B
);
2308 if (intel_crtc_active(crtc
)) {
2309 const struct drm_display_mode
*adjusted_mode
=
2310 &crtc
->config
->base
.adjusted_mode
;
2311 const struct drm_framebuffer
*fb
=
2312 crtc
->base
.primary
->state
->fb
;
2315 if (IS_GEN(dev_priv
, 2))
2318 cpp
= fb
->format
->cpp
[0];
2320 planeb_wm
= intel_calculate_wm(adjusted_mode
->crtc_clock
,
2321 wm_info
, fifo_size
, cpp
,
2322 pessimal_latency_ns
);
2323 if (enabled
== NULL
)
2328 planeb_wm
= fifo_size
- wm_info
->guard_size
;
2329 if (planeb_wm
> (long)wm_info
->max_wm
)
2330 planeb_wm
= wm_info
->max_wm
;
2333 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm
, planeb_wm
);
2335 if (IS_I915GM(dev_priv
) && enabled
) {
2336 struct drm_i915_gem_object
*obj
;
2338 obj
= intel_fb_obj(enabled
->base
.primary
->state
->fb
);
2340 /* self-refresh seems busted with untiled */
2341 if (!i915_gem_object_is_tiled(obj
))
2346 * Overlay gets an aggressive default since video jitter is bad.
2350 /* Play safe and disable self-refresh before adjusting watermarks. */
2351 intel_set_memory_cxsr(dev_priv
, false);
2353 /* Calc sr entries for one plane configs */
2354 if (HAS_FW_BLC(dev_priv
) && enabled
) {
2355 /* self-refresh has much higher latency */
2356 static const int sr_latency_ns
= 6000;
2357 const struct drm_display_mode
*adjusted_mode
=
2358 &enabled
->config
->base
.adjusted_mode
;
2359 const struct drm_framebuffer
*fb
=
2360 enabled
->base
.primary
->state
->fb
;
2361 int clock
= adjusted_mode
->crtc_clock
;
2362 int htotal
= adjusted_mode
->crtc_htotal
;
2363 int hdisplay
= enabled
->config
->pipe_src_w
;
2367 if (IS_I915GM(dev_priv
) || IS_I945GM(dev_priv
))
2370 cpp
= fb
->format
->cpp
[0];
2372 entries
= intel_wm_method2(clock
, htotal
, hdisplay
, cpp
,
2373 sr_latency_ns
/ 100);
2374 entries
= DIV_ROUND_UP(entries
, wm_info
->cacheline_size
);
2375 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries
);
2376 srwm
= wm_info
->fifo_size
- entries
;
2380 if (IS_I945G(dev_priv
) || IS_I945GM(dev_priv
))
2381 I915_WRITE(FW_BLC_SELF
,
2382 FW_BLC_SELF_FIFO_MASK
| (srwm
& 0xff));
2384 I915_WRITE(FW_BLC_SELF
, srwm
& 0x3f);
2387 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2388 planea_wm
, planeb_wm
, cwm
, srwm
);
2390 fwater_lo
= ((planeb_wm
& 0x3f) << 16) | (planea_wm
& 0x3f);
2391 fwater_hi
= (cwm
& 0x1f);
2393 /* Set request length to 8 cachelines per fetch */
2394 fwater_lo
= fwater_lo
| (1 << 24) | (1 << 8);
2395 fwater_hi
= fwater_hi
| (1 << 8);
2397 I915_WRITE(FW_BLC
, fwater_lo
);
2398 I915_WRITE(FW_BLC2
, fwater_hi
);
2401 intel_set_memory_cxsr(dev_priv
, true);
2404 static void i845_update_wm(struct intel_crtc
*unused_crtc
)
2406 struct drm_i915_private
*dev_priv
= to_i915(unused_crtc
->base
.dev
);
2407 struct intel_crtc
*crtc
;
2408 const struct drm_display_mode
*adjusted_mode
;
2412 crtc
= single_enabled_crtc(dev_priv
);
2416 adjusted_mode
= &crtc
->config
->base
.adjusted_mode
;
2417 planea_wm
= intel_calculate_wm(adjusted_mode
->crtc_clock
,
2419 dev_priv
->display
.get_fifo_size(dev_priv
, PLANE_A
),
2420 4, pessimal_latency_ns
);
2421 fwater_lo
= I915_READ(FW_BLC
) & ~0xfff;
2422 fwater_lo
|= (3<<8) | planea_wm
;
2424 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm
);
2426 I915_WRITE(FW_BLC
, fwater_lo
);
/* latency must be in 0.1us units. */
static unsigned int ilk_wm_method1(unsigned int pixel_rate,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = intel_wm_method1(pixel_rate, cpp, latency);
	/* convert to 64-byte FIFO entries, plus 2 guard entries */
	ret = DIV_ROUND_UP(ret, 64) + 2;

	return ret;
}
/* latency must be in 0.1us units. */
static unsigned int ilk_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = intel_wm_method2(pixel_rate, htotal,
			       width, cpp, latency);
	/* convert to 64-byte FIFO entries, plus 2 guard entries */
	ret = DIV_ROUND_UP(ret, 64) + 2;

	return ret;
}
2458 static u32
ilk_wm_fbc(u32 pri_val
, u32 horiz_pixels
, u8 cpp
)
2461 * Neither of these should be possible since this function shouldn't be
2462 * called if the CRTC is off or the plane is invisible. But let's be
2463 * extra paranoid to avoid a potential divide-by-zero if we screw up
2464 * elsewhere in the driver.
2468 if (WARN_ON(!horiz_pixels
))
2471 return DIV_ROUND_UP(pri_val
* 64, horiz_pixels
* cpp
) + 2;
2474 struct ilk_wm_maximums
{
2482 * For both WM_PIPE and WM_LP.
2483 * mem_value must be in 0.1us units.
2485 static u32
ilk_compute_pri_wm(const struct intel_crtc_state
*cstate
,
2486 const struct intel_plane_state
*pstate
,
2487 u32 mem_value
, bool is_lp
)
2489 u32 method1
, method2
;
2495 if (!intel_wm_plane_visible(cstate
, pstate
))
2498 cpp
= pstate
->base
.fb
->format
->cpp
[0];
2500 method1
= ilk_wm_method1(cstate
->pixel_rate
, cpp
, mem_value
);
2505 method2
= ilk_wm_method2(cstate
->pixel_rate
,
2506 cstate
->base
.adjusted_mode
.crtc_htotal
,
2507 drm_rect_width(&pstate
->base
.dst
),
2510 return min(method1
, method2
);
2514 * For both WM_PIPE and WM_LP.
2515 * mem_value must be in 0.1us units.
2517 static u32
ilk_compute_spr_wm(const struct intel_crtc_state
*cstate
,
2518 const struct intel_plane_state
*pstate
,
2521 u32 method1
, method2
;
2527 if (!intel_wm_plane_visible(cstate
, pstate
))
2530 cpp
= pstate
->base
.fb
->format
->cpp
[0];
2532 method1
= ilk_wm_method1(cstate
->pixel_rate
, cpp
, mem_value
);
2533 method2
= ilk_wm_method2(cstate
->pixel_rate
,
2534 cstate
->base
.adjusted_mode
.crtc_htotal
,
2535 drm_rect_width(&pstate
->base
.dst
),
2537 return min(method1
, method2
);
2541 * For both WM_PIPE and WM_LP.
2542 * mem_value must be in 0.1us units.
2544 static u32
ilk_compute_cur_wm(const struct intel_crtc_state
*cstate
,
2545 const struct intel_plane_state
*pstate
,
2553 if (!intel_wm_plane_visible(cstate
, pstate
))
2556 cpp
= pstate
->base
.fb
->format
->cpp
[0];
2558 return ilk_wm_method2(cstate
->pixel_rate
,
2559 cstate
->base
.adjusted_mode
.crtc_htotal
,
2560 pstate
->base
.crtc_w
, cpp
, mem_value
);
2563 /* Only for WM_LP. */
2564 static u32
ilk_compute_fbc_wm(const struct intel_crtc_state
*cstate
,
2565 const struct intel_plane_state
*pstate
,
2570 if (!intel_wm_plane_visible(cstate
, pstate
))
2573 cpp
= pstate
->base
.fb
->format
->cpp
[0];
2575 return ilk_wm_fbc(pri_val
, drm_rect_width(&pstate
->base
.dst
), cpp
);
/* Total self refresh FIFO size (in cachelines) available to the display. */
static unsigned int
ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) >= 8)
		return 3072;
	else if (INTEL_GEN(dev_priv) >= 7)
		return 768;
	else
		return 512;
}
2590 ilk_plane_wm_reg_max(const struct drm_i915_private
*dev_priv
,
2591 int level
, bool is_sprite
)
2593 if (INTEL_GEN(dev_priv
) >= 8)
2594 /* BDW primary/sprite plane watermarks */
2595 return level
== 0 ? 255 : 2047;
2596 else if (INTEL_GEN(dev_priv
) >= 7)
2597 /* IVB/HSW primary/sprite plane watermarks */
2598 return level
== 0 ? 127 : 1023;
2599 else if (!is_sprite
)
2600 /* ILK/SNB primary plane watermarks */
2601 return level
== 0 ? 127 : 511;
2603 /* ILK/SNB sprite plane watermarks */
2604 return level
== 0 ? 63 : 255;
/* Maximum cursor watermark value the registers can hold. */
static unsigned int
ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
{
	if (INTEL_GEN(dev_priv) >= 7)
		return level == 0 ? 63 : 255;
	else
		return level == 0 ? 31 : 63;
}
/* Maximum FBC watermark value the registers can hold. */
static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) >= 8)
		return 31;
	else
		return 15;
}
2624 /* Calculate the maximum primary/sprite plane watermark */
2625 static unsigned int ilk_plane_wm_max(const struct drm_i915_private
*dev_priv
,
2627 const struct intel_wm_config
*config
,
2628 enum intel_ddb_partitioning ddb_partitioning
,
2631 unsigned int fifo_size
= ilk_display_fifo_size(dev_priv
);
2633 /* if sprites aren't enabled, sprites get nothing */
2634 if (is_sprite
&& !config
->sprites_enabled
)
2637 /* HSW allows LP1+ watermarks even with multiple pipes */
2638 if (level
== 0 || config
->num_pipes_active
> 1) {
2639 fifo_size
/= INTEL_INFO(dev_priv
)->num_pipes
;
2642 * For some reason the non self refresh
2643 * FIFO size is only half of the self
2644 * refresh FIFO size on ILK/SNB.
2646 if (INTEL_GEN(dev_priv
) <= 6)
2650 if (config
->sprites_enabled
) {
2651 /* level 0 is always calculated with 1:1 split */
2652 if (level
> 0 && ddb_partitioning
== INTEL_DDB_PART_5_6
) {
2661 /* clamp to max that the registers can hold */
2662 return min(fifo_size
, ilk_plane_wm_reg_max(dev_priv
, level
, is_sprite
));
2665 /* Calculate the maximum cursor plane watermark */
2666 static unsigned int ilk_cursor_wm_max(const struct drm_i915_private
*dev_priv
,
2668 const struct intel_wm_config
*config
)
2670 /* HSW LP1+ watermarks w/ multiple pipes */
2671 if (level
> 0 && config
->num_pipes_active
> 1)
2674 /* otherwise just report max that registers can hold */
2675 return ilk_cursor_wm_reg_max(dev_priv
, level
);
2678 static void ilk_compute_wm_maximums(const struct drm_i915_private
*dev_priv
,
2680 const struct intel_wm_config
*config
,
2681 enum intel_ddb_partitioning ddb_partitioning
,
2682 struct ilk_wm_maximums
*max
)
2684 max
->pri
= ilk_plane_wm_max(dev_priv
, level
, config
, ddb_partitioning
, false);
2685 max
->spr
= ilk_plane_wm_max(dev_priv
, level
, config
, ddb_partitioning
, true);
2686 max
->cur
= ilk_cursor_wm_max(dev_priv
, level
, config
);
2687 max
->fbc
= ilk_fbc_wm_reg_max(dev_priv
);
2690 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private
*dev_priv
,
2692 struct ilk_wm_maximums
*max
)
2694 max
->pri
= ilk_plane_wm_reg_max(dev_priv
, level
, false);
2695 max
->spr
= ilk_plane_wm_reg_max(dev_priv
, level
, true);
2696 max
->cur
= ilk_cursor_wm_reg_max(dev_priv
, level
);
2697 max
->fbc
= ilk_fbc_wm_reg_max(dev_priv
);
2700 static bool ilk_validate_wm_level(int level
,
2701 const struct ilk_wm_maximums
*max
,
2702 struct intel_wm_level
*result
)
2706 /* already determined to be invalid? */
2707 if (!result
->enable
)
2710 result
->enable
= result
->pri_val
<= max
->pri
&&
2711 result
->spr_val
<= max
->spr
&&
2712 result
->cur_val
<= max
->cur
;
2714 ret
= result
->enable
;
2717 * HACK until we can pre-compute everything,
2718 * and thus fail gracefully if LP0 watermarks
2721 if (level
== 0 && !result
->enable
) {
2722 if (result
->pri_val
> max
->pri
)
2723 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2724 level
, result
->pri_val
, max
->pri
);
2725 if (result
->spr_val
> max
->spr
)
2726 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2727 level
, result
->spr_val
, max
->spr
);
2728 if (result
->cur_val
> max
->cur
)
2729 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2730 level
, result
->cur_val
, max
->cur
);
2732 result
->pri_val
= min_t(u32
, result
->pri_val
, max
->pri
);
2733 result
->spr_val
= min_t(u32
, result
->spr_val
, max
->spr
);
2734 result
->cur_val
= min_t(u32
, result
->cur_val
, max
->cur
);
2735 result
->enable
= true;
2741 static void ilk_compute_wm_level(const struct drm_i915_private
*dev_priv
,
2742 const struct intel_crtc
*intel_crtc
,
2744 struct intel_crtc_state
*cstate
,
2745 const struct intel_plane_state
*pristate
,
2746 const struct intel_plane_state
*sprstate
,
2747 const struct intel_plane_state
*curstate
,
2748 struct intel_wm_level
*result
)
2750 u16 pri_latency
= dev_priv
->wm
.pri_latency
[level
];
2751 u16 spr_latency
= dev_priv
->wm
.spr_latency
[level
];
2752 u16 cur_latency
= dev_priv
->wm
.cur_latency
[level
];
2754 /* WM1+ latency values stored in 0.5us units */
2762 result
->pri_val
= ilk_compute_pri_wm(cstate
, pristate
,
2763 pri_latency
, level
);
2764 result
->fbc_val
= ilk_compute_fbc_wm(cstate
, pristate
, result
->pri_val
);
2768 result
->spr_val
= ilk_compute_spr_wm(cstate
, sprstate
, spr_latency
);
2771 result
->cur_val
= ilk_compute_cur_wm(cstate
, curstate
, cur_latency
);
2773 result
->enable
= true;
2777 hsw_compute_linetime_wm(const struct intel_crtc_state
*cstate
)
2779 const struct intel_atomic_state
*intel_state
=
2780 to_intel_atomic_state(cstate
->base
.state
);
2781 const struct drm_display_mode
*adjusted_mode
=
2782 &cstate
->base
.adjusted_mode
;
2783 u32 linetime
, ips_linetime
;
2785 if (!cstate
->base
.active
)
2787 if (WARN_ON(adjusted_mode
->crtc_clock
== 0))
2789 if (WARN_ON(intel_state
->cdclk
.logical
.cdclk
== 0))
2792 /* The WM are computed with base on how long it takes to fill a single
2793 * row at the given clock rate, multiplied by 8.
2795 linetime
= DIV_ROUND_CLOSEST(adjusted_mode
->crtc_htotal
* 1000 * 8,
2796 adjusted_mode
->crtc_clock
);
2797 ips_linetime
= DIV_ROUND_CLOSEST(adjusted_mode
->crtc_htotal
* 1000 * 8,
2798 intel_state
->cdclk
.logical
.cdclk
);
2800 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime
) |
2801 PIPE_WM_LINETIME_TIME(linetime
);
2804 static void intel_read_wm_latency(struct drm_i915_private
*dev_priv
,
2807 if (INTEL_GEN(dev_priv
) >= 9) {
2810 int level
, max_level
= ilk_wm_max_level(dev_priv
);
2812 /* read the first set of memory latencies[0:3] */
2813 val
= 0; /* data0 to be programmed to 0 for first set */
2814 mutex_lock(&dev_priv
->pcu_lock
);
2815 ret
= sandybridge_pcode_read(dev_priv
,
2816 GEN9_PCODE_READ_MEM_LATENCY
,
2818 mutex_unlock(&dev_priv
->pcu_lock
);
2821 DRM_ERROR("SKL Mailbox read error = %d\n", ret
);
2825 wm
[0] = val
& GEN9_MEM_LATENCY_LEVEL_MASK
;
2826 wm
[1] = (val
>> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT
) &
2827 GEN9_MEM_LATENCY_LEVEL_MASK
;
2828 wm
[2] = (val
>> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT
) &
2829 GEN9_MEM_LATENCY_LEVEL_MASK
;
2830 wm
[3] = (val
>> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT
) &
2831 GEN9_MEM_LATENCY_LEVEL_MASK
;
2833 /* read the second set of memory latencies[4:7] */
2834 val
= 1; /* data0 to be programmed to 1 for second set */
2835 mutex_lock(&dev_priv
->pcu_lock
);
2836 ret
= sandybridge_pcode_read(dev_priv
,
2837 GEN9_PCODE_READ_MEM_LATENCY
,
2839 mutex_unlock(&dev_priv
->pcu_lock
);
2841 DRM_ERROR("SKL Mailbox read error = %d\n", ret
);
2845 wm
[4] = val
& GEN9_MEM_LATENCY_LEVEL_MASK
;
2846 wm
[5] = (val
>> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT
) &
2847 GEN9_MEM_LATENCY_LEVEL_MASK
;
2848 wm
[6] = (val
>> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT
) &
2849 GEN9_MEM_LATENCY_LEVEL_MASK
;
2850 wm
[7] = (val
>> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT
) &
2851 GEN9_MEM_LATENCY_LEVEL_MASK
;
2854 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2855 * need to be disabled. We make sure to sanitize the values out
2856 * of the punit to satisfy this requirement.
2858 for (level
= 1; level
<= max_level
; level
++) {
2859 if (wm
[level
] == 0) {
2860 for (i
= level
+ 1; i
<= max_level
; i
++)
2867 * WaWmMemoryReadLatency:skl+,glk
2869 * punit doesn't take into account the read latency so we need
2870 * to add 2us to the various latency levels we retrieve from the
2871 * punit when level 0 response data us 0us.
2875 for (level
= 1; level
<= max_level
; level
++) {
2883 * WA Level-0 adjustment for 16GB DIMMs: SKL+
2884 * If we could not get dimm info enable this WA to prevent from
2885 * any underrun. If not able to get Dimm info assume 16GB dimm
2886 * to avoid any underrun.
2888 if (dev_priv
->dram_info
.is_16gb_dimm
)
2891 } else if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
)) {
2892 u64 sskpd
= I915_READ64(MCH_SSKPD
);
2894 wm
[0] = (sskpd
>> 56) & 0xFF;
2896 wm
[0] = sskpd
& 0xF;
2897 wm
[1] = (sskpd
>> 4) & 0xFF;
2898 wm
[2] = (sskpd
>> 12) & 0xFF;
2899 wm
[3] = (sskpd
>> 20) & 0x1FF;
2900 wm
[4] = (sskpd
>> 32) & 0x1FF;
2901 } else if (INTEL_GEN(dev_priv
) >= 6) {
2902 u32 sskpd
= I915_READ(MCH_SSKPD
);
2904 wm
[0] = (sskpd
>> SSKPD_WM0_SHIFT
) & SSKPD_WM_MASK
;
2905 wm
[1] = (sskpd
>> SSKPD_WM1_SHIFT
) & SSKPD_WM_MASK
;
2906 wm
[2] = (sskpd
>> SSKPD_WM2_SHIFT
) & SSKPD_WM_MASK
;
2907 wm
[3] = (sskpd
>> SSKPD_WM3_SHIFT
) & SSKPD_WM_MASK
;
2908 } else if (INTEL_GEN(dev_priv
) >= 5) {
2909 u32 mltr
= I915_READ(MLTR_ILK
);
2911 /* ILK primary LP0 latency is 700 ns */
2913 wm
[1] = (mltr
>> MLTR_WM1_SHIFT
) & ILK_SRLT_MASK
;
2914 wm
[2] = (mltr
>> MLTR_WM2_SHIFT
) & ILK_SRLT_MASK
;
2916 MISSING_CASE(INTEL_DEVID(dev_priv
));
2920 static void intel_fixup_spr_wm_latency(struct drm_i915_private
*dev_priv
,
2923 /* ILK sprite LP0 latency is 1300 ns */
2924 if (IS_GEN(dev_priv
, 5))
2928 static void intel_fixup_cur_wm_latency(struct drm_i915_private
*dev_priv
,
2931 /* ILK cursor LP0 latency is 1300 ns */
2932 if (IS_GEN(dev_priv
, 5))
/* Highest valid watermark level index for this platform. */
int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
{
	/* how many WM levels are we expecting */
	if (INTEL_GEN(dev_priv) >= 9)
		return 7;
	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		return 4;
	else if (INTEL_GEN(dev_priv) >= 6)
		return 3;
	else
		return 2;
}
2949 static void intel_print_wm_latency(struct drm_i915_private
*dev_priv
,
2953 int level
, max_level
= ilk_wm_max_level(dev_priv
);
2955 for (level
= 0; level
<= max_level
; level
++) {
2956 unsigned int latency
= wm
[level
];
2959 DRM_DEBUG_KMS("%s WM%d latency not provided\n",
2965 * - latencies are in us on gen9.
2966 * - before then, WM1+ latency values are in 0.5us units
2968 if (INTEL_GEN(dev_priv
) >= 9)
2973 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2974 name
, level
, wm
[level
],
2975 latency
/ 10, latency
% 10);
2979 static bool ilk_increase_wm_latency(struct drm_i915_private
*dev_priv
,
2982 int level
, max_level
= ilk_wm_max_level(dev_priv
);
2987 wm
[0] = max(wm
[0], min
);
2988 for (level
= 1; level
<= max_level
; level
++)
2989 wm
[level
] = max_t(u16
, wm
[level
], DIV_ROUND_UP(min
, 5));
2994 static void snb_wm_latency_quirk(struct drm_i915_private
*dev_priv
)
2999 * The BIOS provided WM memory latency values are often
3000 * inadequate for high resolution displays. Adjust them.
3002 changed
= ilk_increase_wm_latency(dev_priv
, dev_priv
->wm
.pri_latency
, 12) |
3003 ilk_increase_wm_latency(dev_priv
, dev_priv
->wm
.spr_latency
, 12) |
3004 ilk_increase_wm_latency(dev_priv
, dev_priv
->wm
.cur_latency
, 12);
3009 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
3010 intel_print_wm_latency(dev_priv
, "Primary", dev_priv
->wm
.pri_latency
);
3011 intel_print_wm_latency(dev_priv
, "Sprite", dev_priv
->wm
.spr_latency
);
3012 intel_print_wm_latency(dev_priv
, "Cursor", dev_priv
->wm
.cur_latency
);
3015 static void snb_wm_lp3_irq_quirk(struct drm_i915_private
*dev_priv
)
3018 * On some SNB machines (Thinkpad X220 Tablet at least)
3019 * LP3 usage can cause vblank interrupts to be lost.
3020 * The DEIIR bit will go high but it looks like the CPU
3021 * never gets interrupted.
3023 * It's not clear whether other interrupt source could
3024 * be affected or if this is somehow limited to vblank
3025 * interrupts only. To play it safe we disable LP3
3026 * watermarks entirely.
3028 if (dev_priv
->wm
.pri_latency
[3] == 0 &&
3029 dev_priv
->wm
.spr_latency
[3] == 0 &&
3030 dev_priv
->wm
.cur_latency
[3] == 0)
3033 dev_priv
->wm
.pri_latency
[3] = 0;
3034 dev_priv
->wm
.spr_latency
[3] = 0;
3035 dev_priv
->wm
.cur_latency
[3] = 0;
3037 DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3038 intel_print_wm_latency(dev_priv
, "Primary", dev_priv
->wm
.pri_latency
);
3039 intel_print_wm_latency(dev_priv
, "Sprite", dev_priv
->wm
.spr_latency
);
3040 intel_print_wm_latency(dev_priv
, "Cursor", dev_priv
->wm
.cur_latency
);
3043 static void ilk_setup_wm_latency(struct drm_i915_private
*dev_priv
)
3045 intel_read_wm_latency(dev_priv
, dev_priv
->wm
.pri_latency
);
3047 memcpy(dev_priv
->wm
.spr_latency
, dev_priv
->wm
.pri_latency
,
3048 sizeof(dev_priv
->wm
.pri_latency
));
3049 memcpy(dev_priv
->wm
.cur_latency
, dev_priv
->wm
.pri_latency
,
3050 sizeof(dev_priv
->wm
.pri_latency
));
3052 intel_fixup_spr_wm_latency(dev_priv
, dev_priv
->wm
.spr_latency
);
3053 intel_fixup_cur_wm_latency(dev_priv
, dev_priv
->wm
.cur_latency
);
3055 intel_print_wm_latency(dev_priv
, "Primary", dev_priv
->wm
.pri_latency
);
3056 intel_print_wm_latency(dev_priv
, "Sprite", dev_priv
->wm
.spr_latency
);
3057 intel_print_wm_latency(dev_priv
, "Cursor", dev_priv
->wm
.cur_latency
);
3059 if (IS_GEN(dev_priv
, 6)) {
3060 snb_wm_latency_quirk(dev_priv
);
3061 snb_wm_lp3_irq_quirk(dev_priv
);
3065 static void skl_setup_wm_latency(struct drm_i915_private
*dev_priv
)
3067 intel_read_wm_latency(dev_priv
, dev_priv
->wm
.skl_latency
);
3068 intel_print_wm_latency(dev_priv
, "Gen9 Plane", dev_priv
->wm
.skl_latency
);
3071 static bool ilk_validate_pipe_wm(const struct drm_i915_private
*dev_priv
,
3072 struct intel_pipe_wm
*pipe_wm
)
3074 /* LP0 watermark maximums depend on this pipe alone */
3075 const struct intel_wm_config config
= {
3076 .num_pipes_active
= 1,
3077 .sprites_enabled
= pipe_wm
->sprites_enabled
,
3078 .sprites_scaled
= pipe_wm
->sprites_scaled
,
3080 struct ilk_wm_maximums max
;
3082 /* LP0 watermarks always use 1/2 DDB partitioning */
3083 ilk_compute_wm_maximums(dev_priv
, 0, &config
, INTEL_DDB_PART_1_2
, &max
);
3085 /* At least LP0 must be valid */
3086 if (!ilk_validate_wm_level(0, &max
, &pipe_wm
->wm
[0])) {
3087 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3094 /* Compute new watermarks for the pipe */
3095 static int ilk_compute_pipe_wm(struct intel_crtc_state
*cstate
)
3097 struct drm_atomic_state
*state
= cstate
->base
.state
;
3098 struct intel_crtc
*intel_crtc
= to_intel_crtc(cstate
->base
.crtc
);
3099 struct intel_pipe_wm
*pipe_wm
;
3100 struct drm_device
*dev
= state
->dev
;
3101 const struct drm_i915_private
*dev_priv
= to_i915(dev
);
3102 struct drm_plane
*plane
;
3103 const struct drm_plane_state
*plane_state
;
3104 const struct intel_plane_state
*pristate
= NULL
;
3105 const struct intel_plane_state
*sprstate
= NULL
;
3106 const struct intel_plane_state
*curstate
= NULL
;
3107 int level
, max_level
= ilk_wm_max_level(dev_priv
), usable_level
;
3108 struct ilk_wm_maximums max
;
3110 pipe_wm
= &cstate
->wm
.ilk
.optimal
;
3112 drm_atomic_crtc_state_for_each_plane_state(plane
, plane_state
, &cstate
->base
) {
3113 const struct intel_plane_state
*ps
= to_intel_plane_state(plane_state
);
3115 if (plane
->type
== DRM_PLANE_TYPE_PRIMARY
)
3117 else if (plane
->type
== DRM_PLANE_TYPE_OVERLAY
)
3119 else if (plane
->type
== DRM_PLANE_TYPE_CURSOR
)
3123 pipe_wm
->pipe_enabled
= cstate
->base
.active
;
3125 pipe_wm
->sprites_enabled
= sprstate
->base
.visible
;
3126 pipe_wm
->sprites_scaled
= sprstate
->base
.visible
&&
3127 (drm_rect_width(&sprstate
->base
.dst
) != drm_rect_width(&sprstate
->base
.src
) >> 16 ||
3128 drm_rect_height(&sprstate
->base
.dst
) != drm_rect_height(&sprstate
->base
.src
) >> 16);
3131 usable_level
= max_level
;
3133 /* ILK/SNB: LP2+ watermarks only w/o sprites */
3134 if (INTEL_GEN(dev_priv
) <= 6 && pipe_wm
->sprites_enabled
)
3137 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3138 if (pipe_wm
->sprites_scaled
)
3141 memset(&pipe_wm
->wm
, 0, sizeof(pipe_wm
->wm
));
3142 ilk_compute_wm_level(dev_priv
, intel_crtc
, 0, cstate
,
3143 pristate
, sprstate
, curstate
, &pipe_wm
->wm
[0]);
3145 if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
))
3146 pipe_wm
->linetime
= hsw_compute_linetime_wm(cstate
);
3148 if (!ilk_validate_pipe_wm(dev_priv
, pipe_wm
))
3151 ilk_compute_wm_reg_maximums(dev_priv
, 1, &max
);
3153 for (level
= 1; level
<= usable_level
; level
++) {
3154 struct intel_wm_level
*wm
= &pipe_wm
->wm
[level
];
3156 ilk_compute_wm_level(dev_priv
, intel_crtc
, level
, cstate
,
3157 pristate
, sprstate
, curstate
, wm
);
3160 * Disable any watermark level that exceeds the
3161 * register maximums since such watermarks are
3164 if (!ilk_validate_wm_level(level
, &max
, wm
)) {
3165 memset(wm
, 0, sizeof(*wm
));
3174 * Build a set of 'intermediate' watermark values that satisfy both the old
3175 * state and the new state. These can be programmed to the hardware
3178 static int ilk_compute_intermediate_wm(struct intel_crtc_state
*newstate
)
3180 struct intel_crtc
*intel_crtc
= to_intel_crtc(newstate
->base
.crtc
);
3181 struct drm_i915_private
*dev_priv
= to_i915(intel_crtc
->base
.dev
);
3182 struct intel_pipe_wm
*a
= &newstate
->wm
.ilk
.intermediate
;
3183 struct intel_atomic_state
*intel_state
=
3184 to_intel_atomic_state(newstate
->base
.state
);
3185 const struct intel_crtc_state
*oldstate
=
3186 intel_atomic_get_old_crtc_state(intel_state
, intel_crtc
);
3187 const struct intel_pipe_wm
*b
= &oldstate
->wm
.ilk
.optimal
;
3188 int level
, max_level
= ilk_wm_max_level(dev_priv
);
3191 * Start with the final, target watermarks, then combine with the
3192 * currently active watermarks to get values that are safe both before
3193 * and after the vblank.
3195 *a
= newstate
->wm
.ilk
.optimal
;
3196 if (!newstate
->base
.active
|| drm_atomic_crtc_needs_modeset(&newstate
->base
) ||
3197 intel_state
->skip_intermediate_wm
)
3200 a
->pipe_enabled
|= b
->pipe_enabled
;
3201 a
->sprites_enabled
|= b
->sprites_enabled
;
3202 a
->sprites_scaled
|= b
->sprites_scaled
;
3204 for (level
= 0; level
<= max_level
; level
++) {
3205 struct intel_wm_level
*a_wm
= &a
->wm
[level
];
3206 const struct intel_wm_level
*b_wm
= &b
->wm
[level
];
3208 a_wm
->enable
&= b_wm
->enable
;
3209 a_wm
->pri_val
= max(a_wm
->pri_val
, b_wm
->pri_val
);
3210 a_wm
->spr_val
= max(a_wm
->spr_val
, b_wm
->spr_val
);
3211 a_wm
->cur_val
= max(a_wm
->cur_val
, b_wm
->cur_val
);
3212 a_wm
->fbc_val
= max(a_wm
->fbc_val
, b_wm
->fbc_val
);
3216 * We need to make sure that these merged watermark values are
3217 * actually a valid configuration themselves. If they're not,
3218 * there's no safe way to transition from the old state to
3219 * the new state, so we need to fail the atomic transaction.
3221 if (!ilk_validate_pipe_wm(dev_priv
, a
))
3225 * If our intermediate WM are identical to the final WM, then we can
3226 * omit the post-vblank programming; only update if it's different.
3228 if (memcmp(a
, &newstate
->wm
.ilk
.optimal
, sizeof(*a
)) != 0)
3229 newstate
->wm
.need_postvbl_update
= true;
3235 * Merge the watermarks from all active pipes for a specific level.
3237 static void ilk_merge_wm_level(struct drm_i915_private
*dev_priv
,
3239 struct intel_wm_level
*ret_wm
)
3241 const struct intel_crtc
*intel_crtc
;
3243 ret_wm
->enable
= true;
3245 for_each_intel_crtc(&dev_priv
->drm
, intel_crtc
) {
3246 const struct intel_pipe_wm
*active
= &intel_crtc
->wm
.active
.ilk
;
3247 const struct intel_wm_level
*wm
= &active
->wm
[level
];
3249 if (!active
->pipe_enabled
)
3253 * The watermark values may have been used in the past,
3254 * so we must maintain them in the registers for some
3255 * time even if the level is now disabled.
3258 ret_wm
->enable
= false;
3260 ret_wm
->pri_val
= max(ret_wm
->pri_val
, wm
->pri_val
);
3261 ret_wm
->spr_val
= max(ret_wm
->spr_val
, wm
->spr_val
);
3262 ret_wm
->cur_val
= max(ret_wm
->cur_val
, wm
->cur_val
);
3263 ret_wm
->fbc_val
= max(ret_wm
->fbc_val
, wm
->fbc_val
);
3268 * Merge all low power watermarks for all active pipes.
3270 static void ilk_wm_merge(struct drm_i915_private
*dev_priv
,
3271 const struct intel_wm_config
*config
,
3272 const struct ilk_wm_maximums
*max
,
3273 struct intel_pipe_wm
*merged
)
3275 int level
, max_level
= ilk_wm_max_level(dev_priv
);
3276 int last_enabled_level
= max_level
;
3278 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3279 if ((INTEL_GEN(dev_priv
) <= 6 || IS_IVYBRIDGE(dev_priv
)) &&
3280 config
->num_pipes_active
> 1)
3281 last_enabled_level
= 0;
3283 /* ILK: FBC WM must be disabled always */
3284 merged
->fbc_wm_enabled
= INTEL_GEN(dev_priv
) >= 6;
3286 /* merge each WM1+ level */
3287 for (level
= 1; level
<= max_level
; level
++) {
3288 struct intel_wm_level
*wm
= &merged
->wm
[level
];
3290 ilk_merge_wm_level(dev_priv
, level
, wm
);
3292 if (level
> last_enabled_level
)
3294 else if (!ilk_validate_wm_level(level
, max
, wm
))
3295 /* make sure all following levels get disabled */
3296 last_enabled_level
= level
- 1;
3299 * The spec says it is preferred to disable
3300 * FBC WMs instead of disabling a WM level.
3302 if (wm
->fbc_val
> max
->fbc
) {
3304 merged
->fbc_wm_enabled
= false;
3309 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3311 * FIXME this is racy. FBC might get enabled later.
3312 * What we should check here is whether FBC can be
3313 * enabled sometime later.
3315 if (IS_GEN(dev_priv
, 5) && !merged
->fbc_wm_enabled
&&
3316 intel_fbc_is_active(dev_priv
)) {
3317 for (level
= 2; level
<= max_level
; level
++) {
3318 struct intel_wm_level
*wm
= &merged
->wm
[level
];
3325 static int ilk_wm_lp_to_level(int wm_lp
, const struct intel_pipe_wm
*pipe_wm
)
3327 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3328 return wm_lp
+ (wm_lp
>= 2 && pipe_wm
->wm
[4].enable
);
3331 /* The value we need to program into the WM_LPx latency field */
3332 static unsigned int ilk_wm_lp_latency(struct drm_i915_private
*dev_priv
,
3335 if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
))
3338 return dev_priv
->wm
.pri_latency
[level
];
3341 static void ilk_compute_wm_results(struct drm_i915_private
*dev_priv
,
3342 const struct intel_pipe_wm
*merged
,
3343 enum intel_ddb_partitioning partitioning
,
3344 struct ilk_wm_values
*results
)
3346 struct intel_crtc
*intel_crtc
;
3349 results
->enable_fbc_wm
= merged
->fbc_wm_enabled
;
3350 results
->partitioning
= partitioning
;
3352 /* LP1+ register values */
3353 for (wm_lp
= 1; wm_lp
<= 3; wm_lp
++) {
3354 const struct intel_wm_level
*r
;
3356 level
= ilk_wm_lp_to_level(wm_lp
, merged
);
3358 r
= &merged
->wm
[level
];
3361 * Maintain the watermark values even if the level is
3362 * disabled. Doing otherwise could cause underruns.
3364 results
->wm_lp
[wm_lp
- 1] =
3365 (ilk_wm_lp_latency(dev_priv
, level
) << WM1_LP_LATENCY_SHIFT
) |
3366 (r
->pri_val
<< WM1_LP_SR_SHIFT
) |
3370 results
->wm_lp
[wm_lp
- 1] |= WM1_LP_SR_EN
;
3372 if (INTEL_GEN(dev_priv
) >= 8)
3373 results
->wm_lp
[wm_lp
- 1] |=
3374 r
->fbc_val
<< WM1_LP_FBC_SHIFT_BDW
;
3376 results
->wm_lp
[wm_lp
- 1] |=
3377 r
->fbc_val
<< WM1_LP_FBC_SHIFT
;
3380 * Always set WM1S_LP_EN when spr_val != 0, even if the
3381 * level is disabled. Doing otherwise could cause underruns.
3383 if (INTEL_GEN(dev_priv
) <= 6 && r
->spr_val
) {
3384 WARN_ON(wm_lp
!= 1);
3385 results
->wm_lp_spr
[wm_lp
- 1] = WM1S_LP_EN
| r
->spr_val
;
3387 results
->wm_lp_spr
[wm_lp
- 1] = r
->spr_val
;
3390 /* LP0 register values */
3391 for_each_intel_crtc(&dev_priv
->drm
, intel_crtc
) {
3392 enum pipe pipe
= intel_crtc
->pipe
;
3393 const struct intel_wm_level
*r
=
3394 &intel_crtc
->wm
.active
.ilk
.wm
[0];
3396 if (WARN_ON(!r
->enable
))
3399 results
->wm_linetime
[pipe
] = intel_crtc
->wm
.active
.ilk
.linetime
;
3401 results
->wm_pipe
[pipe
] =
3402 (r
->pri_val
<< WM0_PIPE_PLANE_SHIFT
) |
3403 (r
->spr_val
<< WM0_PIPE_SPRITE_SHIFT
) |
3408 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
3409 * case both are at the same level. Prefer r1 in case they're the same. */
3410 static struct intel_pipe_wm
*
3411 ilk_find_best_result(struct drm_i915_private
*dev_priv
,
3412 struct intel_pipe_wm
*r1
,
3413 struct intel_pipe_wm
*r2
)
3415 int level
, max_level
= ilk_wm_max_level(dev_priv
);
3416 int level1
= 0, level2
= 0;
3418 for (level
= 1; level
<= max_level
; level
++) {
3419 if (r1
->wm
[level
].enable
)
3421 if (r2
->wm
[level
].enable
)
3425 if (level1
== level2
) {
3426 if (r2
->fbc_wm_enabled
&& !r1
->fbc_wm_enabled
)
3430 } else if (level1
> level2
) {
/* dirty bits used to track which watermarks need changes */
#define WM_DIRTY_PIPE(pipe) (1 << (pipe))
#define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
#define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
#define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
#define WM_DIRTY_FBC (1 << 24)
#define WM_DIRTY_DDB (1 << 25)
3445 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private
*dev_priv
,
3446 const struct ilk_wm_values
*old
,
3447 const struct ilk_wm_values
*new)
3449 unsigned int dirty
= 0;
3453 for_each_pipe(dev_priv
, pipe
) {
3454 if (old
->wm_linetime
[pipe
] != new->wm_linetime
[pipe
]) {
3455 dirty
|= WM_DIRTY_LINETIME(pipe
);
3456 /* Must disable LP1+ watermarks too */
3457 dirty
|= WM_DIRTY_LP_ALL
;
3460 if (old
->wm_pipe
[pipe
] != new->wm_pipe
[pipe
]) {
3461 dirty
|= WM_DIRTY_PIPE(pipe
);
3462 /* Must disable LP1+ watermarks too */
3463 dirty
|= WM_DIRTY_LP_ALL
;
3467 if (old
->enable_fbc_wm
!= new->enable_fbc_wm
) {
3468 dirty
|= WM_DIRTY_FBC
;
3469 /* Must disable LP1+ watermarks too */
3470 dirty
|= WM_DIRTY_LP_ALL
;
3473 if (old
->partitioning
!= new->partitioning
) {
3474 dirty
|= WM_DIRTY_DDB
;
3475 /* Must disable LP1+ watermarks too */
3476 dirty
|= WM_DIRTY_LP_ALL
;
3479 /* LP1+ watermarks already deemed dirty, no need to continue */
3480 if (dirty
& WM_DIRTY_LP_ALL
)
3483 /* Find the lowest numbered LP1+ watermark in need of an update... */
3484 for (wm_lp
= 1; wm_lp
<= 3; wm_lp
++) {
3485 if (old
->wm_lp
[wm_lp
- 1] != new->wm_lp
[wm_lp
- 1] ||
3486 old
->wm_lp_spr
[wm_lp
- 1] != new->wm_lp_spr
[wm_lp
- 1])
3490 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3491 for (; wm_lp
<= 3; wm_lp
++)
3492 dirty
|= WM_DIRTY_LP(wm_lp
);
3497 static bool _ilk_disable_lp_wm(struct drm_i915_private
*dev_priv
,
3500 struct ilk_wm_values
*previous
= &dev_priv
->wm
.hw
;
3501 bool changed
= false;
3503 if (dirty
& WM_DIRTY_LP(3) && previous
->wm_lp
[2] & WM1_LP_SR_EN
) {
3504 previous
->wm_lp
[2] &= ~WM1_LP_SR_EN
;
3505 I915_WRITE(WM3_LP_ILK
, previous
->wm_lp
[2]);
3508 if (dirty
& WM_DIRTY_LP(2) && previous
->wm_lp
[1] & WM1_LP_SR_EN
) {
3509 previous
->wm_lp
[1] &= ~WM1_LP_SR_EN
;
3510 I915_WRITE(WM2_LP_ILK
, previous
->wm_lp
[1]);
3513 if (dirty
& WM_DIRTY_LP(1) && previous
->wm_lp
[0] & WM1_LP_SR_EN
) {
3514 previous
->wm_lp
[0] &= ~WM1_LP_SR_EN
;
3515 I915_WRITE(WM1_LP_ILK
, previous
->wm_lp
[0]);
3520 * Don't touch WM1S_LP_EN here.
3521 * Doing so could cause underruns.
3528 * The spec says we shouldn't write when we don't need, because every write
3529 * causes WMs to be re-evaluated, expending some power.
3531 static void ilk_write_wm_values(struct drm_i915_private
*dev_priv
,
3532 struct ilk_wm_values
*results
)
3534 struct ilk_wm_values
*previous
= &dev_priv
->wm
.hw
;
3538 dirty
= ilk_compute_wm_dirty(dev_priv
, previous
, results
);
3542 _ilk_disable_lp_wm(dev_priv
, dirty
);
3544 if (dirty
& WM_DIRTY_PIPE(PIPE_A
))
3545 I915_WRITE(WM0_PIPEA_ILK
, results
->wm_pipe
[0]);
3546 if (dirty
& WM_DIRTY_PIPE(PIPE_B
))
3547 I915_WRITE(WM0_PIPEB_ILK
, results
->wm_pipe
[1]);
3548 if (dirty
& WM_DIRTY_PIPE(PIPE_C
))
3549 I915_WRITE(WM0_PIPEC_IVB
, results
->wm_pipe
[2]);
3551 if (dirty
& WM_DIRTY_LINETIME(PIPE_A
))
3552 I915_WRITE(PIPE_WM_LINETIME(PIPE_A
), results
->wm_linetime
[0]);
3553 if (dirty
& WM_DIRTY_LINETIME(PIPE_B
))
3554 I915_WRITE(PIPE_WM_LINETIME(PIPE_B
), results
->wm_linetime
[1]);
3555 if (dirty
& WM_DIRTY_LINETIME(PIPE_C
))
3556 I915_WRITE(PIPE_WM_LINETIME(PIPE_C
), results
->wm_linetime
[2]);
3558 if (dirty
& WM_DIRTY_DDB
) {
3559 if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
)) {
3560 val
= I915_READ(WM_MISC
);
3561 if (results
->partitioning
== INTEL_DDB_PART_1_2
)
3562 val
&= ~WM_MISC_DATA_PARTITION_5_6
;
3564 val
|= WM_MISC_DATA_PARTITION_5_6
;
3565 I915_WRITE(WM_MISC
, val
);
3567 val
= I915_READ(DISP_ARB_CTL2
);
3568 if (results
->partitioning
== INTEL_DDB_PART_1_2
)
3569 val
&= ~DISP_DATA_PARTITION_5_6
;
3571 val
|= DISP_DATA_PARTITION_5_6
;
3572 I915_WRITE(DISP_ARB_CTL2
, val
);
3576 if (dirty
& WM_DIRTY_FBC
) {
3577 val
= I915_READ(DISP_ARB_CTL
);
3578 if (results
->enable_fbc_wm
)
3579 val
&= ~DISP_FBC_WM_DIS
;
3581 val
|= DISP_FBC_WM_DIS
;
3582 I915_WRITE(DISP_ARB_CTL
, val
);
3585 if (dirty
& WM_DIRTY_LP(1) &&
3586 previous
->wm_lp_spr
[0] != results
->wm_lp_spr
[0])
3587 I915_WRITE(WM1S_LP_ILK
, results
->wm_lp_spr
[0]);
3589 if (INTEL_GEN(dev_priv
) >= 7) {
3590 if (dirty
& WM_DIRTY_LP(2) && previous
->wm_lp_spr
[1] != results
->wm_lp_spr
[1])
3591 I915_WRITE(WM2S_LP_IVB
, results
->wm_lp_spr
[1]);
3592 if (dirty
& WM_DIRTY_LP(3) && previous
->wm_lp_spr
[2] != results
->wm_lp_spr
[2])
3593 I915_WRITE(WM3S_LP_IVB
, results
->wm_lp_spr
[2]);
3596 if (dirty
& WM_DIRTY_LP(1) && previous
->wm_lp
[0] != results
->wm_lp
[0])
3597 I915_WRITE(WM1_LP_ILK
, results
->wm_lp
[0]);
3598 if (dirty
& WM_DIRTY_LP(2) && previous
->wm_lp
[1] != results
->wm_lp
[1])
3599 I915_WRITE(WM2_LP_ILK
, results
->wm_lp
[1]);
3600 if (dirty
& WM_DIRTY_LP(3) && previous
->wm_lp
[2] != results
->wm_lp
[2])
3601 I915_WRITE(WM3_LP_ILK
, results
->wm_lp
[2]);
3603 dev_priv
->wm
.hw
= *results
;
3606 bool ilk_disable_lp_wm(struct drm_device
*dev
)
3608 struct drm_i915_private
*dev_priv
= to_i915(dev
);
3610 return _ilk_disable_lp_wm(dev_priv
, WM_DIRTY_LP_ALL
);
3613 static u8
intel_enabled_dbuf_slices_num(struct drm_i915_private
*dev_priv
)
3617 /* Slice 1 will always be enabled */
3620 /* Gen prior to GEN11 have only one DBuf slice */
3621 if (INTEL_GEN(dev_priv
) < 11)
3622 return enabled_slices
;
3624 if (I915_READ(DBUF_CTL_S2
) & DBUF_POWER_STATE
)
3627 return enabled_slices
;
3631 * FIXME: We still don't have the proper code detect if we need to apply the WA,
3632 * so assume we'll always need it in order to avoid underruns.
3634 static bool skl_needs_memory_bw_wa(struct drm_i915_private
*dev_priv
)
3636 return IS_GEN9_BC(dev_priv
) || IS_BROXTON(dev_priv
);
3640 intel_has_sagv(struct drm_i915_private
*dev_priv
)
3642 return (IS_GEN9_BC(dev_priv
) || INTEL_GEN(dev_priv
) >= 10) &&
3643 dev_priv
->sagv_status
!= I915_SAGV_NOT_CONTROLLED
;
3647 * SAGV dynamically adjusts the system agent voltage and clock frequencies
3648 * depending on power and performance requirements. The display engine access
3649 * to system memory is blocked during the adjustment time. Because of the
3650 * blocking time, having this enabled can cause full system hangs and/or pipe
3651 * underruns if we don't meet all of the following requirements:
3653 * - <= 1 pipe enabled
3654 * - All planes can enable watermarks for latencies >= SAGV engine block time
3655 * - We're not using an interlaced display configuration
3658 intel_enable_sagv(struct drm_i915_private
*dev_priv
)
3662 if (!intel_has_sagv(dev_priv
))
3665 if (dev_priv
->sagv_status
== I915_SAGV_ENABLED
)
3668 DRM_DEBUG_KMS("Enabling SAGV\n");
3669 mutex_lock(&dev_priv
->pcu_lock
);
3671 ret
= sandybridge_pcode_write(dev_priv
, GEN9_PCODE_SAGV_CONTROL
,
3674 /* We don't need to wait for SAGV when enabling */
3675 mutex_unlock(&dev_priv
->pcu_lock
);
3678 * Some skl systems, pre-release machines in particular,
3679 * don't actually have SAGV.
3681 if (IS_SKYLAKE(dev_priv
) && ret
== -ENXIO
) {
3682 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3683 dev_priv
->sagv_status
= I915_SAGV_NOT_CONTROLLED
;
3685 } else if (ret
< 0) {
3686 DRM_ERROR("Failed to enable SAGV\n");
3690 dev_priv
->sagv_status
= I915_SAGV_ENABLED
;
3695 intel_disable_sagv(struct drm_i915_private
*dev_priv
)
3699 if (!intel_has_sagv(dev_priv
))
3702 if (dev_priv
->sagv_status
== I915_SAGV_DISABLED
)
3705 DRM_DEBUG_KMS("Disabling SAGV\n");
3706 mutex_lock(&dev_priv
->pcu_lock
);
3708 /* bspec says to keep retrying for at least 1 ms */
3709 ret
= skl_pcode_request(dev_priv
, GEN9_PCODE_SAGV_CONTROL
,
3711 GEN9_SAGV_IS_DISABLED
, GEN9_SAGV_IS_DISABLED
,
3713 mutex_unlock(&dev_priv
->pcu_lock
);
3716 * Some skl systems, pre-release machines in particular,
3717 * don't actually have SAGV.
3719 if (IS_SKYLAKE(dev_priv
) && ret
== -ENXIO
) {
3720 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3721 dev_priv
->sagv_status
= I915_SAGV_NOT_CONTROLLED
;
3723 } else if (ret
< 0) {
3724 DRM_ERROR("Failed to disable SAGV (%d)\n", ret
);
3728 dev_priv
->sagv_status
= I915_SAGV_DISABLED
;
3732 bool intel_can_enable_sagv(struct drm_atomic_state
*state
)
3734 struct drm_device
*dev
= state
->dev
;
3735 struct drm_i915_private
*dev_priv
= to_i915(dev
);
3736 struct intel_atomic_state
*intel_state
= to_intel_atomic_state(state
);
3737 struct intel_crtc
*crtc
;
3738 struct intel_plane
*plane
;
3739 struct intel_crtc_state
*cstate
;
3742 int sagv_block_time_us
;
3744 if (!intel_has_sagv(dev_priv
))
3747 if (IS_GEN(dev_priv
, 9))
3748 sagv_block_time_us
= 30;
3749 else if (IS_GEN(dev_priv
, 10))
3750 sagv_block_time_us
= 20;
3752 sagv_block_time_us
= 10;
3755 * SKL+ workaround: bspec recommends we disable SAGV when we have
3756 * more then one pipe enabled
3758 * If there are no active CRTCs, no additional checks need be performed
3760 if (hweight32(intel_state
->active_crtcs
) == 0)
3762 else if (hweight32(intel_state
->active_crtcs
) > 1)
3765 /* Since we're now guaranteed to only have one active CRTC... */
3766 pipe
= ffs(intel_state
->active_crtcs
) - 1;
3767 crtc
= intel_get_crtc_for_pipe(dev_priv
, pipe
);
3768 cstate
= to_intel_crtc_state(crtc
->base
.state
);
3770 if (crtc
->base
.state
->adjusted_mode
.flags
& DRM_MODE_FLAG_INTERLACE
)
3773 for_each_intel_plane_on_crtc(dev
, crtc
, plane
) {
3774 struct skl_plane_wm
*wm
=
3775 &cstate
->wm
.skl
.optimal
.planes
[plane
->id
];
3777 /* Skip this plane if it's not enabled */
3778 if (!wm
->wm
[0].plane_en
)
3781 /* Find the highest enabled wm level for this plane */
3782 for (level
= ilk_wm_max_level(dev_priv
);
3783 !wm
->wm
[level
].plane_en
; --level
)
3786 latency
= dev_priv
->wm
.skl_latency
[level
];
3788 if (skl_needs_memory_bw_wa(dev_priv
) &&
3789 plane
->base
.state
->fb
->modifier
==
3790 I915_FORMAT_MOD_X_TILED
)
3794 * If any of the planes on this pipe don't enable wm levels that
3795 * incur memory latencies higher than sagv_block_time_us we
3796 * can't enable SAGV.
3798 if (latency
< sagv_block_time_us
)
3805 static u16
intel_get_ddb_size(struct drm_i915_private
*dev_priv
,
3806 const struct intel_crtc_state
*cstate
,
3807 const u64 total_data_rate
,
3808 const int num_active
,
3809 struct skl_ddb_allocation
*ddb
)
3811 const struct drm_display_mode
*adjusted_mode
;
3813 u16 ddb_size
= INTEL_INFO(dev_priv
)->ddb_size
;
3815 WARN_ON(ddb_size
== 0);
3817 if (INTEL_GEN(dev_priv
) < 11)
3818 return ddb_size
- 4; /* 4 blocks for bypass path allocation */
3820 adjusted_mode
= &cstate
->base
.adjusted_mode
;
3821 total_data_bw
= total_data_rate
* drm_mode_vrefresh(adjusted_mode
);
3824 * 12GB/s is maximum BW supported by single DBuf slice.
3826 * FIXME dbuf slice code is broken:
3827 * - must wait for planes to stop using the slice before powering it off
3828 * - plane straddling both slices is illegal in multi-pipe scenarios
3829 * - should validate we stay within the hw bandwidth limits
3831 if (0 && (num_active
> 1 || total_data_bw
>= GBps(12))) {
3832 ddb
->enabled_slices
= 2;
3834 ddb
->enabled_slices
= 1;
3842 skl_ddb_get_pipe_allocation_limits(struct drm_i915_private
*dev_priv
,
3843 const struct intel_crtc_state
*cstate
,
3844 const u64 total_data_rate
,
3845 struct skl_ddb_allocation
*ddb
,
3846 struct skl_ddb_entry
*alloc
, /* out */
3847 int *num_active
/* out */)
3849 struct drm_atomic_state
*state
= cstate
->base
.state
;
3850 struct intel_atomic_state
*intel_state
= to_intel_atomic_state(state
);
3851 struct drm_crtc
*for_crtc
= cstate
->base
.crtc
;
3852 const struct drm_crtc_state
*crtc_state
;
3853 const struct drm_crtc
*crtc
;
3854 u32 pipe_width
= 0, total_width
= 0, width_before_pipe
= 0;
3855 enum pipe for_pipe
= to_intel_crtc(for_crtc
)->pipe
;
3859 if (WARN_ON(!state
) || !cstate
->base
.active
) {
3862 *num_active
= hweight32(dev_priv
->active_crtcs
);
3866 if (intel_state
->active_pipe_changes
)
3867 *num_active
= hweight32(intel_state
->active_crtcs
);
3869 *num_active
= hweight32(dev_priv
->active_crtcs
);
3871 ddb_size
= intel_get_ddb_size(dev_priv
, cstate
, total_data_rate
,
3875 * If the state doesn't change the active CRTC's or there is no
3876 * modeset request, then there's no need to recalculate;
3877 * the existing pipe allocation limits should remain unchanged.
3878 * Note that we're safe from racing commits since any racing commit
3879 * that changes the active CRTC list or do modeset would need to
3880 * grab _all_ crtc locks, including the one we currently hold.
3882 if (!intel_state
->active_pipe_changes
&& !intel_state
->modeset
) {
3884 * alloc may be cleared by clear_intel_crtc_state,
3885 * copy from old state to be sure
3887 *alloc
= to_intel_crtc_state(for_crtc
->state
)->wm
.skl
.ddb
;
3892 * Watermark/ddb requirement highly depends upon width of the
3893 * framebuffer, So instead of allocating DDB equally among pipes
3894 * distribute DDB based on resolution/width of the display.
3896 for_each_new_crtc_in_state(state
, crtc
, crtc_state
, i
) {
3897 const struct drm_display_mode
*adjusted_mode
;
3898 int hdisplay
, vdisplay
;
3901 if (!crtc_state
->enable
)
3904 pipe
= to_intel_crtc(crtc
)->pipe
;
3905 adjusted_mode
= &crtc_state
->adjusted_mode
;
3906 drm_mode_get_hv_timing(adjusted_mode
, &hdisplay
, &vdisplay
);
3907 total_width
+= hdisplay
;
3909 if (pipe
< for_pipe
)
3910 width_before_pipe
+= hdisplay
;
3911 else if (pipe
== for_pipe
)
3912 pipe_width
= hdisplay
;
3915 alloc
->start
= ddb_size
* width_before_pipe
/ total_width
;
3916 alloc
->end
= ddb_size
* (width_before_pipe
+ pipe_width
) / total_width
;
/*
 * Fixed DDB block reservation for the cursor plane: a larger share when
 * only one pipe is active, a smaller one otherwise.
 */
static unsigned int skl_cursor_allocation(int num_active)
{
	if (num_active == 1)
		return 32;
	else
		return 8;
}
3927 static void skl_ddb_entry_init_from_hw(struct drm_i915_private
*dev_priv
,
3928 struct skl_ddb_entry
*entry
, u32 reg
)
3931 entry
->start
= reg
& DDB_ENTRY_MASK
;
3932 entry
->end
= (reg
>> DDB_ENTRY_END_SHIFT
) & DDB_ENTRY_MASK
;
3939 skl_ddb_get_hw_plane_state(struct drm_i915_private
*dev_priv
,
3940 const enum pipe pipe
,
3941 const enum plane_id plane_id
,
3942 struct skl_ddb_entry
*ddb_y
,
3943 struct skl_ddb_entry
*ddb_uv
)
3948 /* Cursor doesn't support NV12/planar, so no extra calculation needed */
3949 if (plane_id
== PLANE_CURSOR
) {
3950 val
= I915_READ(CUR_BUF_CFG(pipe
));
3951 skl_ddb_entry_init_from_hw(dev_priv
, ddb_y
, val
);
3955 val
= I915_READ(PLANE_CTL(pipe
, plane_id
));
3957 /* No DDB allocated for disabled planes */
3958 if (val
& PLANE_CTL_ENABLE
)
3959 fourcc
= skl_format_to_fourcc(val
& PLANE_CTL_FORMAT_MASK
,
3960 val
& PLANE_CTL_ORDER_RGBX
,
3961 val
& PLANE_CTL_ALPHA_MASK
);
3963 if (INTEL_GEN(dev_priv
) >= 11) {
3964 val
= I915_READ(PLANE_BUF_CFG(pipe
, plane_id
));
3965 skl_ddb_entry_init_from_hw(dev_priv
, ddb_y
, val
);
3967 val
= I915_READ(PLANE_BUF_CFG(pipe
, plane_id
));
3968 val2
= I915_READ(PLANE_NV12_BUF_CFG(pipe
, plane_id
));
3970 if (fourcc
== DRM_FORMAT_NV12
)
3973 skl_ddb_entry_init_from_hw(dev_priv
, ddb_y
, val
);
3974 skl_ddb_entry_init_from_hw(dev_priv
, ddb_uv
, val2
);
3978 void skl_pipe_ddb_get_hw_state(struct intel_crtc
*crtc
,
3979 struct skl_ddb_entry
*ddb_y
,
3980 struct skl_ddb_entry
*ddb_uv
)
3982 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
3983 enum intel_display_power_domain power_domain
;
3984 enum pipe pipe
= crtc
->pipe
;
3985 intel_wakeref_t wakeref
;
3986 enum plane_id plane_id
;
3988 power_domain
= POWER_DOMAIN_PIPE(pipe
);
3989 wakeref
= intel_display_power_get_if_enabled(dev_priv
, power_domain
);
3993 for_each_plane_id_on_crtc(crtc
, plane_id
)
3994 skl_ddb_get_hw_plane_state(dev_priv
, pipe
,
3999 intel_display_power_put(dev_priv
, power_domain
, wakeref
);
4002 void skl_ddb_get_hw_state(struct drm_i915_private
*dev_priv
,
4003 struct skl_ddb_allocation
*ddb
/* out */)
4005 ddb
->enabled_slices
= intel_enabled_dbuf_slices_num(dev_priv
);
4009 * Determines the downscale amount of a plane for the purposes of watermark calculations.
4010 * The bspec defines downscale amount as:
4013 * Horizontal down scale amount = maximum[1, Horizontal source size /
4014 * Horizontal destination size]
4015 * Vertical down scale amount = maximum[1, Vertical source size /
4016 * Vertical destination size]
4017 * Total down scale amount = Horizontal down scale amount *
4018 * Vertical down scale amount
4021 * Return value is provided in 16.16 fixed point form to retain fractional part.
4022 * Caller should take care of dividing & rounding off the value.
4024 static uint_fixed_16_16_t
4025 skl_plane_downscale_amount(const struct intel_crtc_state
*cstate
,
4026 const struct intel_plane_state
*pstate
)
4028 struct intel_plane
*plane
= to_intel_plane(pstate
->base
.plane
);
4029 u32 src_w
, src_h
, dst_w
, dst_h
;
4030 uint_fixed_16_16_t fp_w_ratio
, fp_h_ratio
;
4031 uint_fixed_16_16_t downscale_h
, downscale_w
;
4033 if (WARN_ON(!intel_wm_plane_visible(cstate
, pstate
)))
4034 return u32_to_fixed16(0);
4036 /* n.b., src is 16.16 fixed point, dst is whole integer */
4037 if (plane
->id
== PLANE_CURSOR
) {
4039 * Cursors only support 0/180 degree rotation,
4040 * hence no need to account for rotation here.
4042 src_w
= pstate
->base
.src_w
>> 16;
4043 src_h
= pstate
->base
.src_h
>> 16;
4044 dst_w
= pstate
->base
.crtc_w
;
4045 dst_h
= pstate
->base
.crtc_h
;
4048 * Src coordinates are already rotated by 270 degrees for
4049 * the 90/270 degree plane rotation cases (to match the
4050 * GTT mapping), hence no need to account for rotation here.
4052 src_w
= drm_rect_width(&pstate
->base
.src
) >> 16;
4053 src_h
= drm_rect_height(&pstate
->base
.src
) >> 16;
4054 dst_w
= drm_rect_width(&pstate
->base
.dst
);
4055 dst_h
= drm_rect_height(&pstate
->base
.dst
);
4058 fp_w_ratio
= div_fixed16(src_w
, dst_w
);
4059 fp_h_ratio
= div_fixed16(src_h
, dst_h
);
4060 downscale_w
= max_fixed16(fp_w_ratio
, u32_to_fixed16(1));
4061 downscale_h
= max_fixed16(fp_h_ratio
, u32_to_fixed16(1));
4063 return mul_fixed16(downscale_w
, downscale_h
);
4066 static uint_fixed_16_16_t
4067 skl_pipe_downscale_amount(const struct intel_crtc_state
*crtc_state
)
4069 uint_fixed_16_16_t pipe_downscale
= u32_to_fixed16(1);
4071 if (!crtc_state
->base
.enable
)
4072 return pipe_downscale
;
4074 if (crtc_state
->pch_pfit
.enabled
) {
4075 u32 src_w
, src_h
, dst_w
, dst_h
;
4076 u32 pfit_size
= crtc_state
->pch_pfit
.size
;
4077 uint_fixed_16_16_t fp_w_ratio
, fp_h_ratio
;
4078 uint_fixed_16_16_t downscale_h
, downscale_w
;
4080 src_w
= crtc_state
->pipe_src_w
;
4081 src_h
= crtc_state
->pipe_src_h
;
4082 dst_w
= pfit_size
>> 16;
4083 dst_h
= pfit_size
& 0xffff;
4085 if (!dst_w
|| !dst_h
)
4086 return pipe_downscale
;
4088 fp_w_ratio
= div_fixed16(src_w
, dst_w
);
4089 fp_h_ratio
= div_fixed16(src_h
, dst_h
);
4090 downscale_w
= max_fixed16(fp_w_ratio
, u32_to_fixed16(1));
4091 downscale_h
= max_fixed16(fp_h_ratio
, u32_to_fixed16(1));
4093 pipe_downscale
= mul_fixed16(downscale_w
, downscale_h
);
4096 return pipe_downscale
;
4099 int skl_check_pipe_max_pixel_rate(struct intel_crtc
*intel_crtc
,
4100 struct intel_crtc_state
*cstate
)
4102 struct drm_i915_private
*dev_priv
= to_i915(intel_crtc
->base
.dev
);
4103 struct drm_crtc_state
*crtc_state
= &cstate
->base
;
4104 struct drm_atomic_state
*state
= crtc_state
->state
;
4105 struct drm_plane
*plane
;
4106 const struct drm_plane_state
*pstate
;
4107 struct intel_plane_state
*intel_pstate
;
4108 int crtc_clock
, dotclk
;
4109 u32 pipe_max_pixel_rate
;
4110 uint_fixed_16_16_t pipe_downscale
;
4111 uint_fixed_16_16_t max_downscale
= u32_to_fixed16(1);
4113 if (!cstate
->base
.enable
)
4116 drm_atomic_crtc_state_for_each_plane_state(plane
, pstate
, crtc_state
) {
4117 uint_fixed_16_16_t plane_downscale
;
4118 uint_fixed_16_16_t fp_9_div_8
= div_fixed16(9, 8);
4121 if (!intel_wm_plane_visible(cstate
,
4122 to_intel_plane_state(pstate
)))
4125 if (WARN_ON(!pstate
->fb
))
4128 intel_pstate
= to_intel_plane_state(pstate
);
4129 plane_downscale
= skl_plane_downscale_amount(cstate
,
4131 bpp
= pstate
->fb
->format
->cpp
[0] * 8;
4133 plane_downscale
= mul_fixed16(plane_downscale
,
4136 max_downscale
= max_fixed16(plane_downscale
, max_downscale
);
4138 pipe_downscale
= skl_pipe_downscale_amount(cstate
);
4140 pipe_downscale
= mul_fixed16(pipe_downscale
, max_downscale
);
4142 crtc_clock
= crtc_state
->adjusted_mode
.crtc_clock
;
4143 dotclk
= to_intel_atomic_state(state
)->cdclk
.logical
.cdclk
;
4145 if (IS_GEMINILAKE(dev_priv
) || INTEL_GEN(dev_priv
) >= 10)
4148 pipe_max_pixel_rate
= div_round_up_u32_fixed16(dotclk
, pipe_downscale
);
4150 if (pipe_max_pixel_rate
< crtc_clock
) {
4151 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4159 skl_plane_relative_data_rate(const struct intel_crtc_state
*cstate
,
4160 const struct intel_plane_state
*intel_pstate
,
4163 struct intel_plane
*intel_plane
=
4164 to_intel_plane(intel_pstate
->base
.plane
);
4166 u32 width
= 0, height
= 0;
4167 struct drm_framebuffer
*fb
;
4169 uint_fixed_16_16_t down_scale_amount
;
4172 if (!intel_pstate
->base
.visible
)
4175 fb
= intel_pstate
->base
.fb
;
4176 format
= fb
->format
->format
;
4178 if (intel_plane
->id
== PLANE_CURSOR
)
4180 if (plane
== 1 && format
!= DRM_FORMAT_NV12
)
4184 * Src coordinates are already rotated by 270 degrees for
4185 * the 90/270 degree plane rotation cases (to match the
4186 * GTT mapping), hence no need to account for rotation here.
4188 width
= drm_rect_width(&intel_pstate
->base
.src
) >> 16;
4189 height
= drm_rect_height(&intel_pstate
->base
.src
) >> 16;
4191 /* UV plane does 1/2 pixel sub-sampling */
4192 if (plane
== 1 && format
== DRM_FORMAT_NV12
) {
4197 data_rate
= width
* height
;
4199 down_scale_amount
= skl_plane_downscale_amount(cstate
, intel_pstate
);
4201 rate
= mul_round_up_u32_fixed16(data_rate
, down_scale_amount
);
4203 rate
*= fb
->format
->cpp
[plane
];
4208 skl_get_total_relative_data_rate(struct intel_crtc_state
*intel_cstate
,
4209 u64
*plane_data_rate
,
4210 u64
*uv_plane_data_rate
)
4212 struct drm_crtc_state
*cstate
= &intel_cstate
->base
;
4213 struct drm_atomic_state
*state
= cstate
->state
;
4214 struct drm_plane
*plane
;
4215 const struct drm_plane_state
*pstate
;
4216 u64 total_data_rate
= 0;
4218 if (WARN_ON(!state
))
4221 /* Calculate and cache data rate for each plane */
4222 drm_atomic_crtc_state_for_each_plane_state(plane
, pstate
, cstate
) {
4223 enum plane_id plane_id
= to_intel_plane(plane
)->id
;
4225 const struct intel_plane_state
*intel_pstate
=
4226 to_intel_plane_state(pstate
);
4229 rate
= skl_plane_relative_data_rate(intel_cstate
,
4231 plane_data_rate
[plane_id
] = rate
;
4232 total_data_rate
+= rate
;
4235 rate
= skl_plane_relative_data_rate(intel_cstate
,
4237 uv_plane_data_rate
[plane_id
] = rate
;
4238 total_data_rate
+= rate
;
4241 return total_data_rate
;
4245 icl_get_total_relative_data_rate(struct intel_crtc_state
*intel_cstate
,
4246 u64
*plane_data_rate
)
4248 struct drm_crtc_state
*cstate
= &intel_cstate
->base
;
4249 struct drm_atomic_state
*state
= cstate
->state
;
4250 struct drm_plane
*plane
;
4251 const struct drm_plane_state
*pstate
;
4252 u64 total_data_rate
= 0;
4254 if (WARN_ON(!state
))
4257 /* Calculate and cache data rate for each plane */
4258 drm_atomic_crtc_state_for_each_plane_state(plane
, pstate
, cstate
) {
4259 const struct intel_plane_state
*intel_pstate
=
4260 to_intel_plane_state(pstate
);
4261 enum plane_id plane_id
= to_intel_plane(plane
)->id
;
4264 if (!intel_pstate
->linked_plane
) {
4265 rate
= skl_plane_relative_data_rate(intel_cstate
,
4267 plane_data_rate
[plane_id
] = rate
;
4268 total_data_rate
+= rate
;
4270 enum plane_id y_plane_id
;
4273 * The slave plane might not iterate in
4274 * drm_atomic_crtc_state_for_each_plane_state(),
4275 * and needs the master plane state which may be
4276 * NULL if we try get_new_plane_state(), so we
4277 * always calculate from the master.
4279 if (intel_pstate
->slave
)
4282 /* Y plane rate is calculated on the slave */
4283 rate
= skl_plane_relative_data_rate(intel_cstate
,
4285 y_plane_id
= intel_pstate
->linked_plane
->id
;
4286 plane_data_rate
[y_plane_id
] = rate
;
4287 total_data_rate
+= rate
;
4289 rate
= skl_plane_relative_data_rate(intel_cstate
,
4291 plane_data_rate
[plane_id
] = rate
;
4292 total_data_rate
+= rate
;
4296 return total_data_rate
;
4300 skl_allocate_pipe_ddb(struct intel_crtc_state
*cstate
,
4301 struct skl_ddb_allocation
*ddb
/* out */)
4303 struct drm_atomic_state
*state
= cstate
->base
.state
;
4304 struct drm_crtc
*crtc
= cstate
->base
.crtc
;
4305 struct drm_i915_private
*dev_priv
= to_i915(crtc
->dev
);
4306 struct intel_crtc
*intel_crtc
= to_intel_crtc(crtc
);
4307 struct skl_ddb_entry
*alloc
= &cstate
->wm
.skl
.ddb
;
4308 struct skl_plane_wm
*wm
;
4309 u16 alloc_size
, start
= 0;
4310 u16 total
[I915_MAX_PLANES
] = {};
4311 u16 uv_total
[I915_MAX_PLANES
] = {};
4312 u64 total_data_rate
;
4313 enum plane_id plane_id
;
4315 u64 plane_data_rate
[I915_MAX_PLANES
] = {};
4316 u64 uv_plane_data_rate
[I915_MAX_PLANES
] = {};
4320 /* Clear the partitioning for disabled planes. */
4321 memset(cstate
->wm
.skl
.plane_ddb_y
, 0, sizeof(cstate
->wm
.skl
.plane_ddb_y
));
4322 memset(cstate
->wm
.skl
.plane_ddb_uv
, 0, sizeof(cstate
->wm
.skl
.plane_ddb_uv
));
4324 if (WARN_ON(!state
))
4327 if (!cstate
->base
.active
) {
4328 alloc
->start
= alloc
->end
= 0;
4332 if (INTEL_GEN(dev_priv
) < 11)
4334 skl_get_total_relative_data_rate(cstate
,
4336 uv_plane_data_rate
);
4339 icl_get_total_relative_data_rate(cstate
,
4342 skl_ddb_get_pipe_allocation_limits(dev_priv
, cstate
, total_data_rate
,
4343 ddb
, alloc
, &num_active
);
4344 alloc_size
= skl_ddb_entry_size(alloc
);
4345 if (alloc_size
== 0)
4348 /* Allocate fixed number of blocks for cursor. */
4349 total
[PLANE_CURSOR
] = skl_cursor_allocation(num_active
);
4350 alloc_size
-= total
[PLANE_CURSOR
];
4351 cstate
->wm
.skl
.plane_ddb_y
[PLANE_CURSOR
].start
=
4352 alloc
->end
- total
[PLANE_CURSOR
];
4353 cstate
->wm
.skl
.plane_ddb_y
[PLANE_CURSOR
].end
= alloc
->end
;
4355 if (total_data_rate
== 0)
4359 * Find the highest watermark level for which we can satisfy the block
4360 * requirement of active planes.
4362 for (level
= ilk_wm_max_level(dev_priv
); level
>= 0; level
--) {
4364 for_each_plane_id_on_crtc(intel_crtc
, plane_id
) {
4365 if (plane_id
== PLANE_CURSOR
)
4368 wm
= &cstate
->wm
.skl
.optimal
.planes
[plane_id
];
4369 blocks
+= wm
->wm
[level
].min_ddb_alloc
;
4370 blocks
+= wm
->uv_wm
[level
].min_ddb_alloc
;
4373 if (blocks
< alloc_size
) {
4374 alloc_size
-= blocks
;
4380 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
4381 DRM_DEBUG_KMS("minimum required %d/%d\n", blocks
,
4387 * Grant each plane the blocks it requires at the highest achievable
4388 * watermark level, plus an extra share of the leftover blocks
4389 * proportional to its relative data rate.
4391 for_each_plane_id_on_crtc(intel_crtc
, plane_id
) {
4395 if (plane_id
== PLANE_CURSOR
)
4399 * We've accounted for all active planes; remaining planes are
4402 if (total_data_rate
== 0)
4405 wm
= &cstate
->wm
.skl
.optimal
.planes
[plane_id
];
4407 rate
= plane_data_rate
[plane_id
];
4408 extra
= min_t(u16
, alloc_size
,
4409 DIV64_U64_ROUND_UP(alloc_size
* rate
,
4411 total
[plane_id
] = wm
->wm
[level
].min_ddb_alloc
+ extra
;
4412 alloc_size
-= extra
;
4413 total_data_rate
-= rate
;
4415 if (total_data_rate
== 0)
4418 rate
= uv_plane_data_rate
[plane_id
];
4419 extra
= min_t(u16
, alloc_size
,
4420 DIV64_U64_ROUND_UP(alloc_size
* rate
,
4422 uv_total
[plane_id
] = wm
->uv_wm
[level
].min_ddb_alloc
+ extra
;
4423 alloc_size
-= extra
;
4424 total_data_rate
-= rate
;
4426 WARN_ON(alloc_size
!= 0 || total_data_rate
!= 0);
4428 /* Set the actual DDB start/end points for each plane */
4429 start
= alloc
->start
;
4430 for_each_plane_id_on_crtc(intel_crtc
, plane_id
) {
4431 struct skl_ddb_entry
*plane_alloc
, *uv_plane_alloc
;
4433 if (plane_id
== PLANE_CURSOR
)
4436 plane_alloc
= &cstate
->wm
.skl
.plane_ddb_y
[plane_id
];
4437 uv_plane_alloc
= &cstate
->wm
.skl
.plane_ddb_uv
[plane_id
];
4439 /* Gen11+ uses a separate plane for UV watermarks */
4440 WARN_ON(INTEL_GEN(dev_priv
) >= 11 && uv_total
[plane_id
]);
4442 /* Leave disabled planes at (0,0) */
4443 if (total
[plane_id
]) {
4444 plane_alloc
->start
= start
;
4445 start
+= total
[plane_id
];
4446 plane_alloc
->end
= start
;
4449 if (uv_total
[plane_id
]) {
4450 uv_plane_alloc
->start
= start
;
4451 start
+= uv_total
[plane_id
];
4452 uv_plane_alloc
->end
= start
;
4457 * When we calculated watermark values we didn't know how high
4458 * of a level we'd actually be able to hit, so we just marked
4459 * all levels as "enabled." Go back now and disable the ones
4460 * that aren't actually possible.
4462 for (level
++; level
<= ilk_wm_max_level(dev_priv
); level
++) {
4463 for_each_plane_id_on_crtc(intel_crtc
, plane_id
) {
4464 wm
= &cstate
->wm
.skl
.optimal
.planes
[plane_id
];
4465 memset(&wm
->wm
[level
], 0, sizeof(wm
->wm
[level
]));
4470 * Go back and disable the transition watermark if it turns out we
4471 * don't have enough DDB blocks for it.
4473 for_each_plane_id_on_crtc(intel_crtc
, plane_id
) {
4474 wm
= &cstate
->wm
.skl
.optimal
.planes
[plane_id
];
4475 if (wm
->trans_wm
.plane_res_b
>= total
[plane_id
])
4476 memset(&wm
->trans_wm
, 0, sizeof(wm
->trans_wm
));
4483 * The max latency should be 257 (max the punit can code is 255 and we add 2us
4484 * for the read latency) and cpp should always be <= 8, so that
4485 * should allow pixel_rate up to ~2 GHz which seems sufficient since max
4486 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4488 static uint_fixed_16_16_t
4489 skl_wm_method1(const struct drm_i915_private
*dev_priv
, u32 pixel_rate
,
4490 u8 cpp
, u32 latency
, u32 dbuf_block_size
)
4492 u32 wm_intermediate_val
;
4493 uint_fixed_16_16_t ret
;
4496 return FP_16_16_MAX
;
4498 wm_intermediate_val
= latency
* pixel_rate
* cpp
;
4499 ret
= div_fixed16(wm_intermediate_val
, 1000 * dbuf_block_size
);
4501 if (INTEL_GEN(dev_priv
) >= 10)
4502 ret
= add_fixed16_u32(ret
, 1);
4507 static uint_fixed_16_16_t
4508 skl_wm_method2(u32 pixel_rate
, u32 pipe_htotal
, u32 latency
,
4509 uint_fixed_16_16_t plane_blocks_per_line
)
4511 u32 wm_intermediate_val
;
4512 uint_fixed_16_16_t ret
;
4515 return FP_16_16_MAX
;
4517 wm_intermediate_val
= latency
* pixel_rate
;
4518 wm_intermediate_val
= DIV_ROUND_UP(wm_intermediate_val
,
4519 pipe_htotal
* 1000);
4520 ret
= mul_u32_fixed16(wm_intermediate_val
, plane_blocks_per_line
);
4524 static uint_fixed_16_16_t
4525 intel_get_linetime_us(const struct intel_crtc_state
*cstate
)
4529 uint_fixed_16_16_t linetime_us
;
4531 if (!cstate
->base
.active
)
4532 return u32_to_fixed16(0);
4534 pixel_rate
= cstate
->pixel_rate
;
4536 if (WARN_ON(pixel_rate
== 0))
4537 return u32_to_fixed16(0);
4539 crtc_htotal
= cstate
->base
.adjusted_mode
.crtc_htotal
;
4540 linetime_us
= div_fixed16(crtc_htotal
* 1000, pixel_rate
);
4546 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state
*cstate
,
4547 const struct intel_plane_state
*pstate
)
4549 u64 adjusted_pixel_rate
;
4550 uint_fixed_16_16_t downscale_amount
;
4552 /* Shouldn't reach here on disabled planes... */
4553 if (WARN_ON(!intel_wm_plane_visible(cstate
, pstate
)))
4557 * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4558 * with additional adjustments for plane-specific scaling.
4560 adjusted_pixel_rate
= cstate
->pixel_rate
;
4561 downscale_amount
= skl_plane_downscale_amount(cstate
, pstate
);
4563 return mul_round_up_u32_fixed16(adjusted_pixel_rate
,
4568 skl_compute_plane_wm_params(const struct intel_crtc_state
*cstate
,
4569 const struct intel_plane_state
*intel_pstate
,
4570 struct skl_wm_params
*wp
, int color_plane
)
4572 struct intel_plane
*plane
= to_intel_plane(intel_pstate
->base
.plane
);
4573 struct drm_i915_private
*dev_priv
= to_i915(plane
->base
.dev
);
4574 const struct drm_plane_state
*pstate
= &intel_pstate
->base
;
4575 const struct drm_framebuffer
*fb
= pstate
->fb
;
4578 /* only NV12 format has two planes */
4579 if (color_plane
== 1 && fb
->format
->format
!= DRM_FORMAT_NV12
) {
4580 DRM_DEBUG_KMS("Non NV12 format have single plane\n");
4584 wp
->y_tiled
= fb
->modifier
== I915_FORMAT_MOD_Y_TILED
||
4585 fb
->modifier
== I915_FORMAT_MOD_Yf_TILED
||
4586 fb
->modifier
== I915_FORMAT_MOD_Y_TILED_CCS
||
4587 fb
->modifier
== I915_FORMAT_MOD_Yf_TILED_CCS
;
4588 wp
->x_tiled
= fb
->modifier
== I915_FORMAT_MOD_X_TILED
;
4589 wp
->rc_surface
= fb
->modifier
== I915_FORMAT_MOD_Y_TILED_CCS
||
4590 fb
->modifier
== I915_FORMAT_MOD_Yf_TILED_CCS
;
4591 wp
->is_planar
= fb
->format
->format
== DRM_FORMAT_NV12
;
4593 if (plane
->id
== PLANE_CURSOR
) {
4594 wp
->width
= intel_pstate
->base
.crtc_w
;
4597 * Src coordinates are already rotated by 270 degrees for
4598 * the 90/270 degree plane rotation cases (to match the
4599 * GTT mapping), hence no need to account for rotation here.
4601 wp
->width
= drm_rect_width(&intel_pstate
->base
.src
) >> 16;
4604 if (color_plane
== 1 && wp
->is_planar
)
4607 wp
->cpp
= fb
->format
->cpp
[color_plane
];
4608 wp
->plane_pixel_rate
= skl_adjusted_plane_pixel_rate(cstate
,
4611 if (INTEL_GEN(dev_priv
) >= 11 &&
4612 fb
->modifier
== I915_FORMAT_MOD_Yf_TILED
&& wp
->cpp
== 1)
4613 wp
->dbuf_block_size
= 256;
4615 wp
->dbuf_block_size
= 512;
4617 if (drm_rotation_90_or_270(pstate
->rotation
)) {
4621 wp
->y_min_scanlines
= 16;
4624 wp
->y_min_scanlines
= 8;
4627 wp
->y_min_scanlines
= 4;
4630 MISSING_CASE(wp
->cpp
);
4634 wp
->y_min_scanlines
= 4;
4637 if (skl_needs_memory_bw_wa(dev_priv
))
4638 wp
->y_min_scanlines
*= 2;
4640 wp
->plane_bytes_per_line
= wp
->width
* wp
->cpp
;
4642 interm_pbpl
= DIV_ROUND_UP(wp
->plane_bytes_per_line
*
4643 wp
->y_min_scanlines
,
4644 wp
->dbuf_block_size
);
4646 if (INTEL_GEN(dev_priv
) >= 10)
4649 wp
->plane_blocks_per_line
= div_fixed16(interm_pbpl
,
4650 wp
->y_min_scanlines
);
4651 } else if (wp
->x_tiled
&& IS_GEN(dev_priv
, 9)) {
4652 interm_pbpl
= DIV_ROUND_UP(wp
->plane_bytes_per_line
,
4653 wp
->dbuf_block_size
);
4654 wp
->plane_blocks_per_line
= u32_to_fixed16(interm_pbpl
);
4656 interm_pbpl
= DIV_ROUND_UP(wp
->plane_bytes_per_line
,
4657 wp
->dbuf_block_size
) + 1;
4658 wp
->plane_blocks_per_line
= u32_to_fixed16(interm_pbpl
);
4661 wp
->y_tile_minimum
= mul_u32_fixed16(wp
->y_min_scanlines
,
4662 wp
->plane_blocks_per_line
);
4663 wp
->linetime_us
= fixed16_to_u32_round_up(
4664 intel_get_linetime_us(cstate
));
4669 static bool skl_wm_has_lines(struct drm_i915_private
*dev_priv
, int level
)
4671 if (INTEL_GEN(dev_priv
) >= 10 || IS_GEMINILAKE(dev_priv
))
4674 /* The number of lines are ignored for the level 0 watermark. */
4678 static void skl_compute_plane_wm(const struct intel_crtc_state
*cstate
,
4679 const struct intel_plane_state
*intel_pstate
,
4681 const struct skl_wm_params
*wp
,
4682 const struct skl_wm_level
*result_prev
,
4683 struct skl_wm_level
*result
/* out */)
4685 struct drm_i915_private
*dev_priv
=
4686 to_i915(intel_pstate
->base
.plane
->dev
);
4687 u32 latency
= dev_priv
->wm
.skl_latency
[level
];
4688 uint_fixed_16_16_t method1
, method2
;
4689 uint_fixed_16_16_t selected_result
;
4690 u32 res_blocks
, res_lines
, min_ddb_alloc
= 0;
4694 result
->min_ddb_alloc
= U16_MAX
;
4698 /* Display WA #1141: kbl,cfl */
4699 if ((IS_KABYLAKE(dev_priv
) || IS_COFFEELAKE(dev_priv
) ||
4700 IS_CNL_REVID(dev_priv
, CNL_REVID_A0
, CNL_REVID_B0
)) &&
4701 dev_priv
->ipc_enabled
)
4704 if (skl_needs_memory_bw_wa(dev_priv
) && wp
->x_tiled
)
4707 method1
= skl_wm_method1(dev_priv
, wp
->plane_pixel_rate
,
4708 wp
->cpp
, latency
, wp
->dbuf_block_size
);
4709 method2
= skl_wm_method2(wp
->plane_pixel_rate
,
4710 cstate
->base
.adjusted_mode
.crtc_htotal
,
4712 wp
->plane_blocks_per_line
);
4715 selected_result
= max_fixed16(method2
, wp
->y_tile_minimum
);
4717 if ((wp
->cpp
* cstate
->base
.adjusted_mode
.crtc_htotal
/
4718 wp
->dbuf_block_size
< 1) &&
4719 (wp
->plane_bytes_per_line
/ wp
->dbuf_block_size
< 1)) {
4720 selected_result
= method2
;
4721 } else if (latency
>= wp
->linetime_us
) {
4722 if (IS_GEN(dev_priv
, 9) &&
4723 !IS_GEMINILAKE(dev_priv
))
4724 selected_result
= min_fixed16(method1
, method2
);
4726 selected_result
= method2
;
4728 selected_result
= method1
;
4732 res_blocks
= fixed16_to_u32_round_up(selected_result
) + 1;
4733 res_lines
= div_round_up_fixed16(selected_result
,
4734 wp
->plane_blocks_per_line
);
4736 if (IS_GEN9_BC(dev_priv
) || IS_BROXTON(dev_priv
)) {
4737 /* Display WA #1125: skl,bxt,kbl */
4738 if (level
== 0 && wp
->rc_surface
)
4740 fixed16_to_u32_round_up(wp
->y_tile_minimum
);
4742 /* Display WA #1126: skl,bxt,kbl */
4743 if (level
>= 1 && level
<= 7) {
4746 fixed16_to_u32_round_up(wp
->y_tile_minimum
);
4747 res_lines
+= wp
->y_min_scanlines
;
4753 * Make sure result blocks for higher latency levels are
4754 * atleast as high as level below the current level.
4755 * Assumption in DDB algorithm optimization for special
4756 * cases. Also covers Display WA #1125 for RC.
4758 if (result_prev
->plane_res_b
> res_blocks
)
4759 res_blocks
= result_prev
->plane_res_b
;
4763 if (INTEL_GEN(dev_priv
) >= 11) {
4767 if (res_lines
% wp
->y_min_scanlines
== 0)
4768 extra_lines
= wp
->y_min_scanlines
;
4770 extra_lines
= wp
->y_min_scanlines
* 2 -
4771 res_lines
% wp
->y_min_scanlines
;
4773 min_ddb_alloc
= mul_round_up_u32_fixed16(res_lines
+ extra_lines
,
4774 wp
->plane_blocks_per_line
);
4776 min_ddb_alloc
= res_blocks
+
4777 DIV_ROUND_UP(res_blocks
, 10);
4781 if (!skl_wm_has_lines(dev_priv
, level
))
4784 if (res_lines
> 31) {
4786 result
->min_ddb_alloc
= U16_MAX
;
4791 * If res_lines is valid, assume we can use this watermark level
4792 * for now. We'll come back and disable it after we calculate the
4793 * DDB allocation if it turns out we don't actually have enough
4794 * blocks to satisfy it.
4796 result
->plane_res_b
= res_blocks
;
4797 result
->plane_res_l
= res_lines
;
4798 /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */
4799 result
->min_ddb_alloc
= max(min_ddb_alloc
, res_blocks
) + 1;
4800 result
->plane_en
= true;
4804 skl_compute_wm_levels(const struct intel_crtc_state
*cstate
,
4805 const struct intel_plane_state
*intel_pstate
,
4806 const struct skl_wm_params
*wm_params
,
4807 struct skl_wm_level
*levels
)
4809 struct drm_i915_private
*dev_priv
=
4810 to_i915(intel_pstate
->base
.plane
->dev
);
4811 int level
, max_level
= ilk_wm_max_level(dev_priv
);
4812 struct skl_wm_level
*result_prev
= &levels
[0];
4814 for (level
= 0; level
<= max_level
; level
++) {
4815 struct skl_wm_level
*result
= &levels
[level
];
4817 skl_compute_plane_wm(cstate
, intel_pstate
, level
, wm_params
,
4818 result_prev
, result
);
4820 result_prev
= result
;
4825 skl_compute_linetime_wm(const struct intel_crtc_state
*cstate
)
4827 struct drm_atomic_state
*state
= cstate
->base
.state
;
4828 struct drm_i915_private
*dev_priv
= to_i915(state
->dev
);
4829 uint_fixed_16_16_t linetime_us
;
4832 linetime_us
= intel_get_linetime_us(cstate
);
4833 linetime_wm
= fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us
));
4835 /* Display WA #1135: BXT:ALL GLK:ALL */
4836 if (IS_GEN9_LP(dev_priv
) && dev_priv
->ipc_enabled
)
4842 static void skl_compute_transition_wm(const struct intel_crtc_state
*cstate
,
4843 const struct skl_wm_params
*wp
,
4844 struct skl_plane_wm
*wm
)
4846 struct drm_device
*dev
= cstate
->base
.crtc
->dev
;
4847 const struct drm_i915_private
*dev_priv
= to_i915(dev
);
4848 u16 trans_min
, trans_y_tile_min
;
4849 const u16 trans_amount
= 10; /* This is configurable amount */
4850 u16 wm0_sel_res_b
, trans_offset_b
, res_blocks
;
4852 /* Transition WM are not recommended by HW team for GEN9 */
4853 if (INTEL_GEN(dev_priv
) <= 9)
4856 /* Transition WM don't make any sense if ipc is disabled */
4857 if (!dev_priv
->ipc_enabled
)
4861 if (INTEL_GEN(dev_priv
) >= 11)
4864 trans_offset_b
= trans_min
+ trans_amount
;
4867 * The spec asks for Selected Result Blocks for wm0 (the real value),
4868 * not Result Blocks (the integer value). Pay attention to the capital
4869 * letters. The value wm_l0->plane_res_b is actually Result Blocks, but
4870 * since Result Blocks is the ceiling of Selected Result Blocks plus 1,
4871 * and since we later will have to get the ceiling of the sum in the
4872 * transition watermarks calculation, we can just pretend Selected
4873 * Result Blocks is Result Blocks minus 1 and it should work for the
4874 * current platforms.
4876 wm0_sel_res_b
= wm
->wm
[0].plane_res_b
- 1;
4880 (u16
)mul_round_up_u32_fixed16(2, wp
->y_tile_minimum
);
4881 res_blocks
= max(wm0_sel_res_b
, trans_y_tile_min
) +
4884 res_blocks
= wm0_sel_res_b
+ trans_offset_b
;
4886 /* WA BUG:1938466 add one block for non y-tile planes */
4887 if (IS_CNL_REVID(dev_priv
, CNL_REVID_A0
, CNL_REVID_A0
))
4893 * Just assume we can enable the transition watermark. After
4894 * computing the DDB we'll come back and disable it if that
4895 * assumption turns out to be false.
4897 wm
->trans_wm
.plane_res_b
= res_blocks
+ 1;
4898 wm
->trans_wm
.plane_en
= true;
4901 static int skl_build_plane_wm_single(struct intel_crtc_state
*crtc_state
,
4902 const struct intel_plane_state
*plane_state
,
4903 enum plane_id plane_id
, int color_plane
)
4905 struct skl_plane_wm
*wm
= &crtc_state
->wm
.skl
.optimal
.planes
[plane_id
];
4906 struct skl_wm_params wm_params
;
4909 ret
= skl_compute_plane_wm_params(crtc_state
, plane_state
,
4910 &wm_params
, color_plane
);
4914 skl_compute_wm_levels(crtc_state
, plane_state
, &wm_params
, wm
->wm
);
4915 skl_compute_transition_wm(crtc_state
, &wm_params
, wm
);
4920 static int skl_build_plane_wm_uv(struct intel_crtc_state
*crtc_state
,
4921 const struct intel_plane_state
*plane_state
,
4922 enum plane_id plane_id
)
4924 struct skl_plane_wm
*wm
= &crtc_state
->wm
.skl
.optimal
.planes
[plane_id
];
4925 struct skl_wm_params wm_params
;
4928 wm
->is_planar
= true;
4930 /* uv plane watermarks must also be validated for NV12/Planar */
4931 ret
= skl_compute_plane_wm_params(crtc_state
, plane_state
,
4936 skl_compute_wm_levels(crtc_state
, plane_state
, &wm_params
, wm
->uv_wm
);
4941 static int skl_build_plane_wm(struct skl_pipe_wm
*pipe_wm
,
4942 struct intel_crtc_state
*crtc_state
,
4943 const struct intel_plane_state
*plane_state
)
4945 struct intel_plane
*plane
= to_intel_plane(plane_state
->base
.plane
);
4946 const struct drm_framebuffer
*fb
= plane_state
->base
.fb
;
4947 enum plane_id plane_id
= plane
->id
;
4950 if (!intel_wm_plane_visible(crtc_state
, plane_state
))
4953 ret
= skl_build_plane_wm_single(crtc_state
, plane_state
,
4958 if (fb
->format
->is_yuv
&& fb
->format
->num_planes
> 1) {
4959 ret
= skl_build_plane_wm_uv(crtc_state
, plane_state
,
4968 static int icl_build_plane_wm(struct skl_pipe_wm
*pipe_wm
,
4969 struct intel_crtc_state
*crtc_state
,
4970 const struct intel_plane_state
*plane_state
)
4972 enum plane_id plane_id
= to_intel_plane(plane_state
->base
.plane
)->id
;
4975 /* Watermarks calculated in master */
4976 if (plane_state
->slave
)
4979 if (plane_state
->linked_plane
) {
4980 const struct drm_framebuffer
*fb
= plane_state
->base
.fb
;
4981 enum plane_id y_plane_id
= plane_state
->linked_plane
->id
;
4983 WARN_ON(!intel_wm_plane_visible(crtc_state
, plane_state
));
4984 WARN_ON(!fb
->format
->is_yuv
||
4985 fb
->format
->num_planes
== 1);
4987 ret
= skl_build_plane_wm_single(crtc_state
, plane_state
,
4992 ret
= skl_build_plane_wm_single(crtc_state
, plane_state
,
4996 } else if (intel_wm_plane_visible(crtc_state
, plane_state
)) {
4997 ret
= skl_build_plane_wm_single(crtc_state
, plane_state
,
5006 static int skl_build_pipe_wm(struct intel_crtc_state
*cstate
,
5007 struct skl_pipe_wm
*pipe_wm
)
5009 struct drm_i915_private
*dev_priv
= to_i915(cstate
->base
.crtc
->dev
);
5010 struct drm_crtc_state
*crtc_state
= &cstate
->base
;
5011 struct drm_plane
*plane
;
5012 const struct drm_plane_state
*pstate
;
5016 * We'll only calculate watermarks for planes that are actually
5017 * enabled, so make sure all other planes are set as disabled.
5019 memset(pipe_wm
->planes
, 0, sizeof(pipe_wm
->planes
));
5021 drm_atomic_crtc_state_for_each_plane_state(plane
, pstate
, crtc_state
) {
5022 const struct intel_plane_state
*intel_pstate
=
5023 to_intel_plane_state(pstate
);
5025 if (INTEL_GEN(dev_priv
) >= 11)
5026 ret
= icl_build_plane_wm(pipe_wm
,
5027 cstate
, intel_pstate
);
5029 ret
= skl_build_plane_wm(pipe_wm
,
5030 cstate
, intel_pstate
);
5035 pipe_wm
->linetime
= skl_compute_linetime_wm(cstate
);
5040 static void skl_ddb_entry_write(struct drm_i915_private
*dev_priv
,
5042 const struct skl_ddb_entry
*entry
)
5045 I915_WRITE_FW(reg
, (entry
->end
- 1) << 16 | entry
->start
);
5047 I915_WRITE_FW(reg
, 0);
5050 static void skl_write_wm_level(struct drm_i915_private
*dev_priv
,
5052 const struct skl_wm_level
*level
)
5056 if (level
->plane_en
) {
5058 val
|= level
->plane_res_b
;
5059 val
|= level
->plane_res_l
<< PLANE_WM_LINES_SHIFT
;
5062 I915_WRITE_FW(reg
, val
);
5065 void skl_write_plane_wm(struct intel_plane
*plane
,
5066 const struct intel_crtc_state
*crtc_state
)
5068 struct drm_i915_private
*dev_priv
= to_i915(plane
->base
.dev
);
5069 int level
, max_level
= ilk_wm_max_level(dev_priv
);
5070 enum plane_id plane_id
= plane
->id
;
5071 enum pipe pipe
= plane
->pipe
;
5072 const struct skl_plane_wm
*wm
=
5073 &crtc_state
->wm
.skl
.optimal
.planes
[plane_id
];
5074 const struct skl_ddb_entry
*ddb_y
=
5075 &crtc_state
->wm
.skl
.plane_ddb_y
[plane_id
];
5076 const struct skl_ddb_entry
*ddb_uv
=
5077 &crtc_state
->wm
.skl
.plane_ddb_uv
[plane_id
];
5079 for (level
= 0; level
<= max_level
; level
++) {
5080 skl_write_wm_level(dev_priv
, PLANE_WM(pipe
, plane_id
, level
),
5083 skl_write_wm_level(dev_priv
, PLANE_WM_TRANS(pipe
, plane_id
),
5086 if (INTEL_GEN(dev_priv
) >= 11) {
5087 skl_ddb_entry_write(dev_priv
,
5088 PLANE_BUF_CFG(pipe
, plane_id
), ddb_y
);
5093 swap(ddb_y
, ddb_uv
);
5095 skl_ddb_entry_write(dev_priv
,
5096 PLANE_BUF_CFG(pipe
, plane_id
), ddb_y
);
5097 skl_ddb_entry_write(dev_priv
,
5098 PLANE_NV12_BUF_CFG(pipe
, plane_id
), ddb_uv
);
5101 void skl_write_cursor_wm(struct intel_plane
*plane
,
5102 const struct intel_crtc_state
*crtc_state
)
5104 struct drm_i915_private
*dev_priv
= to_i915(plane
->base
.dev
);
5105 int level
, max_level
= ilk_wm_max_level(dev_priv
);
5106 enum plane_id plane_id
= plane
->id
;
5107 enum pipe pipe
= plane
->pipe
;
5108 const struct skl_plane_wm
*wm
=
5109 &crtc_state
->wm
.skl
.optimal
.planes
[plane_id
];
5110 const struct skl_ddb_entry
*ddb
=
5111 &crtc_state
->wm
.skl
.plane_ddb_y
[plane_id
];
5113 for (level
= 0; level
<= max_level
; level
++) {
5114 skl_write_wm_level(dev_priv
, CUR_WM(pipe
, level
),
5117 skl_write_wm_level(dev_priv
, CUR_WM_TRANS(pipe
), &wm
->trans_wm
);
5119 skl_ddb_entry_write(dev_priv
, CUR_BUF_CFG(pipe
), ddb
);
/*
 * Compare two SKL+ watermark levels for equality: the enable bit,
 * the result-lines value and the result-blocks value must all match.
 */
bool skl_wm_level_equals(const struct skl_wm_level *l1,
			 const struct skl_wm_level *l2)
{
	return l1->plane_en == l2->plane_en &&
		l1->plane_res_l == l2->plane_res_l &&
		l1->plane_res_b == l2->plane_res_b;
}
5130 static bool skl_plane_wm_equals(struct drm_i915_private
*dev_priv
,
5131 const struct skl_plane_wm
*wm1
,
5132 const struct skl_plane_wm
*wm2
)
5134 int level
, max_level
= ilk_wm_max_level(dev_priv
);
5136 for (level
= 0; level
<= max_level
; level
++) {
5137 if (!skl_wm_level_equals(&wm1
->wm
[level
], &wm2
->wm
[level
]) ||
5138 !skl_wm_level_equals(&wm1
->uv_wm
[level
], &wm2
->uv_wm
[level
]))
5142 return skl_wm_level_equals(&wm1
->trans_wm
, &wm2
->trans_wm
);
5145 static bool skl_pipe_wm_equals(struct intel_crtc
*crtc
,
5146 const struct skl_pipe_wm
*wm1
,
5147 const struct skl_pipe_wm
*wm2
)
5149 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
5150 enum plane_id plane_id
;
5152 for_each_plane_id_on_crtc(crtc
, plane_id
) {
5153 if (!skl_plane_wm_equals(dev_priv
,
5154 &wm1
->planes
[plane_id
],
5155 &wm2
->planes
[plane_id
]))
5159 return wm1
->linetime
== wm2
->linetime
;
/*
 * Two half-open DDB ranges [start, end) overlap iff each one starts
 * before the other one ends.
 */
static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
					   const struct skl_ddb_entry *b)
{
	return a->start < b->end && b->start < a->end;
}
5168 bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry
*ddb
,
5169 const struct skl_ddb_entry entries
[],
5170 int num_entries
, int ignore_idx
)
5174 for (i
= 0; i
< num_entries
; i
++) {
5175 if (i
!= ignore_idx
&&
5176 skl_ddb_entries_overlap(ddb
, &entries
[i
]))
5183 static int skl_update_pipe_wm(struct intel_crtc_state
*cstate
,
5184 const struct skl_pipe_wm
*old_pipe_wm
,
5185 struct skl_pipe_wm
*pipe_wm
, /* out */
5186 bool *changed
/* out */)
5188 struct intel_crtc
*crtc
= to_intel_crtc(cstate
->base
.crtc
);
5191 ret
= skl_build_pipe_wm(cstate
, pipe_wm
);
5195 *changed
= !skl_pipe_wm_equals(crtc
, old_pipe_wm
, pipe_wm
);
5201 pipes_modified(struct intel_atomic_state
*state
)
5203 struct intel_crtc
*crtc
;
5204 struct intel_crtc_state
*cstate
;
5207 for_each_new_intel_crtc_in_state(state
, crtc
, cstate
, i
)
5208 ret
|= drm_crtc_mask(&crtc
->base
);
5214 skl_ddb_add_affected_planes(const struct intel_crtc_state
*old_crtc_state
,
5215 struct intel_crtc_state
*new_crtc_state
)
5217 struct intel_atomic_state
*state
= to_intel_atomic_state(new_crtc_state
->base
.state
);
5218 struct intel_crtc
*crtc
= to_intel_crtc(new_crtc_state
->base
.crtc
);
5219 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
5220 struct intel_plane
*plane
;
5222 for_each_intel_plane_on_crtc(&dev_priv
->drm
, crtc
, plane
) {
5223 struct intel_plane_state
*plane_state
;
5224 enum plane_id plane_id
= plane
->id
;
5226 if (skl_ddb_entry_equal(&old_crtc_state
->wm
.skl
.plane_ddb_y
[plane_id
],
5227 &new_crtc_state
->wm
.skl
.plane_ddb_y
[plane_id
]) &&
5228 skl_ddb_entry_equal(&old_crtc_state
->wm
.skl
.plane_ddb_uv
[plane_id
],
5229 &new_crtc_state
->wm
.skl
.plane_ddb_uv
[plane_id
]))
5232 plane_state
= intel_atomic_get_plane_state(state
, plane
);
5233 if (IS_ERR(plane_state
))
5234 return PTR_ERR(plane_state
);
5236 new_crtc_state
->update_planes
|= BIT(plane_id
);
5243 skl_compute_ddb(struct intel_atomic_state
*state
)
5245 const struct drm_i915_private
*dev_priv
= to_i915(state
->base
.dev
);
5246 struct skl_ddb_allocation
*ddb
= &state
->wm_results
.ddb
;
5247 struct intel_crtc_state
*old_crtc_state
;
5248 struct intel_crtc_state
*new_crtc_state
;
5249 struct intel_crtc
*crtc
;
5252 memcpy(ddb
, &dev_priv
->wm
.skl_hw
.ddb
, sizeof(*ddb
));
5254 for_each_oldnew_intel_crtc_in_state(state
, crtc
, old_crtc_state
,
5255 new_crtc_state
, i
) {
5256 ret
= skl_allocate_pipe_ddb(new_crtc_state
, ddb
);
5260 ret
= skl_ddb_add_affected_planes(old_crtc_state
,
5270 skl_print_wm_changes(struct intel_atomic_state
*state
)
5272 struct drm_i915_private
*dev_priv
= to_i915(state
->base
.dev
);
5273 const struct intel_crtc_state
*old_crtc_state
;
5274 const struct intel_crtc_state
*new_crtc_state
;
5275 struct intel_plane
*plane
;
5276 struct intel_crtc
*crtc
;
5279 for_each_oldnew_intel_crtc_in_state(state
, crtc
, old_crtc_state
,
5280 new_crtc_state
, i
) {
5281 for_each_intel_plane_on_crtc(&dev_priv
->drm
, crtc
, plane
) {
5282 enum plane_id plane_id
= plane
->id
;
5283 const struct skl_ddb_entry
*old
, *new;
5285 old
= &old_crtc_state
->wm
.skl
.plane_ddb_y
[plane_id
];
5286 new = &new_crtc_state
->wm
.skl
.plane_ddb_y
[plane_id
];
5288 if (skl_ddb_entry_equal(old
, new))
5291 DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
5292 plane
->base
.base
.id
, plane
->base
.name
,
5293 old
->start
, old
->end
,
5294 new->start
, new->end
);
5300 skl_ddb_add_affected_pipes(struct intel_atomic_state
*state
, bool *changed
)
5302 struct drm_device
*dev
= state
->base
.dev
;
5303 const struct drm_i915_private
*dev_priv
= to_i915(dev
);
5304 struct intel_crtc
*crtc
;
5305 struct intel_crtc_state
*crtc_state
;
5306 u32 realloc_pipes
= pipes_modified(state
);
5310 * When we distrust bios wm we always need to recompute to set the
5311 * expected DDB allocations for each CRTC.
5313 if (dev_priv
->wm
.distrust_bios_wm
)
5317 * If this transaction isn't actually touching any CRTC's, don't
5318 * bother with watermark calculation. Note that if we pass this
5319 * test, we're guaranteed to hold at least one CRTC state mutex,
5320 * which means we can safely use values like dev_priv->active_crtcs
5321 * since any racing commits that want to update them would need to
5322 * hold _all_ CRTC state mutexes.
5324 for_each_new_intel_crtc_in_state(state
, crtc
, crtc_state
, i
)
5331 * If this is our first atomic update following hardware readout,
5332 * we can't trust the DDB that the BIOS programmed for us. Let's
5333 * pretend that all pipes switched active status so that we'll
5334 * ensure a full DDB recompute.
5336 if (dev_priv
->wm
.distrust_bios_wm
) {
5337 ret
= drm_modeset_lock(&dev
->mode_config
.connection_mutex
,
5338 state
->base
.acquire_ctx
);
5342 state
->active_pipe_changes
= ~0;
5345 * We usually only initialize state->active_crtcs if we
5346 * we're doing a modeset; make sure this field is always
5347 * initialized during the sanitization process that happens
5348 * on the first commit too.
5350 if (!state
->modeset
)
5351 state
->active_crtcs
= dev_priv
->active_crtcs
;
5355 * If the modeset changes which CRTC's are active, we need to
5356 * recompute the DDB allocation for *all* active pipes, even
5357 * those that weren't otherwise being modified in any way by this
5358 * atomic commit. Due to the shrinking of the per-pipe allocations
5359 * when new active CRTC's are added, it's possible for a pipe that
5360 * we were already using and aren't changing at all here to suddenly
5361 * become invalid if its DDB needs exceeds its new allocation.
5363 * Note that if we wind up doing a full DDB recompute, we can't let
5364 * any other display updates race with this transaction, so we need
5365 * to grab the lock on *all* CRTC's.
5367 if (state
->active_pipe_changes
|| state
->modeset
) {
5369 state
->wm_results
.dirty_pipes
= ~0;
5373 * We're not recomputing for the pipes not included in the commit, so
5374 * make sure we start with the current state.
5376 for_each_intel_crtc_mask(dev
, crtc
, realloc_pipes
) {
5377 crtc_state
= intel_atomic_get_crtc_state(&state
->base
, crtc
);
5378 if (IS_ERR(crtc_state
))
5379 return PTR_ERR(crtc_state
);
5386 * To make sure the cursor watermark registers are always consistent
5387 * with our computed state the following scenario needs special
5391 * 2. move cursor entirely offscreen
5394 * Step 2. does call .disable_plane() but does not zero the watermarks
5395 * (since we consider an offscreen cursor still active for the purposes
5396 * of watermarks). Step 3. would not normally call .disable_plane()
5397 * because the actual plane visibility isn't changing, and we don't
5398 * deallocate the cursor ddb until the pipe gets disabled. So we must
5399 * force step 3. to call .disable_plane() to update the watermark
5400 * registers properly.
5402 * Other planes do not suffer from this issues as their watermarks are
5403 * calculated based on the actual plane visibility. The only time this
5404 * can trigger for the other planes is during the initial readout as the
5405 * default value of the watermarks registers is not zero.
5407 static int skl_wm_add_affected_planes(struct intel_atomic_state
*state
,
5408 struct intel_crtc
*crtc
)
5410 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
5411 const struct intel_crtc_state
*old_crtc_state
=
5412 intel_atomic_get_old_crtc_state(state
, crtc
);
5413 struct intel_crtc_state
*new_crtc_state
=
5414 intel_atomic_get_new_crtc_state(state
, crtc
);
5415 struct intel_plane
*plane
;
5417 for_each_intel_plane_on_crtc(&dev_priv
->drm
, crtc
, plane
) {
5418 struct intel_plane_state
*plane_state
;
5419 enum plane_id plane_id
= plane
->id
;
5422 * Force a full wm update for every plane on modeset.
5423 * Required because the reset value of the wm registers
5424 * is non-zero, whereas we want all disabled planes to
5425 * have zero watermarks. So if we turn off the relevant
5426 * power well the hardware state will go out of sync
5427 * with the software state.
5429 if (!drm_atomic_crtc_needs_modeset(&new_crtc_state
->base
) &&
5430 skl_plane_wm_equals(dev_priv
,
5431 &old_crtc_state
->wm
.skl
.optimal
.planes
[plane_id
],
5432 &new_crtc_state
->wm
.skl
.optimal
.planes
[plane_id
]))
5435 plane_state
= intel_atomic_get_plane_state(state
, plane
);
5436 if (IS_ERR(plane_state
))
5437 return PTR_ERR(plane_state
);
5439 new_crtc_state
->update_planes
|= BIT(plane_id
);
5446 skl_compute_wm(struct intel_atomic_state
*state
)
5448 struct intel_crtc
*crtc
;
5449 struct intel_crtc_state
*cstate
;
5450 struct intel_crtc_state
*old_crtc_state
;
5451 struct skl_ddb_values
*results
= &state
->wm_results
;
5452 struct skl_pipe_wm
*pipe_wm
;
5453 bool changed
= false;
5456 /* Clear all dirty flags */
5457 results
->dirty_pipes
= 0;
5459 ret
= skl_ddb_add_affected_pipes(state
, &changed
);
5460 if (ret
|| !changed
)
5464 * Calculate WM's for all pipes that are part of this transaction.
5465 * Note that skl_ddb_add_affected_pipes may have added more CRTC's that
5466 * weren't otherwise being modified (and set bits in dirty_pipes) if
5467 * pipe allocations had to change.
5469 for_each_oldnew_intel_crtc_in_state(state
, crtc
, old_crtc_state
,
5471 const struct skl_pipe_wm
*old_pipe_wm
=
5472 &old_crtc_state
->wm
.skl
.optimal
;
5474 pipe_wm
= &cstate
->wm
.skl
.optimal
;
5475 ret
= skl_update_pipe_wm(cstate
, old_pipe_wm
, pipe_wm
, &changed
);
5479 ret
= skl_wm_add_affected_planes(state
, crtc
);
5484 results
->dirty_pipes
|= drm_crtc_mask(&crtc
->base
);
5487 ret
= skl_compute_ddb(state
);
5491 skl_print_wm_changes(state
);
5496 static void skl_atomic_update_crtc_wm(struct intel_atomic_state
*state
,
5497 struct intel_crtc_state
*cstate
)
5499 struct intel_crtc
*crtc
= to_intel_crtc(cstate
->base
.crtc
);
5500 struct drm_i915_private
*dev_priv
= to_i915(state
->base
.dev
);
5501 struct skl_pipe_wm
*pipe_wm
= &cstate
->wm
.skl
.optimal
;
5502 enum pipe pipe
= crtc
->pipe
;
5504 if (!(state
->wm_results
.dirty_pipes
& drm_crtc_mask(&crtc
->base
)))
5507 I915_WRITE(PIPE_WM_LINETIME(pipe
), pipe_wm
->linetime
);
5510 static void skl_initial_wm(struct intel_atomic_state
*state
,
5511 struct intel_crtc_state
*cstate
)
5513 struct intel_crtc
*intel_crtc
= to_intel_crtc(cstate
->base
.crtc
);
5514 struct drm_device
*dev
= intel_crtc
->base
.dev
;
5515 struct drm_i915_private
*dev_priv
= to_i915(dev
);
5516 struct skl_ddb_values
*results
= &state
->wm_results
;
5518 if ((results
->dirty_pipes
& drm_crtc_mask(&intel_crtc
->base
)) == 0)
5521 mutex_lock(&dev_priv
->wm
.wm_mutex
);
5523 if (cstate
->base
.active_changed
)
5524 skl_atomic_update_crtc_wm(state
, cstate
);
5526 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
5529 static void ilk_compute_wm_config(struct drm_i915_private
*dev_priv
,
5530 struct intel_wm_config
*config
)
5532 struct intel_crtc
*crtc
;
5534 /* Compute the currently _active_ config */
5535 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
5536 const struct intel_pipe_wm
*wm
= &crtc
->wm
.active
.ilk
;
5538 if (!wm
->pipe_enabled
)
5541 config
->sprites_enabled
|= wm
->sprites_enabled
;
5542 config
->sprites_scaled
|= wm
->sprites_scaled
;
5543 config
->num_pipes_active
++;
5547 static void ilk_program_watermarks(struct drm_i915_private
*dev_priv
)
5549 struct intel_pipe_wm lp_wm_1_2
= {}, lp_wm_5_6
= {}, *best_lp_wm
;
5550 struct ilk_wm_maximums max
;
5551 struct intel_wm_config config
= {};
5552 struct ilk_wm_values results
= {};
5553 enum intel_ddb_partitioning partitioning
;
5555 ilk_compute_wm_config(dev_priv
, &config
);
5557 ilk_compute_wm_maximums(dev_priv
, 1, &config
, INTEL_DDB_PART_1_2
, &max
);
5558 ilk_wm_merge(dev_priv
, &config
, &max
, &lp_wm_1_2
);
5560 /* 5/6 split only in single pipe config on IVB+ */
5561 if (INTEL_GEN(dev_priv
) >= 7 &&
5562 config
.num_pipes_active
== 1 && config
.sprites_enabled
) {
5563 ilk_compute_wm_maximums(dev_priv
, 1, &config
, INTEL_DDB_PART_5_6
, &max
);
5564 ilk_wm_merge(dev_priv
, &config
, &max
, &lp_wm_5_6
);
5566 best_lp_wm
= ilk_find_best_result(dev_priv
, &lp_wm_1_2
, &lp_wm_5_6
);
5568 best_lp_wm
= &lp_wm_1_2
;
5571 partitioning
= (best_lp_wm
== &lp_wm_1_2
) ?
5572 INTEL_DDB_PART_1_2
: INTEL_DDB_PART_5_6
;
5574 ilk_compute_wm_results(dev_priv
, best_lp_wm
, partitioning
, &results
);
5576 ilk_write_wm_values(dev_priv
, &results
);
5579 static void ilk_initial_watermarks(struct intel_atomic_state
*state
,
5580 struct intel_crtc_state
*cstate
)
5582 struct drm_i915_private
*dev_priv
= to_i915(cstate
->base
.crtc
->dev
);
5583 struct intel_crtc
*intel_crtc
= to_intel_crtc(cstate
->base
.crtc
);
5585 mutex_lock(&dev_priv
->wm
.wm_mutex
);
5586 intel_crtc
->wm
.active
.ilk
= cstate
->wm
.ilk
.intermediate
;
5587 ilk_program_watermarks(dev_priv
);
5588 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
5591 static void ilk_optimize_watermarks(struct intel_atomic_state
*state
,
5592 struct intel_crtc_state
*cstate
)
5594 struct drm_i915_private
*dev_priv
= to_i915(cstate
->base
.crtc
->dev
);
5595 struct intel_crtc
*intel_crtc
= to_intel_crtc(cstate
->base
.crtc
);
5597 mutex_lock(&dev_priv
->wm
.wm_mutex
);
5598 if (cstate
->wm
.need_postvbl_update
) {
5599 intel_crtc
->wm
.active
.ilk
= cstate
->wm
.ilk
.optimal
;
5600 ilk_program_watermarks(dev_priv
);
5602 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
5605 static inline void skl_wm_level_from_reg_val(u32 val
,
5606 struct skl_wm_level
*level
)
5608 level
->plane_en
= val
& PLANE_WM_EN
;
5609 level
->plane_res_b
= val
& PLANE_WM_BLOCKS_MASK
;
5610 level
->plane_res_l
= (val
>> PLANE_WM_LINES_SHIFT
) &
5611 PLANE_WM_LINES_MASK
;
5614 void skl_pipe_wm_get_hw_state(struct intel_crtc
*crtc
,
5615 struct skl_pipe_wm
*out
)
5617 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
5618 enum pipe pipe
= crtc
->pipe
;
5619 int level
, max_level
;
5620 enum plane_id plane_id
;
5623 max_level
= ilk_wm_max_level(dev_priv
);
5625 for_each_plane_id_on_crtc(crtc
, plane_id
) {
5626 struct skl_plane_wm
*wm
= &out
->planes
[plane_id
];
5628 for (level
= 0; level
<= max_level
; level
++) {
5629 if (plane_id
!= PLANE_CURSOR
)
5630 val
= I915_READ(PLANE_WM(pipe
, plane_id
, level
));
5632 val
= I915_READ(CUR_WM(pipe
, level
));
5634 skl_wm_level_from_reg_val(val
, &wm
->wm
[level
]);
5637 if (plane_id
!= PLANE_CURSOR
)
5638 val
= I915_READ(PLANE_WM_TRANS(pipe
, plane_id
));
5640 val
= I915_READ(CUR_WM_TRANS(pipe
));
5642 skl_wm_level_from_reg_val(val
, &wm
->trans_wm
);
5648 out
->linetime
= I915_READ(PIPE_WM_LINETIME(pipe
));
5651 void skl_wm_get_hw_state(struct drm_i915_private
*dev_priv
)
5653 struct skl_ddb_values
*hw
= &dev_priv
->wm
.skl_hw
;
5654 struct skl_ddb_allocation
*ddb
= &dev_priv
->wm
.skl_hw
.ddb
;
5655 struct intel_crtc
*crtc
;
5656 struct intel_crtc_state
*cstate
;
5658 skl_ddb_get_hw_state(dev_priv
, ddb
);
5659 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
5660 cstate
= to_intel_crtc_state(crtc
->base
.state
);
5662 skl_pipe_wm_get_hw_state(crtc
, &cstate
->wm
.skl
.optimal
);
5665 hw
->dirty_pipes
|= drm_crtc_mask(&crtc
->base
);
5668 if (dev_priv
->active_crtcs
) {
5669 /* Fully recompute DDB on first atomic commit */
5670 dev_priv
->wm
.distrust_bios_wm
= true;
5674 static void ilk_pipe_wm_get_hw_state(struct intel_crtc
*crtc
)
5676 struct drm_device
*dev
= crtc
->base
.dev
;
5677 struct drm_i915_private
*dev_priv
= to_i915(dev
);
5678 struct ilk_wm_values
*hw
= &dev_priv
->wm
.hw
;
5679 struct intel_crtc_state
*cstate
= to_intel_crtc_state(crtc
->base
.state
);
5680 struct intel_pipe_wm
*active
= &cstate
->wm
.ilk
.optimal
;
5681 enum pipe pipe
= crtc
->pipe
;
5682 static const i915_reg_t wm0_pipe_reg
[] = {
5683 [PIPE_A
] = WM0_PIPEA_ILK
,
5684 [PIPE_B
] = WM0_PIPEB_ILK
,
5685 [PIPE_C
] = WM0_PIPEC_IVB
,
5688 hw
->wm_pipe
[pipe
] = I915_READ(wm0_pipe_reg
[pipe
]);
5689 if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
))
5690 hw
->wm_linetime
[pipe
] = I915_READ(PIPE_WM_LINETIME(pipe
));
5692 memset(active
, 0, sizeof(*active
));
5694 active
->pipe_enabled
= crtc
->active
;
5696 if (active
->pipe_enabled
) {
5697 u32 tmp
= hw
->wm_pipe
[pipe
];
5700 * For active pipes LP0 watermark is marked as
5701 * enabled, and LP1+ watermaks as disabled since
5702 * we can't really reverse compute them in case
5703 * multiple pipes are active.
5705 active
->wm
[0].enable
= true;
5706 active
->wm
[0].pri_val
= (tmp
& WM0_PIPE_PLANE_MASK
) >> WM0_PIPE_PLANE_SHIFT
;
5707 active
->wm
[0].spr_val
= (tmp
& WM0_PIPE_SPRITE_MASK
) >> WM0_PIPE_SPRITE_SHIFT
;
5708 active
->wm
[0].cur_val
= tmp
& WM0_PIPE_CURSOR_MASK
;
5709 active
->linetime
= hw
->wm_linetime
[pipe
];
5711 int level
, max_level
= ilk_wm_max_level(dev_priv
);
5714 * For inactive pipes, all watermark levels
5715 * should be marked as enabled but zeroed,
5716 * which is what we'd compute them to.
5718 for (level
= 0; level
<= max_level
; level
++)
5719 active
->wm
[level
].enable
= true;
5722 crtc
->wm
.active
.ilk
= *active
;
/* Extract a per-plane watermark field from a DSPFW register value. */
#define _FW_WM(value, plane) \
	(((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
#define _FW_WM_VLV(value, plane) \
	(((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
5730 static void g4x_read_wm_values(struct drm_i915_private
*dev_priv
,
5731 struct g4x_wm_values
*wm
)
5735 tmp
= I915_READ(DSPFW1
);
5736 wm
->sr
.plane
= _FW_WM(tmp
, SR
);
5737 wm
->pipe
[PIPE_B
].plane
[PLANE_CURSOR
] = _FW_WM(tmp
, CURSORB
);
5738 wm
->pipe
[PIPE_B
].plane
[PLANE_PRIMARY
] = _FW_WM(tmp
, PLANEB
);
5739 wm
->pipe
[PIPE_A
].plane
[PLANE_PRIMARY
] = _FW_WM(tmp
, PLANEA
);
5741 tmp
= I915_READ(DSPFW2
);
5742 wm
->fbc_en
= tmp
& DSPFW_FBC_SR_EN
;
5743 wm
->sr
.fbc
= _FW_WM(tmp
, FBC_SR
);
5744 wm
->hpll
.fbc
= _FW_WM(tmp
, FBC_HPLL_SR
);
5745 wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
] = _FW_WM(tmp
, SPRITEB
);
5746 wm
->pipe
[PIPE_A
].plane
[PLANE_CURSOR
] = _FW_WM(tmp
, CURSORA
);
5747 wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE0
] = _FW_WM(tmp
, SPRITEA
);
5749 tmp
= I915_READ(DSPFW3
);
5750 wm
->hpll_en
= tmp
& DSPFW_HPLL_SR_EN
;
5751 wm
->sr
.cursor
= _FW_WM(tmp
, CURSOR_SR
);
5752 wm
->hpll
.cursor
= _FW_WM(tmp
, HPLL_CURSOR
);
5753 wm
->hpll
.plane
= _FW_WM(tmp
, HPLL_SR
);
5756 static void vlv_read_wm_values(struct drm_i915_private
*dev_priv
,
5757 struct vlv_wm_values
*wm
)
5762 for_each_pipe(dev_priv
, pipe
) {
5763 tmp
= I915_READ(VLV_DDL(pipe
));
5765 wm
->ddl
[pipe
].plane
[PLANE_PRIMARY
] =
5766 (tmp
>> DDL_PLANE_SHIFT
) & (DDL_PRECISION_HIGH
| DRAIN_LATENCY_MASK
);
5767 wm
->ddl
[pipe
].plane
[PLANE_CURSOR
] =
5768 (tmp
>> DDL_CURSOR_SHIFT
) & (DDL_PRECISION_HIGH
| DRAIN_LATENCY_MASK
);
5769 wm
->ddl
[pipe
].plane
[PLANE_SPRITE0
] =
5770 (tmp
>> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH
| DRAIN_LATENCY_MASK
);
5771 wm
->ddl
[pipe
].plane
[PLANE_SPRITE1
] =
5772 (tmp
>> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH
| DRAIN_LATENCY_MASK
);
5775 tmp
= I915_READ(DSPFW1
);
5776 wm
->sr
.plane
= _FW_WM(tmp
, SR
);
5777 wm
->pipe
[PIPE_B
].plane
[PLANE_CURSOR
] = _FW_WM(tmp
, CURSORB
);
5778 wm
->pipe
[PIPE_B
].plane
[PLANE_PRIMARY
] = _FW_WM_VLV(tmp
, PLANEB
);
5779 wm
->pipe
[PIPE_A
].plane
[PLANE_PRIMARY
] = _FW_WM_VLV(tmp
, PLANEA
);
5781 tmp
= I915_READ(DSPFW2
);
5782 wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE1
] = _FW_WM_VLV(tmp
, SPRITEB
);
5783 wm
->pipe
[PIPE_A
].plane
[PLANE_CURSOR
] = _FW_WM(tmp
, CURSORA
);
5784 wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE0
] = _FW_WM_VLV(tmp
, SPRITEA
);
5786 tmp
= I915_READ(DSPFW3
);
5787 wm
->sr
.cursor
= _FW_WM(tmp
, CURSOR_SR
);
5789 if (IS_CHERRYVIEW(dev_priv
)) {
5790 tmp
= I915_READ(DSPFW7_CHV
);
5791 wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE1
] = _FW_WM_VLV(tmp
, SPRITED
);
5792 wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
] = _FW_WM_VLV(tmp
, SPRITEC
);
5794 tmp
= I915_READ(DSPFW8_CHV
);
5795 wm
->pipe
[PIPE_C
].plane
[PLANE_SPRITE1
] = _FW_WM_VLV(tmp
, SPRITEF
);
5796 wm
->pipe
[PIPE_C
].plane
[PLANE_SPRITE0
] = _FW_WM_VLV(tmp
, SPRITEE
);
5798 tmp
= I915_READ(DSPFW9_CHV
);
5799 wm
->pipe
[PIPE_C
].plane
[PLANE_PRIMARY
] = _FW_WM_VLV(tmp
, PLANEC
);
5800 wm
->pipe
[PIPE_C
].plane
[PLANE_CURSOR
] = _FW_WM(tmp
, CURSORC
);
5802 tmp
= I915_READ(DSPHOWM
);
5803 wm
->sr
.plane
|= _FW_WM(tmp
, SR_HI
) << 9;
5804 wm
->pipe
[PIPE_C
].plane
[PLANE_SPRITE1
] |= _FW_WM(tmp
, SPRITEF_HI
) << 8;
5805 wm
->pipe
[PIPE_C
].plane
[PLANE_SPRITE0
] |= _FW_WM(tmp
, SPRITEE_HI
) << 8;
5806 wm
->pipe
[PIPE_C
].plane
[PLANE_PRIMARY
] |= _FW_WM(tmp
, PLANEC_HI
) << 8;
5807 wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE1
] |= _FW_WM(tmp
, SPRITED_HI
) << 8;
5808 wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
] |= _FW_WM(tmp
, SPRITEC_HI
) << 8;
5809 wm
->pipe
[PIPE_B
].plane
[PLANE_PRIMARY
] |= _FW_WM(tmp
, PLANEB_HI
) << 8;
5810 wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE1
] |= _FW_WM(tmp
, SPRITEB_HI
) << 8;
5811 wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE0
] |= _FW_WM(tmp
, SPRITEA_HI
) << 8;
5812 wm
->pipe
[PIPE_A
].plane
[PLANE_PRIMARY
] |= _FW_WM(tmp
, PLANEA_HI
) << 8;
5814 tmp
= I915_READ(DSPFW7
);
5815 wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE1
] = _FW_WM_VLV(tmp
, SPRITED
);
5816 wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
] = _FW_WM_VLV(tmp
, SPRITEC
);
5818 tmp
= I915_READ(DSPHOWM
);
5819 wm
->sr
.plane
|= _FW_WM(tmp
, SR_HI
) << 9;
5820 wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE1
] |= _FW_WM(tmp
, SPRITED_HI
) << 8;
5821 wm
->pipe
[PIPE_B
].plane
[PLANE_SPRITE0
] |= _FW_WM(tmp
, SPRITEC_HI
) << 8;
5822 wm
->pipe
[PIPE_B
].plane
[PLANE_PRIMARY
] |= _FW_WM(tmp
, PLANEB_HI
) << 8;
5823 wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE1
] |= _FW_WM(tmp
, SPRITEB_HI
) << 8;
5824 wm
->pipe
[PIPE_A
].plane
[PLANE_SPRITE0
] |= _FW_WM(tmp
, SPRITEA_HI
) << 8;
5825 wm
->pipe
[PIPE_A
].plane
[PLANE_PRIMARY
] |= _FW_WM(tmp
, PLANEA_HI
) << 8;
5832 void g4x_wm_get_hw_state(struct drm_i915_private
*dev_priv
)
5834 struct g4x_wm_values
*wm
= &dev_priv
->wm
.g4x
;
5835 struct intel_crtc
*crtc
;
5837 g4x_read_wm_values(dev_priv
, wm
);
5839 wm
->cxsr
= I915_READ(FW_BLC_SELF
) & FW_BLC_SELF_EN
;
5841 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
5842 struct intel_crtc_state
*crtc_state
=
5843 to_intel_crtc_state(crtc
->base
.state
);
5844 struct g4x_wm_state
*active
= &crtc
->wm
.active
.g4x
;
5845 struct g4x_pipe_wm
*raw
;
5846 enum pipe pipe
= crtc
->pipe
;
5847 enum plane_id plane_id
;
5848 int level
, max_level
;
5850 active
->cxsr
= wm
->cxsr
;
5851 active
->hpll_en
= wm
->hpll_en
;
5852 active
->fbc_en
= wm
->fbc_en
;
5854 active
->sr
= wm
->sr
;
5855 active
->hpll
= wm
->hpll
;
5857 for_each_plane_id_on_crtc(crtc
, plane_id
) {
5858 active
->wm
.plane
[plane_id
] =
5859 wm
->pipe
[pipe
].plane
[plane_id
];
5862 if (wm
->cxsr
&& wm
->hpll_en
)
5863 max_level
= G4X_WM_LEVEL_HPLL
;
5865 max_level
= G4X_WM_LEVEL_SR
;
5867 max_level
= G4X_WM_LEVEL_NORMAL
;
5869 level
= G4X_WM_LEVEL_NORMAL
;
5870 raw
= &crtc_state
->wm
.g4x
.raw
[level
];
5871 for_each_plane_id_on_crtc(crtc
, plane_id
)
5872 raw
->plane
[plane_id
] = active
->wm
.plane
[plane_id
];
5874 if (++level
> max_level
)
5877 raw
= &crtc_state
->wm
.g4x
.raw
[level
];
5878 raw
->plane
[PLANE_PRIMARY
] = active
->sr
.plane
;
5879 raw
->plane
[PLANE_CURSOR
] = active
->sr
.cursor
;
5880 raw
->plane
[PLANE_SPRITE0
] = 0;
5881 raw
->fbc
= active
->sr
.fbc
;
5883 if (++level
> max_level
)
5886 raw
= &crtc_state
->wm
.g4x
.raw
[level
];
5887 raw
->plane
[PLANE_PRIMARY
] = active
->hpll
.plane
;
5888 raw
->plane
[PLANE_CURSOR
] = active
->hpll
.cursor
;
5889 raw
->plane
[PLANE_SPRITE0
] = 0;
5890 raw
->fbc
= active
->hpll
.fbc
;
5893 for_each_plane_id_on_crtc(crtc
, plane_id
)
5894 g4x_raw_plane_wm_set(crtc_state
, level
,
5895 plane_id
, USHRT_MAX
);
5896 g4x_raw_fbc_wm_set(crtc_state
, level
, USHRT_MAX
);
5898 crtc_state
->wm
.g4x
.optimal
= *active
;
5899 crtc_state
->wm
.g4x
.intermediate
= *active
;
5901 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
5903 wm
->pipe
[pipe
].plane
[PLANE_PRIMARY
],
5904 wm
->pipe
[pipe
].plane
[PLANE_CURSOR
],
5905 wm
->pipe
[pipe
].plane
[PLANE_SPRITE0
]);
5908 DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
5909 wm
->sr
.plane
, wm
->sr
.cursor
, wm
->sr
.fbc
);
5910 DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
5911 wm
->hpll
.plane
, wm
->hpll
.cursor
, wm
->hpll
.fbc
);
5912 DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
5913 yesno(wm
->cxsr
), yesno(wm
->hpll_en
), yesno(wm
->fbc_en
));
5916 void g4x_wm_sanitize(struct drm_i915_private
*dev_priv
)
5918 struct intel_plane
*plane
;
5919 struct intel_crtc
*crtc
;
5921 mutex_lock(&dev_priv
->wm
.wm_mutex
);
5923 for_each_intel_plane(&dev_priv
->drm
, plane
) {
5924 struct intel_crtc
*crtc
=
5925 intel_get_crtc_for_pipe(dev_priv
, plane
->pipe
);
5926 struct intel_crtc_state
*crtc_state
=
5927 to_intel_crtc_state(crtc
->base
.state
);
5928 struct intel_plane_state
*plane_state
=
5929 to_intel_plane_state(plane
->base
.state
);
5930 struct g4x_wm_state
*wm_state
= &crtc_state
->wm
.g4x
.optimal
;
5931 enum plane_id plane_id
= plane
->id
;
5934 if (plane_state
->base
.visible
)
5937 for (level
= 0; level
< 3; level
++) {
5938 struct g4x_pipe_wm
*raw
=
5939 &crtc_state
->wm
.g4x
.raw
[level
];
5941 raw
->plane
[plane_id
] = 0;
5942 wm_state
->wm
.plane
[plane_id
] = 0;
5945 if (plane_id
== PLANE_PRIMARY
) {
5946 for (level
= 0; level
< 3; level
++) {
5947 struct g4x_pipe_wm
*raw
=
5948 &crtc_state
->wm
.g4x
.raw
[level
];
5952 wm_state
->sr
.fbc
= 0;
5953 wm_state
->hpll
.fbc
= 0;
5954 wm_state
->fbc_en
= false;
5958 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
5959 struct intel_crtc_state
*crtc_state
=
5960 to_intel_crtc_state(crtc
->base
.state
);
5962 crtc_state
->wm
.g4x
.intermediate
=
5963 crtc_state
->wm
.g4x
.optimal
;
5964 crtc
->wm
.active
.g4x
= crtc_state
->wm
.g4x
.optimal
;
5967 g4x_program_watermarks(dev_priv
);
5969 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
5972 void vlv_wm_get_hw_state(struct drm_i915_private
*dev_priv
)
5974 struct vlv_wm_values
*wm
= &dev_priv
->wm
.vlv
;
5975 struct intel_crtc
*crtc
;
5978 vlv_read_wm_values(dev_priv
, wm
);
5980 wm
->cxsr
= I915_READ(FW_BLC_SELF_VLV
) & FW_CSPWRDWNEN
;
5981 wm
->level
= VLV_WM_LEVEL_PM2
;
5983 if (IS_CHERRYVIEW(dev_priv
)) {
5984 mutex_lock(&dev_priv
->pcu_lock
);
5986 val
= vlv_punit_read(dev_priv
, PUNIT_REG_DSPFREQ
);
5987 if (val
& DSP_MAXFIFO_PM5_ENABLE
)
5988 wm
->level
= VLV_WM_LEVEL_PM5
;
5991 * If DDR DVFS is disabled in the BIOS, Punit
5992 * will never ack the request. So if that happens
5993 * assume we don't have to enable/disable DDR DVFS
5994 * dynamically. To test that just set the REQ_ACK
5995 * bit to poke the Punit, but don't change the
5996 * HIGH/LOW bits so that we don't actually change
5997 * the current state.
5999 val
= vlv_punit_read(dev_priv
, PUNIT_REG_DDR_SETUP2
);
6000 val
|= FORCE_DDR_FREQ_REQ_ACK
;
6001 vlv_punit_write(dev_priv
, PUNIT_REG_DDR_SETUP2
, val
);
6003 if (wait_for((vlv_punit_read(dev_priv
, PUNIT_REG_DDR_SETUP2
) &
6004 FORCE_DDR_FREQ_REQ_ACK
) == 0, 3)) {
6005 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
6006 "assuming DDR DVFS is disabled\n");
6007 dev_priv
->wm
.max_level
= VLV_WM_LEVEL_PM5
;
6009 val
= vlv_punit_read(dev_priv
, PUNIT_REG_DDR_SETUP2
);
6010 if ((val
& FORCE_DDR_HIGH_FREQ
) == 0)
6011 wm
->level
= VLV_WM_LEVEL_DDR_DVFS
;
6014 mutex_unlock(&dev_priv
->pcu_lock
);
6017 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
6018 struct intel_crtc_state
*crtc_state
=
6019 to_intel_crtc_state(crtc
->base
.state
);
6020 struct vlv_wm_state
*active
= &crtc
->wm
.active
.vlv
;
6021 const struct vlv_fifo_state
*fifo_state
=
6022 &crtc_state
->wm
.vlv
.fifo_state
;
6023 enum pipe pipe
= crtc
->pipe
;
6024 enum plane_id plane_id
;
6027 vlv_get_fifo_size(crtc_state
);
6029 active
->num_levels
= wm
->level
+ 1;
6030 active
->cxsr
= wm
->cxsr
;
6032 for (level
= 0; level
< active
->num_levels
; level
++) {
6033 struct g4x_pipe_wm
*raw
=
6034 &crtc_state
->wm
.vlv
.raw
[level
];
6036 active
->sr
[level
].plane
= wm
->sr
.plane
;
6037 active
->sr
[level
].cursor
= wm
->sr
.cursor
;
6039 for_each_plane_id_on_crtc(crtc
, plane_id
) {
6040 active
->wm
[level
].plane
[plane_id
] =
6041 wm
->pipe
[pipe
].plane
[plane_id
];
6043 raw
->plane
[plane_id
] =
6044 vlv_invert_wm_value(active
->wm
[level
].plane
[plane_id
],
6045 fifo_state
->plane
[plane_id
]);
6049 for_each_plane_id_on_crtc(crtc
, plane_id
)
6050 vlv_raw_plane_wm_set(crtc_state
, level
,
6051 plane_id
, USHRT_MAX
);
6052 vlv_invalidate_wms(crtc
, active
, level
);
6054 crtc_state
->wm
.vlv
.optimal
= *active
;
6055 crtc_state
->wm
.vlv
.intermediate
= *active
;
6057 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
6059 wm
->pipe
[pipe
].plane
[PLANE_PRIMARY
],
6060 wm
->pipe
[pipe
].plane
[PLANE_CURSOR
],
6061 wm
->pipe
[pipe
].plane
[PLANE_SPRITE0
],
6062 wm
->pipe
[pipe
].plane
[PLANE_SPRITE1
]);
6065 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6066 wm
->sr
.plane
, wm
->sr
.cursor
, wm
->level
, wm
->cxsr
);
6069 void vlv_wm_sanitize(struct drm_i915_private
*dev_priv
)
6071 struct intel_plane
*plane
;
6072 struct intel_crtc
*crtc
;
6074 mutex_lock(&dev_priv
->wm
.wm_mutex
);
6076 for_each_intel_plane(&dev_priv
->drm
, plane
) {
6077 struct intel_crtc
*crtc
=
6078 intel_get_crtc_for_pipe(dev_priv
, plane
->pipe
);
6079 struct intel_crtc_state
*crtc_state
=
6080 to_intel_crtc_state(crtc
->base
.state
);
6081 struct intel_plane_state
*plane_state
=
6082 to_intel_plane_state(plane
->base
.state
);
6083 struct vlv_wm_state
*wm_state
= &crtc_state
->wm
.vlv
.optimal
;
6084 const struct vlv_fifo_state
*fifo_state
=
6085 &crtc_state
->wm
.vlv
.fifo_state
;
6086 enum plane_id plane_id
= plane
->id
;
6089 if (plane_state
->base
.visible
)
6092 for (level
= 0; level
< wm_state
->num_levels
; level
++) {
6093 struct g4x_pipe_wm
*raw
=
6094 &crtc_state
->wm
.vlv
.raw
[level
];
6096 raw
->plane
[plane_id
] = 0;
6098 wm_state
->wm
[level
].plane
[plane_id
] =
6099 vlv_invert_wm_value(raw
->plane
[plane_id
],
6100 fifo_state
->plane
[plane_id
]);
6104 for_each_intel_crtc(&dev_priv
->drm
, crtc
) {
6105 struct intel_crtc_state
*crtc_state
=
6106 to_intel_crtc_state(crtc
->base
.state
);
6108 crtc_state
->wm
.vlv
.intermediate
=
6109 crtc_state
->wm
.vlv
.optimal
;
6110 crtc
->wm
.active
.vlv
= crtc_state
->wm
.vlv
.optimal
;
6113 vlv_program_watermarks(dev_priv
);
6115 mutex_unlock(&dev_priv
->wm
.wm_mutex
);
6119 * FIXME should probably kill this and improve
6120 * the real watermark readout/sanitation instead
6122 static void ilk_init_lp_watermarks(struct drm_i915_private
*dev_priv
)
6124 I915_WRITE(WM3_LP_ILK
, I915_READ(WM3_LP_ILK
) & ~WM1_LP_SR_EN
);
6125 I915_WRITE(WM2_LP_ILK
, I915_READ(WM2_LP_ILK
) & ~WM1_LP_SR_EN
);
6126 I915_WRITE(WM1_LP_ILK
, I915_READ(WM1_LP_ILK
) & ~WM1_LP_SR_EN
);
6129 * Don't touch WM1S_LP_EN here.
6130 * Doing so could cause underruns.
6134 void ilk_wm_get_hw_state(struct drm_i915_private
*dev_priv
)
6136 struct ilk_wm_values
*hw
= &dev_priv
->wm
.hw
;
6137 struct intel_crtc
*crtc
;
6139 ilk_init_lp_watermarks(dev_priv
);
6141 for_each_intel_crtc(&dev_priv
->drm
, crtc
)
6142 ilk_pipe_wm_get_hw_state(crtc
);
6144 hw
->wm_lp
[0] = I915_READ(WM1_LP_ILK
);
6145 hw
->wm_lp
[1] = I915_READ(WM2_LP_ILK
);
6146 hw
->wm_lp
[2] = I915_READ(WM3_LP_ILK
);
6148 hw
->wm_lp_spr
[0] = I915_READ(WM1S_LP_ILK
);
6149 if (INTEL_GEN(dev_priv
) >= 7) {
6150 hw
->wm_lp_spr
[1] = I915_READ(WM2S_LP_IVB
);
6151 hw
->wm_lp_spr
[2] = I915_READ(WM3S_LP_IVB
);
6154 if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
))
6155 hw
->partitioning
= (I915_READ(WM_MISC
) & WM_MISC_DATA_PARTITION_5_6
) ?
6156 INTEL_DDB_PART_5_6
: INTEL_DDB_PART_1_2
;
6157 else if (IS_IVYBRIDGE(dev_priv
))
6158 hw
->partitioning
= (I915_READ(DISP_ARB_CTL2
) & DISP_DATA_PARTITION_5_6
) ?
6159 INTEL_DDB_PART_5_6
: INTEL_DDB_PART_1_2
;
6162 !(I915_READ(DISP_ARB_CTL
) & DISP_FBC_WM_DIS
);
6166 * intel_update_watermarks - update FIFO watermark values based on current modes
6167 * @crtc: the #intel_crtc on which to compute the WM
6169 * Calculate watermark values for the various WM regs based on current mode
6170 * and plane configuration.
6172 * There are several cases to deal with here:
6173 * - normal (i.e. non-self-refresh)
6174 * - self-refresh (SR) mode
6175 * - lines are large relative to FIFO size (buffer can hold up to 2)
6176 * - lines are small relative to FIFO size (buffer can hold more than 2
6177 * lines), so need to account for TLB latency
6179 * The normal calculation is:
6180 * watermark = dotclock * bytes per pixel * latency
6181 * where latency is platform & configuration dependent (we assume pessimal
6184 * The SR calculation is:
6185 * watermark = (trunc(latency/line time)+1) * surface width *
6188 * line time = htotal / dotclock
6189 * surface width = hdisplay for normal plane and 64 for cursor
6190 * and latency is assumed to be high, as above.
6192 * The final value programmed to the register should always be rounded up,
6193 * and include an extra 2 entries to account for clock crossings.
6195 * We don't use the sprite, so we can ignore that. And on Crestline we have
6196 * to set the non-SR watermarks to 8.
6198 void intel_update_watermarks(struct intel_crtc
*crtc
)
6200 struct drm_i915_private
*dev_priv
= to_i915(crtc
->base
.dev
);
6202 if (dev_priv
->display
.update_wm
)
6203 dev_priv
->display
.update_wm(crtc
);
6206 void intel_enable_ipc(struct drm_i915_private
*dev_priv
)
6210 if (!HAS_IPC(dev_priv
))
6213 val
= I915_READ(DISP_ARB_CTL2
);
6215 if (dev_priv
->ipc_enabled
)
6216 val
|= DISP_IPC_ENABLE
;
6218 val
&= ~DISP_IPC_ENABLE
;
6220 I915_WRITE(DISP_ARB_CTL2
, val
);
6223 void intel_init_ipc(struct drm_i915_private
*dev_priv
)
6225 if (!HAS_IPC(dev_priv
))
6228 /* Display WA #1141: SKL:all KBL:all CFL */
6229 if (IS_KABYLAKE(dev_priv
) || IS_COFFEELAKE(dev_priv
))
6230 dev_priv
->ipc_enabled
= dev_priv
->dram_info
.symmetric_memory
;
6232 dev_priv
->ipc_enabled
= true;
6234 intel_enable_ipc(dev_priv
);
/*
 * Lock protecting IPS related data structures
 */
DEFINE_SPINLOCK(mchdev_lock);
6242 bool ironlake_set_drps(struct drm_i915_private
*dev_priv
, u8 val
)
6246 lockdep_assert_held(&mchdev_lock
);
6248 rgvswctl
= I915_READ16(MEMSWCTL
);
6249 if (rgvswctl
& MEMCTL_CMD_STS
) {
6250 DRM_DEBUG("gpu busy, RCS change rejected\n");
6251 return false; /* still busy with another command */
6254 rgvswctl
= (MEMCTL_CMD_CHFREQ
<< MEMCTL_CMD_SHIFT
) |
6255 (val
<< MEMCTL_FREQ_SHIFT
) | MEMCTL_SFCAVM
;
6256 I915_WRITE16(MEMSWCTL
, rgvswctl
);
6257 POSTING_READ16(MEMSWCTL
);
6259 rgvswctl
|= MEMCTL_CMD_STS
;
6260 I915_WRITE16(MEMSWCTL
, rgvswctl
);
6265 static void ironlake_enable_drps(struct drm_i915_private
*dev_priv
)
6268 u8 fmax
, fmin
, fstart
, vstart
;
6270 spin_lock_irq(&mchdev_lock
);
6272 rgvmodectl
= I915_READ(MEMMODECTL
);
6274 /* Enable temp reporting */
6275 I915_WRITE16(PMMISC
, I915_READ(PMMISC
) | MCPPCE_EN
);
6276 I915_WRITE16(TSC1
, I915_READ(TSC1
) | TSE
);
6278 /* 100ms RC evaluation intervals */
6279 I915_WRITE(RCUPEI
, 100000);
6280 I915_WRITE(RCDNEI
, 100000);
6282 /* Set max/min thresholds to 90ms and 80ms respectively */
6283 I915_WRITE(RCBMAXAVG
, 90000);
6284 I915_WRITE(RCBMINAVG
, 80000);
6286 I915_WRITE(MEMIHYST
, 1);
6288 /* Set up min, max, and cur for interrupt handling */
6289 fmax
= (rgvmodectl
& MEMMODE_FMAX_MASK
) >> MEMMODE_FMAX_SHIFT
;
6290 fmin
= (rgvmodectl
& MEMMODE_FMIN_MASK
);
6291 fstart
= (rgvmodectl
& MEMMODE_FSTART_MASK
) >>
6292 MEMMODE_FSTART_SHIFT
;
6294 vstart
= (I915_READ(PXVFREQ(fstart
)) & PXVFREQ_PX_MASK
) >>
6297 dev_priv
->ips
.fmax
= fmax
; /* IPS callback will increase this */
6298 dev_priv
->ips
.fstart
= fstart
;
6300 dev_priv
->ips
.max_delay
= fstart
;
6301 dev_priv
->ips
.min_delay
= fmin
;
6302 dev_priv
->ips
.cur_delay
= fstart
;
6304 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6305 fmax
, fmin
, fstart
);
6307 I915_WRITE(MEMINTREN
, MEMINT_CX_SUPR_EN
| MEMINT_EVAL_CHG_EN
);
6310 * Interrupts will be enabled in ironlake_irq_postinstall
6313 I915_WRITE(VIDSTART
, vstart
);
6314 POSTING_READ(VIDSTART
);
6316 rgvmodectl
|= MEMMODE_SWMODE_EN
;
6317 I915_WRITE(MEMMODECTL
, rgvmodectl
);
6319 if (wait_for_atomic((I915_READ(MEMSWCTL
) & MEMCTL_CMD_STS
) == 0, 10))
6320 DRM_ERROR("stuck trying to change perf mode\n");
6323 ironlake_set_drps(dev_priv
, fstart
);
6325 dev_priv
->ips
.last_count1
= I915_READ(DMIEC
) +
6326 I915_READ(DDREC
) + I915_READ(CSIEC
);
6327 dev_priv
->ips
.last_time1
= jiffies_to_msecs(jiffies
);
6328 dev_priv
->ips
.last_count2
= I915_READ(GFXEC
);
6329 dev_priv
->ips
.last_time2
= ktime_get_raw_ns();
6331 spin_unlock_irq(&mchdev_lock
);
6334 static void ironlake_disable_drps(struct drm_i915_private
*dev_priv
)
6338 spin_lock_irq(&mchdev_lock
);
6340 rgvswctl
= I915_READ16(MEMSWCTL
);
6342 /* Ack interrupts, disable EFC interrupt */
6343 I915_WRITE(MEMINTREN
, I915_READ(MEMINTREN
) & ~MEMINT_EVAL_CHG_EN
);
6344 I915_WRITE(MEMINTRSTS
, MEMINT_EVAL_CHG
);
6345 I915_WRITE(DEIER
, I915_READ(DEIER
) & ~DE_PCU_EVENT
);
6346 I915_WRITE(DEIIR
, DE_PCU_EVENT
);
6347 I915_WRITE(DEIMR
, I915_READ(DEIMR
) | DE_PCU_EVENT
);
6349 /* Go back to the starting frequency */
6350 ironlake_set_drps(dev_priv
, dev_priv
->ips
.fstart
);
6352 rgvswctl
|= MEMCTL_CMD_STS
;
6353 I915_WRITE(MEMSWCTL
, rgvswctl
);
6356 spin_unlock_irq(&mchdev_lock
);
6359 /* There's a funny hw issue where the hw returns all 0 when reading from
6360 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6361 * ourselves, instead of doing a rmw cycle (which might result in us clearing
6362 * all limits and the gpu stuck at whatever frequency it is at atm).
6364 static u32
intel_rps_limits(struct drm_i915_private
*dev_priv
, u8 val
)
6366 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6369 /* Only set the down limit when we've reached the lowest level to avoid
6370 * getting more interrupts, otherwise leave this clear. This prevents a
6371 * race in the hw when coming out of rc6: There's a tiny window where
6372 * the hw runs at the minimal clock before selecting the desired
6373 * frequency, if the down threshold expires in that window we will not
6374 * receive a down interrupt. */
6375 if (INTEL_GEN(dev_priv
) >= 9) {
6376 limits
= (rps
->max_freq_softlimit
) << 23;
6377 if (val
<= rps
->min_freq_softlimit
)
6378 limits
|= (rps
->min_freq_softlimit
) << 14;
6380 limits
= rps
->max_freq_softlimit
<< 24;
6381 if (val
<= rps
->min_freq_softlimit
)
6382 limits
|= rps
->min_freq_softlimit
<< 16;
6388 static void rps_set_power(struct drm_i915_private
*dev_priv
, int new_power
)
6390 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6391 u32 threshold_up
= 0, threshold_down
= 0; /* in % */
6392 u32 ei_up
= 0, ei_down
= 0;
6394 lockdep_assert_held(&rps
->power
.mutex
);
6396 if (new_power
== rps
->power
.mode
)
6399 /* Note the units here are not exactly 1us, but 1280ns. */
6400 switch (new_power
) {
6402 /* Upclock if more than 95% busy over 16ms */
6406 /* Downclock if less than 85% busy over 32ms */
6408 threshold_down
= 85;
6412 /* Upclock if more than 90% busy over 13ms */
6416 /* Downclock if less than 75% busy over 32ms */
6418 threshold_down
= 75;
6422 /* Upclock if more than 85% busy over 10ms */
6426 /* Downclock if less than 60% busy over 32ms */
6428 threshold_down
= 60;
6432 /* When byt can survive without system hang with dynamic
6433 * sw freq adjustments, this restriction can be lifted.
6435 if (IS_VALLEYVIEW(dev_priv
))
6438 I915_WRITE(GEN6_RP_UP_EI
,
6439 GT_INTERVAL_FROM_US(dev_priv
, ei_up
));
6440 I915_WRITE(GEN6_RP_UP_THRESHOLD
,
6441 GT_INTERVAL_FROM_US(dev_priv
,
6442 ei_up
* threshold_up
/ 100));
6444 I915_WRITE(GEN6_RP_DOWN_EI
,
6445 GT_INTERVAL_FROM_US(dev_priv
, ei_down
));
6446 I915_WRITE(GEN6_RP_DOWN_THRESHOLD
,
6447 GT_INTERVAL_FROM_US(dev_priv
,
6448 ei_down
* threshold_down
/ 100));
6450 I915_WRITE(GEN6_RP_CONTROL
,
6451 GEN6_RP_MEDIA_TURBO
|
6452 GEN6_RP_MEDIA_HW_NORMAL_MODE
|
6453 GEN6_RP_MEDIA_IS_GFX
|
6455 GEN6_RP_UP_BUSY_AVG
|
6456 GEN6_RP_DOWN_IDLE_AVG
);
6459 rps
->power
.mode
= new_power
;
6460 rps
->power
.up_threshold
= threshold_up
;
6461 rps
->power
.down_threshold
= threshold_down
;
6464 static void gen6_set_rps_thresholds(struct drm_i915_private
*dev_priv
, u8 val
)
6466 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6469 new_power
= rps
->power
.mode
;
6470 switch (rps
->power
.mode
) {
6472 if (val
> rps
->efficient_freq
+ 1 &&
6473 val
> rps
->cur_freq
)
6474 new_power
= BETWEEN
;
6478 if (val
<= rps
->efficient_freq
&&
6479 val
< rps
->cur_freq
)
6480 new_power
= LOW_POWER
;
6481 else if (val
>= rps
->rp0_freq
&&
6482 val
> rps
->cur_freq
)
6483 new_power
= HIGH_POWER
;
6487 if (val
< (rps
->rp1_freq
+ rps
->rp0_freq
) >> 1 &&
6488 val
< rps
->cur_freq
)
6489 new_power
= BETWEEN
;
6492 /* Max/min bins are special */
6493 if (val
<= rps
->min_freq_softlimit
)
6494 new_power
= LOW_POWER
;
6495 if (val
>= rps
->max_freq_softlimit
)
6496 new_power
= HIGH_POWER
;
6498 mutex_lock(&rps
->power
.mutex
);
6499 if (rps
->power
.interactive
)
6500 new_power
= HIGH_POWER
;
6501 rps_set_power(dev_priv
, new_power
);
6502 mutex_unlock(&rps
->power
.mutex
);
6505 void intel_rps_mark_interactive(struct drm_i915_private
*i915
, bool interactive
)
6507 struct intel_rps
*rps
= &i915
->gt_pm
.rps
;
6509 if (INTEL_GEN(i915
) < 6)
6512 mutex_lock(&rps
->power
.mutex
);
6514 if (!rps
->power
.interactive
++ && READ_ONCE(i915
->gt
.awake
))
6515 rps_set_power(i915
, HIGH_POWER
);
6517 GEM_BUG_ON(!rps
->power
.interactive
);
6518 rps
->power
.interactive
--;
6520 mutex_unlock(&rps
->power
.mutex
);
6523 static u32
gen6_rps_pm_mask(struct drm_i915_private
*dev_priv
, u8 val
)
6525 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6528 /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
6529 if (val
> rps
->min_freq_softlimit
)
6530 mask
|= GEN6_PM_RP_UP_EI_EXPIRED
| GEN6_PM_RP_DOWN_THRESHOLD
| GEN6_PM_RP_DOWN_TIMEOUT
;
6531 if (val
< rps
->max_freq_softlimit
)
6532 mask
|= GEN6_PM_RP_UP_EI_EXPIRED
| GEN6_PM_RP_UP_THRESHOLD
;
6534 mask
&= dev_priv
->pm_rps_events
;
6536 return gen6_sanitize_rps_pm_mask(dev_priv
, ~mask
);
6539 /* gen6_set_rps is called to update the frequency request, but should also be
6540 * called when the range (min_delay and max_delay) is modified so that we can
6541 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6542 static int gen6_set_rps(struct drm_i915_private
*dev_priv
, u8 val
)
6544 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6546 /* min/max delay may still have been modified so be sure to
6547 * write the limits value.
6549 if (val
!= rps
->cur_freq
) {
6550 gen6_set_rps_thresholds(dev_priv
, val
);
6552 if (INTEL_GEN(dev_priv
) >= 9)
6553 I915_WRITE(GEN6_RPNSWREQ
,
6554 GEN9_FREQUENCY(val
));
6555 else if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
))
6556 I915_WRITE(GEN6_RPNSWREQ
,
6557 HSW_FREQUENCY(val
));
6559 I915_WRITE(GEN6_RPNSWREQ
,
6560 GEN6_FREQUENCY(val
) |
6562 GEN6_AGGRESSIVE_TURBO
);
6565 /* Make sure we continue to get interrupts
6566 * until we hit the minimum or maximum frequencies.
6568 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS
, intel_rps_limits(dev_priv
, val
));
6569 I915_WRITE(GEN6_PMINTRMSK
, gen6_rps_pm_mask(dev_priv
, val
));
6571 rps
->cur_freq
= val
;
6572 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv
, val
));
6577 static int valleyview_set_rps(struct drm_i915_private
*dev_priv
, u8 val
)
6581 if (WARN_ONCE(IS_CHERRYVIEW(dev_priv
) && (val
& 1),
6582 "Odd GPU freq value\n"))
6585 I915_WRITE(GEN6_PMINTRMSK
, gen6_rps_pm_mask(dev_priv
, val
));
6587 if (val
!= dev_priv
->gt_pm
.rps
.cur_freq
) {
6588 err
= vlv_punit_write(dev_priv
, PUNIT_REG_GPU_FREQ_REQ
, val
);
6592 gen6_set_rps_thresholds(dev_priv
, val
);
6595 dev_priv
->gt_pm
.rps
.cur_freq
= val
;
6596 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv
, val
));
6601 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
6603 * * If Gfx is Idle, then
6604 * 1. Forcewake Media well.
6605 * 2. Request idle freq.
6606 * 3. Release Forcewake of Media well.
6608 static void vlv_set_rps_idle(struct drm_i915_private
*dev_priv
)
6610 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6611 u32 val
= rps
->idle_freq
;
6614 if (rps
->cur_freq
<= val
)
6617 /* The punit delays the write of the frequency and voltage until it
6618 * determines the GPU is awake. During normal usage we don't want to
6619 * waste power changing the frequency if the GPU is sleeping (rc6).
6620 * However, the GPU and driver is now idle and we do not want to delay
6621 * switching to minimum voltage (reducing power whilst idle) as we do
6622 * not expect to be woken in the near future and so must flush the
6623 * change by waking the device.
6625 * We choose to take the media powerwell (either would do to trick the
6626 * punit into committing the voltage change) as that takes a lot less
6627 * power than the render powerwell.
6629 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_MEDIA
);
6630 err
= valleyview_set_rps(dev_priv
, val
);
6631 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_MEDIA
);
6634 DRM_ERROR("Failed to set RPS for idle\n");
6637 void gen6_rps_busy(struct drm_i915_private
*dev_priv
)
6639 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6641 mutex_lock(&dev_priv
->pcu_lock
);
6645 if (dev_priv
->pm_rps_events
& GEN6_PM_RP_UP_EI_EXPIRED
)
6646 gen6_rps_reset_ei(dev_priv
);
6647 I915_WRITE(GEN6_PMINTRMSK
,
6648 gen6_rps_pm_mask(dev_priv
, rps
->cur_freq
));
6650 gen6_enable_rps_interrupts(dev_priv
);
6652 /* Use the user's desired frequency as a guide, but for better
6653 * performance, jump directly to RPe as our starting frequency.
6655 freq
= max(rps
->cur_freq
,
6656 rps
->efficient_freq
);
6658 if (intel_set_rps(dev_priv
,
6660 rps
->min_freq_softlimit
,
6661 rps
->max_freq_softlimit
)))
6662 DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
6664 mutex_unlock(&dev_priv
->pcu_lock
);
6667 void gen6_rps_idle(struct drm_i915_private
*dev_priv
)
6669 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6671 /* Flush our bottom-half so that it does not race with us
6672 * setting the idle frequency and so that it is bounded by
6673 * our rpm wakeref. And then disable the interrupts to stop any
6674 * futher RPS reclocking whilst we are asleep.
6676 gen6_disable_rps_interrupts(dev_priv
);
6678 mutex_lock(&dev_priv
->pcu_lock
);
6680 if (IS_VALLEYVIEW(dev_priv
) || IS_CHERRYVIEW(dev_priv
))
6681 vlv_set_rps_idle(dev_priv
);
6683 gen6_set_rps(dev_priv
, rps
->idle_freq
);
6685 I915_WRITE(GEN6_PMINTRMSK
,
6686 gen6_sanitize_rps_pm_mask(dev_priv
, ~0));
6688 mutex_unlock(&dev_priv
->pcu_lock
);
6691 void gen6_rps_boost(struct i915_request
*rq
,
6692 struct intel_rps_client
*rps_client
)
6694 struct intel_rps
*rps
= &rq
->i915
->gt_pm
.rps
;
6695 unsigned long flags
;
6698 /* This is intentionally racy! We peek at the state here, then
6699 * validate inside the RPS worker.
6704 if (i915_request_signaled(rq
))
6707 /* Serializes with i915_request_retire() */
6709 spin_lock_irqsave(&rq
->lock
, flags
);
6710 if (!rq
->waitboost
&& !dma_fence_is_signaled_locked(&rq
->fence
)) {
6711 boost
= !atomic_fetch_inc(&rps
->num_waiters
);
6712 rq
->waitboost
= true;
6714 spin_unlock_irqrestore(&rq
->lock
, flags
);
6718 if (READ_ONCE(rps
->cur_freq
) < rps
->boost_freq
)
6719 schedule_work(&rps
->work
);
6721 atomic_inc(rps_client
? &rps_client
->boosts
: &rps
->boosts
);
6724 int intel_set_rps(struct drm_i915_private
*dev_priv
, u8 val
)
6726 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6729 lockdep_assert_held(&dev_priv
->pcu_lock
);
6730 GEM_BUG_ON(val
> rps
->max_freq
);
6731 GEM_BUG_ON(val
< rps
->min_freq
);
6733 if (!rps
->enabled
) {
6734 rps
->cur_freq
= val
;
6738 if (IS_VALLEYVIEW(dev_priv
) || IS_CHERRYVIEW(dev_priv
))
6739 err
= valleyview_set_rps(dev_priv
, val
);
6741 err
= gen6_set_rps(dev_priv
, val
);
6746 static void gen9_disable_rc6(struct drm_i915_private
*dev_priv
)
6748 I915_WRITE(GEN6_RC_CONTROL
, 0);
6749 I915_WRITE(GEN9_PG_ENABLE
, 0);
6752 static void gen9_disable_rps(struct drm_i915_private
*dev_priv
)
6754 I915_WRITE(GEN6_RP_CONTROL
, 0);
6757 static void gen6_disable_rc6(struct drm_i915_private
*dev_priv
)
6759 I915_WRITE(GEN6_RC_CONTROL
, 0);
6762 static void gen6_disable_rps(struct drm_i915_private
*dev_priv
)
6764 I915_WRITE(GEN6_RPNSWREQ
, 1 << 31);
6765 I915_WRITE(GEN6_RP_CONTROL
, 0);
6768 static void cherryview_disable_rc6(struct drm_i915_private
*dev_priv
)
6770 I915_WRITE(GEN6_RC_CONTROL
, 0);
6773 static void cherryview_disable_rps(struct drm_i915_private
*dev_priv
)
6775 I915_WRITE(GEN6_RP_CONTROL
, 0);
6778 static void valleyview_disable_rc6(struct drm_i915_private
*dev_priv
)
6780 /* We're doing forcewake before Disabling RC6,
6781 * This what the BIOS expects when going into suspend */
6782 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
6784 I915_WRITE(GEN6_RC_CONTROL
, 0);
6786 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
6789 static void valleyview_disable_rps(struct drm_i915_private
*dev_priv
)
6791 I915_WRITE(GEN6_RP_CONTROL
, 0);
6794 static bool bxt_check_bios_rc6_setup(struct drm_i915_private
*dev_priv
)
6796 bool enable_rc6
= true;
6797 unsigned long rc6_ctx_base
;
6801 rc_ctl
= I915_READ(GEN6_RC_CONTROL
);
6802 rc_sw_target
= (I915_READ(GEN6_RC_STATE
) & RC_SW_TARGET_STATE_MASK
) >>
6803 RC_SW_TARGET_STATE_SHIFT
;
6804 DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6805 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6806 onoff(rc_ctl
& GEN6_RC_CTL_HW_ENABLE
),
6807 onoff(rc_ctl
& GEN6_RC_CTL_RC6_ENABLE
),
6810 if (!(I915_READ(RC6_LOCATION
) & RC6_CTX_IN_DRAM
)) {
6811 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6816 * The exact context size is not known for BXT, so assume a page size
6819 rc6_ctx_base
= I915_READ(RC6_CTX_BASE
) & RC6_CTX_BASE_MASK
;
6820 if (!((rc6_ctx_base
>= dev_priv
->dsm_reserved
.start
) &&
6821 (rc6_ctx_base
+ PAGE_SIZE
< dev_priv
->dsm_reserved
.end
))) {
6822 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6826 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT
) & IDLE_TIME_MASK
) > 1) &&
6827 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0
) & IDLE_TIME_MASK
) > 1) &&
6828 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT
) & IDLE_TIME_MASK
) > 1) &&
6829 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT
) & IDLE_TIME_MASK
) > 1))) {
6830 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
6834 if (!I915_READ(GEN8_PUSHBUS_CONTROL
) ||
6835 !I915_READ(GEN8_PUSHBUS_ENABLE
) ||
6836 !I915_READ(GEN8_PUSHBUS_SHIFT
)) {
6837 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6841 if (!I915_READ(GEN6_GFXPAUSE
)) {
6842 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
6846 if (!I915_READ(GEN8_MISC_CTRL0
)) {
6847 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
6854 static bool sanitize_rc6(struct drm_i915_private
*i915
)
6856 struct intel_device_info
*info
= mkwrite_device_info(i915
);
6858 /* Powersaving is controlled by the host when inside a VM */
6859 if (intel_vgpu_active(i915
))
6862 if (info
->has_rc6
&&
6863 IS_GEN9_LP(i915
) && !bxt_check_bios_rc6_setup(i915
)) {
6864 DRM_INFO("RC6 disabled by BIOS\n");
6869 * We assume that we do not have any deep rc6 levels if we don't have
6870 * have the previous rc6 level supported, i.e. we use HAS_RC6()
6871 * as the initial coarse check for rc6 in general, moving on to
6872 * progressively finer/deeper levels.
6874 if (!info
->has_rc6
&& info
->has_rc6p
)
6877 return info
->has_rc6
;
6880 static void gen6_init_rps_frequencies(struct drm_i915_private
*dev_priv
)
6882 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6884 /* All of these values are in units of 50MHz */
6886 /* static values from HW: RP0 > RP1 > RPn (min_freq) */
6887 if (IS_GEN9_LP(dev_priv
)) {
6888 u32 rp_state_cap
= I915_READ(BXT_RP_STATE_CAP
);
6889 rps
->rp0_freq
= (rp_state_cap
>> 16) & 0xff;
6890 rps
->rp1_freq
= (rp_state_cap
>> 8) & 0xff;
6891 rps
->min_freq
= (rp_state_cap
>> 0) & 0xff;
6893 u32 rp_state_cap
= I915_READ(GEN6_RP_STATE_CAP
);
6894 rps
->rp0_freq
= (rp_state_cap
>> 0) & 0xff;
6895 rps
->rp1_freq
= (rp_state_cap
>> 8) & 0xff;
6896 rps
->min_freq
= (rp_state_cap
>> 16) & 0xff;
6898 /* hw_max = RP0 until we check for overclocking */
6899 rps
->max_freq
= rps
->rp0_freq
;
6901 rps
->efficient_freq
= rps
->rp1_freq
;
6902 if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
) ||
6903 IS_GEN9_BC(dev_priv
) || INTEL_GEN(dev_priv
) >= 10) {
6904 u32 ddcc_status
= 0;
6906 if (sandybridge_pcode_read(dev_priv
,
6907 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL
,
6909 rps
->efficient_freq
=
6911 ((ddcc_status
>> 8) & 0xff),
6916 if (IS_GEN9_BC(dev_priv
) || INTEL_GEN(dev_priv
) >= 10) {
6917 /* Store the frequency values in 16.66 MHZ units, which is
6918 * the natural hardware unit for SKL
6920 rps
->rp0_freq
*= GEN9_FREQ_SCALER
;
6921 rps
->rp1_freq
*= GEN9_FREQ_SCALER
;
6922 rps
->min_freq
*= GEN9_FREQ_SCALER
;
6923 rps
->max_freq
*= GEN9_FREQ_SCALER
;
6924 rps
->efficient_freq
*= GEN9_FREQ_SCALER
;
6928 static void reset_rps(struct drm_i915_private
*dev_priv
,
6929 int (*set
)(struct drm_i915_private
*, u8
))
6931 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
6932 u8 freq
= rps
->cur_freq
;
6935 rps
->power
.mode
= -1;
6938 if (set(dev_priv
, freq
))
6939 DRM_ERROR("Failed to reset RPS to initial values\n");
6942 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
6943 static void gen9_enable_rps(struct drm_i915_private
*dev_priv
)
6945 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
6947 /* Program defaults and thresholds for RPS */
6948 if (IS_GEN(dev_priv
, 9))
6949 I915_WRITE(GEN6_RC_VIDEO_FREQ
,
6950 GEN9_FREQUENCY(dev_priv
->gt_pm
.rps
.rp1_freq
));
6952 /* 1 second timeout*/
6953 I915_WRITE(GEN6_RP_DOWN_TIMEOUT
,
6954 GT_INTERVAL_FROM_US(dev_priv
, 1000000));
6956 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS
, 0xa);
6958 /* Leaning on the below call to gen6_set_rps to program/setup the
6959 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
6960 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
6961 reset_rps(dev_priv
, gen6_set_rps
);
6963 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
6966 static void gen9_enable_rc6(struct drm_i915_private
*dev_priv
)
6968 struct intel_engine_cs
*engine
;
6969 enum intel_engine_id id
;
6972 /* 1a: Software RC state - RC0 */
6973 I915_WRITE(GEN6_RC_STATE
, 0);
6975 /* 1b: Get forcewake during program sequence. Although the driver
6976 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
6977 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
6979 /* 2a: Disable RC states. */
6980 I915_WRITE(GEN6_RC_CONTROL
, 0);
6982 /* 2b: Program RC6 thresholds.*/
6983 if (INTEL_GEN(dev_priv
) >= 10) {
6984 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT
, 54 << 16 | 85);
6985 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT
, 150);
6986 } else if (IS_SKYLAKE(dev_priv
)) {
6988 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
6989 * when CPG is enabled
6991 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT
, 108 << 16);
6993 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT
, 54 << 16);
6996 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL
, 125000); /* 12500 * 1280ns */
6997 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS
, 25); /* 25 * 1280ns */
6998 for_each_engine(engine
, dev_priv
, id
)
6999 I915_WRITE(RING_MAX_IDLE(engine
->mmio_base
), 10);
7001 if (HAS_GUC(dev_priv
))
7002 I915_WRITE(GUC_MAX_IDLE_COUNT
, 0xA);
7004 I915_WRITE(GEN6_RC_SLEEP
, 0);
7007 * 2c: Program Coarse Power Gating Policies.
7009 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
7010 * use instead is a more conservative estimate for the maximum time
7011 * it takes us to service a CS interrupt and submit a new ELSP - that
7012 * is the time which the GPU is idle waiting for the CPU to select the
7013 * next request to execute. If the idle hysteresis is less than that
7014 * interrupt service latency, the hardware will automatically gate
7015 * the power well and we will then incur the wake up cost on top of
7016 * the service latency. A similar guide from intel_pstate is that we
7017 * do not want the enable hysteresis to less than the wakeup latency.
7019 * igt/gem_exec_nop/sequential provides a rough estimate for the
7020 * service latency, and puts it around 10us for Broadwell (and other
7021 * big core) and around 40us for Broxton (and other low power cores).
7022 * [Note that for legacy ringbuffer submission, this is less than 1us!]
7023 * However, the wakeup latency on Broxton is closer to 100us. To be
7024 * conservative, we have to factor in a context switch on top (due
7027 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS
, 250);
7028 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS
, 250);
7030 /* 3a: Enable RC6 */
7031 I915_WRITE(GEN6_RC6_THRESHOLD
, 37500); /* 37.5/125ms per EI */
7033 /* WaRsUseTimeoutMode:cnl (pre-prod) */
7034 if (IS_CNL_REVID(dev_priv
, CNL_REVID_A0
, CNL_REVID_C0
))
7035 rc6_mode
= GEN7_RC_CTL_TO_MODE
;
7037 rc6_mode
= GEN6_RC_CTL_EI_MODE(1);
7039 I915_WRITE(GEN6_RC_CONTROL
,
7040 GEN6_RC_CTL_HW_ENABLE
|
7041 GEN6_RC_CTL_RC6_ENABLE
|
7045 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
7046 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
7048 if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv
))
7049 I915_WRITE(GEN9_PG_ENABLE
, 0);
7051 I915_WRITE(GEN9_PG_ENABLE
,
7052 GEN9_RENDER_PG_ENABLE
| GEN9_MEDIA_PG_ENABLE
);
7054 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
7057 static void gen8_enable_rc6(struct drm_i915_private
*dev_priv
)
7059 struct intel_engine_cs
*engine
;
7060 enum intel_engine_id id
;
7062 /* 1a: Software RC state - RC0 */
7063 I915_WRITE(GEN6_RC_STATE
, 0);
7065 /* 1b: Get forcewake during program sequence. Although the driver
7066 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7067 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
7069 /* 2a: Disable RC states. */
7070 I915_WRITE(GEN6_RC_CONTROL
, 0);
7072 /* 2b: Program RC6 thresholds.*/
7073 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT
, 40 << 16);
7074 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL
, 125000); /* 12500 * 1280ns */
7075 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS
, 25); /* 25 * 1280ns */
7076 for_each_engine(engine
, dev_priv
, id
)
7077 I915_WRITE(RING_MAX_IDLE(engine
->mmio_base
), 10);
7078 I915_WRITE(GEN6_RC_SLEEP
, 0);
7079 I915_WRITE(GEN6_RC6_THRESHOLD
, 625); /* 800us/1.28 for TO */
7083 I915_WRITE(GEN6_RC_CONTROL
,
7084 GEN6_RC_CTL_HW_ENABLE
|
7085 GEN7_RC_CTL_TO_MODE
|
7086 GEN6_RC_CTL_RC6_ENABLE
);
7088 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
7091 static void gen8_enable_rps(struct drm_i915_private
*dev_priv
)
7093 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
7095 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
7097 /* 1 Program defaults and thresholds for RPS*/
7098 I915_WRITE(GEN6_RPNSWREQ
,
7099 HSW_FREQUENCY(rps
->rp1_freq
));
7100 I915_WRITE(GEN6_RC_VIDEO_FREQ
,
7101 HSW_FREQUENCY(rps
->rp1_freq
));
7102 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7103 I915_WRITE(GEN6_RP_DOWN_TIMEOUT
, 100000000 / 128); /* 1 second timeout */
7105 /* Docs recommend 900MHz, and 300 MHz respectively */
7106 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS
,
7107 rps
->max_freq_softlimit
<< 24 |
7108 rps
->min_freq_softlimit
<< 16);
7110 I915_WRITE(GEN6_RP_UP_THRESHOLD
, 7600000 / 128); /* 76ms busyness per EI, 90% */
7111 I915_WRITE(GEN6_RP_DOWN_THRESHOLD
, 31300000 / 128); /* 313ms busyness per EI, 70%*/
7112 I915_WRITE(GEN6_RP_UP_EI
, 66000); /* 84.48ms, XXX: random? */
7113 I915_WRITE(GEN6_RP_DOWN_EI
, 350000); /* 448ms, XXX: random? */
7115 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS
, 10);
7118 I915_WRITE(GEN6_RP_CONTROL
,
7119 GEN6_RP_MEDIA_TURBO
|
7120 GEN6_RP_MEDIA_HW_NORMAL_MODE
|
7121 GEN6_RP_MEDIA_IS_GFX
|
7123 GEN6_RP_UP_BUSY_AVG
|
7124 GEN6_RP_DOWN_IDLE_AVG
);
7126 reset_rps(dev_priv
, gen6_set_rps
);
7128 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
7131 static void gen6_enable_rc6(struct drm_i915_private
*dev_priv
)
7133 struct intel_engine_cs
*engine
;
7134 enum intel_engine_id id
;
7135 u32 rc6vids
, rc6_mask
;
7139 I915_WRITE(GEN6_RC_STATE
, 0);
7141 /* Clear the DBG now so we don't confuse earlier errors */
7142 gtfifodbg
= I915_READ(GTFIFODBG
);
7144 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg
);
7145 I915_WRITE(GTFIFODBG
, gtfifodbg
);
7148 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
7150 /* disable the counters and set deterministic thresholds */
7151 I915_WRITE(GEN6_RC_CONTROL
, 0);
7153 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT
, 1000 << 16);
7154 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT
, 40 << 16 | 30);
7155 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT
, 30);
7156 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL
, 125000);
7157 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS
, 25);
7159 for_each_engine(engine
, dev_priv
, id
)
7160 I915_WRITE(RING_MAX_IDLE(engine
->mmio_base
), 10);
7162 I915_WRITE(GEN6_RC_SLEEP
, 0);
7163 I915_WRITE(GEN6_RC1e_THRESHOLD
, 1000);
7164 if (IS_IVYBRIDGE(dev_priv
))
7165 I915_WRITE(GEN6_RC6_THRESHOLD
, 125000);
7167 I915_WRITE(GEN6_RC6_THRESHOLD
, 50000);
7168 I915_WRITE(GEN6_RC6p_THRESHOLD
, 150000);
7169 I915_WRITE(GEN6_RC6pp_THRESHOLD
, 64000); /* unused */
7171 /* We don't use those on Haswell */
7172 rc6_mask
= GEN6_RC_CTL_RC6_ENABLE
;
7173 if (HAS_RC6p(dev_priv
))
7174 rc6_mask
|= GEN6_RC_CTL_RC6p_ENABLE
;
7175 if (HAS_RC6pp(dev_priv
))
7176 rc6_mask
|= GEN6_RC_CTL_RC6pp_ENABLE
;
7177 I915_WRITE(GEN6_RC_CONTROL
,
7179 GEN6_RC_CTL_EI_MODE(1) |
7180 GEN6_RC_CTL_HW_ENABLE
);
7183 ret
= sandybridge_pcode_read(dev_priv
, GEN6_PCODE_READ_RC6VIDS
, &rc6vids
);
7184 if (IS_GEN(dev_priv
, 6) && ret
) {
7185 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
7186 } else if (IS_GEN(dev_priv
, 6) && (GEN6_DECODE_RC6_VID(rc6vids
& 0xff) < 450)) {
7187 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7188 GEN6_DECODE_RC6_VID(rc6vids
& 0xff), 450);
7189 rc6vids
&= 0xffff00;
7190 rc6vids
|= GEN6_ENCODE_RC6_VID(450);
7191 ret
= sandybridge_pcode_write(dev_priv
, GEN6_PCODE_WRITE_RC6VIDS
, rc6vids
);
7193 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7196 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
7199 static void gen6_enable_rps(struct drm_i915_private
*dev_priv
)
7201 /* Here begins a magic sequence of register writes to enable
7202 * auto-downclocking.
7204 * Perhaps there might be some value in exposing these to
7207 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
7209 /* Power down if completely idle for over 50ms */
7210 I915_WRITE(GEN6_RP_DOWN_TIMEOUT
, 50000);
7211 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS
, 10);
7213 reset_rps(dev_priv
, gen6_set_rps
);
7215 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
7218 static void gen6_update_ring_freq(struct drm_i915_private
*dev_priv
)
7220 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
7221 const int min_freq
= 15;
7222 const int scaling_factor
= 180;
7223 unsigned int gpu_freq
;
7224 unsigned int max_ia_freq
, min_ring_freq
;
7225 unsigned int max_gpu_freq
, min_gpu_freq
;
7226 struct cpufreq_policy
*policy
;
7228 WARN_ON(!mutex_is_locked(&dev_priv
->pcu_lock
));
7230 if (rps
->max_freq
<= rps
->min_freq
)
7233 policy
= cpufreq_cpu_get(0);
7235 max_ia_freq
= policy
->cpuinfo
.max_freq
;
7236 cpufreq_cpu_put(policy
);
7239 * Default to measured freq if none found, PCU will ensure we
7242 max_ia_freq
= tsc_khz
;
7245 /* Convert from kHz to MHz */
7246 max_ia_freq
/= 1000;
7248 min_ring_freq
= I915_READ(DCLK
) & 0xf;
7249 /* convert DDR frequency from units of 266.6MHz to bandwidth */
7250 min_ring_freq
= mult_frac(min_ring_freq
, 8, 3);
7252 min_gpu_freq
= rps
->min_freq
;
7253 max_gpu_freq
= rps
->max_freq
;
7254 if (IS_GEN9_BC(dev_priv
) || INTEL_GEN(dev_priv
) >= 10) {
7255 /* Convert GT frequency to 50 HZ units */
7256 min_gpu_freq
/= GEN9_FREQ_SCALER
;
7257 max_gpu_freq
/= GEN9_FREQ_SCALER
;
7261 * For each potential GPU frequency, load a ring frequency we'd like
7262 * to use for memory access. We do this by specifying the IA frequency
7263 * the PCU should use as a reference to determine the ring frequency.
7265 for (gpu_freq
= max_gpu_freq
; gpu_freq
>= min_gpu_freq
; gpu_freq
--) {
7266 const int diff
= max_gpu_freq
- gpu_freq
;
7267 unsigned int ia_freq
= 0, ring_freq
= 0;
7269 if (IS_GEN9_BC(dev_priv
) || INTEL_GEN(dev_priv
) >= 10) {
7271 * ring_freq = 2 * GT. ring_freq is in 100MHz units
7272 * No floor required for ring frequency on SKL.
7274 ring_freq
= gpu_freq
;
7275 } else if (INTEL_GEN(dev_priv
) >= 8) {
7276 /* max(2 * GT, DDR). NB: GT is 50MHz units */
7277 ring_freq
= max(min_ring_freq
, gpu_freq
);
7278 } else if (IS_HASWELL(dev_priv
)) {
7279 ring_freq
= mult_frac(gpu_freq
, 5, 4);
7280 ring_freq
= max(min_ring_freq
, ring_freq
);
7281 /* leave ia_freq as the default, chosen by cpufreq */
7283 /* On older processors, there is no separate ring
7284 * clock domain, so in order to boost the bandwidth
7285 * of the ring, we need to upclock the CPU (ia_freq).
7287 * For GPU frequencies less than 750MHz,
7288 * just use the lowest ring freq.
7290 if (gpu_freq
< min_freq
)
7293 ia_freq
= max_ia_freq
- ((diff
* scaling_factor
) / 2);
7294 ia_freq
= DIV_ROUND_CLOSEST(ia_freq
, 100);
7297 sandybridge_pcode_write(dev_priv
,
7298 GEN6_PCODE_WRITE_MIN_FREQ_TABLE
,
7299 ia_freq
<< GEN6_PCODE_FREQ_IA_RATIO_SHIFT
|
7300 ring_freq
<< GEN6_PCODE_FREQ_RING_RATIO_SHIFT
|
7305 static int cherryview_rps_max_freq(struct drm_i915_private
*dev_priv
)
7309 val
= vlv_punit_read(dev_priv
, FB_GFX_FMAX_AT_VMAX_FUSE
);
7311 switch (RUNTIME_INFO(dev_priv
)->sseu
.eu_total
) {
7313 /* (2 * 4) config */
7314 rp0
= (val
>> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT
);
7317 /* (2 * 6) config */
7318 rp0
= (val
>> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT
);
7321 /* (2 * 8) config */
7323 /* Setting (2 * 8) Min RP0 for any other combination */
7324 rp0
= (val
>> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT
);
7328 rp0
= (rp0
& FB_GFX_FREQ_FUSE_MASK
);
7333 static int cherryview_rps_rpe_freq(struct drm_i915_private
*dev_priv
)
7337 val
= vlv_punit_read(dev_priv
, PUNIT_GPU_DUTYCYCLE_REG
);
7338 rpe
= (val
>> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT
) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK
;
7343 static int cherryview_rps_guar_freq(struct drm_i915_private
*dev_priv
)
7347 val
= vlv_punit_read(dev_priv
, FB_GFX_FMAX_AT_VMAX_FUSE
);
7348 rp1
= (val
& FB_GFX_FREQ_FUSE_MASK
);
7353 static u32
cherryview_rps_min_freq(struct drm_i915_private
*dev_priv
)
7357 val
= vlv_punit_read(dev_priv
, FB_GFX_FMIN_AT_VMIN_FUSE
);
7358 rpn
= ((val
>> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT
) &
7359 FB_GFX_FREQ_FUSE_MASK
);
7364 static int valleyview_rps_guar_freq(struct drm_i915_private
*dev_priv
)
7368 val
= vlv_nc_read(dev_priv
, IOSF_NC_FB_GFX_FREQ_FUSE
);
7370 rp1
= (val
& FB_GFX_FGUARANTEED_FREQ_FUSE_MASK
) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT
;
7375 static int valleyview_rps_max_freq(struct drm_i915_private
*dev_priv
)
7379 val
= vlv_nc_read(dev_priv
, IOSF_NC_FB_GFX_FREQ_FUSE
);
7381 rp0
= (val
& FB_GFX_MAX_FREQ_FUSE_MASK
) >> FB_GFX_MAX_FREQ_FUSE_SHIFT
;
7383 rp0
= min_t(u32
, rp0
, 0xea);
7388 static int valleyview_rps_rpe_freq(struct drm_i915_private
*dev_priv
)
7392 val
= vlv_nc_read(dev_priv
, IOSF_NC_FB_GFX_FMAX_FUSE_LO
);
7393 rpe
= (val
& FB_FMAX_VMIN_FREQ_LO_MASK
) >> FB_FMAX_VMIN_FREQ_LO_SHIFT
;
7394 val
= vlv_nc_read(dev_priv
, IOSF_NC_FB_GFX_FMAX_FUSE_HI
);
7395 rpe
|= (val
& FB_FMAX_VMIN_FREQ_HI_MASK
) << 5;
7400 static int valleyview_rps_min_freq(struct drm_i915_private
*dev_priv
)
7404 val
= vlv_punit_read(dev_priv
, PUNIT_REG_GPU_LFM
) & 0xff;
7406 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7407 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7408 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7409 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
7410 * to make sure it matches what Punit accepts.
7412 return max_t(u32
, val
, 0xc0);
7415 /* Check that the pctx buffer wasn't move under us. */
7416 static void valleyview_check_pctx(struct drm_i915_private
*dev_priv
)
7418 unsigned long pctx_addr
= I915_READ(VLV_PCBR
) & ~4095;
7420 WARN_ON(pctx_addr
!= dev_priv
->dsm
.start
+
7421 dev_priv
->vlv_pctx
->stolen
->start
);
7425 /* Check that the pcbr address is not empty. */
7426 static void cherryview_check_pctx(struct drm_i915_private
*dev_priv
)
7428 unsigned long pctx_addr
= I915_READ(VLV_PCBR
) & ~4095;
7430 WARN_ON((pctx_addr
>> VLV_PCBR_ADDR_SHIFT
) == 0);
7433 static void cherryview_setup_pctx(struct drm_i915_private
*dev_priv
)
7435 resource_size_t pctx_paddr
, paddr
;
7436 resource_size_t pctx_size
= 32*1024;
7439 pcbr
= I915_READ(VLV_PCBR
);
7440 if ((pcbr
>> VLV_PCBR_ADDR_SHIFT
) == 0) {
7441 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7442 paddr
= dev_priv
->dsm
.end
+ 1 - pctx_size
;
7443 GEM_BUG_ON(paddr
> U32_MAX
);
7445 pctx_paddr
= (paddr
& (~4095));
7446 I915_WRITE(VLV_PCBR
, pctx_paddr
);
7449 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR
));
7452 static void valleyview_setup_pctx(struct drm_i915_private
*dev_priv
)
7454 struct drm_i915_gem_object
*pctx
;
7455 resource_size_t pctx_paddr
;
7456 resource_size_t pctx_size
= 24*1024;
7459 pcbr
= I915_READ(VLV_PCBR
);
7461 /* BIOS set it up already, grab the pre-alloc'd space */
7462 resource_size_t pcbr_offset
;
7464 pcbr_offset
= (pcbr
& (~4095)) - dev_priv
->dsm
.start
;
7465 pctx
= i915_gem_object_create_stolen_for_preallocated(dev_priv
,
7467 I915_GTT_OFFSET_NONE
,
7472 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7475 * From the Gunit register HAS:
7476 * The Gfx driver is expected to program this register and ensure
7477 * proper allocation within Gfx stolen memory. For example, this
7478 * register should be programmed such than the PCBR range does not
7479 * overlap with other ranges, such as the frame buffer, protected
7480 * memory, or any other relevant ranges.
7482 pctx
= i915_gem_object_create_stolen(dev_priv
, pctx_size
);
7484 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7488 GEM_BUG_ON(range_overflows_t(u64
,
7489 dev_priv
->dsm
.start
,
7490 pctx
->stolen
->start
,
7492 pctx_paddr
= dev_priv
->dsm
.start
+ pctx
->stolen
->start
;
7493 I915_WRITE(VLV_PCBR
, pctx_paddr
);
7496 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR
));
7497 dev_priv
->vlv_pctx
= pctx
;
7500 static void valleyview_cleanup_pctx(struct drm_i915_private
*dev_priv
)
7502 struct drm_i915_gem_object
*pctx
;
7504 pctx
= fetch_and_zero(&dev_priv
->vlv_pctx
);
7506 i915_gem_object_put(pctx
);
7509 static void vlv_init_gpll_ref_freq(struct drm_i915_private
*dev_priv
)
7511 dev_priv
->gt_pm
.rps
.gpll_ref_freq
=
7512 vlv_get_cck_clock(dev_priv
, "GPLL ref",
7513 CCK_GPLL_CLOCK_CONTROL
,
7514 dev_priv
->czclk_freq
);
7516 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7517 dev_priv
->gt_pm
.rps
.gpll_ref_freq
);
7520 static void valleyview_init_gt_powersave(struct drm_i915_private
*dev_priv
)
7522 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
7525 valleyview_setup_pctx(dev_priv
);
7527 vlv_init_gpll_ref_freq(dev_priv
);
7529 val
= vlv_punit_read(dev_priv
, PUNIT_REG_GPU_FREQ_STS
);
7530 switch ((val
>> 6) & 3) {
7533 dev_priv
->mem_freq
= 800;
7536 dev_priv
->mem_freq
= 1066;
7539 dev_priv
->mem_freq
= 1333;
7542 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv
->mem_freq
);
7544 rps
->max_freq
= valleyview_rps_max_freq(dev_priv
);
7545 rps
->rp0_freq
= rps
->max_freq
;
7546 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7547 intel_gpu_freq(dev_priv
, rps
->max_freq
),
7550 rps
->efficient_freq
= valleyview_rps_rpe_freq(dev_priv
);
7551 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7552 intel_gpu_freq(dev_priv
, rps
->efficient_freq
),
7553 rps
->efficient_freq
);
7555 rps
->rp1_freq
= valleyview_rps_guar_freq(dev_priv
);
7556 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7557 intel_gpu_freq(dev_priv
, rps
->rp1_freq
),
7560 rps
->min_freq
= valleyview_rps_min_freq(dev_priv
);
7561 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7562 intel_gpu_freq(dev_priv
, rps
->min_freq
),
7566 static void cherryview_init_gt_powersave(struct drm_i915_private
*dev_priv
)
7568 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
7571 cherryview_setup_pctx(dev_priv
);
7573 vlv_init_gpll_ref_freq(dev_priv
);
7575 mutex_lock(&dev_priv
->sb_lock
);
7576 val
= vlv_cck_read(dev_priv
, CCK_FUSE_REG
);
7577 mutex_unlock(&dev_priv
->sb_lock
);
7579 switch ((val
>> 2) & 0x7) {
7581 dev_priv
->mem_freq
= 2000;
7584 dev_priv
->mem_freq
= 1600;
7587 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv
->mem_freq
);
7589 rps
->max_freq
= cherryview_rps_max_freq(dev_priv
);
7590 rps
->rp0_freq
= rps
->max_freq
;
7591 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7592 intel_gpu_freq(dev_priv
, rps
->max_freq
),
7595 rps
->efficient_freq
= cherryview_rps_rpe_freq(dev_priv
);
7596 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7597 intel_gpu_freq(dev_priv
, rps
->efficient_freq
),
7598 rps
->efficient_freq
);
7600 rps
->rp1_freq
= cherryview_rps_guar_freq(dev_priv
);
7601 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7602 intel_gpu_freq(dev_priv
, rps
->rp1_freq
),
7605 rps
->min_freq
= cherryview_rps_min_freq(dev_priv
);
7606 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7607 intel_gpu_freq(dev_priv
, rps
->min_freq
),
7610 WARN_ONCE((rps
->max_freq
| rps
->efficient_freq
| rps
->rp1_freq
|
7612 "Odd GPU freq values\n");
/* Tear down what valleyview_init_gt_powersave() set up. */
static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
{
	valleyview_cleanup_pctx(dev_priv);
}
7620 static void cherryview_enable_rc6(struct drm_i915_private
*dev_priv
)
7622 struct intel_engine_cs
*engine
;
7623 enum intel_engine_id id
;
7624 u32 gtfifodbg
, rc6_mode
, pcbr
;
7626 gtfifodbg
= I915_READ(GTFIFODBG
) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV
|
7627 GT_FIFO_FREE_ENTRIES_CHV
);
7629 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7631 I915_WRITE(GTFIFODBG
, gtfifodbg
);
7634 cherryview_check_pctx(dev_priv
);
7636 /* 1a & 1b: Get forcewake during program sequence. Although the driver
7637 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7638 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
7640 /* Disable RC states. */
7641 I915_WRITE(GEN6_RC_CONTROL
, 0);
7643 /* 2a: Program RC6 thresholds.*/
7644 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT
, 40 << 16);
7645 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL
, 125000); /* 12500 * 1280ns */
7646 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS
, 25); /* 25 * 1280ns */
7648 for_each_engine(engine
, dev_priv
, id
)
7649 I915_WRITE(RING_MAX_IDLE(engine
->mmio_base
), 10);
7650 I915_WRITE(GEN6_RC_SLEEP
, 0);
7652 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
7653 I915_WRITE(GEN6_RC6_THRESHOLD
, 0x186);
7655 /* Allows RC6 residency counter to work */
7656 I915_WRITE(VLV_COUNTER_CONTROL
,
7657 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH
|
7658 VLV_MEDIA_RC6_COUNT_EN
|
7659 VLV_RENDER_RC6_COUNT_EN
));
7661 /* For now we assume BIOS is allocating and populating the PCBR */
7662 pcbr
= I915_READ(VLV_PCBR
);
7666 if (pcbr
>> VLV_PCBR_ADDR_SHIFT
)
7667 rc6_mode
= GEN7_RC_CTL_TO_MODE
;
7668 I915_WRITE(GEN6_RC_CONTROL
, rc6_mode
);
7670 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
7673 static void cherryview_enable_rps(struct drm_i915_private
*dev_priv
)
7677 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
7679 /* 1: Program defaults and thresholds for RPS*/
7680 I915_WRITE(GEN6_RP_DOWN_TIMEOUT
, 1000000);
7681 I915_WRITE(GEN6_RP_UP_THRESHOLD
, 59400);
7682 I915_WRITE(GEN6_RP_DOWN_THRESHOLD
, 245000);
7683 I915_WRITE(GEN6_RP_UP_EI
, 66000);
7684 I915_WRITE(GEN6_RP_DOWN_EI
, 350000);
7686 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS
, 10);
7689 I915_WRITE(GEN6_RP_CONTROL
,
7690 GEN6_RP_MEDIA_HW_NORMAL_MODE
|
7691 GEN6_RP_MEDIA_IS_GFX
|
7693 GEN6_RP_UP_BUSY_AVG
|
7694 GEN6_RP_DOWN_IDLE_AVG
);
7696 /* Setting Fixed Bias */
7697 val
= VLV_OVERRIDE_EN
|
7699 CHV_BIAS_CPU_50_SOC_50
;
7700 vlv_punit_write(dev_priv
, VLV_TURBO_SOC_OVERRIDE
, val
);
7702 val
= vlv_punit_read(dev_priv
, PUNIT_REG_GPU_FREQ_STS
);
7704 /* RPS code assumes GPLL is used */
7705 WARN_ONCE((val
& GPLLENABLE
) == 0, "GPLL not enabled\n");
7707 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val
& GPLLENABLE
));
7708 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val
);
7710 reset_rps(dev_priv
, valleyview_set_rps
);
7712 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
7715 static void valleyview_enable_rc6(struct drm_i915_private
*dev_priv
)
7717 struct intel_engine_cs
*engine
;
7718 enum intel_engine_id id
;
7721 valleyview_check_pctx(dev_priv
);
7723 gtfifodbg
= I915_READ(GTFIFODBG
);
7725 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7727 I915_WRITE(GTFIFODBG
, gtfifodbg
);
7730 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
7732 /* Disable RC states. */
7733 I915_WRITE(GEN6_RC_CONTROL
, 0);
7735 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT
, 0x00280000);
7736 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL
, 125000);
7737 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS
, 25);
7739 for_each_engine(engine
, dev_priv
, id
)
7740 I915_WRITE(RING_MAX_IDLE(engine
->mmio_base
), 10);
7742 I915_WRITE(GEN6_RC6_THRESHOLD
, 0x557);
7744 /* Allows RC6 residency counter to work */
7745 I915_WRITE(VLV_COUNTER_CONTROL
,
7746 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH
|
7747 VLV_MEDIA_RC0_COUNT_EN
|
7748 VLV_RENDER_RC0_COUNT_EN
|
7749 VLV_MEDIA_RC6_COUNT_EN
|
7750 VLV_RENDER_RC6_COUNT_EN
));
7752 I915_WRITE(GEN6_RC_CONTROL
,
7753 GEN7_RC_CTL_TO_MODE
| VLV_RC_CTL_CTX_RST_PARALLEL
);
7755 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
7758 static void valleyview_enable_rps(struct drm_i915_private
*dev_priv
)
7762 intel_uncore_forcewake_get(dev_priv
, FORCEWAKE_ALL
);
7764 I915_WRITE(GEN6_RP_DOWN_TIMEOUT
, 1000000);
7765 I915_WRITE(GEN6_RP_UP_THRESHOLD
, 59400);
7766 I915_WRITE(GEN6_RP_DOWN_THRESHOLD
, 245000);
7767 I915_WRITE(GEN6_RP_UP_EI
, 66000);
7768 I915_WRITE(GEN6_RP_DOWN_EI
, 350000);
7770 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS
, 10);
7772 I915_WRITE(GEN6_RP_CONTROL
,
7773 GEN6_RP_MEDIA_TURBO
|
7774 GEN6_RP_MEDIA_HW_NORMAL_MODE
|
7775 GEN6_RP_MEDIA_IS_GFX
|
7777 GEN6_RP_UP_BUSY_AVG
|
7778 GEN6_RP_DOWN_IDLE_CONT
);
7780 /* Setting Fixed Bias */
7781 val
= VLV_OVERRIDE_EN
|
7783 VLV_BIAS_CPU_125_SOC_875
;
7784 vlv_punit_write(dev_priv
, VLV_TURBO_SOC_OVERRIDE
, val
);
7786 val
= vlv_punit_read(dev_priv
, PUNIT_REG_GPU_FREQ_STS
);
7788 /* RPS code assumes GPLL is used */
7789 WARN_ONCE((val
& GPLLENABLE
) == 0, "GPLL not enabled\n");
7791 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val
& GPLLENABLE
));
7792 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val
);
7794 reset_rps(dev_priv
, valleyview_set_rps
);
7796 intel_uncore_forcewake_put(dev_priv
, FORCEWAKE_ALL
);
7799 static unsigned long intel_pxfreq(u32 vidfreq
)
7802 int div
= (vidfreq
& 0x3f0000) >> 16;
7803 int post
= (vidfreq
& 0x3000) >> 12;
7804 int pre
= (vidfreq
& 0x7);
7809 freq
= ((div
* 133333) / ((1<<post
) * pre
));
7814 static const struct cparams
{
7820 { 1, 1333, 301, 28664 },
7821 { 1, 1066, 294, 24460 },
7822 { 1, 800, 294, 25192 },
7823 { 0, 1333, 276, 27605 },
7824 { 0, 1066, 276, 27605 },
7825 { 0, 800, 231, 23784 },
7828 static unsigned long __i915_chipset_val(struct drm_i915_private
*dev_priv
)
7830 u64 total_count
, diff
, ret
;
7831 u32 count1
, count2
, count3
, m
= 0, c
= 0;
7832 unsigned long now
= jiffies_to_msecs(jiffies
), diff1
;
7835 lockdep_assert_held(&mchdev_lock
);
7837 diff1
= now
- dev_priv
->ips
.last_time1
;
7839 /* Prevent division-by-zero if we are asking too fast.
7840 * Also, we don't get interesting results if we are polling
7841 * faster than once in 10ms, so just return the saved value
7845 return dev_priv
->ips
.chipset_power
;
7847 count1
= I915_READ(DMIEC
);
7848 count2
= I915_READ(DDREC
);
7849 count3
= I915_READ(CSIEC
);
7851 total_count
= count1
+ count2
+ count3
;
7853 /* FIXME: handle per-counter overflow */
7854 if (total_count
< dev_priv
->ips
.last_count1
) {
7855 diff
= ~0UL - dev_priv
->ips
.last_count1
;
7856 diff
+= total_count
;
7858 diff
= total_count
- dev_priv
->ips
.last_count1
;
7861 for (i
= 0; i
< ARRAY_SIZE(cparams
); i
++) {
7862 if (cparams
[i
].i
== dev_priv
->ips
.c_m
&&
7863 cparams
[i
].t
== dev_priv
->ips
.r_t
) {
7870 diff
= div_u64(diff
, diff1
);
7871 ret
= ((m
* diff
) + c
);
7872 ret
= div_u64(ret
, 10);
7874 dev_priv
->ips
.last_count1
= total_count
;
7875 dev_priv
->ips
.last_time1
= now
;
7877 dev_priv
->ips
.chipset_power
= ret
;
7882 unsigned long i915_chipset_val(struct drm_i915_private
*dev_priv
)
7884 intel_wakeref_t wakeref
;
7885 unsigned long val
= 0;
7887 if (!IS_GEN(dev_priv
, 5))
7890 with_intel_runtime_pm(dev_priv
, wakeref
) {
7891 spin_lock_irq(&mchdev_lock
);
7892 val
= __i915_chipset_val(dev_priv
);
7893 spin_unlock_irq(&mchdev_lock
);
7899 unsigned long i915_mch_val(struct drm_i915_private
*dev_priv
)
7901 unsigned long m
, x
, b
;
7904 tsfs
= I915_READ(TSFS
);
7906 m
= ((tsfs
& TSFS_SLOPE_MASK
) >> TSFS_SLOPE_SHIFT
);
7907 x
= I915_READ8(TR1
);
7909 b
= tsfs
& TSFS_INTR_MASK
;
7911 return ((m
* x
) / 127) - b
;
7914 static int _pxvid_to_vd(u8 pxvid
)
7919 if (pxvid
>= 8 && pxvid
< 31)
7922 return (pxvid
+ 2) * 125;
7925 static u32
pvid_to_extvid(struct drm_i915_private
*dev_priv
, u8 pxvid
)
7927 const int vd
= _pxvid_to_vd(pxvid
);
7928 const int vm
= vd
- 1125;
7930 if (INTEL_INFO(dev_priv
)->is_mobile
)
7931 return vm
> 0 ? vm
: 0;
7936 static void __i915_update_gfx_val(struct drm_i915_private
*dev_priv
)
7938 u64 now
, diff
, diffms
;
7941 lockdep_assert_held(&mchdev_lock
);
7943 now
= ktime_get_raw_ns();
7944 diffms
= now
- dev_priv
->ips
.last_time2
;
7945 do_div(diffms
, NSEC_PER_MSEC
);
7947 /* Don't divide by 0 */
7951 count
= I915_READ(GFXEC
);
7953 if (count
< dev_priv
->ips
.last_count2
) {
7954 diff
= ~0UL - dev_priv
->ips
.last_count2
;
7957 diff
= count
- dev_priv
->ips
.last_count2
;
7960 dev_priv
->ips
.last_count2
= count
;
7961 dev_priv
->ips
.last_time2
= now
;
7963 /* More magic constants... */
7965 diff
= div_u64(diff
, diffms
* 10);
7966 dev_priv
->ips
.gfx_power
= diff
;
7969 void i915_update_gfx_val(struct drm_i915_private
*dev_priv
)
7971 intel_wakeref_t wakeref
;
7973 if (!IS_GEN(dev_priv
, 5))
7976 with_intel_runtime_pm(dev_priv
, wakeref
) {
7977 spin_lock_irq(&mchdev_lock
);
7978 __i915_update_gfx_val(dev_priv
);
7979 spin_unlock_irq(&mchdev_lock
);
7983 static unsigned long __i915_gfx_val(struct drm_i915_private
*dev_priv
)
7985 unsigned long t
, corr
, state1
, corr2
, state2
;
7988 lockdep_assert_held(&mchdev_lock
);
7990 pxvid
= I915_READ(PXVFREQ(dev_priv
->gt_pm
.rps
.cur_freq
));
7991 pxvid
= (pxvid
>> 24) & 0x7f;
7992 ext_v
= pvid_to_extvid(dev_priv
, pxvid
);
7996 t
= i915_mch_val(dev_priv
);
7998 /* Revel in the empirically derived constants */
8000 /* Correction factor in 1/100000 units */
8002 corr
= ((t
* 2349) + 135940);
8004 corr
= ((t
* 964) + 29317);
8006 corr
= ((t
* 301) + 1004);
8008 corr
= corr
* ((150142 * state1
) / 10000 - 78642);
8010 corr2
= (corr
* dev_priv
->ips
.corr
);
8012 state2
= (corr2
* state1
) / 10000;
8013 state2
/= 100; /* convert to mW */
8015 __i915_update_gfx_val(dev_priv
);
8017 return dev_priv
->ips
.gfx_power
+ state2
;
8020 unsigned long i915_gfx_val(struct drm_i915_private
*dev_priv
)
8022 intel_wakeref_t wakeref
;
8023 unsigned long val
= 0;
8025 if (!IS_GEN(dev_priv
, 5))
8028 with_intel_runtime_pm(dev_priv
, wakeref
) {
8029 spin_lock_irq(&mchdev_lock
);
8030 val
= __i915_gfx_val(dev_priv
);
8031 spin_unlock_irq(&mchdev_lock
);
8037 static struct drm_i915_private
*i915_mch_dev
;
8039 static struct drm_i915_private
*mchdev_get(void)
8041 struct drm_i915_private
*i915
;
8044 i915
= i915_mch_dev
;
8045 if (!kref_get_unless_zero(&i915
->drm
.ref
))
8053 * i915_read_mch_val - return value for IPS use
8055 * Calculate and return a value for the IPS driver to use when deciding whether
8056 * we have thermal and power headroom to increase CPU or GPU power budget.
8058 unsigned long i915_read_mch_val(void)
8060 struct drm_i915_private
*i915
;
8061 unsigned long chipset_val
= 0;
8062 unsigned long graphics_val
= 0;
8063 intel_wakeref_t wakeref
;
8065 i915
= mchdev_get();
8069 with_intel_runtime_pm(i915
, wakeref
) {
8070 spin_lock_irq(&mchdev_lock
);
8071 chipset_val
= __i915_chipset_val(i915
);
8072 graphics_val
= __i915_gfx_val(i915
);
8073 spin_unlock_irq(&mchdev_lock
);
8076 drm_dev_put(&i915
->drm
);
8077 return chipset_val
+ graphics_val
;
8079 EXPORT_SYMBOL_GPL(i915_read_mch_val
);
8082 * i915_gpu_raise - raise GPU frequency limit
8084 * Raise the limit; IPS indicates we have thermal headroom.
8086 bool i915_gpu_raise(void)
8088 struct drm_i915_private
*i915
;
8090 i915
= mchdev_get();
8094 spin_lock_irq(&mchdev_lock
);
8095 if (i915
->ips
.max_delay
> i915
->ips
.fmax
)
8096 i915
->ips
.max_delay
--;
8097 spin_unlock_irq(&mchdev_lock
);
8099 drm_dev_put(&i915
->drm
);
8102 EXPORT_SYMBOL_GPL(i915_gpu_raise
);
8105 * i915_gpu_lower - lower GPU frequency limit
8107 * IPS indicates we're close to a thermal limit, so throttle back the GPU
8108 * frequency maximum.
8110 bool i915_gpu_lower(void)
8112 struct drm_i915_private
*i915
;
8114 i915
= mchdev_get();
8118 spin_lock_irq(&mchdev_lock
);
8119 if (i915
->ips
.max_delay
< i915
->ips
.min_delay
)
8120 i915
->ips
.max_delay
++;
8121 spin_unlock_irq(&mchdev_lock
);
8123 drm_dev_put(&i915
->drm
);
8126 EXPORT_SYMBOL_GPL(i915_gpu_lower
);
8129 * i915_gpu_busy - indicate GPU business to IPS
8131 * Tell the IPS driver whether or not the GPU is busy.
8133 bool i915_gpu_busy(void)
8135 struct drm_i915_private
*i915
;
8138 i915
= mchdev_get();
8142 ret
= i915
->gt
.awake
;
8144 drm_dev_put(&i915
->drm
);
8147 EXPORT_SYMBOL_GPL(i915_gpu_busy
);
8150 * i915_gpu_turbo_disable - disable graphics turbo
8152 * Disable graphics turbo by resetting the max frequency and setting the
8153 * current frequency to the default.
8155 bool i915_gpu_turbo_disable(void)
8157 struct drm_i915_private
*i915
;
8160 i915
= mchdev_get();
8164 spin_lock_irq(&mchdev_lock
);
8165 i915
->ips
.max_delay
= i915
->ips
.fstart
;
8166 ret
= ironlake_set_drps(i915
, i915
->ips
.fstart
);
8167 spin_unlock_irq(&mchdev_lock
);
8169 drm_dev_put(&i915
->drm
);
8172 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable
);
8175 * Tells the intel_ips driver that the i915 driver is now loaded, if
8176 * IPS got loaded first.
8178 * This awkward dance is so that neither module has to depend on the
8179 * other in order for IPS to do the appropriate communication of
8180 * GPU turbo limits to i915.
8183 ips_ping_for_i915_load(void)
8187 link
= symbol_get(ips_link_to_i915_driver
);
8190 symbol_put(ips_link_to_i915_driver
);
8194 void intel_gpu_ips_init(struct drm_i915_private
*dev_priv
)
8196 /* We only register the i915 ips part with intel-ips once everything is
8197 * set up, to avoid intel-ips sneaking in and reading bogus values. */
8198 rcu_assign_pointer(i915_mch_dev
, dev_priv
);
8200 ips_ping_for_i915_load();
8203 void intel_gpu_ips_teardown(void)
8205 rcu_assign_pointer(i915_mch_dev
, NULL
);
8208 static void intel_init_emon(struct drm_i915_private
*dev_priv
)
8214 /* Disable to program */
8218 /* Program energy weights for various events */
8219 I915_WRITE(SDEW
, 0x15040d00);
8220 I915_WRITE(CSIEW0
, 0x007f0000);
8221 I915_WRITE(CSIEW1
, 0x1e220004);
8222 I915_WRITE(CSIEW2
, 0x04000004);
8224 for (i
= 0; i
< 5; i
++)
8225 I915_WRITE(PEW(i
), 0);
8226 for (i
= 0; i
< 3; i
++)
8227 I915_WRITE(DEW(i
), 0);
8229 /* Program P-state weights to account for frequency power adjustment */
8230 for (i
= 0; i
< 16; i
++) {
8231 u32 pxvidfreq
= I915_READ(PXVFREQ(i
));
8232 unsigned long freq
= intel_pxfreq(pxvidfreq
);
8233 unsigned long vid
= (pxvidfreq
& PXVFREQ_PX_MASK
) >>
8238 val
*= (freq
/ 1000);
8240 val
/= (127*127*900);
8242 DRM_ERROR("bad pxval: %ld\n", val
);
8245 /* Render standby states get 0 weight */
8249 for (i
= 0; i
< 4; i
++) {
8250 u32 val
= (pxw
[i
*4] << 24) | (pxw
[(i
*4)+1] << 16) |
8251 (pxw
[(i
*4)+2] << 8) | (pxw
[(i
*4)+3]);
8252 I915_WRITE(PXW(i
), val
);
8255 /* Adjust magic regs to magic values (more experimental results) */
8256 I915_WRITE(OGW0
, 0);
8257 I915_WRITE(OGW1
, 0);
8258 I915_WRITE(EG0
, 0x00007f00);
8259 I915_WRITE(EG1
, 0x0000000e);
8260 I915_WRITE(EG2
, 0x000e0000);
8261 I915_WRITE(EG3
, 0x68000300);
8262 I915_WRITE(EG4
, 0x42000000);
8263 I915_WRITE(EG5
, 0x00140031);
8267 for (i
= 0; i
< 8; i
++)
8268 I915_WRITE(PXWL(i
), 0);
8270 /* Enable PMON + select events */
8271 I915_WRITE(ECR
, 0x80000019);
8273 lcfuse
= I915_READ(LCFUSE02
);
8275 dev_priv
->ips
.corr
= (lcfuse
& LCFUSE_HIV_MASK
);
8278 void intel_init_gt_powersave(struct drm_i915_private
*dev_priv
)
8280 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
8283 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
8286 if (!sanitize_rc6(dev_priv
)) {
8287 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
8288 pm_runtime_get(&dev_priv
->drm
.pdev
->dev
);
8291 mutex_lock(&dev_priv
->pcu_lock
);
8293 /* Initialize RPS limits (for userspace) */
8294 if (IS_CHERRYVIEW(dev_priv
))
8295 cherryview_init_gt_powersave(dev_priv
);
8296 else if (IS_VALLEYVIEW(dev_priv
))
8297 valleyview_init_gt_powersave(dev_priv
);
8298 else if (INTEL_GEN(dev_priv
) >= 6)
8299 gen6_init_rps_frequencies(dev_priv
);
8301 /* Derive initial user preferences/limits from the hardware limits */
8302 rps
->idle_freq
= rps
->min_freq
;
8303 rps
->cur_freq
= rps
->idle_freq
;
8305 rps
->max_freq_softlimit
= rps
->max_freq
;
8306 rps
->min_freq_softlimit
= rps
->min_freq
;
8308 if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
))
8309 rps
->min_freq_softlimit
=
8311 rps
->efficient_freq
,
8312 intel_freq_opcode(dev_priv
, 450));
8314 /* After setting max-softlimit, find the overclock max freq */
8315 if (IS_GEN(dev_priv
, 6) ||
8316 IS_IVYBRIDGE(dev_priv
) || IS_HASWELL(dev_priv
)) {
8319 sandybridge_pcode_read(dev_priv
, GEN6_READ_OC_PARAMS
, ¶ms
);
8320 if (params
& BIT(31)) { /* OC supported */
8321 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
8322 (rps
->max_freq
& 0xff) * 50,
8323 (params
& 0xff) * 50);
8324 rps
->max_freq
= params
& 0xff;
8328 /* Finally allow us to boost to max by default */
8329 rps
->boost_freq
= rps
->max_freq
;
8331 mutex_unlock(&dev_priv
->pcu_lock
);
8334 void intel_cleanup_gt_powersave(struct drm_i915_private
*dev_priv
)
8336 if (IS_VALLEYVIEW(dev_priv
))
8337 valleyview_cleanup_gt_powersave(dev_priv
);
8339 if (!HAS_RC6(dev_priv
))
8340 pm_runtime_put(&dev_priv
->drm
.pdev
->dev
);
/**
 * intel_suspend_gt_powersave - suspend PM work and helper threads
 * @dev_priv: i915 device
 *
 * We don't want to disable RC6 or other features here, we just want
 * to make sure any work we've queued has finished and won't bother
 * us while we're suspended.
 */
void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) < 6)
		return;

	/* gen6_rps_idle() will be called later to disable interrupts */
}
8359 void intel_sanitize_gt_powersave(struct drm_i915_private
*dev_priv
)
8361 dev_priv
->gt_pm
.rps
.enabled
= true; /* force RPS disabling */
8362 dev_priv
->gt_pm
.rc6
.enabled
= true; /* force RC6 disabling */
8363 intel_disable_gt_powersave(dev_priv
);
8365 if (INTEL_GEN(dev_priv
) >= 11)
8366 gen11_reset_rps_interrupts(dev_priv
);
8367 else if (INTEL_GEN(dev_priv
) >= 6)
8368 gen6_reset_rps_interrupts(dev_priv
);
8371 static inline void intel_disable_llc_pstate(struct drm_i915_private
*i915
)
8373 lockdep_assert_held(&i915
->pcu_lock
);
8375 if (!i915
->gt_pm
.llc_pstate
.enabled
)
8378 /* Currently there is no HW configuration to be done to disable. */
8380 i915
->gt_pm
.llc_pstate
.enabled
= false;
8383 static void intel_disable_rc6(struct drm_i915_private
*dev_priv
)
8385 lockdep_assert_held(&dev_priv
->pcu_lock
);
8387 if (!dev_priv
->gt_pm
.rc6
.enabled
)
8390 if (INTEL_GEN(dev_priv
) >= 9)
8391 gen9_disable_rc6(dev_priv
);
8392 else if (IS_CHERRYVIEW(dev_priv
))
8393 cherryview_disable_rc6(dev_priv
);
8394 else if (IS_VALLEYVIEW(dev_priv
))
8395 valleyview_disable_rc6(dev_priv
);
8396 else if (INTEL_GEN(dev_priv
) >= 6)
8397 gen6_disable_rc6(dev_priv
);
8399 dev_priv
->gt_pm
.rc6
.enabled
= false;
8402 static void intel_disable_rps(struct drm_i915_private
*dev_priv
)
8404 lockdep_assert_held(&dev_priv
->pcu_lock
);
8406 if (!dev_priv
->gt_pm
.rps
.enabled
)
8409 if (INTEL_GEN(dev_priv
) >= 9)
8410 gen9_disable_rps(dev_priv
);
8411 else if (IS_CHERRYVIEW(dev_priv
))
8412 cherryview_disable_rps(dev_priv
);
8413 else if (IS_VALLEYVIEW(dev_priv
))
8414 valleyview_disable_rps(dev_priv
);
8415 else if (INTEL_GEN(dev_priv
) >= 6)
8416 gen6_disable_rps(dev_priv
);
8417 else if (IS_IRONLAKE_M(dev_priv
))
8418 ironlake_disable_drps(dev_priv
);
8420 dev_priv
->gt_pm
.rps
.enabled
= false;
8423 void intel_disable_gt_powersave(struct drm_i915_private
*dev_priv
)
8425 mutex_lock(&dev_priv
->pcu_lock
);
8427 intel_disable_rc6(dev_priv
);
8428 intel_disable_rps(dev_priv
);
8429 if (HAS_LLC(dev_priv
))
8430 intel_disable_llc_pstate(dev_priv
);
8432 mutex_unlock(&dev_priv
->pcu_lock
);
8435 static inline void intel_enable_llc_pstate(struct drm_i915_private
*i915
)
8437 lockdep_assert_held(&i915
->pcu_lock
);
8439 if (i915
->gt_pm
.llc_pstate
.enabled
)
8442 gen6_update_ring_freq(i915
);
8444 i915
->gt_pm
.llc_pstate
.enabled
= true;
8447 static void intel_enable_rc6(struct drm_i915_private
*dev_priv
)
8449 lockdep_assert_held(&dev_priv
->pcu_lock
);
8451 if (dev_priv
->gt_pm
.rc6
.enabled
)
8454 if (IS_CHERRYVIEW(dev_priv
))
8455 cherryview_enable_rc6(dev_priv
);
8456 else if (IS_VALLEYVIEW(dev_priv
))
8457 valleyview_enable_rc6(dev_priv
);
8458 else if (INTEL_GEN(dev_priv
) >= 9)
8459 gen9_enable_rc6(dev_priv
);
8460 else if (IS_BROADWELL(dev_priv
))
8461 gen8_enable_rc6(dev_priv
);
8462 else if (INTEL_GEN(dev_priv
) >= 6)
8463 gen6_enable_rc6(dev_priv
);
8465 dev_priv
->gt_pm
.rc6
.enabled
= true;
8468 static void intel_enable_rps(struct drm_i915_private
*dev_priv
)
8470 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
8472 lockdep_assert_held(&dev_priv
->pcu_lock
);
8477 if (IS_CHERRYVIEW(dev_priv
)) {
8478 cherryview_enable_rps(dev_priv
);
8479 } else if (IS_VALLEYVIEW(dev_priv
)) {
8480 valleyview_enable_rps(dev_priv
);
8481 } else if (INTEL_GEN(dev_priv
) >= 9) {
8482 gen9_enable_rps(dev_priv
);
8483 } else if (IS_BROADWELL(dev_priv
)) {
8484 gen8_enable_rps(dev_priv
);
8485 } else if (INTEL_GEN(dev_priv
) >= 6) {
8486 gen6_enable_rps(dev_priv
);
8487 } else if (IS_IRONLAKE_M(dev_priv
)) {
8488 ironlake_enable_drps(dev_priv
);
8489 intel_init_emon(dev_priv
);
8492 WARN_ON(rps
->max_freq
< rps
->min_freq
);
8493 WARN_ON(rps
->idle_freq
> rps
->max_freq
);
8495 WARN_ON(rps
->efficient_freq
< rps
->min_freq
);
8496 WARN_ON(rps
->efficient_freq
> rps
->max_freq
);
8498 rps
->enabled
= true;
8501 void intel_enable_gt_powersave(struct drm_i915_private
*dev_priv
)
8503 /* Powersaving is controlled by the host when inside a VM */
8504 if (intel_vgpu_active(dev_priv
))
8507 mutex_lock(&dev_priv
->pcu_lock
);
8509 if (HAS_RC6(dev_priv
))
8510 intel_enable_rc6(dev_priv
);
8511 intel_enable_rps(dev_priv
);
8512 if (HAS_LLC(dev_priv
))
8513 intel_enable_llc_pstate(dev_priv
);
8515 mutex_unlock(&dev_priv
->pcu_lock
);
8518 static void ibx_init_clock_gating(struct drm_i915_private
*dev_priv
)
8521 * On Ibex Peak and Cougar Point, we need to disable clock
8522 * gating for the panel power sequencer or it will fail to
8523 * start up when no ports are active.
8525 I915_WRITE(SOUTH_DSPCLK_GATE_D
, PCH_DPLSUNIT_CLOCK_GATE_DISABLE
);
8528 static void g4x_disable_trickle_feed(struct drm_i915_private
*dev_priv
)
8532 for_each_pipe(dev_priv
, pipe
) {
8533 I915_WRITE(DSPCNTR(pipe
),
8534 I915_READ(DSPCNTR(pipe
)) |
8535 DISPPLANE_TRICKLE_FEED_DISABLE
);
8537 I915_WRITE(DSPSURF(pipe
), I915_READ(DSPSURF(pipe
)));
8538 POSTING_READ(DSPSURF(pipe
));
8542 static void ilk_init_clock_gating(struct drm_i915_private
*dev_priv
)
8544 u32 dspclk_gate
= ILK_VRHUNIT_CLOCK_GATE_DISABLE
;
8548 * WaFbcDisableDpfcClockGating:ilk
8550 dspclk_gate
|= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE
|
8551 ILK_DPFCUNIT_CLOCK_GATE_DISABLE
|
8552 ILK_DPFDUNIT_CLOCK_GATE_ENABLE
;
8554 I915_WRITE(PCH_3DCGDIS0
,
8555 MARIUNIT_CLOCK_GATE_DISABLE
|
8556 SVSMUNIT_CLOCK_GATE_DISABLE
);
8557 I915_WRITE(PCH_3DCGDIS1
,
8558 VFMUNIT_CLOCK_GATE_DISABLE
);
8561 * According to the spec the following bits should be set in
8562 * order to enable memory self-refresh
8563 * The bit 22/21 of 0x42004
8564 * The bit 5 of 0x42020
8565 * The bit 15 of 0x45000
8567 I915_WRITE(ILK_DISPLAY_CHICKEN2
,
8568 (I915_READ(ILK_DISPLAY_CHICKEN2
) |
8569 ILK_DPARB_GATE
| ILK_VSDPFD_FULL
));
8570 dspclk_gate
|= ILK_DPARBUNIT_CLOCK_GATE_ENABLE
;
8571 I915_WRITE(DISP_ARB_CTL
,
8572 (I915_READ(DISP_ARB_CTL
) |
8576 * Based on the document from hardware guys the following bits
8577 * should be set unconditionally in order to enable FBC.
8578 * The bit 22 of 0x42000
8579 * The bit 22 of 0x42004
8580 * The bit 7,8,9 of 0x42020.
8582 if (IS_IRONLAKE_M(dev_priv
)) {
8583 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8584 I915_WRITE(ILK_DISPLAY_CHICKEN1
,
8585 I915_READ(ILK_DISPLAY_CHICKEN1
) |
8587 I915_WRITE(ILK_DISPLAY_CHICKEN2
,
8588 I915_READ(ILK_DISPLAY_CHICKEN2
) |
8592 I915_WRITE(ILK_DSPCLK_GATE_D
, dspclk_gate
);
8594 I915_WRITE(ILK_DISPLAY_CHICKEN2
,
8595 I915_READ(ILK_DISPLAY_CHICKEN2
) |
8596 ILK_ELPIN_409_SELECT
);
8597 I915_WRITE(_3D_CHICKEN2
,
8598 _3D_CHICKEN2_WM_READ_PIPELINED
<< 16 |
8599 _3D_CHICKEN2_WM_READ_PIPELINED
);
8601 /* WaDisableRenderCachePipelinedFlush:ilk */
8602 I915_WRITE(CACHE_MODE_0
,
8603 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE
));
8605 /* WaDisable_RenderCache_OperationalFlush:ilk */
8606 I915_WRITE(CACHE_MODE_0
, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE
));
8608 g4x_disable_trickle_feed(dev_priv
);
8610 ibx_init_clock_gating(dev_priv
);
8613 static void cpt_init_clock_gating(struct drm_i915_private
*dev_priv
)
8619 * On Ibex Peak and Cougar Point, we need to disable clock
8620 * gating for the panel power sequencer or it will fail to
8621 * start up when no ports are active.
8623 I915_WRITE(SOUTH_DSPCLK_GATE_D
, PCH_DPLSUNIT_CLOCK_GATE_DISABLE
|
8624 PCH_DPLUNIT_CLOCK_GATE_DISABLE
|
8625 PCH_CPUNIT_CLOCK_GATE_DISABLE
);
8626 I915_WRITE(SOUTH_CHICKEN2
, I915_READ(SOUTH_CHICKEN2
) |
8627 DPLS_EDP_PPS_FIX_DIS
);
8628 /* The below fixes the weird display corruption, a few pixels shifted
8629 * downward, on (only) LVDS of some HP laptops with IVY.
8631 for_each_pipe(dev_priv
, pipe
) {
8632 val
= I915_READ(TRANS_CHICKEN2(pipe
));
8633 val
|= TRANS_CHICKEN2_TIMING_OVERRIDE
;
8634 val
&= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED
;
8635 if (dev_priv
->vbt
.fdi_rx_polarity_inverted
)
8636 val
|= TRANS_CHICKEN2_FDI_POLARITY_REVERSED
;
8637 val
&= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK
;
8638 val
&= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER
;
8639 val
&= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH
;
8640 I915_WRITE(TRANS_CHICKEN2(pipe
), val
);
8642 /* WADP0ClockGatingDisable */
8643 for_each_pipe(dev_priv
, pipe
) {
8644 I915_WRITE(TRANS_CHICKEN1(pipe
),
8645 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE
);
8649 static void gen6_check_mch_setup(struct drm_i915_private
*dev_priv
)
8653 tmp
= I915_READ(MCH_SSKPD
);
8654 if ((tmp
& MCH_SSKPD_WM0_MASK
) != MCH_SSKPD_WM0_VAL
)
8655 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
8659 static void gen6_init_clock_gating(struct drm_i915_private
*dev_priv
)
8661 u32 dspclk_gate
= ILK_VRHUNIT_CLOCK_GATE_DISABLE
;
8663 I915_WRITE(ILK_DSPCLK_GATE_D
, dspclk_gate
);
8665 I915_WRITE(ILK_DISPLAY_CHICKEN2
,
8666 I915_READ(ILK_DISPLAY_CHICKEN2
) |
8667 ILK_ELPIN_409_SELECT
);
8669 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8670 I915_WRITE(_3D_CHICKEN
,
8671 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB
));
8673 /* WaDisable_RenderCache_OperationalFlush:snb */
8674 I915_WRITE(CACHE_MODE_0
, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE
));
8677 * BSpec recoomends 8x4 when MSAA is used,
8678 * however in practice 16x4 seems fastest.
8680 * Note that PS/WM thread counts depend on the WIZ hashing
8681 * disable bit, which we don't touch here, but it's good
8682 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8684 I915_WRITE(GEN6_GT_MODE
,
8685 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK
, GEN6_WIZ_HASHING_16x4
));
8687 I915_WRITE(CACHE_MODE_0
,
8688 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB
));
8690 I915_WRITE(GEN6_UCGCTL1
,
8691 I915_READ(GEN6_UCGCTL1
) |
8692 GEN6_BLBUNIT_CLOCK_GATE_DISABLE
|
8693 GEN6_CSUNIT_CLOCK_GATE_DISABLE
);
8695 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8696 * gating disable must be set. Failure to set it results in
8697 * flickering pixels due to Z write ordering failures after
8698 * some amount of runtime in the Mesa "fire" demo, and Unigine
8699 * Sanctuary and Tropics, and apparently anything else with
8700 * alpha test or pixel discard.
8702 * According to the spec, bit 11 (RCCUNIT) must also be set,
8703 * but we didn't debug actual testcases to find it out.
8705 * WaDisableRCCUnitClockGating:snb
8706 * WaDisableRCPBUnitClockGating:snb
8708 I915_WRITE(GEN6_UCGCTL2
,
8709 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE
|
8710 GEN6_RCCUNIT_CLOCK_GATE_DISABLE
);
8712 /* WaStripsFansDisableFastClipPerformanceFix:snb */
8713 I915_WRITE(_3D_CHICKEN3
,
8714 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL
));
8718 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8719 * 3DSTATE_SF number of SF output attributes is more than 16."
8721 I915_WRITE(_3D_CHICKEN3
,
8722 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH
));
8725 * According to the spec the following bits should be
8726 * set in order to enable memory self-refresh and fbc:
8727 * The bit21 and bit22 of 0x42000
8728 * The bit21 and bit22 of 0x42004
8729 * The bit5 and bit7 of 0x42020
8730 * The bit14 of 0x70180
8731 * The bit14 of 0x71180
8733 * WaFbcAsynchFlipDisableFbcQueue:snb
8735 I915_WRITE(ILK_DISPLAY_CHICKEN1
,
8736 I915_READ(ILK_DISPLAY_CHICKEN1
) |
8737 ILK_FBCQ_DIS
| ILK_PABSTRETCH_DIS
);
8738 I915_WRITE(ILK_DISPLAY_CHICKEN2
,
8739 I915_READ(ILK_DISPLAY_CHICKEN2
) |
8740 ILK_DPARB_GATE
| ILK_VSDPFD_FULL
);
8741 I915_WRITE(ILK_DSPCLK_GATE_D
,
8742 I915_READ(ILK_DSPCLK_GATE_D
) |
8743 ILK_DPARBUNIT_CLOCK_GATE_ENABLE
|
8744 ILK_DPFDUNIT_CLOCK_GATE_ENABLE
);
8746 g4x_disable_trickle_feed(dev_priv
);
8748 cpt_init_clock_gating(dev_priv
);
8750 gen6_check_mch_setup(dev_priv
);
8753 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private
*dev_priv
)
8755 u32 reg
= I915_READ(GEN7_FF_THREAD_MODE
);
8758 * WaVSThreadDispatchOverride:ivb,vlv
8760 * This actually overrides the dispatch
8761 * mode for all thread types.
8763 reg
&= ~GEN7_FF_SCHED_MASK
;
8764 reg
|= GEN7_FF_TS_SCHED_HW
;
8765 reg
|= GEN7_FF_VS_SCHED_HW
;
8766 reg
|= GEN7_FF_DS_SCHED_HW
;
8768 I915_WRITE(GEN7_FF_THREAD_MODE
, reg
);
8771 static void lpt_init_clock_gating(struct drm_i915_private
*dev_priv
)
8774 * TODO: this bit should only be enabled when really needed, then
8775 * disabled when not needed anymore in order to save power.
8777 if (HAS_PCH_LPT_LP(dev_priv
))
8778 I915_WRITE(SOUTH_DSPCLK_GATE_D
,
8779 I915_READ(SOUTH_DSPCLK_GATE_D
) |
8780 PCH_LP_PARTITION_LEVEL_DISABLE
);
8782 /* WADPOClockGatingDisable:hsw */
8783 I915_WRITE(TRANS_CHICKEN1(PIPE_A
),
8784 I915_READ(TRANS_CHICKEN1(PIPE_A
)) |
8785 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE
);
8788 static void lpt_suspend_hw(struct drm_i915_private
*dev_priv
)
8790 if (HAS_PCH_LPT_LP(dev_priv
)) {
8791 u32 val
= I915_READ(SOUTH_DSPCLK_GATE_D
);
8793 val
&= ~PCH_LP_PARTITION_LEVEL_DISABLE
;
8794 I915_WRITE(SOUTH_DSPCLK_GATE_D
, val
);
8798 static void gen8_set_l3sqc_credits(struct drm_i915_private
*dev_priv
,
8799 int general_prio_credits
,
8800 int high_prio_credits
)
8805 /* WaTempDisableDOPClkGating:bdw */
8806 misccpctl
= I915_READ(GEN7_MISCCPCTL
);
8807 I915_WRITE(GEN7_MISCCPCTL
, misccpctl
& ~GEN7_DOP_CLOCK_GATE_ENABLE
);
8809 val
= I915_READ(GEN8_L3SQCREG1
);
8810 val
&= ~L3_PRIO_CREDITS_MASK
;
8811 val
|= L3_GENERAL_PRIO_CREDITS(general_prio_credits
);
8812 val
|= L3_HIGH_PRIO_CREDITS(high_prio_credits
);
8813 I915_WRITE(GEN8_L3SQCREG1
, val
);
8816 * Wait at least 100 clocks before re-enabling clock gating.
8817 * See the definition of L3SQCREG1 in BSpec.
8819 POSTING_READ(GEN8_L3SQCREG1
);
8821 I915_WRITE(GEN7_MISCCPCTL
, misccpctl
);
8824 static void icl_init_clock_gating(struct drm_i915_private
*dev_priv
)
8826 /* This is not an Wa. Enable to reduce Sampler power */
8827 I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN
,
8828 I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN
) & ~DFR_DISABLE
);
8830 /* WaEnable32PlaneMode:icl */
8831 I915_WRITE(GEN9_CSFE_CHICKEN1_RCS
,
8832 _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE
));
8835 static void cnp_init_clock_gating(struct drm_i915_private
*dev_priv
)
8837 if (!HAS_PCH_CNP(dev_priv
))
8840 /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
8841 I915_WRITE(SOUTH_DSPCLK_GATE_D
, I915_READ(SOUTH_DSPCLK_GATE_D
) |
8842 CNP_PWM_CGE_GATING_DISABLE
);
8845 static void cnl_init_clock_gating(struct drm_i915_private
*dev_priv
)
8848 cnp_init_clock_gating(dev_priv
);
8850 /* This is not an Wa. Enable for better image quality */
8851 I915_WRITE(_3D_CHICKEN3
,
8852 _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE
));
8854 /* WaEnableChickenDCPR:cnl */
8855 I915_WRITE(GEN8_CHICKEN_DCPR_1
,
8856 I915_READ(GEN8_CHICKEN_DCPR_1
) | MASK_WAKEMEM
);
8858 /* WaFbcWakeMemOn:cnl */
8859 I915_WRITE(DISP_ARB_CTL
, I915_READ(DISP_ARB_CTL
) |
8860 DISP_FBC_MEMORY_WAKE
);
8862 val
= I915_READ(SLICE_UNIT_LEVEL_CLKGATE
);
8863 /* ReadHitWriteOnlyDisable:cnl */
8864 val
|= RCCUNIT_CLKGATE_DIS
;
8865 /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
8866 if (IS_CNL_REVID(dev_priv
, CNL_REVID_A0
, CNL_REVID_B0
))
8867 val
|= SARBUNIT_CLKGATE_DIS
;
8868 I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE
, val
);
8870 /* Wa_2201832410:cnl */
8871 val
= I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE
);
8872 val
|= GWUNIT_CLKGATE_DIS
;
8873 I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE
, val
);
8875 /* WaDisableVFclkgate:cnl */
8876 /* WaVFUnitClockGatingDisable:cnl */
8877 val
= I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE
);
8878 val
|= VFUNIT_CLKGATE_DIS
;
8879 I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE
, val
);
8882 static void cfl_init_clock_gating(struct drm_i915_private
*dev_priv
)
8884 cnp_init_clock_gating(dev_priv
);
8885 gen9_init_clock_gating(dev_priv
);
8887 /* WaFbcNukeOnHostModify:cfl */
8888 I915_WRITE(ILK_DPFC_CHICKEN
, I915_READ(ILK_DPFC_CHICKEN
) |
8889 ILK_DPFC_NUKE_ON_ANY_MODIFICATION
);
8892 static void kbl_init_clock_gating(struct drm_i915_private
*dev_priv
)
8894 gen9_init_clock_gating(dev_priv
);
8896 /* WaDisableSDEUnitClockGating:kbl */
8897 if (IS_KBL_REVID(dev_priv
, 0, KBL_REVID_B0
))
8898 I915_WRITE(GEN8_UCGCTL6
, I915_READ(GEN8_UCGCTL6
) |
8899 GEN8_SDEUNIT_CLOCK_GATE_DISABLE
);
8901 /* WaDisableGamClockGating:kbl */
8902 if (IS_KBL_REVID(dev_priv
, 0, KBL_REVID_B0
))
8903 I915_WRITE(GEN6_UCGCTL1
, I915_READ(GEN6_UCGCTL1
) |
8904 GEN6_GAMUNIT_CLOCK_GATE_DISABLE
);
8906 /* WaFbcNukeOnHostModify:kbl */
8907 I915_WRITE(ILK_DPFC_CHICKEN
, I915_READ(ILK_DPFC_CHICKEN
) |
8908 ILK_DPFC_NUKE_ON_ANY_MODIFICATION
);
8911 static void skl_init_clock_gating(struct drm_i915_private
*dev_priv
)
8913 gen9_init_clock_gating(dev_priv
);
8915 /* WAC6entrylatency:skl */
8916 I915_WRITE(FBC_LLC_READ_CTRL
, I915_READ(FBC_LLC_READ_CTRL
) |
8917 FBC_LLC_FULLY_OPEN
);
8919 /* WaFbcNukeOnHostModify:skl */
8920 I915_WRITE(ILK_DPFC_CHICKEN
, I915_READ(ILK_DPFC_CHICKEN
) |
8921 ILK_DPFC_NUKE_ON_ANY_MODIFICATION
);
8924 static void bdw_init_clock_gating(struct drm_i915_private
*dev_priv
)
8926 /* The GTT cache must be disabled if the system is using 2M pages. */
8927 bool can_use_gtt_cache
= !HAS_PAGE_SIZES(dev_priv
,
8928 I915_GTT_PAGE_SIZE_2M
);
8931 /* WaSwitchSolVfFArbitrationPriority:bdw */
8932 I915_WRITE(GAM_ECOCHK
, I915_READ(GAM_ECOCHK
) | HSW_ECOCHK_ARB_PRIO_SOL
);
8934 /* WaPsrDPAMaskVBlankInSRD:bdw */
8935 I915_WRITE(CHICKEN_PAR1_1
,
8936 I915_READ(CHICKEN_PAR1_1
) | DPA_MASK_VBLANK_SRD
);
8938 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
8939 for_each_pipe(dev_priv
, pipe
) {
8940 I915_WRITE(CHICKEN_PIPESL_1(pipe
),
8941 I915_READ(CHICKEN_PIPESL_1(pipe
)) |
8942 BDW_DPRS_MASK_VBLANK_SRD
);
8945 /* WaVSRefCountFullforceMissDisable:bdw */
8946 /* WaDSRefCountFullforceMissDisable:bdw */
8947 I915_WRITE(GEN7_FF_THREAD_MODE
,
8948 I915_READ(GEN7_FF_THREAD_MODE
) &
8949 ~(GEN8_FF_DS_REF_CNT_FFME
| GEN7_FF_VS_REF_CNT_FFME
));
8951 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL
,
8952 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE
));
8954 /* WaDisableSDEUnitClockGating:bdw */
8955 I915_WRITE(GEN8_UCGCTL6
, I915_READ(GEN8_UCGCTL6
) |
8956 GEN8_SDEUNIT_CLOCK_GATE_DISABLE
);
8958 /* WaProgramL3SqcReg1Default:bdw */
8959 gen8_set_l3sqc_credits(dev_priv
, 30, 2);
8961 /* WaGttCachingOffByDefault:bdw */
8962 I915_WRITE(HSW_GTT_CACHE_EN
, can_use_gtt_cache
? GTT_CACHE_EN_ALL
: 0);
8964 /* WaKVMNotificationOnConfigChange:bdw */
8965 I915_WRITE(CHICKEN_PAR2_1
, I915_READ(CHICKEN_PAR2_1
)
8966 | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT
);
8968 lpt_init_clock_gating(dev_priv
);
8970 /* WaDisableDopClockGating:bdw
8972 * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
8975 I915_WRITE(GEN6_UCGCTL1
,
8976 I915_READ(GEN6_UCGCTL1
) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE
);
8979 static void hsw_init_clock_gating(struct drm_i915_private
*dev_priv
)
8981 /* L3 caching of data atomics doesn't work -- disable it. */
8982 I915_WRITE(HSW_SCRATCH1
, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE
);
8983 I915_WRITE(HSW_ROW_CHICKEN3
,
8984 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE
));
8986 /* This is required by WaCatErrorRejectionIssue:hsw */
8987 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG
,
8988 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG
) |
8989 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB
);
8991 /* WaVSRefCountFullforceMissDisable:hsw */
8992 I915_WRITE(GEN7_FF_THREAD_MODE
,
8993 I915_READ(GEN7_FF_THREAD_MODE
) & ~GEN7_FF_VS_REF_CNT_FFME
);
8995 /* WaDisable_RenderCache_OperationalFlush:hsw */
8996 I915_WRITE(CACHE_MODE_0_GEN7
, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE
));
8998 /* enable HiZ Raw Stall Optimization */
8999 I915_WRITE(CACHE_MODE_0_GEN7
,
9000 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE
));
9002 /* WaDisable4x2SubspanOptimization:hsw */
9003 I915_WRITE(CACHE_MODE_1
,
9004 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE
));
9007 * BSpec recommends 8x4 when MSAA is used,
9008 * however in practice 16x4 seems fastest.
9010 * Note that PS/WM thread counts depend on the WIZ hashing
9011 * disable bit, which we don't touch here, but it's good
9012 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9014 I915_WRITE(GEN7_GT_MODE
,
9015 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK
, GEN6_WIZ_HASHING_16x4
));
9017 /* WaSampleCChickenBitEnable:hsw */
9018 I915_WRITE(HALF_SLICE_CHICKEN3
,
9019 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE
));
9021 /* WaSwitchSolVfFArbitrationPriority:hsw */
9022 I915_WRITE(GAM_ECOCHK
, I915_READ(GAM_ECOCHK
) | HSW_ECOCHK_ARB_PRIO_SOL
);
9024 lpt_init_clock_gating(dev_priv
);
9027 static void ivb_init_clock_gating(struct drm_i915_private
*dev_priv
)
9031 I915_WRITE(ILK_DSPCLK_GATE_D
, ILK_VRHUNIT_CLOCK_GATE_DISABLE
);
9033 /* WaDisableEarlyCull:ivb */
9034 I915_WRITE(_3D_CHICKEN3
,
9035 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL
));
9037 /* WaDisableBackToBackFlipFix:ivb */
9038 I915_WRITE(IVB_CHICKEN3
,
9039 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE
|
9040 CHICKEN3_DGMG_DONE_FIX_DISABLE
);
9042 /* WaDisablePSDDualDispatchEnable:ivb */
9043 if (IS_IVB_GT1(dev_priv
))
9044 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1
,
9045 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE
));
9047 /* WaDisable_RenderCache_OperationalFlush:ivb */
9048 I915_WRITE(CACHE_MODE_0_GEN7
, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE
));
9050 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
9051 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1
,
9052 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC
);
9054 /* WaApplyL3ControlAndL3ChickenMode:ivb */
9055 I915_WRITE(GEN7_L3CNTLREG1
,
9056 GEN7_WA_FOR_GEN7_L3_CONTROL
);
9057 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER
,
9058 GEN7_WA_L3_CHICKEN_MODE
);
9059 if (IS_IVB_GT1(dev_priv
))
9060 I915_WRITE(GEN7_ROW_CHICKEN2
,
9061 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE
));
9063 /* must write both registers */
9064 I915_WRITE(GEN7_ROW_CHICKEN2
,
9065 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE
));
9066 I915_WRITE(GEN7_ROW_CHICKEN2_GT2
,
9067 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE
));
9070 /* WaForceL3Serialization:ivb */
9071 I915_WRITE(GEN7_L3SQCREG4
, I915_READ(GEN7_L3SQCREG4
) &
9072 ~L3SQ_URB_READ_CAM_MATCH_DISABLE
);
9075 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9076 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
9078 I915_WRITE(GEN6_UCGCTL2
,
9079 GEN6_RCZUNIT_CLOCK_GATE_DISABLE
);
9081 /* This is required by WaCatErrorRejectionIssue:ivb */
9082 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG
,
9083 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG
) |
9084 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB
);
9086 g4x_disable_trickle_feed(dev_priv
);
9088 gen7_setup_fixed_func_scheduler(dev_priv
);
9090 if (0) { /* causes HiZ corruption on ivb:gt1 */
9091 /* enable HiZ Raw Stall Optimization */
9092 I915_WRITE(CACHE_MODE_0_GEN7
,
9093 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE
));
9096 /* WaDisable4x2SubspanOptimization:ivb */
9097 I915_WRITE(CACHE_MODE_1
,
9098 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE
));
9101 * BSpec recommends 8x4 when MSAA is used,
9102 * however in practice 16x4 seems fastest.
9104 * Note that PS/WM thread counts depend on the WIZ hashing
9105 * disable bit, which we don't touch here, but it's good
9106 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9108 I915_WRITE(GEN7_GT_MODE
,
9109 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK
, GEN6_WIZ_HASHING_16x4
));
9111 snpcr
= I915_READ(GEN6_MBCUNIT_SNPCR
);
9112 snpcr
&= ~GEN6_MBC_SNPCR_MASK
;
9113 snpcr
|= GEN6_MBC_SNPCR_MED
;
9114 I915_WRITE(GEN6_MBCUNIT_SNPCR
, snpcr
);
9116 if (!HAS_PCH_NOP(dev_priv
))
9117 cpt_init_clock_gating(dev_priv
);
9119 gen6_check_mch_setup(dev_priv
);
9122 static void vlv_init_clock_gating(struct drm_i915_private
*dev_priv
)
9124 /* WaDisableEarlyCull:vlv */
9125 I915_WRITE(_3D_CHICKEN3
,
9126 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL
));
9128 /* WaDisableBackToBackFlipFix:vlv */
9129 I915_WRITE(IVB_CHICKEN3
,
9130 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE
|
9131 CHICKEN3_DGMG_DONE_FIX_DISABLE
);
9133 /* WaPsdDispatchEnable:vlv */
9134 /* WaDisablePSDDualDispatchEnable:vlv */
9135 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1
,
9136 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP
|
9137 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE
));
9139 /* WaDisable_RenderCache_OperationalFlush:vlv */
9140 I915_WRITE(CACHE_MODE_0_GEN7
, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE
));
9142 /* WaForceL3Serialization:vlv */
9143 I915_WRITE(GEN7_L3SQCREG4
, I915_READ(GEN7_L3SQCREG4
) &
9144 ~L3SQ_URB_READ_CAM_MATCH_DISABLE
);
9146 /* WaDisableDopClockGating:vlv */
9147 I915_WRITE(GEN7_ROW_CHICKEN2
,
9148 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE
));
9150 /* This is required by WaCatErrorRejectionIssue:vlv */
9151 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG
,
9152 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG
) |
9153 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB
);
9155 gen7_setup_fixed_func_scheduler(dev_priv
);
9158 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9159 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
9161 I915_WRITE(GEN6_UCGCTL2
,
9162 GEN6_RCZUNIT_CLOCK_GATE_DISABLE
);
9164 /* WaDisableL3Bank2xClockGate:vlv
9165 * Disabling L3 clock gating- MMIO 940c[25] = 1
9166 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
9167 I915_WRITE(GEN7_UCGCTL4
,
9168 I915_READ(GEN7_UCGCTL4
) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE
);
9171 * BSpec says this must be set, even though
9172 * WaDisable4x2SubspanOptimization isn't listed for VLV.
9174 I915_WRITE(CACHE_MODE_1
,
9175 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE
));
9178 * BSpec recommends 8x4 when MSAA is used,
9179 * however in practice 16x4 seems fastest.
9181 * Note that PS/WM thread counts depend on the WIZ hashing
9182 * disable bit, which we don't touch here, but it's good
9183 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9185 I915_WRITE(GEN7_GT_MODE
,
9186 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK
, GEN6_WIZ_HASHING_16x4
));
9189 * WaIncreaseL3CreditsForVLVB0:vlv
9190 * This is the hardware default actually.
9192 I915_WRITE(GEN7_L3SQCREG1
, VLV_B0_WA_L3SQCREG1_VALUE
);
9195 * WaDisableVLVClockGating_VBIIssue:vlv
9196 * Disable clock gating on th GCFG unit to prevent a delay
9197 * in the reporting of vblank events.
9199 I915_WRITE(VLV_GUNIT_CLOCK_GATE
, GCFG_DIS
);
9202 static void chv_init_clock_gating(struct drm_i915_private
*dev_priv
)
9204 /* WaVSRefCountFullforceMissDisable:chv */
9205 /* WaDSRefCountFullforceMissDisable:chv */
9206 I915_WRITE(GEN7_FF_THREAD_MODE
,
9207 I915_READ(GEN7_FF_THREAD_MODE
) &
9208 ~(GEN8_FF_DS_REF_CNT_FFME
| GEN7_FF_VS_REF_CNT_FFME
));
9210 /* WaDisableSemaphoreAndSyncFlipWait:chv */
9211 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL
,
9212 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE
));
9214 /* WaDisableCSUnitClockGating:chv */
9215 I915_WRITE(GEN6_UCGCTL1
, I915_READ(GEN6_UCGCTL1
) |
9216 GEN6_CSUNIT_CLOCK_GATE_DISABLE
);
9218 /* WaDisableSDEUnitClockGating:chv */
9219 I915_WRITE(GEN8_UCGCTL6
, I915_READ(GEN8_UCGCTL6
) |
9220 GEN8_SDEUNIT_CLOCK_GATE_DISABLE
);
9223 * WaProgramL3SqcReg1Default:chv
9224 * See gfxspecs/Related Documents/Performance Guide/
9225 * LSQC Setting Recommendations.
9227 gen8_set_l3sqc_credits(dev_priv
, 38, 2);
9230 * GTT cache may not work with big pages, so if those
9231 * are ever enabled GTT cache may need to be disabled.
9233 I915_WRITE(HSW_GTT_CACHE_EN
, GTT_CACHE_EN_ALL
);
9236 static void g4x_init_clock_gating(struct drm_i915_private
*dev_priv
)
9240 I915_WRITE(RENCLK_GATE_D1
, 0);
9241 I915_WRITE(RENCLK_GATE_D2
, VF_UNIT_CLOCK_GATE_DISABLE
|
9242 GS_UNIT_CLOCK_GATE_DISABLE
|
9243 CL_UNIT_CLOCK_GATE_DISABLE
);
9244 I915_WRITE(RAMCLK_GATE_D
, 0);
9245 dspclk_gate
= VRHUNIT_CLOCK_GATE_DISABLE
|
9246 OVRUNIT_CLOCK_GATE_DISABLE
|
9247 OVCUNIT_CLOCK_GATE_DISABLE
;
9248 if (IS_GM45(dev_priv
))
9249 dspclk_gate
|= DSSUNIT_CLOCK_GATE_DISABLE
;
9250 I915_WRITE(DSPCLK_GATE_D
, dspclk_gate
);
9252 /* WaDisableRenderCachePipelinedFlush */
9253 I915_WRITE(CACHE_MODE_0
,
9254 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE
));
9256 /* WaDisable_RenderCache_OperationalFlush:g4x */
9257 I915_WRITE(CACHE_MODE_0
, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE
));
9259 g4x_disable_trickle_feed(dev_priv
);
9262 static void i965gm_init_clock_gating(struct drm_i915_private
*dev_priv
)
9264 I915_WRITE(RENCLK_GATE_D1
, I965_RCC_CLOCK_GATE_DISABLE
);
9265 I915_WRITE(RENCLK_GATE_D2
, 0);
9266 I915_WRITE(DSPCLK_GATE_D
, 0);
9267 I915_WRITE(RAMCLK_GATE_D
, 0);
9268 I915_WRITE16(DEUC
, 0);
9269 I915_WRITE(MI_ARB_STATE
,
9270 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE
));
9272 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9273 I915_WRITE(CACHE_MODE_0
, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE
));
9276 static void i965g_init_clock_gating(struct drm_i915_private
*dev_priv
)
9278 I915_WRITE(RENCLK_GATE_D1
, I965_RCZ_CLOCK_GATE_DISABLE
|
9279 I965_RCC_CLOCK_GATE_DISABLE
|
9280 I965_RCPB_CLOCK_GATE_DISABLE
|
9281 I965_ISC_CLOCK_GATE_DISABLE
|
9282 I965_FBC_CLOCK_GATE_DISABLE
);
9283 I915_WRITE(RENCLK_GATE_D2
, 0);
9284 I915_WRITE(MI_ARB_STATE
,
9285 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE
));
9287 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9288 I915_WRITE(CACHE_MODE_0
, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE
));
9291 static void gen3_init_clock_gating(struct drm_i915_private
*dev_priv
)
9293 u32 dstate
= I915_READ(D_STATE
);
9295 dstate
|= DSTATE_PLL_D3_OFF
| DSTATE_GFX_CLOCK_GATING
|
9296 DSTATE_DOT_CLOCK_GATING
;
9297 I915_WRITE(D_STATE
, dstate
);
9299 if (IS_PINEVIEW(dev_priv
))
9300 I915_WRITE(ECOSKPD
, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY
));
9302 /* IIR "flip pending" means done if this bit is set */
9303 I915_WRITE(ECOSKPD
, _MASKED_BIT_DISABLE(ECO_FLIP_DONE
));
9305 /* interrupts should cause a wake up from C3 */
9306 I915_WRITE(INSTPM
, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN
));
9308 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9309 I915_WRITE(MI_ARB_STATE
, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE
));
9311 I915_WRITE(MI_ARB_STATE
,
9312 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE
));
9315 static void i85x_init_clock_gating(struct drm_i915_private
*dev_priv
)
9317 I915_WRITE(RENCLK_GATE_D1
, SV_CLOCK_GATE_DISABLE
);
9319 /* interrupts should cause a wake up from C3 */
9320 I915_WRITE(MI_STATE
, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN
) |
9321 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE
));
9323 I915_WRITE(MEM_MODE
,
9324 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE
));
9327 static void i830_init_clock_gating(struct drm_i915_private
*dev_priv
)
9329 I915_WRITE(MEM_MODE
,
9330 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE
) |
9331 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE
));
9334 void intel_init_clock_gating(struct drm_i915_private
*dev_priv
)
9336 dev_priv
->display
.init_clock_gating(dev_priv
);
/* Platform-specific hardware quiescing before system suspend. */
void intel_suspend_hw(struct drm_i915_private *dev_priv)
{
	if (HAS_PCH_LPT(dev_priv))
		lpt_suspend_hw(dev_priv);
}
/* Fallback hook for platforms with no clock gating setup. */
static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
{
	DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
}
9351 * intel_init_clock_gating_hooks - setup the clock gating hooks
9352 * @dev_priv: device private
9354 * Setup the hooks that configure which clocks of a given platform can be
9355 * gated and also apply various GT and display specific workarounds for these
9356 * platforms. Note that some GT specific workarounds are applied separately
9357 * when GPU contexts or batchbuffers start their execution.
9359 void intel_init_clock_gating_hooks(struct drm_i915_private
*dev_priv
)
9361 if (IS_ICELAKE(dev_priv
))
9362 dev_priv
->display
.init_clock_gating
= icl_init_clock_gating
;
9363 else if (IS_CANNONLAKE(dev_priv
))
9364 dev_priv
->display
.init_clock_gating
= cnl_init_clock_gating
;
9365 else if (IS_COFFEELAKE(dev_priv
))
9366 dev_priv
->display
.init_clock_gating
= cfl_init_clock_gating
;
9367 else if (IS_SKYLAKE(dev_priv
))
9368 dev_priv
->display
.init_clock_gating
= skl_init_clock_gating
;
9369 else if (IS_KABYLAKE(dev_priv
))
9370 dev_priv
->display
.init_clock_gating
= kbl_init_clock_gating
;
9371 else if (IS_BROXTON(dev_priv
))
9372 dev_priv
->display
.init_clock_gating
= bxt_init_clock_gating
;
9373 else if (IS_GEMINILAKE(dev_priv
))
9374 dev_priv
->display
.init_clock_gating
= glk_init_clock_gating
;
9375 else if (IS_BROADWELL(dev_priv
))
9376 dev_priv
->display
.init_clock_gating
= bdw_init_clock_gating
;
9377 else if (IS_CHERRYVIEW(dev_priv
))
9378 dev_priv
->display
.init_clock_gating
= chv_init_clock_gating
;
9379 else if (IS_HASWELL(dev_priv
))
9380 dev_priv
->display
.init_clock_gating
= hsw_init_clock_gating
;
9381 else if (IS_IVYBRIDGE(dev_priv
))
9382 dev_priv
->display
.init_clock_gating
= ivb_init_clock_gating
;
9383 else if (IS_VALLEYVIEW(dev_priv
))
9384 dev_priv
->display
.init_clock_gating
= vlv_init_clock_gating
;
9385 else if (IS_GEN(dev_priv
, 6))
9386 dev_priv
->display
.init_clock_gating
= gen6_init_clock_gating
;
9387 else if (IS_GEN(dev_priv
, 5))
9388 dev_priv
->display
.init_clock_gating
= ilk_init_clock_gating
;
9389 else if (IS_G4X(dev_priv
))
9390 dev_priv
->display
.init_clock_gating
= g4x_init_clock_gating
;
9391 else if (IS_I965GM(dev_priv
))
9392 dev_priv
->display
.init_clock_gating
= i965gm_init_clock_gating
;
9393 else if (IS_I965G(dev_priv
))
9394 dev_priv
->display
.init_clock_gating
= i965g_init_clock_gating
;
9395 else if (IS_GEN(dev_priv
, 3))
9396 dev_priv
->display
.init_clock_gating
= gen3_init_clock_gating
;
9397 else if (IS_I85X(dev_priv
) || IS_I865G(dev_priv
))
9398 dev_priv
->display
.init_clock_gating
= i85x_init_clock_gating
;
9399 else if (IS_GEN(dev_priv
, 2))
9400 dev_priv
->display
.init_clock_gating
= i830_init_clock_gating
;
9402 MISSING_CASE(INTEL_DEVID(dev_priv
));
9403 dev_priv
->display
.init_clock_gating
= nop_init_clock_gating
;
9407 /* Set up chip specific power management-related functions */
9408 void intel_init_pm(struct drm_i915_private
*dev_priv
)
9411 if (IS_PINEVIEW(dev_priv
))
9412 i915_pineview_get_mem_freq(dev_priv
);
9413 else if (IS_GEN(dev_priv
, 5))
9414 i915_ironlake_get_mem_freq(dev_priv
);
9416 /* For FIFO watermark updates */
9417 if (INTEL_GEN(dev_priv
) >= 9) {
9418 skl_setup_wm_latency(dev_priv
);
9419 dev_priv
->display
.initial_watermarks
= skl_initial_wm
;
9420 dev_priv
->display
.atomic_update_watermarks
= skl_atomic_update_crtc_wm
;
9421 dev_priv
->display
.compute_global_watermarks
= skl_compute_wm
;
9422 } else if (HAS_PCH_SPLIT(dev_priv
)) {
9423 ilk_setup_wm_latency(dev_priv
);
9425 if ((IS_GEN(dev_priv
, 5) && dev_priv
->wm
.pri_latency
[1] &&
9426 dev_priv
->wm
.spr_latency
[1] && dev_priv
->wm
.cur_latency
[1]) ||
9427 (!IS_GEN(dev_priv
, 5) && dev_priv
->wm
.pri_latency
[0] &&
9428 dev_priv
->wm
.spr_latency
[0] && dev_priv
->wm
.cur_latency
[0])) {
9429 dev_priv
->display
.compute_pipe_wm
= ilk_compute_pipe_wm
;
9430 dev_priv
->display
.compute_intermediate_wm
=
9431 ilk_compute_intermediate_wm
;
9432 dev_priv
->display
.initial_watermarks
=
9433 ilk_initial_watermarks
;
9434 dev_priv
->display
.optimize_watermarks
=
9435 ilk_optimize_watermarks
;
9437 DRM_DEBUG_KMS("Failed to read display plane latency. "
9440 } else if (IS_VALLEYVIEW(dev_priv
) || IS_CHERRYVIEW(dev_priv
)) {
9441 vlv_setup_wm_latency(dev_priv
);
9442 dev_priv
->display
.compute_pipe_wm
= vlv_compute_pipe_wm
;
9443 dev_priv
->display
.compute_intermediate_wm
= vlv_compute_intermediate_wm
;
9444 dev_priv
->display
.initial_watermarks
= vlv_initial_watermarks
;
9445 dev_priv
->display
.optimize_watermarks
= vlv_optimize_watermarks
;
9446 dev_priv
->display
.atomic_update_watermarks
= vlv_atomic_update_fifo
;
9447 } else if (IS_G4X(dev_priv
)) {
9448 g4x_setup_wm_latency(dev_priv
);
9449 dev_priv
->display
.compute_pipe_wm
= g4x_compute_pipe_wm
;
9450 dev_priv
->display
.compute_intermediate_wm
= g4x_compute_intermediate_wm
;
9451 dev_priv
->display
.initial_watermarks
= g4x_initial_watermarks
;
9452 dev_priv
->display
.optimize_watermarks
= g4x_optimize_watermarks
;
9453 } else if (IS_PINEVIEW(dev_priv
)) {
9454 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv
),
9457 dev_priv
->mem_freq
)) {
9458 DRM_INFO("failed to find known CxSR latency "
9459 "(found ddr%s fsb freq %d, mem freq %d), "
9461 (dev_priv
->is_ddr3
== 1) ? "3" : "2",
9462 dev_priv
->fsb_freq
, dev_priv
->mem_freq
);
9463 /* Disable CxSR and never update its watermark again */
9464 intel_set_memory_cxsr(dev_priv
, false);
9465 dev_priv
->display
.update_wm
= NULL
;
9467 dev_priv
->display
.update_wm
= pineview_update_wm
;
9468 } else if (IS_GEN(dev_priv
, 4)) {
9469 dev_priv
->display
.update_wm
= i965_update_wm
;
9470 } else if (IS_GEN(dev_priv
, 3)) {
9471 dev_priv
->display
.update_wm
= i9xx_update_wm
;
9472 dev_priv
->display
.get_fifo_size
= i9xx_get_fifo_size
;
9473 } else if (IS_GEN(dev_priv
, 2)) {
9474 if (INTEL_INFO(dev_priv
)->num_pipes
== 1) {
9475 dev_priv
->display
.update_wm
= i845_update_wm
;
9476 dev_priv
->display
.get_fifo_size
= i845_get_fifo_size
;
9478 dev_priv
->display
.update_wm
= i9xx_update_wm
;
9479 dev_priv
->display
.get_fifo_size
= i830_get_fifo_size
;
9482 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
9486 static inline int gen6_check_mailbox_status(struct drm_i915_private
*dev_priv
)
9489 I915_READ_FW(GEN6_PCODE_MAILBOX
) & GEN6_PCODE_ERROR_MASK
;
9492 case GEN6_PCODE_SUCCESS
:
9494 case GEN6_PCODE_UNIMPLEMENTED_CMD
:
9496 case GEN6_PCODE_ILLEGAL_CMD
:
9498 case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE
:
9499 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE
:
9501 case GEN6_PCODE_TIMEOUT
:
9504 MISSING_CASE(flags
);
9509 static inline int gen7_check_mailbox_status(struct drm_i915_private
*dev_priv
)
9512 I915_READ_FW(GEN6_PCODE_MAILBOX
) & GEN6_PCODE_ERROR_MASK
;
9515 case GEN6_PCODE_SUCCESS
:
9517 case GEN6_PCODE_ILLEGAL_CMD
:
9519 case GEN7_PCODE_TIMEOUT
:
9521 case GEN7_PCODE_ILLEGAL_DATA
:
9523 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE
:
9526 MISSING_CASE(flags
);
9531 int sandybridge_pcode_read(struct drm_i915_private
*dev_priv
, u32 mbox
, u32
*val
)
9535 WARN_ON(!mutex_is_locked(&dev_priv
->pcu_lock
));
9537 /* GEN6_PCODE_* are outside of the forcewake domain, we can
9538 * use te fw I915_READ variants to reduce the amount of work
9539 * required when reading/writing.
9542 if (I915_READ_FW(GEN6_PCODE_MAILBOX
) & GEN6_PCODE_READY
) {
9543 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
9544 mbox
, __builtin_return_address(0));
9548 I915_WRITE_FW(GEN6_PCODE_DATA
, *val
);
9549 I915_WRITE_FW(GEN6_PCODE_DATA1
, 0);
9550 I915_WRITE_FW(GEN6_PCODE_MAILBOX
, GEN6_PCODE_READY
| mbox
);
9552 if (__intel_wait_for_register_fw(dev_priv
,
9553 GEN6_PCODE_MAILBOX
, GEN6_PCODE_READY
, 0,
9555 DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
9556 mbox
, __builtin_return_address(0));
9560 *val
= I915_READ_FW(GEN6_PCODE_DATA
);
9561 I915_WRITE_FW(GEN6_PCODE_DATA
, 0);
9563 if (INTEL_GEN(dev_priv
) > 6)
9564 status
= gen7_check_mailbox_status(dev_priv
);
9566 status
= gen6_check_mailbox_status(dev_priv
);
9569 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
9570 mbox
, __builtin_return_address(0), status
);
9577 int sandybridge_pcode_write_timeout(struct drm_i915_private
*dev_priv
,
9579 int fast_timeout_us
, int slow_timeout_ms
)
9583 WARN_ON(!mutex_is_locked(&dev_priv
->pcu_lock
));
9585 /* GEN6_PCODE_* are outside of the forcewake domain, we can
9586 * use te fw I915_READ variants to reduce the amount of work
9587 * required when reading/writing.
9590 if (I915_READ_FW(GEN6_PCODE_MAILBOX
) & GEN6_PCODE_READY
) {
9591 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
9592 val
, mbox
, __builtin_return_address(0));
9596 I915_WRITE_FW(GEN6_PCODE_DATA
, val
);
9597 I915_WRITE_FW(GEN6_PCODE_DATA1
, 0);
9598 I915_WRITE_FW(GEN6_PCODE_MAILBOX
, GEN6_PCODE_READY
| mbox
);
9600 if (__intel_wait_for_register_fw(dev_priv
,
9601 GEN6_PCODE_MAILBOX
, GEN6_PCODE_READY
, 0,
9602 fast_timeout_us
, slow_timeout_ms
,
9604 DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
9605 val
, mbox
, __builtin_return_address(0));
9609 I915_WRITE_FW(GEN6_PCODE_DATA
, 0);
9611 if (INTEL_GEN(dev_priv
) > 6)
9612 status
= gen7_check_mailbox_status(dev_priv
);
9614 status
= gen6_check_mailbox_status(dev_priv
);
9617 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
9618 val
, mbox
, __builtin_return_address(0), status
);
9625 static bool skl_pcode_try_request(struct drm_i915_private
*dev_priv
, u32 mbox
,
9626 u32 request
, u32 reply_mask
, u32 reply
,
9631 *status
= sandybridge_pcode_read(dev_priv
, mbox
, &val
);
9633 return *status
|| ((val
& reply_mask
) == reply
);
9637 * skl_pcode_request - send PCODE request until acknowledgment
9638 * @dev_priv: device private
9639 * @mbox: PCODE mailbox ID the request is targeted for
9640 * @request: request ID
9641 * @reply_mask: mask used to check for request acknowledgment
9642 * @reply: value used to check for request acknowledgment
9643 * @timeout_base_ms: timeout for polling with preemption enabled
9645 * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
9646 * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
9647 * The request is acknowledged once the PCODE reply dword equals @reply after
9648 * applying @reply_mask. Polling is first attempted with preemption enabled
9649 * for @timeout_base_ms and if this times out for another 50 ms with
9650 * preemption disabled.
9652 * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
9653 * other error as reported by PCODE.
9655 int skl_pcode_request(struct drm_i915_private
*dev_priv
, u32 mbox
, u32 request
,
9656 u32 reply_mask
, u32 reply
, int timeout_base_ms
)
9661 WARN_ON(!mutex_is_locked(&dev_priv
->pcu_lock
));
9663 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
9667 * Prime the PCODE by doing a request first. Normally it guarantees
9668 * that a subsequent request, at most @timeout_base_ms later, succeeds.
9669 * _wait_for() doesn't guarantee when its passed condition is evaluated
9670 * first, so send the first request explicitly.
9676 ret
= _wait_for(COND
, timeout_base_ms
* 1000, 10, 10);
9681 * The above can time out if the number of requests was low (2 in the
9682 * worst case) _and_ PCODE was busy for some reason even after a
9683 * (queued) request and @timeout_base_ms delay. As a workaround retry
9684 * the poll with preemption disabled to maximize the number of
9685 * requests. Increase the timeout from @timeout_base_ms to 50ms to
9686 * account for interrupts that could reduce the number of these
9687 * requests, and for any quirks of the PCODE firmware that delays
9688 * the request completion.
9690 DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9691 WARN_ON_ONCE(timeout_base_ms
> 3);
9693 ret
= wait_for_atomic(COND
, 50);
9697 return ret
? ret
: status
;
9701 static int byt_gpu_freq(struct drm_i915_private
*dev_priv
, int val
)
9703 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
9707 * Slow = Fast = GPLL ref * N
9709 return DIV_ROUND_CLOSEST(rps
->gpll_ref_freq
* (val
- 0xb7), 1000);
9712 static int byt_freq_opcode(struct drm_i915_private
*dev_priv
, int val
)
9714 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
9716 return DIV_ROUND_CLOSEST(1000 * val
, rps
->gpll_ref_freq
) + 0xb7;
9719 static int chv_gpu_freq(struct drm_i915_private
*dev_priv
, int val
)
9721 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
9725 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9727 return DIV_ROUND_CLOSEST(rps
->gpll_ref_freq
* val
, 2 * 2 * 1000);
9730 static int chv_freq_opcode(struct drm_i915_private
*dev_priv
, int val
)
9732 struct intel_rps
*rps
= &dev_priv
->gt_pm
.rps
;
9734 /* CHV needs even values */
9735 return DIV_ROUND_CLOSEST(2 * 1000 * val
, rps
->gpll_ref_freq
) * 2;
9738 int intel_gpu_freq(struct drm_i915_private
*dev_priv
, int val
)
9740 if (INTEL_GEN(dev_priv
) >= 9)
9741 return DIV_ROUND_CLOSEST(val
* GT_FREQUENCY_MULTIPLIER
,
9743 else if (IS_CHERRYVIEW(dev_priv
))
9744 return chv_gpu_freq(dev_priv
, val
);
9745 else if (IS_VALLEYVIEW(dev_priv
))
9746 return byt_gpu_freq(dev_priv
, val
);
9748 return val
* GT_FREQUENCY_MULTIPLIER
;
9751 int intel_freq_opcode(struct drm_i915_private
*dev_priv
, int val
)
9753 if (INTEL_GEN(dev_priv
) >= 9)
9754 return DIV_ROUND_CLOSEST(val
* GEN9_FREQ_SCALER
,
9755 GT_FREQUENCY_MULTIPLIER
);
9756 else if (IS_CHERRYVIEW(dev_priv
))
9757 return chv_freq_opcode(dev_priv
, val
);
9758 else if (IS_VALLEYVIEW(dev_priv
))
9759 return byt_freq_opcode(dev_priv
, val
);
9761 return DIV_ROUND_CLOSEST(val
, GT_FREQUENCY_MULTIPLIER
);
9764 void intel_pm_setup(struct drm_i915_private
*dev_priv
)
9766 mutex_init(&dev_priv
->pcu_lock
);
9767 mutex_init(&dev_priv
->gt_pm
.rps
.power
.mutex
);
9769 atomic_set(&dev_priv
->gt_pm
.rps
.num_waiters
, 0);
9771 dev_priv
->runtime_pm
.suspended
= false;
9772 atomic_set(&dev_priv
->runtime_pm
.wakeref_count
, 0);
9775 static u64
vlv_residency_raw(struct drm_i915_private
*dev_priv
,
9776 const i915_reg_t reg
)
9778 u32 lower
, upper
, tmp
;
9782 * The register accessed do not need forcewake. We borrow
9783 * uncore lock to prevent concurrent access to range reg.
9785 lockdep_assert_held(&dev_priv
->uncore
.lock
);
9788 * vlv and chv residency counters are 40 bits in width.
9789 * With a control bit, we can choose between upper or lower
9790 * 32bit window into this counter.
9792 * Although we always use the counter in high-range mode elsewhere,
9793 * userspace may attempt to read the value before rc6 is initialised,
9794 * before we have set the default VLV_COUNTER_CONTROL value. So always
9795 * set the high bit to be safe.
9797 I915_WRITE_FW(VLV_COUNTER_CONTROL
,
9798 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH
));
9799 upper
= I915_READ_FW(reg
);
9803 I915_WRITE_FW(VLV_COUNTER_CONTROL
,
9804 _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH
));
9805 lower
= I915_READ_FW(reg
);
9807 I915_WRITE_FW(VLV_COUNTER_CONTROL
,
9808 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH
));
9809 upper
= I915_READ_FW(reg
);
9810 } while (upper
!= tmp
&& --loop
);
9813 * Everywhere else we always use VLV_COUNTER_CONTROL with the
9814 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9818 return lower
| (u64
)upper
<< 8;
9821 u64
intel_rc6_residency_ns(struct drm_i915_private
*dev_priv
,
9822 const i915_reg_t reg
)
9824 u64 time_hw
, prev_hw
, overflow_hw
;
9825 unsigned int fw_domains
;
9826 unsigned long flags
;
9830 if (!HAS_RC6(dev_priv
))
9834 * Store previous hw counter values for counter wrap-around handling.
9836 * There are only four interesting registers and they live next to each
9837 * other so we can use the relative address, compared to the smallest
9838 * one as the index into driver storage.
9840 i
= (i915_mmio_reg_offset(reg
) -
9841 i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED
)) / sizeof(u32
);
9842 if (WARN_ON_ONCE(i
>= ARRAY_SIZE(dev_priv
->gt_pm
.rc6
.cur_residency
)))
9845 fw_domains
= intel_uncore_forcewake_for_reg(dev_priv
, reg
, FW_REG_READ
);
9847 spin_lock_irqsave(&dev_priv
->uncore
.lock
, flags
);
9848 intel_uncore_forcewake_get__locked(dev_priv
, fw_domains
);
9850 /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9851 if (IS_VALLEYVIEW(dev_priv
) || IS_CHERRYVIEW(dev_priv
)) {
9853 div
= dev_priv
->czclk_freq
;
9854 overflow_hw
= BIT_ULL(40);
9855 time_hw
= vlv_residency_raw(dev_priv
, reg
);
9857 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
9858 if (IS_GEN9_LP(dev_priv
)) {
9866 overflow_hw
= BIT_ULL(32);
9867 time_hw
= I915_READ_FW(reg
);
9871 * Counter wrap handling.
9873 * But relying on a sufficient frequency of queries otherwise counters
9876 prev_hw
= dev_priv
->gt_pm
.rc6
.prev_hw_residency
[i
];
9877 dev_priv
->gt_pm
.rc6
.prev_hw_residency
[i
] = time_hw
;
9879 /* RC6 delta from last sample. */
9880 if (time_hw
>= prev_hw
)
9883 time_hw
+= overflow_hw
- prev_hw
;
9885 /* Add delta to RC6 extended raw driver copy. */
9886 time_hw
+= dev_priv
->gt_pm
.rc6
.cur_residency
[i
];
9887 dev_priv
->gt_pm
.rc6
.cur_residency
[i
] = time_hw
;
9889 intel_uncore_forcewake_put__locked(dev_priv
, fw_domains
);
9890 spin_unlock_irqrestore(&dev_priv
->uncore
.lock
, flags
);
9892 return mul_u64_u32_div(time_hw
, mul
, div
);
9895 u32
intel_get_cagf(struct drm_i915_private
*dev_priv
, u32 rpstat
)
9899 if (INTEL_GEN(dev_priv
) >= 9)
9900 cagf
= (rpstat
& GEN9_CAGF_MASK
) >> GEN9_CAGF_SHIFT
;
9901 else if (IS_HASWELL(dev_priv
) || IS_BROADWELL(dev_priv
))
9902 cagf
= (rpstat
& HSW_CAGF_MASK
) >> HSW_CAGF_SHIFT
;
9904 cagf
= (rpstat
& GEN6_CAGF_MASK
) >> GEN6_CAGF_SHIFT
;