]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - drivers/gpu/drm/i915/intel_pm.c
drm/i915: Use mul_u32_u32() more
[thirdparty/kernel/stable.git] / drivers / gpu / drm / i915 / intel_pm.c
CommitLineData
85208be0
ED
1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eugeni Dodonov <eugeni.dodonov@intel.com>
25 *
26 */
27
2b4e57bd 28#include <linux/cpufreq.h>
d0e93599 29#include <linux/module.h>
08ea70a4 30#include <linux/pm_runtime.h>
d0e93599
SR
31
32#include <drm/drm_atomic_helper.h>
33#include <drm/drm_fourcc.h>
9c2f7a9d 34#include <drm/drm_plane_helper.h>
d0e93599 35
85208be0 36#include "i915_drv.h"
440e2b3d 37#include "i915_irq.h"
12392a74 38#include "intel_atomic.h"
85208be0 39#include "intel_drv.h"
98afa316 40#include "intel_fbc.h"
696173b0 41#include "intel_pm.h"
f9a79f9a 42#include "intel_sprite.h"
56c5098f 43#include "intel_sideband.h"
eb48eb00 44#include "../../../platform/x86/intel_ips.h"
85208be0 45
dc39fff7 46/**
18afd443
JN
47 * DOC: RC6
48 *
dc39fff7
BW
49 * RC6 is a special power stage which allows the GPU to enter a very
50 * low-voltage mode when idle, using down to 0V while at this stage. This
51 * stage is entered automatically when the GPU is idle when RC6 support is
52 * enabled, and as soon as new workload arises GPU wakes up automatically as well.
53 *
54 * There are different RC6 modes available in Intel GPU, which differentiate
55 * among each other with the latency required to enter and leave RC6 and
56 * voltage consumed by the GPU in different states.
57 *
58 * The combination of the following flags define which states GPU is allowed
59 * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
60 * RC6pp is deepest RC6. Their support by hardware varies according to the
61 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
62 * which brings the most power savings; deeper states save more power, but
63 * require higher latency to switch to and wake up.
64 */
dc39fff7 65
46f16e63 66static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
a82abe43 67{
93564044
VS
68 if (HAS_LLC(dev_priv)) {
69 /*
70 * WaCompressedResourceDisplayNewHashMode:skl,kbl
e0403cb9 71 * Display WA #0390: skl,kbl
93564044
VS
72 *
73 * Must match Sampler, Pixel Back End, and Media. See
74 * WaCompressedResourceSamplerPbeMediaNewHashMode.
75 */
76 I915_WRITE(CHICKEN_PAR1_1,
77 I915_READ(CHICKEN_PAR1_1) |
78 SKL_DE_COMPRESSED_HASH_MODE);
79 }
80
82525c17 81 /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
dc00b6a0
DV
82 I915_WRITE(CHICKEN_PAR1_1,
83 I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
84
82525c17 85 /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
590e8ff0
MK
86 I915_WRITE(GEN8_CHICKEN_DCPR_1,
87 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
0f78dee6 88
82525c17
RV
89 /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
90 /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
303d4ea5
MK
91 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
92 DISP_FBC_WM_DIS |
93 DISP_FBC_MEMORY_WAKE);
d1b4eefd 94
82525c17 95 /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
d1b4eefd
MK
96 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
97 ILK_DPFC_DISABLE_DUMMY0);
32087d14
PP
98
99 if (IS_SKYLAKE(dev_priv)) {
100 /* WaDisableDopClockGating */
101 I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
102 & ~GEN7_DOP_CLOCK_GATE_ENABLE);
103 }
b033bb6d
MK
104}
105
46f16e63 106static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
b033bb6d 107{
46f16e63 108 gen9_init_clock_gating(dev_priv);
b033bb6d 109
a7546159
NH
110 /* WaDisableSDEUnitClockGating:bxt */
111 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
112 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
113
32608ca2
ID
114 /*
115 * FIXME:
868434c5 116 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
32608ca2 117 */
32608ca2 118 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
868434c5 119 GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
d965e7ac
ID
120
121 /*
122 * Wa: Backlight PWM may stop in the asserted state, causing backlight
123 * to stay fully on.
124 */
8aeaf64c
JN
125 I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
126 PWM1_GATING_DIS | PWM2_GATING_DIS);
a82abe43
ID
127}
128
9fb5026f
ACO
129static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
130{
131 gen9_init_clock_gating(dev_priv);
132
133 /*
134 * WaDisablePWMClockGating:glk
135 * Backlight PWM may stop in the asserted state, causing backlight
136 * to stay fully on.
137 */
138 I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
139 PWM1_GATING_DIS | PWM2_GATING_DIS);
f4f4b59b
ACO
140
141 /* WaDDIIOTimeout:glk */
142 if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
143 u32 val = I915_READ(CHICKEN_MISC_2);
144 val &= ~(GLK_CL0_PWR_DOWN |
145 GLK_CL1_PWR_DOWN |
146 GLK_CL2_PWR_DOWN);
147 I915_WRITE(CHICKEN_MISC_2, val);
148 }
149
9fb5026f
ACO
150}
151
148ac1f3 152static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
c921aba8 153{
c921aba8
DV
154 u32 tmp;
155
156 tmp = I915_READ(CLKCFG);
157
158 switch (tmp & CLKCFG_FSB_MASK) {
159 case CLKCFG_FSB_533:
160 dev_priv->fsb_freq = 533; /* 133*4 */
161 break;
162 case CLKCFG_FSB_800:
163 dev_priv->fsb_freq = 800; /* 200*4 */
164 break;
165 case CLKCFG_FSB_667:
166 dev_priv->fsb_freq = 667; /* 167*4 */
167 break;
168 case CLKCFG_FSB_400:
169 dev_priv->fsb_freq = 400; /* 100*4 */
170 break;
171 }
172
173 switch (tmp & CLKCFG_MEM_MASK) {
174 case CLKCFG_MEM_533:
175 dev_priv->mem_freq = 533;
176 break;
177 case CLKCFG_MEM_667:
178 dev_priv->mem_freq = 667;
179 break;
180 case CLKCFG_MEM_800:
181 dev_priv->mem_freq = 800;
182 break;
183 }
184
185 /* detect pineview DDR3 setting */
186 tmp = I915_READ(CSHRDDR3CTL);
187 dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
188}
189
148ac1f3 190static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
c921aba8 191{
c921aba8
DV
192 u16 ddrpll, csipll;
193
194 ddrpll = I915_READ16(DDRMPLL1);
195 csipll = I915_READ16(CSIPLL0);
196
197 switch (ddrpll & 0xff) {
198 case 0xc:
199 dev_priv->mem_freq = 800;
200 break;
201 case 0x10:
202 dev_priv->mem_freq = 1066;
203 break;
204 case 0x14:
205 dev_priv->mem_freq = 1333;
206 break;
207 case 0x18:
208 dev_priv->mem_freq = 1600;
209 break;
210 default:
211 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
212 ddrpll & 0xff);
213 dev_priv->mem_freq = 0;
214 break;
215 }
216
20e4d407 217 dev_priv->ips.r_t = dev_priv->mem_freq;
c921aba8
DV
218
219 switch (csipll & 0x3ff) {
220 case 0x00c:
221 dev_priv->fsb_freq = 3200;
222 break;
223 case 0x00e:
224 dev_priv->fsb_freq = 3733;
225 break;
226 case 0x010:
227 dev_priv->fsb_freq = 4266;
228 break;
229 case 0x012:
230 dev_priv->fsb_freq = 4800;
231 break;
232 case 0x014:
233 dev_priv->fsb_freq = 5333;
234 break;
235 case 0x016:
236 dev_priv->fsb_freq = 5866;
237 break;
238 case 0x018:
239 dev_priv->fsb_freq = 6400;
240 break;
241 default:
242 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
243 csipll & 0x3ff);
244 dev_priv->fsb_freq = 0;
245 break;
246 }
247
248 if (dev_priv->fsb_freq == 3200) {
20e4d407 249 dev_priv->ips.c_m = 0;
c921aba8 250 } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
20e4d407 251 dev_priv->ips.c_m = 1;
c921aba8 252 } else {
20e4d407 253 dev_priv->ips.c_m = 2;
c921aba8
DV
254 }
255}
256
b445e3b0
ED
257static const struct cxsr_latency cxsr_latency_table[] = {
258 {1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */
259 {1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */
260 {1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */
261 {1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */
262 {1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */
263
264 {1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */
265 {1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */
266 {1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */
267 {1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */
268 {1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */
269
270 {1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */
271 {1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */
272 {1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */
273 {1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */
274 {1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */
275
276 {0, 0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */
277 {0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */
278 {0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */
279 {0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */
280 {0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */
281
282 {0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */
283 {0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */
284 {0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */
285 {0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */
286 {0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */
287
288 {0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */
289 {0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */
290 {0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */
291 {0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */
292 {0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */
293};
294
44a655ca
TU
295static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
296 bool is_ddr3,
b445e3b0
ED
297 int fsb,
298 int mem)
299{
300 const struct cxsr_latency *latency;
301 int i;
302
303 if (fsb == 0 || mem == 0)
304 return NULL;
305
306 for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
307 latency = &cxsr_latency_table[i];
308 if (is_desktop == latency->is_desktop &&
309 is_ddr3 == latency->is_ddr3 &&
310 fsb == latency->fsb_freq && mem == latency->mem_freq)
311 return latency;
312 }
313
314 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
315
316 return NULL;
317}
318
fc1ac8de
VS
319static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
320{
321 u32 val;
322
337fa6e0 323 vlv_punit_get(dev_priv);
fc1ac8de
VS
324
325 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
326 if (enable)
327 val &= ~FORCE_DDR_HIGH_FREQ;
328 else
329 val |= FORCE_DDR_HIGH_FREQ;
330 val &= ~FORCE_DDR_LOW_FREQ;
331 val |= FORCE_DDR_FREQ_REQ_ACK;
332 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
333
334 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
335 FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
336 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
337
337fa6e0 338 vlv_punit_put(dev_priv);
fc1ac8de
VS
339}
340
cfb41411
VS
341static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
342{
343 u32 val;
344
337fa6e0 345 vlv_punit_get(dev_priv);
cfb41411 346
c11b813f 347 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
cfb41411
VS
348 if (enable)
349 val |= DSP_MAXFIFO_PM5_ENABLE;
350 else
351 val &= ~DSP_MAXFIFO_PM5_ENABLE;
c11b813f 352 vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val);
cfb41411 353
337fa6e0 354 vlv_punit_put(dev_priv);
cfb41411
VS
355}
356
f4998963
VS
/*
 * Place a watermark value into the DSPFW register field named by @plane:
 * shift by DSPFW_<plane>_SHIFT and clip to DSPFW_<plane>_MASK.
 */
#define FW_WM(value, plane) \
	(DSPFW_ ## plane ## _MASK & \
	 ((value) << DSPFW_ ## plane ## _SHIFT))
359
11a85d6a 360static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
b445e3b0 361{
11a85d6a 362 bool was_enabled;
5209b1f4 363 u32 val;
b445e3b0 364
920a14b2 365 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
11a85d6a 366 was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
5209b1f4 367 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
a7a6c498 368 POSTING_READ(FW_BLC_SELF_VLV);
c0f86832 369 } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
11a85d6a 370 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5209b1f4 371 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
a7a6c498 372 POSTING_READ(FW_BLC_SELF);
9b1e14f4 373 } else if (IS_PINEVIEW(dev_priv)) {
11a85d6a
VS
374 val = I915_READ(DSPFW3);
375 was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
376 if (enable)
377 val |= PINEVIEW_SELF_REFRESH_EN;
378 else
379 val &= ~PINEVIEW_SELF_REFRESH_EN;
5209b1f4 380 I915_WRITE(DSPFW3, val);
a7a6c498 381 POSTING_READ(DSPFW3);
50a0bc90 382 } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
11a85d6a 383 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5209b1f4
ID
384 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
385 _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
386 I915_WRITE(FW_BLC_SELF, val);
a7a6c498 387 POSTING_READ(FW_BLC_SELF);
50a0bc90 388 } else if (IS_I915GM(dev_priv)) {
acb91359
VS
389 /*
390 * FIXME can't find a bit like this for 915G, and
391 * and yet it does have the related watermark in
392 * FW_BLC_SELF. What's going on?
393 */
11a85d6a 394 was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
5209b1f4
ID
395 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
396 _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
397 I915_WRITE(INSTPM, val);
a7a6c498 398 POSTING_READ(INSTPM);
5209b1f4 399 } else {
11a85d6a 400 return false;
5209b1f4 401 }
b445e3b0 402
1489bba8
VS
403 trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
404
11a85d6a
VS
405 DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
406 enableddisabled(enable),
407 enableddisabled(was_enabled));
408
409 return was_enabled;
b445e3b0
ED
410}
411
62571fc3
VS
412/**
413 * intel_set_memory_cxsr - Configure CxSR state
414 * @dev_priv: i915 device
415 * @enable: Allow vs. disallow CxSR
416 *
417 * Allow or disallow the system to enter a special CxSR
418 * (C-state self refresh) state. What typically happens in CxSR mode
419 * is that several display FIFOs may get combined into a single larger
420 * FIFO for a particular plane (so called max FIFO mode) to allow the
421 * system to defer memory fetches longer, and the memory will enter
422 * self refresh.
423 *
424 * Note that enabling CxSR does not guarantee that the system enter
425 * this special mode, nor does it guarantee that the system stays
426 * in that mode once entered. So this just allows/disallows the system
427 * to autonomously utilize the CxSR mode. Other factors such as core
428 * C-states will affect when/if the system actually enters/exits the
429 * CxSR mode.
430 *
431 * Note that on VLV/CHV this actually only controls the max FIFO mode,
432 * and the system is free to enter/exit memory self refresh at any time
433 * even when the use of CxSR has been disallowed.
434 *
435 * While the system is actually in the CxSR/max FIFO mode, some plane
436 * control registers will not get latched on vblank. Thus in order to
437 * guarantee the system will respond to changes in the plane registers
438 * we must always disallow CxSR prior to making changes to those registers.
439 * Unfortunately the system will re-evaluate the CxSR conditions at
440 * frame start which happens after vblank start (which is when the plane
441 * registers would get latched), so we can't proceed with the plane update
442 * during the same frame where we disallowed CxSR.
443 *
444 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
445 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
446 * the hardware w.r.t. HPLL SR when writing to plane registers.
447 * Disallowing just CxSR is sufficient.
448 */
11a85d6a 449bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
3d90e649 450{
11a85d6a
VS
451 bool ret;
452
3d90e649 453 mutex_lock(&dev_priv->wm.wm_mutex);
11a85d6a 454 ret = _intel_set_memory_cxsr(dev_priv, enable);
04548cba
VS
455 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
456 dev_priv->wm.vlv.cxsr = enable;
457 else if (IS_G4X(dev_priv))
458 dev_priv->wm.g4x.cxsr = enable;
3d90e649 459 mutex_unlock(&dev_priv->wm.wm_mutex);
11a85d6a
VS
460
461 return ret;
3d90e649 462}
fc1ac8de 463
b445e3b0
ED
/*
 * The latency of FIFO fetches depends on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * Since it can be fairly high in some situations, a fairly pessimal value
 * is assumed here. The choice is a tradeoff between extra memory fetches
 * (with too high a value the FIFO will fetch frequently to stay full) and
 * power consumption (with too low a value, chosen to save power, FIFO
 * underruns and display "flicker" may show up).
 *
 * 5us seems to be a good balance: safe for very low end platforms but not
 * overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;
b445e3b0 479
b5004720
VS
/*
 * Assemble a 9 bit FIFO start value: bits 7:0 come from @dsparb at
 * @lo_shift, bit 8 comes from the single bit of @dsparb2 at @hi_shift.
 */
#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
	(((((dsparb2) >> (hi_shift)) & 0x1) << 8) | \
	 (((dsparb) >> (lo_shift)) & 0xff))
482
814e7f0b 483static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
b5004720 484{
814e7f0b 485 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
f07d43d2 486 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
814e7f0b 487 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
f07d43d2
VS
488 enum pipe pipe = crtc->pipe;
489 int sprite0_start, sprite1_start;
49845a23 490
f07d43d2 491 switch (pipe) {
5ce9a649 492 u32 dsparb, dsparb2, dsparb3;
b5004720
VS
493 case PIPE_A:
494 dsparb = I915_READ(DSPARB);
495 dsparb2 = I915_READ(DSPARB2);
496 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
497 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
498 break;
499 case PIPE_B:
500 dsparb = I915_READ(DSPARB);
501 dsparb2 = I915_READ(DSPARB2);
502 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
503 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
504 break;
505 case PIPE_C:
506 dsparb2 = I915_READ(DSPARB2);
507 dsparb3 = I915_READ(DSPARB3);
508 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
509 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
510 break;
511 default:
f07d43d2
VS
512 MISSING_CASE(pipe);
513 return;
b5004720
VS
514 }
515
f07d43d2
VS
516 fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
517 fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
518 fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
519 fifo_state->plane[PLANE_CURSOR] = 63;
b5004720
VS
520}
521
bdaf8439
VS
522static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
523 enum i9xx_plane_id i9xx_plane)
b445e3b0 524{
5ce9a649 525 u32 dsparb = I915_READ(DSPARB);
b445e3b0
ED
526 int size;
527
528 size = dsparb & 0x7f;
bdaf8439 529 if (i9xx_plane == PLANE_B)
b445e3b0
ED
530 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
531
bdaf8439
VS
532 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
533 dsparb, plane_name(i9xx_plane), size);
b445e3b0
ED
534
535 return size;
536}
537
bdaf8439
VS
538static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
539 enum i9xx_plane_id i9xx_plane)
b445e3b0 540{
5ce9a649 541 u32 dsparb = I915_READ(DSPARB);
b445e3b0
ED
542 int size;
543
544 size = dsparb & 0x1ff;
bdaf8439 545 if (i9xx_plane == PLANE_B)
b445e3b0
ED
546 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
547 size >>= 1; /* Convert to cachelines */
548
bdaf8439
VS
549 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
550 dsparb, plane_name(i9xx_plane), size);
b445e3b0
ED
551
552 return size;
553}
554
bdaf8439
VS
555static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
556 enum i9xx_plane_id i9xx_plane)
b445e3b0 557{
5ce9a649 558 u32 dsparb = I915_READ(DSPARB);
b445e3b0
ED
559 int size;
560
561 size = dsparb & 0x7f;
562 size >>= 2; /* Convert to cachelines */
563
bdaf8439
VS
564 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
565 dsparb, plane_name(i9xx_plane), size);
b445e3b0
ED
566
567 return size;
568}
569
b445e3b0
ED
570/* Pineview has different values for various configs */
571static const struct intel_watermark_params pineview_display_wm = {
e0f0273e
VS
572 .fifo_size = PINEVIEW_DISPLAY_FIFO,
573 .max_wm = PINEVIEW_MAX_WM,
574 .default_wm = PINEVIEW_DFT_WM,
575 .guard_size = PINEVIEW_GUARD_WM,
576 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
b445e3b0
ED
577};
578static const struct intel_watermark_params pineview_display_hplloff_wm = {
e0f0273e
VS
579 .fifo_size = PINEVIEW_DISPLAY_FIFO,
580 .max_wm = PINEVIEW_MAX_WM,
581 .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
582 .guard_size = PINEVIEW_GUARD_WM,
583 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
b445e3b0
ED
584};
585static const struct intel_watermark_params pineview_cursor_wm = {
e0f0273e
VS
586 .fifo_size = PINEVIEW_CURSOR_FIFO,
587 .max_wm = PINEVIEW_CURSOR_MAX_WM,
588 .default_wm = PINEVIEW_CURSOR_DFT_WM,
589 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
590 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
b445e3b0
ED
591};
592static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
e0f0273e
VS
593 .fifo_size = PINEVIEW_CURSOR_FIFO,
594 .max_wm = PINEVIEW_CURSOR_MAX_WM,
595 .default_wm = PINEVIEW_CURSOR_DFT_WM,
596 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
597 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
b445e3b0 598};
b445e3b0 599static const struct intel_watermark_params i965_cursor_wm_info = {
e0f0273e
VS
600 .fifo_size = I965_CURSOR_FIFO,
601 .max_wm = I965_CURSOR_MAX_WM,
602 .default_wm = I965_CURSOR_DFT_WM,
603 .guard_size = 2,
604 .cacheline_size = I915_FIFO_LINE_SIZE,
b445e3b0
ED
605};
606static const struct intel_watermark_params i945_wm_info = {
e0f0273e
VS
607 .fifo_size = I945_FIFO_SIZE,
608 .max_wm = I915_MAX_WM,
609 .default_wm = 1,
610 .guard_size = 2,
611 .cacheline_size = I915_FIFO_LINE_SIZE,
b445e3b0
ED
612};
613static const struct intel_watermark_params i915_wm_info = {
e0f0273e
VS
614 .fifo_size = I915_FIFO_SIZE,
615 .max_wm = I915_MAX_WM,
616 .default_wm = 1,
617 .guard_size = 2,
618 .cacheline_size = I915_FIFO_LINE_SIZE,
b445e3b0 619};
9d539105 620static const struct intel_watermark_params i830_a_wm_info = {
e0f0273e
VS
621 .fifo_size = I855GM_FIFO_SIZE,
622 .max_wm = I915_MAX_WM,
623 .default_wm = 1,
624 .guard_size = 2,
625 .cacheline_size = I830_FIFO_LINE_SIZE,
b445e3b0 626};
9d539105
VS
627static const struct intel_watermark_params i830_bc_wm_info = {
628 .fifo_size = I855GM_FIFO_SIZE,
629 .max_wm = I915_MAX_WM/2,
630 .default_wm = 1,
631 .guard_size = 2,
632 .cacheline_size = I830_FIFO_LINE_SIZE,
633};
feb56b93 634static const struct intel_watermark_params i845_wm_info = {
e0f0273e
VS
635 .fifo_size = I830_FIFO_SIZE,
636 .max_wm = I915_MAX_WM,
637 .default_wm = 1,
638 .guard_size = 2,
639 .cacheline_size = I830_FIFO_LINE_SIZE,
b445e3b0
ED
640};
641
baf69ca8
VS
642/**
643 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
644 * @pixel_rate: Pipe pixel rate in kHz
645 * @cpp: Plane bytes per pixel
646 * @latency: Memory wakeup latency in 0.1us units
647 *
648 * Compute the watermark using the method 1 or "small buffer"
649 * formula. The caller may additonally add extra cachelines
650 * to account for TLB misses and clock crossings.
651 *
652 * This method is concerned with the short term drain rate
653 * of the FIFO, ie. it does not account for blanking periods
654 * which would effectively reduce the average drain rate across
655 * a longer period. The name "small" refers to the fact the
656 * FIFO is relatively small compared to the amount of data
657 * fetched.
658 *
659 * The FIFO level vs. time graph might look something like:
660 *
661 * |\ |\
662 * | \ | \
663 * __---__---__ (- plane active, _ blanking)
664 * -> time
665 *
666 * or perhaps like this:
667 *
668 * |\|\ |\|\
669 * __----__----__ (- plane active, _ blanking)
670 * -> time
671 *
672 * Returns:
673 * The watermark in bytes
674 */
675static unsigned int intel_wm_method1(unsigned int pixel_rate,
676 unsigned int cpp,
677 unsigned int latency)
678{
5ce9a649 679 u64 ret;
baf69ca8 680
d492a29d 681 ret = mul_u32_u32(pixel_rate, cpp * latency);
baf69ca8
VS
682 ret = DIV_ROUND_UP_ULL(ret, 10000);
683
684 return ret;
685}
686
/**
 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @htotal: Pipe horizontal total
 * @width: Plane width in pixels
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 2 or "large buffer"
 * formula. The caller may additionally add extra cachelines
 * to account for TLB misses and clock crossings.
 *
 * This method looks at the long term drain rate of the FIFO,
 * ie. it does account for blanking periods, which effectively
 * reduce the average drain rate across a longer period.
 * "Large" refers to the FIFO being relatively large compared
 * to the amount of data fetched.
 *
 * The FIFO level vs. time graph might look something like:
 *
 *    |\___       |\___
 *    |    \___   |    \___
 *    |        \  |        \
 * __ --__--__--__--__--__--__ (- plane active, _ blanking)
 * -> time
 *
 * Returns:
 * The watermark in bytes
 */
static unsigned int intel_wm_method2(unsigned int pixel_rate,
				     unsigned int htotal,
				     unsigned int width,
				     unsigned int cpp,
				     unsigned int latency)
{
	unsigned int lines;

	/*
	 * FIXME remove once all users are computing
	 * watermarks in the correct place.
	 */
	if (WARN_ON_ONCE(htotal == 0))
		htotal = 1;

	/* Whole lines scanned out during the latency period */
	lines = (latency * pixel_rate) / (htotal * 10000);

	/* One extra line on top, converted to bytes of plane data */
	return (lines + 1) * width * cpp;
}
737
b445e3b0
ED
738/**
739 * intel_calculate_wm - calculate watermark level
baf69ca8 740 * @pixel_rate: pixel clock
b445e3b0 741 * @wm: chip FIFO params
31383410 742 * @fifo_size: size of the FIFO buffer
ac484963 743 * @cpp: bytes per pixel
b445e3b0
ED
744 * @latency_ns: memory latency for the platform
745 *
746 * Calculate the watermark level (the level at which the display plane will
747 * start fetching from memory again). Each chip has a different display
748 * FIFO size and allocation, so the caller needs to figure that out and pass
749 * in the correct intel_watermark_params structure.
750 *
751 * As the pixel clock runs, the FIFO will be drained at a rate that depends
752 * on the pixel size. When it reaches the watermark level, it'll start
753 * fetching FIFO line sized based chunks from memory until the FIFO fills
754 * past the watermark point. If the FIFO drains completely, a FIFO underrun
755 * will occur, and a display engine hang could result.
756 */
baf69ca8
VS
757static unsigned int intel_calculate_wm(int pixel_rate,
758 const struct intel_watermark_params *wm,
759 int fifo_size, int cpp,
760 unsigned int latency_ns)
b445e3b0 761{
baf69ca8 762 int entries, wm_size;
b445e3b0
ED
763
764 /*
765 * Note: we need to make sure we don't overflow for various clock &
766 * latency values.
767 * clocks go from a few thousand to several hundred thousand.
768 * latency is usually a few thousand
769 */
baf69ca8
VS
770 entries = intel_wm_method1(pixel_rate, cpp,
771 latency_ns / 100);
772 entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
773 wm->guard_size;
774 DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
b445e3b0 775
baf69ca8
VS
776 wm_size = fifo_size - entries;
777 DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
b445e3b0
ED
778
779 /* Don't promote wm_size to unsigned... */
baf69ca8 780 if (wm_size > wm->max_wm)
b445e3b0
ED
781 wm_size = wm->max_wm;
782 if (wm_size <= 0)
783 wm_size = wm->default_wm;
d6feb196
VS
784
785 /*
786 * Bspec seems to indicate that the value shouldn't be lower than
787 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
788 * Lets go for 8 which is the burst size since certain platforms
789 * already use a hardcoded 8 (which is what the spec says should be
790 * done).
791 */
792 if (wm_size <= 8)
793 wm_size = 8;
794
b445e3b0
ED
795 return wm_size;
796}
797
04548cba
VS
/* True when the value drops from at/above @threshold to below it. */
static bool is_disabling(int old, int new, int threshold)
{
	if (old < threshold)
		return false;

	return new < threshold;
}
802
/* True when the value rises from below @threshold to at/above it. */
static bool is_enabling(int old, int new, int threshold)
{
	if (old >= threshold)
		return false;

	return new >= threshold;
}
807
6d5019b6
VS
808static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
809{
810 return dev_priv->wm.max_level + 1;
811}
812
24304d81
VS
813static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
814 const struct intel_plane_state *plane_state)
815{
816 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
817
818 /* FIXME check the 'enable' instead */
819 if (!crtc_state->base.active)
820 return false;
821
822 /*
823 * Treat cursor with fb as always visible since cursor updates
824 * can happen faster than the vrefresh rate, and the current
825 * watermark code doesn't handle that correctly. Cursor updates
826 * which set/clear the fb or change the cursor size are going
827 * to get throttled by intel_legacy_cursor_update() to work
828 * around this problem with the watermark code.
829 */
830 if (plane->id == PLANE_CURSOR)
831 return plane_state->base.fb != NULL;
832 else
833 return plane_state->base.visible;
834}
835
ffc7a76b 836static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
b445e3b0 837{
efc2611e 838 struct intel_crtc *crtc, *enabled = NULL;
b445e3b0 839
ffc7a76b 840 for_each_intel_crtc(&dev_priv->drm, crtc) {
efc2611e 841 if (intel_crtc_active(crtc)) {
b445e3b0
ED
842 if (enabled)
843 return NULL;
844 enabled = crtc;
845 }
846 }
847
848 return enabled;
849}
850
432081bc 851static void pineview_update_wm(struct intel_crtc *unused_crtc)
b445e3b0 852{
ffc7a76b 853 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
efc2611e 854 struct intel_crtc *crtc;
b445e3b0
ED
855 const struct cxsr_latency *latency;
856 u32 reg;
baf69ca8 857 unsigned int wm;
b445e3b0 858
86d35d4e 859 latency = intel_get_cxsr_latency(!IS_MOBILE(dev_priv),
50a0bc90
TU
860 dev_priv->is_ddr3,
861 dev_priv->fsb_freq,
862 dev_priv->mem_freq);
b445e3b0
ED
863 if (!latency) {
864 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
5209b1f4 865 intel_set_memory_cxsr(dev_priv, false);
b445e3b0
ED
866 return;
867 }
868
ffc7a76b 869 crtc = single_enabled_crtc(dev_priv);
b445e3b0 870 if (crtc) {
efc2611e
VS
871 const struct drm_display_mode *adjusted_mode =
872 &crtc->config->base.adjusted_mode;
873 const struct drm_framebuffer *fb =
874 crtc->base.primary->state->fb;
353c8598 875 int cpp = fb->format->cpp[0];
7c5f93b0 876 int clock = adjusted_mode->crtc_clock;
b445e3b0
ED
877
878 /* Display SR */
879 wm = intel_calculate_wm(clock, &pineview_display_wm,
880 pineview_display_wm.fifo_size,
ac484963 881 cpp, latency->display_sr);
b445e3b0
ED
882 reg = I915_READ(DSPFW1);
883 reg &= ~DSPFW_SR_MASK;
f4998963 884 reg |= FW_WM(wm, SR);
b445e3b0
ED
885 I915_WRITE(DSPFW1, reg);
886 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
887
888 /* cursor SR */
889 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
890 pineview_display_wm.fifo_size,
99834b14 891 4, latency->cursor_sr);
b445e3b0
ED
892 reg = I915_READ(DSPFW3);
893 reg &= ~DSPFW_CURSOR_SR_MASK;
f4998963 894 reg |= FW_WM(wm, CURSOR_SR);
b445e3b0
ED
895 I915_WRITE(DSPFW3, reg);
896
897 /* Display HPLL off SR */
898 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
899 pineview_display_hplloff_wm.fifo_size,
ac484963 900 cpp, latency->display_hpll_disable);
b445e3b0
ED
901 reg = I915_READ(DSPFW3);
902 reg &= ~DSPFW_HPLL_SR_MASK;
f4998963 903 reg |= FW_WM(wm, HPLL_SR);
b445e3b0
ED
904 I915_WRITE(DSPFW3, reg);
905
906 /* cursor HPLL off SR */
907 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
908 pineview_display_hplloff_wm.fifo_size,
99834b14 909 4, latency->cursor_hpll_disable);
b445e3b0
ED
910 reg = I915_READ(DSPFW3);
911 reg &= ~DSPFW_HPLL_CURSOR_MASK;
f4998963 912 reg |= FW_WM(wm, HPLL_CURSOR);
b445e3b0
ED
913 I915_WRITE(DSPFW3, reg);
914 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
915
5209b1f4 916 intel_set_memory_cxsr(dev_priv, true);
b445e3b0 917 } else {
5209b1f4 918 intel_set_memory_cxsr(dev_priv, false);
b445e3b0
ED
919 }
920}
921
/*
 * Documentation says:
 * "If the line size is small, the TLB fetches can get in the way of the
 *  data fetches, causing some lag in the pixel data return which is not
 *  accounted for in the above formulas. The following adjustment only
 *  needs to be applied if eight whole lines fit in the buffer at once.
 *  The WM is adjusted upwards by the difference between the FIFO size
 *  and the size of 8 whole lines. This adjustment is always performed
 *  in the actual pixel depth regardless of whether FBC is enabled or not."
 *
 * Returns fifo_size * 64 minus the size of eight lines
 * (width * cpp * 8), clamped so that it is never negative.
 */
static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
{
	int fifo_bytes = fifo_size * 64;
	int eight_lines = width * cpp * 8;
	int adjustment = fifo_bytes - eight_lines;

	return adjustment > 0 ? adjustment : 0;
}
938
04548cba
VS
939static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
940 const struct g4x_wm_values *wm)
b445e3b0 941{
e93329a5
VS
942 enum pipe pipe;
943
944 for_each_pipe(dev_priv, pipe)
945 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
946
04548cba
VS
947 I915_WRITE(DSPFW1,
948 FW_WM(wm->sr.plane, SR) |
949 FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
950 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
951 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
952 I915_WRITE(DSPFW2,
953 (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
954 FW_WM(wm->sr.fbc, FBC_SR) |
955 FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
956 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
957 FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
958 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
959 I915_WRITE(DSPFW3,
960 (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
961 FW_WM(wm->sr.cursor, CURSOR_SR) |
962 FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
963 FW_WM(wm->hpll.plane, HPLL_SR));
b445e3b0 964
04548cba 965 POSTING_READ(DSPFW1);
b445e3b0
ED
966}
967
15665979
VS
968#define FW_WM_VLV(value, plane) \
969 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
970
50f4caef 971static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
0018fda1
VS
972 const struct vlv_wm_values *wm)
973{
50f4caef
VS
974 enum pipe pipe;
975
976 for_each_pipe(dev_priv, pipe) {
c137d660
VS
977 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
978
50f4caef
VS
979 I915_WRITE(VLV_DDL(pipe),
980 (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
981 (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
982 (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
983 (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
984 }
0018fda1 985
6fe6a7ff
VS
986 /*
987 * Zero the (unused) WM1 watermarks, and also clear all the
988 * high order bits so that there are no out of bounds values
989 * present in the registers during the reprogramming.
990 */
991 I915_WRITE(DSPHOWM, 0);
992 I915_WRITE(DSPHOWM1, 0);
993 I915_WRITE(DSPFW4, 0);
994 I915_WRITE(DSPFW5, 0);
995 I915_WRITE(DSPFW6, 0);
996
ae80152d 997 I915_WRITE(DSPFW1,
15665979 998 FW_WM(wm->sr.plane, SR) |
1b31389c
VS
999 FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
1000 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
1001 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
ae80152d 1002 I915_WRITE(DSPFW2,
1b31389c
VS
1003 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
1004 FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
1005 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
ae80152d 1006 I915_WRITE(DSPFW3,
15665979 1007 FW_WM(wm->sr.cursor, CURSOR_SR));
ae80152d
VS
1008
1009 if (IS_CHERRYVIEW(dev_priv)) {
1010 I915_WRITE(DSPFW7_CHV,
1b31389c
VS
1011 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1012 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
ae80152d 1013 I915_WRITE(DSPFW8_CHV,
1b31389c
VS
1014 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
1015 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
ae80152d 1016 I915_WRITE(DSPFW9_CHV,
1b31389c
VS
1017 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
1018 FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
ae80152d 1019 I915_WRITE(DSPHOWM,
15665979 1020 FW_WM(wm->sr.plane >> 9, SR_HI) |
1b31389c
VS
1021 FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
1022 FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
1023 FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
1024 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1025 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1026 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1027 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1028 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1029 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
ae80152d
VS
1030 } else {
1031 I915_WRITE(DSPFW7,
1b31389c
VS
1032 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1033 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
ae80152d 1034 I915_WRITE(DSPHOWM,
15665979 1035 FW_WM(wm->sr.plane >> 9, SR_HI) |
1b31389c
VS
1036 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1037 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1038 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1039 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1040 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1041 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
ae80152d
VS
1042 }
1043
1044 POSTING_READ(DSPFW1);
0018fda1
VS
1045}
1046
15665979
VS
1047#undef FW_WM_VLV
1048
04548cba
VS
1049static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
1050{
1051 /* all latencies in usec */
1052 dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
1053 dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
79d94306 1054 dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
04548cba 1055
79d94306 1056 dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
04548cba
VS
1057}
1058
1059static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
1060{
1061 /*
1062 * DSPCNTR[13] supposedly controls whether the
1063 * primary plane can use the FIFO space otherwise
1064 * reserved for the sprite plane. It's not 100% clear
1065 * what the actual FIFO size is, but it looks like we
1066 * can happily set both primary and sprite watermarks
1067 * up to 127 cachelines. So that would seem to mean
1068 * that either DSPCNTR[13] doesn't do anything, or that
1069 * the total FIFO is >= 256 cachelines in size. Either
1070 * way, we don't seem to have to worry about this
1071 * repartitioning as the maximum watermark value the
1072 * register can hold for each plane is lower than the
1073 * minimum FIFO size.
1074 */
1075 switch (plane_id) {
1076 case PLANE_CURSOR:
1077 return 63;
1078 case PLANE_PRIMARY:
1079 return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
1080 case PLANE_SPRITE0:
1081 return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
1082 default:
1083 MISSING_CASE(plane_id);
1084 return 0;
1085 }
1086}
1087
1088static int g4x_fbc_fifo_size(int level)
1089{
1090 switch (level) {
1091 case G4X_WM_LEVEL_SR:
1092 return 7;
1093 case G4X_WM_LEVEL_HPLL:
1094 return 15;
1095 default:
1096 MISSING_CASE(level);
1097 return 0;
1098 }
1099}
1100
5ce9a649
JN
1101static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
1102 const struct intel_plane_state *plane_state,
1103 int level)
04548cba
VS
1104{
1105 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1106 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1107 const struct drm_display_mode *adjusted_mode =
1108 &crtc_state->base.adjusted_mode;
1a1f1287
CW
1109 unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
1110 unsigned int clock, htotal, cpp, width, wm;
04548cba
VS
1111
1112 if (latency == 0)
1113 return USHRT_MAX;
1114
1115 if (!intel_wm_plane_visible(crtc_state, plane_state))
1116 return 0;
1117
1118 /*
1119 * Not 100% sure which way ELK should go here as the
1120 * spec only says CL/CTG should assume 32bpp and BW
1121 * doesn't need to. But as these things followed the
1122 * mobile vs. desktop lines on gen3 as well, let's
1123 * assume ELK doesn't need this.
1124 *
1125 * The spec also fails to list such a restriction for
1126 * the HPLL watermark, which seems a little strange.
1127 * Let's use 32bpp for the HPLL watermark as well.
1128 */
1129 if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
1130 level != G4X_WM_LEVEL_NORMAL)
1131 cpp = 4;
1132 else
1133 cpp = plane_state->base.fb->format->cpp[0];
1134
1135 clock = adjusted_mode->crtc_clock;
1136 htotal = adjusted_mode->crtc_htotal;
1137
1138 if (plane->id == PLANE_CURSOR)
1139 width = plane_state->base.crtc_w;
1140 else
1141 width = drm_rect_width(&plane_state->base.dst);
1142
1143 if (plane->id == PLANE_CURSOR) {
1144 wm = intel_wm_method2(clock, htotal, width, cpp, latency);
1145 } else if (plane->id == PLANE_PRIMARY &&
1146 level == G4X_WM_LEVEL_NORMAL) {
1147 wm = intel_wm_method1(clock, cpp, latency);
1148 } else {
1a1f1287 1149 unsigned int small, large;
04548cba
VS
1150
1151 small = intel_wm_method1(clock, cpp, latency);
1152 large = intel_wm_method2(clock, htotal, width, cpp, latency);
1153
1154 wm = min(small, large);
1155 }
1156
1157 wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
1158 width, cpp);
1159
1160 wm = DIV_ROUND_UP(wm, 64) + 2;
1161
1a1f1287 1162 return min_t(unsigned int, wm, USHRT_MAX);
04548cba
VS
1163}
1164
1165static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1166 int level, enum plane_id plane_id, u16 value)
1167{
1168 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1169 bool dirty = false;
1170
1171 for (; level < intel_wm_num_levels(dev_priv); level++) {
1172 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1173
1174 dirty |= raw->plane[plane_id] != value;
1175 raw->plane[plane_id] = value;
1176 }
1177
1178 return dirty;
1179}
1180
1181static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1182 int level, u16 value)
1183{
1184 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1185 bool dirty = false;
1186
1187 /* NORMAL level doesn't have an FBC watermark */
1188 level = max(level, G4X_WM_LEVEL_SR);
1189
1190 for (; level < intel_wm_num_levels(dev_priv); level++) {
1191 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1192
1193 dirty |= raw->fbc != value;
1194 raw->fbc = value;
1195 }
1196
1197 return dirty;
1198}
1199
5ce9a649
JN
1200static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1201 const struct intel_plane_state *pstate,
1202 u32 pri_val);
04548cba
VS
1203
1204static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1205 const struct intel_plane_state *plane_state)
1206{
1207 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1208 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1209 enum plane_id plane_id = plane->id;
1210 bool dirty = false;
1211 int level;
1212
1213 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1214 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1215 if (plane_id == PLANE_PRIMARY)
1216 dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1217 goto out;
1218 }
1219
1220 for (level = 0; level < num_levels; level++) {
1221 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1222 int wm, max_wm;
1223
1224 wm = g4x_compute_wm(crtc_state, plane_state, level);
1225 max_wm = g4x_plane_fifo_size(plane_id, level);
1226
1227 if (wm > max_wm)
1228 break;
1229
1230 dirty |= raw->plane[plane_id] != wm;
1231 raw->plane[plane_id] = wm;
1232
1233 if (plane_id != PLANE_PRIMARY ||
1234 level == G4X_WM_LEVEL_NORMAL)
1235 continue;
1236
1237 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1238 raw->plane[plane_id]);
1239 max_wm = g4x_fbc_fifo_size(level);
1240
1241 /*
1242 * FBC wm is not mandatory as we
1243 * can always just disable its use.
1244 */
1245 if (wm > max_wm)
1246 wm = USHRT_MAX;
1247
1248 dirty |= raw->fbc != wm;
1249 raw->fbc = wm;
1250 }
1251
1252 /* mark watermarks as invalid */
1253 dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1254
1255 if (plane_id == PLANE_PRIMARY)
1256 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1257
1258 out:
1259 if (dirty) {
1260 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1261 plane->base.name,
1262 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1263 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1264 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1265
1266 if (plane_id == PLANE_PRIMARY)
1267 DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1268 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1269 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1270 }
1271
1272 return dirty;
1273}
1274
1275static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1276 enum plane_id plane_id, int level)
1277{
1278 const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1279
1280 return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1281}
1282
1283static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1284 int level)
1285{
1286 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1287
1288 if (level > dev_priv->wm.max_level)
1289 return false;
1290
1291 return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1292 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1293 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1294}
1295
1296/* mark all levels starting from 'level' as invalid */
1297static void g4x_invalidate_wms(struct intel_crtc *crtc,
1298 struct g4x_wm_state *wm_state, int level)
1299{
1300 if (level <= G4X_WM_LEVEL_NORMAL) {
1301 enum plane_id plane_id;
1302
1303 for_each_plane_id_on_crtc(crtc, plane_id)
1304 wm_state->wm.plane[plane_id] = USHRT_MAX;
1305 }
1306
1307 if (level <= G4X_WM_LEVEL_SR) {
1308 wm_state->cxsr = false;
1309 wm_state->sr.cursor = USHRT_MAX;
1310 wm_state->sr.plane = USHRT_MAX;
1311 wm_state->sr.fbc = USHRT_MAX;
1312 }
1313
1314 if (level <= G4X_WM_LEVEL_HPLL) {
1315 wm_state->hpll_en = false;
1316 wm_state->hpll.cursor = USHRT_MAX;
1317 wm_state->hpll.plane = USHRT_MAX;
1318 wm_state->hpll.fbc = USHRT_MAX;
1319 }
1320}
1321
1322static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1323{
1324 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1325 struct intel_atomic_state *state =
1326 to_intel_atomic_state(crtc_state->base.state);
1327 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1328 int num_active_planes = hweight32(crtc_state->active_planes &
1329 ~BIT(PLANE_CURSOR));
1330 const struct g4x_pipe_wm *raw;
7b510451
VS
1331 const struct intel_plane_state *old_plane_state;
1332 const struct intel_plane_state *new_plane_state;
04548cba
VS
1333 struct intel_plane *plane;
1334 enum plane_id plane_id;
1335 int i, level;
1336 unsigned int dirty = 0;
1337
7b510451
VS
1338 for_each_oldnew_intel_plane_in_state(state, plane,
1339 old_plane_state,
1340 new_plane_state, i) {
1341 if (new_plane_state->base.crtc != &crtc->base &&
04548cba
VS
1342 old_plane_state->base.crtc != &crtc->base)
1343 continue;
1344
7b510451 1345 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
04548cba
VS
1346 dirty |= BIT(plane->id);
1347 }
1348
1349 if (!dirty)
1350 return 0;
1351
1352 level = G4X_WM_LEVEL_NORMAL;
1353 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1354 goto out;
1355
1356 raw = &crtc_state->wm.g4x.raw[level];
1357 for_each_plane_id_on_crtc(crtc, plane_id)
1358 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1359
1360 level = G4X_WM_LEVEL_SR;
1361
1362 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1363 goto out;
1364
1365 raw = &crtc_state->wm.g4x.raw[level];
1366 wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1367 wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1368 wm_state->sr.fbc = raw->fbc;
1369
1370 wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1371
1372 level = G4X_WM_LEVEL_HPLL;
1373
1374 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1375 goto out;
1376
1377 raw = &crtc_state->wm.g4x.raw[level];
1378 wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1379 wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1380 wm_state->hpll.fbc = raw->fbc;
1381
1382 wm_state->hpll_en = wm_state->cxsr;
1383
1384 level++;
1385
1386 out:
1387 if (level == G4X_WM_LEVEL_NORMAL)
1388 return -EINVAL;
1389
1390 /* invalidate the higher levels */
1391 g4x_invalidate_wms(crtc, wm_state, level);
1392
1393 /*
1394 * Determine if the FBC watermark(s) can be used. IF
1395 * this isn't the case we prefer to disable the FBC
1396 ( watermark(s) rather than disable the SR/HPLL
1397 * level(s) entirely.
1398 */
1399 wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1400
1401 if (level >= G4X_WM_LEVEL_SR &&
1402 wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1403 wm_state->fbc_en = false;
1404 else if (level >= G4X_WM_LEVEL_HPLL &&
1405 wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1406 wm_state->fbc_en = false;
1407
1408 return 0;
1409}
1410
cd1d3ee9 1411static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
04548cba 1412{
cd1d3ee9 1413 struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
248c2435
ML
1414 struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1415 const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1416 struct intel_atomic_state *intel_state =
1417 to_intel_atomic_state(new_crtc_state->base.state);
1418 const struct intel_crtc_state *old_crtc_state =
1419 intel_atomic_get_old_crtc_state(intel_state, crtc);
1420 const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
04548cba
VS
1421 enum plane_id plane_id;
1422
248c2435
ML
1423 if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
1424 *intermediate = *optimal;
1425
1426 intermediate->cxsr = false;
1427 intermediate->hpll_en = false;
1428 goto out;
1429 }
1430
04548cba 1431 intermediate->cxsr = optimal->cxsr && active->cxsr &&
248c2435 1432 !new_crtc_state->disable_cxsr;
04548cba 1433 intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
248c2435 1434 !new_crtc_state->disable_cxsr;
04548cba
VS
1435 intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1436
1437 for_each_plane_id_on_crtc(crtc, plane_id) {
1438 intermediate->wm.plane[plane_id] =
1439 max(optimal->wm.plane[plane_id],
1440 active->wm.plane[plane_id]);
1441
1442 WARN_ON(intermediate->wm.plane[plane_id] >
1443 g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1444 }
1445
1446 intermediate->sr.plane = max(optimal->sr.plane,
1447 active->sr.plane);
1448 intermediate->sr.cursor = max(optimal->sr.cursor,
1449 active->sr.cursor);
1450 intermediate->sr.fbc = max(optimal->sr.fbc,
1451 active->sr.fbc);
1452
1453 intermediate->hpll.plane = max(optimal->hpll.plane,
1454 active->hpll.plane);
1455 intermediate->hpll.cursor = max(optimal->hpll.cursor,
1456 active->hpll.cursor);
1457 intermediate->hpll.fbc = max(optimal->hpll.fbc,
1458 active->hpll.fbc);
1459
1460 WARN_ON((intermediate->sr.plane >
1461 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1462 intermediate->sr.cursor >
1463 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1464 intermediate->cxsr);
1465 WARN_ON((intermediate->sr.plane >
1466 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1467 intermediate->sr.cursor >
1468 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1469 intermediate->hpll_en);
1470
1471 WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
1472 intermediate->fbc_en && intermediate->cxsr);
1473 WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
1474 intermediate->fbc_en && intermediate->hpll_en);
1475
248c2435 1476out:
04548cba
VS
1477 /*
1478 * If our intermediate WM are identical to the final WM, then we can
1479 * omit the post-vblank programming; only update if it's different.
1480 */
1481 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
248c2435 1482 new_crtc_state->wm.need_postvbl_update = true;
04548cba
VS
1483
1484 return 0;
1485}
1486
1487static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1488 struct g4x_wm_values *wm)
1489{
1490 struct intel_crtc *crtc;
1491 int num_active_crtcs = 0;
1492
1493 wm->cxsr = true;
1494 wm->hpll_en = true;
1495 wm->fbc_en = true;
1496
1497 for_each_intel_crtc(&dev_priv->drm, crtc) {
1498 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1499
1500 if (!crtc->active)
1501 continue;
1502
1503 if (!wm_state->cxsr)
1504 wm->cxsr = false;
1505 if (!wm_state->hpll_en)
1506 wm->hpll_en = false;
1507 if (!wm_state->fbc_en)
1508 wm->fbc_en = false;
1509
1510 num_active_crtcs++;
1511 }
1512
1513 if (num_active_crtcs != 1) {
1514 wm->cxsr = false;
1515 wm->hpll_en = false;
1516 wm->fbc_en = false;
1517 }
1518
1519 for_each_intel_crtc(&dev_priv->drm, crtc) {
1520 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1521 enum pipe pipe = crtc->pipe;
1522
1523 wm->pipe[pipe] = wm_state->wm;
1524 if (crtc->active && wm->cxsr)
1525 wm->sr = wm_state->sr;
1526 if (crtc->active && wm->hpll_en)
1527 wm->hpll = wm_state->hpll;
1528 }
1529}
1530
1531static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1532{
1533 struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1534 struct g4x_wm_values new_wm = {};
1535
1536 g4x_merge_wm(dev_priv, &new_wm);
1537
1538 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1539 return;
1540
1541 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1542 _intel_set_memory_cxsr(dev_priv, false);
1543
1544 g4x_write_wm_values(dev_priv, &new_wm);
1545
1546 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1547 _intel_set_memory_cxsr(dev_priv, true);
1548
1549 *old_wm = new_wm;
1550}
1551
1552static void g4x_initial_watermarks(struct intel_atomic_state *state,
1553 struct intel_crtc_state *crtc_state)
1554{
1555 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1556 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1557
1558 mutex_lock(&dev_priv->wm.wm_mutex);
1559 crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1560 g4x_program_watermarks(dev_priv);
1561 mutex_unlock(&dev_priv->wm.wm_mutex);
1562}
1563
1564static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1565 struct intel_crtc_state *crtc_state)
1566{
1567 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1568 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1569
1570 if (!crtc_state->wm.need_postvbl_update)
1571 return;
1572
1573 mutex_lock(&dev_priv->wm.wm_mutex);
1574 intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1575 g4x_program_watermarks(dev_priv);
1576 mutex_unlock(&dev_priv->wm.wm_mutex);
1577}
1578
/*
 * intel_wm_method2() result divided by 64, rounded up.
 *
 * latency must be in 0.1us units.
 */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int raw_wm = intel_wm_method2(pixel_rate, htotal,
					       width, cpp, latency);

	return DIV_ROUND_UP(raw_wm, 64);
}
1594
bb726519 1595static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
262cd2e1 1596{
262cd2e1
VS
1597 /* all latencies in usec */
1598 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1599
58590c14
VS
1600 dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1601
262cd2e1
VS
1602 if (IS_CHERRYVIEW(dev_priv)) {
1603 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1604 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
58590c14
VS
1605
1606 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
262cd2e1
VS
1607 }
1608}
1609
5ce9a649
JN
1610static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1611 const struct intel_plane_state *plane_state,
1612 int level)
262cd2e1 1613{
e339d67e 1614 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
262cd2e1 1615 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
e339d67e
VS
1616 const struct drm_display_mode *adjusted_mode =
1617 &crtc_state->base.adjusted_mode;
1a1f1287 1618 unsigned int clock, htotal, cpp, width, wm;
262cd2e1
VS
1619
1620 if (dev_priv->wm.pri_latency[level] == 0)
1621 return USHRT_MAX;
1622
a07102f1 1623 if (!intel_wm_plane_visible(crtc_state, plane_state))
262cd2e1
VS
1624 return 0;
1625
ef426c10 1626 cpp = plane_state->base.fb->format->cpp[0];
e339d67e
VS
1627 clock = adjusted_mode->crtc_clock;
1628 htotal = adjusted_mode->crtc_htotal;
1629 width = crtc_state->pipe_src_w;
262cd2e1 1630
709f3fc9 1631 if (plane->id == PLANE_CURSOR) {
262cd2e1
VS
1632 /*
1633 * FIXME the formula gives values that are
1634 * too big for the cursor FIFO, and hence we
1635 * would never be able to use cursors. For
1636 * now just hardcode the watermark.
1637 */
1638 wm = 63;
1639 } else {
ac484963 1640 wm = vlv_wm_method2(clock, htotal, width, cpp,
262cd2e1
VS
1641 dev_priv->wm.pri_latency[level] * 10);
1642 }
1643
1a1f1287 1644 return min_t(unsigned int, wm, USHRT_MAX);
262cd2e1
VS
1645}
1646
1a10ae6b
VS
1647static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1648{
1649 return (active_planes & (BIT(PLANE_SPRITE0) |
1650 BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1651}
1652
5012e604 1653static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
54f1b6e1 1654{
855c79f5 1655 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
114d7dc0 1656 const struct g4x_pipe_wm *raw =
5012e604 1657 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
814e7f0b 1658 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
5012e604
VS
1659 unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1660 int num_active_planes = hweight32(active_planes);
1661 const int fifo_size = 511;
54f1b6e1 1662 int fifo_extra, fifo_left = fifo_size;
1a10ae6b 1663 int sprite0_fifo_extra = 0;
5012e604
VS
1664 unsigned int total_rate;
1665 enum plane_id plane_id;
54f1b6e1 1666
1a10ae6b
VS
1667 /*
1668 * When enabling sprite0 after sprite1 has already been enabled
1669 * we tend to get an underrun unless sprite0 already has some
1670 * FIFO space allcoated. Hence we always allocate at least one
1671 * cacheline for sprite0 whenever sprite1 is enabled.
1672 *
1673 * All other plane enable sequences appear immune to this problem.
1674 */
1675 if (vlv_need_sprite0_fifo_workaround(active_planes))
1676 sprite0_fifo_extra = 1;
1677
5012e604
VS
1678 total_rate = raw->plane[PLANE_PRIMARY] +
1679 raw->plane[PLANE_SPRITE0] +
1a10ae6b
VS
1680 raw->plane[PLANE_SPRITE1] +
1681 sprite0_fifo_extra;
54f1b6e1 1682
5012e604
VS
1683 if (total_rate > fifo_size)
1684 return -EINVAL;
54f1b6e1 1685
5012e604
VS
1686 if (total_rate == 0)
1687 total_rate = 1;
54f1b6e1 1688
5012e604 1689 for_each_plane_id_on_crtc(crtc, plane_id) {
54f1b6e1
VS
1690 unsigned int rate;
1691
5012e604
VS
1692 if ((active_planes & BIT(plane_id)) == 0) {
1693 fifo_state->plane[plane_id] = 0;
54f1b6e1
VS
1694 continue;
1695 }
1696
5012e604
VS
1697 rate = raw->plane[plane_id];
1698 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1699 fifo_left -= fifo_state->plane[plane_id];
54f1b6e1
VS
1700 }
1701
1a10ae6b
VS
1702 fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1703 fifo_left -= sprite0_fifo_extra;
1704
5012e604
VS
1705 fifo_state->plane[PLANE_CURSOR] = 63;
1706
1707 fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
54f1b6e1
VS
1708
1709 /* spread the remainder evenly */
5012e604 1710 for_each_plane_id_on_crtc(crtc, plane_id) {
54f1b6e1
VS
1711 int plane_extra;
1712
1713 if (fifo_left == 0)
1714 break;
1715
5012e604 1716 if ((active_planes & BIT(plane_id)) == 0)
54f1b6e1
VS
1717 continue;
1718
1719 plane_extra = min(fifo_extra, fifo_left);
5012e604 1720 fifo_state->plane[plane_id] += plane_extra;
54f1b6e1
VS
1721 fifo_left -= plane_extra;
1722 }
1723
5012e604
VS
1724 WARN_ON(active_planes != 0 && fifo_left != 0);
1725
1726 /* give it all to the first plane if none are active */
1727 if (active_planes == 0) {
1728 WARN_ON(fifo_left != fifo_size);
1729 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1730 }
1731
1732 return 0;
54f1b6e1
VS
1733}
1734
ff32c54e
VS
1735/* mark all levels starting from 'level' as invalid */
1736static void vlv_invalidate_wms(struct intel_crtc *crtc,
1737 struct vlv_wm_state *wm_state, int level)
1738{
1739 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1740
6d5019b6 1741 for (; level < intel_wm_num_levels(dev_priv); level++) {
ff32c54e
VS
1742 enum plane_id plane_id;
1743
1744 for_each_plane_id_on_crtc(crtc, plane_id)
1745 wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1746
1747 wm_state->sr[level].cursor = USHRT_MAX;
1748 wm_state->sr[level].plane = USHRT_MAX;
1749 }
1750}
1751
26cca0e5
VS
1752static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1753{
1754 if (wm > fifo_size)
1755 return USHRT_MAX;
1756 else
1757 return fifo_size - wm;
1758}
1759
ff32c54e
VS
1760/*
1761 * Starting from 'level' set all higher
1762 * levels to 'value' in the "raw" watermarks.
1763 */
236c48e6 1764static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
ff32c54e 1765 int level, enum plane_id plane_id, u16 value)
262cd2e1 1766{
ff32c54e 1767 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
6d5019b6 1768 int num_levels = intel_wm_num_levels(dev_priv);
236c48e6 1769 bool dirty = false;
262cd2e1 1770
ff32c54e 1771 for (; level < num_levels; level++) {
114d7dc0 1772 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
262cd2e1 1773
236c48e6 1774 dirty |= raw->plane[plane_id] != value;
ff32c54e 1775 raw->plane[plane_id] = value;
262cd2e1 1776 }
236c48e6
VS
1777
1778 return dirty;
262cd2e1
VS
1779}
1780
77d14ee4
VS
1781static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1782 const struct intel_plane_state *plane_state)
262cd2e1 1783{
ff32c54e
VS
1784 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1785 enum plane_id plane_id = plane->id;
6d5019b6 1786 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
262cd2e1 1787 int level;
236c48e6 1788 bool dirty = false;
262cd2e1 1789
a07102f1 1790 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
236c48e6
VS
1791 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1792 goto out;
ff32c54e 1793 }
262cd2e1 1794
ff32c54e 1795 for (level = 0; level < num_levels; level++) {
114d7dc0 1796 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
ff32c54e
VS
1797 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1798 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
262cd2e1 1799
ff32c54e
VS
1800 if (wm > max_wm)
1801 break;
262cd2e1 1802
236c48e6 1803 dirty |= raw->plane[plane_id] != wm;
ff32c54e
VS
1804 raw->plane[plane_id] = wm;
1805 }
262cd2e1 1806
ff32c54e 1807 /* mark all higher levels as invalid */
236c48e6 1808 dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
262cd2e1 1809
236c48e6
VS
1810out:
1811 if (dirty)
57a6528a 1812 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
236c48e6
VS
1813 plane->base.name,
1814 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1815 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1816 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1817
1818 return dirty;
ff32c54e 1819}
262cd2e1 1820
77d14ee4
VS
1821static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1822 enum plane_id plane_id, int level)
ff32c54e 1823{
114d7dc0 1824 const struct g4x_pipe_wm *raw =
ff32c54e
VS
1825 &crtc_state->wm.vlv.raw[level];
1826 const struct vlv_fifo_state *fifo_state =
1827 &crtc_state->wm.vlv.fifo_state;
262cd2e1 1828
ff32c54e
VS
1829 return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1830}
262cd2e1 1831
77d14ee4 1832static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
ff32c54e 1833{
77d14ee4
VS
1834 return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1835 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1836 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1837 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
ff32c54e
VS
1838}
1839
1840static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1841{
1842 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1843 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1844 struct intel_atomic_state *state =
1845 to_intel_atomic_state(crtc_state->base.state);
1846 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1847 const struct vlv_fifo_state *fifo_state =
1848 &crtc_state->wm.vlv.fifo_state;
1849 int num_active_planes = hweight32(crtc_state->active_planes &
1850 ~BIT(PLANE_CURSOR));
236c48e6 1851 bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
7b510451
VS
1852 const struct intel_plane_state *old_plane_state;
1853 const struct intel_plane_state *new_plane_state;
ff32c54e
VS
1854 struct intel_plane *plane;
1855 enum plane_id plane_id;
1856 int level, ret, i;
236c48e6 1857 unsigned int dirty = 0;
ff32c54e 1858
7b510451
VS
1859 for_each_oldnew_intel_plane_in_state(state, plane,
1860 old_plane_state,
1861 new_plane_state, i) {
1862 if (new_plane_state->base.crtc != &crtc->base &&
ff32c54e
VS
1863 old_plane_state->base.crtc != &crtc->base)
1864 continue;
262cd2e1 1865
7b510451 1866 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
236c48e6
VS
1867 dirty |= BIT(plane->id);
1868 }
1869
1870 /*
1871 * DSPARB registers may have been reset due to the
1872 * power well being turned off. Make sure we restore
1873 * them to a consistent state even if no primary/sprite
1874 * planes are initially active.
1875 */
1876 if (needs_modeset)
1877 crtc_state->fifo_changed = true;
1878
1879 if (!dirty)
1880 return 0;
1881
1882 /* cursor changes don't warrant a FIFO recompute */
1883 if (dirty & ~BIT(PLANE_CURSOR)) {
1884 const struct intel_crtc_state *old_crtc_state =
7b510451 1885 intel_atomic_get_old_crtc_state(state, crtc);
236c48e6
VS
1886 const struct vlv_fifo_state *old_fifo_state =
1887 &old_crtc_state->wm.vlv.fifo_state;
1888
1889 ret = vlv_compute_fifo(crtc_state);
1890 if (ret)
1891 return ret;
1892
1893 if (needs_modeset ||
1894 memcmp(old_fifo_state, fifo_state,
1895 sizeof(*fifo_state)) != 0)
1896 crtc_state->fifo_changed = true;
5012e604 1897 }
262cd2e1 1898
ff32c54e 1899 /* initially allow all levels */
6d5019b6 1900 wm_state->num_levels = intel_wm_num_levels(dev_priv);
ff32c54e
VS
1901 /*
1902 * Note that enabling cxsr with no primary/sprite planes
1903 * enabled can wedge the pipe. Hence we only allow cxsr
1904 * with exactly one enabled primary/sprite plane.
1905 */
5eeb798b 1906 wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
ff32c54e 1907
5012e604 1908 for (level = 0; level < wm_state->num_levels; level++) {
114d7dc0 1909 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
ff32c54e 1910 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
5012e604 1911
77d14ee4 1912 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
ff32c54e 1913 break;
5012e604 1914
ff32c54e
VS
1915 for_each_plane_id_on_crtc(crtc, plane_id) {
1916 wm_state->wm[level].plane[plane_id] =
1917 vlv_invert_wm_value(raw->plane[plane_id],
1918 fifo_state->plane[plane_id]);
1919 }
1920
1921 wm_state->sr[level].plane =
1922 vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
5012e604 1923 raw->plane[PLANE_SPRITE0],
ff32c54e
VS
1924 raw->plane[PLANE_SPRITE1]),
1925 sr_fifo_size);
262cd2e1 1926
ff32c54e
VS
1927 wm_state->sr[level].cursor =
1928 vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1929 63);
262cd2e1
VS
1930 }
1931
ff32c54e
VS
1932 if (level == 0)
1933 return -EINVAL;
1934
1935 /* limit to only levels we can actually handle */
1936 wm_state->num_levels = level;
1937
1938 /* invalidate the higher levels */
1939 vlv_invalidate_wms(crtc, wm_state, level);
1940
1941 return 0;
262cd2e1
VS
1942}
1943
54f1b6e1
VS
1944#define VLV_FIFO(plane, value) \
1945 (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1946
ff32c54e
VS
1947static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1948 struct intel_crtc_state *crtc_state)
54f1b6e1 1949{
814e7f0b 1950 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
f07d43d2 1951 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
814e7f0b
VS
1952 const struct vlv_fifo_state *fifo_state =
1953 &crtc_state->wm.vlv.fifo_state;
f07d43d2 1954 int sprite0_start, sprite1_start, fifo_size;
54f1b6e1 1955
236c48e6
VS
1956 if (!crtc_state->fifo_changed)
1957 return;
1958
f07d43d2
VS
1959 sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1960 sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1961 fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
54f1b6e1 1962
f07d43d2
VS
1963 WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1964 WARN_ON(fifo_size != 511);
54f1b6e1 1965
c137d660
VS
1966 trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1967
44e921d4
VS
1968 /*
1969 * uncore.lock serves a double purpose here. It allows us to
1970 * use the less expensive I915_{READ,WRITE}_FW() functions, and
1971 * it protects the DSPARB registers from getting clobbered by
1972 * parallel updates from multiple pipes.
1973 *
1974 * intel_pipe_update_start() has already disabled interrupts
1975 * for us, so a plain spin_lock() is sufficient here.
1976 */
1977 spin_lock(&dev_priv->uncore.lock);
467a14d9 1978
54f1b6e1 1979 switch (crtc->pipe) {
5ce9a649 1980 u32 dsparb, dsparb2, dsparb3;
54f1b6e1 1981 case PIPE_A:
44e921d4
VS
1982 dsparb = I915_READ_FW(DSPARB);
1983 dsparb2 = I915_READ_FW(DSPARB2);
54f1b6e1
VS
1984
1985 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1986 VLV_FIFO(SPRITEB, 0xff));
1987 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1988 VLV_FIFO(SPRITEB, sprite1_start));
1989
1990 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1991 VLV_FIFO(SPRITEB_HI, 0x1));
1992 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1993 VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1994
44e921d4
VS
1995 I915_WRITE_FW(DSPARB, dsparb);
1996 I915_WRITE_FW(DSPARB2, dsparb2);
54f1b6e1
VS
1997 break;
1998 case PIPE_B:
44e921d4
VS
1999 dsparb = I915_READ_FW(DSPARB);
2000 dsparb2 = I915_READ_FW(DSPARB2);
54f1b6e1
VS
2001
2002 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
2003 VLV_FIFO(SPRITED, 0xff));
2004 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
2005 VLV_FIFO(SPRITED, sprite1_start));
2006
2007 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
2008 VLV_FIFO(SPRITED_HI, 0xff));
2009 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2010 VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2011
44e921d4
VS
2012 I915_WRITE_FW(DSPARB, dsparb);
2013 I915_WRITE_FW(DSPARB2, dsparb2);
54f1b6e1
VS
2014 break;
2015 case PIPE_C:
44e921d4
VS
2016 dsparb3 = I915_READ_FW(DSPARB3);
2017 dsparb2 = I915_READ_FW(DSPARB2);
54f1b6e1
VS
2018
2019 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2020 VLV_FIFO(SPRITEF, 0xff));
2021 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2022 VLV_FIFO(SPRITEF, sprite1_start));
2023
2024 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2025 VLV_FIFO(SPRITEF_HI, 0xff));
2026 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2027 VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2028
44e921d4
VS
2029 I915_WRITE_FW(DSPARB3, dsparb3);
2030 I915_WRITE_FW(DSPARB2, dsparb2);
54f1b6e1
VS
2031 break;
2032 default:
2033 break;
2034 }
467a14d9 2035
44e921d4 2036 POSTING_READ_FW(DSPARB);
467a14d9 2037
44e921d4 2038 spin_unlock(&dev_priv->uncore.lock);
54f1b6e1
VS
2039}
2040
2041#undef VLV_FIFO
2042
cd1d3ee9 2043static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
4841da51 2044{
cd1d3ee9 2045 struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
5b9489cb
ML
2046 struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2047 const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2048 struct intel_atomic_state *intel_state =
2049 to_intel_atomic_state(new_crtc_state->base.state);
2050 const struct intel_crtc_state *old_crtc_state =
2051 intel_atomic_get_old_crtc_state(intel_state, crtc);
2052 const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
4841da51
VS
2053 int level;
2054
5b9489cb
ML
2055 if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
2056 *intermediate = *optimal;
2057
2058 intermediate->cxsr = false;
2059 goto out;
2060 }
2061
4841da51 2062 intermediate->num_levels = min(optimal->num_levels, active->num_levels);
5eeb798b 2063 intermediate->cxsr = optimal->cxsr && active->cxsr &&
5b9489cb 2064 !new_crtc_state->disable_cxsr;
4841da51
VS
2065
2066 for (level = 0; level < intermediate->num_levels; level++) {
2067 enum plane_id plane_id;
2068
2069 for_each_plane_id_on_crtc(crtc, plane_id) {
2070 intermediate->wm[level].plane[plane_id] =
2071 min(optimal->wm[level].plane[plane_id],
2072 active->wm[level].plane[plane_id]);
2073 }
2074
2075 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2076 active->sr[level].plane);
2077 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2078 active->sr[level].cursor);
2079 }
2080
2081 vlv_invalidate_wms(crtc, intermediate, level);
2082
5b9489cb 2083out:
4841da51
VS
2084 /*
2085 * If our intermediate WM are identical to the final WM, then we can
2086 * omit the post-vblank programming; only update if it's different.
2087 */
5eeb798b 2088 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
5b9489cb 2089 new_crtc_state->wm.need_postvbl_update = true;
4841da51
VS
2090
2091 return 0;
2092}
2093
7c951c00 2094static void vlv_merge_wm(struct drm_i915_private *dev_priv,
262cd2e1
VS
2095 struct vlv_wm_values *wm)
2096{
2097 struct intel_crtc *crtc;
2098 int num_active_crtcs = 0;
2099
7c951c00 2100 wm->level = dev_priv->wm.max_level;
262cd2e1
VS
2101 wm->cxsr = true;
2102
7c951c00 2103 for_each_intel_crtc(&dev_priv->drm, crtc) {
7eb4941f 2104 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
262cd2e1
VS
2105
2106 if (!crtc->active)
2107 continue;
2108
2109 if (!wm_state->cxsr)
2110 wm->cxsr = false;
2111
2112 num_active_crtcs++;
2113 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2114 }
2115
2116 if (num_active_crtcs != 1)
2117 wm->cxsr = false;
2118
6f9c784b
VS
2119 if (num_active_crtcs > 1)
2120 wm->level = VLV_WM_LEVEL_PM2;
2121
7c951c00 2122 for_each_intel_crtc(&dev_priv->drm, crtc) {
7eb4941f 2123 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
262cd2e1
VS
2124 enum pipe pipe = crtc->pipe;
2125
262cd2e1 2126 wm->pipe[pipe] = wm_state->wm[wm->level];
ff32c54e 2127 if (crtc->active && wm->cxsr)
262cd2e1
VS
2128 wm->sr = wm_state->sr[wm->level];
2129
1b31389c
VS
2130 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2131 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2132 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2133 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
262cd2e1
VS
2134 }
2135}
2136
ff32c54e 2137static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
262cd2e1 2138{
fa292a4b
VS
2139 struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2140 struct vlv_wm_values new_wm = {};
262cd2e1 2141
fa292a4b 2142 vlv_merge_wm(dev_priv, &new_wm);
262cd2e1 2143
ff32c54e 2144 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
262cd2e1
VS
2145 return;
2146
fa292a4b 2147 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
262cd2e1
VS
2148 chv_set_memory_dvfs(dev_priv, false);
2149
fa292a4b 2150 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
262cd2e1
VS
2151 chv_set_memory_pm5(dev_priv, false);
2152
fa292a4b 2153 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
3d90e649 2154 _intel_set_memory_cxsr(dev_priv, false);
262cd2e1 2155
fa292a4b 2156 vlv_write_wm_values(dev_priv, &new_wm);
262cd2e1 2157
fa292a4b 2158 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
3d90e649 2159 _intel_set_memory_cxsr(dev_priv, true);
262cd2e1 2160
fa292a4b 2161 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
262cd2e1
VS
2162 chv_set_memory_pm5(dev_priv, true);
2163
fa292a4b 2164 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
262cd2e1
VS
2165 chv_set_memory_dvfs(dev_priv, true);
2166
fa292a4b 2167 *old_wm = new_wm;
3c2777fd
VS
2168}
2169
ff32c54e
VS
2170static void vlv_initial_watermarks(struct intel_atomic_state *state,
2171 struct intel_crtc_state *crtc_state)
2172{
2173 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2174 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2175
2176 mutex_lock(&dev_priv->wm.wm_mutex);
4841da51
VS
2177 crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2178 vlv_program_watermarks(dev_priv);
2179 mutex_unlock(&dev_priv->wm.wm_mutex);
2180}
2181
2182static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2183 struct intel_crtc_state *crtc_state)
2184{
2185 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2186 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2187
2188 if (!crtc_state->wm.need_postvbl_update)
2189 return;
2190
2191 mutex_lock(&dev_priv->wm.wm_mutex);
2192 intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
ff32c54e
VS
2193 vlv_program_watermarks(dev_priv);
2194 mutex_unlock(&dev_priv->wm.wm_mutex);
2195}
2196
432081bc 2197static void i965_update_wm(struct intel_crtc *unused_crtc)
b445e3b0 2198{
ffc7a76b 2199 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
efc2611e 2200 struct intel_crtc *crtc;
b445e3b0
ED
2201 int srwm = 1;
2202 int cursor_sr = 16;
9858425c 2203 bool cxsr_enabled;
b445e3b0
ED
2204
2205 /* Calc sr entries for one plane configs */
ffc7a76b 2206 crtc = single_enabled_crtc(dev_priv);
b445e3b0
ED
2207 if (crtc) {
2208 /* self-refresh has much higher latency */
2209 static const int sr_latency_ns = 12000;
efc2611e
VS
2210 const struct drm_display_mode *adjusted_mode =
2211 &crtc->config->base.adjusted_mode;
2212 const struct drm_framebuffer *fb =
2213 crtc->base.primary->state->fb;
241bfc38 2214 int clock = adjusted_mode->crtc_clock;
fec8cba3 2215 int htotal = adjusted_mode->crtc_htotal;
efc2611e 2216 int hdisplay = crtc->config->pipe_src_w;
353c8598 2217 int cpp = fb->format->cpp[0];
b445e3b0
ED
2218 int entries;
2219
baf69ca8
VS
2220 entries = intel_wm_method2(clock, htotal,
2221 hdisplay, cpp, sr_latency_ns / 100);
b445e3b0
ED
2222 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2223 srwm = I965_FIFO_SIZE - entries;
2224 if (srwm < 0)
2225 srwm = 1;
2226 srwm &= 0x1ff;
2227 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2228 entries, srwm);
2229
baf69ca8
VS
2230 entries = intel_wm_method2(clock, htotal,
2231 crtc->base.cursor->state->crtc_w, 4,
2232 sr_latency_ns / 100);
b445e3b0 2233 entries = DIV_ROUND_UP(entries,
baf69ca8
VS
2234 i965_cursor_wm_info.cacheline_size) +
2235 i965_cursor_wm_info.guard_size;
b445e3b0 2236
baf69ca8 2237 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
b445e3b0
ED
2238 if (cursor_sr > i965_cursor_wm_info.max_wm)
2239 cursor_sr = i965_cursor_wm_info.max_wm;
2240
2241 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2242 "cursor %d\n", srwm, cursor_sr);
2243
9858425c 2244 cxsr_enabled = true;
b445e3b0 2245 } else {
9858425c 2246 cxsr_enabled = false;
b445e3b0 2247 /* Turn off self refresh if both pipes are enabled */
5209b1f4 2248 intel_set_memory_cxsr(dev_priv, false);
b445e3b0
ED
2249 }
2250
2251 DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2252 srwm);
2253
2254 /* 965 has limitations... */
f4998963
VS
2255 I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2256 FW_WM(8, CURSORB) |
2257 FW_WM(8, PLANEB) |
2258 FW_WM(8, PLANEA));
2259 I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2260 FW_WM(8, PLANEC_OLD));
b445e3b0 2261 /* update cursor SR watermark */
f4998963 2262 I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
9858425c
ID
2263
2264 if (cxsr_enabled)
2265 intel_set_memory_cxsr(dev_priv, true);
b445e3b0
ED
2266}
2267
f4998963
VS
2268#undef FW_WM
2269
432081bc 2270static void i9xx_update_wm(struct intel_crtc *unused_crtc)
b445e3b0 2271{
ffc7a76b 2272 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
b445e3b0 2273 const struct intel_watermark_params *wm_info;
5ce9a649
JN
2274 u32 fwater_lo;
2275 u32 fwater_hi;
b445e3b0
ED
2276 int cwm, srwm = 1;
2277 int fifo_size;
2278 int planea_wm, planeb_wm;
efc2611e 2279 struct intel_crtc *crtc, *enabled = NULL;
b445e3b0 2280
a9097be4 2281 if (IS_I945GM(dev_priv))
b445e3b0 2282 wm_info = &i945_wm_info;
cf819eff 2283 else if (!IS_GEN(dev_priv, 2))
b445e3b0
ED
2284 wm_info = &i915_wm_info;
2285 else
9d539105 2286 wm_info = &i830_a_wm_info;
b445e3b0 2287
bdaf8439
VS
2288 fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2289 crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
efc2611e
VS
2290 if (intel_crtc_active(crtc)) {
2291 const struct drm_display_mode *adjusted_mode =
2292 &crtc->config->base.adjusted_mode;
2293 const struct drm_framebuffer *fb =
2294 crtc->base.primary->state->fb;
2295 int cpp;
2296
cf819eff 2297 if (IS_GEN(dev_priv, 2))
b9e0bda3 2298 cpp = 4;
efc2611e 2299 else
353c8598 2300 cpp = fb->format->cpp[0];
b9e0bda3 2301
241bfc38 2302 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
b9e0bda3 2303 wm_info, fifo_size, cpp,
5aef6003 2304 pessimal_latency_ns);
b445e3b0 2305 enabled = crtc;
9d539105 2306 } else {
b445e3b0 2307 planea_wm = fifo_size - wm_info->guard_size;
9d539105
VS
2308 if (planea_wm > (long)wm_info->max_wm)
2309 planea_wm = wm_info->max_wm;
2310 }
2311
cf819eff 2312 if (IS_GEN(dev_priv, 2))
9d539105 2313 wm_info = &i830_bc_wm_info;
b445e3b0 2314
bdaf8439
VS
2315 fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2316 crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
efc2611e
VS
2317 if (intel_crtc_active(crtc)) {
2318 const struct drm_display_mode *adjusted_mode =
2319 &crtc->config->base.adjusted_mode;
2320 const struct drm_framebuffer *fb =
2321 crtc->base.primary->state->fb;
2322 int cpp;
2323
cf819eff 2324 if (IS_GEN(dev_priv, 2))
b9e0bda3 2325 cpp = 4;
efc2611e 2326 else
353c8598 2327 cpp = fb->format->cpp[0];
b9e0bda3 2328
241bfc38 2329 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
b9e0bda3 2330 wm_info, fifo_size, cpp,
5aef6003 2331 pessimal_latency_ns);
b445e3b0
ED
2332 if (enabled == NULL)
2333 enabled = crtc;
2334 else
2335 enabled = NULL;
9d539105 2336 } else {
b445e3b0 2337 planeb_wm = fifo_size - wm_info->guard_size;
9d539105
VS
2338 if (planeb_wm > (long)wm_info->max_wm)
2339 planeb_wm = wm_info->max_wm;
2340 }
b445e3b0
ED
2341
2342 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2343
50a0bc90 2344 if (IS_I915GM(dev_priv) && enabled) {
2ff8fde1 2345 struct drm_i915_gem_object *obj;
2ab1bc9d 2346
efc2611e 2347 obj = intel_fb_obj(enabled->base.primary->state->fb);
2ab1bc9d
DV
2348
2349 /* self-refresh seems busted with untiled */
3e510a8e 2350 if (!i915_gem_object_is_tiled(obj))
2ab1bc9d
DV
2351 enabled = NULL;
2352 }
2353
b445e3b0
ED
2354 /*
2355 * Overlay gets an aggressive default since video jitter is bad.
2356 */
2357 cwm = 2;
2358
2359 /* Play safe and disable self-refresh before adjusting watermarks. */
5209b1f4 2360 intel_set_memory_cxsr(dev_priv, false);
b445e3b0
ED
2361
2362 /* Calc sr entries for one plane configs */
03427fcb 2363 if (HAS_FW_BLC(dev_priv) && enabled) {
b445e3b0
ED
2364 /* self-refresh has much higher latency */
2365 static const int sr_latency_ns = 6000;
efc2611e
VS
2366 const struct drm_display_mode *adjusted_mode =
2367 &enabled->config->base.adjusted_mode;
2368 const struct drm_framebuffer *fb =
2369 enabled->base.primary->state->fb;
241bfc38 2370 int clock = adjusted_mode->crtc_clock;
fec8cba3 2371 int htotal = adjusted_mode->crtc_htotal;
efc2611e
VS
2372 int hdisplay = enabled->config->pipe_src_w;
2373 int cpp;
b445e3b0
ED
2374 int entries;
2375
50a0bc90 2376 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2d1b5056 2377 cpp = 4;
efc2611e 2378 else
353c8598 2379 cpp = fb->format->cpp[0];
2d1b5056 2380
baf69ca8
VS
2381 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2382 sr_latency_ns / 100);
b445e3b0
ED
2383 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2384 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2385 srwm = wm_info->fifo_size - entries;
2386 if (srwm < 0)
2387 srwm = 1;
2388
50a0bc90 2389 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
b445e3b0
ED
2390 I915_WRITE(FW_BLC_SELF,
2391 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
acb91359 2392 else
b445e3b0
ED
2393 I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2394 }
2395
2396 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2397 planea_wm, planeb_wm, cwm, srwm);
2398
2399 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2400 fwater_hi = (cwm & 0x1f);
2401
2402 /* Set request length to 8 cachelines per fetch */
2403 fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2404 fwater_hi = fwater_hi | (1 << 8);
2405
2406 I915_WRITE(FW_BLC, fwater_lo);
2407 I915_WRITE(FW_BLC2, fwater_hi);
2408
5209b1f4
ID
2409 if (enabled)
2410 intel_set_memory_cxsr(dev_priv, true);
b445e3b0
ED
2411}
2412
432081bc 2413static void i845_update_wm(struct intel_crtc *unused_crtc)
b445e3b0 2414{
ffc7a76b 2415 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
efc2611e 2416 struct intel_crtc *crtc;
241bfc38 2417 const struct drm_display_mode *adjusted_mode;
5ce9a649 2418 u32 fwater_lo;
b445e3b0
ED
2419 int planea_wm;
2420
ffc7a76b 2421 crtc = single_enabled_crtc(dev_priv);
b445e3b0
ED
2422 if (crtc == NULL)
2423 return;
2424
efc2611e 2425 adjusted_mode = &crtc->config->base.adjusted_mode;
241bfc38 2426 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
feb56b93 2427 &i845_wm_info,
bdaf8439 2428 dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
5aef6003 2429 4, pessimal_latency_ns);
b445e3b0
ED
2430 fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2431 fwater_lo |= (3<<8) | planea_wm;
2432
2433 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2434
2435 I915_WRITE(FW_BLC, fwater_lo);
2436}
2437
/*
 * intel_wm_method1() result divided by 64 (rounded up), plus 2.
 * latency must be in 0.1us units.
 */
static unsigned int ilk_wm_method1(unsigned int pixel_rate,
				   unsigned int cpp,
				   unsigned int latency)
{
	const unsigned int raw = intel_wm_method1(pixel_rate, cpp, latency);

	return DIV_ROUND_UP(raw, 64) + 2;
}
2450
/*
 * intel_wm_method2() result divided by 64 (rounded up), plus 2.
 * latency must be in 0.1us units.
 */
static unsigned int ilk_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	const unsigned int raw = intel_wm_method2(pixel_rate, htotal,
						  width, cpp, latency);

	return DIV_ROUND_UP(raw, 64) + 2;
}
2466
5ce9a649 2467static u32 ilk_wm_fbc(u32 pri_val, u32 horiz_pixels, u8 cpp)
cca32e9a 2468{
15126882
MR
2469 /*
2470 * Neither of these should be possible since this function shouldn't be
2471 * called if the CRTC is off or the plane is invisible. But let's be
2472 * extra paranoid to avoid a potential divide-by-zero if we screw up
2473 * elsewhere in the driver.
2474 */
ac484963 2475 if (WARN_ON(!cpp))
15126882
MR
2476 return 0;
2477 if (WARN_ON(!horiz_pixels))
2478 return 0;
2479
ac484963 2480 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
cca32e9a
PZ
2481}
2482
820c1980 2483struct ilk_wm_maximums {
5ce9a649
JN
2484 u16 pri;
2485 u16 spr;
2486 u16 cur;
2487 u16 fbc;
cca32e9a
PZ
2488};
2489
37126462
VS
2490/*
2491 * For both WM_PIPE and WM_LP.
2492 * mem_value must be in 0.1us units.
2493 */
5ce9a649
JN
2494static u32 ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
2495 const struct intel_plane_state *pstate,
2496 u32 mem_value, bool is_lp)
801bcfff 2497{
5ce9a649 2498 u32 method1, method2;
8305494e 2499 int cpp;
cca32e9a 2500
03981c6e
VS
2501 if (mem_value == 0)
2502 return U32_MAX;
2503
24304d81 2504 if (!intel_wm_plane_visible(cstate, pstate))
801bcfff
PZ
2505 return 0;
2506
353c8598 2507 cpp = pstate->base.fb->format->cpp[0];
8305494e 2508
a7d1b3f4 2509 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
cca32e9a
PZ
2510
2511 if (!is_lp)
2512 return method1;
2513
a7d1b3f4 2514 method2 = ilk_wm_method2(cstate->pixel_rate,
7221fc33 2515 cstate->base.adjusted_mode.crtc_htotal,
936e71e3 2516 drm_rect_width(&pstate->base.dst),
ac484963 2517 cpp, mem_value);
cca32e9a
PZ
2518
2519 return min(method1, method2);
801bcfff
PZ
2520}
2521
37126462
VS
2522/*
2523 * For both WM_PIPE and WM_LP.
2524 * mem_value must be in 0.1us units.
2525 */
5ce9a649
JN
2526static u32 ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
2527 const struct intel_plane_state *pstate,
2528 u32 mem_value)
801bcfff 2529{
5ce9a649 2530 u32 method1, method2;
8305494e 2531 int cpp;
801bcfff 2532
03981c6e
VS
2533 if (mem_value == 0)
2534 return U32_MAX;
2535
24304d81 2536 if (!intel_wm_plane_visible(cstate, pstate))
801bcfff
PZ
2537 return 0;
2538
353c8598 2539 cpp = pstate->base.fb->format->cpp[0];
8305494e 2540
a7d1b3f4
VS
2541 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2542 method2 = ilk_wm_method2(cstate->pixel_rate,
7221fc33 2543 cstate->base.adjusted_mode.crtc_htotal,
936e71e3 2544 drm_rect_width(&pstate->base.dst),
ac484963 2545 cpp, mem_value);
801bcfff
PZ
2546 return min(method1, method2);
2547}
2548
37126462
VS
2549/*
2550 * For both WM_PIPE and WM_LP.
2551 * mem_value must be in 0.1us units.
2552 */
5ce9a649
JN
2553static u32 ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
2554 const struct intel_plane_state *pstate,
2555 u32 mem_value)
801bcfff 2556{
a5509abd
VS
2557 int cpp;
2558
03981c6e
VS
2559 if (mem_value == 0)
2560 return U32_MAX;
2561
24304d81 2562 if (!intel_wm_plane_visible(cstate, pstate))
801bcfff
PZ
2563 return 0;
2564
a5509abd
VS
2565 cpp = pstate->base.fb->format->cpp[0];
2566
a7d1b3f4 2567 return ilk_wm_method2(cstate->pixel_rate,
7221fc33 2568 cstate->base.adjusted_mode.crtc_htotal,
a5509abd 2569 pstate->base.crtc_w, cpp, mem_value);
801bcfff
PZ
2570}
2571
cca32e9a 2572/* Only for WM_LP. */
5ce9a649
JN
2573static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
2574 const struct intel_plane_state *pstate,
2575 u32 pri_val)
cca32e9a 2576{
8305494e 2577 int cpp;
43d59eda 2578
24304d81 2579 if (!intel_wm_plane_visible(cstate, pstate))
cca32e9a
PZ
2580 return 0;
2581
353c8598 2582 cpp = pstate->base.fb->format->cpp[0];
8305494e 2583
936e71e3 2584 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
cca32e9a
PZ
2585}
2586
175fded1
TU
/* Display FIFO size: 3072 on gen8+, 768 on gen7, 512 otherwise. */
static unsigned int
ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) >= 8)
		return 3072;

	if (INTEL_GEN(dev_priv) >= 7)
		return 768;

	return 512;
}
2597
175fded1
TU
2598static unsigned int
2599ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2600 int level, bool is_sprite)
4e975081 2601{
175fded1 2602 if (INTEL_GEN(dev_priv) >= 8)
4e975081
VS
2603 /* BDW primary/sprite plane watermarks */
2604 return level == 0 ? 255 : 2047;
175fded1 2605 else if (INTEL_GEN(dev_priv) >= 7)
4e975081
VS
2606 /* IVB/HSW primary/sprite plane watermarks */
2607 return level == 0 ? 127 : 1023;
2608 else if (!is_sprite)
2609 /* ILK/SNB primary plane watermarks */
2610 return level == 0 ? 127 : 511;
2611 else
2612 /* ILK/SNB sprite plane watermarks */
2613 return level == 0 ? 63 : 255;
2614}
2615
175fded1
TU
2616static unsigned int
2617ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
4e975081 2618{
175fded1 2619 if (INTEL_GEN(dev_priv) >= 7)
4e975081
VS
2620 return level == 0 ? 63 : 255;
2621 else
2622 return level == 0 ? 31 : 63;
2623}
2624
/* Largest FBC watermark the registers can hold: 31 on gen8+, else 15. */
static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
{
	return INTEL_GEN(dev_priv) >= 8 ? 31 : 15;
}
2632
158ae64f 2633/* Calculate the maximum primary/sprite plane watermark */
cd1d3ee9 2634static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
158ae64f 2635 int level,
240264f4 2636 const struct intel_wm_config *config,
158ae64f
VS
2637 enum intel_ddb_partitioning ddb_partitioning,
2638 bool is_sprite)
2639{
175fded1 2640 unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
158ae64f
VS
2641
2642 /* if sprites aren't enabled, sprites get nothing */
240264f4 2643 if (is_sprite && !config->sprites_enabled)
158ae64f
VS
2644 return 0;
2645
2646 /* HSW allows LP1+ watermarks even with multiple pipes */
240264f4 2647 if (level == 0 || config->num_pipes_active > 1) {
175fded1 2648 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
158ae64f
VS
2649
2650 /*
2651 * For some reason the non self refresh
2652 * FIFO size is only half of the self
2653 * refresh FIFO size on ILK/SNB.
2654 */
175fded1 2655 if (INTEL_GEN(dev_priv) <= 6)
158ae64f
VS
2656 fifo_size /= 2;
2657 }
2658
240264f4 2659 if (config->sprites_enabled) {
158ae64f
VS
2660 /* level 0 is always calculated with 1:1 split */
2661 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2662 if (is_sprite)
2663 fifo_size *= 5;
2664 fifo_size /= 6;
2665 } else {
2666 fifo_size /= 2;
2667 }
2668 }
2669
2670 /* clamp to max that the registers can hold */
175fded1 2671 return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
158ae64f
VS
2672}
2673
2674/* Calculate the maximum cursor plane watermark */
cd1d3ee9 2675static unsigned int ilk_cursor_wm_max(const struct drm_i915_private *dev_priv,
240264f4
VS
2676 int level,
2677 const struct intel_wm_config *config)
158ae64f
VS
2678{
2679 /* HSW LP1+ watermarks w/ multiple pipes */
240264f4 2680 if (level > 0 && config->num_pipes_active > 1)
158ae64f
VS
2681 return 64;
2682
2683 /* otherwise just report max that registers can hold */
cd1d3ee9 2684 return ilk_cursor_wm_reg_max(dev_priv, level);
158ae64f
VS
2685}
2686
cd1d3ee9 2687static void ilk_compute_wm_maximums(const struct drm_i915_private *dev_priv,
34982fe1
VS
2688 int level,
2689 const struct intel_wm_config *config,
2690 enum intel_ddb_partitioning ddb_partitioning,
820c1980 2691 struct ilk_wm_maximums *max)
158ae64f 2692{
cd1d3ee9
MR
2693 max->pri = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, false);
2694 max->spr = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, true);
2695 max->cur = ilk_cursor_wm_max(dev_priv, level, config);
2696 max->fbc = ilk_fbc_wm_reg_max(dev_priv);
158ae64f
VS
2697}
2698
175fded1 2699static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
a3cb4048
VS
2700 int level,
2701 struct ilk_wm_maximums *max)
2702{
175fded1
TU
2703 max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2704 max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2705 max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2706 max->fbc = ilk_fbc_wm_reg_max(dev_priv);
a3cb4048
VS
2707}
2708
d9395655 2709static bool ilk_validate_wm_level(int level,
820c1980 2710 const struct ilk_wm_maximums *max,
d9395655 2711 struct intel_wm_level *result)
a9786a11
VS
2712{
2713 bool ret;
2714
2715 /* already determined to be invalid? */
2716 if (!result->enable)
2717 return false;
2718
2719 result->enable = result->pri_val <= max->pri &&
2720 result->spr_val <= max->spr &&
2721 result->cur_val <= max->cur;
2722
2723 ret = result->enable;
2724
2725 /*
2726 * HACK until we can pre-compute everything,
2727 * and thus fail gracefully if LP0 watermarks
2728 * are exceeded...
2729 */
2730 if (level == 0 && !result->enable) {
2731 if (result->pri_val > max->pri)
2732 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2733 level, result->pri_val, max->pri);
2734 if (result->spr_val > max->spr)
2735 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2736 level, result->spr_val, max->spr);
2737 if (result->cur_val > max->cur)
2738 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2739 level, result->cur_val, max->cur);
2740
5ce9a649
JN
2741 result->pri_val = min_t(u32, result->pri_val, max->pri);
2742 result->spr_val = min_t(u32, result->spr_val, max->spr);
2743 result->cur_val = min_t(u32, result->cur_val, max->cur);
a9786a11
VS
2744 result->enable = true;
2745 }
2746
a9786a11
VS
2747 return ret;
2748}
2749
d34ff9c6 2750static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
43d59eda 2751 const struct intel_crtc *intel_crtc,
6f5ddd17 2752 int level,
7221fc33 2753 struct intel_crtc_state *cstate,
28283f4f
ML
2754 const struct intel_plane_state *pristate,
2755 const struct intel_plane_state *sprstate,
2756 const struct intel_plane_state *curstate,
1fd527cc 2757 struct intel_wm_level *result)
6f5ddd17 2758{
5ce9a649
JN
2759 u16 pri_latency = dev_priv->wm.pri_latency[level];
2760 u16 spr_latency = dev_priv->wm.spr_latency[level];
2761 u16 cur_latency = dev_priv->wm.cur_latency[level];
6f5ddd17
VS
2762
2763 /* WM1+ latency values stored in 0.5us units */
2764 if (level > 0) {
2765 pri_latency *= 5;
2766 spr_latency *= 5;
2767 cur_latency *= 5;
2768 }
2769
e3bddded
ML
2770 if (pristate) {
2771 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2772 pri_latency, level);
2773 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2774 }
2775
2776 if (sprstate)
2777 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2778
2779 if (curstate)
2780 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2781
6f5ddd17
VS
2782 result->enable = true;
2783}
2784
5ce9a649 2785static u32
532f7a7f 2786hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
1f8eeabf 2787{
532f7a7f
VS
2788 const struct intel_atomic_state *intel_state =
2789 to_intel_atomic_state(cstate->base.state);
ee91a159
MR
2790 const struct drm_display_mode *adjusted_mode =
2791 &cstate->base.adjusted_mode;
85a02deb 2792 u32 linetime, ips_linetime;
1f8eeabf 2793
ee91a159
MR
2794 if (!cstate->base.active)
2795 return 0;
2796 if (WARN_ON(adjusted_mode->crtc_clock == 0))
2797 return 0;
bb0f4aab 2798 if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
801bcfff 2799 return 0;
1011d8c4 2800
1f8eeabf
ED
2801 /* The WM are computed with base on how long it takes to fill a single
2802 * row at the given clock rate, multiplied by 8.
2803 * */
124abe07
VS
2804 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2805 adjusted_mode->crtc_clock);
2806 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
bb0f4aab 2807 intel_state->cdclk.logical.cdclk);
1f8eeabf 2808
801bcfff
PZ
2809 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2810 PIPE_WM_LINETIME_TIME(linetime);
1f8eeabf
ED
2811}
2812
bb726519 2813static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
5ce9a649 2814 u16 wm[8])
12b134df 2815{
50682ee6 2816 if (INTEL_GEN(dev_priv) >= 9) {
5ce9a649 2817 u32 val;
4f947386 2818 int ret, i;
5db94019 2819 int level, max_level = ilk_wm_max_level(dev_priv);
2af30a5c
PB
2820
2821 /* read the first set of memory latencies[0:3] */
2822 val = 0; /* data0 to be programmed to 0 for first set */
2af30a5c
PB
2823 ret = sandybridge_pcode_read(dev_priv,
2824 GEN9_PCODE_READ_MEM_LATENCY,
2825 &val);
2af30a5c
PB
2826
2827 if (ret) {
2828 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2829 return;
2830 }
2831
2832 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2833 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2834 GEN9_MEM_LATENCY_LEVEL_MASK;
2835 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2836 GEN9_MEM_LATENCY_LEVEL_MASK;
2837 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2838 GEN9_MEM_LATENCY_LEVEL_MASK;
2839
2840 /* read the second set of memory latencies[4:7] */
2841 val = 1; /* data0 to be programmed to 1 for second set */
2af30a5c
PB
2842 ret = sandybridge_pcode_read(dev_priv,
2843 GEN9_PCODE_READ_MEM_LATENCY,
2844 &val);
2af30a5c
PB
2845 if (ret) {
2846 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2847 return;
2848 }
2849
2850 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2851 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2852 GEN9_MEM_LATENCY_LEVEL_MASK;
2853 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2854 GEN9_MEM_LATENCY_LEVEL_MASK;
2855 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2856 GEN9_MEM_LATENCY_LEVEL_MASK;
2857
0727e40a
PZ
2858 /*
2859 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2860 * need to be disabled. We make sure to sanitize the values out
2861 * of the punit to satisfy this requirement.
2862 */
2863 for (level = 1; level <= max_level; level++) {
2864 if (wm[level] == 0) {
2865 for (i = level + 1; i <= max_level; i++)
2866 wm[i] = 0;
2867 break;
2868 }
2869 }
2870
367294be 2871 /*
50682ee6 2872 * WaWmMemoryReadLatency:skl+,glk
6f97235b 2873 *
367294be 2874 * punit doesn't take into account the read latency so we need
0727e40a
PZ
2875 * to add 2us to the various latency levels we retrieve from the
2876 * punit when level 0 response data us 0us.
367294be 2877 */
0727e40a
PZ
2878 if (wm[0] == 0) {
2879 wm[0] += 2;
2880 for (level = 1; level <= max_level; level++) {
2881 if (wm[level] == 0)
2882 break;
367294be 2883 wm[level] += 2;
4f947386 2884 }
0727e40a
PZ
2885 }
2886
86b59287
MK
2887 /*
2888 * WA Level-0 adjustment for 16GB DIMMs: SKL+
2889 * If we could not get dimm info enable this WA to prevent from
2890 * any underrun. If not able to get Dimm info assume 16GB dimm
2891 * to avoid any underrun.
2892 */
5d6f36b2 2893 if (dev_priv->dram_info.is_16gb_dimm)
86b59287
MK
2894 wm[0] += 1;
2895
8652744b 2896 } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
5ce9a649 2897 u64 sskpd = I915_READ64(MCH_SSKPD);
12b134df
VS
2898
2899 wm[0] = (sskpd >> 56) & 0xFF;
2900 if (wm[0] == 0)
2901 wm[0] = sskpd & 0xF;
e5d5019e
VS
2902 wm[1] = (sskpd >> 4) & 0xFF;
2903 wm[2] = (sskpd >> 12) & 0xFF;
2904 wm[3] = (sskpd >> 20) & 0x1FF;
2905 wm[4] = (sskpd >> 32) & 0x1FF;
bb726519 2906 } else if (INTEL_GEN(dev_priv) >= 6) {
5ce9a649 2907 u32 sskpd = I915_READ(MCH_SSKPD);
63cf9a13
VS
2908
2909 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2910 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2911 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2912 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
bb726519 2913 } else if (INTEL_GEN(dev_priv) >= 5) {
5ce9a649 2914 u32 mltr = I915_READ(MLTR_ILK);
3a88d0ac
VS
2915
2916 /* ILK primary LP0 latency is 700 ns */
2917 wm[0] = 7;
2918 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2919 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
50682ee6
PZ
2920 } else {
2921 MISSING_CASE(INTEL_DEVID(dev_priv));
12b134df
VS
2922 }
2923}
2924
5db94019 2925static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
5ce9a649 2926 u16 wm[5])
53615a5e
VS
2927{
2928 /* ILK sprite LP0 latency is 1300 ns */
cf819eff 2929 if (IS_GEN(dev_priv, 5))
53615a5e
VS
2930 wm[0] = 13;
2931}
2932
fd6b8f43 2933static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
5ce9a649 2934 u16 wm[5])
53615a5e
VS
2935{
2936 /* ILK cursor LP0 latency is 1300 ns */
cf819eff 2937 if (IS_GEN(dev_priv, 5))
53615a5e 2938 wm[0] = 13;
53615a5e
VS
2939}
2940
/* Highest watermark level index expected on this platform. */
int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) >= 9)
		return 7;

	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		return 4;

	if (INTEL_GEN(dev_priv) >= 6)
		return 3;

	return 2;
}
7526ed79 2953
5db94019 2954static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
ad0d6dc4 2955 const char *name,
5ce9a649 2956 const u16 wm[8])
ad0d6dc4 2957{
5db94019 2958 int level, max_level = ilk_wm_max_level(dev_priv);
26ec971e
VS
2959
2960 for (level = 0; level <= max_level; level++) {
2961 unsigned int latency = wm[level];
2962
2963 if (latency == 0) {
86c1c87d
CW
2964 DRM_DEBUG_KMS("%s WM%d latency not provided\n",
2965 name, level);
26ec971e
VS
2966 continue;
2967 }
2968
2af30a5c
PB
2969 /*
2970 * - latencies are in us on gen9.
2971 * - before then, WM1+ latency values are in 0.5us units
2972 */
dfc267ab 2973 if (INTEL_GEN(dev_priv) >= 9)
2af30a5c
PB
2974 latency *= 10;
2975 else if (level > 0)
26ec971e
VS
2976 latency *= 5;
2977
2978 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2979 name, level, wm[level],
2980 latency / 10, latency % 10);
2981 }
2982}
2983
e95a2f75 2984static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
5ce9a649 2985 u16 wm[5], u16 min)
e95a2f75 2986{
5db94019 2987 int level, max_level = ilk_wm_max_level(dev_priv);
e95a2f75
VS
2988
2989 if (wm[0] >= min)
2990 return false;
2991
2992 wm[0] = max(wm[0], min);
2993 for (level = 1; level <= max_level; level++)
5ce9a649 2994 wm[level] = max_t(u16, wm[level], DIV_ROUND_UP(min, 5));
e95a2f75
VS
2995
2996 return true;
2997}
2998
bb726519 2999static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
e95a2f75 3000{
e95a2f75
VS
3001 bool changed;
3002
3003 /*
3004 * The BIOS provided WM memory latency values are often
3005 * inadequate for high resolution displays. Adjust them.
3006 */
3007 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
3008 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
3009 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
3010
3011 if (!changed)
3012 return;
3013
3014 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
5db94019
TU
3015 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3016 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3017 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
e95a2f75
VS
3018}
3019
03981c6e
VS
3020static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3021{
3022 /*
3023 * On some SNB machines (Thinkpad X220 Tablet at least)
3024 * LP3 usage can cause vblank interrupts to be lost.
3025 * The DEIIR bit will go high but it looks like the CPU
3026 * never gets interrupted.
3027 *
3028 * It's not clear whether other interrupt source could
3029 * be affected or if this is somehow limited to vblank
3030 * interrupts only. To play it safe we disable LP3
3031 * watermarks entirely.
3032 */
3033 if (dev_priv->wm.pri_latency[3] == 0 &&
3034 dev_priv->wm.spr_latency[3] == 0 &&
3035 dev_priv->wm.cur_latency[3] == 0)
3036 return;
3037
3038 dev_priv->wm.pri_latency[3] = 0;
3039 dev_priv->wm.spr_latency[3] = 0;
3040 dev_priv->wm.cur_latency[3] = 0;
3041
3042 DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3043 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3044 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3045 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3046}
3047
bb726519 3048static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
53615a5e 3049{
bb726519 3050 intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
53615a5e
VS
3051
3052 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3053 sizeof(dev_priv->wm.pri_latency));
3054 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3055 sizeof(dev_priv->wm.pri_latency));
3056
5db94019 3057 intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
fd6b8f43 3058 intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
26ec971e 3059
5db94019
TU
3060 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3061 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3062 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
e95a2f75 3063
cf819eff 3064 if (IS_GEN(dev_priv, 6)) {
bb726519 3065 snb_wm_latency_quirk(dev_priv);
03981c6e
VS
3066 snb_wm_lp3_irq_quirk(dev_priv);
3067 }
53615a5e
VS
3068}
3069
bb726519 3070static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
2af30a5c 3071{
bb726519 3072 intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
5db94019 3073 intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
2af30a5c
PB
3074}
3075
cd1d3ee9 3076static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
ed4a6a7c
MR
3077 struct intel_pipe_wm *pipe_wm)
3078{
3079 /* LP0 watermark maximums depend on this pipe alone */
3080 const struct intel_wm_config config = {
3081 .num_pipes_active = 1,
3082 .sprites_enabled = pipe_wm->sprites_enabled,
3083 .sprites_scaled = pipe_wm->sprites_scaled,
3084 };
3085 struct ilk_wm_maximums max;
3086
3087 /* LP0 watermarks always use 1/2 DDB partitioning */
cd1d3ee9 3088 ilk_compute_wm_maximums(dev_priv, 0, &config, INTEL_DDB_PART_1_2, &max);
ed4a6a7c
MR
3089
3090 /* At least LP0 must be valid */
3091 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3092 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3093 return false;
3094 }
3095
3096 return true;
3097}
3098
0b2ae6d7 3099/* Compute new watermarks for the pipe */
e3bddded 3100static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
0b2ae6d7 3101{
e3bddded
ML
3102 struct drm_atomic_state *state = cstate->base.state;
3103 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
86c8bbbe 3104 struct intel_pipe_wm *pipe_wm;
e3bddded 3105 struct drm_device *dev = state->dev;
fac5e23e 3106 const struct drm_i915_private *dev_priv = to_i915(dev);
28283f4f
ML
3107 struct drm_plane *plane;
3108 const struct drm_plane_state *plane_state;
3109 const struct intel_plane_state *pristate = NULL;
3110 const struct intel_plane_state *sprstate = NULL;
3111 const struct intel_plane_state *curstate = NULL;
5db94019 3112 int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
820c1980 3113 struct ilk_wm_maximums max;
0b2ae6d7 3114
e8f1f02e 3115 pipe_wm = &cstate->wm.ilk.optimal;
86c8bbbe 3116
28283f4f
ML
3117 drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
3118 const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
e3bddded 3119
28283f4f 3120 if (plane->type == DRM_PLANE_TYPE_PRIMARY)
e3bddded 3121 pristate = ps;
28283f4f 3122 else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
e3bddded 3123 sprstate = ps;
28283f4f 3124 else if (plane->type == DRM_PLANE_TYPE_CURSOR)
e3bddded 3125 curstate = ps;
43d59eda
MR
3126 }
3127
ed4a6a7c 3128 pipe_wm->pipe_enabled = cstate->base.active;
e3bddded 3129 if (sprstate) {
936e71e3
VS
3130 pipe_wm->sprites_enabled = sprstate->base.visible;
3131 pipe_wm->sprites_scaled = sprstate->base.visible &&
3132 (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3133 drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
e3bddded
ML
3134 }
3135
d81f04c5
ML
3136 usable_level = max_level;
3137
7b39a0b7 3138 /* ILK/SNB: LP2+ watermarks only w/o sprites */
175fded1 3139 if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
d81f04c5 3140 usable_level = 1;
7b39a0b7
VS
3141
3142 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
ed4a6a7c 3143 if (pipe_wm->sprites_scaled)
d81f04c5 3144 usable_level = 0;
7b39a0b7 3145
71f0a626 3146 memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
28283f4f
ML
3147 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
3148 pristate, sprstate, curstate, &pipe_wm->wm[0]);
0b2ae6d7 3149
8652744b 3150 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
532f7a7f 3151 pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
0b2ae6d7 3152
cd1d3ee9 3153 if (!ilk_validate_pipe_wm(dev_priv, pipe_wm))
1a426d61 3154 return -EINVAL;
a3cb4048 3155
175fded1 3156 ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
a3cb4048 3157
28283f4f
ML
3158 for (level = 1; level <= usable_level; level++) {
3159 struct intel_wm_level *wm = &pipe_wm->wm[level];
a3cb4048 3160
86c8bbbe 3161 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
d81f04c5 3162 pristate, sprstate, curstate, wm);
a3cb4048
VS
3163
3164 /*
3165 * Disable any watermark level that exceeds the
3166 * register maximums since such watermarks are
3167 * always invalid.
3168 */
28283f4f
ML
3169 if (!ilk_validate_wm_level(level, &max, wm)) {
3170 memset(wm, 0, sizeof(*wm));
3171 break;
3172 }
a3cb4048
VS
3173 }
3174
86c8bbbe 3175 return 0;
0b2ae6d7
VS
3176}
3177
ed4a6a7c
MR
3178/*
3179 * Build a set of 'intermediate' watermark values that satisfy both the old
3180 * state and the new state. These can be programmed to the hardware
3181 * immediately.
3182 */
cd1d3ee9 3183static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate)
ed4a6a7c 3184{
cd1d3ee9
MR
3185 struct intel_crtc *intel_crtc = to_intel_crtc(newstate->base.crtc);
3186 struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
e8f1f02e 3187 struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
b6b178a7
ML
3188 struct intel_atomic_state *intel_state =
3189 to_intel_atomic_state(newstate->base.state);
3190 const struct intel_crtc_state *oldstate =
3191 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3192 const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
cd1d3ee9 3193 int level, max_level = ilk_wm_max_level(dev_priv);
ed4a6a7c
MR
3194
3195 /*
3196 * Start with the final, target watermarks, then combine with the
3197 * currently active watermarks to get values that are safe both before
3198 * and after the vblank.
3199 */
e8f1f02e 3200 *a = newstate->wm.ilk.optimal;
f255c624
VS
3201 if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base) ||
3202 intel_state->skip_intermediate_wm)
b6b178a7
ML
3203 return 0;
3204
ed4a6a7c
MR
3205 a->pipe_enabled |= b->pipe_enabled;
3206 a->sprites_enabled |= b->sprites_enabled;
3207 a->sprites_scaled |= b->sprites_scaled;
3208
3209 for (level = 0; level <= max_level; level++) {
3210 struct intel_wm_level *a_wm = &a->wm[level];
3211 const struct intel_wm_level *b_wm = &b->wm[level];
3212
3213 a_wm->enable &= b_wm->enable;
3214 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3215 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3216 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3217 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3218 }
3219
3220 /*
3221 * We need to make sure that these merged watermark values are
3222 * actually a valid configuration themselves. If they're not,
3223 * there's no safe way to transition from the old state to
3224 * the new state, so we need to fail the atomic transaction.
3225 */
cd1d3ee9 3226 if (!ilk_validate_pipe_wm(dev_priv, a))
ed4a6a7c
MR
3227 return -EINVAL;
3228
3229 /*
3230 * If our intermediate WM are identical to the final WM, then we can
3231 * omit the post-vblank programming; only update if it's different.
3232 */
5eeb798b
VS
3233 if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3234 newstate->wm.need_postvbl_update = true;
ed4a6a7c
MR
3235
3236 return 0;
3237}
3238
0b2ae6d7
VS
3239/*
3240 * Merge the watermarks from all active pipes for a specific level.
3241 */
cd1d3ee9 3242static void ilk_merge_wm_level(struct drm_i915_private *dev_priv,
0b2ae6d7
VS
3243 int level,
3244 struct intel_wm_level *ret_wm)
3245{
3246 const struct intel_crtc *intel_crtc;
3247
d52fea5b
VS
3248 ret_wm->enable = true;
3249
cd1d3ee9 3250 for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
ed4a6a7c 3251 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
fe392efd
VS
3252 const struct intel_wm_level *wm = &active->wm[level];
3253
3254 if (!active->pipe_enabled)
3255 continue;
0b2ae6d7 3256
d52fea5b
VS
3257 /*
3258 * The watermark values may have been used in the past,
3259 * so we must maintain them in the registers for some
3260 * time even if the level is now disabled.
3261 */
0b2ae6d7 3262 if (!wm->enable)
d52fea5b 3263 ret_wm->enable = false;
0b2ae6d7
VS
3264
3265 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3266 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3267 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3268 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3269 }
0b2ae6d7
VS
3270}
3271
3272/*
3273 * Merge all low power watermarks for all active pipes.
3274 */
cd1d3ee9 3275static void ilk_wm_merge(struct drm_i915_private *dev_priv,
0ba22e26 3276 const struct intel_wm_config *config,
820c1980 3277 const struct ilk_wm_maximums *max,
0b2ae6d7
VS
3278 struct intel_pipe_wm *merged)
3279{
5db94019 3280 int level, max_level = ilk_wm_max_level(dev_priv);
d52fea5b 3281 int last_enabled_level = max_level;
0b2ae6d7 3282
0ba22e26 3283 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
fd6b8f43 3284 if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
0ba22e26 3285 config->num_pipes_active > 1)
1204d5ba 3286 last_enabled_level = 0;
0ba22e26 3287
6c8b6c28 3288 /* ILK: FBC WM must be disabled always */
175fded1 3289 merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
0b2ae6d7
VS
3290
3291 /* merge each WM1+ level */
3292 for (level = 1; level <= max_level; level++) {
3293 struct intel_wm_level *wm = &merged->wm[level];
3294
cd1d3ee9 3295 ilk_merge_wm_level(dev_priv, level, wm);
0b2ae6d7 3296
d52fea5b
VS
3297 if (level > last_enabled_level)
3298 wm->enable = false;
3299 else if (!ilk_validate_wm_level(level, max, wm))
3300 /* make sure all following levels get disabled */
3301 last_enabled_level = level - 1;
0b2ae6d7
VS
3302
3303 /*
3304 * The spec says it is preferred to disable
3305 * FBC WMs instead of disabling a WM level.
3306 */
3307 if (wm->fbc_val > max->fbc) {
d52fea5b
VS
3308 if (wm->enable)
3309 merged->fbc_wm_enabled = false;
0b2ae6d7
VS
3310 wm->fbc_val = 0;
3311 }
3312 }
6c8b6c28
VS
3313
3314 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3315 /*
3316 * FIXME this is racy. FBC might get enabled later.
3317 * What we should check here is whether FBC can be
3318 * enabled sometime later.
3319 */
cf819eff 3320 if (IS_GEN(dev_priv, 5) && !merged->fbc_wm_enabled &&
0e631adc 3321 intel_fbc_is_active(dev_priv)) {
6c8b6c28
VS
3322 for (level = 2; level <= max_level; level++) {
3323 struct intel_wm_level *wm = &merged->wm[level];
3324
3325 wm->enable = false;
3326 }
3327 }
0b2ae6d7
VS
3328}
3329
b380ca3c
VS
3330static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3331{
3332 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3333 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3334}
3335
a68d68ee 3336/* The value we need to program into the WM_LPx latency field */
cd1d3ee9
MR
3337static unsigned int ilk_wm_lp_latency(struct drm_i915_private *dev_priv,
3338 int level)
a68d68ee 3339{
8652744b 3340 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
a68d68ee
VS
3341 return 2 * level;
3342 else
3343 return dev_priv->wm.pri_latency[level];
3344}
3345
cd1d3ee9 3346static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
0362c781 3347 const struct intel_pipe_wm *merged,
609cedef 3348 enum intel_ddb_partitioning partitioning,
820c1980 3349 struct ilk_wm_values *results)
801bcfff 3350{
0b2ae6d7
VS
3351 struct intel_crtc *intel_crtc;
3352 int level, wm_lp;
cca32e9a 3353
0362c781 3354 results->enable_fbc_wm = merged->fbc_wm_enabled;
609cedef 3355 results->partitioning = partitioning;
cca32e9a 3356
0b2ae6d7 3357 /* LP1+ register values */
cca32e9a 3358 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
1fd527cc 3359 const struct intel_wm_level *r;
801bcfff 3360
b380ca3c 3361 level = ilk_wm_lp_to_level(wm_lp, merged);
0b2ae6d7 3362
0362c781 3363 r = &merged->wm[level];
cca32e9a 3364
d52fea5b
VS
3365 /*
3366 * Maintain the watermark values even if the level is
3367 * disabled. Doing otherwise could cause underruns.
3368 */
3369 results->wm_lp[wm_lp - 1] =
cd1d3ee9 3370 (ilk_wm_lp_latency(dev_priv, level) << WM1_LP_LATENCY_SHIFT) |
416f4727
VS
3371 (r->pri_val << WM1_LP_SR_SHIFT) |
3372 r->cur_val;
3373
d52fea5b
VS
3374 if (r->enable)
3375 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3376
175fded1 3377 if (INTEL_GEN(dev_priv) >= 8)
416f4727
VS
3378 results->wm_lp[wm_lp - 1] |=
3379 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3380 else
3381 results->wm_lp[wm_lp - 1] |=
3382 r->fbc_val << WM1_LP_FBC_SHIFT;
3383
d52fea5b
VS
3384 /*
3385 * Always set WM1S_LP_EN when spr_val != 0, even if the
3386 * level is disabled. Doing otherwise could cause underruns.
3387 */
175fded1 3388 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
6cef2b8a
VS
3389 WARN_ON(wm_lp != 1);
3390 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3391 } else
3392 results->wm_lp_spr[wm_lp - 1] = r->spr_val;
cca32e9a 3393 }
801bcfff 3394
0b2ae6d7 3395 /* LP0 register values */
cd1d3ee9 3396 for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
0b2ae6d7 3397 enum pipe pipe = intel_crtc->pipe;
ed4a6a7c
MR
3398 const struct intel_wm_level *r =
3399 &intel_crtc->wm.active.ilk.wm[0];
0b2ae6d7
VS
3400
3401 if (WARN_ON(!r->enable))
3402 continue;
3403
ed4a6a7c 3404 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
1011d8c4 3405
0b2ae6d7
VS
3406 results->wm_pipe[pipe] =
3407 (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3408 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3409 r->cur_val;
801bcfff
PZ
3410 }
3411}
3412
861f3389
PZ
3413/* Find the result with the highest level enabled. Check for enable_fbc_wm in
3414 * case both are at the same level. Prefer r1 in case they're the same. */
cd1d3ee9
MR
3415static struct intel_pipe_wm *
3416ilk_find_best_result(struct drm_i915_private *dev_priv,
3417 struct intel_pipe_wm *r1,
3418 struct intel_pipe_wm *r2)
861f3389 3419{
cd1d3ee9 3420 int level, max_level = ilk_wm_max_level(dev_priv);
198a1e9b 3421 int level1 = 0, level2 = 0;
861f3389 3422
198a1e9b
VS
3423 for (level = 1; level <= max_level; level++) {
3424 if (r1->wm[level].enable)
3425 level1 = level;
3426 if (r2->wm[level].enable)
3427 level2 = level;
861f3389
PZ
3428 }
3429
198a1e9b
VS
3430 if (level1 == level2) {
3431 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
861f3389
PZ
3432 return r2;
3433 else
3434 return r1;
198a1e9b 3435 } else if (level1 > level2) {
861f3389
PZ
3436 return r1;
3437 } else {
3438 return r2;
3439 }
3440}
3441
/*
 * Dirty bits used to track which watermarks need changes:
 *   bits 0 and up  - per-pipe WM0 value
 *   bits 8 and up  - per-pipe linetime value
 *   bits 16..18    - LP1..LP3 values
 *   bit 24         - FBC watermark enable
 *   bit 25         - DDB partitioning
 */
#define WM_DIRTY_PIPE(pipe) (1 << (pipe))
#define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
#define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
#define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
#define WM_DIRTY_FBC (1 << 24)
#define WM_DIRTY_DDB (1 << 25)
3449
055e393f 3450static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
820c1980
ID
3451 const struct ilk_wm_values *old,
3452 const struct ilk_wm_values *new)
49a687c4
VS
3453{
3454 unsigned int dirty = 0;
3455 enum pipe pipe;
3456 int wm_lp;
3457
055e393f 3458 for_each_pipe(dev_priv, pipe) {
49a687c4
VS
3459 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3460 dirty |= WM_DIRTY_LINETIME(pipe);
3461 /* Must disable LP1+ watermarks too */
3462 dirty |= WM_DIRTY_LP_ALL;
3463 }
3464
3465 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3466 dirty |= WM_DIRTY_PIPE(pipe);
3467 /* Must disable LP1+ watermarks too */
3468 dirty |= WM_DIRTY_LP_ALL;
3469 }
3470 }
3471
3472 if (old->enable_fbc_wm != new->enable_fbc_wm) {
3473 dirty |= WM_DIRTY_FBC;
3474 /* Must disable LP1+ watermarks too */
3475 dirty |= WM_DIRTY_LP_ALL;
3476 }
3477
3478 if (old->partitioning != new->partitioning) {
3479 dirty |= WM_DIRTY_DDB;
3480 /* Must disable LP1+ watermarks too */
3481 dirty |= WM_DIRTY_LP_ALL;
3482 }
3483
3484 /* LP1+ watermarks already deemed dirty, no need to continue */
3485 if (dirty & WM_DIRTY_LP_ALL)
3486 return dirty;
3487
3488 /* Find the lowest numbered LP1+ watermark in need of an update... */
3489 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3490 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3491 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3492 break;
3493 }
3494
3495 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3496 for (; wm_lp <= 3; wm_lp++)
3497 dirty |= WM_DIRTY_LP(wm_lp);
3498
3499 return dirty;
3500}
3501
8553c18e
VS
3502static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3503 unsigned int dirty)
801bcfff 3504{
820c1980 3505 struct ilk_wm_values *previous = &dev_priv->wm.hw;
8553c18e 3506 bool changed = false;
801bcfff 3507
facd619b
VS
3508 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3509 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3510 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
8553c18e 3511 changed = true;
facd619b
VS
3512 }
3513 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3514 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3515 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
8553c18e 3516 changed = true;
facd619b
VS
3517 }
3518 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3519 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3520 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
8553c18e 3521 changed = true;
facd619b 3522 }
801bcfff 3523
facd619b
VS
3524 /*
3525 * Don't touch WM1S_LP_EN here.
3526 * Doing so could cause underruns.
3527 */
6cef2b8a 3528
8553c18e
VS
3529 return changed;
3530}
3531
/*
 * Program the ILK-style watermark registers from @results, touching only
 * the registers that ilk_compute_wm_dirty() flags as different from the
 * cached hardware state in dev_priv->wm.hw.
 *
 * The spec says we shouldn't write when we don't need, because every write
 * causes WMs to be re-evaluated, expending some power.
 *
 * On return dev_priv->wm.hw holds a copy of @results (unless nothing was
 * dirty, in which case the function returns without touching anything).
 */
static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
				struct ilk_wm_values *results)
{
	/* Cached copy of what was last programmed. */
	struct ilk_wm_values *previous = &dev_priv->wm.hw;
	unsigned int dirty;
	u32 val;

	dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
	if (!dirty)
		return;

	/*
	 * Turn off the dirty LP levels before reprogramming anything else.
	 * This updates previous->wm_lp[] as a side effect, which is why the
	 * LP writes at the bottom compare against previous->wm_lp[] again.
	 */
	_ilk_disable_lp_wm(dev_priv, dirty);

	/* Per-pipe WM0 values. */
	if (dirty & WM_DIRTY_PIPE(PIPE_A))
		I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
	if (dirty & WM_DIRTY_PIPE(PIPE_B))
		I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
	if (dirty & WM_DIRTY_PIPE(PIPE_C))
		I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);

	/* Per-pipe linetime watermarks. */
	if (dirty & WM_DIRTY_LINETIME(PIPE_A))
		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
	if (dirty & WM_DIRTY_LINETIME(PIPE_B))
		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
	if (dirty & WM_DIRTY_LINETIME(PIPE_C))
		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);

	/*
	 * DDB partitioning: read-modify-write of a single bit. The bit lives
	 * in WM_MISC on HSW/BDW and in DISP_ARB_CTL2 on the other platforms
	 * handled here; it is cleared for INTEL_DDB_PART_1_2 and set otherwise.
	 */
	if (dirty & WM_DIRTY_DDB) {
		if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
			val = I915_READ(WM_MISC);
			if (results->partitioning == INTEL_DDB_PART_1_2)
				val &= ~WM_MISC_DATA_PARTITION_5_6;
			else
				val |= WM_MISC_DATA_PARTITION_5_6;
			I915_WRITE(WM_MISC, val);
		} else {
			val = I915_READ(DISP_ARB_CTL2);
			if (results->partitioning == INTEL_DDB_PART_1_2)
				val &= ~DISP_DATA_PARTITION_5_6;
			else
				val |= DISP_DATA_PARTITION_5_6;
			I915_WRITE(DISP_ARB_CTL2, val);
		}
	}

	/* FBC watermark: the register bit is a *disable* bit, hence inverted. */
	if (dirty & WM_DIRTY_FBC) {
		val = I915_READ(DISP_ARB_CTL);
		if (results->enable_fbc_wm)
			val &= ~DISP_FBC_WM_DIS;
		else
			val |= DISP_FBC_WM_DIS;
		I915_WRITE(DISP_ARB_CTL, val);
	}

	/* Sprite LP watermarks; LP2/LP3 sprite registers are gen7+ only here. */
	if (dirty & WM_DIRTY_LP(1) &&
	    previous->wm_lp_spr[0] != results->wm_lp_spr[0])
		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);

	if (INTEL_GEN(dev_priv) >= 7) {
		if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
			I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
		if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
			I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
	}

	/* Primary LP watermarks, written last. */
	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
		I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
		I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
		I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);

	/* Remember what the hardware now contains. */
	dev_priv->wm.hw = *results;
}
3610
ed4a6a7c 3611bool ilk_disable_lp_wm(struct drm_device *dev)
8553c18e 3612{
fac5e23e 3613 struct drm_i915_private *dev_priv = to_i915(dev);
8553c18e
VS
3614
3615 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3616}
3617
74bd8004
MK
3618static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
3619{
3620 u8 enabled_slices;
3621
3622 /* Slice 1 will always be enabled */
3623 enabled_slices = 1;
3624
3625 /* Gen prior to GEN11 have only one DBuf slice */
3626 if (INTEL_GEN(dev_priv) < 11)
3627 return enabled_slices;
3628
209d7353
ID
3629 /*
3630 * FIXME: for now we'll only ever use 1 slice; pretend that we have
3631 * only that 1 slice enabled until we have a proper way for on-demand
3632 * toggling of the second slice.
3633 */
3634 if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
74bd8004
MK
3635 enabled_slices++;
3636
3637 return enabled_slices;
3638}
3639
ee3d532f
PZ
3640/*
3641 * FIXME: We still don't have the proper code detect if we need to apply the WA,
3642 * so assume we'll always need it in order to avoid underruns.
3643 */
60e983ff 3644static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
ee3d532f 3645{
60e983ff 3646 return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv);
ee3d532f
PZ
3647}
3648
56feca91
PZ
3649static bool
3650intel_has_sagv(struct drm_i915_private *dev_priv)
3651{
1ca2b067
RV
3652 return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
3653 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
56feca91
PZ
3654}
3655
656d1b89
L
3656/*
3657 * SAGV dynamically adjusts the system agent voltage and clock frequencies
3658 * depending on power and performance requirements. The display engine access
3659 * to system memory is blocked during the adjustment time. Because of the
3660 * blocking time, having this enabled can cause full system hangs and/or pipe
3661 * underruns if we don't meet all of the following requirements:
3662 *
3663 * - <= 1 pipe enabled
3664 * - All planes can enable watermarks for latencies >= SAGV engine block time
3665 * - We're not using an interlaced display configuration
3666 */
3667int
16dcdc4e 3668intel_enable_sagv(struct drm_i915_private *dev_priv)
656d1b89
L
3669{
3670 int ret;
3671
56feca91
PZ
3672 if (!intel_has_sagv(dev_priv))
3673 return 0;
3674
3675 if (dev_priv->sagv_status == I915_SAGV_ENABLED)
656d1b89
L
3676 return 0;
3677
ff61a974 3678 DRM_DEBUG_KMS("Enabling SAGV\n");
656d1b89
L
3679 ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3680 GEN9_SAGV_ENABLE);
3681
ff61a974 3682 /* We don't need to wait for SAGV when enabling */
656d1b89
L
3683
3684 /*
3685 * Some skl systems, pre-release machines in particular,
ff61a974 3686 * don't actually have SAGV.
656d1b89 3687 */
6e3100ec 3688 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
656d1b89 3689 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
16dcdc4e 3690 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
656d1b89
L
3691 return 0;
3692 } else if (ret < 0) {
ff61a974 3693 DRM_ERROR("Failed to enable SAGV\n");
656d1b89
L
3694 return ret;
3695 }
3696
16dcdc4e 3697 dev_priv->sagv_status = I915_SAGV_ENABLED;
656d1b89
L
3698 return 0;
3699}
3700
656d1b89 3701int
16dcdc4e 3702intel_disable_sagv(struct drm_i915_private *dev_priv)
656d1b89 3703{
b3b8e999 3704 int ret;
656d1b89 3705
56feca91
PZ
3706 if (!intel_has_sagv(dev_priv))
3707 return 0;
3708
3709 if (dev_priv->sagv_status == I915_SAGV_DISABLED)
656d1b89
L
3710 return 0;
3711
ff61a974 3712 DRM_DEBUG_KMS("Disabling SAGV\n");
656d1b89 3713 /* bspec says to keep retrying for at least 1 ms */
b3b8e999
ID
3714 ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3715 GEN9_SAGV_DISABLE,
3716 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3717 1);
656d1b89
L
3718 /*
3719 * Some skl systems, pre-release machines in particular,
ff61a974 3720 * don't actually have SAGV.
656d1b89 3721 */
b3b8e999 3722 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
656d1b89 3723 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
16dcdc4e 3724 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
656d1b89 3725 return 0;
b3b8e999 3726 } else if (ret < 0) {
ff61a974 3727 DRM_ERROR("Failed to disable SAGV (%d)\n", ret);
b3b8e999 3728 return ret;
656d1b89
L
3729 }
3730
16dcdc4e 3731 dev_priv->sagv_status = I915_SAGV_DISABLED;
656d1b89
L
3732 return 0;
3733}
3734
16dcdc4e 3735bool intel_can_enable_sagv(struct drm_atomic_state *state)
656d1b89
L
3736{
3737 struct drm_device *dev = state->dev;
3738 struct drm_i915_private *dev_priv = to_i915(dev);
3739 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
ee3d532f
PZ
3740 struct intel_crtc *crtc;
3741 struct intel_plane *plane;
d8c0fafc 3742 struct intel_crtc_state *cstate;
656d1b89 3743 enum pipe pipe;
d8c0fafc 3744 int level, latency;
4357ce07 3745 int sagv_block_time_us;
656d1b89 3746
56feca91
PZ
3747 if (!intel_has_sagv(dev_priv))
3748 return false;
3749
cf819eff 3750 if (IS_GEN(dev_priv, 9))
4357ce07 3751 sagv_block_time_us = 30;
cf819eff 3752 else if (IS_GEN(dev_priv, 10))
4357ce07
PZ
3753 sagv_block_time_us = 20;
3754 else
3755 sagv_block_time_us = 10;
3756
656d1b89 3757 /*
656d1b89
L
3758 * If there are no active CRTCs, no additional checks need be performed
3759 */
3760 if (hweight32(intel_state->active_crtcs) == 0)
3761 return true;
da17223e
LDM
3762
3763 /*
3764 * SKL+ workaround: bspec recommends we disable SAGV when we have
3765 * more then one pipe enabled
3766 */
3767 if (hweight32(intel_state->active_crtcs) > 1)
656d1b89
L
3768 return false;
3769
3770 /* Since we're now guaranteed to only have one active CRTC... */
3771 pipe = ffs(intel_state->active_crtcs) - 1;
98187836 3772 crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
d8c0fafc 3773 cstate = to_intel_crtc_state(crtc->base.state);
656d1b89 3774
c89cadd5 3775 if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
656d1b89
L
3776 return false;
3777
ee3d532f 3778 for_each_intel_plane_on_crtc(dev, crtc, plane) {
d5cdfdf5
VS
3779 struct skl_plane_wm *wm =
3780 &cstate->wm.skl.optimal.planes[plane->id];
ee3d532f 3781
656d1b89 3782 /* Skip this plane if it's not enabled */
d8c0fafc 3783 if (!wm->wm[0].plane_en)
656d1b89
L
3784 continue;
3785
3786 /* Find the highest enabled wm level for this plane */
5db94019 3787 for (level = ilk_wm_max_level(dev_priv);
d8c0fafc 3788 !wm->wm[level].plane_en; --level)
656d1b89
L
3789 { }
3790
ee3d532f
PZ
3791 latency = dev_priv->wm.skl_latency[level];
3792
60e983ff 3793 if (skl_needs_memory_bw_wa(dev_priv) &&
bae781b2 3794 plane->base.state->fb->modifier ==
ee3d532f
PZ
3795 I915_FORMAT_MOD_X_TILED)
3796 latency += 15;
3797
656d1b89 3798 /*
fdd11c2b
PZ
3799 * If any of the planes on this pipe don't enable wm levels that
3800 * incur memory latencies higher than sagv_block_time_us we
ff61a974 3801 * can't enable SAGV.
656d1b89 3802 */
fdd11c2b 3803 if (latency < sagv_block_time_us)
656d1b89
L
3804 return false;
3805 }
3806
3807 return true;
3808}
3809
aaa02378
MK
3810static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
3811 const struct intel_crtc_state *cstate,
24719e94 3812 const u64 total_data_rate,
aaa02378
MK
3813 const int num_active,
3814 struct skl_ddb_allocation *ddb)
aa9664ff
MK
3815{
3816 const struct drm_display_mode *adjusted_mode;
3817 u64 total_data_bw;
3818 u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3819
3820 WARN_ON(ddb_size == 0);
3821
3822 if (INTEL_GEN(dev_priv) < 11)
3823 return ddb_size - 4; /* 4 blocks for bypass path allocation */
3824
3825 adjusted_mode = &cstate->base.adjusted_mode;
24719e94 3826 total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
aa9664ff
MK
3827
3828 /*
3829 * 12GB/s is maximum BW supported by single DBuf slice.
ad3e7b82
VS
3830 *
3831 * FIXME dbuf slice code is broken:
3832 * - must wait for planes to stop using the slice before powering it off
3833 * - plane straddling both slices is illegal in multi-pipe scenarios
3834 * - should validate we stay within the hw bandwidth limits
aa9664ff 3835 */
ad3e7b82 3836 if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
aa9664ff
MK
3837 ddb->enabled_slices = 2;
3838 } else {
3839 ddb->enabled_slices = 1;
3840 ddb_size /= 2;
3841 }
3842
3843 return ddb_size;
3844}
3845
b9cec075 3846static void
b048a00b 3847skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
024c9045 3848 const struct intel_crtc_state *cstate,
24719e94 3849 const u64 total_data_rate,
aa9664ff 3850 struct skl_ddb_allocation *ddb,
c107acfe
MR
3851 struct skl_ddb_entry *alloc, /* out */
3852 int *num_active /* out */)
b9cec075 3853{
c107acfe
MR
3854 struct drm_atomic_state *state = cstate->base.state;
3855 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
024c9045 3856 struct drm_crtc *for_crtc = cstate->base.crtc;
cf1f697a
MK
3857 const struct drm_crtc_state *crtc_state;
3858 const struct drm_crtc *crtc;
3859 u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
3860 enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
3861 u16 ddb_size;
3862 u32 i;
c107acfe 3863
a6d3460e 3864 if (WARN_ON(!state) || !cstate->base.active) {
b9cec075
DL
3865 alloc->start = 0;
3866 alloc->end = 0;
a6d3460e 3867 *num_active = hweight32(dev_priv->active_crtcs);
b9cec075
DL
3868 return;
3869 }
3870
a6d3460e
MR
3871 if (intel_state->active_pipe_changes)
3872 *num_active = hweight32(intel_state->active_crtcs);
3873 else
3874 *num_active = hweight32(dev_priv->active_crtcs);
3875
aa9664ff
MK
3876 ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
3877 *num_active, ddb);
b9cec075 3878
c107acfe 3879 /*
cf1f697a
MK
3880 * If the state doesn't change the active CRTC's or there is no
3881 * modeset request, then there's no need to recalculate;
3882 * the existing pipe allocation limits should remain unchanged.
3883 * Note that we're safe from racing commits since any racing commit
3884 * that changes the active CRTC list or do modeset would need to
3885 * grab _all_ crtc locks, including the one we currently hold.
c107acfe 3886 */
cf1f697a 3887 if (!intel_state->active_pipe_changes && !intel_state->modeset) {
512b5527
ML
3888 /*
3889 * alloc may be cleared by clear_intel_crtc_state,
3890 * copy from old state to be sure
3891 */
3892 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
a6d3460e 3893 return;
c107acfe 3894 }
a6d3460e 3895
cf1f697a
MK
3896 /*
3897 * Watermark/ddb requirement highly depends upon width of the
3898 * framebuffer, So instead of allocating DDB equally among pipes
3899 * distribute DDB based on resolution/width of the display.
3900 */
3901 for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
3902 const struct drm_display_mode *adjusted_mode;
3903 int hdisplay, vdisplay;
3904 enum pipe pipe;
3905
3906 if (!crtc_state->enable)
3907 continue;
3908
3909 pipe = to_intel_crtc(crtc)->pipe;
3910 adjusted_mode = &crtc_state->adjusted_mode;
3911 drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
3912 total_width += hdisplay;
3913
3914 if (pipe < for_pipe)
3915 width_before_pipe += hdisplay;
3916 else if (pipe == for_pipe)
3917 pipe_width = hdisplay;
3918 }
3919
3920 alloc->start = ddb_size * width_before_pipe / total_width;
3921 alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
b9cec075
DL
3922}
3923
/*
 * Forward declarations: skl_cursor_allocation() below calls both helpers
 * before their definitions appear.
 */
static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
				 int width, const struct drm_format_info *format,
				 u64 modifier, unsigned int rotation,
				 u32 plane_pixel_rate, struct skl_wm_params *wp,
				 int color_plane);
static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
				 int level,
				 const struct skl_wm_params *wp,
				 const struct skl_wm_level *result_prev,
				 struct skl_wm_level *result /* out */);

3935static unsigned int
3936skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
3937 int num_active)
b9cec075 3938{
df331de3
VS
3939 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
3940 int level, max_level = ilk_wm_max_level(dev_priv);
3941 struct skl_wm_level wm = {};
3942 int ret, min_ddb_alloc = 0;
3943 struct skl_wm_params wp;
3944
3945 ret = skl_compute_wm_params(crtc_state, 256,
3946 drm_format_info(DRM_FORMAT_ARGB8888),
3947 DRM_FORMAT_MOD_LINEAR,
3948 DRM_MODE_ROTATE_0,
3949 crtc_state->pixel_rate, &wp, 0);
3950 WARN_ON(ret);
3951
3952 for (level = 0; level <= max_level; level++) {
6086e47b 3953 skl_compute_plane_wm(crtc_state, level, &wp, &wm, &wm);
df331de3
VS
3954 if (wm.min_ddb_alloc == U16_MAX)
3955 break;
3956
3957 min_ddb_alloc = wm.min_ddb_alloc;
3958 }
b9cec075 3959
df331de3 3960 return max(num_active == 1 ? 32 : 8, min_ddb_alloc);
b9cec075
DL
3961}
3962
37cde11b
MK
3963static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
3964 struct skl_ddb_entry *entry, u32 reg)
a269c583 3965{
37cde11b 3966
d7e449a8
VS
3967 entry->start = reg & DDB_ENTRY_MASK;
3968 entry->end = (reg >> DDB_ENTRY_END_SHIFT) & DDB_ENTRY_MASK;
37cde11b 3969
16160e3d
DL
3970 if (entry->end)
3971 entry->end += 1;
a269c583
DL
3972}
3973
ddf34319
MK
3974static void
3975skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
3976 const enum pipe pipe,
3977 const enum plane_id plane_id,
ff43bc37
VS
3978 struct skl_ddb_entry *ddb_y,
3979 struct skl_ddb_entry *ddb_uv)
ddf34319 3980{
ff43bc37
VS
3981 u32 val, val2;
3982 u32 fourcc = 0;
ddf34319
MK
3983
3984 /* Cursor doesn't support NV12/planar, so no extra calculation needed */
3985 if (plane_id == PLANE_CURSOR) {
3986 val = I915_READ(CUR_BUF_CFG(pipe));
ff43bc37 3987 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
ddf34319
MK
3988 return;
3989 }
3990
3991 val = I915_READ(PLANE_CTL(pipe, plane_id));
3992
3993 /* No DDB allocated for disabled planes */
ff43bc37
VS
3994 if (val & PLANE_CTL_ENABLE)
3995 fourcc = skl_format_to_fourcc(val & PLANE_CTL_FORMAT_MASK,
3996 val & PLANE_CTL_ORDER_RGBX,
3997 val & PLANE_CTL_ALPHA_MASK);
ddf34319 3998
ff43bc37
VS
3999 if (INTEL_GEN(dev_priv) >= 11) {
4000 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
4001 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
4002 } else {
4003 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
12a6c931 4004 val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
ddf34319 4005
df7d4156 4006 if (is_planar_yuv_format(fourcc))
ff43bc37
VS
4007 swap(val, val2);
4008
4009 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
4010 skl_ddb_entry_init_from_hw(dev_priv, ddb_uv, val2);
ddf34319
MK
4011 }
4012}
4013
ff43bc37
VS
4014void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
4015 struct skl_ddb_entry *ddb_y,
4016 struct skl_ddb_entry *ddb_uv)
a269c583 4017{
ff43bc37
VS
4018 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4019 enum intel_display_power_domain power_domain;
4020 enum pipe pipe = crtc->pipe;
0e6e0be4 4021 intel_wakeref_t wakeref;
ff43bc37 4022 enum plane_id plane_id;
74bd8004 4023
ff43bc37 4024 power_domain = POWER_DOMAIN_PIPE(pipe);
0e6e0be4
CW
4025 wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
4026 if (!wakeref)
ff43bc37 4027 return;
4d800030 4028
ff43bc37
VS
4029 for_each_plane_id_on_crtc(crtc, plane_id)
4030 skl_ddb_get_hw_plane_state(dev_priv, pipe,
4031 plane_id,
4032 &ddb_y[plane_id],
4033 &ddb_uv[plane_id]);
b10f1b20 4034
0e6e0be4 4035 intel_display_power_put(dev_priv, power_domain, wakeref);
ff43bc37 4036}
4d800030 4037
ff43bc37
VS
4038void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
4039 struct skl_ddb_allocation *ddb /* out */)
4040{
4041 ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
a269c583
DL
4042}
4043
9c2f7a9d
KM
4044/*
4045 * Determines the downscale amount of a plane for the purposes of watermark calculations.
4046 * The bspec defines downscale amount as:
4047 *
4048 * """
4049 * Horizontal down scale amount = maximum[1, Horizontal source size /
4050 * Horizontal destination size]
4051 * Vertical down scale amount = maximum[1, Vertical source size /
4052 * Vertical destination size]
4053 * Total down scale amount = Horizontal down scale amount *
4054 * Vertical down scale amount
4055 * """
4056 *
4057 * Return value is provided in 16.16 fixed point form to retain fractional part.
4058 * Caller should take care of dividing & rounding off the value.
4059 */
7084b50b 4060static uint_fixed_16_16_t
93aa2a1c
VS
4061skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
4062 const struct intel_plane_state *pstate)
9c2f7a9d 4063{
93aa2a1c 4064 struct intel_plane *plane = to_intel_plane(pstate->base.plane);
5ce9a649 4065 u32 src_w, src_h, dst_w, dst_h;
7084b50b
KM
4066 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4067 uint_fixed_16_16_t downscale_h, downscale_w;
9c2f7a9d 4068
93aa2a1c 4069 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
eac2cb81 4070 return u32_to_fixed16(0);
9c2f7a9d
KM
4071
4072 /* n.b., src is 16.16 fixed point, dst is whole integer */
93aa2a1c 4073 if (plane->id == PLANE_CURSOR) {
fce5adf5
VS
4074 /*
4075 * Cursors only support 0/180 degree rotation,
4076 * hence no need to account for rotation here.
4077 */
7084b50b
KM
4078 src_w = pstate->base.src_w >> 16;
4079 src_h = pstate->base.src_h >> 16;
93aa2a1c
VS
4080 dst_w = pstate->base.crtc_w;
4081 dst_h = pstate->base.crtc_h;
4082 } else {
fce5adf5
VS
4083 /*
4084 * Src coordinates are already rotated by 270 degrees for
4085 * the 90/270 degree plane rotation cases (to match the
4086 * GTT mapping), hence no need to account for rotation here.
4087 */
7084b50b
KM
4088 src_w = drm_rect_width(&pstate->base.src) >> 16;
4089 src_h = drm_rect_height(&pstate->base.src) >> 16;
93aa2a1c
VS
4090 dst_w = drm_rect_width(&pstate->base.dst);
4091 dst_h = drm_rect_height(&pstate->base.dst);
4092 }
4093
eac2cb81
KM
4094 fp_w_ratio = div_fixed16(src_w, dst_w);
4095 fp_h_ratio = div_fixed16(src_h, dst_h);
4096 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4097 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
9c2f7a9d 4098
7084b50b 4099 return mul_fixed16(downscale_w, downscale_h);
9c2f7a9d
KM
4100}
4101
73b0ca8e
MK
4102static uint_fixed_16_16_t
4103skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
4104{
eac2cb81 4105 uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
73b0ca8e
MK
4106
4107 if (!crtc_state->base.enable)
4108 return pipe_downscale;
4109
4110 if (crtc_state->pch_pfit.enabled) {
5ce9a649
JN
4111 u32 src_w, src_h, dst_w, dst_h;
4112 u32 pfit_size = crtc_state->pch_pfit.size;
73b0ca8e
MK
4113 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4114 uint_fixed_16_16_t downscale_h, downscale_w;
4115
4116 src_w = crtc_state->pipe_src_w;
4117 src_h = crtc_state->pipe_src_h;
4118 dst_w = pfit_size >> 16;
4119 dst_h = pfit_size & 0xffff;
4120
4121 if (!dst_w || !dst_h)
4122 return pipe_downscale;
4123
eac2cb81
KM
4124 fp_w_ratio = div_fixed16(src_w, dst_w);
4125 fp_h_ratio = div_fixed16(src_h, dst_h);
4126 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4127 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
73b0ca8e
MK
4128
4129 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
4130 }
4131
4132 return pipe_downscale;
4133}
4134
4135int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
4136 struct intel_crtc_state *cstate)
4137{
43037c86 4138 struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
73b0ca8e
MK
4139 struct drm_crtc_state *crtc_state = &cstate->base;
4140 struct drm_atomic_state *state = crtc_state->state;
4141 struct drm_plane *plane;
4142 const struct drm_plane_state *pstate;
4143 struct intel_plane_state *intel_pstate;
789f35d7 4144 int crtc_clock, dotclk;
5ce9a649 4145 u32 pipe_max_pixel_rate;
73b0ca8e 4146 uint_fixed_16_16_t pipe_downscale;
eac2cb81 4147 uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
73b0ca8e
MK
4148
4149 if (!cstate->base.enable)
4150 return 0;
4151
4152 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4153 uint_fixed_16_16_t plane_downscale;
eac2cb81 4154 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
73b0ca8e
MK
4155 int bpp;
4156
4157 if (!intel_wm_plane_visible(cstate,
4158 to_intel_plane_state(pstate)))
4159 continue;
4160
4161 if (WARN_ON(!pstate->fb))
4162 return -EINVAL;
4163
4164 intel_pstate = to_intel_plane_state(pstate);
4165 plane_downscale = skl_plane_downscale_amount(cstate,
4166 intel_pstate);
4167 bpp = pstate->fb->format->cpp[0] * 8;
4168 if (bpp == 64)
4169 plane_downscale = mul_fixed16(plane_downscale,
4170 fp_9_div_8);
4171
eac2cb81 4172 max_downscale = max_fixed16(plane_downscale, max_downscale);
73b0ca8e
MK
4173 }
4174 pipe_downscale = skl_pipe_downscale_amount(cstate);
4175
4176 pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4177
4178 crtc_clock = crtc_state->adjusted_mode.crtc_clock;
789f35d7
ML
4179 dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4180
43037c86 4181 if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
789f35d7
ML
4182 dotclk *= 2;
4183
4184 pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
73b0ca8e
MK
4185
4186 if (pipe_max_pixel_rate < crtc_clock) {
789f35d7 4187 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
73b0ca8e
MK
4188 return -EINVAL;
4189 }
4190
4191 return 0;
4192}
4193
24719e94 4194static u64
024c9045 4195skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
b048a00b 4196 const struct intel_plane_state *intel_pstate,
b879d58f 4197 const int plane)
b9cec075 4198{
b048a00b
ML
4199 struct intel_plane *intel_plane =
4200 to_intel_plane(intel_pstate->base.plane);
5ce9a649
JN
4201 u32 data_rate;
4202 u32 width = 0, height = 0;
8305494e
VS
4203 struct drm_framebuffer *fb;
4204 u32 format;
7084b50b 4205 uint_fixed_16_16_t down_scale_amount;
24719e94 4206 u64 rate;
a1de91e5 4207
936e71e3 4208 if (!intel_pstate->base.visible)
a1de91e5 4209 return 0;
8305494e 4210
b048a00b 4211 fb = intel_pstate->base.fb;
438b74a5 4212 format = fb->format->format;
8305494e 4213
b879d58f 4214 if (intel_plane->id == PLANE_CURSOR)
a1de91e5 4215 return 0;
df7d4156 4216 if (plane == 1 && !is_planar_yuv_format(format))
a1de91e5 4217 return 0;
a280f7dd 4218
fce5adf5
VS
4219 /*
4220 * Src coordinates are already rotated by 270 degrees for
4221 * the 90/270 degree plane rotation cases (to match the
4222 * GTT mapping), hence no need to account for rotation here.
4223 */
936e71e3
VS
4224 width = drm_rect_width(&intel_pstate->base.src) >> 16;
4225 height = drm_rect_height(&intel_pstate->base.src) >> 16;
a280f7dd 4226
b879d58f 4227 /* UV plane does 1/2 pixel sub-sampling */
df7d4156 4228 if (plane == 1 && is_planar_yuv_format(format)) {
b879d58f
MK
4229 width /= 2;
4230 height /= 2;
2cd601c6
CK
4231 }
4232
24719e94 4233 data_rate = width * height;
b879d58f 4234
93aa2a1c 4235 down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
8d19d7d9 4236
24719e94
ML
4237 rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4238
4239 rate *= fb->format->cpp[plane];
4240 return rate;
b9cec075
DL
4241}
4242
24719e94 4243static u64
1e6ee542 4244skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
24719e94
ML
4245 u64 *plane_data_rate,
4246 u64 *uv_plane_data_rate)
b9cec075 4247{
9c74d826
MR
4248 struct drm_crtc_state *cstate = &intel_cstate->base;
4249 struct drm_atomic_state *state = cstate->state;
c8fe32c1 4250 struct drm_plane *plane;
c8fe32c1 4251 const struct drm_plane_state *pstate;
24719e94 4252 u64 total_data_rate = 0;
a6d3460e
MR
4253
4254 if (WARN_ON(!state))
4255 return 0;
b9cec075 4256
a1de91e5 4257 /* Calculate and cache data rate for each plane */
c8fe32c1 4258 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
d5cdfdf5 4259 enum plane_id plane_id = to_intel_plane(plane)->id;
24719e94 4260 u64 rate;
b048a00b
ML
4261 const struct intel_plane_state *intel_pstate =
4262 to_intel_plane_state(pstate);
a6d3460e 4263
b879d58f 4264 /* packed/y */
a6d3460e 4265 rate = skl_plane_relative_data_rate(intel_cstate,
b048a00b 4266 intel_pstate, 0);
d5cdfdf5 4267 plane_data_rate[plane_id] = rate;
1e6ee542 4268 total_data_rate += rate;
a6d3460e 4269
b879d58f 4270 /* uv-plane */
a6d3460e 4271 rate = skl_plane_relative_data_rate(intel_cstate,
b048a00b 4272 intel_pstate, 1);
b879d58f 4273 uv_plane_data_rate[plane_id] = rate;
1e6ee542 4274 total_data_rate += rate;
b9cec075
DL
4275 }
4276
4277 return total_data_rate;
4278}
4279
b048a00b
ML
4280static u64
4281icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4282 u64 *plane_data_rate)
4283{
4284 struct drm_crtc_state *cstate = &intel_cstate->base;
4285 struct drm_atomic_state *state = cstate->state;
4286 struct drm_plane *plane;
4287 const struct drm_plane_state *pstate;
4288 u64 total_data_rate = 0;
4289
4290 if (WARN_ON(!state))
4291 return 0;
4292
4293 /* Calculate and cache data rate for each plane */
4294 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4295 const struct intel_plane_state *intel_pstate =
4296 to_intel_plane_state(pstate);
4297 enum plane_id plane_id = to_intel_plane(plane)->id;
4298 u64 rate;
4299
4300 if (!intel_pstate->linked_plane) {
4301 rate = skl_plane_relative_data_rate(intel_cstate,
4302 intel_pstate, 0);
4303 plane_data_rate[plane_id] = rate;
4304 total_data_rate += rate;
4305 } else {
4306 enum plane_id y_plane_id;
4307
4308 /*
4309 * The slave plane might not iterate in
4310 * drm_atomic_crtc_state_for_each_plane_state(),
4311 * and needs the master plane state which may be
4312 * NULL if we try get_new_plane_state(), so we
4313 * always calculate from the master.
4314 */
4315 if (intel_pstate->slave)
4316 continue;
4317
4318 /* Y plane rate is calculated on the slave */
4319 rate = skl_plane_relative_data_rate(intel_cstate,
4320 intel_pstate, 0);
4321 y_plane_id = intel_pstate->linked_plane->id;
4322 plane_data_rate[y_plane_id] = rate;
4323 total_data_rate += rate;
4324
4325 rate = skl_plane_relative_data_rate(intel_cstate,
4326 intel_pstate, 1);
4327 plane_data_rate[plane_id] = rate;
4328 total_data_rate += rate;
4329 }
4330 }
4331
4332 return total_data_rate;
4333}
4334
c107acfe 4335static int
024c9045 4336skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
b9cec075
DL
4337 struct skl_ddb_allocation *ddb /* out */)
4338{
c107acfe 4339 struct drm_atomic_state *state = cstate->base.state;
024c9045 4340 struct drm_crtc *crtc = cstate->base.crtc;
b048a00b 4341 struct drm_i915_private *dev_priv = to_i915(crtc->dev);
b9cec075 4342 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
ce0ba283 4343 struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
5ce9a649
JN
4344 u16 alloc_size, start = 0;
4345 u16 total[I915_MAX_PLANES] = {};
4346 u16 uv_total[I915_MAX_PLANES] = {};
24719e94 4347 u64 total_data_rate;
d5cdfdf5 4348 enum plane_id plane_id;
c107acfe 4349 int num_active;
24719e94
ML
4350 u64 plane_data_rate[I915_MAX_PLANES] = {};
4351 u64 uv_plane_data_rate[I915_MAX_PLANES] = {};
0aded171 4352 u32 blocks;
d8e87498 4353 int level;
b9cec075 4354
5a920b85 4355 /* Clear the partitioning for disabled planes. */
ff43bc37
VS
4356 memset(cstate->wm.skl.plane_ddb_y, 0, sizeof(cstate->wm.skl.plane_ddb_y));
4357 memset(cstate->wm.skl.plane_ddb_uv, 0, sizeof(cstate->wm.skl.plane_ddb_uv));
5a920b85 4358
a6d3460e
MR
4359 if (WARN_ON(!state))
4360 return 0;
4361
c107acfe 4362 if (!cstate->base.active) {
ce0ba283 4363 alloc->start = alloc->end = 0;
c107acfe
MR
4364 return 0;
4365 }
4366
323b0a82
LDM
4367 if (INTEL_GEN(dev_priv) >= 11)
4368 total_data_rate =
4369 icl_get_total_relative_data_rate(cstate,
4370 plane_data_rate);
4371 else
b048a00b
ML
4372 total_data_rate =
4373 skl_get_total_relative_data_rate(cstate,
4374 plane_data_rate,
4375 uv_plane_data_rate);
323b0a82 4376
b048a00b
ML
4377
4378 skl_ddb_get_pipe_allocation_limits(dev_priv, cstate, total_data_rate,
4379 ddb, alloc, &num_active);
34bb56af 4380 alloc_size = skl_ddb_entry_size(alloc);
336031ea 4381 if (alloc_size == 0)
c107acfe 4382 return 0;
b9cec075 4383
d8e87498 4384 /* Allocate fixed number of blocks for cursor. */
df331de3 4385 total[PLANE_CURSOR] = skl_cursor_allocation(cstate, num_active);
d8e87498
MR
4386 alloc_size -= total[PLANE_CURSOR];
4387 cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].start =
4388 alloc->end - total[PLANE_CURSOR];
4389 cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
4390
4391 if (total_data_rate == 0)
4392 return 0;
a6d3460e 4393
49845a7a 4394 /*
d8e87498
MR
4395 * Find the highest watermark level for which we can satisfy the block
4396 * requirement of active planes.
49845a7a 4397 */
d8e87498 4398 for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) {
25db2eaf 4399 blocks = 0;
d8e87498 4400 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5e6037c8
VS
4401 const struct skl_plane_wm *wm =
4402 &cstate->wm.skl.optimal.planes[plane_id];
10a7e07b
VS
4403
4404 if (plane_id == PLANE_CURSOR) {
4405 if (WARN_ON(wm->wm[level].min_ddb_alloc >
4406 total[PLANE_CURSOR])) {
4407 blocks = U32_MAX;
4408 break;
4409 }
d8e87498 4410 continue;
10a7e07b 4411 }
80958155 4412
961d95e0
VS
4413 blocks += wm->wm[level].min_ddb_alloc;
4414 blocks += wm->uv_wm[level].min_ddb_alloc;
d8e87498
MR
4415 }
4416
3cf963cf 4417 if (blocks <= alloc_size) {
d8e87498
MR
4418 alloc_size -= blocks;
4419 break;
4420 }
80958155
DL
4421 }
4422
d8e87498 4423 if (level < 0) {
5ba6faaf 4424 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
d8e87498
MR
4425 DRM_DEBUG_KMS("minimum required %d/%d\n", blocks,
4426 alloc_size);
5ba6faaf
KM
4427 return -EINVAL;
4428 }
4429
b9cec075 4430 /*
d8e87498
MR
4431 * Grant each plane the blocks it requires at the highest achievable
4432 * watermark level, plus an extra share of the leftover blocks
4433 * proportional to its relative data rate.
b9cec075 4434 */
d5cdfdf5 4435 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5e6037c8
VS
4436 const struct skl_plane_wm *wm =
4437 &cstate->wm.skl.optimal.planes[plane_id];
d8e87498
MR
4438 u64 rate;
4439 u16 extra;
b9cec075 4440
d5cdfdf5 4441 if (plane_id == PLANE_CURSOR)
49845a7a
ML
4442 continue;
4443
b9cec075 4444 /*
d8e87498
MR
4445 * We've accounted for all active planes; remaining planes are
4446 * all disabled.
b9cec075 4447 */
d8e87498
MR
4448 if (total_data_rate == 0)
4449 break;
b9cec075 4450
d8e87498
MR
4451 rate = plane_data_rate[plane_id];
4452 extra = min_t(u16, alloc_size,
4453 DIV64_U64_ROUND_UP(alloc_size * rate,
4454 total_data_rate));
961d95e0 4455 total[plane_id] = wm->wm[level].min_ddb_alloc + extra;
d8e87498
MR
4456 alloc_size -= extra;
4457 total_data_rate -= rate;
9a30a261 4458
d8e87498
MR
4459 if (total_data_rate == 0)
4460 break;
a1de91e5 4461
d8e87498
MR
4462 rate = uv_plane_data_rate[plane_id];
4463 extra = min_t(u16, alloc_size,
4464 DIV64_U64_ROUND_UP(alloc_size * rate,
4465 total_data_rate));
961d95e0 4466 uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;
d8e87498
MR
4467 alloc_size -= extra;
4468 total_data_rate -= rate;
4469 }
4470 WARN_ON(alloc_size != 0 || total_data_rate != 0);
4471
4472 /* Set the actual DDB start/end points for each plane */
4473 start = alloc->start;
4474 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5e6037c8
VS
4475 struct skl_ddb_entry *plane_alloc =
4476 &cstate->wm.skl.plane_ddb_y[plane_id];
4477 struct skl_ddb_entry *uv_plane_alloc =
4478 &cstate->wm.skl.plane_ddb_uv[plane_id];
d8e87498
MR
4479
4480 if (plane_id == PLANE_CURSOR)
4481 continue;
4482
b048a00b 4483 /* Gen11+ uses a separate plane for UV watermarks */
d8e87498
MR
4484 WARN_ON(INTEL_GEN(dev_priv) >= 11 && uv_total[plane_id]);
4485
4486 /* Leave disabled planes at (0,0) */
4487 if (total[plane_id]) {
4488 plane_alloc->start = start;
4489 start += total[plane_id];
4490 plane_alloc->end = start;
4491 }
b048a00b 4492
d8e87498
MR
4493 if (uv_total[plane_id]) {
4494 uv_plane_alloc->start = start;
4495 start += uv_total[plane_id];
4496 uv_plane_alloc->end = start;
c107acfe 4497 }
d8e87498 4498 }
9a30a261 4499
d8e87498
MR
4500 /*
4501 * When we calculated watermark values we didn't know how high
4502 * of a level we'd actually be able to hit, so we just marked
4503 * all levels as "enabled." Go back now and disable the ones
4504 * that aren't actually possible.
4505 */
4506 for (level++; level <= ilk_wm_max_level(dev_priv); level++) {
4507 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5e6037c8
VS
4508 struct skl_plane_wm *wm =
4509 &cstate->wm.skl.optimal.planes[plane_id];
a301cb0f
VS
4510
4511 /*
4512 * We only disable the watermarks for each plane if
4513 * they exceed the ddb allocation of said plane. This
4514 * is done so that we don't end up touching cursor
4515 * watermarks needlessly when some other plane reduces
4516 * our max possible watermark level.
4517 *
4518 * Bspec has this to say about the PLANE_WM enable bit:
4519 * "All the watermarks at this level for all enabled
4520 * planes must be enabled before the level will be used."
4521 * So this is actually safe to do.
4522 */
4523 if (wm->wm[level].min_ddb_alloc > total[plane_id] ||
4524 wm->uv_wm[level].min_ddb_alloc > uv_total[plane_id])
4525 memset(&wm->wm[level], 0, sizeof(wm->wm[level]));
290248c2 4526
c384afe3 4527 /*
39564ae8 4528 * Wa_1408961008:icl, ehl
c384afe3
VS
4529 * Underruns with WM1+ disabled
4530 */
39564ae8 4531 if (IS_GEN(dev_priv, 11) &&
290248c2
VS
4532 level == 1 && wm->wm[0].plane_en) {
4533 wm->wm[level].plane_res_b = wm->wm[0].plane_res_b;
c384afe3
VS
4534 wm->wm[level].plane_res_l = wm->wm[0].plane_res_l;
4535 wm->wm[level].ignore_lines = wm->wm[0].ignore_lines;
290248c2 4536 }
d8e87498
MR
4537 }
4538 }
4539
4540 /*
4541 * Go back and disable the transition watermark if it turns out we
4542 * don't have enough DDB blocks for it.
4543 */
4544 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5e6037c8
VS
4545 struct skl_plane_wm *wm =
4546 &cstate->wm.skl.optimal.planes[plane_id];
4547
b19c9bca 4548 if (wm->trans_wm.plane_res_b >= total[plane_id])
d8e87498 4549 memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));
b9cec075
DL
4550 }
4551
c107acfe 4552 return 0;
b9cec075
DL
4553}
4554
2d41c0b5
PB
4555/*
4556 * The max latency should be 257 (max the punit can code is 255 and we add 2us
ac484963 4557 * for the read latency) and cpp should always be <= 8, so that
2d41c0b5
PB
4558 * should allow pixel_rate up to ~2 GHz which seems sufficient since max
4559 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4560*/
6c64dd37 4561static uint_fixed_16_16_t
5ce9a649
JN
4562skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate,
4563 u8 cpp, u32 latency, u32 dbuf_block_size)
2d41c0b5 4564{
5ce9a649 4565 u32 wm_intermediate_val;
b95320bd 4566 uint_fixed_16_16_t ret;
2d41c0b5
PB
4567
4568 if (latency == 0)
b95320bd 4569 return FP_16_16_MAX;
2d41c0b5 4570
b95320bd 4571 wm_intermediate_val = latency * pixel_rate * cpp;
df8ee190 4572 ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
6c64dd37
PZ
4573
4574 if (INTEL_GEN(dev_priv) >= 10)
4575 ret = add_fixed16_u32(ret, 1);
4576
2d41c0b5
PB
4577 return ret;
4578}
4579
5ce9a649
JN
4580static uint_fixed_16_16_t
4581skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency,
4582 uint_fixed_16_16_t plane_blocks_per_line)
2d41c0b5 4583{
5ce9a649 4584 u32 wm_intermediate_val;
b95320bd 4585 uint_fixed_16_16_t ret;
2d41c0b5
PB
4586
4587 if (latency == 0)
b95320bd 4588 return FP_16_16_MAX;
2d41c0b5 4589
2d41c0b5 4590 wm_intermediate_val = latency * pixel_rate;
b95320bd
MK
4591 wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4592 pipe_htotal * 1000);
eac2cb81 4593 ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
2d41c0b5
PB
4594 return ret;
4595}
4596
d555cb58 4597static uint_fixed_16_16_t
b048a00b 4598intel_get_linetime_us(const struct intel_crtc_state *cstate)
d555cb58 4599{
5ce9a649
JN
4600 u32 pixel_rate;
4601 u32 crtc_htotal;
d555cb58
KM
4602 uint_fixed_16_16_t linetime_us;
4603
4604 if (!cstate->base.active)
eac2cb81 4605 return u32_to_fixed16(0);
d555cb58
KM
4606
4607 pixel_rate = cstate->pixel_rate;
4608
4609 if (WARN_ON(pixel_rate == 0))
eac2cb81 4610 return u32_to_fixed16(0);
d555cb58
KM
4611
4612 crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
eac2cb81 4613 linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
d555cb58
KM
4614
4615 return linetime_us;
4616}
4617
5ce9a649 4618static u32
eb2fdcdf
KM
4619skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4620 const struct intel_plane_state *pstate)
9c2f7a9d 4621{
5ce9a649 4622 u64 adjusted_pixel_rate;
7084b50b 4623 uint_fixed_16_16_t downscale_amount;
9c2f7a9d
KM
4624
4625 /* Shouldn't reach here on disabled planes... */
93aa2a1c 4626 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
9c2f7a9d
KM
4627 return 0;
4628
4629 /*
4630 * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4631 * with additional adjustments for plane-specific scaling.
4632 */
a7d1b3f4 4633 adjusted_pixel_rate = cstate->pixel_rate;
93aa2a1c 4634 downscale_amount = skl_plane_downscale_amount(cstate, pstate);
9c2f7a9d 4635
7084b50b
KM
4636 return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4637 downscale_amount);
9c2f7a9d
KM
4638}
4639
7e452fdb 4640static int
c92558aa
VS
4641skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
4642 int width, const struct drm_format_info *format,
4643 u64 modifier, unsigned int rotation,
4644 u32 plane_pixel_rate, struct skl_wm_params *wp,
4645 int color_plane)
2d41c0b5 4646{
c92558aa
VS
4647 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
4648 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5ce9a649 4649 u32 interm_pbpl;
2d41c0b5 4650
df7d4156 4651 /* only planar format has two planes */
c92558aa 4652 if (color_plane == 1 && !is_planar_yuv_format(format->format)) {
df7d4156 4653 DRM_DEBUG_KMS("Non planar format have single plane\n");
942aa2d0
MK
4654 return -EINVAL;
4655 }
4656
c92558aa
VS
4657 wp->y_tiled = modifier == I915_FORMAT_MOD_Y_TILED ||
4658 modifier == I915_FORMAT_MOD_Yf_TILED ||
4659 modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4660 modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4661 wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
4662 wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4663 modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4664 wp->is_planar = is_planar_yuv_format(format->format);
a280f7dd 4665
c92558aa 4666 wp->width = width;
45bee430 4667 if (color_plane == 1 && wp->is_planar)
942aa2d0
MK
4668 wp->width /= 2;
4669
c92558aa
VS
4670 wp->cpp = format->cpp[color_plane];
4671 wp->plane_pixel_rate = plane_pixel_rate;
9c2f7a9d 4672
df8ee190 4673 if (INTEL_GEN(dev_priv) >= 11 &&
c92558aa 4674 modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1)
df8ee190
MK
4675 wp->dbuf_block_size = 256;
4676 else
4677 wp->dbuf_block_size = 512;
4678
c92558aa 4679 if (drm_rotation_90_or_270(rotation)) {
7e452fdb 4680 switch (wp->cpp) {
1186fa85 4681 case 1:
7e452fdb 4682 wp->y_min_scanlines = 16;
1186fa85
PZ
4683 break;
4684 case 2:
7e452fdb 4685 wp->y_min_scanlines = 8;
1186fa85 4686 break;
1186fa85 4687 case 4:
7e452fdb 4688 wp->y_min_scanlines = 4;
1186fa85 4689 break;
86a462bc 4690 default:
7e452fdb 4691 MISSING_CASE(wp->cpp);
86a462bc 4692 return -EINVAL;
1186fa85
PZ
4693 }
4694 } else {
7e452fdb 4695 wp->y_min_scanlines = 4;
1186fa85
PZ
4696 }
4697
60e983ff 4698 if (skl_needs_memory_bw_wa(dev_priv))
7e452fdb 4699 wp->y_min_scanlines *= 2;
2ef32dee 4700
7e452fdb
KM
4701 wp->plane_bytes_per_line = wp->width * wp->cpp;
4702 if (wp->y_tiled) {
4703 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
df8ee190
MK
4704 wp->y_min_scanlines,
4705 wp->dbuf_block_size);
6c64dd37
PZ
4706
4707 if (INTEL_GEN(dev_priv) >= 10)
4708 interm_pbpl++;
4709
7e452fdb
KM
4710 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4711 wp->y_min_scanlines);
cf819eff 4712 } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) {
df8ee190
MK
4713 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4714 wp->dbuf_block_size);
7e452fdb 4715 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
ef8a4fb4 4716 } else {
df8ee190
MK
4717 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4718 wp->dbuf_block_size) + 1;
7e452fdb 4719 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
7a1a8aed
PZ
4720 }
4721
7e452fdb
KM
4722 wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4723 wp->plane_blocks_per_line);
c92558aa 4724
7e452fdb 4725 wp->linetime_us = fixed16_to_u32_round_up(
c92558aa 4726 intel_get_linetime_us(crtc_state));
7e452fdb
KM
4727
4728 return 0;
4729}
4730
c92558aa
VS
4731static int
4732skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state,
4733 const struct intel_plane_state *plane_state,
4734 struct skl_wm_params *wp, int color_plane)
4735{
4736 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
4737 const struct drm_framebuffer *fb = plane_state->base.fb;
4738 int width;
4739
4740 if (plane->id == PLANE_CURSOR) {
4741 width = plane_state->base.crtc_w;
4742 } else {
4743 /*
4744 * Src coordinates are already rotated by 270 degrees for
4745 * the 90/270 degree plane rotation cases (to match the
4746 * GTT mapping), hence no need to account for rotation here.
4747 */
4748 width = drm_rect_width(&plane_state->base.src) >> 16;
4749 }
4750
4751 return skl_compute_wm_params(crtc_state, width,
4752 fb->format, fb->modifier,
4753 plane_state->base.rotation,
4754 skl_adjusted_plane_pixel_rate(crtc_state, plane_state),
4755 wp, color_plane);
4756}
4757
b52c273b
VS
4758static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
4759{
4760 if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
4761 return true;
4762
4763 /* The number of lines are ignored for the level 0 watermark. */
4764 return level > 0;
4765}
4766
d8e87498 4767static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
d8e87498
MR
4768 int level,
4769 const struct skl_wm_params *wp,
4770 const struct skl_wm_level *result_prev,
4771 struct skl_wm_level *result /* out */)
7e452fdb 4772{
67155a69 4773 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5ce9a649 4774 u32 latency = dev_priv->wm.skl_latency[level];
7e452fdb
KM
4775 uint_fixed_16_16_t method1, method2;
4776 uint_fixed_16_16_t selected_result;
961d95e0 4777 u32 res_blocks, res_lines, min_ddb_alloc = 0;
ce110ec3 4778
0aded171
VS
4779 if (latency == 0) {
4780 /* reject it */
4781 result->min_ddb_alloc = U16_MAX;
692927f4 4782 return;
0aded171 4783 }
692927f4 4784
7e452fdb 4785 /* Display WA #1141: kbl,cfl */
d86ba628
KM
4786 if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4787 IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
7e452fdb
KM
4788 dev_priv->ipc_enabled)
4789 latency += 4;
4790
60e983ff 4791 if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled)
7e452fdb
KM
4792 latency += 15;
4793
4794 method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
df8ee190 4795 wp->cpp, latency, wp->dbuf_block_size);
7e452fdb 4796 method2 = skl_wm_method2(wp->plane_pixel_rate,
024c9045 4797 cstate->base.adjusted_mode.crtc_htotal,
1186fa85 4798 latency,
7e452fdb 4799 wp->plane_blocks_per_line);
75676ed4 4800
7e452fdb
KM
4801 if (wp->y_tiled) {
4802 selected_result = max_fixed16(method2, wp->y_tile_minimum);
0fda6568 4803 } else {
7e452fdb 4804 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
df8ee190 4805 wp->dbuf_block_size < 1) &&
077b5820 4806 (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) {
f1db3eaf 4807 selected_result = method2;
077b5820 4808 } else if (latency >= wp->linetime_us) {
cf819eff 4809 if (IS_GEN(dev_priv, 9) &&
077b5820
PZ
4810 !IS_GEMINILAKE(dev_priv))
4811 selected_result = min_fixed16(method1, method2);
4812 else
4813 selected_result = method2;
4814 } else {
0fda6568 4815 selected_result = method1;
077b5820 4816 }
0fda6568 4817 }
2d41c0b5 4818
eac2cb81 4819 res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
d273ecce 4820 res_lines = div_round_up_fixed16(selected_result,
7e452fdb 4821 wp->plane_blocks_per_line);
e6d66171 4822
a5b79d34
PZ
4823 if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) {
4824 /* Display WA #1125: skl,bxt,kbl */
4825 if (level == 0 && wp->rc_surface)
4826 res_blocks +=
4827 fixed16_to_u32_round_up(wp->y_tile_minimum);
4828
4829 /* Display WA #1126: skl,bxt,kbl */
4830 if (level >= 1 && level <= 7) {
4831 if (wp->y_tiled) {
4832 res_blocks +=
4833 fixed16_to_u32_round_up(wp->y_tile_minimum);
4834 res_lines += wp->y_min_scanlines;
4835 } else {
4836 res_blocks++;
4837 }
8b2b53ce 4838
a5b79d34
PZ
4839 /*
4840 * Make sure result blocks for higher latency levels are
4841 * atleast as high as level below the current level.
4842 * Assumption in DDB algorithm optimization for special
4843 * cases. Also covers Display WA #1125 for RC.
4844 */
4845 if (result_prev->plane_res_b > res_blocks)
4846 res_blocks = result_prev->plane_res_b;
4847 }
0fda6568 4848 }
e6d66171 4849
961d95e0
VS
4850 if (INTEL_GEN(dev_priv) >= 11) {
4851 if (wp->y_tiled) {
4852 int extra_lines;
4853
4854 if (res_lines % wp->y_min_scanlines == 0)
4855 extra_lines = wp->y_min_scanlines;
4856 else
4857 extra_lines = wp->y_min_scanlines * 2 -
4858 res_lines % wp->y_min_scanlines;
4859
4860 min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines,
4861 wp->plane_blocks_per_line);
4862 } else {
4863 min_ddb_alloc = res_blocks +
4864 DIV_ROUND_UP(res_blocks, 10);
4865 }
4866 }
4867
b52c273b
VS
4868 if (!skl_wm_has_lines(dev_priv, level))
4869 res_lines = 0;
4870
0aded171
VS
4871 if (res_lines > 31) {
4872 /* reject it */
4873 result->min_ddb_alloc = U16_MAX;
d8e87498 4874 return;
0aded171 4875 }
d8e87498
MR
4876
4877 /*
4878 * If res_lines is valid, assume we can use this watermark level
4879 * for now. We'll come back and disable it after we calculate the
4880 * DDB allocation if it turns out we don't actually have enough
4881 * blocks to satisfy it.
4882 */
62027b77
MK
4883 result->plane_res_b = res_blocks;
4884 result->plane_res_l = res_lines;
961d95e0
VS
4885 /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */
4886 result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1;
62027b77 4887 result->plane_en = true;
2d41c0b5
PB
4888}
4889
d8e87498 4890static void
51de9c6d 4891skl_compute_wm_levels(const struct intel_crtc_state *cstate,
7e452fdb 4892 const struct skl_wm_params *wm_params,
b048a00b 4893 struct skl_wm_level *levels)
2d41c0b5 4894{
67155a69 4895 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
d2f5e36d 4896 int level, max_level = ilk_wm_max_level(dev_priv);
b048a00b 4897 struct skl_wm_level *result_prev = &levels[0];
a62163e9 4898
d2f5e36d 4899 for (level = 0; level <= max_level; level++) {
b048a00b 4900 struct skl_wm_level *result = &levels[level];
d2f5e36d 4901
67155a69 4902 skl_compute_plane_wm(cstate, level, wm_params,
d8e87498 4903 result_prev, result);
b048a00b
ML
4904
4905 result_prev = result;
d2f5e36d 4906 }
2d41c0b5
PB
4907}
4908
5ce9a649 4909static u32
b048a00b 4910skl_compute_linetime_wm(const struct intel_crtc_state *cstate)
407b50f3 4911{
a3a8986c
MK
4912 struct drm_atomic_state *state = cstate->base.state;
4913 struct drm_i915_private *dev_priv = to_i915(state->dev);
d555cb58 4914 uint_fixed_16_16_t linetime_us;
5ce9a649 4915 u32 linetime_wm;
30d1b5fe 4916
d555cb58 4917 linetime_us = intel_get_linetime_us(cstate);
eac2cb81 4918 linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
a3a8986c 4919
717671c6
VS
4920 /* Display WA #1135: BXT:ALL GLK:ALL */
4921 if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled)
446e850c 4922 linetime_wm /= 2;
a3a8986c
MK
4923
4924 return linetime_wm;
407b50f3
DL
4925}
4926
b048a00b 4927static void skl_compute_transition_wm(const struct intel_crtc_state *cstate,
6a3c910b 4928 const struct skl_wm_params *wp,
d8e87498 4929 struct skl_plane_wm *wm)
407b50f3 4930{
ca47667f
KM
4931 struct drm_device *dev = cstate->base.crtc->dev;
4932 const struct drm_i915_private *dev_priv = to_i915(dev);
5ce9a649
JN
4933 u16 trans_min, trans_y_tile_min;
4934 const u16 trans_amount = 10; /* This is configurable amount */
4935 u16 wm0_sel_res_b, trans_offset_b, res_blocks;
ca47667f 4936
ca47667f
KM
4937 /* Transition WM are not recommended by HW team for GEN9 */
4938 if (INTEL_GEN(dev_priv) <= 9)
14a43062 4939 return;
ca47667f
KM
4940
4941 /* Transition WM don't make any sense if ipc is disabled */
4942 if (!dev_priv->ipc_enabled)
14a43062 4943 return;
ca47667f 4944
91961a85
PZ
4945 trans_min = 14;
4946 if (INTEL_GEN(dev_priv) >= 11)
ca47667f
KM
4947 trans_min = 4;
4948
4949 trans_offset_b = trans_min + trans_amount;
4950
cbacc79d
PZ
4951 /*
4952 * The spec asks for Selected Result Blocks for wm0 (the real value),
4953 * not Result Blocks (the integer value). Pay attention to the capital
4954 * letters. The value wm_l0->plane_res_b is actually Result Blocks, but
4955 * since Result Blocks is the ceiling of Selected Result Blocks plus 1,
4956 * and since we later will have to get the ceiling of the sum in the
4957 * transition watermarks calculation, we can just pretend Selected
4958 * Result Blocks is Result Blocks minus 1 and it should work for the
4959 * current platforms.
4960 */
6a3c910b 4961 wm0_sel_res_b = wm->wm[0].plane_res_b - 1;
cbacc79d 4962
ca47667f 4963 if (wp->y_tiled) {
5ce9a649
JN
4964 trans_y_tile_min =
4965 (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum);
cbacc79d 4966 res_blocks = max(wm0_sel_res_b, trans_y_tile_min) +
ca47667f
KM
4967 trans_offset_b;
4968 } else {
cbacc79d 4969 res_blocks = wm0_sel_res_b + trans_offset_b;
ca47667f
KM
4970
4971 /* WA BUG:1938466 add one block for non y-tile planes */
4972 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4973 res_blocks += 1;
4974
4975 }
4976
d8e87498
MR
4977 /*
4978 * Just assume we can enable the transition watermark. After
4979 * computing the DDB we'll come back and disable it if that
4980 * assumption turns out to be false.
4981 */
4982 wm->trans_wm.plane_res_b = res_blocks + 1;
4983 wm->trans_wm.plane_en = true;
407b50f3
DL
4984}
4985
ff43bc37 4986static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
8315847b
VS
4987 const struct intel_plane_state *plane_state,
4988 enum plane_id plane_id, int color_plane)
b048a00b 4989{
8315847b 4990 struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
b048a00b 4991 struct skl_wm_params wm_params;
b048a00b
ML
4992 int ret;
4993
51de9c6d 4994 ret = skl_compute_plane_wm_params(crtc_state, plane_state,
b048a00b
ML
4995 &wm_params, color_plane);
4996 if (ret)
4997 return ret;
4998
67155a69 4999 skl_compute_wm_levels(crtc_state, &wm_params, wm->wm);
d8e87498 5000 skl_compute_transition_wm(crtc_state, &wm_params, wm);
b048a00b
ML
5001
5002 return 0;
5003}
5004
ff43bc37 5005static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
8315847b
VS
5006 const struct intel_plane_state *plane_state,
5007 enum plane_id plane_id)
b048a00b 5008{
8315847b 5009 struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
b048a00b 5010 struct skl_wm_params wm_params;
b048a00b
ML
5011 int ret;
5012
8315847b 5013 wm->is_planar = true;
b048a00b
ML
5014
5015 /* uv plane watermarks must also be validated for NV12/Planar */
51de9c6d 5016 ret = skl_compute_plane_wm_params(crtc_state, plane_state,
8315847b
VS
5017 &wm_params, 1);
5018 if (ret)
5019 return ret;
b048a00b 5020
67155a69 5021 skl_compute_wm_levels(crtc_state, &wm_params, wm->uv_wm);
b048a00b 5022
8315847b 5023 return 0;
b048a00b
ML
5024}
5025
96cb7cde 5026static int skl_build_plane_wm(struct intel_crtc_state *crtc_state,
8315847b 5027 const struct intel_plane_state *plane_state)
b048a00b 5028{
8315847b
VS
5029 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
5030 const struct drm_framebuffer *fb = plane_state->base.fb;
5031 enum plane_id plane_id = plane->id;
b048a00b 5032 int ret;
b048a00b 5033
8315847b
VS
5034 if (!intel_wm_plane_visible(crtc_state, plane_state))
5035 return 0;
5036
ff43bc37 5037 ret = skl_build_plane_wm_single(crtc_state, plane_state,
8315847b 5038 plane_id, 0);
b048a00b
ML
5039 if (ret)
5040 return ret;
5041
8315847b 5042 if (fb->format->is_yuv && fb->format->num_planes > 1) {
ff43bc37 5043 ret = skl_build_plane_wm_uv(crtc_state, plane_state,
8315847b
VS
5044 plane_id);
5045 if (ret)
5046 return ret;
5047 }
5048
5049 return 0;
5050}
5051
96cb7cde 5052static int icl_build_plane_wm(struct intel_crtc_state *crtc_state,
8315847b
VS
5053 const struct intel_plane_state *plane_state)
5054{
5055 enum plane_id plane_id = to_intel_plane(plane_state->base.plane)->id;
5056 int ret;
5057
5058 /* Watermarks calculated in master */
5059 if (plane_state->slave)
5060 return 0;
5061
5062 if (plane_state->linked_plane) {
5063 const struct drm_framebuffer *fb = plane_state->base.fb;
5064 enum plane_id y_plane_id = plane_state->linked_plane->id;
5065
5066 WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state));
5067 WARN_ON(!fb->format->is_yuv ||
5068 fb->format->num_planes == 1);
5069
ff43bc37 5070 ret = skl_build_plane_wm_single(crtc_state, plane_state,
8315847b
VS
5071 y_plane_id, 0);
5072 if (ret)
5073 return ret;
5074
ff43bc37 5075 ret = skl_build_plane_wm_single(crtc_state, plane_state,
8315847b
VS
5076 plane_id, 1);
5077 if (ret)
5078 return ret;
5079 } else if (intel_wm_plane_visible(crtc_state, plane_state)) {
ff43bc37 5080 ret = skl_build_plane_wm_single(crtc_state, plane_state,
8315847b
VS
5081 plane_id, 0);
5082 if (ret)
5083 return ret;
5084 }
5085
5086 return 0;
b048a00b
ML
5087}
5088
96cb7cde 5089static int skl_build_pipe_wm(struct intel_crtc_state *cstate)
2d41c0b5 5090{
8315847b 5091 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
96cb7cde 5092 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
eb2fdcdf 5093 struct drm_crtc_state *crtc_state = &cstate->base;
eb2fdcdf
KM
5094 struct drm_plane *plane;
5095 const struct drm_plane_state *pstate;
55994c2c 5096 int ret;
2d41c0b5 5097
a62163e9
L
5098 /*
5099 * We'll only calculate watermarks for planes that are actually
5100 * enabled, so make sure all other planes are set as disabled.
5101 */
5102 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
5103
eb2fdcdf
KM
5104 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
5105 const struct intel_plane_state *intel_pstate =
5106 to_intel_plane_state(pstate);
eb2fdcdf 5107
8315847b 5108 if (INTEL_GEN(dev_priv) >= 11)
96cb7cde 5109 ret = icl_build_plane_wm(cstate, intel_pstate);
b048a00b 5110 else
96cb7cde 5111 ret = skl_build_plane_wm(cstate, intel_pstate);
d2f5e36d
KM
5112 if (ret)
5113 return ret;
2d41c0b5 5114 }
942aa2d0 5115
024c9045 5116 pipe_wm->linetime = skl_compute_linetime_wm(cstate);
2d41c0b5 5117
55994c2c 5118 return 0;
2d41c0b5
PB
5119}
5120
f0f59a00
VS
5121static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
5122 i915_reg_t reg,
16160e3d
DL
5123 const struct skl_ddb_entry *entry)
5124{
5125 if (entry->end)
ff43bc37 5126 I915_WRITE_FW(reg, (entry->end - 1) << 16 | entry->start);
16160e3d 5127 else
ff43bc37 5128 I915_WRITE_FW(reg, 0);
16160e3d
DL
5129}
5130
d8c0fafc 5131static void skl_write_wm_level(struct drm_i915_private *dev_priv,
5132 i915_reg_t reg,
5133 const struct skl_wm_level *level)
5134{
5ce9a649 5135 u32 val = 0;
d8c0fafc 5136
2ed8e1f5 5137 if (level->plane_en)
d8c0fafc 5138 val |= PLANE_WM_EN;
2ed8e1f5
VS
5139 if (level->ignore_lines)
5140 val |= PLANE_WM_IGNORE_LINES;
5141 val |= level->plane_res_b;
5142 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
d8c0fafc 5143
ff43bc37 5144 I915_WRITE_FW(reg, val);
d8c0fafc 5145}
5146
ff43bc37
VS
5147void skl_write_plane_wm(struct intel_plane *plane,
5148 const struct intel_crtc_state *crtc_state)
62e0fb88 5149{
ff43bc37 5150 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5db94019 5151 int level, max_level = ilk_wm_max_level(dev_priv);
ff43bc37
VS
5152 enum plane_id plane_id = plane->id;
5153 enum pipe pipe = plane->pipe;
5154 const struct skl_plane_wm *wm =
5155 &crtc_state->wm.skl.optimal.planes[plane_id];
5156 const struct skl_ddb_entry *ddb_y =
5157 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5158 const struct skl_ddb_entry *ddb_uv =
5159 &crtc_state->wm.skl.plane_ddb_uv[plane_id];
62e0fb88
L
5160
5161 for (level = 0; level <= max_level; level++) {
d5cdfdf5 5162 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
d8c0fafc 5163 &wm->wm[level]);
62e0fb88 5164 }
d5cdfdf5 5165 skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
d8c0fafc 5166 &wm->trans_wm);
27082493 5167
ff43bc37 5168 if (INTEL_GEN(dev_priv) >= 11) {
234059da 5169 skl_ddb_entry_write(dev_priv,
ff43bc37
VS
5170 PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5171 return;
b879d58f 5172 }
ff43bc37
VS
5173
5174 if (wm->is_planar)
5175 swap(ddb_y, ddb_uv);
5176
5177 skl_ddb_entry_write(dev_priv,
5178 PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5179 skl_ddb_entry_write(dev_priv,
5180 PLANE_NV12_BUF_CFG(pipe, plane_id), ddb_uv);
62e0fb88
L
5181}
5182
ff43bc37
VS
5183void skl_write_cursor_wm(struct intel_plane *plane,
5184 const struct intel_crtc_state *crtc_state)
62e0fb88 5185{
ff43bc37 5186 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5db94019 5187 int level, max_level = ilk_wm_max_level(dev_priv);
ff43bc37
VS
5188 enum plane_id plane_id = plane->id;
5189 enum pipe pipe = plane->pipe;
5190 const struct skl_plane_wm *wm =
5191 &crtc_state->wm.skl.optimal.planes[plane_id];
5192 const struct skl_ddb_entry *ddb =
5193 &crtc_state->wm.skl.plane_ddb_y[plane_id];
62e0fb88
L
5194
5195 for (level = 0; level <= max_level; level++) {
d8c0fafc 5196 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5197 &wm->wm[level]);
62e0fb88 5198 }
d8c0fafc 5199 skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5d374d96 5200
ff43bc37 5201 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), ddb);
2d41c0b5
PB
5202}
5203
45ece230 5204bool skl_wm_level_equals(const struct skl_wm_level *l1,
5205 const struct skl_wm_level *l2)
5206{
ff43bc37 5207 return l1->plane_en == l2->plane_en &&
2ed8e1f5 5208 l1->ignore_lines == l2->ignore_lines &&
ff43bc37
VS
5209 l1->plane_res_l == l2->plane_res_l &&
5210 l1->plane_res_b == l2->plane_res_b;
5211}
45ece230 5212
ff43bc37
VS
5213static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
5214 const struct skl_plane_wm *wm1,
5215 const struct skl_plane_wm *wm2)
5216{
5217 int level, max_level = ilk_wm_max_level(dev_priv);
45ece230 5218
ff43bc37
VS
5219 for (level = 0; level <= max_level; level++) {
5220 if (!skl_wm_level_equals(&wm1->wm[level], &wm2->wm[level]) ||
5221 !skl_wm_level_equals(&wm1->uv_wm[level], &wm2->uv_wm[level]))
5222 return false;
5223 }
5224
5225 return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
45ece230 5226}
5227
961d95e0
VS
5228static bool skl_pipe_wm_equals(struct intel_crtc *crtc,
5229 const struct skl_pipe_wm *wm1,
5230 const struct skl_pipe_wm *wm2)
5231{
5232 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5233 enum plane_id plane_id;
5234
5235 for_each_plane_id_on_crtc(crtc, plane_id) {
5236 if (!skl_plane_wm_equals(dev_priv,
5237 &wm1->planes[plane_id],
5238 &wm2->planes[plane_id]))
5239 return false;
5240 }
5241
5242 return wm1->linetime == wm2->linetime;
5243}
5244
27082493
L
5245static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5246 const struct skl_ddb_entry *b)
0e8fb7ba 5247{
27082493 5248 return a->start < b->end && b->start < a->end;
0e8fb7ba
DL
5249}
5250
53cc6880 5251bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
696173b0 5252 const struct skl_ddb_entry *entries,
53cc6880 5253 int num_entries, int ignore_idx)
0e8fb7ba 5254{
53cc6880 5255 int i;
0e8fb7ba 5256
53cc6880
VS
5257 for (i = 0; i < num_entries; i++) {
5258 if (i != ignore_idx &&
5259 skl_ddb_entries_overlap(ddb, &entries[i]))
27082493 5260 return true;
2b68504b 5261 }
0e8fb7ba 5262
27082493 5263 return false;
0e8fb7ba
DL
5264}
5265
5ce9a649 5266static u32
cd1d3ee9 5267pipes_modified(struct intel_atomic_state *state)
9b613022 5268{
cd1d3ee9
MR
5269 struct intel_crtc *crtc;
5270 struct intel_crtc_state *cstate;
5ce9a649 5271 u32 i, ret = 0;
9b613022 5272
cd1d3ee9
MR
5273 for_each_new_intel_crtc_in_state(state, crtc, cstate, i)
5274 ret |= drm_crtc_mask(&crtc->base);
9b613022
MR
5275
5276 return ret;
5277}
5278
bb7791bd 5279static int
ff43bc37
VS
5280skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
5281 struct intel_crtc_state *new_crtc_state)
9a30a261 5282{
ff43bc37
VS
5283 struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->base.state);
5284 struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
5285 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5286 struct intel_plane *plane;
9a30a261 5287
ff43bc37
VS
5288 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5289 struct intel_plane_state *plane_state;
5290 enum plane_id plane_id = plane->id;
9a30a261 5291
ff43bc37
VS
5292 if (skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_y[plane_id],
5293 &new_crtc_state->wm.skl.plane_ddb_y[plane_id]) &&
5294 skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_uv[plane_id],
5295 &new_crtc_state->wm.skl.plane_ddb_uv[plane_id]))
9a30a261
RV
5296 continue;
5297
ff43bc37 5298 plane_state = intel_atomic_get_plane_state(state, plane);
9a30a261
RV
5299 if (IS_ERR(plane_state))
5300 return PTR_ERR(plane_state);
1ab554b0 5301
ff43bc37 5302 new_crtc_state->update_planes |= BIT(plane_id);
9a30a261
RV
5303 }
5304
5305 return 0;
5306}
5307
5308static int
cd1d3ee9 5309skl_compute_ddb(struct intel_atomic_state *state)
98d39494 5310{
cd1d3ee9
MR
5311 const struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5312 struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
ff43bc37
VS
5313 struct intel_crtc_state *old_crtc_state;
5314 struct intel_crtc_state *new_crtc_state;
e1f96a66 5315 struct intel_crtc *crtc;
e1f96a66 5316 int ret, i;
98d39494 5317
5a920b85
PZ
5318 memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5319
cd1d3ee9 5320 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
ff43bc37
VS
5321 new_crtc_state, i) {
5322 ret = skl_allocate_pipe_ddb(new_crtc_state, ddb);
9a30a261
RV
5323 if (ret)
5324 return ret;
5325
ff43bc37
VS
5326 ret = skl_ddb_add_affected_planes(old_crtc_state,
5327 new_crtc_state);
9a30a261
RV
5328 if (ret)
5329 return ret;
98d39494
MR
5330 }
5331
5332 return 0;
5333}
5334
/* Map an enable flag to a marker character: '*' when set, ' ' otherwise. */
static char enast(bool enable)
{
	if (enable)
		return '*';

	return ' ';
}
5339
413fc530 5340static void
ff43bc37 5341skl_print_wm_changes(struct intel_atomic_state *state)
413fc530 5342{
ff43bc37
VS
5343 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5344 const struct intel_crtc_state *old_crtc_state;
5345 const struct intel_crtc_state *new_crtc_state;
5346 struct intel_plane *plane;
5347 struct intel_crtc *crtc;
7570498e 5348 int i;
413fc530 5349
ab98e944
VS
5350 if ((drm_debug & DRM_UT_KMS) == 0)
5351 return;
5352
ff43bc37
VS
5353 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5354 new_crtc_state, i) {
ab98e944
VS
5355 const struct skl_pipe_wm *old_pipe_wm, *new_pipe_wm;
5356
5357 old_pipe_wm = &old_crtc_state->wm.skl.optimal;
5358 new_pipe_wm = &new_crtc_state->wm.skl.optimal;
5359
ff43bc37
VS
5360 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5361 enum plane_id plane_id = plane->id;
413fc530 5362 const struct skl_ddb_entry *old, *new;
5363
ff43bc37
VS
5364 old = &old_crtc_state->wm.skl.plane_ddb_y[plane_id];
5365 new = &new_crtc_state->wm.skl.plane_ddb_y[plane_id];
413fc530 5366
413fc530 5367 if (skl_ddb_entry_equal(old, new))
5368 continue;
5369
ab98e944
VS
5370 DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
5371 plane->base.base.id, plane->base.name,
5372 old->start, old->end, new->start, new->end,
5373 skl_ddb_entry_size(old), skl_ddb_entry_size(new));
5374 }
5375
5376 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5377 enum plane_id plane_id = plane->id;
5378 const struct skl_plane_wm *old_wm, *new_wm;
5379
5380 old_wm = &old_pipe_wm->planes[plane_id];
5381 new_wm = &new_pipe_wm->planes[plane_id];
5382
5383 if (skl_plane_wm_equals(dev_priv, old_wm, new_wm))
5384 continue;
5385
5386 DRM_DEBUG_KMS("[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
5387 " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
5388 plane->base.base.id, plane->base.name,
5389 enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
5390 enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
5391 enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
5392 enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
5393 enast(old_wm->trans_wm.plane_en),
5394 enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
5395 enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
5396 enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
5397 enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
5398 enast(new_wm->trans_wm.plane_en));
5399
2ed8e1f5
VS
5400 DRM_DEBUG_KMS("[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
5401 " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
ab98e944 5402 plane->base.base.id, plane->base.name,
2ed8e1f5
VS
5403 enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
5404 enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
5405 enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
5406 enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
5407 enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
5408 enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
5409 enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
5410 enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
5411 enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
5412
5413 enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
5414 enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
5415 enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
5416 enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
5417 enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
5418 enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
5419 enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
5420 enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
5421 enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
ab98e944
VS
5422
5423 DRM_DEBUG_KMS("[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5424 " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5425 plane->base.base.id, plane->base.name,
5426 old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
5427 old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
5428 old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
5429 old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
5430 old_wm->trans_wm.plane_res_b,
5431 new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
5432 new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
5433 new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
5434 new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
5435 new_wm->trans_wm.plane_res_b);
5436
5437 DRM_DEBUG_KMS("[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5438 " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
ff43bc37 5439 plane->base.base.id, plane->base.name,
ab98e944
VS
5440 old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
5441 old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
5442 old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
5443 old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
5444 old_wm->trans_wm.min_ddb_alloc,
5445 new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
5446 new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
5447 new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
5448 new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
5449 new_wm->trans_wm.min_ddb_alloc);
413fc530 5450 }
5451 }
5452}
5453
98d39494 5454static int
cd1d3ee9 5455skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
98d39494 5456{
cd1d3ee9 5457 struct drm_device *dev = state->base.dev;
e1f96a66 5458 const struct drm_i915_private *dev_priv = to_i915(dev);
cd1d3ee9
MR
5459 struct intel_crtc *crtc;
5460 struct intel_crtc_state *crtc_state;
5ce9a649 5461 u32 realloc_pipes = pipes_modified(state);
734fa01f 5462 int ret, i;
98d39494 5463
367d73d2
ML
5464 /*
5465 * When we distrust bios wm we always need to recompute to set the
5466 * expected DDB allocations for each CRTC.
5467 */
e1f96a66
MK
5468 if (dev_priv->wm.distrust_bios_wm)
5469 (*changed) = true;
367d73d2 5470
98d39494
MR
5471 /*
5472 * If this transaction isn't actually touching any CRTC's, don't
5473 * bother with watermark calculation. Note that if we pass this
5474 * test, we're guaranteed to hold at least one CRTC state mutex,
5475 * which means we can safely use values like dev_priv->active_crtcs
5476 * since any racing commits that want to update them would need to
5477 * hold _all_ CRTC state mutexes.
5478 */
cd1d3ee9 5479 for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
e1f96a66 5480 (*changed) = true;
367d73d2 5481
e1f96a66 5482 if (!*changed)
98d39494
MR
5483 return 0;
5484
e1f96a66
MK
5485 /*
5486 * If this is our first atomic update following hardware readout,
5487 * we can't trust the DDB that the BIOS programmed for us. Let's
5488 * pretend that all pipes switched active status so that we'll
5489 * ensure a full DDB recompute.
5490 */
5491 if (dev_priv->wm.distrust_bios_wm) {
5492 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
cd1d3ee9 5493 state->base.acquire_ctx);
e1f96a66
MK
5494 if (ret)
5495 return ret;
5496
cd1d3ee9 5497 state->active_pipe_changes = ~0;
e1f96a66
MK
5498
5499 /*
cd1d3ee9 5500 * We usually only initialize state->active_crtcs if we
e1f96a66
MK
5501 * we're doing a modeset; make sure this field is always
5502 * initialized during the sanitization process that happens
5503 * on the first commit too.
5504 */
cd1d3ee9
MR
5505 if (!state->modeset)
5506 state->active_crtcs = dev_priv->active_crtcs;
e1f96a66
MK
5507 }
5508
5509 /*
5510 * If the modeset changes which CRTC's are active, we need to
5511 * recompute the DDB allocation for *all* active pipes, even
5512 * those that weren't otherwise being modified in any way by this
5513 * atomic commit. Due to the shrinking of the per-pipe allocations
5514 * when new active CRTC's are added, it's possible for a pipe that
5515 * we were already using and aren't changing at all here to suddenly
5516 * become invalid if its DDB needs exceeds its new allocation.
5517 *
5518 * Note that if we wind up doing a full DDB recompute, we can't let
5519 * any other display updates race with this transaction, so we need
5520 * to grab the lock on *all* CRTC's.
5521 */
cd1d3ee9 5522 if (state->active_pipe_changes || state->modeset) {
e1f96a66 5523 realloc_pipes = ~0;
cd1d3ee9 5524 state->wm_results.dirty_pipes = ~0;
e1f96a66
MK
5525 }
5526
5527 /*
5528 * We're not recomputing for the pipes not included in the commit, so
5529 * make sure we start with the current state.
5530 */
cd1d3ee9
MR
5531 for_each_intel_crtc_mask(dev, crtc, realloc_pipes) {
5532 crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
5533 if (IS_ERR(crtc_state))
5534 return PTR_ERR(crtc_state);
e1f96a66
MK
5535 }
5536
5537 return 0;
5538}
5539
ff43bc37
VS
5540/*
5541 * To make sure the cursor watermark registers are always consistent
5542 * with our computed state the following scenario needs special
5543 * treatment:
5544 *
5545 * 1. enable cursor
5546 * 2. move cursor entirely offscreen
5547 * 3. disable cursor
5548 *
5549 * Step 2. does call .disable_plane() but does not zero the watermarks
5550 * (since we consider an offscreen cursor still active for the purposes
5551 * of watermarks). Step 3. would not normally call .disable_plane()
5552 * because the actual plane visibility isn't changing, and we don't
5553 * deallocate the cursor ddb until the pipe gets disabled. So we must
5554 * force step 3. to call .disable_plane() to update the watermark
5555 * registers properly.
5556 *
5557 * Other planes do not suffer from this issues as their watermarks are
5558 * calculated based on the actual plane visibility. The only time this
5559 * can trigger for the other planes is during the initial readout as the
5560 * default value of the watermarks registers is not zero.
5561 */
5562static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
5563 struct intel_crtc *crtc)
5564{
5565 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5566 const struct intel_crtc_state *old_crtc_state =
5567 intel_atomic_get_old_crtc_state(state, crtc);
5568 struct intel_crtc_state *new_crtc_state =
5569 intel_atomic_get_new_crtc_state(state, crtc);
5570 struct intel_plane *plane;
5571
5572 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5573 struct intel_plane_state *plane_state;
5574 enum plane_id plane_id = plane->id;
5575
5576 /*
5577 * Force a full wm update for every plane on modeset.
5578 * Required because the reset value of the wm registers
5579 * is non-zero, whereas we want all disabled planes to
5580 * have zero watermarks. So if we turn off the relevant
5581 * power well the hardware state will go out of sync
5582 * with the software state.
5583 */
5584 if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->base) &&
5585 skl_plane_wm_equals(dev_priv,
5586 &old_crtc_state->wm.skl.optimal.planes[plane_id],
5587 &new_crtc_state->wm.skl.optimal.planes[plane_id]))
5588 continue;
5589
5590 plane_state = intel_atomic_get_plane_state(state, plane);
5591 if (IS_ERR(plane_state))
5592 return PTR_ERR(plane_state);
5593
5594 new_crtc_state->update_planes |= BIT(plane_id);
5595 }
5596
5597 return 0;
5598}
5599
e1f96a66 5600static int
cd1d3ee9 5601skl_compute_wm(struct intel_atomic_state *state)
e1f96a66 5602{
cd1d3ee9 5603 struct intel_crtc *crtc;
8cac9fd9 5604 struct intel_crtc_state *new_crtc_state;
cd1d3ee9
MR
5605 struct intel_crtc_state *old_crtc_state;
5606 struct skl_ddb_values *results = &state->wm_results;
e1f96a66
MK
5607 bool changed = false;
5608 int ret, i;
5609
734fa01f
MR
5610 /* Clear all dirty flags */
5611 results->dirty_pipes = 0;
5612
e1f96a66
MK
5613 ret = skl_ddb_add_affected_pipes(state, &changed);
5614 if (ret || !changed)
5615 return ret;
5616
734fa01f
MR
5617 /*
5618 * Calculate WM's for all pipes that are part of this transaction.
d8e87498 5619 * Note that skl_ddb_add_affected_pipes may have added more CRTC's that
734fa01f
MR
5620 * weren't otherwise being modified (and set bits in dirty_pipes) if
5621 * pipe allocations had to change.
734fa01f 5622 */
cd1d3ee9 5623 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
8cac9fd9
VS
5624 new_crtc_state, i) {
5625 ret = skl_build_pipe_wm(new_crtc_state);
ff43bc37
VS
5626 if (ret)
5627 return ret;
5628
cd1d3ee9 5629 ret = skl_wm_add_affected_planes(state, crtc);
734fa01f
MR
5630 if (ret)
5631 return ret;
5632
8cac9fd9
VS
5633 if (!skl_pipe_wm_equals(crtc,
5634 &old_crtc_state->wm.skl.optimal,
5635 &new_crtc_state->wm.skl.optimal))
cd1d3ee9 5636 results->dirty_pipes |= drm_crtc_mask(&crtc->base);
734fa01f
MR
5637 }
5638
d8e87498
MR
5639 ret = skl_compute_ddb(state);
5640 if (ret)
5641 return ret;
5642
cd1d3ee9 5643 skl_print_wm_changes(state);
413fc530 5644
98d39494
MR
5645 return 0;
5646}
5647
ccf010fb
ML
5648static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5649 struct intel_crtc_state *cstate)
5650{
5651 struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5652 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5653 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5654 enum pipe pipe = crtc->pipe;
e62929b3
ML
5655
5656 if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5657 return;
ccf010fb
ML
5658
5659 I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5660}
5661
e62929b3
ML
5662static void skl_initial_wm(struct intel_atomic_state *state,
5663 struct intel_crtc_state *cstate)
2d41c0b5 5664{
e62929b3 5665 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
432081bc 5666 struct drm_device *dev = intel_crtc->base.dev;
fac5e23e 5667 struct drm_i915_private *dev_priv = to_i915(dev);
60f8e873 5668 struct skl_ddb_values *results = &state->wm_results;
adda50b8 5669
432081bc 5670 if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
2d41c0b5
PB
5671 return;
5672
734fa01f 5673 mutex_lock(&dev_priv->wm.wm_mutex);
2d41c0b5 5674
e62929b3
ML
5675 if (cstate->base.active_changed)
5676 skl_atomic_update_crtc_wm(state, cstate);
27082493 5677
734fa01f 5678 mutex_unlock(&dev_priv->wm.wm_mutex);
2d41c0b5
PB
5679}
5680
cd1d3ee9 5681static void ilk_compute_wm_config(struct drm_i915_private *dev_priv,
d890565c
VS
5682 struct intel_wm_config *config)
5683{
5684 struct intel_crtc *crtc;
5685
5686 /* Compute the currently _active_ config */
cd1d3ee9 5687 for_each_intel_crtc(&dev_priv->drm, crtc) {
d890565c
VS
5688 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5689
5690 if (!wm->pipe_enabled)
5691 continue;
5692
5693 config->sprites_enabled |= wm->sprites_enabled;
5694 config->sprites_scaled |= wm->sprites_scaled;
5695 config->num_pipes_active++;
5696 }
5697}
5698
ed4a6a7c 5699static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
801bcfff 5700{
b9d5c839 5701 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
820c1980 5702 struct ilk_wm_maximums max;
d890565c 5703 struct intel_wm_config config = {};
820c1980 5704 struct ilk_wm_values results = {};
77c122bc 5705 enum intel_ddb_partitioning partitioning;
261a27d1 5706
cd1d3ee9 5707 ilk_compute_wm_config(dev_priv, &config);
d890565c 5708
cd1d3ee9
MR
5709 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_1_2, &max);
5710 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_1_2);
a485bfb8
VS
5711
5712 /* 5/6 split only in single pipe config on IVB+ */
175fded1 5713 if (INTEL_GEN(dev_priv) >= 7 &&
d890565c 5714 config.num_pipes_active == 1 && config.sprites_enabled) {
cd1d3ee9
MR
5715 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_5_6, &max);
5716 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_5_6);
0362c781 5717
cd1d3ee9 5718 best_lp_wm = ilk_find_best_result(dev_priv, &lp_wm_1_2, &lp_wm_5_6);
861f3389 5719 } else {
198a1e9b 5720 best_lp_wm = &lp_wm_1_2;
861f3389
PZ
5721 }
5722
198a1e9b 5723 partitioning = (best_lp_wm == &lp_wm_1_2) ?
77c122bc 5724 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
801bcfff 5725
cd1d3ee9 5726 ilk_compute_wm_results(dev_priv, best_lp_wm, partitioning, &results);
609cedef 5727
820c1980 5728 ilk_write_wm_values(dev_priv, &results);
1011d8c4
PZ
5729}
5730
ccf010fb
ML
5731static void ilk_initial_watermarks(struct intel_atomic_state *state,
5732 struct intel_crtc_state *cstate)
b9d5c839 5733{
ed4a6a7c
MR
5734 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5735 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
b9d5c839 5736
ed4a6a7c 5737 mutex_lock(&dev_priv->wm.wm_mutex);
e8f1f02e 5738 intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
ed4a6a7c
MR
5739 ilk_program_watermarks(dev_priv);
5740 mutex_unlock(&dev_priv->wm.wm_mutex);
5741}
bf220452 5742
ccf010fb
ML
5743static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5744 struct intel_crtc_state *cstate)
ed4a6a7c
MR
5745{
5746 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5747 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
bf220452 5748
ed4a6a7c
MR
5749 mutex_lock(&dev_priv->wm.wm_mutex);
5750 if (cstate->wm.need_postvbl_update) {
e8f1f02e 5751 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
ed4a6a7c
MR
5752 ilk_program_watermarks(dev_priv);
5753 }
5754 mutex_unlock(&dev_priv->wm.wm_mutex);
b9d5c839
VS
5755}
5756
5ce9a649 5757static inline void skl_wm_level_from_reg_val(u32 val,
d8c0fafc 5758 struct skl_wm_level *level)
3078999f 5759{
d8c0fafc 5760 level->plane_en = val & PLANE_WM_EN;
2ed8e1f5 5761 level->ignore_lines = val & PLANE_WM_IGNORE_LINES;
d8c0fafc 5762 level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5763 level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5764 PLANE_WM_LINES_MASK;
3078999f
PB
5765}
5766
cd1d3ee9 5767void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
bf9d99ad 5768 struct skl_pipe_wm *out)
3078999f 5769{
cd1d3ee9
MR
5770 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5771 enum pipe pipe = crtc->pipe;
d5cdfdf5
VS
5772 int level, max_level;
5773 enum plane_id plane_id;
5ce9a649 5774 u32 val;
3078999f 5775
5db94019 5776 max_level = ilk_wm_max_level(dev_priv);
3078999f 5777
cd1d3ee9 5778 for_each_plane_id_on_crtc(crtc, plane_id) {
d5cdfdf5 5779 struct skl_plane_wm *wm = &out->planes[plane_id];
3078999f 5780
d8c0fafc 5781 for (level = 0; level <= max_level; level++) {
d5cdfdf5
VS
5782 if (plane_id != PLANE_CURSOR)
5783 val = I915_READ(PLANE_WM(pipe, plane_id, level));
d8c0fafc 5784 else
5785 val = I915_READ(CUR_WM(pipe, level));
3078999f 5786
d8c0fafc 5787 skl_wm_level_from_reg_val(val, &wm->wm[level]);
3078999f 5788 }
3078999f 5789
d5cdfdf5
VS
5790 if (plane_id != PLANE_CURSOR)
5791 val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
d8c0fafc 5792 else
5793 val = I915_READ(CUR_WM_TRANS(pipe));
5794
5795 skl_wm_level_from_reg_val(val, &wm->trans_wm);
3078999f
PB
5796 }
5797
cd1d3ee9 5798 if (!crtc->active)
d8c0fafc 5799 return;
4e0963c7 5800
bf9d99ad 5801 out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
3078999f
PB
5802}
5803
cd1d3ee9 5804void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
3078999f 5805{
60f8e873 5806 struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
a269c583 5807 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
cd1d3ee9 5808 struct intel_crtc *crtc;
bf9d99ad 5809 struct intel_crtc_state *cstate;
3078999f 5810
a269c583 5811 skl_ddb_get_hw_state(dev_priv, ddb);
cd1d3ee9
MR
5812 for_each_intel_crtc(&dev_priv->drm, crtc) {
5813 cstate = to_intel_crtc_state(crtc->base.state);
bf9d99ad 5814
5815 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5816
cd1d3ee9
MR
5817 if (crtc->active)
5818 hw->dirty_pipes |= drm_crtc_mask(&crtc->base);
bf9d99ad 5819 }
a1de91e5 5820
279e99d7
MR
5821 if (dev_priv->active_crtcs) {
5822 /* Fully recompute DDB on first atomic commit */
5823 dev_priv->wm.distrust_bios_wm = true;
279e99d7 5824 }
3078999f
PB
5825}
5826
cd1d3ee9 5827static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
243e6a44 5828{
cd1d3ee9 5829 struct drm_device *dev = crtc->base.dev;
fac5e23e 5830 struct drm_i915_private *dev_priv = to_i915(dev);
820c1980 5831 struct ilk_wm_values *hw = &dev_priv->wm.hw;
cd1d3ee9 5832 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->base.state);
e8f1f02e 5833 struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
cd1d3ee9 5834 enum pipe pipe = crtc->pipe;
f0f59a00 5835 static const i915_reg_t wm0_pipe_reg[] = {
243e6a44
VS
5836 [PIPE_A] = WM0_PIPEA_ILK,
5837 [PIPE_B] = WM0_PIPEB_ILK,
5838 [PIPE_C] = WM0_PIPEC_IVB,
5839 };
5840
5841 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
8652744b 5842 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
ce0e0713 5843 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
243e6a44 5844
15606534
VS
5845 memset(active, 0, sizeof(*active));
5846
cd1d3ee9 5847 active->pipe_enabled = crtc->active;
2a44b76b
VS
5848
5849 if (active->pipe_enabled) {
243e6a44
VS
5850 u32 tmp = hw->wm_pipe[pipe];
5851
5852 /*
5853 * For active pipes LP0 watermark is marked as
5854 * enabled, and LP1+ watermaks as disabled since
5855 * we can't really reverse compute them in case
5856 * multiple pipes are active.
5857 */
5858 active->wm[0].enable = true;
5859 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5860 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5861 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5862 active->linetime = hw->wm_linetime[pipe];
5863 } else {
5db94019 5864 int level, max_level = ilk_wm_max_level(dev_priv);
243e6a44
VS
5865
5866 /*
5867 * For inactive pipes, all watermark levels
5868 * should be marked as enabled but zeroed,
5869 * which is what we'd compute them to.
5870 */
5871 for (level = 0; level <= max_level; level++)
5872 active->wm[level].enable = true;
5873 }
4e0963c7 5874
cd1d3ee9 5875 crtc->wm.active.ilk = *active;
243e6a44
VS
5876}
5877
6eb1a681
VS
/*
 * Extract a DSPFW watermark field from a register value: mask @val with
 * DSPFW_<field>_MASK (or DSPFW_<field>_MASK_VLV for the _VLV variant) and
 * shift it down by DSPFW_<field>_SHIFT.
 */
#define _FW_WM(val, field) \
	(((val) & DSPFW_ ## field ## _MASK) >> DSPFW_ ## field ## _SHIFT)
#define _FW_WM_VLV(val, field) \
	(((val) & DSPFW_ ## field ## _MASK_VLV) >> DSPFW_ ## field ## _SHIFT)
5882
04548cba
VS
5883static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5884 struct g4x_wm_values *wm)
5885{
5ce9a649 5886 u32 tmp;
04548cba
VS
5887
5888 tmp = I915_READ(DSPFW1);
5889 wm->sr.plane = _FW_WM(tmp, SR);
5890 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5891 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5892 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5893
5894 tmp = I915_READ(DSPFW2);
5895 wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5896 wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5897 wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5898 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5899 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5900 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5901
5902 tmp = I915_READ(DSPFW3);
5903 wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5904 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5905 wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5906 wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5907}
5908
6eb1a681
VS
5909static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5910 struct vlv_wm_values *wm)
5911{
5912 enum pipe pipe;
5ce9a649 5913 u32 tmp;
6eb1a681
VS
5914
5915 for_each_pipe(dev_priv, pipe) {
5916 tmp = I915_READ(VLV_DDL(pipe));
5917
1b31389c 5918 wm->ddl[pipe].plane[PLANE_PRIMARY] =
6eb1a681 5919 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
1b31389c 5920 wm->ddl[pipe].plane[PLANE_CURSOR] =
6eb1a681 5921 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
1b31389c 5922 wm->ddl[pipe].plane[PLANE_SPRITE0] =
6eb1a681 5923 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
1b31389c 5924 wm->ddl[pipe].plane[PLANE_SPRITE1] =
6eb1a681
VS
5925 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5926 }
5927
5928 tmp = I915_READ(DSPFW1);
5929 wm->sr.plane = _FW_WM(tmp, SR);
1b31389c
VS
5930 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5931 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5932 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
6eb1a681
VS
5933
5934 tmp = I915_READ(DSPFW2);
1b31389c
VS
5935 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5936 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5937 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
6eb1a681
VS
5938
5939 tmp = I915_READ(DSPFW3);
5940 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5941
5942 if (IS_CHERRYVIEW(dev_priv)) {
5943 tmp = I915_READ(DSPFW7_CHV);
1b31389c
VS
5944 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5945 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
6eb1a681
VS
5946
5947 tmp = I915_READ(DSPFW8_CHV);
1b31389c
VS
5948 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5949 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
6eb1a681
VS
5950
5951 tmp = I915_READ(DSPFW9_CHV);
1b31389c
VS
5952 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5953 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
6eb1a681
VS
5954
5955 tmp = I915_READ(DSPHOWM);
5956 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
1b31389c
VS
5957 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5958 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5959 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5960 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5961 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5962 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5963 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5964 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5965 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
6eb1a681
VS
5966 } else {
5967 tmp = I915_READ(DSPFW7);
1b31389c
VS
5968 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5969 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
6eb1a681
VS
5970
5971 tmp = I915_READ(DSPHOWM);
5972 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
1b31389c
VS
5973 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5974 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5975 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5976 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5977 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5978 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
6eb1a681
VS
5979 }
5980}
5981
5982#undef _FW_WM
5983#undef _FW_WM_VLV
5984
cd1d3ee9 5985void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv)
04548cba 5986{
04548cba
VS
5987 struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5988 struct intel_crtc *crtc;
5989
5990 g4x_read_wm_values(dev_priv, wm);
5991
5992 wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5993
cd1d3ee9 5994 for_each_intel_crtc(&dev_priv->drm, crtc) {
04548cba
VS
5995 struct intel_crtc_state *crtc_state =
5996 to_intel_crtc_state(crtc->base.state);
5997 struct g4x_wm_state *active = &crtc->wm.active.g4x;
5998 struct g4x_pipe_wm *raw;
5999 enum pipe pipe = crtc->pipe;
6000 enum plane_id plane_id;
6001 int level, max_level;
6002
6003 active->cxsr = wm->cxsr;
6004 active->hpll_en = wm->hpll_en;
6005 active->fbc_en = wm->fbc_en;
6006
6007 active->sr = wm->sr;
6008 active->hpll = wm->hpll;
6009
6010 for_each_plane_id_on_crtc(crtc, plane_id) {
6011 active->wm.plane[plane_id] =
6012 wm->pipe[pipe].plane[plane_id];
6013 }
6014
6015 if (wm->cxsr && wm->hpll_en)
6016 max_level = G4X_WM_LEVEL_HPLL;
6017 else if (wm->cxsr)
6018 max_level = G4X_WM_LEVEL_SR;
6019 else
6020 max_level = G4X_WM_LEVEL_NORMAL;
6021
6022 level = G4X_WM_LEVEL_NORMAL;
6023 raw = &crtc_state->wm.g4x.raw[level];
6024 for_each_plane_id_on_crtc(crtc, plane_id)
6025 raw->plane[plane_id] = active->wm.plane[plane_id];
6026
6027 if (++level > max_level)
6028 goto out;
6029
6030 raw = &crtc_state->wm.g4x.raw[level];
6031 raw->plane[PLANE_PRIMARY] = active->sr.plane;
6032 raw->plane[PLANE_CURSOR] = active->sr.cursor;
6033 raw->plane[PLANE_SPRITE0] = 0;
6034 raw->fbc = active->sr.fbc;
6035
6036 if (++level > max_level)
6037 goto out;
6038
6039 raw = &crtc_state->wm.g4x.raw[level];
6040 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
6041 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
6042 raw->plane[PLANE_SPRITE0] = 0;
6043 raw->fbc = active->hpll.fbc;
6044
6045 out:
6046 for_each_plane_id_on_crtc(crtc, plane_id)
6047 g4x_raw_plane_wm_set(crtc_state, level,
6048 plane_id, USHRT_MAX);
6049 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
6050
6051 crtc_state->wm.g4x.optimal = *active;
6052 crtc_state->wm.g4x.intermediate = *active;
6053
6054 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
6055 pipe_name(pipe),
6056 wm->pipe[pipe].plane[PLANE_PRIMARY],
6057 wm->pipe[pipe].plane[PLANE_CURSOR],
6058 wm->pipe[pipe].plane[PLANE_SPRITE0]);
6059 }
6060
6061 DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
6062 wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
6063 DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
6064 wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
6065 DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
6066 yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
6067}
6068
6069void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
6070{
6071 struct intel_plane *plane;
6072 struct intel_crtc *crtc;
6073
6074 mutex_lock(&dev_priv->wm.wm_mutex);
6075
6076 for_each_intel_plane(&dev_priv->drm, plane) {
6077 struct intel_crtc *crtc =
6078 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6079 struct intel_crtc_state *crtc_state =
6080 to_intel_crtc_state(crtc->base.state);
6081 struct intel_plane_state *plane_state =
6082 to_intel_plane_state(plane->base.state);
6083 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
6084 enum plane_id plane_id = plane->id;
6085 int level;
6086
6087 if (plane_state->base.visible)
6088 continue;
6089
6090 for (level = 0; level < 3; level++) {
6091 struct g4x_pipe_wm *raw =
6092 &crtc_state->wm.g4x.raw[level];
6093
6094 raw->plane[plane_id] = 0;
6095 wm_state->wm.plane[plane_id] = 0;
6096 }
6097
6098 if (plane_id == PLANE_PRIMARY) {
6099 for (level = 0; level < 3; level++) {
6100 struct g4x_pipe_wm *raw =
6101 &crtc_state->wm.g4x.raw[level];
6102 raw->fbc = 0;
6103 }
6104
6105 wm_state->sr.fbc = 0;
6106 wm_state->hpll.fbc = 0;
6107 wm_state->fbc_en = false;
6108 }
6109 }
6110
6111 for_each_intel_crtc(&dev_priv->drm, crtc) {
6112 struct intel_crtc_state *crtc_state =
6113 to_intel_crtc_state(crtc->base.state);
6114
6115 crtc_state->wm.g4x.intermediate =
6116 crtc_state->wm.g4x.optimal;
6117 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
6118 }
6119
6120 g4x_program_watermarks(dev_priv);
6121
6122 mutex_unlock(&dev_priv->wm.wm_mutex);
6123}
6124
cd1d3ee9 6125void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
6eb1a681 6126{
6eb1a681 6127 struct vlv_wm_values *wm = &dev_priv->wm.vlv;
f07d43d2 6128 struct intel_crtc *crtc;
6eb1a681
VS
6129 u32 val;
6130
6131 vlv_read_wm_values(dev_priv, wm);
6132
6eb1a681
VS
6133 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
6134 wm->level = VLV_WM_LEVEL_PM2;
6135
6136 if (IS_CHERRYVIEW(dev_priv)) {
337fa6e0 6137 vlv_punit_get(dev_priv);
6eb1a681 6138
c11b813f 6139 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
6eb1a681
VS
6140 if (val & DSP_MAXFIFO_PM5_ENABLE)
6141 wm->level = VLV_WM_LEVEL_PM5;
6142
58590c14
VS
6143 /*
6144 * If DDR DVFS is disabled in the BIOS, Punit
6145 * will never ack the request. So if that happens
6146 * assume we don't have to enable/disable DDR DVFS
6147 * dynamically. To test that just set the REQ_ACK
6148 * bit to poke the Punit, but don't change the
6149 * HIGH/LOW bits so that we don't actually change
6150 * the current state.
6151 */
6eb1a681 6152 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
58590c14
VS
6153 val |= FORCE_DDR_FREQ_REQ_ACK;
6154 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
6155
6156 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
6157 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
6158 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
6159 "assuming DDR DVFS is disabled\n");
6160 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
6161 } else {
6162 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6163 if ((val & FORCE_DDR_HIGH_FREQ) == 0)
6164 wm->level = VLV_WM_LEVEL_DDR_DVFS;
6165 }
6eb1a681 6166
337fa6e0 6167 vlv_punit_put(dev_priv);
6eb1a681
VS
6168 }
6169
cd1d3ee9 6170 for_each_intel_crtc(&dev_priv->drm, crtc) {
ff32c54e
VS
6171 struct intel_crtc_state *crtc_state =
6172 to_intel_crtc_state(crtc->base.state);
6173 struct vlv_wm_state *active = &crtc->wm.active.vlv;
6174 const struct vlv_fifo_state *fifo_state =
6175 &crtc_state->wm.vlv.fifo_state;
6176 enum pipe pipe = crtc->pipe;
6177 enum plane_id plane_id;
6178 int level;
6179
6180 vlv_get_fifo_size(crtc_state);
6181
6182 active->num_levels = wm->level + 1;
6183 active->cxsr = wm->cxsr;
6184
ff32c54e 6185 for (level = 0; level < active->num_levels; level++) {
114d7dc0 6186 struct g4x_pipe_wm *raw =
ff32c54e
VS
6187 &crtc_state->wm.vlv.raw[level];
6188
6189 active->sr[level].plane = wm->sr.plane;
6190 active->sr[level].cursor = wm->sr.cursor;
6191
6192 for_each_plane_id_on_crtc(crtc, plane_id) {
6193 active->wm[level].plane[plane_id] =
6194 wm->pipe[pipe].plane[plane_id];
6195
6196 raw->plane[plane_id] =
6197 vlv_invert_wm_value(active->wm[level].plane[plane_id],
6198 fifo_state->plane[plane_id]);
6199 }
6200 }
6201
6202 for_each_plane_id_on_crtc(crtc, plane_id)
6203 vlv_raw_plane_wm_set(crtc_state, level,
6204 plane_id, USHRT_MAX);
6205 vlv_invalidate_wms(crtc, active, level);
6206
6207 crtc_state->wm.vlv.optimal = *active;
4841da51 6208 crtc_state->wm.vlv.intermediate = *active;
ff32c54e 6209
6eb1a681 6210 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
1b31389c
VS
6211 pipe_name(pipe),
6212 wm->pipe[pipe].plane[PLANE_PRIMARY],
6213 wm->pipe[pipe].plane[PLANE_CURSOR],
6214 wm->pipe[pipe].plane[PLANE_SPRITE0],
6215 wm->pipe[pipe].plane[PLANE_SPRITE1]);
ff32c54e 6216 }
6eb1a681
VS
6217
6218 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6219 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
6220}
6221
602ae835
VS
6222void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
6223{
6224 struct intel_plane *plane;
6225 struct intel_crtc *crtc;
6226
6227 mutex_lock(&dev_priv->wm.wm_mutex);
6228
6229 for_each_intel_plane(&dev_priv->drm, plane) {
6230 struct intel_crtc *crtc =
6231 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6232 struct intel_crtc_state *crtc_state =
6233 to_intel_crtc_state(crtc->base.state);
6234 struct intel_plane_state *plane_state =
6235 to_intel_plane_state(plane->base.state);
6236 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6237 const struct vlv_fifo_state *fifo_state =
6238 &crtc_state->wm.vlv.fifo_state;
6239 enum plane_id plane_id = plane->id;
6240 int level;
6241
6242 if (plane_state->base.visible)
6243 continue;
6244
6245 for (level = 0; level < wm_state->num_levels; level++) {
114d7dc0 6246 struct g4x_pipe_wm *raw =
602ae835
VS
6247 &crtc_state->wm.vlv.raw[level];
6248
6249 raw->plane[plane_id] = 0;
6250
6251 wm_state->wm[level].plane[plane_id] =
6252 vlv_invert_wm_value(raw->plane[plane_id],
6253 fifo_state->plane[plane_id]);
6254 }
6255 }
6256
6257 for_each_intel_crtc(&dev_priv->drm, crtc) {
6258 struct intel_crtc_state *crtc_state =
6259 to_intel_crtc_state(crtc->base.state);
6260
6261 crtc_state->wm.vlv.intermediate =
6262 crtc_state->wm.vlv.optimal;
6263 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6264 }
6265
6266 vlv_program_watermarks(dev_priv);
6267
6268 mutex_unlock(&dev_priv->wm.wm_mutex);
6269}
6270
f72b84c6
VS
6271/*
6272 * FIXME should probably kill this and improve
6273 * the real watermark readout/sanitation instead
6274 */
6275static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6276{
6277 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6278 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6279 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6280
6281 /*
6282 * Don't touch WM1S_LP_EN here.
6283 * Doing so could cause underruns.
6284 */
6285}
6286
cd1d3ee9 6287void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv)
243e6a44 6288{
820c1980 6289 struct ilk_wm_values *hw = &dev_priv->wm.hw;
cd1d3ee9 6290 struct intel_crtc *crtc;
243e6a44 6291
f72b84c6
VS
6292 ilk_init_lp_watermarks(dev_priv);
6293
cd1d3ee9 6294 for_each_intel_crtc(&dev_priv->drm, crtc)
243e6a44
VS
6295 ilk_pipe_wm_get_hw_state(crtc);
6296
6297 hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6298 hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6299 hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6300
6301 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
175fded1 6302 if (INTEL_GEN(dev_priv) >= 7) {
cfa7698b
VS
6303 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6304 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6305 }
243e6a44 6306
8652744b 6307 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
ac9545fd
VS
6308 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6309 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
fd6b8f43 6310 else if (IS_IVYBRIDGE(dev_priv))
ac9545fd
VS
6311 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6312 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
243e6a44
VS
6313
6314 hw->enable_fbc_wm =
6315 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6316}
6317
b445e3b0
ED
6318/**
6319 * intel_update_watermarks - update FIFO watermark values based on current modes
31383410 6320 * @crtc: the #intel_crtc on which to compute the WM
b445e3b0
ED
6321 *
6322 * Calculate watermark values for the various WM regs based on current mode
6323 * and plane configuration.
6324 *
6325 * There are several cases to deal with here:
6326 * - normal (i.e. non-self-refresh)
6327 * - self-refresh (SR) mode
6328 * - lines are large relative to FIFO size (buffer can hold up to 2)
6329 * - lines are small relative to FIFO size (buffer can hold more than 2
6330 * lines), so need to account for TLB latency
6331 *
6332 * The normal calculation is:
6333 * watermark = dotclock * bytes per pixel * latency
6334 * where latency is platform & configuration dependent (we assume pessimal
6335 * values here).
6336 *
6337 * The SR calculation is:
6338 * watermark = (trunc(latency/line time)+1) * surface width *
6339 * bytes per pixel
6340 * where
6341 * line time = htotal / dotclock
6342 * surface width = hdisplay for normal plane and 64 for cursor
6343 * and latency is assumed to be high, as above.
6344 *
6345 * The final value programmed to the register should always be rounded up,
6346 * and include an extra 2 entries to account for clock crossings.
6347 *
6348 * We don't use the sprite, so we can ignore that. And on Crestline we have
6349 * to set the non-SR watermarks to 8.
6350 */
432081bc 6351void intel_update_watermarks(struct intel_crtc *crtc)
b445e3b0 6352{
432081bc 6353 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
b445e3b0
ED
6354
6355 if (dev_priv->display.update_wm)
46ba614c 6356 dev_priv->display.update_wm(crtc);
b445e3b0
ED
6357}
6358
2503a0fe
KM
6359void intel_enable_ipc(struct drm_i915_private *dev_priv)
6360{
6361 u32 val;
6362
fd847b8e
JRS
6363 if (!HAS_IPC(dev_priv))
6364 return;
6365
2503a0fe
KM
6366 val = I915_READ(DISP_ARB_CTL2);
6367
6368 if (dev_priv->ipc_enabled)
6369 val |= DISP_IPC_ENABLE;
6370 else
6371 val &= ~DISP_IPC_ENABLE;
6372
6373 I915_WRITE(DISP_ARB_CTL2, val);
6374}
6375
6376void intel_init_ipc(struct drm_i915_private *dev_priv)
6377{
2503a0fe
KM
6378 if (!HAS_IPC(dev_priv))
6379 return;
6380
c9b818d3
JRS
6381 /* Display WA #1141: SKL:all KBL:all CFL */
6382 if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
6383 dev_priv->ipc_enabled = dev_priv->dram_info.symmetric_memory;
6384 else
6385 dev_priv->ipc_enabled = true;
6386
2503a0fe
KM
6387 intel_enable_ipc(dev_priv);
6388}
6389
/*
 * Lock protecting IPS related data structures.
 *
 * The Ironlake DRPS enable/disable paths in this file take it with
 * spin_lock_irq(), and ironlake_set_drps() asserts that it is held.
 */
DEFINE_SPINLOCK(mchdev_lock);
6394
91d14251 6395bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
2b4e57bd 6396{
2b4e57bd
ED
6397 u16 rgvswctl;
6398
67520415 6399 lockdep_assert_held(&mchdev_lock);
9270388e 6400
2b4e57bd
ED
6401 rgvswctl = I915_READ16(MEMSWCTL);
6402 if (rgvswctl & MEMCTL_CMD_STS) {
6403 DRM_DEBUG("gpu busy, RCS change rejected\n");
6404 return false; /* still busy with another command */
6405 }
6406
6407 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
6408 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
6409 I915_WRITE16(MEMSWCTL, rgvswctl);
6410 POSTING_READ16(MEMSWCTL);
6411
6412 rgvswctl |= MEMCTL_CMD_STS;
6413 I915_WRITE16(MEMSWCTL, rgvswctl);
6414
6415 return true;
6416}
6417
91d14251 6418static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
2b4e57bd 6419{
84f1b20f 6420 u32 rgvmodectl;
2b4e57bd
ED
6421 u8 fmax, fmin, fstart, vstart;
6422
9270388e
DV
6423 spin_lock_irq(&mchdev_lock);
6424
84f1b20f
TU
6425 rgvmodectl = I915_READ(MEMMODECTL);
6426
2b4e57bd
ED
6427 /* Enable temp reporting */
6428 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
6429 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
6430
6431 /* 100ms RC evaluation intervals */
6432 I915_WRITE(RCUPEI, 100000);
6433 I915_WRITE(RCDNEI, 100000);
6434
6435 /* Set max/min thresholds to 90ms and 80ms respectively */
6436 I915_WRITE(RCBMAXAVG, 90000);
6437 I915_WRITE(RCBMINAVG, 80000);
6438
6439 I915_WRITE(MEMIHYST, 1);
6440
6441 /* Set up min, max, and cur for interrupt handling */
6442 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
6443 fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
6444 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
6445 MEMMODE_FSTART_SHIFT;
6446
616847e7 6447 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
2b4e57bd
ED
6448 PXVFREQ_PX_SHIFT;
6449
20e4d407
DV
6450 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6451 dev_priv->ips.fstart = fstart;
2b4e57bd 6452
20e4d407
DV
6453 dev_priv->ips.max_delay = fstart;
6454 dev_priv->ips.min_delay = fmin;
6455 dev_priv->ips.cur_delay = fstart;
2b4e57bd
ED
6456
6457 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6458 fmax, fmin, fstart);
6459
6460 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6461
6462 /*
6463 * Interrupts will be enabled in ironlake_irq_postinstall
6464 */
6465
6466 I915_WRITE(VIDSTART, vstart);
6467 POSTING_READ(VIDSTART);
6468
6469 rgvmodectl |= MEMMODE_SWMODE_EN;
6470 I915_WRITE(MEMMODECTL, rgvmodectl);
6471
9270388e 6472 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
2b4e57bd 6473 DRM_ERROR("stuck trying to change perf mode\n");
dd92d8de 6474 mdelay(1);
2b4e57bd 6475
91d14251 6476 ironlake_set_drps(dev_priv, fstart);
2b4e57bd 6477
7d81c3e0
VS
6478 dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6479 I915_READ(DDREC) + I915_READ(CSIEC);
20e4d407 6480 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
7d81c3e0 6481 dev_priv->ips.last_count2 = I915_READ(GFXEC);
5ed0bdf2 6482 dev_priv->ips.last_time2 = ktime_get_raw_ns();
9270388e
DV
6483
6484 spin_unlock_irq(&mchdev_lock);
2b4e57bd
ED
6485}
6486
91d14251 6487static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
2b4e57bd 6488{
9270388e
DV
6489 u16 rgvswctl;
6490
6491 spin_lock_irq(&mchdev_lock);
6492
6493 rgvswctl = I915_READ16(MEMSWCTL);
2b4e57bd
ED
6494
6495 /* Ack interrupts, disable EFC interrupt */
6496 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6497 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6498 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6499 I915_WRITE(DEIIR, DE_PCU_EVENT);
6500 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6501
6502 /* Go back to the starting frequency */
91d14251 6503 ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
dd92d8de 6504 mdelay(1);
2b4e57bd
ED
6505 rgvswctl |= MEMCTL_CMD_STS;
6506 I915_WRITE(MEMSWCTL, rgvswctl);
dd92d8de 6507 mdelay(1);
2b4e57bd 6508
9270388e 6509 spin_unlock_irq(&mchdev_lock);
2b4e57bd
ED
6510}
6511
acbe9475
DV
6512/* There's a funny hw issue where the hw returns all 0 when reading from
6513 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6514 * ourselves, instead of doing a rmw cycle (which might result in us clearing
6515 * all limits and the gpu stuck at whatever frequency it is at atm).
6516 */
74ef1173 6517static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
2b4e57bd 6518{
562d9bae 6519 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7b9e0ae6 6520 u32 limits;
2b4e57bd 6521
20b46e59
DV
6522 /* Only set the down limit when we've reached the lowest level to avoid
6523 * getting more interrupts, otherwise leave this clear. This prevents a
6524 * race in the hw when coming out of rc6: There's a tiny window where
6525 * the hw runs at the minimal clock before selecting the desired
6526 * frequency, if the down threshold expires in that window we will not
6527 * receive a down interrupt. */
35ceabf3 6528 if (INTEL_GEN(dev_priv) >= 9) {
562d9bae
SAK
6529 limits = (rps->max_freq_softlimit) << 23;
6530 if (val <= rps->min_freq_softlimit)
6531 limits |= (rps->min_freq_softlimit) << 14;
74ef1173 6532 } else {
562d9bae
SAK
6533 limits = rps->max_freq_softlimit << 24;
6534 if (val <= rps->min_freq_softlimit)
6535 limits |= rps->min_freq_softlimit << 16;
74ef1173 6536 }
20b46e59
DV
6537
6538 return limits;
6539}
6540
60548c55 6541static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
dd75fdc8 6542{
562d9bae 6543 struct intel_rps *rps = &dev_priv->gt_pm.rps;
8a586437
AG
6544 u32 threshold_up = 0, threshold_down = 0; /* in % */
6545 u32 ei_up = 0, ei_down = 0;
dd75fdc8 6546
60548c55 6547 lockdep_assert_held(&rps->power.mutex);
dd75fdc8 6548
60548c55 6549 if (new_power == rps->power.mode)
dd75fdc8
CW
6550 return;
6551
6552 /* Note the units here are not exactly 1us, but 1280ns. */
6553 switch (new_power) {
6554 case LOW_POWER:
6555 /* Upclock if more than 95% busy over 16ms */
8a586437
AG
6556 ei_up = 16000;
6557 threshold_up = 95;
dd75fdc8
CW
6558
6559 /* Downclock if less than 85% busy over 32ms */
8a586437
AG
6560 ei_down = 32000;
6561 threshold_down = 85;
dd75fdc8
CW
6562 break;
6563
6564 case BETWEEN:
6565 /* Upclock if more than 90% busy over 13ms */
8a586437
AG
6566 ei_up = 13000;
6567 threshold_up = 90;
dd75fdc8
CW
6568
6569 /* Downclock if less than 75% busy over 32ms */
8a586437
AG
6570 ei_down = 32000;
6571 threshold_down = 75;
dd75fdc8
CW
6572 break;
6573
6574 case HIGH_POWER:
6575 /* Upclock if more than 85% busy over 10ms */
8a586437
AG
6576 ei_up = 10000;
6577 threshold_up = 85;
dd75fdc8
CW
6578
6579 /* Downclock if less than 60% busy over 32ms */
8a586437
AG
6580 ei_down = 32000;
6581 threshold_down = 60;
dd75fdc8
CW
6582 break;
6583 }
6584
6067a27d
MK
6585 /* When byt can survive without system hang with dynamic
6586 * sw freq adjustments, this restriction can be lifted.
6587 */
6588 if (IS_VALLEYVIEW(dev_priv))
6589 goto skip_hw_write;
6590
8a586437 6591 I915_WRITE(GEN6_RP_UP_EI,
a72b5623 6592 GT_INTERVAL_FROM_US(dev_priv, ei_up));
8a586437 6593 I915_WRITE(GEN6_RP_UP_THRESHOLD,
a72b5623
CW
6594 GT_INTERVAL_FROM_US(dev_priv,
6595 ei_up * threshold_up / 100));
8a586437
AG
6596
6597 I915_WRITE(GEN6_RP_DOWN_EI,
a72b5623 6598 GT_INTERVAL_FROM_US(dev_priv, ei_down));
8a586437 6599 I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
a72b5623
CW
6600 GT_INTERVAL_FROM_US(dev_priv,
6601 ei_down * threshold_down / 100));
6602
6603 I915_WRITE(GEN6_RP_CONTROL,
1071d0f6 6604 (INTEL_GEN(dev_priv) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
a72b5623
CW
6605 GEN6_RP_MEDIA_HW_NORMAL_MODE |
6606 GEN6_RP_MEDIA_IS_GFX |
6607 GEN6_RP_ENABLE |
6608 GEN6_RP_UP_BUSY_AVG |
6609 GEN6_RP_DOWN_IDLE_AVG);
8a586437 6610
6067a27d 6611skip_hw_write:
60548c55
CW
6612 rps->power.mode = new_power;
6613 rps->power.up_threshold = threshold_up;
6614 rps->power.down_threshold = threshold_down;
6615}
6616
6617static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6618{
6619 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6620 int new_power;
6621
6622 new_power = rps->power.mode;
6623 switch (rps->power.mode) {
6624 case LOW_POWER:
6625 if (val > rps->efficient_freq + 1 &&
6626 val > rps->cur_freq)
6627 new_power = BETWEEN;
6628 break;
6629
6630 case BETWEEN:
6631 if (val <= rps->efficient_freq &&
6632 val < rps->cur_freq)
6633 new_power = LOW_POWER;
6634 else if (val >= rps->rp0_freq &&
6635 val > rps->cur_freq)
6636 new_power = HIGH_POWER;
6637 break;
6638
6639 case HIGH_POWER:
6640 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6641 val < rps->cur_freq)
6642 new_power = BETWEEN;
6643 break;
6644 }
6645 /* Max/min bins are special */
6646 if (val <= rps->min_freq_softlimit)
6647 new_power = LOW_POWER;
6648 if (val >= rps->max_freq_softlimit)
6649 new_power = HIGH_POWER;
6650
6651 mutex_lock(&rps->power.mutex);
6652 if (rps->power.interactive)
6653 new_power = HIGH_POWER;
6654 rps_set_power(dev_priv, new_power);
6655 mutex_unlock(&rps->power.mutex);
dd75fdc8
CW
6656}
6657
60548c55
CW
6658void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6659{
6660 struct intel_rps *rps = &i915->gt_pm.rps;
6661
6662 if (INTEL_GEN(i915) < 6)
6663 return;
6664
6665 mutex_lock(&rps->power.mutex);
6666 if (interactive) {
6667 if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6668 rps_set_power(i915, HIGH_POWER);
6669 } else {
6670 GEM_BUG_ON(!rps->power.interactive);
6671 rps->power.interactive--;
6672 }
6673 mutex_unlock(&rps->power.mutex);
6674}
6675
2876ce73
CW
6676static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6677{
562d9bae 6678 struct intel_rps *rps = &dev_priv->gt_pm.rps;
2876ce73
CW
6679 u32 mask = 0;
6680
e0e8c7cb 6681 /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
562d9bae 6682 if (val > rps->min_freq_softlimit)
e0e8c7cb 6683 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
562d9bae 6684 if (val < rps->max_freq_softlimit)
6f4b12f8 6685 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
2876ce73 6686
7b3c29f6
CW
6687 mask &= dev_priv->pm_rps_events;
6688
59d02a1f 6689 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
2876ce73
CW
6690}
6691
b8a5ff8d
JM
6692/* gen6_set_rps is called to update the frequency request, but should also be
6693 * called when the range (min_delay and max_delay) is modified so that we can
6694 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
9fcee2f7 6695static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
20b46e59 6696{
562d9bae
SAK
6697 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6698
eb64cad1
CW
6699 /* min/max delay may still have been modified so be sure to
6700 * write the limits value.
6701 */
562d9bae 6702 if (val != rps->cur_freq) {
eb64cad1 6703 gen6_set_rps_thresholds(dev_priv, val);
b8a5ff8d 6704
35ceabf3 6705 if (INTEL_GEN(dev_priv) >= 9)
5704195c
AG
6706 I915_WRITE(GEN6_RPNSWREQ,
6707 GEN9_FREQUENCY(val));
dc97997a 6708 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
eb64cad1
CW
6709 I915_WRITE(GEN6_RPNSWREQ,
6710 HSW_FREQUENCY(val));
6711 else
6712 I915_WRITE(GEN6_RPNSWREQ,
6713 GEN6_FREQUENCY(val) |
6714 GEN6_OFFSET(0) |
6715 GEN6_AGGRESSIVE_TURBO);
b8a5ff8d 6716 }
7b9e0ae6 6717
7b9e0ae6
CW
6718 /* Make sure we continue to get interrupts
6719 * until we hit the minimum or maximum frequencies.
6720 */
74ef1173 6721 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
2876ce73 6722 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
7b9e0ae6 6723
562d9bae 6724 rps->cur_freq = val;
0f94592e 6725 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
9fcee2f7
CW
6726
6727 return 0;
2b4e57bd
ED
6728}
6729
9fcee2f7 6730static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
ffe02b40 6731{
9fcee2f7
CW
6732 int err;
6733
dc97997a 6734 if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
ffe02b40
VS
6735 "Odd GPU freq value\n"))
6736 val &= ~1;
6737
cd25dd5b
D
6738 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6739
562d9bae 6740 if (val != dev_priv->gt_pm.rps.cur_freq) {
337fa6e0 6741 vlv_punit_get(dev_priv);
9fcee2f7 6742 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
337fa6e0 6743 vlv_punit_put(dev_priv);
9fcee2f7
CW
6744 if (err)
6745 return err;
6746
db4c5e0b 6747 gen6_set_rps_thresholds(dev_priv, val);
8fb55197 6748 }
ffe02b40 6749
562d9bae 6750 dev_priv->gt_pm.rps.cur_freq = val;
ffe02b40 6751 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
9fcee2f7
CW
6752
6753 return 0;
ffe02b40
VS
6754}
6755
a7f6e231 6756/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
76c3552f
D
6757 *
6758 * * If Gfx is Idle, then
a7f6e231
D
6759 * 1. Forcewake Media well.
6760 * 2. Request idle freq.
6761 * 3. Release Forcewake of Media well.
76c3552f
D
6762*/
6763static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6764{
562d9bae
SAK
6765 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6766 u32 val = rps->idle_freq;
9fcee2f7 6767 int err;
5549d25f 6768
562d9bae 6769 if (rps->cur_freq <= val)
76c3552f
D
6770 return;
6771
c9efef7b
CW
6772 /* The punit delays the write of the frequency and voltage until it
6773 * determines the GPU is awake. During normal usage we don't want to
6774 * waste power changing the frequency if the GPU is sleeping (rc6).
6775 * However, the GPU and driver is now idle and we do not want to delay
6776 * switching to minimum voltage (reducing power whilst idle) as we do
6777 * not expect to be woken in the near future and so must flush the
6778 * change by waking the device.
6779 *
6780 * We choose to take the media powerwell (either would do to trick the
6781 * punit into committing the voltage change) as that takes a lot less
6782 * power than the render powerwell.
6783 */
3ceea6a1 6784 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_MEDIA);
9fcee2f7 6785 err = valleyview_set_rps(dev_priv, val);
3ceea6a1 6786 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_MEDIA);
9fcee2f7
CW
6787
6788 if (err)
6789 DRM_ERROR("Failed to set RPS for idle\n");
76c3552f
D
6790}
6791
43cf3bf0
CW
6792void gen6_rps_busy(struct drm_i915_private *dev_priv)
6793{
562d9bae
SAK
6794 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6795
ebb5eb7d 6796 mutex_lock(&rps->lock);
562d9bae 6797 if (rps->enabled) {
bd64818d
CW
6798 u8 freq;
6799
e0e8c7cb 6800 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
43cf3bf0
CW
6801 gen6_rps_reset_ei(dev_priv);
6802 I915_WRITE(GEN6_PMINTRMSK,
562d9bae 6803 gen6_rps_pm_mask(dev_priv, rps->cur_freq));
2b83c4c4 6804
c33d247d
CW
6805 gen6_enable_rps_interrupts(dev_priv);
6806
bd64818d
CW
6807 /* Use the user's desired frequency as a guide, but for better
6808 * performance, jump directly to RPe as our starting frequency.
6809 */
562d9bae
SAK
6810 freq = max(rps->cur_freq,
6811 rps->efficient_freq);
bd64818d 6812
9fcee2f7 6813 if (intel_set_rps(dev_priv,
bd64818d 6814 clamp(freq,
562d9bae
SAK
6815 rps->min_freq_softlimit,
6816 rps->max_freq_softlimit)))
9fcee2f7 6817 DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
43cf3bf0 6818 }
ebb5eb7d 6819 mutex_unlock(&rps->lock);
43cf3bf0
CW
6820}
6821
b29c19b6
CW
6822void gen6_rps_idle(struct drm_i915_private *dev_priv)
6823{
562d9bae
SAK
6824 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6825
c33d247d
CW
6826 /* Flush our bottom-half so that it does not race with us
6827 * setting the idle frequency and so that it is bounded by
6828 * our rpm wakeref. And then disable the interrupts to stop any
6829 * futher RPS reclocking whilst we are asleep.
6830 */
6831 gen6_disable_rps_interrupts(dev_priv);
6832
ebb5eb7d 6833 mutex_lock(&rps->lock);
562d9bae 6834 if (rps->enabled) {
dc97997a 6835 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
76c3552f 6836 vlv_set_rps_idle(dev_priv);
7526ed79 6837 else
562d9bae
SAK
6838 gen6_set_rps(dev_priv, rps->idle_freq);
6839 rps->last_adj = 0;
12c100bf
VS
6840 I915_WRITE(GEN6_PMINTRMSK,
6841 gen6_sanitize_rps_pm_mask(dev_priv, ~0));
c0951f0c 6842 }
ebb5eb7d 6843 mutex_unlock(&rps->lock);
b29c19b6
CW
6844}
6845
62eb3c24 6846void gen6_rps_boost(struct i915_request *rq)
b29c19b6 6847{
562d9bae 6848 struct intel_rps *rps = &rq->i915->gt_pm.rps;
74d290f8 6849 unsigned long flags;
7b92c1bd
CW
6850 bool boost;
6851
8d3afd7d
CW
6852 /* This is intentionally racy! We peek at the state here, then
6853 * validate inside the RPS worker.
6854 */
562d9bae 6855 if (!rps->enabled)
8d3afd7d 6856 return;
43cf3bf0 6857
0e21834e 6858 if (i915_request_signaled(rq))
253a2817
CW
6859 return;
6860
e61e0f51 6861 /* Serializes with i915_request_retire() */
7b92c1bd 6862 boost = false;
74d290f8 6863 spin_lock_irqsave(&rq->lock, flags);
253a2817
CW
6864 if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
6865 boost = !atomic_fetch_inc(&rps->num_waiters);
7b92c1bd 6866 rq->waitboost = true;
c0951f0c 6867 }
74d290f8 6868 spin_unlock_irqrestore(&rq->lock, flags);
7b92c1bd
CW
6869 if (!boost)
6870 return;
6871
562d9bae
SAK
6872 if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6873 schedule_work(&rps->work);
7b92c1bd 6874
62eb3c24 6875 atomic_inc(&rps->boosts);
b29c19b6
CW
6876}
6877
9fcee2f7 6878int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
0a073b84 6879{
562d9bae 6880 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9fcee2f7
CW
6881 int err;
6882
ebb5eb7d 6883 lockdep_assert_held(&rps->lock);
562d9bae
SAK
6884 GEM_BUG_ON(val > rps->max_freq);
6885 GEM_BUG_ON(val < rps->min_freq);
cfd1c488 6886
562d9bae
SAK
6887 if (!rps->enabled) {
6888 rps->cur_freq = val;
76e4e4b5
CW
6889 return 0;
6890 }
6891
dc97997a 6892 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
9fcee2f7 6893 err = valleyview_set_rps(dev_priv, val);
ffe02b40 6894 else
9fcee2f7
CW
6895 err = gen6_set_rps(dev_priv, val);
6896
6897 return err;
0a073b84
JB
6898}
6899
dc97997a 6900static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
20e49366 6901{
20e49366 6902 I915_WRITE(GEN6_RC_CONTROL, 0);
38c23527 6903 I915_WRITE(GEN9_PG_ENABLE, 0);
20e49366
ZW
6904}
6905
dc97997a 6906static void gen9_disable_rps(struct drm_i915_private *dev_priv)
2030d684 6907{
2030d684
AG
6908 I915_WRITE(GEN6_RP_CONTROL, 0);
6909}
6910
960e5465 6911static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
d20d4f0c 6912{
d20d4f0c 6913 I915_WRITE(GEN6_RC_CONTROL, 0);
960e5465
SAK
6914}
6915
6916static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6917{
44fc7d5c 6918 I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
2030d684 6919 I915_WRITE(GEN6_RP_CONTROL, 0);
44fc7d5c
DV
6920}
6921
d46b00dc 6922static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
38807746 6923{
38807746
D
6924 I915_WRITE(GEN6_RC_CONTROL, 0);
6925}
6926
d46b00dc
SAK
6927static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6928{
6929 I915_WRITE(GEN6_RP_CONTROL, 0);
6930}
6931
0d6fc92a 6932static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
44fc7d5c 6933{
0d6fc92a 6934 /* We're doing forcewake before Disabling RC6,
98a2e5f9 6935 * This what the BIOS expects when going into suspend */
3ceea6a1 6936 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
98a2e5f9 6937
44fc7d5c 6938 I915_WRITE(GEN6_RC_CONTROL, 0);
d20d4f0c 6939
3ceea6a1 6940 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
d20d4f0c
JB
6941}
6942
0d6fc92a
SAK
6943static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6944{
6945 I915_WRITE(GEN6_RP_CONTROL, 0);
6946}
6947
dc97997a 6948static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
274008e8 6949{
274008e8
SAK
6950 bool enable_rc6 = true;
6951 unsigned long rc6_ctx_base;
fc619841
ID
6952 u32 rc_ctl;
6953 int rc_sw_target;
6954
6955 rc_ctl = I915_READ(GEN6_RC_CONTROL);
6956 rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6957 RC_SW_TARGET_STATE_SHIFT;
6958 DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6959 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6960 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6961 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6962 rc_sw_target);
274008e8
SAK
6963
6964 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
b99d49cc 6965 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
274008e8
SAK
6966 enable_rc6 = false;
6967 }
6968
6969 /*
6970 * The exact context size is not known for BXT, so assume a page size
6971 * for this check.
6972 */
6973 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
17a05345
MA
6974 if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6975 (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
b99d49cc 6976 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
274008e8
SAK
6977 enable_rc6 = false;
6978 }
6979
6980 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6981 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6982 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6983 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
b99d49cc 6984 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
274008e8
SAK
6985 enable_rc6 = false;
6986 }
6987
fc619841
ID
6988 if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6989 !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6990 !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6991 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6992 enable_rc6 = false;
6993 }
6994
6995 if (!I915_READ(GEN6_GFXPAUSE)) {
6996 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
6997 enable_rc6 = false;
6998 }
6999
7000 if (!I915_READ(GEN8_MISC_CTRL0)) {
7001 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
274008e8
SAK
7002 enable_rc6 = false;
7003 }
7004
7005 return enable_rc6;
7006}
7007
fb6db0f5 7008static bool sanitize_rc6(struct drm_i915_private *i915)
2b4e57bd 7009{
fb6db0f5 7010 struct intel_device_info *info = mkwrite_device_info(i915);
e6069ca8 7011
fb6db0f5 7012 /* Powersaving is controlled by the host when inside a VM */
91cbdb83 7013 if (intel_vgpu_active(i915)) {
fb6db0f5 7014 info->has_rc6 = 0;
91cbdb83
CW
7015 info->has_rps = false;
7016 }
274008e8 7017
fb6db0f5
CW
7018 if (info->has_rc6 &&
7019 IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
274008e8 7020 DRM_INFO("RC6 disabled by BIOS\n");
fb6db0f5 7021 info->has_rc6 = 0;
274008e8
SAK
7022 }
7023
fb6db0f5
CW
7024 /*
7025 * We assume that we do not have any deep rc6 levels if we don't have
7026 * have the previous rc6 level supported, i.e. we use HAS_RC6()
7027 * as the initial coarse check for rc6 in general, moving on to
7028 * progressively finer/deeper levels.
7029 */
7030 if (!info->has_rc6 && info->has_rc6p)
7031 info->has_rc6p = 0;
8bade1ad 7032
fb6db0f5 7033 return info->has_rc6;
2b4e57bd
ED
7034}
7035
dc97997a 7036static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
3280e8b0 7037{
562d9bae
SAK
7038 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7039
3280e8b0 7040 /* All of these values are in units of 50MHz */
773ea9a8 7041
93ee2920 7042 /* static values from HW: RP0 > RP1 > RPn (min_freq) */
cc3f90f0 7043 if (IS_GEN9_LP(dev_priv)) {
773ea9a8 7044 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
562d9bae
SAK
7045 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
7046 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
7047 rps->min_freq = (rp_state_cap >> 0) & 0xff;
35040562 7048 } else {
773ea9a8 7049 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
562d9bae
SAK
7050 rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
7051 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
7052 rps->min_freq = (rp_state_cap >> 16) & 0xff;
35040562 7053 }
3280e8b0 7054 /* hw_max = RP0 until we check for overclocking */
562d9bae 7055 rps->max_freq = rps->rp0_freq;
3280e8b0 7056
562d9bae 7057 rps->efficient_freq = rps->rp1_freq;
dc97997a 7058 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
2b2874ef 7059 IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
773ea9a8
CW
7060 u32 ddcc_status = 0;
7061
7062 if (sandybridge_pcode_read(dev_priv,
7063 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
7064 &ddcc_status) == 0)
562d9bae 7065 rps->efficient_freq =
46efa4ab
TR
7066 clamp_t(u8,
7067 ((ddcc_status >> 8) & 0xff),
562d9bae
SAK
7068 rps->min_freq,
7069 rps->max_freq);
93ee2920
TR
7070 }
7071
2b2874ef 7072 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
c5e0688c 7073 /* Store the frequency values in 16.66 MHZ units, which is
773ea9a8
CW
7074 * the natural hardware unit for SKL
7075 */
562d9bae
SAK
7076 rps->rp0_freq *= GEN9_FREQ_SCALER;
7077 rps->rp1_freq *= GEN9_FREQ_SCALER;
7078 rps->min_freq *= GEN9_FREQ_SCALER;
7079 rps->max_freq *= GEN9_FREQ_SCALER;
7080 rps->efficient_freq *= GEN9_FREQ_SCALER;
c5e0688c 7081 }
3280e8b0
BW
7082}
7083
3a45b05c 7084static void reset_rps(struct drm_i915_private *dev_priv,
9fcee2f7 7085 int (*set)(struct drm_i915_private *, u8))
3a45b05c 7086{
562d9bae
SAK
7087 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7088 u8 freq = rps->cur_freq;
3a45b05c
CW
7089
7090 /* force a reset */
60548c55 7091 rps->power.mode = -1;
562d9bae 7092 rps->cur_freq = -1;
3a45b05c 7093
9fcee2f7
CW
7094 if (set(dev_priv, freq))
7095 DRM_ERROR("Failed to reset RPS to initial values\n");
3a45b05c
CW
7096}
7097
b6fef0ef 7098/* See the Gen9_GT_PM_Programming_Guide doc for the below */
dc97997a 7099static void gen9_enable_rps(struct drm_i915_private *dev_priv)
b6fef0ef 7100{
3ceea6a1 7101 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
b6fef0ef 7102
36fe778a 7103 /* Program defaults and thresholds for RPS */
cf819eff 7104 if (IS_GEN(dev_priv, 9))
36fe778a
DW
7105 I915_WRITE(GEN6_RC_VIDEO_FREQ,
7106 GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
0beb059a
AG
7107
7108 /* 1 second timeout*/
7109 I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
7110 GT_INTERVAL_FROM_US(dev_priv, 1000000));
7111
b6fef0ef 7112 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
b6fef0ef 7113
0beb059a
AG
7114 /* Leaning on the below call to gen6_set_rps to program/setup the
7115 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
7116 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
3a45b05c 7117 reset_rps(dev_priv, gen6_set_rps);
b6fef0ef 7118
3ceea6a1 7119 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
b6fef0ef
JB
7120}
7121
a79208de
MK
7122static void gen11_enable_rc6(struct drm_i915_private *dev_priv)
7123{
7124 struct intel_engine_cs *engine;
7125 enum intel_engine_id id;
7126
7127 /* 1a: Software RC state - RC0 */
7128 I915_WRITE(GEN6_RC_STATE, 0);
7129
7130 /*
7131 * 1b: Get forcewake during program sequence. Although the driver
7132 * hasn't enabled a state yet where we need forcewake, BIOS may have.
7133 */
7134 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7135
7136 /* 2a: Disable RC states. */
7137 I915_WRITE(GEN6_RC_CONTROL, 0);
7138
7139 /* 2b: Program RC6 thresholds.*/
7140 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
7141 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
7142
7143 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7144 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7145 for_each_engine(engine, dev_priv, id)
7146 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7147
7148 if (HAS_GUC(dev_priv))
7149 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
7150
7151 I915_WRITE(GEN6_RC_SLEEP, 0);
7152
d105e9ad
MK
7153 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
7154
a79208de
MK
7155 /*
7156 * 2c: Program Coarse Power Gating Policies.
7157 *
7158 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
7159 * use instead is a more conservative estimate for the maximum time
7160 * it takes us to service a CS interrupt and submit a new ELSP - that
7161 * is the time which the GPU is idle waiting for the CPU to select the
7162 * next request to execute. If the idle hysteresis is less than that
7163 * interrupt service latency, the hardware will automatically gate
7164 * the power well and we will then incur the wake up cost on top of
7165 * the service latency. A similar guide from intel_pstate is that we
7166 * do not want the enable hysteresis to less than the wakeup latency.
7167 *
7168 * igt/gem_exec_nop/sequential provides a rough estimate for the
7169 * service latency, and puts it around 10us for Broadwell (and other
7170 * big core) and around 40us for Broxton (and other low power cores).
7171 * [Note that for legacy ringbuffer submission, this is less than 1us!]
7172 * However, the wakeup latency on Broxton is closer to 100us. To be
7173 * conservative, we have to factor in a context switch on top (due
7174 * to ksoftirqd).
7175 */
7176 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
7177 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
7178
7179 /* 3a: Enable RC6 */
a79208de
MK
7180 I915_WRITE(GEN6_RC_CONTROL,
7181 GEN6_RC_CTL_HW_ENABLE |
7182 GEN6_RC_CTL_RC6_ENABLE |
7183 GEN6_RC_CTL_EI_MODE(1));
7184
7185 /* 3b: Enable Coarse Power Gating only when RC6 is enabled. */
7186 I915_WRITE(GEN9_PG_ENABLE,
2ea74141
MK
7187 GEN9_RENDER_PG_ENABLE |
7188 GEN9_MEDIA_PG_ENABLE |
7189 GEN11_MEDIA_SAMPLER_PG_ENABLE);
a79208de
MK
7190
7191 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7192}
7193
dc97997a 7194static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
20e49366 7195{
e2f80391 7196 struct intel_engine_cs *engine;
3b3f1650 7197 enum intel_engine_id id;
fb6db0f5 7198 u32 rc6_mode;
20e49366
ZW
7199
7200 /* 1a: Software RC state - RC0 */
7201 I915_WRITE(GEN6_RC_STATE, 0);
7202
7203 /* 1b: Get forcewake during program sequence. Although the driver
7204 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
3ceea6a1 7205 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
20e49366
ZW
7206
7207 /* 2a: Disable RC states. */
7208 I915_WRITE(GEN6_RC_CONTROL, 0);
7209
7210 /* 2b: Program RC6 thresholds.*/
0aab201b
RV
7211 if (INTEL_GEN(dev_priv) >= 10) {
7212 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
7213 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
7214 } else if (IS_SKYLAKE(dev_priv)) {
7215 /*
7216 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
7217 * when CPG is enabled
7218 */
63a4dec2 7219 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
0aab201b 7220 } else {
63a4dec2 7221 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
0aab201b
RV
7222 }
7223
20e49366
ZW
7224 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7225 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
3b3f1650 7226 for_each_engine(engine, dev_priv, id)
e2f80391 7227 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
97c322e7 7228
1a3d1898 7229 if (HAS_GUC(dev_priv))
97c322e7
SAK
7230 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
7231
20e49366 7232 I915_WRITE(GEN6_RC_SLEEP, 0);
20e49366 7233
c1beabcf
CW
7234 /*
7235 * 2c: Program Coarse Power Gating Policies.
7236 *
7237 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
7238 * use instead is a more conservative estimate for the maximum time
7239 * it takes us to service a CS interrupt and submit a new ELSP - that
7240 * is the time which the GPU is idle waiting for the CPU to select the
7241 * next request to execute. If the idle hysteresis is less than that
7242 * interrupt service latency, the hardware will automatically gate
7243 * the power well and we will then incur the wake up cost on top of
7244 * the service latency. A similar guide from intel_pstate is that we
7245 * do not want the enable hysteresis to less than the wakeup latency.
7246 *
7247 * igt/gem_exec_nop/sequential provides a rough estimate for the
7248 * service latency, and puts it around 10us for Broadwell (and other
7249 * big core) and around 40us for Broxton (and other low power cores).
7250 * [Note that for legacy ringbuffer submission, this is less than 1us!]
7251 * However, the wakeup latency on Broxton is closer to 100us. To be
7252 * conservative, we have to factor in a context switch on top (due
7253 * to ksoftirqd).
7254 */
7255 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
7256 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
38c23527 7257
20e49366 7258 /* 3a: Enable RC6 */
1c044f9b 7259 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
e4ffc83d
RV
7260
7261 /* WaRsUseTimeoutMode:cnl (pre-prod) */
7262 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
7263 rc6_mode = GEN7_RC_CTL_TO_MODE;
7264 else
7265 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
7266
1c044f9b 7267 I915_WRITE(GEN6_RC_CONTROL,
fb6db0f5
CW
7268 GEN6_RC_CTL_HW_ENABLE |
7269 GEN6_RC_CTL_RC6_ENABLE |
7270 rc6_mode);
20e49366 7271
cb07bae0
SK
7272 /*
7273 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
d66047e4 7274 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
cb07bae0 7275 */
dc97997a 7276 if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
f2d2fe95
SAK
7277 I915_WRITE(GEN9_PG_ENABLE, 0);
7278 else
fb6db0f5
CW
7279 I915_WRITE(GEN9_PG_ENABLE,
7280 GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
38c23527 7281
3ceea6a1 7282 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
20e49366
ZW
7283}
7284
3a85392c 7285static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
6edee7f3 7286{
e2f80391 7287 struct intel_engine_cs *engine;
3b3f1650 7288 enum intel_engine_id id;
6edee7f3
BW
7289
7290 /* 1a: Software RC state - RC0 */
7291 I915_WRITE(GEN6_RC_STATE, 0);
7292
3a85392c 7293 /* 1b: Get forcewake during program sequence. Although the driver
6edee7f3 7294 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
3ceea6a1 7295 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
6edee7f3
BW
7296
7297 /* 2a: Disable RC states. */
7298 I915_WRITE(GEN6_RC_CONTROL, 0);
7299
6edee7f3
BW
7300 /* 2b: Program RC6 thresholds.*/
7301 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7302 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7303 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
3b3f1650 7304 for_each_engine(engine, dev_priv, id)
e2f80391 7305 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6edee7f3 7306 I915_WRITE(GEN6_RC_SLEEP, 0);
415544d5 7307 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
6edee7f3
BW
7308
7309 /* 3: Enable RC6 */
415544d5 7310
fb6db0f5
CW
7311 I915_WRITE(GEN6_RC_CONTROL,
7312 GEN6_RC_CTL_HW_ENABLE |
7313 GEN7_RC_CTL_TO_MODE |
7314 GEN6_RC_CTL_RC6_ENABLE);
6edee7f3 7315
3ceea6a1 7316 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
3a85392c
SAK
7317}
7318
7319static void gen8_enable_rps(struct drm_i915_private *dev_priv)
7320{
562d9bae
SAK
7321 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7322
3ceea6a1 7323 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
3a85392c
SAK
7324
7325 /* 1 Program defaults and thresholds for RPS*/
f9bdc585 7326 I915_WRITE(GEN6_RPNSWREQ,
562d9bae 7327 HSW_FREQUENCY(rps->rp1_freq));
f9bdc585 7328 I915_WRITE(GEN6_RC_VIDEO_FREQ,
562d9bae 7329 HSW_FREQUENCY(rps->rp1_freq));
7526ed79
DV
7330 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7331 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
7332
7333 /* Docs recommend 900MHz, and 300 MHz respectively */
7334 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
562d9bae
SAK
7335 rps->max_freq_softlimit << 24 |
7336 rps->min_freq_softlimit << 16);
7526ed79
DV
7337
7338 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
7339 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
7340 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
7341 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
7342
7343 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
6edee7f3 7344
3a85392c 7345 /* 2: Enable RPS */
7526ed79
DV
7346 I915_WRITE(GEN6_RP_CONTROL,
7347 GEN6_RP_MEDIA_TURBO |
7348 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7349 GEN6_RP_MEDIA_IS_GFX |
7350 GEN6_RP_ENABLE |
7351 GEN6_RP_UP_BUSY_AVG |
7352 GEN6_RP_DOWN_IDLE_AVG);
7353
3a45b05c 7354 reset_rps(dev_priv, gen6_set_rps);
7526ed79 7355
3ceea6a1 7356 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
6edee7f3
BW
7357}
7358
960e5465 7359static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
2b4e57bd 7360{
e2f80391 7361 struct intel_engine_cs *engine;
3b3f1650 7362 enum intel_engine_id id;
fb6db0f5 7363 u32 rc6vids, rc6_mask;
2b4e57bd 7364 u32 gtfifodbg;
b4ac5afc 7365 int ret;
2b4e57bd 7366
2b4e57bd 7367 I915_WRITE(GEN6_RC_STATE, 0);
2b4e57bd
ED
7368
7369 /* Clear the DBG now so we don't confuse earlier errors */
297b32ec
VS
7370 gtfifodbg = I915_READ(GTFIFODBG);
7371 if (gtfifodbg) {
2b4e57bd
ED
7372 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
7373 I915_WRITE(GTFIFODBG, gtfifodbg);
7374 }
7375
3ceea6a1 7376 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
2b4e57bd
ED
7377
7378 /* disable the counters and set deterministic thresholds */
7379 I915_WRITE(GEN6_RC_CONTROL, 0);
7380
7381 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
7382 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
7383 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
7384 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7385 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7386
3b3f1650 7387 for_each_engine(engine, dev_priv, id)
e2f80391 7388 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
2b4e57bd
ED
7389
7390 I915_WRITE(GEN6_RC_SLEEP, 0);
7391 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
dc97997a 7392 if (IS_IVYBRIDGE(dev_priv))
351aa566
SM
7393 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
7394 else
7395 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
0920a487 7396 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
2b4e57bd
ED
7397 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7398
5a7dc92a 7399 /* We don't use those on Haswell */
fb6db0f5
CW
7400 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
7401 if (HAS_RC6p(dev_priv))
7402 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
7403 if (HAS_RC6pp(dev_priv))
7404 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
2b4e57bd
ED
7405 I915_WRITE(GEN6_RC_CONTROL,
7406 rc6_mask |
7407 GEN6_RC_CTL_EI_MODE(1) |
7408 GEN6_RC_CTL_HW_ENABLE);
7409
31643d54
BW
7410 rc6vids = 0;
7411 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
cf819eff 7412 if (IS_GEN(dev_priv, 6) && ret) {
31643d54 7413 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
cf819eff 7414 } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
31643d54
BW
7415 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7416 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
7417 rc6vids &= 0xffff00;
7418 rc6vids |= GEN6_ENCODE_RC6_VID(450);
7419 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
7420 if (ret)
7421 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7422 }
7423
3ceea6a1 7424 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
2b4e57bd
ED
7425}
7426
960e5465
SAK
7427static void gen6_enable_rps(struct drm_i915_private *dev_priv)
7428{
960e5465
SAK
7429 /* Here begins a magic sequence of register writes to enable
7430 * auto-downclocking.
7431 *
7432 * Perhaps there might be some value in exposing these to
7433 * userspace...
7434 */
3ceea6a1 7435 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
960e5465
SAK
7436
7437 /* Power down if completely idle for over 50ms */
7438 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
7439 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7440
7441 reset_rps(dev_priv, gen6_set_rps);
7442
3ceea6a1 7443 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
960e5465
SAK
7444}
7445
fb7404e8 7446static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
2b4e57bd 7447{
562d9bae 7448 struct intel_rps *rps = &dev_priv->gt_pm.rps;
66c1f77a
MK
7449 const int min_freq = 15;
7450 const int scaling_factor = 180;
3ebecd07
CW
7451 unsigned int gpu_freq;
7452 unsigned int max_ia_freq, min_ring_freq;
4c8c7743 7453 unsigned int max_gpu_freq, min_gpu_freq;
eda79642 7454 struct cpufreq_policy *policy;
2b4e57bd 7455
ebb5eb7d 7456 lockdep_assert_held(&rps->lock);
79f5b2c7 7457
66c1f77a
MK
7458 if (rps->max_freq <= rps->min_freq)
7459 return;
7460
eda79642
BW
7461 policy = cpufreq_cpu_get(0);
7462 if (policy) {
7463 max_ia_freq = policy->cpuinfo.max_freq;
7464 cpufreq_cpu_put(policy);
7465 } else {
7466 /*
7467 * Default to measured freq if none found, PCU will ensure we
7468 * don't go over
7469 */
2b4e57bd 7470 max_ia_freq = tsc_khz;
eda79642 7471 }
2b4e57bd
ED
7472
7473 /* Convert from kHz to MHz */
7474 max_ia_freq /= 1000;
7475
153b4b95 7476 min_ring_freq = I915_READ(DCLK) & 0xf;
f6aca45c
BW
7477 /* convert DDR frequency from units of 266.6MHz to bandwidth */
7478 min_ring_freq = mult_frac(min_ring_freq, 8, 3);
3ebecd07 7479
d586b5f4
CW
7480 min_gpu_freq = rps->min_freq;
7481 max_gpu_freq = rps->max_freq;
2b2874ef 7482 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
4c8c7743 7483 /* Convert GT frequency to 50 HZ units */
d586b5f4
CW
7484 min_gpu_freq /= GEN9_FREQ_SCALER;
7485 max_gpu_freq /= GEN9_FREQ_SCALER;
4c8c7743
AG
7486 }
7487
2b4e57bd
ED
7488 /*
7489 * For each potential GPU frequency, load a ring frequency we'd like
7490 * to use for memory access. We do this by specifying the IA frequency
7491 * the PCU should use as a reference to determine the ring frequency.
7492 */
4c8c7743 7493 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
66c1f77a 7494 const int diff = max_gpu_freq - gpu_freq;
3ebecd07
CW
7495 unsigned int ia_freq = 0, ring_freq = 0;
7496
2b2874ef 7497 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
4c8c7743
AG
7498 /*
7499 * ring_freq = 2 * GT. ring_freq is in 100MHz units
7500 * No floor required for ring frequency on SKL.
7501 */
7502 ring_freq = gpu_freq;
c56b89f1 7503 } else if (INTEL_GEN(dev_priv) >= 8) {
46c764d4
BW
7504 /* max(2 * GT, DDR). NB: GT is 50MHz units */
7505 ring_freq = max(min_ring_freq, gpu_freq);
dc97997a 7506 } else if (IS_HASWELL(dev_priv)) {
f6aca45c 7507 ring_freq = mult_frac(gpu_freq, 5, 4);
3ebecd07
CW
7508 ring_freq = max(min_ring_freq, ring_freq);
7509 /* leave ia_freq as the default, chosen by cpufreq */
7510 } else {
7511 /* On older processors, there is no separate ring
7512 * clock domain, so in order to boost the bandwidth
7513 * of the ring, we need to upclock the CPU (ia_freq).
7514 *
7515 * For GPU frequencies less than 750MHz,
7516 * just use the lowest ring freq.
7517 */
7518 if (gpu_freq < min_freq)
7519 ia_freq = 800;
7520 else
7521 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
7522 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
7523 }
2b4e57bd 7524
42c0526c
BW
7525 sandybridge_pcode_write(dev_priv,
7526 GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
3ebecd07
CW
7527 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
7528 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
7529 gpu_freq);
2b4e57bd 7530 }
2b4e57bd
ED
7531}
7532
03af2045 7533static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
2b6b3a09
D
7534{
7535 u32 val, rp0;
7536
5b5929cb 7537 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
2b6b3a09 7538
0258404f 7539 switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) {
5b5929cb
JN
7540 case 8:
7541 /* (2 * 4) config */
7542 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
7543 break;
7544 case 12:
7545 /* (2 * 6) config */
7546 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
7547 break;
7548 case 16:
7549 /* (2 * 8) config */
7550 default:
7551 /* Setting (2 * 8) Min RP0 for any other combination */
7552 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
7553 break;
095acd5f 7554 }
5b5929cb
JN
7555
7556 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
7557
2b6b3a09
D
7558 return rp0;
7559}
7560
7561static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7562{
7563 u32 val, rpe;
7564
7565 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7566 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7567
7568 return rpe;
7569}
7570
7707df4a
D
7571static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7572{
7573 u32 val, rp1;
7574
5b5929cb
JN
7575 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7576 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7577
7707df4a
D
7578 return rp1;
7579}
7580
96676fe3
D
7581static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7582{
7583 u32 val, rpn;
7584
7585 val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7586 rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7587 FB_GFX_FREQ_FUSE_MASK);
7588
7589 return rpn;
7590}
7591
f8f2b001
D
7592static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7593{
7594 u32 val, rp1;
7595
7596 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7597
7598 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7599
7600 return rp1;
7601}
7602
03af2045 7603static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
0a073b84
JB
7604{
7605 u32 val, rp0;
7606
64936258 7607 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
0a073b84
JB
7608
7609 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7610 /* Clamp to max */
7611 rp0 = min_t(u32, rp0, 0xea);
7612
7613 return rp0;
7614}
7615
7616static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7617{
7618 u32 val, rpe;
7619
64936258 7620 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
0a073b84 7621 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
64936258 7622 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
0a073b84
JB
7623 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7624
7625 return rpe;
7626}
7627
03af2045 7628static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
0a073b84 7629{
36146035
ID
7630 u32 val;
7631
7632 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7633 /*
7634 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7635 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7636 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7637 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
7638 * to make sure it matches what Punit accepts.
7639 */
7640 return max_t(u32, val, 0xc0);
0a073b84
JB
7641}
7642
ae48434c
ID
7643/* Check that the pctx buffer wasn't move under us. */
7644static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7645{
7646 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7647
77894226 7648 WARN_ON(pctx_addr != dev_priv->dsm.start +
ae48434c
ID
7649 dev_priv->vlv_pctx->stolen->start);
7650}
7651
38807746
D
7652
7653/* Check that the pcbr address is not empty. */
7654static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7655{
7656 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7657
7658 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7659}
7660
dc97997a 7661static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
38807746 7662{
b7128ef1
MA
7663 resource_size_t pctx_paddr, paddr;
7664 resource_size_t pctx_size = 32*1024;
38807746 7665 u32 pcbr;
38807746 7666
38807746
D
7667 pcbr = I915_READ(VLV_PCBR);
7668 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
ce611ef8 7669 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
77894226
MA
7670 paddr = dev_priv->dsm.end + 1 - pctx_size;
7671 GEM_BUG_ON(paddr > U32_MAX);
38807746
D
7672
7673 pctx_paddr = (paddr & (~4095));
7674 I915_WRITE(VLV_PCBR, pctx_paddr);
7675 }
ce611ef8
VS
7676
7677 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
38807746
D
7678}
7679
dc97997a 7680static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
c9cddffc 7681{
c9cddffc 7682 struct drm_i915_gem_object *pctx;
b7128ef1
MA
7683 resource_size_t pctx_paddr;
7684 resource_size_t pctx_size = 24*1024;
c9cddffc 7685 u32 pcbr;
c9cddffc
JB
7686
7687 pcbr = I915_READ(VLV_PCBR);
7688 if (pcbr) {
7689 /* BIOS set it up already, grab the pre-alloc'd space */
b7128ef1 7690 resource_size_t pcbr_offset;
c9cddffc 7691
77894226 7692 pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
187685cb 7693 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
c9cddffc 7694 pcbr_offset,
190d6cd5 7695 I915_GTT_OFFSET_NONE,
c9cddffc
JB
7696 pctx_size);
7697 goto out;
7698 }
7699
ce611ef8
VS
7700 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7701
c9cddffc
JB
7702 /*
7703 * From the Gunit register HAS:
7704 * The Gfx driver is expected to program this register and ensure
7705 * proper allocation within Gfx stolen memory. For example, this
7706 * register should be programmed such than the PCBR range does not
7707 * overlap with other ranges, such as the frame buffer, protected
7708 * memory, or any other relevant ranges.
7709 */
187685cb 7710 pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
c9cddffc
JB
7711 if (!pctx) {
7712 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
ee504898 7713 goto out;
c9cddffc
JB
7714 }
7715
77894226
MA
7716 GEM_BUG_ON(range_overflows_t(u64,
7717 dev_priv->dsm.start,
7718 pctx->stolen->start,
7719 U32_MAX));
7720 pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
c9cddffc
JB
7721 I915_WRITE(VLV_PCBR, pctx_paddr);
7722
7723out:
ce611ef8 7724 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
c9cddffc
JB
7725 dev_priv->vlv_pctx = pctx;
7726}
7727
dc97997a 7728static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
ae48434c 7729{
818fed4f 7730 struct drm_i915_gem_object *pctx;
ae48434c 7731
818fed4f
CW
7732 pctx = fetch_and_zero(&dev_priv->vlv_pctx);
7733 if (pctx)
7734 i915_gem_object_put(pctx);
ae48434c
ID
7735}
7736
c30fec65
VS
7737static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7738{
562d9bae 7739 dev_priv->gt_pm.rps.gpll_ref_freq =
c30fec65
VS
7740 vlv_get_cck_clock(dev_priv, "GPLL ref",
7741 CCK_GPLL_CLOCK_CONTROL,
7742 dev_priv->czclk_freq);
7743
7744 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
562d9bae 7745 dev_priv->gt_pm.rps.gpll_ref_freq);
c30fec65
VS
7746}
7747
dc97997a 7748static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
4e80519e 7749{
562d9bae 7750 struct intel_rps *rps = &dev_priv->gt_pm.rps;
2bb25c17 7751 u32 val;
4e80519e 7752
dc97997a 7753 valleyview_setup_pctx(dev_priv);
4e80519e 7754
337fa6e0
CW
7755 vlv_iosf_sb_get(dev_priv,
7756 BIT(VLV_IOSF_SB_PUNIT) |
7757 BIT(VLV_IOSF_SB_NC) |
7758 BIT(VLV_IOSF_SB_CCK));
7759
c30fec65
VS
7760 vlv_init_gpll_ref_freq(dev_priv);
7761
2bb25c17
VS
7762 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7763 switch ((val >> 6) & 3) {
7764 case 0:
7765 case 1:
7766 dev_priv->mem_freq = 800;
7767 break;
7768 case 2:
7769 dev_priv->mem_freq = 1066;
7770 break;
7771 case 3:
7772 dev_priv->mem_freq = 1333;
7773 break;
7774 }
80b83b62 7775 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
2bb25c17 7776
562d9bae
SAK
7777 rps->max_freq = valleyview_rps_max_freq(dev_priv);
7778 rps->rp0_freq = rps->max_freq;
4e80519e 7779 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7780 intel_gpu_freq(dev_priv, rps->max_freq),
7781 rps->max_freq);
4e80519e 7782
562d9bae 7783 rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
4e80519e 7784 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7785 intel_gpu_freq(dev_priv, rps->efficient_freq),
7786 rps->efficient_freq);
4e80519e 7787
562d9bae 7788 rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
f8f2b001 7789 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7790 intel_gpu_freq(dev_priv, rps->rp1_freq),
7791 rps->rp1_freq);
f8f2b001 7792
562d9bae 7793 rps->min_freq = valleyview_rps_min_freq(dev_priv);
4e80519e 7794 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7795 intel_gpu_freq(dev_priv, rps->min_freq),
7796 rps->min_freq);
337fa6e0
CW
7797
7798 vlv_iosf_sb_put(dev_priv,
7799 BIT(VLV_IOSF_SB_PUNIT) |
7800 BIT(VLV_IOSF_SB_NC) |
7801 BIT(VLV_IOSF_SB_CCK));
4e80519e
ID
7802}
7803
dc97997a 7804static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
38807746 7805{
562d9bae 7806 struct intel_rps *rps = &dev_priv->gt_pm.rps;
2bb25c17 7807 u32 val;
2b6b3a09 7808
dc97997a 7809 cherryview_setup_pctx(dev_priv);
2b6b3a09 7810
337fa6e0
CW
7811 vlv_iosf_sb_get(dev_priv,
7812 BIT(VLV_IOSF_SB_PUNIT) |
7813 BIT(VLV_IOSF_SB_NC) |
7814 BIT(VLV_IOSF_SB_CCK));
7815
c30fec65
VS
7816 vlv_init_gpll_ref_freq(dev_priv);
7817
c6e8f39d 7818 val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
c6e8f39d 7819
2bb25c17 7820 switch ((val >> 2) & 0x7) {
2bb25c17 7821 case 3:
2bb25c17
VS
7822 dev_priv->mem_freq = 2000;
7823 break;
bfa7df01 7824 default:
2bb25c17
VS
7825 dev_priv->mem_freq = 1600;
7826 break;
7827 }
80b83b62 7828 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
2bb25c17 7829
562d9bae
SAK
7830 rps->max_freq = cherryview_rps_max_freq(dev_priv);
7831 rps->rp0_freq = rps->max_freq;
2b6b3a09 7832 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7833 intel_gpu_freq(dev_priv, rps->max_freq),
7834 rps->max_freq);
2b6b3a09 7835
562d9bae 7836 rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
2b6b3a09 7837 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7838 intel_gpu_freq(dev_priv, rps->efficient_freq),
7839 rps->efficient_freq);
2b6b3a09 7840
562d9bae 7841 rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7707df4a 7842 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7843 intel_gpu_freq(dev_priv, rps->rp1_freq),
7844 rps->rp1_freq);
7707df4a 7845
562d9bae 7846 rps->min_freq = cherryview_rps_min_freq(dev_priv);
2b6b3a09 7847 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
562d9bae
SAK
7848 intel_gpu_freq(dev_priv, rps->min_freq),
7849 rps->min_freq);
2b6b3a09 7850
337fa6e0
CW
7851 vlv_iosf_sb_put(dev_priv,
7852 BIT(VLV_IOSF_SB_PUNIT) |
7853 BIT(VLV_IOSF_SB_NC) |
7854 BIT(VLV_IOSF_SB_CCK));
7855
562d9bae
SAK
7856 WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7857 rps->min_freq) & 1,
1c14762d 7858 "Odd GPU freq values\n");
38807746
D
7859}
7860
/* Powersave teardown for VLV: releases the power context object. */
static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
{
	valleyview_cleanup_pctx(dev_priv);
}
7865
d46b00dc 7866static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
38807746 7867{
e2f80391 7868 struct intel_engine_cs *engine;
3b3f1650 7869 enum intel_engine_id id;
fb6db0f5 7870 u32 gtfifodbg, rc6_mode, pcbr;
38807746 7871
297b32ec
VS
7872 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7873 GT_FIFO_FREE_ENTRIES_CHV);
38807746
D
7874 if (gtfifodbg) {
7875 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7876 gtfifodbg);
7877 I915_WRITE(GTFIFODBG, gtfifodbg);
7878 }
7879
7880 cherryview_check_pctx(dev_priv);
7881
7882 /* 1a & 1b: Get forcewake during program sequence. Although the driver
7883 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
3ceea6a1 7884 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
38807746 7885
160614a2
VS
7886 /* Disable RC states. */
7887 I915_WRITE(GEN6_RC_CONTROL, 0);
7888
38807746
D
7889 /* 2a: Program RC6 thresholds.*/
7890 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7891 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7892 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7893
3b3f1650 7894 for_each_engine(engine, dev_priv, id)
e2f80391 7895 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
38807746
D
7896 I915_WRITE(GEN6_RC_SLEEP, 0);
7897
f4f71c7d
D
7898 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
7899 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
38807746 7900
d46b00dc 7901 /* Allows RC6 residency counter to work */
38807746
D
7902 I915_WRITE(VLV_COUNTER_CONTROL,
7903 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7904 VLV_MEDIA_RC6_COUNT_EN |
7905 VLV_RENDER_RC6_COUNT_EN));
7906
7907 /* For now we assume BIOS is allocating and populating the PCBR */
7908 pcbr = I915_READ(VLV_PCBR);
7909
38807746 7910 /* 3: Enable RC6 */
fb6db0f5
CW
7911 rc6_mode = 0;
7912 if (pcbr >> VLV_PCBR_ADDR_SHIFT)
af5a75a3 7913 rc6_mode = GEN7_RC_CTL_TO_MODE;
38807746
D
7914 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7915
3ceea6a1 7916 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
d46b00dc
SAK
7917}
7918
7919static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7920{
7921 u32 val;
7922
3ceea6a1 7923 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
d46b00dc
SAK
7924
7925 /* 1: Program defaults and thresholds for RPS*/
3cbdb48f 7926 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
2b6b3a09
D
7927 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7928 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7929 I915_WRITE(GEN6_RP_UP_EI, 66000);
7930 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7931
7932 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7933
d46b00dc 7934 /* 2: Enable RPS */
2b6b3a09
D
7935 I915_WRITE(GEN6_RP_CONTROL,
7936 GEN6_RP_MEDIA_HW_NORMAL_MODE |
eb973a5e 7937 GEN6_RP_MEDIA_IS_GFX |
2b6b3a09
D
7938 GEN6_RP_ENABLE |
7939 GEN6_RP_UP_BUSY_AVG |
7940 GEN6_RP_DOWN_IDLE_AVG);
7941
3ef62342 7942 /* Setting Fixed Bias */
337fa6e0
CW
7943 vlv_punit_get(dev_priv);
7944
7945 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
3ef62342
D
7946 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7947
2b6b3a09
D
7948 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7949
337fa6e0
CW
7950 vlv_punit_put(dev_priv);
7951
8d40c3ae
VS
7952 /* RPS code assumes GPLL is used */
7953 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7954
742f491d 7955 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
2b6b3a09
D
7956 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7957
3a45b05c 7958 reset_rps(dev_priv, valleyview_set_rps);
2b6b3a09 7959
3ceea6a1 7960 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
38807746
D
7961}
7962
0d6fc92a 7963static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
0a073b84 7964{
e2f80391 7965 struct intel_engine_cs *engine;
3b3f1650 7966 enum intel_engine_id id;
fb6db0f5 7967 u32 gtfifodbg;
0a073b84 7968
ae48434c
ID
7969 valleyview_check_pctx(dev_priv);
7970
297b32ec
VS
7971 gtfifodbg = I915_READ(GTFIFODBG);
7972 if (gtfifodbg) {
f7d85c1e
JB
7973 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7974 gtfifodbg);
0a073b84
JB
7975 I915_WRITE(GTFIFODBG, gtfifodbg);
7976 }
7977
3ceea6a1 7978 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
0a073b84 7979
160614a2
VS
7980 /* Disable RC states. */
7981 I915_WRITE(GEN6_RC_CONTROL, 0);
7982
0a073b84
JB
7983 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
7984 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7985 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7986
3b3f1650 7987 for_each_engine(engine, dev_priv, id)
e2f80391 7988 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
0a073b84 7989
2f0aa304 7990 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
0a073b84 7991
0d6fc92a 7992 /* Allows RC6 residency counter to work */
49798eb2 7993 I915_WRITE(VLV_COUNTER_CONTROL,
6b7f6aa7
MK
7994 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7995 VLV_MEDIA_RC0_COUNT_EN |
31685c25 7996 VLV_RENDER_RC0_COUNT_EN |
49798eb2
JB
7997 VLV_MEDIA_RC6_COUNT_EN |
7998 VLV_RENDER_RC6_COUNT_EN));
31685c25 7999
fb6db0f5
CW
8000 I915_WRITE(GEN6_RC_CONTROL,
8001 GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
0a073b84 8002
3ceea6a1 8003 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
0d6fc92a
SAK
8004}
8005
8006static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
8007{
8008 u32 val;
8009
3ceea6a1 8010 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
0d6fc92a
SAK
8011
8012 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
8013 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
8014 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
8015 I915_WRITE(GEN6_RP_UP_EI, 66000);
8016 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
8017
8018 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
8019
8020 I915_WRITE(GEN6_RP_CONTROL,
8021 GEN6_RP_MEDIA_TURBO |
8022 GEN6_RP_MEDIA_HW_NORMAL_MODE |
8023 GEN6_RP_MEDIA_IS_GFX |
8024 GEN6_RP_ENABLE |
8025 GEN6_RP_UP_BUSY_AVG |
8026 GEN6_RP_DOWN_IDLE_CONT);
8027
337fa6e0
CW
8028 vlv_punit_get(dev_priv);
8029
3ef62342 8030 /* Setting Fixed Bias */
337fa6e0 8031 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
3ef62342
D
8032 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
8033
64936258 8034 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
0a073b84 8035
337fa6e0
CW
8036 vlv_punit_put(dev_priv);
8037
8d40c3ae
VS
8038 /* RPS code assumes GPLL is used */
8039 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
8040
742f491d 8041 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
0a073b84
JB
8042 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
8043
3a45b05c 8044 reset_rps(dev_priv, valleyview_set_rps);
0a073b84 8045
3ceea6a1 8046 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
0a073b84
JB
8047}
8048
dde18883
ED
/*
 * Decode a PXVFREQ register value into a frequency.
 *
 * The divider is taken from bits 21:16, the post-divider shift from
 * bits 13:12 and the pre-divider from bits 2:0. The result is
 * div * 133333 / ((1 << post) * pre), or 0 when the pre-divider is 0.
 */
static unsigned long intel_pxfreq(u32 vidfreq)
{
	const int divider = (vidfreq & 0x3f0000) >> 16;
	const int post_shift = (vidfreq & 0x3000) >> 12;
	const int pre_div = vidfreq & 0x7;

	/* Avoid dividing by zero for an unprogrammed entry. */
	if (pre_div == 0)
		return 0;

	return (divider * 133333) / ((1 << post_shift) * pre_div);
}
8063
eb48eb00
DV
/*
 * Lookup table used by __i915_chipset_val(): an entry is selected when
 * its (i, t) pair matches (ips.c_m, ips.r_t); m and c are then applied
 * as the multiplier and additive constant of the power estimate.
 */
static const struct cparams {
	u16 i;
	u16 t;
	u16 m;
	u16 c;
} cparams[] = {
	{ .i = 1, .t = 1333, .m = 301, .c = 28664 },
	{ .i = 1, .t = 1066, .m = 294, .c = 24460 },
	{ .i = 1, .t = 800,  .m = 294, .c = 25192 },
	{ .i = 0, .t = 1333, .m = 276, .c = 27605 },
	{ .i = 0, .t = 1066, .m = 276, .c = 27605 },
	{ .i = 0, .t = 800,  .m = 231, .c = 23784 },
};
8077
f531dcb2 8078static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
eb48eb00
DV
8079{
8080 u64 total_count, diff, ret;
8081 u32 count1, count2, count3, m = 0, c = 0;
8082 unsigned long now = jiffies_to_msecs(jiffies), diff1;
8083 int i;
8084
67520415 8085 lockdep_assert_held(&mchdev_lock);
02d71956 8086
20e4d407 8087 diff1 = now - dev_priv->ips.last_time1;
eb48eb00
DV
8088
8089 /* Prevent division-by-zero if we are asking too fast.
8090 * Also, we don't get interesting results if we are polling
8091 * faster than once in 10ms, so just return the saved value
8092 * in such cases.
8093 */
8094 if (diff1 <= 10)
20e4d407 8095 return dev_priv->ips.chipset_power;
eb48eb00
DV
8096
8097 count1 = I915_READ(DMIEC);
8098 count2 = I915_READ(DDREC);
8099 count3 = I915_READ(CSIEC);
8100
8101 total_count = count1 + count2 + count3;
8102
8103 /* FIXME: handle per-counter overflow */
20e4d407
DV
8104 if (total_count < dev_priv->ips.last_count1) {
8105 diff = ~0UL - dev_priv->ips.last_count1;
eb48eb00
DV
8106 diff += total_count;
8107 } else {
20e4d407 8108 diff = total_count - dev_priv->ips.last_count1;
eb48eb00
DV
8109 }
8110
8111 for (i = 0; i < ARRAY_SIZE(cparams); i++) {
20e4d407
DV
8112 if (cparams[i].i == dev_priv->ips.c_m &&
8113 cparams[i].t == dev_priv->ips.r_t) {
eb48eb00
DV
8114 m = cparams[i].m;
8115 c = cparams[i].c;
8116 break;
8117 }
8118 }
8119
8120 diff = div_u64(diff, diff1);
8121 ret = ((m * diff) + c);
8122 ret = div_u64(ret, 10);
8123
20e4d407
DV
8124 dev_priv->ips.last_count1 = total_count;
8125 dev_priv->ips.last_time1 = now;
eb48eb00 8126
20e4d407 8127 dev_priv->ips.chipset_power = ret;
eb48eb00
DV
8128
8129 return ret;
8130}
8131
f531dcb2
CW
8132unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
8133{
4a8ab5ea
CW
8134 intel_wakeref_t wakeref;
8135 unsigned long val = 0;
f531dcb2 8136
cf819eff 8137 if (!IS_GEN(dev_priv, 5))
f531dcb2
CW
8138 return 0;
8139
4a8ab5ea
CW
8140 with_intel_runtime_pm(dev_priv, wakeref) {
8141 spin_lock_irq(&mchdev_lock);
8142 val = __i915_chipset_val(dev_priv);
8143 spin_unlock_irq(&mchdev_lock);
8144 }
f531dcb2
CW
8145
8146 return val;
8147}
8148
eb48eb00
DV
8149unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
8150{
8151 unsigned long m, x, b;
8152 u32 tsfs;
8153
8154 tsfs = I915_READ(TSFS);
8155
8156 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
8157 x = I915_READ8(TR1);
8158
8159 b = tsfs & TSFS_INTR_MASK;
8160
8161 return ((m * x) / 127) - b;
8162}
8163
d972d6ee
MK
/*
 * Convert a PXVID code to a voltage value: 0 maps to 0, codes in the
 * range [8, 31) are treated as 31, and the result is (code + 2) * 125.
 */
static int _pxvid_to_vd(u8 pxvid)
{
	int code = pxvid;

	if (code == 0)
		return 0;

	if (code >= 8 && code < 31)
		code = 31;

	return (code + 2) * 125;
}
8174
8175static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
eb48eb00 8176{
d972d6ee
MK
8177 const int vd = _pxvid_to_vd(pxvid);
8178 const int vm = vd - 1125;
8179
dc97997a 8180 if (INTEL_INFO(dev_priv)->is_mobile)
d972d6ee
MK
8181 return vm > 0 ? vm : 0;
8182
8183 return vd;
eb48eb00
DV
8184}
8185
02d71956 8186static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
eb48eb00 8187{
5ed0bdf2 8188 u64 now, diff, diffms;
eb48eb00
DV
8189 u32 count;
8190
67520415 8191 lockdep_assert_held(&mchdev_lock);
eb48eb00 8192
5ed0bdf2
TG
8193 now = ktime_get_raw_ns();
8194 diffms = now - dev_priv->ips.last_time2;
8195 do_div(diffms, NSEC_PER_MSEC);
eb48eb00
DV
8196
8197 /* Don't divide by 0 */
eb48eb00
DV
8198 if (!diffms)
8199 return;
8200
8201 count = I915_READ(GFXEC);
8202
20e4d407
DV
8203 if (count < dev_priv->ips.last_count2) {
8204 diff = ~0UL - dev_priv->ips.last_count2;
eb48eb00
DV
8205 diff += count;
8206 } else {
20e4d407 8207 diff = count - dev_priv->ips.last_count2;
eb48eb00
DV
8208 }
8209
20e4d407
DV
8210 dev_priv->ips.last_count2 = count;
8211 dev_priv->ips.last_time2 = now;
eb48eb00
DV
8212
8213 /* More magic constants... */
8214 diff = diff * 1181;
8215 diff = div_u64(diff, diffms * 10);
20e4d407 8216 dev_priv->ips.gfx_power = diff;
eb48eb00
DV
8217}
8218
02d71956
DV
8219void i915_update_gfx_val(struct drm_i915_private *dev_priv)
8220{
4a8ab5ea
CW
8221 intel_wakeref_t wakeref;
8222
cf819eff 8223 if (!IS_GEN(dev_priv, 5))
02d71956
DV
8224 return;
8225
4a8ab5ea
CW
8226 with_intel_runtime_pm(dev_priv, wakeref) {
8227 spin_lock_irq(&mchdev_lock);
8228 __i915_update_gfx_val(dev_priv);
8229 spin_unlock_irq(&mchdev_lock);
8230 }
02d71956
DV
8231}
8232
f531dcb2 8233static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
eb48eb00
DV
8234{
8235 unsigned long t, corr, state1, corr2, state2;
8236 u32 pxvid, ext_v;
8237
67520415 8238 lockdep_assert_held(&mchdev_lock);
02d71956 8239
562d9bae 8240 pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
eb48eb00
DV
8241 pxvid = (pxvid >> 24) & 0x7f;
8242 ext_v = pvid_to_extvid(dev_priv, pxvid);
8243
8244 state1 = ext_v;
8245
8246 t = i915_mch_val(dev_priv);
8247
8248 /* Revel in the empirically derived constants */
8249
8250 /* Correction factor in 1/100000 units */
8251 if (t > 80)
8252 corr = ((t * 2349) + 135940);
8253 else if (t >= 50)
8254 corr = ((t * 964) + 29317);
8255 else /* < 50 */
8256 corr = ((t * 301) + 1004);
8257
8258 corr = corr * ((150142 * state1) / 10000 - 78642);
8259 corr /= 100000;
20e4d407 8260 corr2 = (corr * dev_priv->ips.corr);
eb48eb00
DV
8261
8262 state2 = (corr2 * state1) / 10000;
8263 state2 /= 100; /* convert to mW */
8264
02d71956 8265 __i915_update_gfx_val(dev_priv);
eb48eb00 8266
20e4d407 8267 return dev_priv->ips.gfx_power + state2;
eb48eb00
DV
8268}
8269
f531dcb2
CW
8270unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
8271{
4a8ab5ea
CW
8272 intel_wakeref_t wakeref;
8273 unsigned long val = 0;
f531dcb2 8274
cf819eff 8275 if (!IS_GEN(dev_priv, 5))
f531dcb2
CW
8276 return 0;
8277
4a8ab5ea
CW
8278 with_intel_runtime_pm(dev_priv, wakeref) {
8279 spin_lock_irq(&mchdev_lock);
8280 val = __i915_gfx_val(dev_priv);
8281 spin_unlock_irq(&mchdev_lock);
8282 }
f531dcb2 8283
4a8ab5ea
CW
8284 return val;
8285}
f531dcb2 8286
adc674ce 8287static struct drm_i915_private __rcu *i915_mch_dev;
f531dcb2 8288
4a8ab5ea
CW
8289static struct drm_i915_private *mchdev_get(void)
8290{
8291 struct drm_i915_private *i915;
8292
8293 rcu_read_lock();
adc674ce 8294 i915 = rcu_dereference(i915_mch_dev);
4a8ab5ea
CW
8295 if (!kref_get_unless_zero(&i915->drm.ref))
8296 i915 = NULL;
8297 rcu_read_unlock();
8298
8299 return i915;
f531dcb2
CW
8300}
8301
eb48eb00
DV
8302/**
8303 * i915_read_mch_val - return value for IPS use
8304 *
8305 * Calculate and return a value for the IPS driver to use when deciding whether
8306 * we have thermal and power headroom to increase CPU or GPU power budget.
8307 */
8308unsigned long i915_read_mch_val(void)
8309{
4a8ab5ea
CW
8310 struct drm_i915_private *i915;
8311 unsigned long chipset_val = 0;
8312 unsigned long graphics_val = 0;
8313 intel_wakeref_t wakeref;
eb48eb00 8314
4a8ab5ea
CW
8315 i915 = mchdev_get();
8316 if (!i915)
8317 return 0;
eb48eb00 8318
4a8ab5ea
CW
8319 with_intel_runtime_pm(i915, wakeref) {
8320 spin_lock_irq(&mchdev_lock);
8321 chipset_val = __i915_chipset_val(i915);
8322 graphics_val = __i915_gfx_val(i915);
8323 spin_unlock_irq(&mchdev_lock);
8324 }
eb48eb00 8325
4a8ab5ea
CW
8326 drm_dev_put(&i915->drm);
8327 return chipset_val + graphics_val;
eb48eb00
DV
8328}
8329EXPORT_SYMBOL_GPL(i915_read_mch_val);
8330
8331/**
8332 * i915_gpu_raise - raise GPU frequency limit
8333 *
8334 * Raise the limit; IPS indicates we have thermal headroom.
8335 */
8336bool i915_gpu_raise(void)
8337{
4a8ab5ea 8338 struct drm_i915_private *i915;
eb48eb00 8339
4a8ab5ea
CW
8340 i915 = mchdev_get();
8341 if (!i915)
8342 return false;
eb48eb00 8343
4a8ab5ea
CW
8344 spin_lock_irq(&mchdev_lock);
8345 if (i915->ips.max_delay > i915->ips.fmax)
8346 i915->ips.max_delay--;
9270388e 8347 spin_unlock_irq(&mchdev_lock);
eb48eb00 8348
4a8ab5ea
CW
8349 drm_dev_put(&i915->drm);
8350 return true;
eb48eb00
DV
8351}
8352EXPORT_SYMBOL_GPL(i915_gpu_raise);
8353
8354/**
8355 * i915_gpu_lower - lower GPU frequency limit
8356 *
8357 * IPS indicates we're close to a thermal limit, so throttle back the GPU
8358 * frequency maximum.
8359 */
8360bool i915_gpu_lower(void)
8361{
4a8ab5ea 8362 struct drm_i915_private *i915;
eb48eb00 8363
4a8ab5ea
CW
8364 i915 = mchdev_get();
8365 if (!i915)
8366 return false;
eb48eb00 8367
4a8ab5ea
CW
8368 spin_lock_irq(&mchdev_lock);
8369 if (i915->ips.max_delay < i915->ips.min_delay)
8370 i915->ips.max_delay++;
9270388e 8371 spin_unlock_irq(&mchdev_lock);
eb48eb00 8372
4a8ab5ea
CW
8373 drm_dev_put(&i915->drm);
8374 return true;
eb48eb00
DV
8375}
8376EXPORT_SYMBOL_GPL(i915_gpu_lower);
8377
8378/**
8379 * i915_gpu_busy - indicate GPU business to IPS
8380 *
8381 * Tell the IPS driver whether or not the GPU is busy.
8382 */
8383bool i915_gpu_busy(void)
8384{
4a8ab5ea
CW
8385 struct drm_i915_private *i915;
8386 bool ret;
eb48eb00 8387
4a8ab5ea
CW
8388 i915 = mchdev_get();
8389 if (!i915)
8390 return false;
eb48eb00 8391
4a8ab5ea
CW
8392 ret = i915->gt.awake;
8393
8394 drm_dev_put(&i915->drm);
eb48eb00
DV
8395 return ret;
8396}
8397EXPORT_SYMBOL_GPL(i915_gpu_busy);
8398
8399/**
8400 * i915_gpu_turbo_disable - disable graphics turbo
8401 *
8402 * Disable graphics turbo by resetting the max frequency and setting the
8403 * current frequency to the default.
8404 */
8405bool i915_gpu_turbo_disable(void)
8406{
4a8ab5ea
CW
8407 struct drm_i915_private *i915;
8408 bool ret;
eb48eb00 8409
4a8ab5ea
CW
8410 i915 = mchdev_get();
8411 if (!i915)
8412 return false;
eb48eb00 8413
4a8ab5ea
CW
8414 spin_lock_irq(&mchdev_lock);
8415 i915->ips.max_delay = i915->ips.fstart;
8416 ret = ironlake_set_drps(i915, i915->ips.fstart);
9270388e 8417 spin_unlock_irq(&mchdev_lock);
eb48eb00 8418
4a8ab5ea 8419 drm_dev_put(&i915->drm);
eb48eb00
DV
8420 return ret;
8421}
8422EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
8423
8424/**
8425 * Tells the intel_ips driver that the i915 driver is now loaded, if
8426 * IPS got loaded first.
8427 *
8428 * This awkward dance is so that neither module has to depend on the
8429 * other in order for IPS to do the appropriate communication of
8430 * GPU turbo limits to i915.
8431 */
8432static void
8433ips_ping_for_i915_load(void)
8434{
8435 void (*link)(void);
8436
8437 link = symbol_get(ips_link_to_i915_driver);
8438 if (link) {
8439 link();
8440 symbol_put(ips_link_to_i915_driver);
8441 }
8442}
8443
8444void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
8445{
02d71956
DV
8446 /* We only register the i915 ips part with intel-ips once everything is
8447 * set up, to avoid intel-ips sneaking in and reading bogus values. */
4a8ab5ea 8448 rcu_assign_pointer(i915_mch_dev, dev_priv);
eb48eb00
DV
8449
8450 ips_ping_for_i915_load();
8451}
8452
8453void intel_gpu_ips_teardown(void)
8454{
4a8ab5ea 8455 rcu_assign_pointer(i915_mch_dev, NULL);
eb48eb00 8456}
76c3552f 8457
dc97997a 8458static void intel_init_emon(struct drm_i915_private *dev_priv)
dde18883 8459{
dde18883
ED
8460 u32 lcfuse;
8461 u8 pxw[16];
8462 int i;
8463
8464 /* Disable to program */
8465 I915_WRITE(ECR, 0);
8466 POSTING_READ(ECR);
8467
8468 /* Program energy weights for various events */
8469 I915_WRITE(SDEW, 0x15040d00);
8470 I915_WRITE(CSIEW0, 0x007f0000);
8471 I915_WRITE(CSIEW1, 0x1e220004);
8472 I915_WRITE(CSIEW2, 0x04000004);
8473
8474 for (i = 0; i < 5; i++)
616847e7 8475 I915_WRITE(PEW(i), 0);
dde18883 8476 for (i = 0; i < 3; i++)
616847e7 8477 I915_WRITE(DEW(i), 0);
dde18883
ED
8478
8479 /* Program P-state weights to account for frequency power adjustment */
8480 for (i = 0; i < 16; i++) {
616847e7 8481 u32 pxvidfreq = I915_READ(PXVFREQ(i));
dde18883
ED
8482 unsigned long freq = intel_pxfreq(pxvidfreq);
8483 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
8484 PXVFREQ_PX_SHIFT;
8485 unsigned long val;
8486
8487 val = vid * vid;
8488 val *= (freq / 1000);
8489 val *= 255;
8490 val /= (127*127*900);
8491 if (val > 0xff)
8492 DRM_ERROR("bad pxval: %ld\n", val);
8493 pxw[i] = val;
8494 }
8495 /* Render standby states get 0 weight */
8496 pxw[14] = 0;
8497 pxw[15] = 0;
8498
8499 for (i = 0; i < 4; i++) {
8500 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
8501 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
616847e7 8502 I915_WRITE(PXW(i), val);
dde18883
ED
8503 }
8504
8505 /* Adjust magic regs to magic values (more experimental results) */
8506 I915_WRITE(OGW0, 0);
8507 I915_WRITE(OGW1, 0);
8508 I915_WRITE(EG0, 0x00007f00);
8509 I915_WRITE(EG1, 0x0000000e);
8510 I915_WRITE(EG2, 0x000e0000);
8511 I915_WRITE(EG3, 0x68000300);
8512 I915_WRITE(EG4, 0x42000000);
8513 I915_WRITE(EG5, 0x00140031);
8514 I915_WRITE(EG6, 0);
8515 I915_WRITE(EG7, 0);
8516
8517 for (i = 0; i < 8; i++)
616847e7 8518 I915_WRITE(PXWL(i), 0);
dde18883
ED
8519
8520 /* Enable PMON + select events */
8521 I915_WRITE(ECR, 0x80000019);
8522
8523 lcfuse = I915_READ(LCFUSE02);
8524
20e4d407 8525 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
dde18883
ED
8526}
8527
dc97997a 8528void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
ae48434c 8529{
562d9bae
SAK
8530 struct intel_rps *rps = &dev_priv->gt_pm.rps;
8531
b268c699
ID
8532 /*
8533 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
8534 * requirement.
8535 */
fb6db0f5 8536 if (!sanitize_rc6(dev_priv)) {
b268c699 8537 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
08ea70a4 8538 pm_runtime_get(&dev_priv->drm.pdev->dev);
b268c699 8539 }
e6069ca8 8540
773ea9a8 8541 /* Initialize RPS limits (for userspace) */
dc97997a
CW
8542 if (IS_CHERRYVIEW(dev_priv))
8543 cherryview_init_gt_powersave(dev_priv);
8544 else if (IS_VALLEYVIEW(dev_priv))
8545 valleyview_init_gt_powersave(dev_priv);
2a13ae79 8546 else if (INTEL_GEN(dev_priv) >= 6)
773ea9a8
CW
8547 gen6_init_rps_frequencies(dev_priv);
8548
8549 /* Derive initial user preferences/limits from the hardware limits */
562d9bae
SAK
8550 rps->max_freq_softlimit = rps->max_freq;
8551 rps->min_freq_softlimit = rps->min_freq;
773ea9a8 8552
99ac9612 8553 /* After setting max-softlimit, find the overclock max freq */
cf819eff 8554 if (IS_GEN(dev_priv, 6) ||
99ac9612
CW
8555 IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
8556 u32 params = 0;
8557
8558 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
8559 if (params & BIT(31)) { /* OC supported */
8560 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
562d9bae 8561 (rps->max_freq & 0xff) * 50,
99ac9612 8562 (params & 0xff) * 50);
562d9bae 8563 rps->max_freq = params & 0xff;
99ac9612
CW
8564 }
8565 }
8566
29ecd78d 8567 /* Finally allow us to boost to max by default */
562d9bae 8568 rps->boost_freq = rps->max_freq;
844e3313
CW
8569 rps->idle_freq = rps->min_freq;
8570 rps->cur_freq = rps->idle_freq;
ae48434c
ID
8571}
8572
dc97997a 8573void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
ae48434c 8574{
8dac1e1f 8575 if (IS_VALLEYVIEW(dev_priv))
dc97997a 8576 valleyview_cleanup_gt_powersave(dev_priv);
b268c699 8577
fb6db0f5 8578 if (!HAS_RC6(dev_priv))
08ea70a4 8579 pm_runtime_put(&dev_priv->drm.pdev->dev);
ae48434c
ID
8580}
8581
b7137e0c
CW
8582void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8583{
37d933fc
SAK
8584 dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8585 dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
b7137e0c 8586 intel_disable_gt_powersave(dev_priv);
54b4f68f 8587
d02b98b8
OM
8588 if (INTEL_GEN(dev_priv) >= 11)
8589 gen11_reset_rps_interrupts(dev_priv);
61e1e376 8590 else if (INTEL_GEN(dev_priv) >= 6)
d02b98b8 8591 gen6_reset_rps_interrupts(dev_priv);
156c7ca0
JB
8592}
8593
0870a2a4
SAK
8594static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8595{
ebb5eb7d 8596 lockdep_assert_held(&i915->gt_pm.rps.lock);
0870a2a4 8597
37d933fc
SAK
8598 if (!i915->gt_pm.llc_pstate.enabled)
8599 return;
8600
0870a2a4 8601 /* Currently there is no HW configuration to be done to disable. */
37d933fc
SAK
8602
8603 i915->gt_pm.llc_pstate.enabled = false;
0870a2a4
SAK
8604}
8605
fc77426a 8606static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8090c6b9 8607{
ebb5eb7d 8608 lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
562d9bae 8609
37d933fc
SAK
8610 if (!dev_priv->gt_pm.rc6.enabled)
8611 return;
8612
fc77426a
SAK
8613 if (INTEL_GEN(dev_priv) >= 9)
8614 gen9_disable_rc6(dev_priv);
8615 else if (IS_CHERRYVIEW(dev_priv))
8616 cherryview_disable_rc6(dev_priv);
8617 else if (IS_VALLEYVIEW(dev_priv))
8618 valleyview_disable_rc6(dev_priv);
8619 else if (INTEL_GEN(dev_priv) >= 6)
8620 gen6_disable_rc6(dev_priv);
37d933fc
SAK
8621
8622 dev_priv->gt_pm.rc6.enabled = false;
fc77426a 8623}
e494837a 8624
fc77426a
SAK
8625static void intel_disable_rps(struct drm_i915_private *dev_priv)
8626{
ebb5eb7d 8627 lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
e534770a 8628
37d933fc
SAK
8629 if (!dev_priv->gt_pm.rps.enabled)
8630 return;
8631
fc77426a 8632 if (INTEL_GEN(dev_priv) >= 9)
b7137e0c 8633 gen9_disable_rps(dev_priv);
fc77426a 8634 else if (IS_CHERRYVIEW(dev_priv))
b7137e0c 8635 cherryview_disable_rps(dev_priv);
fc77426a 8636 else if (IS_VALLEYVIEW(dev_priv))
b7137e0c 8637 valleyview_disable_rps(dev_priv);
fc77426a 8638 else if (INTEL_GEN(dev_priv) >= 6)
b7137e0c 8639 gen6_disable_rps(dev_priv);
fc77426a 8640 else if (IS_IRONLAKE_M(dev_priv))
b7137e0c 8641 ironlake_disable_drps(dev_priv);
37d933fc
SAK
8642
8643 dev_priv->gt_pm.rps.enabled = false;
fc77426a
SAK
8644}
8645
8646void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8647{
ebb5eb7d 8648 mutex_lock(&dev_priv->gt_pm.rps.lock);
b7137e0c 8649
fc77426a
SAK
8650 intel_disable_rc6(dev_priv);
8651 intel_disable_rps(dev_priv);
0870a2a4
SAK
8652 if (HAS_LLC(dev_priv))
8653 intel_disable_llc_pstate(dev_priv);
8654
ebb5eb7d 8655 mutex_unlock(&dev_priv->gt_pm.rps.lock);
8090c6b9
DV
8656}
8657
0870a2a4
SAK
8658static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8659{
ebb5eb7d 8660 lockdep_assert_held(&i915->gt_pm.rps.lock);
0870a2a4 8661
37d933fc
SAK
8662 if (i915->gt_pm.llc_pstate.enabled)
8663 return;
8664
0870a2a4 8665 gen6_update_ring_freq(i915);
37d933fc
SAK
8666
8667 i915->gt_pm.llc_pstate.enabled = true;
0870a2a4
SAK
8668}
8669
fc77426a 8670static void intel_enable_rc6(struct drm_i915_private *dev_priv)
1a01ab3b 8671{
ebb5eb7d 8672 lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
562d9bae 8673
37d933fc
SAK
8674 if (dev_priv->gt_pm.rc6.enabled)
8675 return;
8676
fc77426a
SAK
8677 if (IS_CHERRYVIEW(dev_priv))
8678 cherryview_enable_rc6(dev_priv);
8679 else if (IS_VALLEYVIEW(dev_priv))
8680 valleyview_enable_rc6(dev_priv);
a79208de
MK
8681 else if (INTEL_GEN(dev_priv) >= 11)
8682 gen11_enable_rc6(dev_priv);
fc77426a
SAK
8683 else if (INTEL_GEN(dev_priv) >= 9)
8684 gen9_enable_rc6(dev_priv);
8685 else if (IS_BROADWELL(dev_priv))
8686 gen8_enable_rc6(dev_priv);
8687 else if (INTEL_GEN(dev_priv) >= 6)
8688 gen6_enable_rc6(dev_priv);
37d933fc
SAK
8689
8690 dev_priv->gt_pm.rc6.enabled = true;
fc77426a 8691}
1a01ab3b 8692
fc77426a
SAK
8693static void intel_enable_rps(struct drm_i915_private *dev_priv)
8694{
8695 struct intel_rps *rps = &dev_priv->gt_pm.rps;
0a073b84 8696
ebb5eb7d 8697 lockdep_assert_held(&rps->lock);
dc97997a 8698
37d933fc
SAK
8699 if (rps->enabled)
8700 return;
8701
dc97997a
CW
8702 if (IS_CHERRYVIEW(dev_priv)) {
8703 cherryview_enable_rps(dev_priv);
8704 } else if (IS_VALLEYVIEW(dev_priv)) {
8705 valleyview_enable_rps(dev_priv);
b7137e0c 8706 } else if (INTEL_GEN(dev_priv) >= 9) {
dc97997a 8707 gen9_enable_rps(dev_priv);
dc97997a
CW
8708 } else if (IS_BROADWELL(dev_priv)) {
8709 gen8_enable_rps(dev_priv);
b7137e0c 8710 } else if (INTEL_GEN(dev_priv) >= 6) {
dc97997a 8711 gen6_enable_rps(dev_priv);
b7137e0c
CW
8712 } else if (IS_IRONLAKE_M(dev_priv)) {
8713 ironlake_enable_drps(dev_priv);
8714 intel_init_emon(dev_priv);
0a073b84 8715 }
aed242ff 8716
562d9bae
SAK
8717 WARN_ON(rps->max_freq < rps->min_freq);
8718 WARN_ON(rps->idle_freq > rps->max_freq);
aed242ff 8719
562d9bae
SAK
8720 WARN_ON(rps->efficient_freq < rps->min_freq);
8721 WARN_ON(rps->efficient_freq > rps->max_freq);
37d933fc
SAK
8722
8723 rps->enabled = true;
fc77426a
SAK
8724}
8725
8726void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8727{
fc77426a
SAK
8728 /* Powersaving is controlled by the host when inside a VM */
8729 if (intel_vgpu_active(dev_priv))
8730 return;
8731
ebb5eb7d 8732 mutex_lock(&dev_priv->gt_pm.rps.lock);
fc77426a 8733
fb6db0f5
CW
8734 if (HAS_RC6(dev_priv))
8735 intel_enable_rc6(dev_priv);
91cbdb83
CW
8736 if (HAS_RPS(dev_priv))
8737 intel_enable_rps(dev_priv);
fc77426a
SAK
8738 if (HAS_LLC(dev_priv))
8739 intel_enable_llc_pstate(dev_priv);
aed242ff 8740
ebb5eb7d 8741 mutex_unlock(&dev_priv->gt_pm.rps.lock);
b7137e0c 8742}
3cc134e3 8743
46f16e63 8744static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
3107bd48 8745{
3107bd48
DV
8746 /*
8747 * On Ibex Peak and Cougar Point, we need to disable clock
8748 * gating for the panel power sequencer or it will fail to
8749 * start up when no ports are active.
8750 */
8751 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8752}
8753
46f16e63 8754static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
0e088b8f 8755{
b12ce1d8 8756 enum pipe pipe;
0e088b8f 8757
055e393f 8758 for_each_pipe(dev_priv, pipe) {
0e088b8f
VS
8759 I915_WRITE(DSPCNTR(pipe),
8760 I915_READ(DSPCNTR(pipe)) |
8761 DISPPLANE_TRICKLE_FEED_DISABLE);
b12ce1d8
VS
8762
8763 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8764 POSTING_READ(DSPSURF(pipe));
0e088b8f
VS
8765 }
8766}
8767
91200c09 8768static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 8769{
5ce9a649 8770 u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6f1d69b0 8771
f1e8fa56
DL
8772 /*
8773 * Required for FBC
8774 * WaFbcDisableDpfcClockGating:ilk
8775 */
4d47e4f5
DL
8776 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8777 ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8778 ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
6f1d69b0
ED
8779
8780 I915_WRITE(PCH_3DCGDIS0,
8781 MARIUNIT_CLOCK_GATE_DISABLE |
8782 SVSMUNIT_CLOCK_GATE_DISABLE);
8783 I915_WRITE(PCH_3DCGDIS1,
8784 VFMUNIT_CLOCK_GATE_DISABLE);
8785
6f1d69b0
ED
8786 /*
8787 * According to the spec the following bits should be set in
8788 * order to enable memory self-refresh
8789 * The bit 22/21 of 0x42004
8790 * The bit 5 of 0x42020
8791 * The bit 15 of 0x45000
8792 */
8793 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8794 (I915_READ(ILK_DISPLAY_CHICKEN2) |
8795 ILK_DPARB_GATE | ILK_VSDPFD_FULL));
4d47e4f5 8796 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
6f1d69b0
ED
8797 I915_WRITE(DISP_ARB_CTL,
8798 (I915_READ(DISP_ARB_CTL) |
8799 DISP_FBC_WM_DIS));
017636cc 8800
6f1d69b0
ED
8801 /*
8802 * Based on the document from hardware guys the following bits
8803 * should be set unconditionally in order to enable FBC.
8804 * The bit 22 of 0x42000
8805 * The bit 22 of 0x42004
8806 * The bit 7,8,9 of 0x42020.
8807 */
50a0bc90 8808 if (IS_IRONLAKE_M(dev_priv)) {
4bb35334 8809 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
6f1d69b0
ED
8810 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8811 I915_READ(ILK_DISPLAY_CHICKEN1) |
8812 ILK_FBCQ_DIS);
8813 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8814 I915_READ(ILK_DISPLAY_CHICKEN2) |
8815 ILK_DPARB_GATE);
6f1d69b0
ED
8816 }
8817
4d47e4f5
DL
8818 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8819
6f1d69b0
ED
8820 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8821 I915_READ(ILK_DISPLAY_CHICKEN2) |
8822 ILK_ELPIN_409_SELECT);
8823 I915_WRITE(_3D_CHICKEN2,
8824 _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8825 _3D_CHICKEN2_WM_READ_PIPELINED);
4358a374 8826
ecdb4eb7 8827 /* WaDisableRenderCachePipelinedFlush:ilk */
4358a374
DV
8828 I915_WRITE(CACHE_MODE_0,
8829 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
3107bd48 8830
4e04632e
AG
8831 /* WaDisable_RenderCache_OperationalFlush:ilk */
8832 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8833
46f16e63 8834 g4x_disable_trickle_feed(dev_priv);
bdad2b2f 8835
46f16e63 8836 ibx_init_clock_gating(dev_priv);
3107bd48
DV
8837}
8838
46f16e63 8839static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
3107bd48 8840{
3107bd48 8841 int pipe;
5ce9a649 8842 u32 val;
3107bd48
DV
8843
8844 /*
8845 * On Ibex Peak and Cougar Point, we need to disable clock
8846 * gating for the panel power sequencer or it will fail to
8847 * start up when no ports are active.
8848 */
cd664078
JB
8849 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8850 PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8851 PCH_CPUNIT_CLOCK_GATE_DISABLE);
3107bd48
DV
8852 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8853 DPLS_EDP_PPS_FIX_DIS);
335c07b7
TI
8854 /* The below fixes the weird display corruption, a few pixels shifted
8855 * downward, on (only) LVDS of some HP laptops with IVY.
8856 */
055e393f 8857 for_each_pipe(dev_priv, pipe) {
dc4bd2d1
PZ
8858 val = I915_READ(TRANS_CHICKEN2(pipe));
8859 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8860 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
41aa3448 8861 if (dev_priv->vbt.fdi_rx_polarity_inverted)
3f704fa2 8862 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
dc4bd2d1
PZ
8863 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8864 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8865 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
3f704fa2
PZ
8866 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8867 }
3107bd48 8868 /* WADP0ClockGatingDisable */
055e393f 8869 for_each_pipe(dev_priv, pipe) {
3107bd48
DV
8870 I915_WRITE(TRANS_CHICKEN1(pipe),
8871 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8872 }
6f1d69b0
ED
8873}
8874
46f16e63 8875static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
1d7aaa0c 8876{
5ce9a649 8877 u32 tmp;
1d7aaa0c
DV
8878
8879 tmp = I915_READ(MCH_SSKPD);
df662a28
DV
8880 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8881 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
8882 tmp);
1d7aaa0c
DV
8883}
8884
46f16e63 8885static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 8886{
5ce9a649 8887 u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6f1d69b0 8888
231e54f6 8889 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6f1d69b0
ED
8890
8891 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8892 I915_READ(ILK_DISPLAY_CHICKEN2) |
8893 ILK_ELPIN_409_SELECT);
8894
ecdb4eb7 8895 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
4283908e
DV
8896 I915_WRITE(_3D_CHICKEN,
8897 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8898
4e04632e
AG
8899 /* WaDisable_RenderCache_OperationalFlush:snb */
8900 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8901
8d85d272
VS
8902 /*
8903 * BSpec recommends 8x4 when MSAA is used,
8904 * however in practice 16x4 seems fastest.
c5c98a58
VS
8905 *
8906 * Note that PS/WM thread counts depend on the WIZ hashing
8907 * disable bit, which we don't touch here, but it's good
8908 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8d85d272
VS
8909 */
8910 I915_WRITE(GEN6_GT_MODE,
98533251 8911 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8d85d272 8912
6f1d69b0 8913 I915_WRITE(CACHE_MODE_0,
50743298 8914 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
6f1d69b0
ED
8915
8916 I915_WRITE(GEN6_UCGCTL1,
8917 I915_READ(GEN6_UCGCTL1) |
8918 GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8919 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8920
8921 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8922 * gating disable must be set. Failure to set it results in
8923 * flickering pixels due to Z write ordering failures after
8924 * some amount of runtime in the Mesa "fire" demo, and Unigine
8925 * Sanctuary and Tropics, and apparently anything else with
8926 * alpha test or pixel discard.
8927 *
8928 * According to the spec, bit 11 (RCCUNIT) must also be set,
8929 * but we didn't debug actual testcases to find it out.
0f846f81 8930 *
ef59318c
VS
8931 * WaDisableRCCUnitClockGating:snb
8932 * WaDisableRCPBUnitClockGating:snb
6f1d69b0
ED
8933 */
8934 I915_WRITE(GEN6_UCGCTL2,
8935 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8936 GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8937
5eb146dd 8938 /* WaStripsFansDisableFastClipPerformanceFix:snb */
743b57d8
VS
8939 I915_WRITE(_3D_CHICKEN3,
8940 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
6f1d69b0 8941
e927ecde
VS
8942 /*
8943 * Bspec says:
8944 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8945 * 3DSTATE_SF number of SF output attributes is more than 16."
8946 */
8947 I915_WRITE(_3D_CHICKEN3,
8948 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8949
6f1d69b0
ED
8950 /*
8951 * According to the spec the following bits should be
8952 * set in order to enable memory self-refresh and fbc:
8953 * The bit21 and bit22 of 0x42000
8954 * The bit21 and bit22 of 0x42004
8955 * The bit5 and bit7 of 0x42020
8956 * The bit14 of 0x70180
8957 * The bit14 of 0x71180
4bb35334
DL
8958 *
8959 * WaFbcAsynchFlipDisableFbcQueue:snb
6f1d69b0
ED
8960 */
8961 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8962 I915_READ(ILK_DISPLAY_CHICKEN1) |
8963 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8964 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8965 I915_READ(ILK_DISPLAY_CHICKEN2) |
8966 ILK_DPARB_GATE | ILK_VSDPFD_FULL);
231e54f6
DL
8967 I915_WRITE(ILK_DSPCLK_GATE_D,
8968 I915_READ(ILK_DSPCLK_GATE_D) |
8969 ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
8970 ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
6f1d69b0 8971
46f16e63 8972 g4x_disable_trickle_feed(dev_priv);
f8f2ac9a 8973
46f16e63 8974 cpt_init_clock_gating(dev_priv);
1d7aaa0c 8975
46f16e63 8976 gen6_check_mch_setup(dev_priv);
6f1d69b0
ED
8977}
8978
8979static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8980{
5ce9a649 8981 u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
6f1d69b0 8982
3aad9059 8983 /*
46680e0a 8984 * WaVSThreadDispatchOverride:ivb,vlv
3aad9059
VS
8985 *
8986 * This actually overrides the dispatch
8987 * mode for all thread types.
8988 */
6f1d69b0
ED
8989 reg &= ~GEN7_FF_SCHED_MASK;
8990 reg |= GEN7_FF_TS_SCHED_HW;
8991 reg |= GEN7_FF_VS_SCHED_HW;
8992 reg |= GEN7_FF_DS_SCHED_HW;
8993
8994 I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8995}
8996
46f16e63 8997static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
17a303ec 8998{
17a303ec
PZ
8999 /*
9000 * TODO: this bit should only be enabled when really needed, then
9001 * disabled when not needed anymore in order to save power.
9002 */
4f8036a2 9003 if (HAS_PCH_LPT_LP(dev_priv))
17a303ec
PZ
9004 I915_WRITE(SOUTH_DSPCLK_GATE_D,
9005 I915_READ(SOUTH_DSPCLK_GATE_D) |
9006 PCH_LP_PARTITION_LEVEL_DISABLE);
0a790cdb
PZ
9007
9008 /* WADPOClockGatingDisable:hsw */
36c0d0cf
VS
9009 I915_WRITE(TRANS_CHICKEN1(PIPE_A),
9010 I915_READ(TRANS_CHICKEN1(PIPE_A)) |
0a790cdb 9011 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
17a303ec
PZ
9012}
9013
712bf364 9014static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
7d708ee4 9015{
4f8036a2 9016 if (HAS_PCH_LPT_LP(dev_priv)) {
5ce9a649 9017 u32 val = I915_READ(SOUTH_DSPCLK_GATE_D);
7d708ee4
ID
9018
9019 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
9020 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
9021 }
9022}
9023
450174fe
ID
9024static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
9025 int general_prio_credits,
9026 int high_prio_credits)
9027{
9028 u32 misccpctl;
930a784d 9029 u32 val;
450174fe
ID
9030
9031 /* WaTempDisableDOPClkGating:bdw */
9032 misccpctl = I915_READ(GEN7_MISCCPCTL);
9033 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
9034
930a784d
OM
9035 val = I915_READ(GEN8_L3SQCREG1);
9036 val &= ~L3_PRIO_CREDITS_MASK;
9037 val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
9038 val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
9039 I915_WRITE(GEN8_L3SQCREG1, val);
450174fe
ID
9040
9041 /*
9042 * Wait at least 100 clocks before re-enabling clock gating.
9043 * See the definition of L3SQCREG1 in BSpec.
9044 */
9045 POSTING_READ(GEN8_L3SQCREG1);
9046 udelay(1);
9047 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
9048}
9049
d65dc3e4
OM
9050static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
9051{
9052 /* This is not a Wa. Enable to reduce Sampler power */
9053 I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
9054 I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
622b3f68
RS
9055
9056 /* WaEnable32PlaneMode:icl */
9057 I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
9058 _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE));
d65dc3e4
OM
9059}
9060
0a46ddd5
RV
9061static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
9062{
9063 if (!HAS_PCH_CNP(dev_priv))
9064 return;
9065
470e7c61 9066 /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
4cc6feb7
RV
9067 I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
9068 CNP_PWM_CGE_GATING_DISABLE);
0a46ddd5
RV
9069}
9070
91200c09 9071static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
90007bca 9072{
8f067837 9073 u32 val;
0a46ddd5
RV
9074 cnp_init_clock_gating(dev_priv);
9075
1a25db65
RV
9076 /* This is not a Wa. Enable for better image quality */
9077 I915_WRITE(_3D_CHICKEN3,
9078 _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
9079
90007bca
RV
9080 /* WaEnableChickenDCPR:cnl */
9081 I915_WRITE(GEN8_CHICKEN_DCPR_1,
9082 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
9083
9084 /* WaFbcWakeMemOn:cnl */
9085 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
9086 DISP_FBC_MEMORY_WAKE);
9087
34991bd4
CW
9088 val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
9089 /* ReadHitWriteOnlyDisable:cnl */
9090 val |= RCCUNIT_CLKGATE_DIS;
90007bca
RV
9091 /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
9092 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
34991bd4
CW
9093 val |= SARBUNIT_CLKGATE_DIS;
9094 I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
01ab0f92 9095
a4713c5a
RV
9096 /* Wa_2201832410:cnl */
9097 val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
9098 val |= GWUNIT_CLKGATE_DIS;
9099 I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
9100
01ab0f92 9101 /* WaDisableVFclkgate:cnl */
14941b6e 9102 /* WaVFUnitClockGatingDisable:cnl */
01ab0f92
RA
9103 val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
9104 val |= VFUNIT_CLKGATE_DIS;
9105 I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
90007bca
RV
9106}
9107
0a46ddd5
RV
9108static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
9109{
9110 cnp_init_clock_gating(dev_priv);
9111 gen9_init_clock_gating(dev_priv);
9112
9113 /* WaFbcNukeOnHostModify:cfl */
9114 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9115 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9116}
9117
91200c09 9118static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
9498dba7 9119{
46f16e63 9120 gen9_init_clock_gating(dev_priv);
9498dba7
MK
9121
9122 /* WaDisableSDEUnitClockGating:kbl */
9123 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9124 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9125 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8aeb7f62
MK
9126
9127 /* WaDisableGamClockGating:kbl */
9128 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9129 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9130 GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
031cd8c8 9131
0a46ddd5 9132 /* WaFbcNukeOnHostModify:kbl */
031cd8c8
MK
9133 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9134 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9498dba7
MK
9135}
9136
91200c09 9137static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
dc00b6a0 9138{
46f16e63 9139 gen9_init_clock_gating(dev_priv);
44fff99f
MK
9140
9141 /* WAC6entrylatency:skl */
9142 I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
9143 FBC_LLC_FULLY_OPEN);
031cd8c8
MK
9144
9145 /* WaFbcNukeOnHostModify:skl */
9146 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9147 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
dc00b6a0
DV
9148}
9149
91200c09 9150static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
1020a5c2 9151{
8cb09836
MA
9152 /* The GTT cache must be disabled if the system is using 2M pages. */
9153 bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
9154 I915_GTT_PAGE_SIZE_2M);
07d27e20 9155 enum pipe pipe;
1020a5c2 9156
ab57fff1 9157 /* WaSwitchSolVfFArbitrationPriority:bdw */
50ed5fbd 9158 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
fe4ab3ce 9159
ab57fff1 9160 /* WaPsrDPAMaskVBlankInSRD:bdw */
fe4ab3ce
BW
9161 I915_WRITE(CHICKEN_PAR1_1,
9162 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
9163
ab57fff1 9164 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
055e393f 9165 for_each_pipe(dev_priv, pipe) {
07d27e20 9166 I915_WRITE(CHICKEN_PIPESL_1(pipe),
c7c65622 9167 I915_READ(CHICKEN_PIPESL_1(pipe)) |
8f670bb1 9168 BDW_DPRS_MASK_VBLANK_SRD);
fe4ab3ce 9169 }
63801f21 9170
ab57fff1
BW
9171 /* WaVSRefCountFullforceMissDisable:bdw */
9172 /* WaDSRefCountFullforceMissDisable:bdw */
9173 I915_WRITE(GEN7_FF_THREAD_MODE,
9174 I915_READ(GEN7_FF_THREAD_MODE) &
9175 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
36075a4c 9176
295e8bb7
VS
9177 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9178 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
4f1ca9e9
VS
9179
9180 /* WaDisableSDEUnitClockGating:bdw */
9181 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9182 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
5d708680 9183
450174fe
ID
9184 /* WaProgramL3SqcReg1Default:bdw */
9185 gen8_set_l3sqc_credits(dev_priv, 30, 2);
4d487cff 9186
8cb09836
MA
9187 /* WaGttCachingOffByDefault:bdw */
9188 I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
6d50b065 9189
17e0adf0
MK
9190 /* WaKVMNotificationOnConfigChange:bdw */
9191 I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
9192 | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
9193
46f16e63 9194 lpt_init_clock_gating(dev_priv);
9cc19733
RB
9195
9196 /* WaDisableDopClockGating:bdw
9197 *
9198 * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
9199 * clock gating.
9200 */
9201 I915_WRITE(GEN6_UCGCTL1,
9202 I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
1020a5c2
BW
9203}
9204
91200c09 9205static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
cad2a2d7 9206{
f3fc4884
FJ
9207 /* L3 caching of data atomics doesn't work -- disable it. */
9208 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
9209 I915_WRITE(HSW_ROW_CHICKEN3,
9210 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
9211
ecdb4eb7 9212 /* This is required by WaCatErrorRejectionIssue:hsw */
cad2a2d7
ED
9213 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9214 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9215 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9216
e36ea7ff
VS
9217 /* WaVSRefCountFullforceMissDisable:hsw */
9218 I915_WRITE(GEN7_FF_THREAD_MODE,
9219 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
cad2a2d7 9220
4e04632e
AG
9221 /* WaDisable_RenderCache_OperationalFlush:hsw */
9222 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9223
fe27c606
CW
9224 /* enable HiZ Raw Stall Optimization */
9225 I915_WRITE(CACHE_MODE_0_GEN7,
9226 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9227
ecdb4eb7 9228 /* WaDisable4x2SubspanOptimization:hsw */
cad2a2d7
ED
9229 I915_WRITE(CACHE_MODE_1,
9230 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
1544d9d5 9231
a12c4967
VS
9232 /*
9233 * BSpec recommends 8x4 when MSAA is used,
9234 * however in practice 16x4 seems fastest.
c5c98a58
VS
9235 *
9236 * Note that PS/WM thread counts depend on the WIZ hashing
9237 * disable bit, which we don't touch here, but it's good
9238 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
a12c4967
VS
9239 */
9240 I915_WRITE(GEN7_GT_MODE,
98533251 9241 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
a12c4967 9242
94411593
KG
9243 /* WaSampleCChickenBitEnable:hsw */
9244 I915_WRITE(HALF_SLICE_CHICKEN3,
9245 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
9246
ecdb4eb7 9247 /* WaSwitchSolVfFArbitrationPriority:hsw */
e3dff585
BW
9248 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9249
46f16e63 9250 lpt_init_clock_gating(dev_priv);
cad2a2d7
ED
9251}
9252
91200c09 9253static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9254{
5ce9a649 9255 u32 snpcr;
6f1d69b0 9256
231e54f6 9257 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6f1d69b0 9258
ecdb4eb7 9259 /* WaDisableEarlyCull:ivb */
87f8020e
JB
9260 I915_WRITE(_3D_CHICKEN3,
9261 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9262
ecdb4eb7 9263 /* WaDisableBackToBackFlipFix:ivb */
6f1d69b0
ED
9264 I915_WRITE(IVB_CHICKEN3,
9265 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9266 CHICKEN3_DGMG_DONE_FIX_DISABLE);
9267
ecdb4eb7 9268 /* WaDisablePSDDualDispatchEnable:ivb */
50a0bc90 9269 if (IS_IVB_GT1(dev_priv))
12f3382b
JB
9270 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9271 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
12f3382b 9272
4e04632e
AG
9273 /* WaDisable_RenderCache_OperationalFlush:ivb */
9274 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9275
ecdb4eb7 9276 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6f1d69b0
ED
9277 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
9278 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
9279
ecdb4eb7 9280 /* WaApplyL3ControlAndL3ChickenMode:ivb */
6f1d69b0
ED
9281 I915_WRITE(GEN7_L3CNTLREG1,
9282 GEN7_WA_FOR_GEN7_L3_CONTROL);
9283 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
8ab43976 9284 GEN7_WA_L3_CHICKEN_MODE);
50a0bc90 9285 if (IS_IVB_GT1(dev_priv))
8ab43976
JB
9286 I915_WRITE(GEN7_ROW_CHICKEN2,
9287 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
412236c2
VS
9288 else {
9289 /* must write both registers */
9290 I915_WRITE(GEN7_ROW_CHICKEN2,
9291 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8ab43976
JB
9292 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
9293 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
412236c2 9294 }
6f1d69b0 9295
ecdb4eb7 9296 /* WaForceL3Serialization:ivb */
61939d97
JB
9297 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9298 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9299
1b80a19a 9300 /*
0f846f81 9301 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
ecdb4eb7 9302 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
0f846f81
JB
9303 */
9304 I915_WRITE(GEN6_UCGCTL2,
28acf3b2 9305 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
0f846f81 9306
ecdb4eb7 9307 /* This is required by WaCatErrorRejectionIssue:ivb */
6f1d69b0
ED
9308 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9309 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9310 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9311
46f16e63 9312 g4x_disable_trickle_feed(dev_priv);
6f1d69b0
ED
9313
9314 gen7_setup_fixed_func_scheduler(dev_priv);
97e1930f 9315
22721343
CW
9316 if (0) { /* causes HiZ corruption on ivb:gt1 */
9317 /* enable HiZ Raw Stall Optimization */
9318 I915_WRITE(CACHE_MODE_0_GEN7,
9319 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9320 }
116f2b6d 9321
ecdb4eb7 9322 /* WaDisable4x2SubspanOptimization:ivb */
97e1930f
DV
9323 I915_WRITE(CACHE_MODE_1,
9324 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
20848223 9325
a607c1a4
VS
9326 /*
9327 * BSpec recommends 8x4 when MSAA is used,
9328 * however in practice 16x4 seems fastest.
c5c98a58
VS
9329 *
9330 * Note that PS/WM thread counts depend on the WIZ hashing
9331 * disable bit, which we don't touch here, but it's good
9332 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
a607c1a4
VS
9333 */
9334 I915_WRITE(GEN7_GT_MODE,
98533251 9335 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
a607c1a4 9336
20848223
BW
9337 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
9338 snpcr &= ~GEN6_MBC_SNPCR_MASK;
9339 snpcr |= GEN6_MBC_SNPCR_MED;
9340 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
3107bd48 9341
6e266956 9342 if (!HAS_PCH_NOP(dev_priv))
46f16e63 9343 cpt_init_clock_gating(dev_priv);
1d7aaa0c 9344
46f16e63 9345 gen6_check_mch_setup(dev_priv);
6f1d69b0
ED
9346}
9347
91200c09 9348static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9349{
ecdb4eb7 9350 /* WaDisableEarlyCull:vlv */
87f8020e
JB
9351 I915_WRITE(_3D_CHICKEN3,
9352 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9353
ecdb4eb7 9354 /* WaDisableBackToBackFlipFix:vlv */
6f1d69b0
ED
9355 I915_WRITE(IVB_CHICKEN3,
9356 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9357 CHICKEN3_DGMG_DONE_FIX_DISABLE);
9358
fad7d36e 9359 /* WaPsdDispatchEnable:vlv */
ecdb4eb7 9360 /* WaDisablePSDDualDispatchEnable:vlv */
12f3382b 9361 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
d3bc0303
JB
9362 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
9363 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
12f3382b 9364
4e04632e
AG
9365 /* WaDisable_RenderCache_OperationalFlush:vlv */
9366 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9367
ecdb4eb7 9368 /* WaForceL3Serialization:vlv */
61939d97
JB
9369 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9370 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9371
ecdb4eb7 9372 /* WaDisableDopClockGating:vlv */
8ab43976
JB
9373 I915_WRITE(GEN7_ROW_CHICKEN2,
9374 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9375
ecdb4eb7 9376 /* This is required by WaCatErrorRejectionIssue:vlv */
6f1d69b0
ED
9377 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9378 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9379 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9380
46680e0a
VS
9381 gen7_setup_fixed_func_scheduler(dev_priv);
9382
3c0edaeb 9383 /*
0f846f81 9384 * According to the spec, bit 13 (RCZUNIT) must be set on IVB; the same bit is set here for VLV.
ecdb4eb7 9385 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
0f846f81
JB
9386 */
9387 I915_WRITE(GEN6_UCGCTL2,
3c0edaeb 9388 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
0f846f81 9389
c98f5062
AG
9390 /* WaDisableL3Bank2xClockGate:vlv
9391 * Disabling L3 clock gating- MMIO 940c[25] = 1
9392 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
9393 I915_WRITE(GEN7_UCGCTL4,
9394 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
e3f33d46 9395
afd58e79
VS
9396 /*
9397 * BSpec says this must be set, even though
9398 * WaDisable4x2SubspanOptimization isn't listed for VLV.
9399 */
6b26c86d
DV
9400 I915_WRITE(CACHE_MODE_1,
9401 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
7983117f 9402
da2518f9
VS
9403 /*
9404 * BSpec recommends 8x4 when MSAA is used,
9405 * however in practice 16x4 seems fastest.
9406 *
9407 * Note that PS/WM thread counts depend on the WIZ hashing
9408 * disable bit, which we don't touch here, but it's good
9409 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9410 */
9411 I915_WRITE(GEN7_GT_MODE,
9412 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9413
031994ee
VS
9414 /*
9415 * WaIncreaseL3CreditsForVLVB0:vlv
9416 * This is the hardware default actually.
9417 */
9418 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
9419
2d809570 9420 /*
ecdb4eb7 9421 * WaDisableVLVClockGating_VBIIssue:vlv
2d809570
JB
9422 * Disable clock gating on the GCFG unit to prevent a delay
9423 * in the reporting of vblank events.
9424 */
7a0d1eed 9425 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
6f1d69b0
ED
9426}
9427
91200c09 9428static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
a4565da8 9429{
232ce337
VS
9430 /* WaVSRefCountFullforceMissDisable:chv */
9431 /* WaDSRefCountFullforceMissDisable:chv */
9432 I915_WRITE(GEN7_FF_THREAD_MODE,
9433 I915_READ(GEN7_FF_THREAD_MODE) &
9434 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
acea6f95
VS
9435
9436 /* WaDisableSemaphoreAndSyncFlipWait:chv */
9437 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9438 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
0846697c
VS
9439
9440 /* WaDisableCSUnitClockGating:chv */
9441 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9442 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
c631780f
VS
9443
9444 /* WaDisableSDEUnitClockGating:chv */
9445 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9446 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6d50b065 9447
450174fe
ID
9448 /*
9449 * WaProgramL3SqcReg1Default:chv
9450 * See gfxspecs/Related Documents/Performance Guide/
9451 * LSQC Setting Recommendations.
9452 */
9453 gen8_set_l3sqc_credits(dev_priv, 38, 2);
9454
6d50b065
VS
9455 /*
9456 * GTT cache may not work with big pages, so if those
9457 * are ever enabled GTT cache may need to be disabled.
9458 */
9459 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
a4565da8
VS
9460}
9461
46f16e63 9462static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9463{
5ce9a649 9464 u32 dspclk_gate;
6f1d69b0
ED
9465
9466 I915_WRITE(RENCLK_GATE_D1, 0);
9467 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9468 GS_UNIT_CLOCK_GATE_DISABLE |
9469 CL_UNIT_CLOCK_GATE_DISABLE);
9470 I915_WRITE(RAMCLK_GATE_D, 0);
9471 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9472 OVRUNIT_CLOCK_GATE_DISABLE |
9473 OVCUNIT_CLOCK_GATE_DISABLE;
50a0bc90 9474 if (IS_GM45(dev_priv))
6f1d69b0
ED
9475 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9476 I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
4358a374
DV
9477
9478 /* WaDisableRenderCachePipelinedFlush */
9479 I915_WRITE(CACHE_MODE_0,
9480 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
de1aa629 9481
4e04632e
AG
9482 /* WaDisable_RenderCache_OperationalFlush:g4x */
9483 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9484
46f16e63 9485 g4x_disable_trickle_feed(dev_priv);
6f1d69b0
ED
9486}
9487
91200c09 9488static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9489{
6f1d69b0
ED
9490 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9491 I915_WRITE(RENCLK_GATE_D2, 0);
9492 I915_WRITE(DSPCLK_GATE_D, 0);
9493 I915_WRITE(RAMCLK_GATE_D, 0);
9494 I915_WRITE16(DEUC, 0);
20f94967
VS
9495 I915_WRITE(MI_ARB_STATE,
9496 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
4e04632e
AG
9497
9498 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9499 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6f1d69b0
ED
9500}
9501
91200c09 9502static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9503{
6f1d69b0
ED
9504 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9505 I965_RCC_CLOCK_GATE_DISABLE |
9506 I965_RCPB_CLOCK_GATE_DISABLE |
9507 I965_ISC_CLOCK_GATE_DISABLE |
9508 I965_FBC_CLOCK_GATE_DISABLE);
9509 I915_WRITE(RENCLK_GATE_D2, 0);
20f94967
VS
9510 I915_WRITE(MI_ARB_STATE,
9511 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
4e04632e
AG
9512
9513 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9514 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6f1d69b0
ED
9515}
9516
46f16e63 9517static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9518{
6f1d69b0
ED
9519 u32 dstate = I915_READ(D_STATE);
9520
9521 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9522 DSTATE_DOT_CLOCK_GATING;
9523 I915_WRITE(D_STATE, dstate);
13a86b85 9524
9b1e14f4 9525 if (IS_PINEVIEW(dev_priv))
13a86b85 9526 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
974a3b0f
DV
9527
9528 /* IIR "flip pending" means done if this bit is set */
9529 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
12fabbcb
VS
9530
9531 /* interrupts should cause a wake up from C3 */
3299254f 9532 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
dbb42748
VS
9533
9534 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9535 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
1038392b
VS
9536
9537 I915_WRITE(MI_ARB_STATE,
9538 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6f1d69b0
ED
9539}
9540
46f16e63 9541static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9542{
6f1d69b0 9543 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
54e472ae
VS
9544
9545 /* interrupts should cause a wake up from C3 */
9546 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9547 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
1038392b
VS
9548
9549 I915_WRITE(MEM_MODE,
9550 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
6f1d69b0
ED
9551}
9552
46f16e63 9553static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9554{
1038392b
VS
9555 I915_WRITE(MEM_MODE,
9556 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9557 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
6f1d69b0
ED
9558}
9559
46f16e63 9560void intel_init_clock_gating(struct drm_i915_private *dev_priv)
6f1d69b0 9561{
46f16e63 9562 dev_priv->display.init_clock_gating(dev_priv);
6f1d69b0
ED
9563}
9564
712bf364 9565void intel_suspend_hw(struct drm_i915_private *dev_priv)
7d708ee4 9566{
712bf364
VS
9567 if (HAS_PCH_LPT(dev_priv))
9568 lpt_suspend_hw(dev_priv);
7d708ee4
ID
9569}
9570
46f16e63 9571static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
bb400da9
ID
9572{
9573 DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
9574}
9575
9576/**
9577 * intel_init_clock_gating_hooks - setup the clock gating hooks
9578 * @dev_priv: device private
9579 *
9580 * Setup the hooks that configure which clocks of a given platform can be
9581 * gated and also apply various GT and display specific workarounds for these
9582 * platforms. Note that some GT specific workarounds are applied separately
9583 * when GPU contexts or batchbuffers start their execution.
9584 */
9585void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9586{
39564ae8 9587 if (IS_GEN(dev_priv, 11))
d65dc3e4 9588 dev_priv->display.init_clock_gating = icl_init_clock_gating;
cc38cae7 9589 else if (IS_CANNONLAKE(dev_priv))
91200c09 9590 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
0a46ddd5
RV
9591 else if (IS_COFFEELAKE(dev_priv))
9592 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
90007bca 9593 else if (IS_SKYLAKE(dev_priv))
91200c09 9594 dev_priv->display.init_clock_gating = skl_init_clock_gating;
0a46ddd5 9595 else if (IS_KABYLAKE(dev_priv))
91200c09 9596 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9fb5026f 9597 else if (IS_BROXTON(dev_priv))
bb400da9 9598 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9fb5026f
ACO
9599 else if (IS_GEMINILAKE(dev_priv))
9600 dev_priv->display.init_clock_gating = glk_init_clock_gating;
bb400da9 9601 else if (IS_BROADWELL(dev_priv))
91200c09 9602 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
bb400da9 9603 else if (IS_CHERRYVIEW(dev_priv))
91200c09 9604 dev_priv->display.init_clock_gating = chv_init_clock_gating;
bb400da9 9605 else if (IS_HASWELL(dev_priv))
91200c09 9606 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
bb400da9 9607 else if (IS_IVYBRIDGE(dev_priv))
91200c09 9608 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
bb400da9 9609 else if (IS_VALLEYVIEW(dev_priv))
91200c09 9610 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
cf819eff 9611 else if (IS_GEN(dev_priv, 6))
bb400da9 9612 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
cf819eff 9613 else if (IS_GEN(dev_priv, 5))
91200c09 9614 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
bb400da9
ID
9615 else if (IS_G4X(dev_priv))
9616 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
c0f86832 9617 else if (IS_I965GM(dev_priv))
91200c09 9618 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
c0f86832 9619 else if (IS_I965G(dev_priv))
91200c09 9620 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
cf819eff 9621 else if (IS_GEN(dev_priv, 3))
bb400da9
ID
9622 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9623 else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9624 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
cf819eff 9625 else if (IS_GEN(dev_priv, 2))
bb400da9
ID
9626 dev_priv->display.init_clock_gating = i830_init_clock_gating;
9627 else {
9628 MISSING_CASE(INTEL_DEVID(dev_priv));
9629 dev_priv->display.init_clock_gating = nop_init_clock_gating;
9630 }
9631}
9632
1fa61106 9633/* Set up chip specific power management-related functions */
62d75df7 9634void intel_init_pm(struct drm_i915_private *dev_priv)
1fa61106 9635{
c921aba8 9636 /* For cxsr */
9b1e14f4 9637 if (IS_PINEVIEW(dev_priv))
148ac1f3 9638 i915_pineview_get_mem_freq(dev_priv);
cf819eff 9639 else if (IS_GEN(dev_priv, 5))
148ac1f3 9640 i915_ironlake_get_mem_freq(dev_priv);
c921aba8 9641
1fa61106 9642 /* For FIFO watermark updates */
62d75df7 9643 if (INTEL_GEN(dev_priv) >= 9) {
bb726519 9644 skl_setup_wm_latency(dev_priv);
e62929b3 9645 dev_priv->display.initial_watermarks = skl_initial_wm;
ccf010fb 9646 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
98d39494 9647 dev_priv->display.compute_global_watermarks = skl_compute_wm;
6e266956 9648 } else if (HAS_PCH_SPLIT(dev_priv)) {
bb726519 9649 ilk_setup_wm_latency(dev_priv);
53615a5e 9650
cf819eff 9651 if ((IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[1] &&
bd602544 9652 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
cf819eff 9653 (!IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[0] &&
bd602544 9654 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
86c8bbbe 9655 dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
ed4a6a7c
MR
9656 dev_priv->display.compute_intermediate_wm =
9657 ilk_compute_intermediate_wm;
9658 dev_priv->display.initial_watermarks =
9659 ilk_initial_watermarks;
9660 dev_priv->display.optimize_watermarks =
9661 ilk_optimize_watermarks;
bd602544
VS
9662 } else {
9663 DRM_DEBUG_KMS("Failed to read display plane latency. "
9664 "Disable CxSR\n");
9665 }
6b6b3eef 9666 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
bb726519 9667 vlv_setup_wm_latency(dev_priv);
ff32c54e 9668 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
4841da51 9669 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
ff32c54e 9670 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
4841da51 9671 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
ff32c54e 9672 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
04548cba
VS
9673 } else if (IS_G4X(dev_priv)) {
9674 g4x_setup_wm_latency(dev_priv);
9675 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9676 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9677 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9678 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9b1e14f4 9679 } else if (IS_PINEVIEW(dev_priv)) {
86d35d4e 9680 if (!intel_get_cxsr_latency(!IS_MOBILE(dev_priv),
1fa61106
ED
9681 dev_priv->is_ddr3,
9682 dev_priv->fsb_freq,
9683 dev_priv->mem_freq)) {
9684 DRM_INFO("failed to find known CxSR latency "
9685 "(found ddr%s fsb freq %d, mem freq %d), "
9686 "disabling CxSR\n",
9687 (dev_priv->is_ddr3 == 1) ? "3" : "2",
9688 dev_priv->fsb_freq, dev_priv->mem_freq);
9689 /* Disable CxSR and never update its watermark again */
5209b1f4 9690 intel_set_memory_cxsr(dev_priv, false);
1fa61106
ED
9691 dev_priv->display.update_wm = NULL;
9692 } else
9693 dev_priv->display.update_wm = pineview_update_wm;
cf819eff 9694 } else if (IS_GEN(dev_priv, 4)) {
1fa61106 9695 dev_priv->display.update_wm = i965_update_wm;
cf819eff 9696 } else if (IS_GEN(dev_priv, 3)) {
1fa61106
ED
9697 dev_priv->display.update_wm = i9xx_update_wm;
9698 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
cf819eff 9699 } else if (IS_GEN(dev_priv, 2)) {
62d75df7 9700 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
feb56b93 9701 dev_priv->display.update_wm = i845_update_wm;
1fa61106 9702 dev_priv->display.get_fifo_size = i845_get_fifo_size;
feb56b93
DV
9703 } else {
9704 dev_priv->display.update_wm = i9xx_update_wm;
1fa61106 9705 dev_priv->display.get_fifo_size = i830_get_fifo_size;
feb56b93 9706 }
feb56b93
DV
9707 } else {
9708 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
1fa61106
ED
9709 }
9710}
9711
dd06f88c
VS
9712static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9713{
562d9bae
SAK
9714 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9715
c30fec65
VS
9716 /*
9717 * N = val - 0xb7
9718 * Slow = Fast = GPLL ref * N
9719 */
562d9bae 9720 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
855ba3be
JB
9721}
9722
b55dd647 9723static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
855ba3be 9724{
562d9bae
SAK
9725 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9726
9727 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
855ba3be
JB
9728}
9729
b55dd647 9730static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
22b1b2f8 9731{
562d9bae
SAK
9732 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9733
c30fec65
VS
9734 /*
9735 * N = val / 2
9736 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9737 */
562d9bae 9738 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
22b1b2f8
D
9739}
9740
b55dd647 9741static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
22b1b2f8 9742{
562d9bae
SAK
9743 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9744
1c14762d 9745 /* CHV needs even values */
562d9bae 9746 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
22b1b2f8
D
9747}
9748
616bc820 9749int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
22b1b2f8 9750{
35ceabf3 9751 if (INTEL_GEN(dev_priv) >= 9)
500a3d2e
MK
9752 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9753 GEN9_FREQ_SCALER);
2d1fe073 9754 else if (IS_CHERRYVIEW(dev_priv))
616bc820 9755 return chv_gpu_freq(dev_priv, val);
2d1fe073 9756 else if (IS_VALLEYVIEW(dev_priv))
616bc820
VS
9757 return byt_gpu_freq(dev_priv, val);
9758 else
9759 return val * GT_FREQUENCY_MULTIPLIER;
22b1b2f8
D
9760}
9761
616bc820
VS
9762int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9763{
35ceabf3 9764 if (INTEL_GEN(dev_priv) >= 9)
500a3d2e
MK
9765 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9766 GT_FREQUENCY_MULTIPLIER);
2d1fe073 9767 else if (IS_CHERRYVIEW(dev_priv))
616bc820 9768 return chv_freq_opcode(dev_priv, val);
2d1fe073 9769 else if (IS_VALLEYVIEW(dev_priv))
616bc820
VS
9770 return byt_freq_opcode(dev_priv, val);
9771 else
500a3d2e 9772 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
616bc820 9773}
22b1b2f8 9774
192aa181 9775void intel_pm_setup(struct drm_i915_private *dev_priv)
907b28c5 9776{
ebb5eb7d 9777 mutex_init(&dev_priv->gt_pm.rps.lock);
60548c55 9778 mutex_init(&dev_priv->gt_pm.rps.power.mutex);
f742a552 9779
562d9bae 9780 atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
5d584b2e 9781
ad1443f0
SAK
9782 dev_priv->runtime_pm.suspended = false;
9783 atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
907b28c5 9784}
135bafa5 9785
47c21d9a
MK
9786static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9787 const i915_reg_t reg)
9788{
facbecad 9789 u32 lower, upper, tmp;
71cc2b18 9790 int loop = 2;
47c21d9a 9791
817cc079
TU
9792 /*
9793 * The register accessed do not need forcewake. We borrow
47c21d9a
MK
9794 * uncore lock to prevent concurrent access to range reg.
9795 */
817cc079 9796 lockdep_assert_held(&dev_priv->uncore.lock);
47c21d9a 9797
817cc079
TU
9798 /*
9799 * vlv and chv residency counters are 40 bits in width.
47c21d9a
MK
9800 * With a control bit, we can choose between upper or lower
9801 * 32bit window into this counter.
facbecad
CW
9802 *
9803 * Although we always use the counter in high-range mode elsewhere,
9804 * userspace may attempt to read the value before rc6 is initialised,
9805 * before we have set the default VLV_COUNTER_CONTROL value. So always
9806 * set the high bit to be safe.
47c21d9a 9807 */
facbecad
CW
9808 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9809 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
47c21d9a
MK
9810 upper = I915_READ_FW(reg);
9811 do {
9812 tmp = upper;
9813
9814 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9815 _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9816 lower = I915_READ_FW(reg);
9817
9818 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9819 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9820 upper = I915_READ_FW(reg);
71cc2b18 9821 } while (upper != tmp && --loop);
47c21d9a 9822
817cc079
TU
9823 /*
9824 * Everywhere else we always use VLV_COUNTER_CONTROL with the
facbecad
CW
9825 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9826 * now.
9827 */
9828
47c21d9a
MK
9829 return lower | (u64)upper << 8;
9830}
9831
36cc8b96 9832u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
c5a0ad11 9833 const i915_reg_t reg)
135bafa5 9834{
4319382e 9835 struct intel_uncore *uncore = &dev_priv->uncore;
817cc079
TU
9836 u64 time_hw, prev_hw, overflow_hw;
9837 unsigned int fw_domains;
9838 unsigned long flags;
9839 unsigned int i;
36cc8b96 9840 u32 mul, div;
135bafa5 9841
fb6db0f5 9842 if (!HAS_RC6(dev_priv))
135bafa5
MK
9843 return 0;
9844
817cc079
TU
9845 /*
9846 * Store previous hw counter values for counter wrap-around handling.
9847 *
9848 * There are only four interesting registers and they live next to each
9849 * other so we can use the relative address, compared to the smallest
9850 * one as the index into driver storage.
9851 */
9852 i = (i915_mmio_reg_offset(reg) -
9853 i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
9854 if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
9855 return 0;
9856
4319382e 9857 fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);
817cc079 9858
4319382e
DCS
9859 spin_lock_irqsave(&uncore->lock, flags);
9860 intel_uncore_forcewake_get__locked(uncore, fw_domains);
817cc079 9861
135bafa5
MK
9862 /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9863 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
36cc8b96 9864 mul = 1000000;
135bafa5 9865 div = dev_priv->czclk_freq;
817cc079 9866 overflow_hw = BIT_ULL(40);
47c21d9a 9867 time_hw = vlv_residency_raw(dev_priv, reg);
47c21d9a 9868 } else {
36cc8b96
TU
9869 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
9870 if (IS_GEN9_LP(dev_priv)) {
9871 mul = 10000;
9872 div = 12;
9873 } else {
9874 mul = 1280;
9875 div = 1;
9876 }
47c21d9a 9877
817cc079 9878 overflow_hw = BIT_ULL(32);
4319382e 9879 time_hw = intel_uncore_read_fw(uncore, reg);
47c21d9a 9880 }
135bafa5 9881
817cc079
TU
9882 /*
9883 * Counter wrap handling.
9884 *
9885 * But relying on a sufficient frequency of queries otherwise counters
9886 * can still wrap.
9887 */
9888 prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
9889 dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
9890
9891 /* RC6 delta from last sample. */
9892 if (time_hw >= prev_hw)
9893 time_hw -= prev_hw;
9894 else
9895 time_hw += overflow_hw - prev_hw;
9896
9897 /* Add delta to RC6 extended raw driver copy. */
9898 time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
9899 dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
9900
4319382e
DCS
9901 intel_uncore_forcewake_put__locked(uncore, fw_domains);
9902 spin_unlock_irqrestore(&uncore->lock, flags);
817cc079
TU
9903
9904 return mul_u64_u32_div(time_hw, mul, div);
135bafa5 9905}
c84b2705 9906
ecbb5fb7
JN
9907u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
9908 i915_reg_t reg)
9909{
9910 return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000);
9911}
9912
c84b2705
TU
9913u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
9914{
9915 u32 cagf;
9916
9917 if (INTEL_GEN(dev_priv) >= 9)
9918 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
9919 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
9920 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
9921 else
9922 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
9923
9924 return cagf;
9925}