From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 12 Apr 2017 13:01:50 +0000 (+0200)
Subject: 4.4-stable patches
X-Git-Tag: v4.10.11~21
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=42c43af1d53bbbf9288ddf022b768061fefb31ad;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
	drm-i915-stop-using-rp_down_ei-on-baytrail.patch
---

diff --git a/queue-4.4/drm-i915-stop-using-rp_down_ei-on-baytrail.patch b/queue-4.4/drm-i915-stop-using-rp_down_ei-on-baytrail.patch
new file mode 100644
index 00000000000..03d1e129a16
--- /dev/null
+++ b/queue-4.4/drm-i915-stop-using-rp_down_ei-on-baytrail.patch
@@ -0,0 +1,177 @@
+From 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon, 13 Mar 2017 17:06:17 +0000
+Subject: drm/i915: Stop using RP_DOWN_EI on Baytrail
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad upstream.
+
+On Baytrail, we manually calculate busyness over the evaluation interval
+to avoid issues with miscalculations with RC6 enabled. However, it turns
+out that the DOWN_EI interrupt generator is completely bust - it
+operates in two modes, continuous or never. Neither of which are
+conducive to good behaviour. Stop unmasking the DOWN_EI interrupt and just
+compute everything from the UP_EI which does seem to correspond to the
+desired interval.
+
+v2: Fixup gen6_rps_pm_mask() as well
+v3: Inline vlv_c0_above() to combine the now identical elapsed
+calculation for up/down and simplify the threshold testing
+
+Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/20170309211232.28878-1-chris@chris-wilson.co.uk
+Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/20170313170617.31564-1-chris@chris-wilson.co.uk
+(cherry picked from commit e0e8c7cb6eb68e9256de2d8cbeb481d3701c05ac)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/i915/i915_drv.h |    2 -
+ drivers/gpu/drm/i915/i915_irq.c |   73 +++++++++++++++-------------------------
+ drivers/gpu/drm/i915/intel_pm.c |    5 +-
+ 3 files changed, 32 insertions(+), 48 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -1159,7 +1159,7 @@ struct intel_gen6_power_mgmt {
+ 	struct intel_rps_client semaphores, mmioflips;
+ 
+ 	/* manual wa residency calculations */
+-	struct intel_rps_ei up_ei, down_ei;
++	struct intel_rps_ei ei;
+ 
+ 	/*
+ 	 * Protects RPS/RC6 register access and PCU communication.
+--- a/drivers/gpu/drm/i915/i915_irq.c
++++ b/drivers/gpu/drm/i915/i915_irq.c
+@@ -994,68 +994,51 @@ static void vlv_c0_read(struct drm_i915_
+ 	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+ }
+ 
+-static bool vlv_c0_above(struct drm_i915_private *dev_priv,
+-			 const struct intel_rps_ei *old,
+-			 const struct intel_rps_ei *now,
+-			 int threshold)
+-{
+-	u64 time, c0;
+-	unsigned int mul = 100;
+-
+-	if (old->cz_clock == 0)
+-		return false;
+-
+-	if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
+-		mul <<= 8;
+-
+-	time = now->cz_clock - old->cz_clock;
+-	time *= threshold * dev_priv->czclk_freq;
+-
+-	/* Workload can be split between render + media, e.g. SwapBuffers
+-	 * being blitted in X after being rendered in mesa. To account for
+-	 * this we need to combine both engines into our activity counter.
+-	 */
+-	c0 = now->render_c0 - old->render_c0;
+-	c0 += now->media_c0 - old->media_c0;
+-	c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
+-
+-	return c0 >= time;
+-}
+-
+ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
+ {
+-	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
+-	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
++	memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
+ }
+ 
+ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+ {
++	const struct intel_rps_ei *prev = &dev_priv->rps.ei;
+ 	struct intel_rps_ei now;
+ 	u32 events = 0;
+ 
+-	if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
++	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ 		return 0;
+ 
+ 	vlv_c0_read(dev_priv, &now);
+ 	if (now.cz_clock == 0)
+ 		return 0;
+ 
+-	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
+-		if (!vlv_c0_above(dev_priv,
+-				  &dev_priv->rps.down_ei, &now,
+-				  dev_priv->rps.down_threshold))
+-			events |= GEN6_PM_RP_DOWN_THRESHOLD;
+-		dev_priv->rps.down_ei = now;
+-	}
++	if (prev->cz_clock) {
++		u64 time, c0;
++		unsigned int mul;
++
++		mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */
++		if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
++			mul <<= 8;
++
++		time = now.cz_clock - prev->cz_clock;
++		time *= dev_priv->czclk_freq;
++
++		/* Workload can be split between render + media,
++		 * e.g. SwapBuffers being blitted in X after being rendered in
++		 * mesa. To account for this we need to combine both engines
++		 * into our activity counter.
++		 */
++		c0 = now.render_c0 - prev->render_c0;
++		c0 += now.media_c0 - prev->media_c0;
++		c0 *= mul;
+ 
+-	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+-		if (vlv_c0_above(dev_priv,
+-				 &dev_priv->rps.up_ei, &now,
+-				 dev_priv->rps.up_threshold))
+-			events |= GEN6_PM_RP_UP_THRESHOLD;
+-		dev_priv->rps.up_ei = now;
++		if (c0 > time * dev_priv->rps.up_threshold)
++			events = GEN6_PM_RP_UP_THRESHOLD;
++		else if (c0 < time * dev_priv->rps.down_threshold)
++			events = GEN6_PM_RP_DOWN_THRESHOLD;
+ 	}
+ 
++	dev_priv->rps.ei = now;
+ 	return events;
+ }
+ 
+@@ -4390,7 +4373,7 @@ void intel_irq_init(struct drm_i915_priv
+ 	/* Let's track the enabled rps events */
+ 	if (IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
+ 		/* WaGsvRC0ResidencyMethod:vlv */
+-		dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
++		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+ 	else
+ 		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+ 
+--- a/drivers/gpu/drm/i915/intel_pm.c
++++ b/drivers/gpu/drm/i915/intel_pm.c
+@@ -4411,8 +4411,9 @@ static u32 gen6_rps_pm_mask(struct drm_i
+ {
+ 	u32 mask = 0;
+ 
++	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
+ 	if (val > dev_priv->rps.min_freq_softlimit)
+-		mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
++		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
+ 	if (val < dev_priv->rps.max_freq_softlimit)
+ 		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
+ 
+@@ -4516,7 +4517,7 @@ void gen6_rps_busy(struct drm_i915_priva
+ {
+ 	mutex_lock(&dev_priv->rps.hw_lock);
+ 	if (dev_priv->rps.enabled) {
+-		if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
++		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+ 			gen6_rps_reset_ei(dev_priv);
+ 		I915_WRITE(GEN6_PMINTRMSK,
+ 			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
diff --git a/queue-4.4/series b/queue-4.4/series
index 920f89c6a48..a37dc3f8f8d 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -1 +1,2 @@
 drm-i915-avoid-tweaking-evaluation-thresholds-on-baytrail-v3.patch
+drm-i915-stop-using-rp_down_ei-on-baytrail.patch