From 70ca414094d082163ddcd1e40d998d6d6f4b3473 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 31 Jan 2017 06:14:32 +0100 Subject: [PATCH] 4.9-stable patches added patches: drm-i915-remove-wadisablelsqcroperfforocl-kbl-workaround.patch --- ...sablelsqcroperfforocl-kbl-workaround.patch | 93 +++++++++++++++++++ queue-4.9/series | 1 + 2 files changed, 94 insertions(+) create mode 100644 queue-4.9/drm-i915-remove-wadisablelsqcroperfforocl-kbl-workaround.patch diff --git a/queue-4.9/drm-i915-remove-wadisablelsqcroperfforocl-kbl-workaround.patch b/queue-4.9/drm-i915-remove-wadisablelsqcroperfforocl-kbl-workaround.patch new file mode 100644 index 00000000000..bcd6f862a2a --- /dev/null +++ b/queue-4.9/drm-i915-remove-wadisablelsqcroperfforocl-kbl-workaround.patch @@ -0,0 +1,93 @@ +From 4fc020d864647ea3ae8cb8f17d63e48e87ebd0bf Mon Sep 17 00:00:00 2001 +From: Francisco Jerez +Date: Thu, 12 Jan 2017 12:44:54 +0200 +Subject: drm/i915: Remove WaDisableLSQCROPERFforOCL KBL workaround. + +From: Francisco Jerez + +commit 4fc020d864647ea3ae8cb8f17d63e48e87ebd0bf upstream. + +The WaDisableLSQCROPERFforOCL workaround has the side effect of +disabling an L3SQ optimization that has huge performance implications +and is unlikely to be necessary for the correct functioning of usual +graphic workloads. Userspace is free to re-enable the workaround on +demand, and is generally in a better position to determine whether the +workaround is necessary than the DRM is (e.g. only during the +execution of compute kernels that rely on both L3 fences and HDC R/W +requests). + +The same workaround seems to apply to BDW (at least to production +stepping G1) and SKL as well (the internal workaround database claims +that it does for all steppings, while the BSpec workaround table only +mentions pre-production steppings), but the DRM doesn't do anything +beyond whitelisting the L3SQCREG4 register so userspace can enable it +when it sees fit. Do the same on KBL platforms. 
+ +Improves performance of the GFXBench4 gl_manhattan31 benchmark by 60%, +and gl_4 (AKA car chase) by 14% on a KBL GT2 running Mesa master -- +This is followed by a regression of 35% and 10% respectively for the +same benchmarks and platform caused by my recent patch series +switching userspace to use the dataport constant cache instead of the +sampler to implement uniform pull constant loads, which caused us to +hit the L3 cache more heavily (and on platforms other than KBL had the +opposite effect of improving performance of the same two benchmarks). +The net effect on KBL of this change combined with the recent userspace +change is a gain of 4.6% and 2.6% respectively. SynMark2 OglShMapPcf +was affected by the constant cache changes (though it improved as it +did on other platforms rather than regressing), but is not +significantly affected by this patch (at a 5% significance level with +a sample size of 20). + +v2: Drop some more code to avoid unused variable warning. + +Fixes: 738fa1b3123f ("drm/i915/kbl: Add WaDisableLSQCROPERFforOCL") +Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99256 +Signed-off-by: Francisco Jerez +Cc: Matthew Auld +Cc: Eero Tamminen +Cc: Jani Nikula +Cc: Mika Kuoppala +Cc: beignet@lists.freedesktop.org +Reviewed-by: Mika Kuoppala +[Removed double Fixes tag] +Signed-off-by: Mika Kuoppala +Link: http://patchwork.freedesktop.org/patch/msgid/1484217894-20505-1-git-send-email-mika.kuoppala@intel.com +(cherry picked from commit 8726f2faa371514fba2f594d799db95203dfeee0) +Signed-off-by: Jani Nikula +[ Francisco Jerez: Rebase on v4.9 branch.
] +Signed-off-by: Francisco Jerez +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/intel_lrc.c | 3 +-- + drivers/gpu/drm/i915/intel_ringbuffer.c | 8 -------- + 2 files changed, 1 insertion(+), 10 deletions(-) + +--- a/drivers/gpu/drm/i915/intel_lrc.c ++++ b/drivers/gpu/drm/i915/intel_lrc.c +@@ -858,8 +858,7 @@ static inline int gen8_emit_flush_cohere + * this batch updates GEN8_L3SQCREG4 with default value we need to + * set this bit here to retain the WA during flush. + */ +- if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) || +- IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) ++ if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0)) + l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS; + + wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | +--- a/drivers/gpu/drm/i915/intel_ringbuffer.c ++++ b/drivers/gpu/drm/i915/intel_ringbuffer.c +@@ -1153,14 +1153,6 @@ static int kbl_init_workarounds(struct i + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE); + +- /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes +- * involving this register should also be added to WA batch as required. +- */ +- if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) +- /* WaDisableLSQCROPERFforOCL:kbl */ +- I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | +- GEN8_LQSC_RO_PERF_DIS); +- + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, diff --git a/queue-4.9/series b/queue-4.9/series index 1dbdde1170d..7d73f7f86b5 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -63,3 +63,4 @@ platform-x86-mlx-platform-free-first-dev-on-error.patch platform-x86-intel_mid_powerbtn-set-irq_oneshot.patch mm-memcg-do-not-retry-precharge-charges.patch perf-core-fix-concurrent-sys_perf_event_open-vs.-move_group-race.patch +drm-i915-remove-wadisablelsqcroperfforocl-kbl-workaround.patch -- 2.47.3