From d755fab9d6ea29584660dd2459d62bf5071f1e81 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 23 Jun 2020 09:56:41 +0200 Subject: [PATCH] 5.7-stable patches added patches: drm-amd-display-use-kvfree-to-free-coeff-in-build_regamma.patch drm-amdkfd-use-correct-major-in-devcgroup-check.patch drm-connector-notify-userspace-on-hotplug-after-register-complete.patch drm-i915-fix-aux-power-domain-toggling-across-typec-mode-resets.patch drm-i915-gem-avoid-iterating-an-empty-list.patch drm-i915-gt-incrementally-check-for-rewinding.patch drm-i915-gt-move-gen4-gt-workarounds-from-init_clock_gating-to-workarounds.patch drm-i915-gt-move-hsw-gt-workarounds-from-init_clock_gating-to-workarounds.patch drm-i915-gt-move-ilk-gt-workarounds-from-init_clock_gating-to-workarounds.patch drm-i915-gt-move-ivb-gt-workarounds-from-init_clock_gating-to-workarounds.patch drm-i915-gt-move-snb-gt-workarounds-from-init_clock_gating-to-workarounds.patch drm-i915-gt-move-vlv-gt-workarounds-from-init_clock_gating-to-workarounds.patch drm-i915-icl-fix-hotplug-interrupt-disabling-after-storm-detection.patch drm-i915-tc-fix-the-reset-of-ln0.patch drm-i915-whitelist-context-local-timestamp-in-the-gen9-cmdparser.patch drm-msm-check-for-powered-down-hw-in-the-devfreq-callbacks.patch s390-fix-syscall_get_error-for-compat-processes.patch --- ...vfree-to-free-coeff-in-build_regamma.patch | 33 ++ ...use-correct-major-in-devcgroup-check.patch | 48 +++ ...e-on-hotplug-after-register-complete.patch | 59 ++++ ...in-toggling-across-typec-mode-resets.patch | 52 +++ ...15-gem-avoid-iterating-an-empty-list.patch | 77 +++++ ...gt-incrementally-check-for-rewinding.patch | 297 ++++++++++++++++++ ...rom-init_clock_gating-to-workarounds.patch | 111 +++++++ ...rom-init_clock_gating-to-workarounds.patch | 152 +++++++++ ...rom-init_clock_gating-to-workarounds.patch | 75 +++++ ...rom-init_clock_gating-to-workarounds.patch | 192 +++++++++++ ...rom-init_clock_gating-to-workarounds.patch | 132 ++++++++ 
...rom-init_clock_gating-to-workarounds.patch | 197 ++++++++++++ ...rupt-disabling-after-storm-detection.patch | 41 +++ .../drm-i915-tc-fix-the-reset-of-ln0.patch | 39 +++ ...ocal-timestamp-in-the-gen9-cmdparser.patch | 46 +++ ...red-down-hw-in-the-devfreq-callbacks.patch | 108 +++++++ ...scall_get_error-for-compat-processes.patch | 54 ++++ queue-5.7/series | 17 + 18 files changed, 1730 insertions(+) create mode 100644 queue-5.7/drm-amd-display-use-kvfree-to-free-coeff-in-build_regamma.patch create mode 100644 queue-5.7/drm-amdkfd-use-correct-major-in-devcgroup-check.patch create mode 100644 queue-5.7/drm-connector-notify-userspace-on-hotplug-after-register-complete.patch create mode 100644 queue-5.7/drm-i915-fix-aux-power-domain-toggling-across-typec-mode-resets.patch create mode 100644 queue-5.7/drm-i915-gem-avoid-iterating-an-empty-list.patch create mode 100644 queue-5.7/drm-i915-gt-incrementally-check-for-rewinding.patch create mode 100644 queue-5.7/drm-i915-gt-move-gen4-gt-workarounds-from-init_clock_gating-to-workarounds.patch create mode 100644 queue-5.7/drm-i915-gt-move-hsw-gt-workarounds-from-init_clock_gating-to-workarounds.patch create mode 100644 queue-5.7/drm-i915-gt-move-ilk-gt-workarounds-from-init_clock_gating-to-workarounds.patch create mode 100644 queue-5.7/drm-i915-gt-move-ivb-gt-workarounds-from-init_clock_gating-to-workarounds.patch create mode 100644 queue-5.7/drm-i915-gt-move-snb-gt-workarounds-from-init_clock_gating-to-workarounds.patch create mode 100644 queue-5.7/drm-i915-gt-move-vlv-gt-workarounds-from-init_clock_gating-to-workarounds.patch create mode 100644 queue-5.7/drm-i915-icl-fix-hotplug-interrupt-disabling-after-storm-detection.patch create mode 100644 queue-5.7/drm-i915-tc-fix-the-reset-of-ln0.patch create mode 100644 queue-5.7/drm-i915-whitelist-context-local-timestamp-in-the-gen9-cmdparser.patch create mode 100644 queue-5.7/drm-msm-check-for-powered-down-hw-in-the-devfreq-callbacks.patch create mode 100644 
queue-5.7/s390-fix-syscall_get_error-for-compat-processes.patch diff --git a/queue-5.7/drm-amd-display-use-kvfree-to-free-coeff-in-build_regamma.patch b/queue-5.7/drm-amd-display-use-kvfree-to-free-coeff-in-build_regamma.patch new file mode 100644 index 00000000000..5a44eb07e17 --- /dev/null +++ b/queue-5.7/drm-amd-display-use-kvfree-to-free-coeff-in-build_regamma.patch @@ -0,0 +1,33 @@ +From 81921a828b94ce2816932c19a5ec74d302972833 Mon Sep 17 00:00:00 2001 +From: Denis Efremov +Date: Fri, 5 Jun 2020 20:37:43 +0300 +Subject: drm/amd/display: Use kvfree() to free coeff in build_regamma() + +From: Denis Efremov + +commit 81921a828b94ce2816932c19a5ec74d302972833 upstream. + +Use kvfree() instead of kfree() to free coeff in build_regamma() +because the memory is allocated with kvzalloc(). + +Fixes: e752058b8671 ("drm/amd/display: Optimize gamma calculations") +Cc: stable@vger.kernel.org +Signed-off-by: Denis Efremov +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c ++++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +@@ -843,7 +843,7 @@ static bool build_regamma(struct pwl_flo + pow_buffer_ptr = -1; // reset back to no optimize + ret = true; + release: +- kfree(coeff); ++ kvfree(coeff); + return ret; + } + diff --git a/queue-5.7/drm-amdkfd-use-correct-major-in-devcgroup-check.patch b/queue-5.7/drm-amdkfd-use-correct-major-in-devcgroup-check.patch new file mode 100644 index 00000000000..5bf74079ba0 --- /dev/null +++ b/queue-5.7/drm-amdkfd-use-correct-major-in-devcgroup-check.patch @@ -0,0 +1,48 @@ +From 99c7b309472787026ce52fd2bc5d00630567a872 Mon Sep 17 00:00:00 2001 +From: Lorenz Brun +Date: Thu, 11 Jun 2020 22:11:21 +0200 +Subject: drm/amdkfd: Use correct major in devcgroup check + +From: Lorenz Brun + +commit 99c7b309472787026ce52fd2bc5d00630567a872 upstream. 
+ +The existing code used the major version number of the DRM driver +instead of the device major number of the DRM subsystem for +validating access for a devices cgroup. + +This meant that accesses allowed by the devices cgroup weren't +permitted and certain accesses denied by the devices cgroup were +permitted (if they matched the wrong major device number). + +Signed-off-by: Lorenz Brun +Fixes: 6b855f7b83d2f ("drm/amdkfd: Check against device cgroup") +Reviewed-off-by: Felix Kuehling +Signed-off-by: Felix Kuehling +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -1053,7 +1054,7 @@ static inline int kfd_devcgroup_check_pe + #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) + struct drm_device *ddev = kfd->ddev; + +- return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major, ++ return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, + ddev->render->index, + DEVCG_ACC_WRITE | DEVCG_ACC_READ); + #else diff --git a/queue-5.7/drm-connector-notify-userspace-on-hotplug-after-register-complete.patch b/queue-5.7/drm-connector-notify-userspace-on-hotplug-after-register-complete.patch new file mode 100644 index 00000000000..fb4e3287312 --- /dev/null +++ b/queue-5.7/drm-connector-notify-userspace-on-hotplug-after-register-complete.patch @@ -0,0 +1,59 @@ +From 968d81a64a883af2d16dd3f8a6ad6b67db2fde58 Mon Sep 17 00:00:00 2001 +From: Jeykumar Sankaran +Date: Tue, 2 Jun 2020 20:37:31 -0700 +Subject: drm/connector: notify userspace on hotplug after register complete + +From: Jeykumar Sankaran + +commit 968d81a64a883af2d16dd3f8a6ad6b67db2fde58 upstream. 
+ +drm connector notifies userspace on hotplug event prematurely before +late_register and mode_object register completes. This leads to a race +between userspace and kernel on updating the IDR list. So, move the +notification to end of connector register. + +Signed-off-by: Jeykumar Sankaran +Signed-off-by: Steve Cohen +Cc: stable@vger.kernel.org +Signed-off-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/1591155451-10393-1-git-send-email-jsanka@codeaurora.org +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/drm_connector.c | 5 +++++ + drivers/gpu/drm/drm_sysfs.c | 3 --- + 2 files changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/drm_connector.c ++++ b/drivers/gpu/drm/drm_connector.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + #include + +@@ -523,6 +524,10 @@ int drm_connector_register(struct drm_co + drm_mode_object_register(connector->dev, &connector->base); + + connector->registration_state = DRM_CONNECTOR_REGISTERED; ++ ++ /* Let userspace know we have a new connector */ ++ drm_sysfs_hotplug_event(connector->dev); ++ + goto unlock; + + err_debugfs: +--- a/drivers/gpu/drm/drm_sysfs.c ++++ b/drivers/gpu/drm/drm_sysfs.c +@@ -291,9 +291,6 @@ int drm_sysfs_connector_add(struct drm_c + return PTR_ERR(connector->kdev); + } + +- /* Let userspace know we have a new connector */ +- drm_sysfs_hotplug_event(dev); +- + if (connector->ddc) + return sysfs_create_link(&connector->kdev->kobj, + &connector->ddc->dev.kobj, "ddc"); diff --git a/queue-5.7/drm-i915-fix-aux-power-domain-toggling-across-typec-mode-resets.patch b/queue-5.7/drm-i915-fix-aux-power-domain-toggling-across-typec-mode-resets.patch new file mode 100644 index 00000000000..415a12675df --- /dev/null +++ b/queue-5.7/drm-i915-fix-aux-power-domain-toggling-across-typec-mode-resets.patch @@ -0,0 +1,52 @@ +From d96536f0fe699729a0974eb5b65eb0d87cc747e1 Mon Sep 17 00:00:00 2001 +From: Imre Deak +Date: Thu, 14 May 2020 23:45:53 +0300 +Subject: 
drm/i915: Fix AUX power domain toggling across TypeC mode resets +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Imre Deak + +commit d96536f0fe699729a0974eb5b65eb0d87cc747e1 upstream. + +Make sure to select the port's AUX power domain while holding the TC +port lock. The domain depends on the port's current TC mode, which may +get changed under us if we're not holding the lock. + +This was left out from +commit 8c10e2262663 ("drm/i915: Keep the TypeC port mode fixed for detect/AUX transfers") + +Cc: # v5.4+ +Signed-off-by: Imre Deak +Reviewed-by: José Roberto de Souza +Link: https://patchwork.freedesktop.org/patch/msgid/20200514204553.27193-1-imre.deak@intel.com +(cherry picked from commit ae9b6cfe1352da25931bce3ea4acfd4dc1ac8a85) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/display/intel_dp.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/i915/display/intel_dp.c ++++ b/drivers/gpu/drm/i915/display/intel_dp.c +@@ -1343,8 +1343,7 @@ intel_dp_aux_xfer(struct intel_dp *intel + bool is_tc_port = intel_phy_is_tc(i915, phy); + i915_reg_t ch_ctl, ch_data[5]; + u32 aux_clock_divider; +- enum intel_display_power_domain aux_domain = +- intel_aux_power_domain(intel_dig_port); ++ enum intel_display_power_domain aux_domain; + intel_wakeref_t aux_wakeref; + intel_wakeref_t pps_wakeref; + int i, ret, recv_bytes; +@@ -1359,6 +1358,8 @@ intel_dp_aux_xfer(struct intel_dp *intel + if (is_tc_port) + intel_tc_port_lock(intel_dig_port); + ++ aux_domain = intel_aux_power_domain(intel_dig_port); ++ + aux_wakeref = intel_display_power_get(i915, aux_domain); + pps_wakeref = pps_lock(intel_dp); + diff --git a/queue-5.7/drm-i915-gem-avoid-iterating-an-empty-list.patch b/queue-5.7/drm-i915-gem-avoid-iterating-an-empty-list.patch new file mode 100644 index 00000000000..b25731440eb --- /dev/null +++ 
b/queue-5.7/drm-i915-gem-avoid-iterating-an-empty-list.patch @@ -0,0 +1,77 @@ +From 757a9395f33c51c4e6eff2c7c0fbd50226a58224 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Fri, 22 May 2020 14:27:06 +0100 +Subject: drm/i915/gem: Avoid iterating an empty list + +From: Chris Wilson + +commit 757a9395f33c51c4e6eff2c7c0fbd50226a58224 upstream. + +Our __sgt_iter assumes that the scattergather list has at least one +element. But during construction we may fail in allocating the first +page, and so mark the first element as the terminator. This is +unexpected! + +[22555.524752] RIP: 0010:shmem_get_pages+0x506/0x710 [i915] +[22555.524759] Code: 49 8b 2c 24 31 c0 66 89 44 24 40 48 85 ed 0f 84 62 01 00 00 4c 8b 75 00 8b 5d 08 44 8b 7d 0c 48 8b 0d 7e 34 07 e2 49 83 e6 fc <49> 8b 16 41 01 df 48 89 cf 48 89 d0 48 c1 e8 2d 48 85 c9 0f 84 c8 +[22555.524765] RSP: 0018:ffffc9000053f9d0 EFLAGS: 00010246 +[22555.524770] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8881ffffa000 +[22555.524774] RDX: fffffffffffffff4 RSI: ffffffffffffffff RDI: ffffffff821efe00 +[22555.524778] RBP: ffff8881b099ab00 R08: 0000000000000000 R09: 00000000fffffff4 +[22555.524782] R10: 0000000000000002 R11: 00000000ffec0a02 R12: ffff8881cd3c8d60 +[22555.524786] R13: 00000000fffffff4 R14: 0000000000000000 R15: 0000000000000000 +[22555.524790] FS: 00007f4fbeb9b9c0(0000) GS:ffff8881f8580000(0000) knlGS:0000000000000000 +[22555.524795] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[22555.524799] CR2: 0000000000000000 CR3: 00000001ec7f0004 CR4: 00000000001606e0 +[22555.524803] Call Trace: +[22555.524919] __i915_gem_object_get_pages+0x4f/0x60 [i915] + +Fixes: 85d1225ec066 ("drm/i915: Introduce & use new lightweight SGL iterators") +Signed-off-by: Chris Wilson +Cc: Matthew Auld +Cc: Tvrtko Ursulin +Cc: # v4.8+ +Reviewed-by: Matthew Auld +Reviewed-by: Maciej Patelczyk +Link: https://patchwork.freedesktop.org/patch/msgid/20200522132706.5133-1-chris@chris-wilson.co.uk +(cherry picked from commit 
957ad9a02be6faa87594c58ac09460cd3d190d0e) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +@@ -39,7 +39,6 @@ static int shmem_get_pages(struct drm_i9 + unsigned long last_pfn = 0; /* suppress gcc warning */ + unsigned int max_segment = i915_sg_segment_size(); + unsigned int sg_page_sizes; +- struct pagevec pvec; + gfp_t noreclaim; + int ret; + +@@ -192,13 +191,17 @@ err_sg: + sg_mark_end(sg); + err_pages: + mapping_clear_unevictable(mapping); +- pagevec_init(&pvec); +- for_each_sgt_page(page, sgt_iter, st) { +- if (!pagevec_add(&pvec, page)) ++ if (sg != st->sgl) { ++ struct pagevec pvec; ++ ++ pagevec_init(&pvec); ++ for_each_sgt_page(page, sgt_iter, st) { ++ if (!pagevec_add(&pvec, page)) ++ check_release_pagevec(&pvec); ++ } ++ if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); + } +- if (pagevec_count(&pvec)) +- check_release_pagevec(&pvec); + sg_free_table(st); + kfree(st); + diff --git a/queue-5.7/drm-i915-gt-incrementally-check-for-rewinding.patch b/queue-5.7/drm-i915-gt-incrementally-check-for-rewinding.patch new file mode 100644 index 00000000000..9f5ea37e479 --- /dev/null +++ b/queue-5.7/drm-i915-gt-incrementally-check-for-rewinding.patch @@ -0,0 +1,297 @@ +From 8ab3a3812aa90e488813e719308ffd807b865624 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Tue, 9 Jun 2020 16:17:23 +0100 +Subject: drm/i915/gt: Incrementally check for rewinding + +From: Chris Wilson + +commit 8ab3a3812aa90e488813e719308ffd807b865624 upstream. + +In commit 5ba32c7be81e ("drm/i915/execlists: Always force a context +reload when rewinding RING_TAIL"), we placed the check for rewinding a +context on actually submitting the next request in that context. 
This +was so that we only had to check once, and could do so with precision +avoiding as many forced restores as possible. For example, to ensure +that we can resubmit the same request a couple of times, we include a +small wa_tail such that on the next submission, the ring->tail will +appear to move forwards when resubmitting the same request. This is very +common as it will happen for every lite-restore to fill the second port +after a context switch. + +However, intel_ring_direction() is limited in precision to movements of +up to half the ring size. The consequence being that if we tried to +unwind many requests, we could exceed half the ring and flip the sense +of the direction, so missing a force restore. As no request can be +greater than half the ring (i.e. 2048 bytes in the smallest case), we +can check for rollback incrementally. As we check against the tail that +would be submitted, we do not lose any sensitivity and allow lite +restores for the simple case. We still need to double check upon +submitting the context, to allow for multiple preemptions and +resubmissions. 
+ +Fixes: 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL") +Signed-off-by: Chris Wilson +Cc: Mika Kuoppala +Cc: # v5.4+ +Reviewed-by: Bruce Chang +Reviewed-by: Mika Kuoppala +Link: https://patchwork.freedesktop.org/patch/msgid/20200609151723.12971-1-chris@chris-wilson.co.uk +(cherry picked from commit e36ba817fa966f81fb1c8d16f3721b5a644b2fa9) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 + drivers/gpu/drm/i915/gt/intel_lrc.c | 21 +++ + drivers/gpu/drm/i915/gt/intel_ring.c | 4 + drivers/gpu/drm/i915/gt/selftest_mocs.c | 18 ++- + drivers/gpu/drm/i915/gt/selftest_ring.c | 110 +++++++++++++++++++ + drivers/gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + 6 files changed, 154 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c ++++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c +@@ -639,7 +639,7 @@ static int engine_setup_common(struct in + struct measure_breadcrumb { + struct i915_request rq; + struct intel_ring ring; +- u32 cs[1024]; ++ u32 cs[2048]; + }; + + static int measure_breadcrumb_dw(struct intel_context *ce) +@@ -661,6 +661,8 @@ static int measure_breadcrumb_dw(struct + + frame->ring.vaddr = frame->cs; + frame->ring.size = sizeof(frame->cs); ++ frame->ring.wrap = ++ BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size); + frame->ring.effective_size = frame->ring.size; + intel_ring_update_space(&frame->ring); + frame->rq.ring = &frame->ring; +--- a/drivers/gpu/drm/i915/gt/intel_lrc.c ++++ b/drivers/gpu/drm/i915/gt/intel_lrc.c +@@ -972,6 +972,13 @@ __unwind_incomplete_requests(struct inte + list_move(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + ++ /* Check in case we rollback so far we wrap [size/2] */ ++ if (intel_ring_direction(rq->ring, ++ intel_ring_wrap(rq->ring, ++ rq->tail), ++ rq->ring->tail) > 0) ++ rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; ++ + active = rq; + } 
else { + struct intel_engine_cs *owner = rq->context->engine; +@@ -1383,8 +1390,9 @@ static u64 execlists_update_context(stru + * HW has a tendency to ignore us rewinding the TAIL to the end of + * an earlier request. + */ ++ GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); ++ prev = rq->ring->tail; + tail = intel_ring_set_tail(rq->ring, rq->tail); +- prev = ce->lrc_reg_state[CTX_RING_TAIL]; + if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) + desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc_reg_state[CTX_RING_TAIL] = tail; +@@ -4213,6 +4221,14 @@ static int gen12_emit_flush_render(struc + return 0; + } + ++static void assert_request_valid(struct i915_request *rq) ++{ ++ struct intel_ring *ring __maybe_unused = rq->ring; ++ ++ /* Can we unwind this request without appearing to go forwards? */ ++ GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0); ++} ++ + /* + * Reserve space for 2 NOOPs at the end of each request to be + * used as a workaround for not being allowed to do lite +@@ -4225,6 +4241,9 @@ static u32 *gen8_emit_wa_tail(struct i91 + *cs++ = MI_NOOP; + request->wa_tail = intel_ring_offset(request, cs); + ++ /* Check that entire request is less than half the ring */ ++ assert_request_valid(request); ++ + return cs; + } + +--- a/drivers/gpu/drm/i915/gt/intel_ring.c ++++ b/drivers/gpu/drm/i915/gt/intel_ring.c +@@ -315,3 +315,7 @@ int intel_ring_cacheline_align(struct i9 + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); + return 0; + } ++ ++#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) ++#include "selftest_ring.c" ++#endif +--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c ++++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c +@@ -18,6 +18,20 @@ struct live_mocs { + void *vaddr; + }; + ++static struct intel_context *mocs_context_create(struct intel_engine_cs *engine) ++{ ++ struct intel_context *ce; ++ ++ ce = intel_context_create(engine); ++ if (IS_ERR(ce)) ++ return ce; ++ ++ /* We build large requests to read the registers from the 
ring */ ++ ce->ring = __intel_context_ring_size(SZ_16K); ++ ++ return ce; ++} ++ + static int request_add_sync(struct i915_request *rq, int err) + { + i915_request_get(rq); +@@ -301,7 +315,7 @@ static int live_mocs_clean(void *arg) + for_each_engine(engine, gt, id) { + struct intel_context *ce; + +- ce = intel_context_create(engine); ++ ce = mocs_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; +@@ -395,7 +409,7 @@ static int live_mocs_reset(void *arg) + for_each_engine(engine, gt, id) { + struct intel_context *ce; + +- ce = intel_context_create(engine); ++ ce = mocs_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; +--- /dev/null ++++ b/drivers/gpu/drm/i915/gt/selftest_ring.c +@@ -0,0 +1,110 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright © 2020 Intel Corporation ++ */ ++ ++static struct intel_ring *mock_ring(unsigned long sz) ++{ ++ struct intel_ring *ring; ++ ++ ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); ++ if (!ring) ++ return NULL; ++ ++ kref_init(&ring->ref); ++ ring->size = sz; ++ ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(sz); ++ ring->effective_size = sz; ++ ring->vaddr = (void *)(ring + 1); ++ atomic_set(&ring->pin_count, 1); ++ ++ intel_ring_update_space(ring); ++ ++ return ring; ++} ++ ++static void mock_ring_free(struct intel_ring *ring) ++{ ++ kfree(ring); ++} ++ ++static int check_ring_direction(struct intel_ring *ring, ++ u32 next, u32 prev, ++ int expected) ++{ ++ int result; ++ ++ result = intel_ring_direction(ring, next, prev); ++ if (result < 0) ++ result = -1; ++ else if (result > 0) ++ result = 1; ++ ++ if (result != expected) { ++ pr_err("intel_ring_direction(%u, %u):%d != %d\n", ++ next, prev, result, expected); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int check_ring_step(struct intel_ring *ring, u32 x, u32 step) ++{ ++ u32 prev = x, next = intel_ring_wrap(ring, x + step); ++ int err = 0; ++ ++ err |= check_ring_direction(ring, next, next, 0); ++ err |= 
check_ring_direction(ring, prev, prev, 0); ++ err |= check_ring_direction(ring, next, prev, 1); ++ err |= check_ring_direction(ring, prev, next, -1); ++ ++ return err; ++} ++ ++static int check_ring_offset(struct intel_ring *ring, u32 x, u32 step) ++{ ++ int err = 0; ++ ++ err |= check_ring_step(ring, x, step); ++ err |= check_ring_step(ring, intel_ring_wrap(ring, x + 1), step); ++ err |= check_ring_step(ring, intel_ring_wrap(ring, x - 1), step); ++ ++ return err; ++} ++ ++static int igt_ring_direction(void *dummy) ++{ ++ struct intel_ring *ring; ++ unsigned int half = 2048; ++ int step, err = 0; ++ ++ ring = mock_ring(2 * half); ++ if (!ring) ++ return -ENOMEM; ++ ++ GEM_BUG_ON(ring->size != 2 * half); ++ ++ /* Precision of wrap detection is limited to ring->size / 2 */ ++ for (step = 1; step < half; step <<= 1) { ++ err |= check_ring_offset(ring, 0, step); ++ err |= check_ring_offset(ring, half, step); ++ } ++ err |= check_ring_step(ring, 0, half - 64); ++ ++ /* And check unwrapped handling for good measure */ ++ err |= check_ring_offset(ring, 0, 2 * half + 64); ++ err |= check_ring_offset(ring, 3 * half, 1); ++ ++ mock_ring_free(ring); ++ return err; ++} ++ ++int intel_ring_mock_selftests(void) ++{ ++ static const struct i915_subtest tests[] = { ++ SUBTEST(igt_ring_direction), ++ }; ++ ++ return i915_subtests(tests, NULL); ++} +--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h ++++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +@@ -20,6 +20,7 @@ selftest(fence, i915_sw_fence_mock_selft + selftest(scatterlist, scatterlist_mock_selftests) + selftest(syncmap, i915_syncmap_mock_selftests) + selftest(uncore, intel_uncore_mock_selftests) ++selftest(ring, intel_ring_mock_selftests) + selftest(engine, intel_engine_cs_mock_selftests) + selftest(timelines, intel_timeline_mock_selftests) + selftest(requests, i915_request_mock_selftests) diff --git a/queue-5.7/drm-i915-gt-move-gen4-gt-workarounds-from-init_clock_gating-to-workarounds.patch 
b/queue-5.7/drm-i915-gt-move-gen4-gt-workarounds-from-init_clock_gating-to-workarounds.patch new file mode 100644 index 00000000000..30adc1b3e16 --- /dev/null +++ b/queue-5.7/drm-i915-gt-move-gen4-gt-workarounds-from-init_clock_gating-to-workarounds.patch @@ -0,0 +1,111 @@ +From 27582a9c917940bc71c0df0b8e022cbde8d735d2 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Thu, 11 Jun 2020 09:01:40 +0100 +Subject: drm/i915/gt: Move gen4 GT workarounds from init_clock_gating to workarounds + +From: Chris Wilson + +commit 27582a9c917940bc71c0df0b8e022cbde8d735d2 upstream. + +Rescue the GT workarounds from being buried inside init_clock_gating so +that we remember to apply them after a GT reset, and that they are +included in our verification that the workarounds are applied. + +Signed-off-by: Chris Wilson +Reviewed-by: Mika Kuoppala +Cc: stable@vger.kernel.org +Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-6-chris@chris-wilson.co.uk +(cherry picked from commit 2bcefd0d263ab4a72f0d61921ae6b0dc81606551) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 ++++++++++++++++++++++----- + drivers/gpu/drm/i915/intel_pm.c | 15 --------------- + 2 files changed, 22 insertions(+), 20 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c ++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c +@@ -704,15 +704,28 @@ int intel_engine_emit_ctx_wa(struct i915 + } + + static void +-ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) ++gen4_gt_workarounds_init(struct drm_i915_private *i915, ++ struct i915_wa_list *wal) + { +- wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); ++ /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ ++ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); ++} ++ ++static void ++g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) ++{ ++ gen4_gt_workarounds_init(i915, wal); + +- /* 
WaDisableRenderCachePipelinedFlush:ilk */ ++ /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ + wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); ++} + +- /* WaDisable_RenderCache_OperationalFlush:ilk */ +- wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); ++static void ++ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) ++{ ++ g4x_gt_workarounds_init(i915, wal); ++ ++ wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); + } + + static void +@@ -1198,6 +1211,10 @@ gt_init_workarounds(struct drm_i915_priv + snb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 5)) + ilk_gt_workarounds_init(i915, wal); ++ else if (IS_G4X(i915)) ++ g4x_gt_workarounds_init(i915, wal); ++ else if (IS_GEN(i915, 4)) ++ gen4_gt_workarounds_init(i915, wal); + else if (INTEL_GEN(i915) <= 8) + return; + else +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -7070,13 +7070,6 @@ static void g4x_init_clock_gating(struct + dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; + I915_WRITE(DSPCLK_GATE_D, dspclk_gate); + +- /* WaDisableRenderCachePipelinedFlush */ +- I915_WRITE(CACHE_MODE_0, +- _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); +- +- /* WaDisable_RenderCache_OperationalFlush:g4x */ +- I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); +- + g4x_disable_trickle_feed(dev_priv); + } + +@@ -7092,11 +7085,6 @@ static void i965gm_init_clock_gating(str + intel_uncore_write(uncore, + MI_ARB_STATE, + _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); +- +- /* WaDisable_RenderCache_OperationalFlush:gen4 */ +- intel_uncore_write(uncore, +- CACHE_MODE_0, +- _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + } + + static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) +@@ -7109,9 +7097,6 @@ static void i965g_init_clock_gating(stru + I915_WRITE(RENCLK_GATE_D2, 0); + I915_WRITE(MI_ARB_STATE, + _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); +- +- /* 
WaDisable_RenderCache_OperationalFlush:gen4 */ +- I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + } + + static void gen3_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/queue-5.7/drm-i915-gt-move-hsw-gt-workarounds-from-init_clock_gating-to-workarounds.patch b/queue-5.7/drm-i915-gt-move-hsw-gt-workarounds-from-init_clock_gating-to-workarounds.patch new file mode 100644 index 00000000000..99471d55b95 --- /dev/null +++ b/queue-5.7/drm-i915-gt-move-hsw-gt-workarounds-from-init_clock_gating-to-workarounds.patch @@ -0,0 +1,152 @@ +From ef50fa9bd17d13d0611e39e13b37bbd3e1ea50bf Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Thu, 11 Jun 2020 10:30:15 +0100 +Subject: drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds + +From: Chris Wilson + +commit ef50fa9bd17d13d0611e39e13b37bbd3e1ea50bf upstream. + +Rescue the GT workarounds from being buried inside init_clock_gating so +that we remember to apply them after a GT reset, and that they are +included in our verification that the workarounds are applied. + +v2: Leave HSW_SCRATCH to set an explicit value, not or in our disable +bit. 
+ +Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2011 +Signed-off-by: Chris Wilson +Cc: Mika Kuoppala +Reviewed-by: Mika Kuoppala +Cc: stable@vger.kernel.org +Link: https://patchwork.freedesktop.org/patch/msgid/20200611093015.11370-1-chris@chris-wilson.co.uk +(cherry picked from commit f93ec5fb563779bda4501890b1854526de58e0f1) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gt/intel_workarounds.c | 48 ++++++++++++++++++++++++++++ + drivers/gpu/drm/i915/intel_pm.c | 39 +--------------------- + 2 files changed, 50 insertions(+), 37 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c ++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c +@@ -179,6 +179,12 @@ wa_write_or(struct i915_wa_list *wal, i9 + } + + static void ++wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) ++{ ++ wa_write_masked_or(wal, reg, clr, 0); ++} ++ ++static void + wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) + { + wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val); +@@ -698,6 +704,46 @@ int intel_engine_emit_ctx_wa(struct i915 + } + + static void ++hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) ++{ ++ /* L3 caching of data atomics doesn't work -- disable it. */ ++ wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); ++ ++ wa_add(wal, ++ HSW_ROW_CHICKEN3, 0, ++ _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), ++ 0 /* XXX does this reg exist? 
*/); ++ ++ /* WaVSRefCountFullforceMissDisable:hsw */ ++ wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); ++ ++ wa_masked_dis(wal, ++ CACHE_MODE_0_GEN7, ++ /* WaDisable_RenderCache_OperationalFlush:hsw */ ++ RC_OP_FLUSH_ENABLE | ++ /* enable HiZ Raw Stall Optimization */ ++ HIZ_RAW_STALL_OPT_DISABLE); ++ ++ /* WaDisable4x2SubspanOptimization:hsw */ ++ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); ++ ++ /* ++ * BSpec recommends 8x4 when MSAA is used, ++ * however in practice 16x4 seems fastest. ++ * ++ * Note that PS/WM thread counts depend on the WIZ hashing ++ * disable bit, which we don't touch here, but it's good ++ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). ++ */ ++ wa_add(wal, GEN7_GT_MODE, 0, ++ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), ++ GEN6_WIZ_HASHING_16x4); ++ ++ /* WaSampleCChickenBitEnable:hsw */ ++ wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); ++} ++ ++static void + gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) + { + /* WaDisableKillLogic:bxt,skl,kbl */ +@@ -974,6 +1020,8 @@ gt_init_workarounds(struct drm_i915_priv + bxt_gt_workarounds_init(i915, wal); + else if (IS_SKYLAKE(i915)) + skl_gt_workarounds_init(i915, wal); ++ else if (IS_HASWELL(i915)) ++ hsw_gt_workarounds_init(i915, wal); + else if (INTEL_GEN(i915) <= 8) + return; + else +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -6992,45 +6992,10 @@ static void bdw_init_clock_gating(struct + + static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) + { +- /* L3 caching of data atomics doesn't work -- disable it. 
*/ +- I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); +- I915_WRITE(HSW_ROW_CHICKEN3, +- _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); +- + /* This is required by WaCatErrorRejectionIssue:hsw */ + I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, +- I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | +- GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); +- +- /* WaVSRefCountFullforceMissDisable:hsw */ +- I915_WRITE(GEN7_FF_THREAD_MODE, +- I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); +- +- /* WaDisable_RenderCache_OperationalFlush:hsw */ +- I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); +- +- /* enable HiZ Raw Stall Optimization */ +- I915_WRITE(CACHE_MODE_0_GEN7, +- _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); +- +- /* WaDisable4x2SubspanOptimization:hsw */ +- I915_WRITE(CACHE_MODE_1, +- _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); +- +- /* +- * BSpec recommends 8x4 when MSAA is used, +- * however in practice 16x4 seems fastest. +- * +- * Note that PS/WM thread counts depend on the WIZ hashing +- * disable bit, which we don't touch here, but it's good +- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
+- */ +- I915_WRITE(GEN7_GT_MODE, +- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); +- +- /* WaSampleCChickenBitEnable:hsw */ +- I915_WRITE(HALF_SLICE_CHICKEN3, +- _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); ++ I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | ++ GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + + /* WaSwitchSolVfFArbitrationPriority:hsw */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); diff --git a/queue-5.7/drm-i915-gt-move-ilk-gt-workarounds-from-init_clock_gating-to-workarounds.patch b/queue-5.7/drm-i915-gt-move-ilk-gt-workarounds-from-init_clock_gating-to-workarounds.patch new file mode 100644 index 00000000000..28cc6ff07dd --- /dev/null +++ b/queue-5.7/drm-i915-gt-move-ilk-gt-workarounds-from-init_clock_gating-to-workarounds.patch @@ -0,0 +1,75 @@ +From eacf21040aa97fd1b3c6bb201bfd43820e1c49be Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Thu, 11 Jun 2020 09:01:39 +0100 +Subject: drm/i915/gt: Move ilk GT workarounds from init_clock_gating to workarounds + +From: Chris Wilson + +commit eacf21040aa97fd1b3c6bb201bfd43820e1c49be upstream. + +Rescue the GT workarounds from being buried inside init_clock_gating so +that we remember to apply them after a GT reset, and that they are +included in our verification that the workarounds are applied. 
+ +Signed-off-by: Chris Wilson +Reviewed-by: Mika Kuoppala +Cc: stable@vger.kernel.org +Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-5-chris@chris-wilson.co.uk +(cherry picked from commit 806a45c0838d253e306a6384057e851b65d11099) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gt/intel_workarounds.c | 14 ++++++++++++++ + drivers/gpu/drm/i915/intel_pm.c | 10 ---------- + 2 files changed, 14 insertions(+), 10 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c ++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c +@@ -704,6 +704,18 @@ int intel_engine_emit_ctx_wa(struct i915 + } + + static void ++ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) ++{ ++ wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); ++ ++ /* WaDisableRenderCachePipelinedFlush:ilk */ ++ wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); ++ ++ /* WaDisable_RenderCache_OperationalFlush:ilk */ ++ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); ++} ++ ++static void + snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) + { + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ +@@ -1125,6 +1137,8 @@ gt_init_workarounds(struct drm_i915_priv + ivb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 6)) + snb_gt_workarounds_init(i915, wal); ++ else if (IS_GEN(i915, 5)) ++ ilk_gt_workarounds_init(i915, wal); + else if (INTEL_GEN(i915) <= 8) + return; + else +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -6593,16 +6593,6 @@ static void ilk_init_clock_gating(struct + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_ELPIN_409_SELECT); +- I915_WRITE(_3D_CHICKEN2, +- _3D_CHICKEN2_WM_READ_PIPELINED << 16 | +- _3D_CHICKEN2_WM_READ_PIPELINED); +- +- /* WaDisableRenderCachePipelinedFlush:ilk */ +- I915_WRITE(CACHE_MODE_0, +- 
_MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); +- +- /* WaDisable_RenderCache_OperationalFlush:ilk */ +- I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + + g4x_disable_trickle_feed(dev_priv); + diff --git a/queue-5.7/drm-i915-gt-move-ivb-gt-workarounds-from-init_clock_gating-to-workarounds.patch b/queue-5.7/drm-i915-gt-move-ivb-gt-workarounds-from-init_clock_gating-to-workarounds.patch new file mode 100644 index 00000000000..1498595857e --- /dev/null +++ b/queue-5.7/drm-i915-gt-move-ivb-gt-workarounds-from-init_clock_gating-to-workarounds.patch @@ -0,0 +1,192 @@ +From 7237b190add0794bd95979018a23eda698f2705d Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Thu, 11 Jun 2020 09:01:36 +0100 +Subject: drm/i915/gt: Move ivb GT workarounds from init_clock_gating to workarounds + +From: Chris Wilson + +commit 7237b190add0794bd95979018a23eda698f2705d upstream. + +Rescue the GT workarounds from being buried inside init_clock_gating so +that we remember to apply them after a GT reset, and that they are +included in our verification that the workarounds are applied. 
+ +Signed-off-by: Chris Wilson +Reviewed-by: Mika Kuoppala +Cc: stable@vger.kernel.org +Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-2-chris@chris-wilson.co.uk +(cherry picked from commit 19f1f627b33385a2f0855cbc7d33d86d7f4a1e78) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gt/intel_workarounds.c | 62 ++++++++++++++++++++++++++++ + drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/intel_pm.c | 48 --------------------- + 3 files changed, 63 insertions(+), 49 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c ++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c +@@ -704,6 +704,66 @@ int intel_engine_emit_ctx_wa(struct i915 + } + + static void ++ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) ++{ ++ /* WaDisableEarlyCull:ivb */ ++ wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); ++ ++ /* WaDisablePSDDualDispatchEnable:ivb */ ++ if (IS_IVB_GT1(i915)) ++ wa_masked_en(wal, ++ GEN7_HALF_SLICE_CHICKEN1, ++ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); ++ ++ /* WaDisable_RenderCache_OperationalFlush:ivb */ ++ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); ++ ++ /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ ++ wa_masked_dis(wal, ++ GEN7_COMMON_SLICE_CHICKEN1, ++ GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); ++ ++ /* WaApplyL3ControlAndL3ChickenMode:ivb */ ++ wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); ++ wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); ++ ++ /* WaForceL3Serialization:ivb */ ++ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); ++ ++ /* ++ * WaVSThreadDispatchOverride:ivb,vlv ++ * ++ * This actually overrides the dispatch ++ * mode for all thread types. 
++ */ ++ wa_write_masked_or(wal, GEN7_FF_THREAD_MODE, ++ GEN7_FF_SCHED_MASK, ++ GEN7_FF_TS_SCHED_HW | ++ GEN7_FF_VS_SCHED_HW | ++ GEN7_FF_DS_SCHED_HW); ++ ++ if (0) { /* causes HiZ corruption on ivb:gt1 */ ++ /* enable HiZ Raw Stall Optimization */ ++ wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); ++ } ++ ++ /* WaDisable4x2SubspanOptimization:ivb */ ++ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); ++ ++ /* ++ * BSpec recommends 8x4 when MSAA is used, ++ * however in practice 16x4 seems fastest. ++ * ++ * Note that PS/WM thread counts depend on the WIZ hashing ++ * disable bit, which we don't touch here, but it's good ++ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). ++ */ ++ wa_add(wal, GEN7_GT_MODE, 0, ++ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), ++ GEN6_WIZ_HASHING_16x4); ++} ++ ++static void + hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) + { + /* L3 caching of data atomics doesn't work -- disable it. 
*/ +@@ -1022,6 +1082,8 @@ gt_init_workarounds(struct drm_i915_priv + skl_gt_workarounds_init(i915, wal); + else if (IS_HASWELL(i915)) + hsw_gt_workarounds_init(i915, wal); ++ else if (IS_IVYBRIDGE(i915)) ++ ivb_gt_workarounds_init(i915, wal); + else if (INTEL_GEN(i915) <= 8) + return; + else +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -7819,7 +7819,7 @@ enum { + + /* GEN7 chicken */ + #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) +- #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1 << 10) | (1 << 26)) ++ #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC (1 << 10) + #define GEN9_RHWO_OPTIMIZATION_DISABLE (1 << 14) + + #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -7009,32 +7009,11 @@ static void ivb_init_clock_gating(struct + + I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); + +- /* WaDisableEarlyCull:ivb */ +- I915_WRITE(_3D_CHICKEN3, +- _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); +- + /* WaDisableBackToBackFlipFix:ivb */ + I915_WRITE(IVB_CHICKEN3, + CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | + CHICKEN3_DGMG_DONE_FIX_DISABLE); + +- /* WaDisablePSDDualDispatchEnable:ivb */ +- if (IS_IVB_GT1(dev_priv)) +- I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, +- _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); +- +- /* WaDisable_RenderCache_OperationalFlush:ivb */ +- I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); +- +- /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. 
*/ +- I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, +- GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); +- +- /* WaApplyL3ControlAndL3ChickenMode:ivb */ +- I915_WRITE(GEN7_L3CNTLREG1, +- GEN7_WA_FOR_GEN7_L3_CONTROL); +- I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, +- GEN7_WA_L3_CHICKEN_MODE); + if (IS_IVB_GT1(dev_priv)) + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); +@@ -7046,10 +7025,6 @@ static void ivb_init_clock_gating(struct + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + } + +- /* WaForceL3Serialization:ivb */ +- I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & +- ~L3SQ_URB_READ_CAM_MATCH_DISABLE); +- + /* + * According to the spec, bit 13 (RCZUNIT) must be set on IVB. + * This implements the WaDisableRCZUnitClockGating:ivb workaround. +@@ -7064,29 +7039,6 @@ static void ivb_init_clock_gating(struct + + g4x_disable_trickle_feed(dev_priv); + +- gen7_setup_fixed_func_scheduler(dev_priv); +- +- if (0) { /* causes HiZ corruption on ivb:gt1 */ +- /* enable HiZ Raw Stall Optimization */ +- I915_WRITE(CACHE_MODE_0_GEN7, +- _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); +- } +- +- /* WaDisable4x2SubspanOptimization:ivb */ +- I915_WRITE(CACHE_MODE_1, +- _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); +- +- /* +- * BSpec recommends 8x4 when MSAA is used, +- * however in practice 16x4 seems fastest. +- * +- * Note that PS/WM thread counts depend on the WIZ hashing +- * disable bit, which we don't touch here, but it's good +- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
+- */ +- I915_WRITE(GEN7_GT_MODE, +- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); +- + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); + snpcr &= ~GEN6_MBC_SNPCR_MASK; + snpcr |= GEN6_MBC_SNPCR_MED; diff --git a/queue-5.7/drm-i915-gt-move-snb-gt-workarounds-from-init_clock_gating-to-workarounds.patch b/queue-5.7/drm-i915-gt-move-snb-gt-workarounds-from-init_clock_gating-to-workarounds.patch new file mode 100644 index 00000000000..511bd85f94d --- /dev/null +++ b/queue-5.7/drm-i915-gt-move-snb-gt-workarounds-from-init_clock_gating-to-workarounds.patch @@ -0,0 +1,132 @@ +From fd2599bda5a989c3332f4956fd7760ec32bd51ee Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Thu, 11 Jun 2020 09:01:38 +0100 +Subject: drm/i915/gt: Move snb GT workarounds from init_clock_gating to workarounds + +From: Chris Wilson + +commit fd2599bda5a989c3332f4956fd7760ec32bd51ee upstream. + +Rescue the GT workarounds from being buried inside init_clock_gating so +that we remember to apply them after a GT reset, and that they are +included in our verification that the workarounds are applied. 
+ +Signed-off-by: Chris Wilson +Reviewed-by: Mika Kuoppala +Cc: stable@vger.kernel.org +Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-4-chris@chris-wilson.co.uk +(cherry picked from commit c3b93a943f2c9ee4a106db100a2fc3b2f126bfc5) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gt/intel_workarounds.c | 41 ++++++++++++++++++++++++++++ + drivers/gpu/drm/i915/intel_pm.c | 33 ---------------------- + 2 files changed, 41 insertions(+), 33 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c ++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c +@@ -704,6 +704,45 @@ int intel_engine_emit_ctx_wa(struct i915 + } + + static void ++snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) ++{ ++ /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ ++ wa_masked_en(wal, ++ _3D_CHICKEN, ++ _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); ++ ++ /* WaDisable_RenderCache_OperationalFlush:snb */ ++ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); ++ ++ /* ++ * BSpec recommends 8x4 when MSAA is used, ++ * however in practice 16x4 seems fastest. ++ * ++ * Note that PS/WM thread counts depend on the WIZ hashing ++ * disable bit, which we don't touch here, but it's good ++ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). ++ */ ++ wa_add(wal, ++ GEN6_GT_MODE, 0, ++ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), ++ GEN6_WIZ_HASHING_16x4); ++ ++ wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB); ++ ++ wa_masked_en(wal, ++ _3D_CHICKEN3, ++ /* WaStripsFansDisableFastClipPerformanceFix:snb */ ++ _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | ++ /* ++ * Bspec says: ++ * "This bit must be set if 3DSTATE_CLIP clip mode is set ++ * to normal and 3DSTATE_SF number of SF output attributes ++ * is more than 16." 
++ */ ++ _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); ++} ++ ++static void + ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) + { + /* WaDisableEarlyCull:ivb */ +@@ -1084,6 +1123,8 @@ gt_init_workarounds(struct drm_i915_priv + hsw_gt_workarounds_init(i915, wal); + else if (IS_IVYBRIDGE(i915)) + ivb_gt_workarounds_init(i915, wal); ++ else if (IS_GEN(i915, 6)) ++ snb_gt_workarounds_init(i915, wal); + else if (INTEL_GEN(i915) <= 8) + return; + else +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -6665,27 +6665,6 @@ static void gen6_init_clock_gating(struc + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_ELPIN_409_SELECT); + +- /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ +- I915_WRITE(_3D_CHICKEN, +- _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); +- +- /* WaDisable_RenderCache_OperationalFlush:snb */ +- I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); +- +- /* +- * BSpec recoomends 8x4 when MSAA is used, +- * however in practice 16x4 seems fastest. +- * +- * Note that PS/WM thread counts depend on the WIZ hashing +- * disable bit, which we don't touch here, but it's good +- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). +- */ +- I915_WRITE(GEN6_GT_MODE, +- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); +- +- I915_WRITE(CACHE_MODE_0, +- _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); +- + I915_WRITE(GEN6_UCGCTL1, + I915_READ(GEN6_UCGCTL1) | + GEN6_BLBUNIT_CLOCK_GATE_DISABLE | +@@ -6708,18 +6687,6 @@ static void gen6_init_clock_gating(struc + GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | + GEN6_RCCUNIT_CLOCK_GATE_DISABLE); + +- /* WaStripsFansDisableFastClipPerformanceFix:snb */ +- I915_WRITE(_3D_CHICKEN3, +- _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); +- +- /* +- * Bspec says: +- * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and +- * 3DSTATE_SF number of SF output attributes is more than 16." 
+- */ +- I915_WRITE(_3D_CHICKEN3, +- _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); +- + /* + * According to the spec the following bits should be + * set in order to enable memory self-refresh and fbc: diff --git a/queue-5.7/drm-i915-gt-move-vlv-gt-workarounds-from-init_clock_gating-to-workarounds.patch b/queue-5.7/drm-i915-gt-move-vlv-gt-workarounds-from-init_clock_gating-to-workarounds.patch new file mode 100644 index 00000000000..4bf455a62c6 --- /dev/null +++ b/queue-5.7/drm-i915-gt-move-vlv-gt-workarounds-from-init_clock_gating-to-workarounds.patch @@ -0,0 +1,197 @@ +From 695a2b11649e99bbf15d278042247042c42b8728 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Thu, 11 Jun 2020 09:01:37 +0100 +Subject: drm/i915/gt: Move vlv GT workarounds from init_clock_gating to workarounds + +From: Chris Wilson + +commit 695a2b11649e99bbf15d278042247042c42b8728 upstream. + +Rescue the GT workarounds from being buried inside init_clock_gating so +that we remember to apply them after a GT reset, and that they are +included in our verification that the workarounds are applied. 
+ +Signed-off-by: Chris Wilson +Reviewed-by: Mika Kuoppala +Cc: stable@vger.kernel.org +Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-3-chris@chris-wilson.co.uk +(cherry picked from commit 7331c356b6d2d8a01422cacab27478a1dba9fa2a) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gt/intel_workarounds.c | 59 +++++++++++++++++++++++++++ + drivers/gpu/drm/i915/intel_pm.c | 61 ---------------------------- + 2 files changed, 59 insertions(+), 61 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c ++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c +@@ -815,6 +815,63 @@ ivb_gt_workarounds_init(struct drm_i915_ + } + + static void ++vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) ++{ ++ /* WaDisableEarlyCull:vlv */ ++ wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); ++ ++ /* WaPsdDispatchEnable:vlv */ ++ /* WaDisablePSDDualDispatchEnable:vlv */ ++ wa_masked_en(wal, ++ GEN7_HALF_SLICE_CHICKEN1, ++ GEN7_MAX_PS_THREAD_DEP | ++ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); ++ ++ /* WaDisable_RenderCache_OperationalFlush:vlv */ ++ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); ++ ++ /* WaForceL3Serialization:vlv */ ++ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); ++ ++ /* ++ * WaVSThreadDispatchOverride:ivb,vlv ++ * ++ * This actually overrides the dispatch ++ * mode for all thread types. ++ */ ++ wa_write_masked_or(wal, ++ GEN7_FF_THREAD_MODE, ++ GEN7_FF_SCHED_MASK, ++ GEN7_FF_TS_SCHED_HW | ++ GEN7_FF_VS_SCHED_HW | ++ GEN7_FF_DS_SCHED_HW); ++ ++ /* ++ * BSpec says this must be set, even though ++ * WaDisable4x2SubspanOptimization isn't listed for VLV. ++ */ ++ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); ++ ++ /* ++ * BSpec recommends 8x4 when MSAA is used, ++ * however in practice 16x4 seems fastest. 
++ * ++ * Note that PS/WM thread counts depend on the WIZ hashing ++ * disable bit, which we don't touch here, but it's good ++ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). ++ */ ++ wa_add(wal, GEN7_GT_MODE, 0, ++ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), ++ GEN6_WIZ_HASHING_16x4); ++ ++ /* ++ * WaIncreaseL3CreditsForVLVB0:vlv ++ * This is the hardware default actually. ++ */ ++ wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); ++} ++ ++static void + hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) + { + /* L3 caching of data atomics doesn't work -- disable it. */ +@@ -1133,6 +1190,8 @@ gt_init_workarounds(struct drm_i915_priv + skl_gt_workarounds_init(i915, wal); + else if (IS_HASWELL(i915)) + hsw_gt_workarounds_init(i915, wal); ++ else if (IS_VALLEYVIEW(i915)) ++ vlv_gt_workarounds_init(i915, wal); + else if (IS_IVYBRIDGE(i915)) + ivb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 6)) +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -6706,24 +6706,6 @@ static void gen6_init_clock_gating(struc + gen6_check_mch_setup(dev_priv); + } + +-static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) +-{ +- u32 reg = I915_READ(GEN7_FF_THREAD_MODE); +- +- /* +- * WaVSThreadDispatchOverride:ivb,vlv +- * +- * This actually overrides the dispatch +- * mode for all thread types. 
+- */ +- reg &= ~GEN7_FF_SCHED_MASK; +- reg |= GEN7_FF_TS_SCHED_HW; +- reg |= GEN7_FF_VS_SCHED_HW; +- reg |= GEN7_FF_DS_SCHED_HW; +- +- I915_WRITE(GEN7_FF_THREAD_MODE, reg); +-} +- + static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) + { + /* +@@ -7009,28 +6991,11 @@ static void ivb_init_clock_gating(struct + + static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) + { +- /* WaDisableEarlyCull:vlv */ +- I915_WRITE(_3D_CHICKEN3, +- _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); +- + /* WaDisableBackToBackFlipFix:vlv */ + I915_WRITE(IVB_CHICKEN3, + CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | + CHICKEN3_DGMG_DONE_FIX_DISABLE); + +- /* WaPsdDispatchEnable:vlv */ +- /* WaDisablePSDDualDispatchEnable:vlv */ +- I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, +- _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | +- GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); +- +- /* WaDisable_RenderCache_OperationalFlush:vlv */ +- I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); +- +- /* WaForceL3Serialization:vlv */ +- I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & +- ~L3SQ_URB_READ_CAM_MATCH_DISABLE); +- + /* WaDisableDopClockGating:vlv */ + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); +@@ -7040,8 +7005,6 @@ static void vlv_init_clock_gating(struct + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + +- gen7_setup_fixed_func_scheduler(dev_priv); +- + /* + * According to the spec, bit 13 (RCZUNIT) must be set on IVB. + * This implements the WaDisableRCZUnitClockGating:vlv workaround. +@@ -7056,30 +7019,6 @@ static void vlv_init_clock_gating(struct + I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); + + /* +- * BSpec says this must be set, even though +- * WaDisable4x2SubspanOptimization isn't listed for VLV. 
+- */ +- I915_WRITE(CACHE_MODE_1, +- _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); +- +- /* +- * BSpec recommends 8x4 when MSAA is used, +- * however in practice 16x4 seems fastest. +- * +- * Note that PS/WM thread counts depend on the WIZ hashing +- * disable bit, which we don't touch here, but it's good +- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). +- */ +- I915_WRITE(GEN7_GT_MODE, +- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); +- +- /* +- * WaIncreaseL3CreditsForVLVB0:vlv +- * This is the hardware default actually. +- */ +- I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); +- +- /* + * WaDisableVLVClockGating_VBIIssue:vlv + * Disable clock gating on th GCFG unit to prevent a delay + * in the reporting of vblank events. diff --git a/queue-5.7/drm-i915-icl-fix-hotplug-interrupt-disabling-after-storm-detection.patch b/queue-5.7/drm-i915-icl-fix-hotplug-interrupt-disabling-after-storm-detection.patch new file mode 100644 index 00000000000..8432ea6f689 --- /dev/null +++ b/queue-5.7/drm-i915-icl-fix-hotplug-interrupt-disabling-after-storm-detection.patch @@ -0,0 +1,41 @@ +From a3005c2edf7e8c3478880db1ca84028a2b6819bb Mon Sep 17 00:00:00 2001 +From: Imre Deak +Date: Fri, 12 Jun 2020 15:17:31 +0300 +Subject: drm/i915/icl+: Fix hotplug interrupt disabling after storm detection +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Imre Deak + +commit a3005c2edf7e8c3478880db1ca84028a2b6819bb upstream. + +Atm, hotplug interrupts on TypeC ports are left enabled after detecting +an interrupt storm, fix this. 
+ +Reported-by: Kunal Joshi +References: https://gitlab.freedesktop.org/drm/intel/-/issues/351 +Bugzilla: https://gitlab.freedesktop.org/drm/intel/-/issues/1964 +Cc: Kunal Joshi +Cc: stable@vger.kernel.org +Signed-off-by: Imre Deak +Reviewed-by: Ville Syrjälä +Link: https://patchwork.freedesktop.org/patch/msgid/20200612121731.19596-1-imre.deak@intel.com +(cherry picked from commit 587a87b9d7e94927edcdea018565bc1939381eb1) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/i915_irq.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/i915/i915_irq.c ++++ b/drivers/gpu/drm/i915/i915_irq.c +@@ -3092,6 +3092,7 @@ static void gen11_hpd_irq_setup(struct d + + val = I915_READ(GEN11_DE_HPD_IMR); + val &= ~hotplug_irqs; ++ val |= ~enabled_irqs & hotplug_irqs; + I915_WRITE(GEN11_DE_HPD_IMR, val); + POSTING_READ(GEN11_DE_HPD_IMR); + diff --git a/queue-5.7/drm-i915-tc-fix-the-reset-of-ln0.patch b/queue-5.7/drm-i915-tc-fix-the-reset-of-ln0.patch new file mode 100644 index 00000000000..a30d68e0c51 --- /dev/null +++ b/queue-5.7/drm-i915-tc-fix-the-reset-of-ln0.patch @@ -0,0 +1,39 @@ +From a43555ac908c604f45ed98628805aec9355b9737 Mon Sep 17 00:00:00 2001 +From: Khaled Almahallawy +Date: Mon, 8 Jun 2020 13:45:37 -0700 +Subject: drm/i915/tc: fix the reset of ln0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Khaled Almahallawy + +commit a43555ac908c604f45ed98628805aec9355b9737 upstream. 
+ +Setting ln0 similar to ln1 + +Fixes: 3b51be4e4061b ("drm/i915/tc: Update DP_MODE programming") +Cc: # v5.5+ +Signed-off-by: Khaled Almahallawy +Reviewed-by: José Roberto de Souza +Signed-off-by: Imre Deak +Link: https://patchwork.freedesktop.org/patch/msgid/20200608204537.28468-1-khaled.almahallawy@intel.com +(cherry picked from commit 4f72a8ee819d57d7329d88f487a2fc9b45153177) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/display/intel_ddi.c ++++ b/drivers/gpu/drm/i915/display/intel_ddi.c +@@ -2866,7 +2866,7 @@ icl_program_mg_dp_mode(struct intel_digi + ln1 = intel_de_read(dev_priv, MG_DP_MODE(1, tc_port)); + } + +- ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); ++ ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); + ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); + + /* DPPATC */ diff --git a/queue-5.7/drm-i915-whitelist-context-local-timestamp-in-the-gen9-cmdparser.patch b/queue-5.7/drm-i915-whitelist-context-local-timestamp-in-the-gen9-cmdparser.patch new file mode 100644 index 00000000000..817f5ec047f --- /dev/null +++ b/queue-5.7/drm-i915-whitelist-context-local-timestamp-in-the-gen9-cmdparser.patch @@ -0,0 +1,46 @@ +From 273500ae71711c040d258a7b3f4b6f44c368fff2 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Mon, 1 Jun 2020 17:19:42 +0100 +Subject: drm/i915: Whitelist context-local timestamp in the gen9 cmdparser + +From: Chris Wilson + +commit 273500ae71711c040d258a7b3f4b6f44c368fff2 upstream. + +Allow batch buffers to read their own _local_ cumulative HW runtime of +their logical context. 
+ +Fixes: 0f2f39758341 ("drm/i915: Add gen9 BCS cmdparsing") +Signed-off-by: Chris Wilson +Cc: Mika Kuoppala +Cc: # v5.4+ +Reviewed-by: Mika Kuoppala +Link: https://patchwork.freedesktop.org/patch/msgid/20200601161942.30854-1-chris@chris-wilson.co.uk +(cherry picked from commit f9496520df11de00fbafc3cbd693b9570d600ab3) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -572,6 +572,9 @@ struct drm_i915_reg_descriptor { + #define REG32(_reg, ...) \ + { .addr = (_reg), __VA_ARGS__ } + ++#define REG32_IDX(_reg, idx) \ ++ { .addr = _reg(idx) } ++ + /* + * Convenience macro for adding 64-bit registers. + * +@@ -669,6 +672,7 @@ static const struct drm_i915_reg_descrip + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), + REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), ++ REG32_IDX(RING_CTX_TIMESTAMP, BLT_RING_BASE), + REG64_IDX(BCS_GPR, 0), + REG64_IDX(BCS_GPR, 1), + REG64_IDX(BCS_GPR, 2), diff --git a/queue-5.7/drm-msm-check-for-powered-down-hw-in-the-devfreq-callbacks.patch b/queue-5.7/drm-msm-check-for-powered-down-hw-in-the-devfreq-callbacks.patch new file mode 100644 index 00000000000..673a20aa6a9 --- /dev/null +++ b/queue-5.7/drm-msm-check-for-powered-down-hw-in-the-devfreq-callbacks.patch @@ -0,0 +1,108 @@ +From eadf79286a4badebc95af7061530bdb50a7e6f38 Mon Sep 17 00:00:00 2001 +From: Jordan Crouse +Date: Fri, 1 May 2020 13:43:26 -0600 +Subject: drm/msm: Check for powered down HW in the devfreq callbacks + +From: Jordan Crouse + +commit eadf79286a4badebc95af7061530bdb50a7e6f38 upstream. 
+ +Writing to the devfreq sysfs nodes while the GPU is powered down can +result in a system crash (on a5xx) or a nasty GMU error (on a6xx): + + $ /sys/class/devfreq/5000000.gpu# echo 500000000 > min_freq + [ 104.841625] platform 506a000.gmu: [drm:a6xx_gmu_set_oob] + *ERROR* Timeout waiting for GMU OOB set GPU_DCVS: 0x0 + +Despite the fact that we carefully try to suspend the devfreq device when +the hardware is powered down there are lots of holes in the governors that +don't check for the suspend state and blindly call into the devfreq +callbacks that end up triggering hardware reads in the GPU driver. + +Call pm_runtime_get_if_in_use() in the gpu_busy() and gpu_set_freq() +callbacks to skip the hardware access if it isn't active. + +v3: Only check pm_runtime_get_if_in_use() for == 0 per Eric Anholt +v2: Use pm_runtime_get_if_in_use() per Eric Anholt + +Cc: stable@vger.kernel.org +Reviewed-by: Eric Anholt +Signed-off-by: Jordan Crouse +Signed-off-by: Rob Clark +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 6 ++++++ + drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 8 ++++++++ + drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 7 +++++++ + 3 files changed, 21 insertions(+) + +--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +@@ -1404,6 +1404,10 @@ static unsigned long a5xx_gpu_busy(struc + { + u64 busy_cycles, busy_time; + ++ /* Only read the gpu busy if the hardware is already active */ ++ if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0) ++ return 0; ++ + busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO, + REG_A5XX_RBBM_PERFCTR_RBBM_0_HI); + +@@ -1412,6 +1416,8 @@ static unsigned long a5xx_gpu_busy(struc + + gpu->devfreq.busy_cycles = busy_cycles; + ++ pm_runtime_put(&gpu->pdev->dev); ++ + if (WARN_ON(busy_time > ~0LU)) + return ~0LU; + +--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c ++++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +@@ -108,6 +108,13 @@ static void __a6xx_gmu_set_freq(struct a + 
struct msm_gpu *gpu = &adreno_gpu->base; + int ret; + ++ /* ++ * This can get called from devfreq while the hardware is idle. Don't ++ * bring up the power if it isn't already active ++ */ ++ if (pm_runtime_get_if_in_use(gmu->dev) == 0) ++ return; ++ + gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0); + + gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING, +@@ -134,6 +141,7 @@ static void __a6xx_gmu_set_freq(struct a + * for now leave it at max so that the performance is nominal. + */ + icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); ++ pm_runtime_put(gmu->dev); + } + + void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq) +--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c ++++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +@@ -810,6 +810,11 @@ static unsigned long a6xx_gpu_busy(struc + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + u64 busy_cycles, busy_time; + ++ ++ /* Only read the gpu busy if the hardware is already active */ ++ if (pm_runtime_get_if_in_use(a6xx_gpu->gmu.dev) == 0) ++ return 0; ++ + busy_cycles = gmu_read64(&a6xx_gpu->gmu, + REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, + REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); +@@ -819,6 +824,8 @@ static unsigned long a6xx_gpu_busy(struc + + gpu->devfreq.busy_cycles = busy_cycles; + ++ pm_runtime_put(a6xx_gpu->gmu.dev); ++ + if (WARN_ON(busy_time > ~0LU)) + return ~0LU; + diff --git a/queue-5.7/s390-fix-syscall_get_error-for-compat-processes.patch b/queue-5.7/s390-fix-syscall_get_error-for-compat-processes.patch new file mode 100644 index 00000000000..680730b7708 --- /dev/null +++ b/queue-5.7/s390-fix-syscall_get_error-for-compat-processes.patch @@ -0,0 +1,54 @@ +From b3583fca5fb654af2cfc1c08259abb9728272538 Mon Sep 17 00:00:00 2001 +From: "Dmitry V. Levin" +Date: Tue, 2 Jun 2020 21:00:51 +0300 +Subject: s390: fix syscall_get_error for compat processes + +From: Dmitry V. Levin + +commit b3583fca5fb654af2cfc1c08259abb9728272538 upstream. 
+ +If both the tracer and the tracee are compat processes, and gprs[2] +is assigned a value by __poke_user_compat, then the higher 32 bits +of gprs[2] are cleared, IS_ERR_VALUE() always returns false, and +syscall_get_error() always returns 0. + +Fix the implementation by sign-extending the value for compat processes +the same way as x86 implementation does. + +The bug was exposed to user space by commit 201766a20e30f ("ptrace: add +PTRACE_GET_SYSCALL_INFO request") and detected by strace test suite. + +This change fixes strace syscall tampering on s390. + +Link: https://lkml.kernel.org/r/20200602180051.GA2427@altlinux.org +Fixes: 753c4dd6a2fa2 ("[S390] ptrace changes") +Cc: Elvira Khabirova +Cc: stable@vger.kernel.org # v2.6.28+ +Signed-off-by: Dmitry V. Levin +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman + +diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h +index f073292e9fdb..d9d5de0f67ff 100644 +--- a/arch/s390/include/asm/syscall.h ++++ b/arch/s390/include/asm/syscall.h +@@ -33,7 +33,17 @@ static inline void syscall_rollback(struct task_struct *task, + static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) + { +- return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; ++ unsigned long error = regs->gprs[2]; ++#ifdef CONFIG_COMPAT ++ if (test_tsk_thread_flag(task, TIF_31BIT)) { ++ /* ++ * Sign-extend the value so (int)-EFOO becomes (long)-EFOO ++ * and will match correctly in comparisons. ++ */ ++ error = (long)(int)error; ++ } ++#endif ++ return IS_ERR_VALUE(error) ? 
error : 0; + } + + static inline long syscall_get_return_value(struct task_struct *task, diff --git a/queue-5.7/series b/queue-5.7/series index 7628161c852..1bba4235d83 100644 --- a/queue-5.7/series +++ b/queue-5.7/series @@ -438,3 +438,20 @@ drm-dp_mst-increase-act-retry-timeout-to-3s.patch net-mlx5-dr-fix-freeing-in-dr_create_rc_qp.patch f2fs-split-f2fs_d_compare-from-f2fs_match_name.patch f2fs-avoid-utf8_strncasecmp-with-unstable-name.patch +s390-fix-syscall_get_error-for-compat-processes.patch +drm-i915-fix-aux-power-domain-toggling-across-typec-mode-resets.patch +drm-msm-check-for-powered-down-hw-in-the-devfreq-callbacks.patch +drm-i915-gem-avoid-iterating-an-empty-list.patch +drm-i915-whitelist-context-local-timestamp-in-the-gen9-cmdparser.patch +drm-connector-notify-userspace-on-hotplug-after-register-complete.patch +drm-amdkfd-use-correct-major-in-devcgroup-check.patch +drm-amd-display-use-kvfree-to-free-coeff-in-build_regamma.patch +drm-i915-icl-fix-hotplug-interrupt-disabling-after-storm-detection.patch +drm-i915-tc-fix-the-reset-of-ln0.patch +drm-i915-gt-incrementally-check-for-rewinding.patch +drm-i915-gt-move-hsw-gt-workarounds-from-init_clock_gating-to-workarounds.patch +drm-i915-gt-move-ivb-gt-workarounds-from-init_clock_gating-to-workarounds.patch +drm-i915-gt-move-snb-gt-workarounds-from-init_clock_gating-to-workarounds.patch +drm-i915-gt-move-ilk-gt-workarounds-from-init_clock_gating-to-workarounds.patch +drm-i915-gt-move-vlv-gt-workarounds-from-init_clock_gating-to-workarounds.patch +drm-i915-gt-move-gen4-gt-workarounds-from-init_clock_gating-to-workarounds.patch -- 2.47.3