From: Greg Kroah-Hartman Date: Thu, 17 Nov 2016 07:15:07 +0000 (+0100) Subject: 4.8-stable patches X-Git-Tag: v4.4.33~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=651cd20633819174665e5d0223520966ec8bde7b;p=thirdparty%2Fkernel%2Fstable-queue.git 4.8-stable patches added patches: drm-amd-fix-scheduler-fence-teardown-order-v2.patch drm-amdgpu-fix-sched-fence-slab-teardown.patch xprtrdma-fix-dmar-failure-in-frwr_op_map-after-reconnect.patch xprtrdma-use-complete-instead-complete_all.patch --- diff --git a/queue-4.8/drm-amd-fix-scheduler-fence-teardown-order-v2.patch b/queue-4.8/drm-amd-fix-scheduler-fence-teardown-order-v2.patch new file mode 100644 index 00000000000..b30f562a6a9 --- /dev/null +++ b/queue-4.8/drm-amd-fix-scheduler-fence-teardown-order-v2.patch @@ -0,0 +1,137 @@ +From c24784f01549ecdf23fc00d0588423bcf8956714 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= +Date: Fri, 28 Oct 2016 17:04:07 +0200 +Subject: drm/amd: fix scheduler fence teardown order v2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian König + +commit c24784f01549ecdf23fc00d0588423bcf8956714 upstream. + +Some fences might be alive even after we have stopped the scheduler leading +to warnings about leaked objects from the SLUB allocator. + +Fix this by allocating/freeing the SLUB allocator from the module +init/fini functions just like we do it for hw fences. + +v2: make variable static, add link to bug + +Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=97500 + +Reported-by: Grazvydas Ignotas +Signed-off-by: Christian König +Reviewed-by: Alex Deucher (v1) +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ + drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 13 ------------- + drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 6 +++--- + drivers/gpu/drm/amd/scheduler/sched_fence.c | 19 +++++++++++++++++++ + 4 files changed, 24 insertions(+), 16 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -605,6 +605,7 @@ static int __init amdgpu_init(void) + { + amdgpu_sync_init(); + amdgpu_fence_slab_init(); ++ amd_sched_fence_slab_init(); + if (vgacon_text_force()) { + DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); + return -EINVAL; +@@ -624,6 +625,7 @@ static void __exit amdgpu_exit(void) + drm_pci_exit(driver, pdriver); + amdgpu_unregister_atpx_handler(); + amdgpu_sync_fini(); ++ amd_sched_fence_slab_fini(); + amdgpu_fence_slab_fini(); + } + +--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c ++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +@@ -34,9 +34,6 @@ static bool amd_sched_entity_is_ready(st + static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); + static void amd_sched_process_job(struct fence *f, struct fence_cb *cb); + +-struct kmem_cache *sched_fence_slab; +-atomic_t sched_fence_slab_ref = ATOMIC_INIT(0); +- + /* Initialize a given run queue struct */ + static void amd_sched_rq_init(struct amd_sched_rq *rq) + { +@@ -618,13 +615,6 @@ int amd_sched_init(struct amd_gpu_schedu + INIT_LIST_HEAD(&sched->ring_mirror_list); + spin_lock_init(&sched->job_list_lock); + atomic_set(&sched->hw_rq_count, 0); +- if (atomic_inc_return(&sched_fence_slab_ref) == 1) { +- sched_fence_slab = kmem_cache_create( +- "amd_sched_fence", sizeof(struct amd_sched_fence), 0, +- SLAB_HWCACHE_ALIGN, NULL); +- if (!sched_fence_slab) +- return -ENOMEM; +- } + + /* Each scheduler will run on a seperate 
kernel thread */ + sched->thread = kthread_run(amd_sched_main, sched, sched->name); +@@ -645,7 +635,4 @@ void amd_sched_fini(struct amd_gpu_sched + { + if (sched->thread) + kthread_stop(sched->thread); +- rcu_barrier(); +- if (atomic_dec_and_test(&sched_fence_slab_ref)) +- kmem_cache_destroy(sched_fence_slab); + } +--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h ++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +@@ -30,9 +30,6 @@ + struct amd_gpu_scheduler; + struct amd_sched_rq; + +-extern struct kmem_cache *sched_fence_slab; +-extern atomic_t sched_fence_slab_ref; +- + /** + * A scheduler entity is a wrapper around a job queue or a group + * of other entities. Entities take turns emitting jobs from their +@@ -145,6 +142,9 @@ void amd_sched_entity_fini(struct amd_gp + struct amd_sched_entity *entity); + void amd_sched_entity_push_job(struct amd_sched_job *sched_job); + ++int amd_sched_fence_slab_init(void); ++void amd_sched_fence_slab_fini(void); ++ + struct amd_sched_fence *amd_sched_fence_create( + struct amd_sched_entity *s_entity, void *owner); + void amd_sched_fence_scheduled(struct amd_sched_fence *fence); +--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c ++++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c +@@ -27,6 +27,25 @@ + #include + #include "gpu_scheduler.h" + ++static struct kmem_cache *sched_fence_slab; ++ ++int amd_sched_fence_slab_init(void) ++{ ++ sched_fence_slab = kmem_cache_create( ++ "amd_sched_fence", sizeof(struct amd_sched_fence), 0, ++ SLAB_HWCACHE_ALIGN, NULL); ++ if (!sched_fence_slab) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++void amd_sched_fence_slab_fini(void) ++{ ++ rcu_barrier(); ++ kmem_cache_destroy(sched_fence_slab); ++} ++ + struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity, + void *owner) + { diff --git a/queue-4.8/drm-amdgpu-fix-sched-fence-slab-teardown.patch b/queue-4.8/drm-amdgpu-fix-sched-fence-slab-teardown.patch new file mode 100644 index 00000000000..e0b0864e05f --- /dev/null +++ b/queue-4.8/drm-amdgpu-fix-sched-fence-slab-teardown.patch @@ -0,0 +1,42 @@ +From a053fb7e512c77f0742bceb578b10025256e1911 Mon Sep 17 00:00:00 2001 +From: Grazvydas Ignotas +Date: Sun, 23 Oct 2016 21:31:44 +0300 +Subject: drm/amdgpu: fix sched fence slab teardown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Grazvydas Ignotas + +commit a053fb7e512c77f0742bceb578b10025256e1911 upstream. + +To free fences, call_rcu() is used, which calls amd_sched_fence_free() +after a grace period. During teardown, there is no guarantee all +callbacks have finished, so sched_fence_slab may be destroyed before +all fences have been freed. If we are lucky, this results in some slab +warnings, if not, we get a crash in one of rcu threads because callback +is called after amdgpu has already been unloaded. + +Fix it with a rcu_barrier(). 
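+
+[stable-queue editor's note, not part of the upstream commit message:
+below is a minimal sketch of the teardown pattern this fix enforces.
+The struct and function names are hypothetical; call_rcu(),
+rcu_barrier() and the kmem_cache API are the real kernel interfaces.
+
+	#include <linux/kernel.h>
+	#include <linux/rcupdate.h>
+	#include <linux/slab.h>
+
+	struct my_fence {
+		struct rcu_head rcu;
+		/* ... payload ... */
+	};
+
+	static struct kmem_cache *my_fence_slab;
+
+	/* RCU callback: may still run after the last fence is dropped */
+	static void my_fence_free_rcu(struct rcu_head *rcu)
+	{
+		kmem_cache_free(my_fence_slab,
+				container_of(rcu, struct my_fence, rcu));
+	}
+
+	static void my_fence_put(struct my_fence *f)
+	{
+		/* the free is deferred until after a grace period */
+		call_rcu(&f->rcu, my_fence_free_rcu);
+	}
+
+	static void my_fence_slab_fini(void)
+	{
+		/* wait for every pending call_rcu() callback ... */
+		rcu_barrier();
+		/* ... so the cache is guaranteed empty here */
+		kmem_cache_destroy(my_fence_slab);
+	}
+]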
+ +Fixes: 189e0fb76304 ("drm/amdgpu: RCU protected amd_sched_fence_release") +Acked-by: Chunming Zhou +Reviewed-by: Christian König +Signed-off-by: Grazvydas Ignotas +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c ++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +@@ -645,6 +645,7 @@ void amd_sched_fini(struct amd_gpu_sched + { + if (sched->thread) + kthread_stop(sched->thread); ++ rcu_barrier(); + if (atomic_dec_and_test(&sched_fence_slab_ref)) + kmem_cache_destroy(sched_fence_slab); + } diff --git a/queue-4.8/series b/queue-4.8/series index 0cc4d78d29b..34ac9437020 100644 --- a/queue-4.8/series +++ b/queue-4.8/series @@ -46,3 +46,7 @@ drm-i915-dp-extend-bdw-dp-audio-workaround-to-gen9-platforms.patch drm-amdgpu-disable-runtime-pm-in-certain-cases.patch drm-amdgpu-fix-crash-in-acp_hw_fini.patch tty-serial-at91-fix-hardware-handshake-on-atmel-platforms.patch +drm-amdgpu-fix-sched-fence-slab-teardown.patch +drm-amd-fix-scheduler-fence-teardown-order-v2.patch +xprtrdma-use-complete-instead-complete_all.patch +xprtrdma-fix-dmar-failure-in-frwr_op_map-after-reconnect.patch diff --git a/queue-4.8/xprtrdma-fix-dmar-failure-in-frwr_op_map-after-reconnect.patch b/queue-4.8/xprtrdma-fix-dmar-failure-in-frwr_op_map-after-reconnect.patch new file mode 100644 index 00000000000..ff2f255eb76 --- /dev/null +++ b/queue-4.8/xprtrdma-fix-dmar-failure-in-frwr_op_map-after-reconnect.patch @@ -0,0 +1,138 @@ +From 62bdf94a2049822ef8c6d4b0e83cd9c3a1663ab4 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Mon, 7 Nov 2016 16:16:24 -0500 +Subject: xprtrdma: Fix DMAR failure in frwr_op_map() after reconnect + +From: Chuck Lever + +commit 62bdf94a2049822ef8c6d4b0e83cd9c3a1663ab4 upstream. + +When a LOCALINV WR is flushed, the frmr is marked STALE, then +frwr_op_unmap_sync DMA-unmaps the frmr's SGL. These STALE frmrs +are then recovered when frwr_op_map hunts for an INVALID frmr to +use. + +All other cases that need frmr recovery leave that SGL DMA-mapped. +The FRMR recovery path unconditionally DMA-unmaps the frmr's SGL. + +To avoid DMA unmapping the SGL twice for flushed LOCAL_INV WRs, +alter the recovery logic (rather than the hot frwr_op_unmap_sync +path) to distinguish among these cases. This solution also takes +care of the case where multiple LOCAL_INV WRs are issued for the +same rpcrdma_req, some complete successfully, but some are flushed. + +Reported-by: Vasco Steinmetz +Signed-off-by: Chuck Lever +Tested-by: Vasco Steinmetz +Signed-off-by: Anna Schumaker +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtrdma/frwr_ops.c | 37 ++++++++++++++++++++++--------------- + net/sunrpc/xprtrdma/xprt_rdma.h | 3 ++- + 2 files changed, 24 insertions(+), 16 deletions(-) + +--- a/net/sunrpc/xprtrdma/frwr_ops.c ++++ b/net/sunrpc/xprtrdma/frwr_ops.c +@@ -44,18 +44,20 @@ + * being done. + * + * When the underlying transport disconnects, MRs are left in one of +- * three states: ++ * four states: + * + * INVALID: The MR was not in use before the QP entered ERROR state. +- * (Or, the LOCAL_INV WR has not completed or flushed yet). +- * +- * STALE: The MR was being registered or unregistered when the QP +- * entered ERROR state, and the pending WR was flushed. + * + * VALID: The MR was registered before the QP entered ERROR state. 
+ * +- * When frwr_op_map encounters STALE and VALID MRs, they are recovered +- * with ib_dereg_mr and then are re-initialized. Beause MR recovery ++ * FLUSHED_FR: The MR was being registered when the QP entered ERROR ++ * state, and the pending WR was flushed. ++ * ++ * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR ++ * state, and the pending WR was flushed. ++ * ++ * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered ++ * with ib_dereg_mr and then are re-initialized. Because MR recovery + * allocates fresh resources, it is deferred to a workqueue, and the + * recovered MRs are placed back on the rb_mws list when recovery is + * complete. frwr_op_map allocates another MR for the current RPC while +@@ -175,12 +177,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, s + static void + frwr_op_recover_mr(struct rpcrdma_mw *mw) + { ++ enum rpcrdma_frmr_state state = mw->frmr.fr_state; + struct rpcrdma_xprt *r_xprt = mw->mw_xprt; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + int rc; + + rc = __frwr_reset_mr(ia, mw); +- ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); ++ if (state != FRMR_FLUSHED_LI) ++ ib_dma_unmap_sg(ia->ri_device, ++ mw->mw_sg, mw->mw_nents, mw->mw_dir); + if (rc) + goto out_release; + +@@ -261,10 +266,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_ + } + + static void +-__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr, +- const char *wr) ++__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) + { +- frmr->fr_state = FRMR_IS_STALE; + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("rpcrdma: %s: %s (%u/0x%x)\n", + wr, ib_wc_status_msg(wc->status), +@@ -287,7 +290,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct + if (wc->status != IB_WC_SUCCESS) { + cqe = wc->wr_cqe; + frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); +- __frwr_sendcompletion_flush(wc, frmr, "fastreg"); ++ frmr->fr_state = FRMR_FLUSHED_FR; ++ __frwr_sendcompletion_flush(wc, "fastreg"); + } + } + +@@ -307,7 +311,8 @@ frwr_wc_localinv(struct ib_cq *cq, struc + if (wc->status != IB_WC_SUCCESS) { + cqe = wc->wr_cqe; + frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); +- __frwr_sendcompletion_flush(wc, frmr, "localinv"); ++ frmr->fr_state = FRMR_FLUSHED_LI; ++ __frwr_sendcompletion_flush(wc, "localinv"); + } + } + +@@ -327,8 +332,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, + /* WARNING: Only wr_cqe and status are reliable at this point */ + cqe = wc->wr_cqe; + frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); +- if (wc->status != IB_WC_SUCCESS) +- __frwr_sendcompletion_flush(wc, frmr, "localinv"); ++ if (wc->status != IB_WC_SUCCESS) { ++ frmr->fr_state = FRMR_FLUSHED_LI; ++ __frwr_sendcompletion_flush(wc, "localinv"); ++ } + complete(&frmr->fr_linv_done); + } + +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -207,7 +207,8 @@ struct rpcrdma_rep { + enum rpcrdma_frmr_state { + FRMR_IS_INVALID, /* ready to be used */ + FRMR_IS_VALID, /* in use */ +- FRMR_IS_STALE, /* failed completion */ ++ FRMR_FLUSHED_FR, /* flushed FASTREG WR */ ++ FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ + }; + + struct rpcrdma_frmr { diff --git a/queue-4.8/xprtrdma-use-complete-instead-complete_all.patch b/queue-4.8/xprtrdma-use-complete-instead-complete_all.patch new file mode 100644 index 00000000000..75d9b624c13 --- /dev/null +++ b/queue-4.8/xprtrdma-use-complete-instead-complete_all.patch @@ -0,0 +1,49 @@ +From 5690a22d8612e1788b48b4ea53c59868589cd2db Mon Sep 17 00:00:00 2001 +From: Daniel Wagner +Date: Fri, 23 Sep 
2016 10:41:57 +0200 +Subject: xprtrdma: use complete() instead complete_all() + +From: Daniel Wagner + +commit 5690a22d8612e1788b48b4ea53c59868589cd2db upstream. + +There is only one waiter for the completion, therefore there +is no need to use complete_all(). Let's make that clear by +using complete() instead of complete_all(). + +The usage pattern of the completion is: + +waiter context waker context + +frwr_op_unmap_sync() + reinit_completion() + ib_post_send() + wait_for_completion() + + frwr_wc_localinv_wake() + complete() + +Signed-off-by: Daniel Wagner +Cc: Anna Schumaker +Cc: Trond Myklebust +Cc: Chuck Lever +Cc: linux-nfs@vger.kernel.org +Cc: netdev@vger.kernel.org +Signed-off-by: Anna Schumaker +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtrdma/frwr_ops.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sunrpc/xprtrdma/frwr_ops.c ++++ b/net/sunrpc/xprtrdma/frwr_ops.c +@@ -329,7 +329,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, + frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); + if (wc->status != IB_WC_SUCCESS) + __frwr_sendcompletion_flush(wc, frmr, "localinv"); +- complete_all(&frmr->fr_linv_done); ++ complete(&frmr->fr_linv_done); + } + + /* Post a REG_MR Work Request to register a memory region
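
[stable-queue editor's addendum, not part of any patch above: a small,
self-contained sketch of the complete()/complete_all() distinction that
xprtrdma-use-complete-instead-complete_all.patch relies on. The two
function names are hypothetical; the completion API calls are real.

	#include <linux/completion.h>

	static DECLARE_COMPLETION(linv_done);

	/* waiter context, mirroring frwr_op_unmap_sync() */
	static void waiter(void)
	{
		reinit_completion(&linv_done);	/* re-arm for this round */
		/* ... post the LOCAL_INV work request ... */
		wait_for_completion(&linv_done);	/* sleep until woken */
	}

	/* waker context, mirroring frwr_wc_localinv_wake() */
	static void waker(void)
	{
		/*
		 * Exactly one task ever waits on linv_done, so complete()
		 * is sufficient.  complete_all() would mark the completion
		 * done for all future waiters as well, so it could not be
		 * reused without the reinit_completion() above.
		 */
		complete(&linv_done);
	}
]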