--- /dev/null
+From c24784f01549ecdf23fc00d0588423bcf8956714 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Fri, 28 Oct 2016 17:04:07 +0200
+Subject: drm/amd: fix scheduler fence teardown order v2
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian König <christian.koenig@amd.com>
+
+commit c24784f01549ecdf23fc00d0588423bcf8956714 upstream.
+
+Some fences might be alive even after we have stopped the scheduler,
+leading to warnings about leaked objects from the SLUB allocator.
+
+Fix this by creating/destroying the slab cache in the module init/fini
+functions, just like we do for the hw fences.
+
+v2: make variable static, add link to bug
+
+Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=97500
+
+Reported-by: Grazvydas Ignotas <notasas@gmail.com>
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
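+The lifetime rule the fix enforces can be sketched independently of the
+driver: a slab cache whose objects may still be in flight after a
+scheduler instance is torn down has to live as long as the module
+itself. The sketch below uses hypothetical demo_* names, not the
+upstream amdgpu code; it only illustrates the module init/exit pairing
+the patch switches to.
+
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+/* Hypothetical object; stands in for the scheduler fence. */
+struct demo_fence {
+	struct rcu_head rcu;
+};
+
+static struct kmem_cache *demo_fence_slab;
+
+static int __init demo_init(void)
+{
+	demo_fence_slab = kmem_cache_create("demo_fence",
+					    sizeof(struct demo_fence), 0,
+					    SLAB_HWCACHE_ALIGN, NULL);
+	return demo_fence_slab ? 0 : -ENOMEM;
+}
+
+static void __exit demo_exit(void)
+{
+	/* Every scheduler instance is gone by now, so no new fences. */
+	rcu_barrier();			/* drain frees queued via call_rcu() */
+	kmem_cache_destroy(demo_fence_slab);
+}
+
+module_init(demo_init);
+module_exit(demo_exit);
+MODULE_LICENSE("GPL");
+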
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 13 -------------
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 6 +++---
+ drivers/gpu/drm/amd/scheduler/sched_fence.c | 19 +++++++++++++++++++
+ 4 files changed, 24 insertions(+), 16 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -605,6 +605,7 @@ static int __init amdgpu_init(void)
+ {
+ amdgpu_sync_init();
+ amdgpu_fence_slab_init();
++ amd_sched_fence_slab_init();
+ if (vgacon_text_force()) {
+ DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
+ return -EINVAL;
+@@ -624,6 +625,7 @@ static void __exit amdgpu_exit(void)
+ drm_pci_exit(driver, pdriver);
+ amdgpu_unregister_atpx_handler();
+ amdgpu_sync_fini();
++ amd_sched_fence_slab_fini();
+ amdgpu_fence_slab_fini();
+ }
+
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+@@ -34,9 +34,6 @@ static bool amd_sched_entity_is_ready(st
+ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb);
+
+-struct kmem_cache *sched_fence_slab;
+-atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
+-
+ /* Initialize a given run queue struct */
+ static void amd_sched_rq_init(struct amd_sched_rq *rq)
+ {
+@@ -618,13 +615,6 @@ int amd_sched_init(struct amd_gpu_schedu
+ INIT_LIST_HEAD(&sched->ring_mirror_list);
+ spin_lock_init(&sched->job_list_lock);
+ atomic_set(&sched->hw_rq_count, 0);
+- if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
+- sched_fence_slab = kmem_cache_create(
+- "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
+- SLAB_HWCACHE_ALIGN, NULL);
+- if (!sched_fence_slab)
+- return -ENOMEM;
+- }
+
+ /* Each scheduler will run on a seperate kernel thread */
+ sched->thread = kthread_run(amd_sched_main, sched, sched->name);
+@@ -645,7 +635,4 @@ void amd_sched_fini(struct amd_gpu_sched
+ {
+ if (sched->thread)
+ kthread_stop(sched->thread);
+- rcu_barrier();
+- if (atomic_dec_and_test(&sched_fence_slab_ref))
+- kmem_cache_destroy(sched_fence_slab);
+ }
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+@@ -30,9 +30,6 @@
+ struct amd_gpu_scheduler;
+ struct amd_sched_rq;
+
+-extern struct kmem_cache *sched_fence_slab;
+-extern atomic_t sched_fence_slab_ref;
+-
+ /**
+ * A scheduler entity is a wrapper around a job queue or a group
+ * of other entities. Entities take turns emitting jobs from their
+@@ -145,6 +142,9 @@ void amd_sched_entity_fini(struct amd_gp
+ struct amd_sched_entity *entity);
+ void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
+
++int amd_sched_fence_slab_init(void);
++void amd_sched_fence_slab_fini(void);
++
+ struct amd_sched_fence *amd_sched_fence_create(
+ struct amd_sched_entity *s_entity, void *owner);
+ void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
+--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
++++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
+@@ -27,6 +27,25 @@
+ #include <drm/drmP.h>
+ #include "gpu_scheduler.h"
+
++static struct kmem_cache *sched_fence_slab;
++
++int amd_sched_fence_slab_init(void)
++{
++ sched_fence_slab = kmem_cache_create(
++ "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
++ SLAB_HWCACHE_ALIGN, NULL);
++ if (!sched_fence_slab)
++ return -ENOMEM;
++
++ return 0;
++}
++
++void amd_sched_fence_slab_fini(void)
++{
++ rcu_barrier();
++ kmem_cache_destroy(sched_fence_slab);
++}
++
+ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity,
+ void *owner)
+ {
--- /dev/null
+From a053fb7e512c77f0742bceb578b10025256e1911 Mon Sep 17 00:00:00 2001
+From: Grazvydas Ignotas <notasas@gmail.com>
+Date: Sun, 23 Oct 2016 21:31:44 +0300
+Subject: drm/amdgpu: fix sched fence slab teardown
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Grazvydas Ignotas <notasas@gmail.com>
+
+commit a053fb7e512c77f0742bceb578b10025256e1911 upstream.
+
+To free fences, call_rcu() is used, which calls amd_sched_fence_free()
+after a grace period. During teardown, there is no guarantee all
+callbacks have finished, so sched_fence_slab may be destroyed before
+all fences have been freed. If we are lucky, this results in some slab
+warnings; if not, we get a crash in one of the RCU threads because the
+callback is called after amdgpu has already been unloaded.
+
+Fix it with an rcu_barrier().
+
+Fixes: 189e0fb76304 ("drm/amdgpu: RCU protected amd_sched_fence_release")
+Acked-by: Chunming Zhou <david1.zhou@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
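+The race comes down to the difference between a grace period and
+callback completion: synchronize_rcu() only waits for the former, while
+rcu_barrier() waits until every callback already queued with call_rcu()
+has run. A minimal sketch with hypothetical demo_* names (not the
+driver code):
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+struct demo_fence {
+	struct rcu_head rcu;
+};
+
+static struct kmem_cache *demo_fence_slab;
+
+static void demo_fence_free(struct rcu_head *rcu)
+{
+	/* May run long after the scheduler that produced the fence is gone. */
+	kmem_cache_free(demo_fence_slab,
+			container_of(rcu, struct demo_fence, rcu));
+}
+
+static void demo_fence_release(struct demo_fence *f)
+{
+	call_rcu(&f->rcu, demo_fence_free);
+}
+
+static void demo_slab_teardown(void)
+{
+	/*
+	 * synchronize_rcu() would not be enough: it waits for a grace
+	 * period, not for already-queued callbacks to finish running.
+	 */
+	rcu_barrier();			/* flush pending demo_fence_free() */
+	kmem_cache_destroy(demo_fence_slab);
+}
+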
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+@@ -645,6 +645,7 @@ void amd_sched_fini(struct amd_gpu_sched
+ {
+ if (sched->thread)
+ kthread_stop(sched->thread);
++ rcu_barrier();
+ if (atomic_dec_and_test(&sched_fence_slab_ref))
+ kmem_cache_destroy(sched_fence_slab);
+ }
drm-amdgpu-disable-runtime-pm-in-certain-cases.patch
drm-amdgpu-fix-crash-in-acp_hw_fini.patch
tty-serial-at91-fix-hardware-handshake-on-atmel-platforms.patch
+drm-amdgpu-fix-sched-fence-slab-teardown.patch
+drm-amd-fix-scheduler-fence-teardown-order-v2.patch
+xprtrdma-use-complete-instead-complete_all.patch
+xprtrdma-fix-dmar-failure-in-frwr_op_map-after-reconnect.patch
--- /dev/null
+From 62bdf94a2049822ef8c6d4b0e83cd9c3a1663ab4 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Mon, 7 Nov 2016 16:16:24 -0500
+Subject: xprtrdma: Fix DMAR failure in frwr_op_map() after reconnect
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 62bdf94a2049822ef8c6d4b0e83cd9c3a1663ab4 upstream.
+
+When a LOCAL_INV WR is flushed, the frmr is marked STALE, then
+frwr_op_unmap_sync DMA-unmaps the frmr's SGL. These STALE frmrs
+are then recovered when frwr_op_map hunts for an INVALID frmr to
+use.
+
+All other cases that need frmr recovery leave that SGL DMA-mapped.
+The FRMR recovery path unconditionally DMA-unmaps the frmr's SGL.
+
+To avoid DMA unmapping the SGL twice for flushed LOCAL_INV WRs,
+alter the recovery logic (rather than the hot frwr_op_unmap_sync
+path) to distinguish among these cases. This solution also takes
+care of the case where multiple LOCAL_INV WRs are issued for the
+same rpcrdma_req, some complete successfully, but some are flushed.
+
+Reported-by: Vasco Steinmetz <linux@kyberraum.net>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Vasco Steinmetz <linux@kyberraum.net>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
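+The mapping invariant that the new states encode can be summarized with
+a sketch of the recovery decision. The field names below match the diff,
+but this is an illustration, not the upstream frwr_op_recover_mr():
+
+#include <rdma/ib_verbs.h>
+#include "xprt_rdma.h"
+
+/*
+ * State of an MR when it reaches recovery, and whether its SGL is
+ * still DMA-mapped at that point:
+ *
+ *   FRMR_IS_VALID   - registration completed;   SGL still mapped
+ *   FRMR_FLUSHED_FR - FASTREG WR was flushed;   SGL still mapped
+ *   FRMR_FLUSHED_LI - LOCAL_INV WR was flushed; SGL already unmapped
+ *                     by frwr_op_unmap_sync()
+ */
+static void recover_mr_sketch(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
+{
+	enum rpcrdma_frmr_state state = mw->frmr.fr_state;
+
+	if (state != FRMR_FLUSHED_LI)	/* skip the second, bogus unmap */
+		ib_dma_unmap_sg(ia->ri_device, mw->mw_sg,
+				mw->mw_nents, mw->mw_dir);
+
+	/* ... then reset and re-register the MR as before ... */
+}
+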
+ net/sunrpc/xprtrdma/frwr_ops.c | 37 ++++++++++++++++++++++---------------
+ net/sunrpc/xprtrdma/xprt_rdma.h | 3 ++-
+ 2 files changed, 24 insertions(+), 16 deletions(-)
+
+--- a/net/sunrpc/xprtrdma/frwr_ops.c
++++ b/net/sunrpc/xprtrdma/frwr_ops.c
+@@ -44,18 +44,20 @@
+ * being done.
+ *
+ * When the underlying transport disconnects, MRs are left in one of
+- * three states:
++ * four states:
+ *
+ * INVALID: The MR was not in use before the QP entered ERROR state.
+- * (Or, the LOCAL_INV WR has not completed or flushed yet).
+- *
+- * STALE: The MR was being registered or unregistered when the QP
+- * entered ERROR state, and the pending WR was flushed.
+ *
+ * VALID: The MR was registered before the QP entered ERROR state.
+ *
+- * When frwr_op_map encounters STALE and VALID MRs, they are recovered
+- * with ib_dereg_mr and then are re-initialized. Beause MR recovery
++ * FLUSHED_FR: The MR was being registered when the QP entered ERROR
++ * state, and the pending WR was flushed.
++ *
++ * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
++ * state, and the pending WR was flushed.
++ *
++ * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
++ * with ib_dereg_mr and then are re-initialized. Because MR recovery
+ * allocates fresh resources, it is deferred to a workqueue, and the
+ * recovered MRs are placed back on the rb_mws list when recovery is
+ * complete. frwr_op_map allocates another MR for the current RPC while
+@@ -175,12 +177,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, s
+ static void
+ frwr_op_recover_mr(struct rpcrdma_mw *mw)
+ {
++ enum rpcrdma_frmr_state state = mw->frmr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ int rc;
+
+ rc = __frwr_reset_mr(ia, mw);
+- ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
++ if (state != FRMR_FLUSHED_LI)
++ ib_dma_unmap_sg(ia->ri_device,
++ mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ if (rc)
+ goto out_release;
+
+@@ -261,10 +266,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_
+ }
+
+ static void
+-__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
+- const char *wr)
++__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
+ {
+- frmr->fr_state = FRMR_IS_STALE;
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
+ wr, ib_wc_status_msg(wc->status),
+@@ -287,7 +290,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct
+ if (wc->status != IB_WC_SUCCESS) {
+ cqe = wc->wr_cqe;
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+- __frwr_sendcompletion_flush(wc, frmr, "fastreg");
++ frmr->fr_state = FRMR_FLUSHED_FR;
++ __frwr_sendcompletion_flush(wc, "fastreg");
+ }
+ }
+
+@@ -307,7 +311,8 @@ frwr_wc_localinv(struct ib_cq *cq, struc
+ if (wc->status != IB_WC_SUCCESS) {
+ cqe = wc->wr_cqe;
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+- __frwr_sendcompletion_flush(wc, frmr, "localinv");
++ frmr->fr_state = FRMR_FLUSHED_LI;
++ __frwr_sendcompletion_flush(wc, "localinv");
+ }
+ }
+
+@@ -327,8 +332,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq,
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ cqe = wc->wr_cqe;
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+- if (wc->status != IB_WC_SUCCESS)
+- __frwr_sendcompletion_flush(wc, frmr, "localinv");
++ if (wc->status != IB_WC_SUCCESS) {
++ frmr->fr_state = FRMR_FLUSHED_LI;
++ __frwr_sendcompletion_flush(wc, "localinv");
++ }
+ complete(&frmr->fr_linv_done);
+ }
+
+--- a/net/sunrpc/xprtrdma/xprt_rdma.h
++++ b/net/sunrpc/xprtrdma/xprt_rdma.h
+@@ -207,7 +207,8 @@ struct rpcrdma_rep {
+ enum rpcrdma_frmr_state {
+ FRMR_IS_INVALID, /* ready to be used */
+ FRMR_IS_VALID, /* in use */
+- FRMR_IS_STALE, /* failed completion */
++ FRMR_FLUSHED_FR, /* flushed FASTREG WR */
++ FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
+ };
+
+ struct rpcrdma_frmr {
--- /dev/null
+From 5690a22d8612e1788b48b4ea53c59868589cd2db Mon Sep 17 00:00:00 2001
+From: Daniel Wagner <daniel.wagner@bmw-carit.de>
+Date: Fri, 23 Sep 2016 10:41:57 +0200
+Subject: xprtrdma: use complete() instead complete_all()
+
+From: Daniel Wagner <daniel.wagner@bmw-carit.de>
+
+commit 5690a22d8612e1788b48b4ea53c59868589cd2db upstream.
+
+There is only one waiter for the completion, so there is no need to
+use complete_all(). Let's make that clear by using complete() instead
+of complete_all().
+
+The usage pattern of the completion is:
+
+waiter context waker context
+
+frwr_op_unmap_sync()
+ reinit_completion()
+ ib_post_send()
+ wait_for_completion()
+
+ frwr_wc_localinv_wake()
+ complete()
+
+Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
+Cc: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Cc: Trond Myklebust <trond.myklebust@primarydata.com>
+Cc: Chuck Lever <chuck.lever@oracle.com>
+Cc: linux-nfs@vger.kernel.org
+Cc: netdev@vger.kernel.org
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
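+The single-waiter pattern from the diagram above, sketched with
+hypothetical demo_* names rather than the xprtrdma structures:
+
+#include <linux/completion.h>
+
+struct demo_ctx {
+	struct completion done;
+};
+
+/* Waiter context: re-arm the completion, then block for one event. */
+static void demo_wait(struct demo_ctx *ctx)
+{
+	reinit_completion(&ctx->done);
+	/* ... post the work whose completion handler will wake us ... */
+	wait_for_completion(&ctx->done);
+}
+
+/* Waker context, e.g. a send completion handler. */
+static void demo_wake(struct demo_ctx *ctx)
+{
+	/*
+	 * Exactly one waiter, re-armed before every wait: complete() is
+	 * enough.  complete_all() would also leave the completion "done"
+	 * for any later waiter until the next reinit_completion(), which
+	 * this pattern never relies on.
+	 */
+	complete(&ctx->done);
+}
+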
+ net/sunrpc/xprtrdma/frwr_ops.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sunrpc/xprtrdma/frwr_ops.c
++++ b/net/sunrpc/xprtrdma/frwr_ops.c
+@@ -329,7 +329,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq,
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+ if (wc->status != IB_WC_SUCCESS)
+ __frwr_sendcompletion_flush(wc, frmr, "localinv");
+- complete_all(&frmr->fr_linv_done);
++ complete(&frmr->fr_linv_done);
+ }
+
+ /* Post a REG_MR Work Request to register a memory region