--- /dev/null
+From c24784f01549ecdf23fc00d0588423bcf8956714 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Fri, 28 Oct 2016 17:04:07 +0200
+Subject: drm/amd: fix scheduler fence teardown order v2
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian König <christian.koenig@amd.com>
+
+commit c24784f01549ecdf23fc00d0588423bcf8956714 upstream.
+
+Some fences might be alive even after we have stopped the scheduler,
+leading to warnings about leaked objects from the SLUB allocator.
+
+Fix this by creating/destroying the slab cache in the module init/fini
+functions, just like we do for the hw fences.
+
+v2: make variable static, add link to bug
+
+Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=97500
+
+Reported-by: Grazvydas Ignotas <notasas@gmail.com>
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
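+The lifetime rule the fix enforces can be sketched independently of the
+driver: a slab cache whose objects may still be in flight after a
+scheduler instance is torn down has to live as long as the module
+itself. The sketch below uses hypothetical demo_* names, not the
+upstream amdgpu code; it only illustrates the module init/exit pairing
+the patch switches to.
+
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+/* Hypothetical object; stands in for the scheduler fence. */
+struct demo_fence {
+	struct rcu_head rcu;
+};
+
+static struct kmem_cache *demo_fence_slab;
+
+static int __init demo_init(void)
+{
+	demo_fence_slab = kmem_cache_create("demo_fence",
+					    sizeof(struct demo_fence), 0,
+					    SLAB_HWCACHE_ALIGN, NULL);
+	return demo_fence_slab ? 0 : -ENOMEM;
+}
+
+static void __exit demo_exit(void)
+{
+	/* Every scheduler instance is gone by now, so no new fences. */
+	rcu_barrier();			/* drain frees queued via call_rcu() */
+	kmem_cache_destroy(demo_fence_slab);
+}
+
+module_init(demo_init);
+module_exit(demo_exit);
+MODULE_LICENSE("GPL");
+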
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 13 -------------
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 6 +++---
+ drivers/gpu/drm/amd/scheduler/sched_fence.c | 19 +++++++++++++++++++
+ 4 files changed, 24 insertions(+), 16 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -605,6 +605,7 @@ static int __init amdgpu_init(void)
+ {
+ amdgpu_sync_init();
+ amdgpu_fence_slab_init();
++ amd_sched_fence_slab_init();
+ if (vgacon_text_force()) {
+ DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
+ return -EINVAL;
+@@ -624,6 +625,7 @@ static void __exit amdgpu_exit(void)
+ drm_pci_exit(driver, pdriver);
+ amdgpu_unregister_atpx_handler();
+ amdgpu_sync_fini();
++ amd_sched_fence_slab_fini();
+ amdgpu_fence_slab_fini();
+ }
+
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+@@ -34,9 +34,6 @@ static bool amd_sched_entity_is_ready(st
+ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb);
+
+-struct kmem_cache *sched_fence_slab;
+-atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
+-
+ /* Initialize a given run queue struct */
+ static void amd_sched_rq_init(struct amd_sched_rq *rq)
+ {
+@@ -618,13 +615,6 @@ int amd_sched_init(struct amd_gpu_schedu
+ INIT_LIST_HEAD(&sched->ring_mirror_list);
+ spin_lock_init(&sched->job_list_lock);
+ atomic_set(&sched->hw_rq_count, 0);
+- if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
+- sched_fence_slab = kmem_cache_create(
+- "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
+- SLAB_HWCACHE_ALIGN, NULL);
+- if (!sched_fence_slab)
+- return -ENOMEM;
+- }
+
+ /* Each scheduler will run on a seperate kernel thread */
+ sched->thread = kthread_run(amd_sched_main, sched, sched->name);
+@@ -645,7 +635,4 @@ void amd_sched_fini(struct amd_gpu_sched
+ {
+ if (sched->thread)
+ kthread_stop(sched->thread);
+- rcu_barrier();
+- if (atomic_dec_and_test(&sched_fence_slab_ref))
+- kmem_cache_destroy(sched_fence_slab);
+ }
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+@@ -30,9 +30,6 @@
+ struct amd_gpu_scheduler;
+ struct amd_sched_rq;
+
+-extern struct kmem_cache *sched_fence_slab;
+-extern atomic_t sched_fence_slab_ref;
+-
+ /**
+ * A scheduler entity is a wrapper around a job queue or a group
+ * of other entities. Entities take turns emitting jobs from their
+@@ -145,6 +142,9 @@ void amd_sched_entity_fini(struct amd_gp
+ struct amd_sched_entity *entity);
+ void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
+
++int amd_sched_fence_slab_init(void);
++void amd_sched_fence_slab_fini(void);
++
+ struct amd_sched_fence *amd_sched_fence_create(
+ struct amd_sched_entity *s_entity, void *owner);
+ void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
+--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
++++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
+@@ -27,6 +27,25 @@
+ #include <drm/drmP.h>
+ #include "gpu_scheduler.h"
+
++static struct kmem_cache *sched_fence_slab;
++
++int amd_sched_fence_slab_init(void)
++{
++ sched_fence_slab = kmem_cache_create(
++ "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
++ SLAB_HWCACHE_ALIGN, NULL);
++ if (!sched_fence_slab)
++ return -ENOMEM;
++
++ return 0;
++}
++
++void amd_sched_fence_slab_fini(void)
++{
++ rcu_barrier();
++ kmem_cache_destroy(sched_fence_slab);
++}
++
+ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity,
+ void *owner)
+ {
--- /dev/null
+From a053fb7e512c77f0742bceb578b10025256e1911 Mon Sep 17 00:00:00 2001
+From: Grazvydas Ignotas <notasas@gmail.com>
+Date: Sun, 23 Oct 2016 21:31:44 +0300
+Subject: drm/amdgpu: fix sched fence slab teardown
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Grazvydas Ignotas <notasas@gmail.com>
+
+commit a053fb7e512c77f0742bceb578b10025256e1911 upstream.
+
+To free fences, call_rcu() is used, which calls amd_sched_fence_free()
+after a grace period. During teardown, there is no guarantee all
+callbacks have finished, so sched_fence_slab may be destroyed before
+all fences have been freed. If we are lucky, this results in some slab
+warnings; if not, we get a crash in one of the RCU threads because the
+callback is called after amdgpu has already been unloaded.
+
+Fix it with an rcu_barrier().
+
+Fixes: 189e0fb76304 ("drm/amdgpu: RCU protected amd_sched_fence_release")
+Acked-by: Chunming Zhou <david1.zhou@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
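+The race comes down to the difference between a grace period and
+callback completion: synchronize_rcu() only waits for the former, while
+rcu_barrier() waits until every callback already queued with call_rcu()
+has run. A minimal sketch with hypothetical demo_* names (not the
+driver code):
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+struct demo_fence {
+	struct rcu_head rcu;
+};
+
+static struct kmem_cache *demo_fence_slab;
+
+static void demo_fence_free(struct rcu_head *rcu)
+{
+	/* May run long after the scheduler that produced the fence is gone. */
+	kmem_cache_free(demo_fence_slab,
+			container_of(rcu, struct demo_fence, rcu));
+}
+
+static void demo_fence_release(struct demo_fence *f)
+{
+	call_rcu(&f->rcu, demo_fence_free);
+}
+
+static void demo_slab_teardown(void)
+{
+	/*
+	 * synchronize_rcu() would not be enough: it waits for a grace
+	 * period, not for already-queued callbacks to finish running.
+	 */
+	rcu_barrier();			/* flush pending demo_fence_free() */
+	kmem_cache_destroy(demo_fence_slab);
+}
+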
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+@@ -645,6 +645,7 @@ void amd_sched_fini(struct amd_gpu_sched
+ {
+ if (sched->thread)
+ kthread_stop(sched->thread);
++ rcu_barrier();
+ if (atomic_dec_and_test(&sched_fence_slab_ref))
+ kmem_cache_destroy(sched_fence_slab);
+ }
drm-amdgpu-disable-runtime-pm-in-certain-cases.patch
drm-amdgpu-fix-crash-in-acp_hw_fini.patch
tty-serial-at91-fix-hardware-handshake-on-atmel-platforms.patch
+drm-amdgpu-fix-sched-fence-slab-teardown.patch
+drm-amd-fix-scheduler-fence-teardown-order-v2.patch
+xprtrdma-use-complete-instead-complete_all.patch
+xprtrdma-fix-dmar-failure-in-frwr_op_map-after-reconnect.patch
--- /dev/null
+From 62bdf94a2049822ef8c6d4b0e83cd9c3a1663ab4 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Mon, 7 Nov 2016 16:16:24 -0500
+Subject: xprtrdma: Fix DMAR failure in frwr_op_map() after reconnect
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 62bdf94a2049822ef8c6d4b0e83cd9c3a1663ab4 upstream.
+
+When a LOCAL_INV WR is flushed, the frmr is marked STALE, then
+frwr_op_unmap_sync DMA-unmaps the frmr's SGL. These STALE frmrs
+are then recovered when frwr_op_map hunts for an INVALID frmr to
+use.
+
+All other cases that need frmr recovery leave that SGL DMA-mapped.
+The FRMR recovery path unconditionally DMA-unmaps the frmr's SGL.
+
+To avoid DMA unmapping the SGL twice for flushed LOCAL_INV WRs,
+alter the recovery logic (rather than the hot frwr_op_unmap_sync
+path) to distinguish among these cases. This solution also takes
+care of the case where multiple LOCAL_INV WRs are issued for the
+same rpcrdma_req, some complete successfully, but some are flushed.
+
+Reported-by: Vasco Steinmetz <linux@kyberraum.net>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Vasco Steinmetz <linux@kyberraum.net>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
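+The mapping invariant that the new states encode can be summarized with
+a sketch of the recovery decision. The field names below match the diff,
+but this is an illustration, not the upstream frwr_op_recover_mr():
+
+#include <rdma/ib_verbs.h>
+#include "xprt_rdma.h"
+
+/*
+ * State of an MR when it reaches recovery, and whether its SGL is
+ * still DMA-mapped at that point:
+ *
+ *   FRMR_IS_VALID   - registration completed;   SGL still mapped
+ *   FRMR_FLUSHED_FR - FASTREG WR was flushed;   SGL still mapped
+ *   FRMR_FLUSHED_LI - LOCAL_INV WR was flushed; SGL already unmapped
+ *                     by frwr_op_unmap_sync()
+ */
+static void recover_mr_sketch(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
+{
+	enum rpcrdma_frmr_state state = mw->frmr.fr_state;
+
+	if (state != FRMR_FLUSHED_LI)	/* skip the second, bogus unmap */
+		ib_dma_unmap_sg(ia->ri_device, mw->mw_sg,
+				mw->mw_nents, mw->mw_dir);
+
+	/* ... then reset and re-register the MR as before ... */
+}
+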
+ net/sunrpc/xprtrdma/frwr_ops.c | 37 ++++++++++++++++++++++---------------
+ net/sunrpc/xprtrdma/xprt_rdma.h | 3 ++-
+ 2 files changed, 24 insertions(+), 16 deletions(-)
+
+--- a/net/sunrpc/xprtrdma/frwr_ops.c
++++ b/net/sunrpc/xprtrdma/frwr_ops.c
+@@ -44,18 +44,20 @@
+ * being done.
+ *
+ * When the underlying transport disconnects, MRs are left in one of
+- * three states:
++ * four states:
+ *
+ * INVALID: The MR was not in use before the QP entered ERROR state.
+- * (Or, the LOCAL_INV WR has not completed or flushed yet).
+- *
+- * STALE: The MR was being registered or unregistered when the QP
+- * entered ERROR state, and the pending WR was flushed.
+ *
+ * VALID: The MR was registered before the QP entered ERROR state.
+ *
+- * When frwr_op_map encounters STALE and VALID MRs, they are recovered
+- * with ib_dereg_mr and then are re-initialized. Beause MR recovery
++ * FLUSHED_FR: The MR was being registered when the QP entered ERROR
++ * state, and the pending WR was flushed.
++ *
++ * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
++ * state, and the pending WR was flushed.
++ *
++ * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
++ * with ib_dereg_mr and then are re-initialized. Because MR recovery
+ * allocates fresh resources, it is deferred to a workqueue, and the
+ * recovered MRs are placed back on the rb_mws list when recovery is
+ * complete. frwr_op_map allocates another MR for the current RPC while
+@@ -175,12 +177,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, s
+ static void
+ frwr_op_recover_mr(struct rpcrdma_mw *mw)
+ {
++ enum rpcrdma_frmr_state state = mw->frmr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ int rc;
+
+ rc = __frwr_reset_mr(ia, mw);
+- ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
++ if (state != FRMR_FLUSHED_LI)
++ ib_dma_unmap_sg(ia->ri_device,
++ mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ if (rc)
+ goto out_release;
+
+@@ -261,10 +266,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_
+ }
+
+ static void
+-__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
+- const char *wr)
++__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
+ {
+- frmr->fr_state = FRMR_IS_STALE;
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
+ wr, ib_wc_status_msg(wc->status),
+@@ -287,7 +290,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct
+ if (wc->status != IB_WC_SUCCESS) {
+ cqe = wc->wr_cqe;
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+- __frwr_sendcompletion_flush(wc, frmr, "fastreg");
++ frmr->fr_state = FRMR_FLUSHED_FR;
++ __frwr_sendcompletion_flush(wc, "fastreg");
+ }
+ }
+
+@@ -307,7 +311,8 @@ frwr_wc_localinv(struct ib_cq *cq, struc
+ if (wc->status != IB_WC_SUCCESS) {
+ cqe = wc->wr_cqe;
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+- __frwr_sendcompletion_flush(wc, frmr, "localinv");
++ frmr->fr_state = FRMR_FLUSHED_LI;
++ __frwr_sendcompletion_flush(wc, "localinv");
+ }
+ }
+
+@@ -327,8 +332,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq,
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ cqe = wc->wr_cqe;
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+- if (wc->status != IB_WC_SUCCESS)
+- __frwr_sendcompletion_flush(wc, frmr, "localinv");
++ if (wc->status != IB_WC_SUCCESS) {
++ frmr->fr_state = FRMR_FLUSHED_LI;
++ __frwr_sendcompletion_flush(wc, "localinv");
++ }
+ complete(&frmr->fr_linv_done);
+ }
+
+--- a/net/sunrpc/xprtrdma/xprt_rdma.h
++++ b/net/sunrpc/xprtrdma/xprt_rdma.h
+@@ -207,7 +207,8 @@ struct rpcrdma_rep {
+ enum rpcrdma_frmr_state {
+ FRMR_IS_INVALID, /* ready to be used */
+ FRMR_IS_VALID, /* in use */
+- FRMR_IS_STALE, /* failed completion */
++ FRMR_FLUSHED_FR, /* flushed FASTREG WR */
++ FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
+ };
+
+ struct rpcrdma_frmr {
--- /dev/null
+From 5690a22d8612e1788b48b4ea53c59868589cd2db Mon Sep 17 00:00:00 2001
+From: Daniel Wagner <daniel.wagner@bmw-carit.de>
+Date: Fri, 23 Sep 2016 10:41:57 +0200
+Subject: xprtrdma: use complete() instead complete_all()
+
+From: Daniel Wagner <daniel.wagner@bmw-carit.de>
+
+commit 5690a22d8612e1788b48b4ea53c59868589cd2db upstream.
+
+There is only one waiter for the completion, so there is no need to
+use complete_all(). Let's make that clear by using complete() instead
+of complete_all().
+
+The usage pattern of the completion is:
+
+waiter context waker context
+
+frwr_op_unmap_sync()
+ reinit_completion()
+ ib_post_send()
+ wait_for_completion()
+
+ frwr_wc_localinv_wake()
+ complete()
+
+Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
+Cc: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Cc: Trond Myklebust <trond.myklebust@primarydata.com>
+Cc: Chuck Lever <chuck.lever@oracle.com>
+Cc: linux-nfs@vger.kernel.org
+Cc: netdev@vger.kernel.org
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
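+The single-waiter pattern from the diagram above, sketched with
+hypothetical demo_* names rather than the xprtrdma structures:
+
+#include <linux/completion.h>
+
+struct demo_ctx {
+	struct completion done;
+};
+
+/* Waiter context: re-arm the completion, then block for one event. */
+static void demo_wait(struct demo_ctx *ctx)
+{
+	reinit_completion(&ctx->done);
+	/* ... post the work whose completion handler will wake us ... */
+	wait_for_completion(&ctx->done);
+}
+
+/* Waker context, e.g. a send completion handler. */
+static void demo_wake(struct demo_ctx *ctx)
+{
+	/*
+	 * Exactly one waiter, re-armed before every wait: complete() is
+	 * enough.  complete_all() would also leave the completion "done"
+	 * for any later waiter until the next reinit_completion(), which
+	 * this pattern never relies on.
+	 */
+	complete(&ctx->done);
+}
+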
+ net/sunrpc/xprtrdma/frwr_ops.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sunrpc/xprtrdma/frwr_ops.c
++++ b/net/sunrpc/xprtrdma/frwr_ops.c
+@@ -329,7 +329,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq,
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+ if (wc->status != IB_WC_SUCCESS)
+ __frwr_sendcompletion_flush(wc, frmr, "localinv");
+- complete_all(&frmr->fr_linv_done);
++ complete(&frmr->fr_linv_done);
+ }
+
+ /* Post a REG_MR Work Request to register a memory region