From: Greg Kroah-Hartman
Date: Wed, 7 Mar 2018 19:03:37 +0000 (-0800)
Subject: 4.14-stable patches
X-Git-Tag: v4.14.25~13
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=cd6d17ace060124c8bb32a771206df0a1666f383;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
	nvme-rdma-don-t-suppress-send-completions.patch
---

diff --git a/queue-4.14/nvme-rdma-don-t-suppress-send-completions.patch b/queue-4.14/nvme-rdma-don-t-suppress-send-completions.patch
new file mode 100644
index 00000000000..99ba6264592
--- /dev/null
+++ b/queue-4.14/nvme-rdma-don-t-suppress-send-completions.patch
@@ -0,0 +1,153 @@
+From b4b591c87f2b0f4ebaf3a68d4f13873b241aa584 Mon Sep 17 00:00:00 2001
+From: Sagi Grimberg
+Date: Thu, 23 Nov 2017 17:35:21 +0200
+Subject: nvme-rdma: don't suppress send completions
+
+From: Sagi Grimberg
+
+commit b4b591c87f2b0f4ebaf3a68d4f13873b241aa584 upstream.
+
+The entire completions suppress mechanism is currently broken because the
+HCA might retry a send operation (due to dropped ack) after the nvme
+transaction has completed.
+
+In order to handle this, we signal all send completions and introduce a
+separate done handler for async events as they will be handled differently
+(as they don't include in-capsule data by definition).
+
+Signed-off-by: Sagi Grimberg
+Reviewed-by: Max Gurtovoy
+Signed-off-by: Christoph Hellwig
+Signed-off-by: Greg Kroah-Hartman
+
+
+---
+ drivers/nvme/host/rdma.c |   54 ++++++++++++-----------------------------------
+ 1 file changed, 14 insertions(+), 40 deletions(-)
+
+--- a/drivers/nvme/host/rdma.c
++++ b/drivers/nvme/host/rdma.c
+@@ -88,7 +88,6 @@ enum nvme_rdma_queue_flags {
+ 
+ struct nvme_rdma_queue {
+ 	struct nvme_rdma_qe	*rsp_ring;
+-	atomic_t		sig_count;
+ 	int			queue_size;
+ 	size_t			cmnd_capsule_len;
+ 	struct nvme_rdma_ctrl	*ctrl;
+@@ -521,7 +520,6 @@ static int nvme_rdma_alloc_queue(struct
+ 		queue->cmnd_capsule_len = sizeof(struct nvme_command);
+ 
+ 	queue->queue_size = queue_size;
+-	atomic_set(&queue->sig_count, 0);
+ 
+ 	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
+ 			RDMA_PS_TCP, IB_QPT_RC);
+@@ -1232,21 +1230,9 @@ static void nvme_rdma_send_done(struct i
+ 	nvme_end_request(rq, req->status, req->result);
+ }
+ 
+-/*
+- * We want to signal completion at least every queue depth/2. This returns the
+- * largest power of two that is not above half of (queue size + 1) to optimize
+- * (avoid divisions).
+- */
+-static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+-{
+-	int limit = 1 << ilog2((queue->queue_size + 1) / 2);
+-
+-	return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
+-}
+-
+ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
+ 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
+-		struct ib_send_wr *first, bool flush)
++		struct ib_send_wr *first)
+ {
+ 	struct ib_send_wr wr, *bad_wr;
+ 	int ret;
+@@ -1255,31 +1241,12 @@ static int nvme_rdma_post_send(struct nv
+ 	sge->length = sizeof(struct nvme_command),
+ 	sge->lkey = queue->device->pd->local_dma_lkey;
+ 
+-	qe->cqe.done = nvme_rdma_send_done;
+-
+ 	wr.next = NULL;
+ 	wr.wr_cqe = &qe->cqe;
+ 	wr.sg_list = sge;
+ 	wr.num_sge = num_sge;
+ 	wr.opcode = IB_WR_SEND;
+-	wr.send_flags = 0;
+-
+-	/*
+-	 * Unsignalled send completions are another giant desaster in the
+-	 * IB Verbs spec: If we don't regularly post signalled sends
+-	 * the send queue will fill up and only a QP reset will rescue us.
+-	 * Would have been way to obvious to handle this in hardware or
+-	 * at least the RDMA stack..
+-	 *
+-	 * Always signal the flushes. The magic request used for the flush
+-	 * sequencer is not allocated in our driver's tagset and it's
+-	 * triggered to be freed by blk_cleanup_queue(). So we need to
+-	 * always mark it as signaled to ensure that the "wr_cqe", which is
+-	 * embedded in request's payload, is not freed when __ib_process_cq()
+-	 * calls wr_cqe->done().
+-	 */
+-	if (nvme_rdma_queue_sig_limit(queue) || flush)
+-		wr.send_flags |= IB_SEND_SIGNALED;
++	wr.send_flags = IB_SEND_SIGNALED;
+ 
+ 	if (first)
+ 		first->next = &wr;
+@@ -1329,6 +1296,12 @@ static struct blk_mq_tags *nvme_rdma_tag
+ 	return queue->ctrl->tag_set.tags[queue_idx - 1];
+ }
+ 
++static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc)
++{
++	if (unlikely(wc->status != IB_WC_SUCCESS))
++		nvme_rdma_wr_error(cq, wc, "ASYNC");
++}
++
+ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
+ {
+ 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
+@@ -1350,10 +1323,12 @@ static void nvme_rdma_submit_async_event
+ 	cmd->common.flags |= NVME_CMD_SGL_METABUF;
+ 	nvme_rdma_set_sg_null(cmd);
+ 
++	sqe->cqe.done = nvme_rdma_async_done;
++
+ 	ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd),
+ 			DMA_TO_DEVICE);
+ 
+-	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false);
++	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL);
+ 	WARN_ON_ONCE(ret);
+ }
+ 
+@@ -1639,7 +1614,6 @@ static blk_status_t nvme_rdma_queue_rq(s
+ 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ 	struct nvme_rdma_qe *sqe = &req->sqe;
+ 	struct nvme_command *c = sqe->data;
+-	bool flush = false;
+ 	struct ib_device *dev;
+ 	blk_status_t ret;
+ 	int err;
+@@ -1668,13 +1642,13 @@ static blk_status_t nvme_rdma_queue_rq(s
+ 		goto err;
+ 	}
+ 
++	sqe->cqe.done = nvme_rdma_send_done;
++
+ 	ib_dma_sync_single_for_device(dev, sqe->dma,
+ 			sizeof(struct nvme_command), DMA_TO_DEVICE);
+ 
+-	if (req_op(rq) == REQ_OP_FLUSH)
+-		flush = true;
+ 	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+-			req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
++			req->mr->need_inval ? &req->reg_wr.wr : NULL);
+ 	if (unlikely(err)) {
+ 		nvme_rdma_unmap_data(queue, rq);
+ 		goto err;
diff --git a/queue-4.14/series b/queue-4.14/series
index 7c2250101b5..434a6da672b 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -107,3 +107,4 @@ acpi-bus-parse-tables-as-term_list-for-dell-xps-9570-and-precision-m5530.patch
 arm-dts-logicpd-som-lv-fix-i2c1-pinmux.patch
 arm-dts-logicpd-torpedo-fix-i2c1-pinmux.patch
 md-only-allow-remove_and_add_spares-when-no-sync_thread-running.patch
+nvme-rdma-don-t-suppress-send-completions.patch
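
Note for readers of the queue: the heuristic this backport removes decided how often a posted send carried IB_SEND_SIGNALED. The sketch below is a minimal user-space approximation of the removed nvme_rdma_queue_sig_limit() logic; the plain counter, the hand-rolled ilog2_sketch() helper, and main() are illustrative stand-ins and not kernel code.

/*
 * Illustrative sketch (not part of the patch): the completion
 * suppression heuristic removed above, with a plain counter standing
 * in for the kernel's atomic_t.
 */
#include <stdbool.h>
#include <stdio.h>

/* stand-in for the kernel's ilog2(): floor(log2(v)) for v > 0 */
static unsigned int ilog2_sketch(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/*
 * Mirrors the removed nvme_rdma_queue_sig_limit(): signal once per
 * "largest power of two not above half of (queue_size + 1)" sends.
 */
static bool sig_limit(unsigned int *sig_count, int queue_size)
{
	int limit = 1 << ilog2_sketch((queue_size + 1) / 2);

	return (++*sig_count & (limit - 1)) == 0;
}

int main(void)
{
	unsigned int sig_count = 0;
	int signaled = 0;

	/* with a queue size of 128, only every 64th send gets signaled */
	for (int i = 0; i < 256; i++)
		if (sig_limit(&sig_count, 128))
			signaled++;

	printf("signaled %d of 256 posted sends\n", signaled);	/* prints 4 */
	return 0;
}

With a queue size of 128 the mask works out to 63, so only one send in 64 was signaled. The patch drops the heuristic and sets IB_SEND_SIGNALED on every send, since an unsignaled send that the HCA retries after the NVMe request has already completed is the breakage described in the changelog above.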