From: Greg Kroah-Hartman Date: Wed, 28 Nov 2018 11:13:33 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v4.19.6~50 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7b39c7883b2256bbb70c411115d41c130be4cdb5;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch ib-core-fix-for-core-panic.patch ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch --- diff --git a/queue-4.9/acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch b/queue-4.9/acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch new file mode 100644 index 00000000000..b319a4dd316 --- /dev/null +++ b/queue-4.9/acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch @@ -0,0 +1,46 @@ +From 4abb951b73ff0a8a979113ef185651aa3c8da19b Mon Sep 17 00:00:00 2001 +From: Erik Schmauss +Date: Wed, 17 Oct 2018 14:09:35 -0700 +Subject: ACPICA: AML interpreter: add region addresses in global list during initialization + +From: Erik Schmauss + +commit 4abb951b73ff0a8a979113ef185651aa3c8da19b upstream. + +The table load process omitted adding the operation region address +range to the global list. This omission is problematic because the OS +queries the global list to check for address range conflicts before +deciding which drivers to load. This commit may result in warning +messages that look like the following: + +[ 7.871761] ACPI Warning: system_IO range 0x00000428-0x0000042F conflicts with op_region 0x00000400-0x0000047F (\PMIO) (20180531/utaddress-213) +[ 7.871769] ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver + +However, these messages do not signify regressions. It is a result of +properly adding address ranges within the global address list. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=200011 +Tested-by: Jean-Marc Lenoir +Signed-off-by: Erik Schmauss +Cc: All applicable +Signed-off-by: Rafael J. Wysocki +Cc: Jean Delvare +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/acpi/acpica/dsopcode.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/acpi/acpica/dsopcode.c ++++ b/drivers/acpi/acpica/dsopcode.c +@@ -452,6 +452,10 @@ acpi_ds_eval_region_operands(struct acpi + ACPI_FORMAT_UINT64(obj_desc->region.address), + obj_desc->region.length)); + ++ status = acpi_ut_add_address_range(obj_desc->region.space_id, ++ obj_desc->region.address, ++ obj_desc->region.length, node); ++ + /* Now the address and length are valid for this opregion */ + + obj_desc->region.flags |= AOPOBJ_DATA_VALID; diff --git a/queue-4.9/ib-core-fix-for-core-panic.patch b/queue-4.9/ib-core-fix-for-core-panic.patch new file mode 100644 index 00000000000..1fa2ea8925f --- /dev/null +++ b/queue-4.9/ib-core-fix-for-core-panic.patch @@ -0,0 +1,109 @@ +From e6f9bc34d3779cb7b6a337afed5de8be3f0fab77 Mon Sep 17 00:00:00 2001 +From: Alex Estrin +Date: Thu, 31 Aug 2017 09:30:34 -0700 +Subject: IB/core: Fix for core panic + +From: Alex Estrin + +commit e6f9bc34d3779cb7b6a337afed5de8be3f0fab77 upstream. + +Build with the latest patches resulted in panic: +11384.486289] BUG: unable to handle kernel NULL pointer dereference at + (null) +[11384.486293] IP: (null) +[11384.486295] PGD 0 +[11384.486295] P4D 0 +[11384.486296] +[11384.486299] Oops: 0010 [#1] SMP +......... snip ...... 
+[11384.486401] CPU: 0 PID: 968 Comm: kworker/0:1H Tainted: G W O + 4.13.0-a-stream-20170825 #1 +[11384.486402] Hardware name: Intel Corporation S2600WT2R/S2600WT2R, +BIOS SE5C610.86B.01.01.0014.121820151719 12/18/2015 +[11384.486418] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] +[11384.486419] task: ffff880850579680 task.stack: ffffc90007fec000 +[11384.486420] RIP: 0010: (null) +[11384.486420] RSP: 0018:ffffc90007fef970 EFLAGS: 00010206 +[11384.486421] RAX: ffff88084cfe8000 RBX: ffff88084dce4000 RCX: +ffffc90007fef978 +[11384.486422] RDX: 0000000000000000 RSI: 0000000000000001 RDI: +ffff88084cfe8000 +[11384.486422] RBP: ffffc90007fefab0 R08: 0000000000000000 R09: +ffff88084dce4080 +[11384.486423] R10: ffffffffa02d7f60 R11: 0000000000000000 R12: +ffff88105af65a00 +[11384.486423] R13: ffff88084dce4000 R14: 000000000000c000 R15: +000000000000c000 +[11384.486424] FS: 0000000000000000(0000) GS:ffff88085f400000(0000) +knlGS:0000000000000000 +[11384.486425] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[11384.486425] CR2: 0000000000000000 CR3: 0000000001c09000 CR4: +00000000001406f0 +[11384.486426] Call Trace: +[11384.486431] ? is_valid_mcast_lid.isra.21+0xfb/0x110 [ib_core] +[11384.486436] ib_attach_mcast+0x6f/0xa0 [ib_core] +[11384.486441] ipoib_mcast_attach+0x81/0x190 [ib_ipoib] +[11384.486443] ipoib_mcast_join_complete+0x354/0xb40 [ib_ipoib] +[11384.486448] mcast_work_handler+0x330/0x6c0 [ib_core] +[11384.486452] join_handler+0x101/0x220 [ib_core] +[11384.486455] ib_sa_mcmember_rec_callback+0x54/0x80 [ib_core] +[11384.486459] recv_handler+0x3a/0x60 [ib_core] +[11384.486462] ib_mad_recv_done+0x423/0x9b0 [ib_core] +[11384.486466] __ib_process_cq+0x5d/0xb0 [ib_core] +[11384.486469] ib_cq_poll_work+0x20/0x60 [ib_core] +[11384.486472] process_one_work+0x149/0x360 +[11384.486474] worker_thread+0x4d/0x3c0 +[11384.486487] kthread+0x109/0x140 +[11384.486488] ? rescuer_thread+0x380/0x380 +[11384.486489] ? kthread_park+0x60/0x60 +[11384.486490] ? kthread_park+0x60/0x60 +[11384.486493] ret_from_fork+0x25/0x30 +[11384.486493] Code: Bad RIP value. +[11384.486493] Code: Bad RIP value. +[11384.486496] RIP: (null) RSP: ffffc90007fef970 +[11384.486497] CR2: 0000000000000000 +[11384.486531] ---[ end trace b1acec6fb4ff6e75 ]--- +[11384.532133] Kernel panic - not syncing: Fatal exception +[11384.536541] Kernel Offset: disabled +[11384.969491] ---[ end Kernel panic - not syncing: Fatal exception +[11384.976875] sched: Unexpected reschedule of offline CPU#1! +[11384.983646] ------------[ cut here ]------------ + +Rdma device driver may not have implemented (*get_link_layer)() +so it can not be called directly. Should use appropriate helper function. 
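
For readers of this backport, a minimal illustration of the failure mode may help: the driver's get_link_layer op is optional, so calling the function pointer directly oopses with the "RIP: (null)" trace above whenever a driver leaves it unset, while the core helper checks for the op first. The sketch below uses hypothetical demo_* names and an assumed fallback value; it is not the kernel's rdma_port_get_link_layer() implementation, only the pattern it embodies.

/* Sketch only: an optional driver op reached through a checking helper. */

enum demo_link_layer { DEMO_LL_INFINIBAND, DEMO_LL_ETHERNET };

struct demo_device {
	/* Optional op -- a driver may legitimately leave this unset (NULL). */
	enum demo_link_layer (*get_link_layer)(struct demo_device *dev, int port);
};

/* Helper: never dereferences a missing op; falls back to an assumed default. */
static enum demo_link_layer demo_port_link_layer(struct demo_device *dev, int port)
{
	if (dev->get_link_layer)
		return dev->get_link_layer(dev, port);
	return DEMO_LL_INFINIBAND;	/* assumption made for this sketch */
}

static int demo_lid_needs_check(struct demo_device *dev, int port)
{
	/*
	 * Buggy pattern the patch removes: dev->get_link_layer(dev, port)
	 * called directly, which is a NULL-pointer call for drivers that
	 * do not implement the op.
	 */
	return demo_port_link_layer(dev, port) == DEMO_LL_INFINIBAND;
}

int main(void)
{
	struct demo_device no_op = { 0 };	/* driver without the op */

	/* Safe even though the op is missing. */
	return demo_lid_needs_check(&no_op, 1) ? 0 : 1;
}
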
+ +Reviewed-by: Yuval Shaia +Fixes: 523633359224 ("IB/core: Fix the validations of a multicast LID in attach or detach operations") +Cc: stable@kernel.org # 4.13 +Reviewed-by: Dennis Dalessandro +Signed-off-by: Alex Estrin +Signed-off-by: Dennis Dalessandro +Reviewed-by: Leon Romanovsky +Signed-off-by: Doug Ledford +Cc: Mike Marciniszyn +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/core/verbs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/infiniband/core/verbs.c ++++ b/drivers/infiniband/core/verbs.c +@@ -1522,7 +1522,7 @@ static bool is_valid_mcast_lid(struct ib + */ + if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) { + if (attr.qp_state >= IB_QPS_INIT) { +- if (qp->device->get_link_layer(qp->device, attr.port_num) != ++ if (rdma_port_get_link_layer(qp->device, attr.port_num) != + IB_LINK_LAYER_INFINIBAND) + return true; + goto lid_check; +@@ -1531,7 +1531,7 @@ static bool is_valid_mcast_lid(struct ib + + /* Can't get a quick answer, iterate over all ports */ + for (port = 0; port < qp->device->phys_port_cnt; port++) +- if (qp->device->get_link_layer(qp->device, port) != ++ if (rdma_port_get_link_layer(qp->device, port) != + IB_LINK_LAYER_INFINIBAND) + num_eth_ports++; + diff --git a/queue-4.9/ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch b/queue-4.9/ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch new file mode 100644 index 00000000000..f72dd4f53b1 --- /dev/null +++ b/queue-4.9/ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch @@ -0,0 +1,290 @@ +From a0e0cb82804a6a21d9067022c2dfdf80d11da429 Mon Sep 17 00:00:00 2001 +From: "Michael J. Ruhl" +Date: Mon, 10 Sep 2018 09:39:03 -0700 +Subject: IB/hfi1: Eliminate races in the SDMA send error path + +From: Michael J. Ruhl + +commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream. + +pq_update() can only be called in two places: from the completion +function when the complete (npkts) sequence of packets has been +submitted and processed, or from setup function if a subset of the +packets were submitted (i.e. the error path). + +Currently both paths can call pq_update() if an error occurrs. This +race will cause the n_req value to go negative, hanging file_close(), +or cause a crash by freeing the txlist more than once. + +Several variables are used to determine SDMA send state. Most of +these are unnecessary, and have code inspectible races between the +setup function and the completion function, in both the send path and +the error path. + +The request 'status' value can be set by the setup or by the +completion function. This is code inspectibly racy. Since the status +is not needed in the completion code or by the caller it has been +removed. + +The request 'done' value races between usage by the setup and the +completion function. The completion function does not need this. +When the number of processed packets matches npkts, it is done. + +The 'has_error' value races between usage of the setup and the +completion function. This can cause incorrect error handling and leave +the n_req in an incorrect value (i.e. negative). + +Simplify the code by removing all of the unneeded state checks and +variables. + +Clean up iovs node when it is freed. + +Eliminate race conditions in the error path: + +If all packets are submitted, the completion handler will set the +completion status correctly (ok or aborted). + +If all packets are not submitted, the caller must wait until the +submitted packets have completed, and then set the completion status. 
+ +These two change eliminate the race condition in the error path. + +Reviewed-by: Mitko Haralanov +Reviewed-by: Mike Marciniszyn +Signed-off-by: Michael J. Ruhl +Signed-off-by: Dennis Dalessandro +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/hfi1/user_sdma.c | 104 +++++++++++++-------------------- + 1 file changed, 44 insertions(+), 60 deletions(-) + +--- a/drivers/infiniband/hw/hfi1/user_sdma.c ++++ b/drivers/infiniband/hw/hfi1/user_sdma.c +@@ -148,11 +148,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size o + #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) + + /* SDMA request flag bits */ +-#define SDMA_REQ_FOR_THREAD 1 +-#define SDMA_REQ_SEND_DONE 2 +-#define SDMA_REQ_HAVE_AHG 3 +-#define SDMA_REQ_HAS_ERROR 4 +-#define SDMA_REQ_DONE_ERROR 5 ++#define SDMA_REQ_HAVE_AHG 1 ++#define SDMA_REQ_HAS_ERROR 2 + + #define SDMA_PKT_Q_INACTIVE BIT(0) + #define SDMA_PKT_Q_ACTIVE BIT(1) +@@ -252,8 +249,6 @@ struct user_sdma_request { + u64 seqsubmitted; + struct list_head txps; + unsigned long flags; +- /* status of the last txreq completed */ +- int status; + }; + + /* +@@ -546,7 +541,6 @@ int hfi1_user_sdma_process_request(struc + struct sdma_req_info info; + struct user_sdma_request *req; + u8 opcode, sc, vl; +- int req_queued = 0; + u16 dlid; + u32 selector; + +@@ -611,11 +605,13 @@ int hfi1_user_sdma_process_request(struc + req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ + req->pq = pq; + req->cq = cq; +- req->status = -1; + INIT_LIST_HEAD(&req->txps); + + memcpy(&req->info, &info, sizeof(info)); + ++ /* The request is initialized, count it */ ++ atomic_inc(&pq->n_reqs); ++ + if (req_opcode(info.ctrl) == EXPECTED) { + /* expected must have a TID info and at least one data vector */ + if (req->data_iovs < 2) { +@@ -704,7 +700,7 @@ int hfi1_user_sdma_process_request(struc + memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec)); + ret = pin_vector_pages(req, &req->iovs[i]); + if (ret) { +- req->status = ret; ++ req->data_iovs = i; + goto free_req; + } + req->data_len += req->iovs[i].iov.iov_len; +@@ -772,14 +768,10 @@ int hfi1_user_sdma_process_request(struc + } + + set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); +- atomic_inc(&pq->n_reqs); +- req_queued = 1; + /* Send the first N packets in the request to buy us some time */ + ret = user_sdma_send_pkts(req, pcount); +- if (unlikely(ret < 0 && ret != -EBUSY)) { +- req->status = ret; ++ if (unlikely(ret < 0 && ret != -EBUSY)) + goto free_req; +- } + + /* + * It is possible that the SDMA engine would have processed all the +@@ -796,17 +788,11 @@ int hfi1_user_sdma_process_request(struc + * request have been submitted to the SDMA engine. However, it + * will not wait for send completions. + */ +- while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) { ++ while (req->seqsubmitted != req->info.npkts) { + ret = user_sdma_send_pkts(req, pcount); + if (ret < 0) { +- if (ret != -EBUSY) { +- req->status = ret; +- set_bit(SDMA_REQ_DONE_ERROR, &req->flags); +- if (ACCESS_ONCE(req->seqcomp) == +- req->seqsubmitted - 1) +- goto free_req; +- return ret; +- } ++ if (ret != -EBUSY) ++ goto free_req; + wait_event_interruptible_timeout( + pq->busy.wait_dma, + (pq->state == SDMA_PKT_Q_ACTIVE), +@@ -817,10 +803,19 @@ int hfi1_user_sdma_process_request(struc + *count += idx; + return 0; + free_req: +- user_sdma_free_request(req, true); +- if (req_queued) ++ /* ++ * If the submitted seqsubmitted == npkts, the completion routine ++ * controls the final state. 
If sequbmitted < npkts, wait for any ++ * outstanding packets to finish before cleaning up. ++ */ ++ if (req->seqsubmitted < req->info.npkts) { ++ if (req->seqsubmitted) ++ wait_event(pq->busy.wait_dma, ++ (req->seqcomp == req->seqsubmitted - 1)); ++ user_sdma_free_request(req, true); + pq_update(pq); +- set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); ++ set_comp_state(pq, cq, info.comp_idx, ERROR, ret); ++ } + return ret; + } + +@@ -903,10 +898,8 @@ static int user_sdma_send_pkts(struct us + pq = req->pq; + + /* If tx completion has reported an error, we are done. */ +- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { +- set_bit(SDMA_REQ_DONE_ERROR, &req->flags); ++ if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) + return -EFAULT; +- } + + /* + * Check if we might have sent the entire request already +@@ -929,10 +922,8 @@ static int user_sdma_send_pkts(struct us + * with errors. If so, we are not going to process any + * more packets from this request. + */ +- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { +- set_bit(SDMA_REQ_DONE_ERROR, &req->flags); ++ if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) + return -EFAULT; +- } + + tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL); + if (!tx) +@@ -1090,7 +1081,6 @@ dosend: + ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); + req->seqsubmitted += count; + if (req->seqsubmitted == req->info.npkts) { +- set_bit(SDMA_REQ_SEND_DONE, &req->flags); + /* + * The txreq has already been submitted to the HW queue + * so we can free the AHG entry now. Corruption will not +@@ -1489,11 +1479,15 @@ static int set_txreq_header_ahg(struct u + return diff; + } + +-/* +- * SDMA tx request completion callback. Called when the SDMA progress +- * state machine gets notification that the SDMA descriptors for this +- * tx request have been processed by the DMA engine. Called in +- * interrupt context. ++/** ++ * user_sdma_txreq_cb() - SDMA tx request completion callback. ++ * @txreq: valid sdma tx request ++ * @status: success/failure of request ++ * ++ * Called when the SDMA progress state machine gets notification that ++ * the SDMA descriptors for this tx request have been processed by the ++ * DMA engine. Called in interrupt context. ++ * Only do work on completed sequences. + */ + static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) + { +@@ -1502,7 +1496,7 @@ static void user_sdma_txreq_cb(struct sd + struct user_sdma_request *req; + struct hfi1_user_sdma_pkt_q *pq; + struct hfi1_user_sdma_comp_q *cq; +- u16 idx; ++ enum hfi1_sdma_comp_state state = COMPLETE; + + if (!tx->req) + return; +@@ -1515,31 +1509,19 @@ static void user_sdma_txreq_cb(struct sd + SDMA_DBG(req, "SDMA completion with error %d", + status); + set_bit(SDMA_REQ_HAS_ERROR, &req->flags); ++ state = ERROR; + } + + req->seqcomp = tx->seqnum; + kmem_cache_free(pq->txreq_cache, tx); +- tx = NULL; + +- idx = req->info.comp_idx; +- if (req->status == -1 && status == SDMA_TXREQ_S_OK) { +- if (req->seqcomp == req->info.npkts - 1) { +- req->status = 0; +- user_sdma_free_request(req, false); +- pq_update(pq); +- set_comp_state(pq, cq, idx, COMPLETE, 0); +- } +- } else { +- if (status != SDMA_TXREQ_S_OK) +- req->status = status; +- if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && +- (test_bit(SDMA_REQ_SEND_DONE, &req->flags) || +- test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { +- user_sdma_free_request(req, false); +- pq_update(pq); +- set_comp_state(pq, cq, idx, ERROR, req->status); +- } +- } ++ /* sequence isn't complete? 
We are done */ ++ if (req->seqcomp != req->info.npkts - 1) ++ return; ++ ++ user_sdma_free_request(req, false); ++ set_comp_state(pq, cq, req->info.comp_idx, state, status); ++ pq_update(pq); + } + + static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) +@@ -1572,6 +1554,8 @@ static void user_sdma_free_request(struc + if (!node) + continue; + ++ req->iovs[i].node = NULL; ++ + if (unpin) + hfi1_mmu_rb_remove(req->pq->handler, + &node->rb); diff --git a/queue-4.9/series b/queue-4.9/series index 04ffc695c19..1c51d9c8fce 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -22,3 +22,6 @@ can-dev-can_get_echo_skb-factor-out-non-sending-code-to-__can_get_echo_skb.patch can-dev-__can_get_echo_skb-replace-struct-can_frame-by-canfd_frame-to-access-frame-length.patch can-dev-__can_get_echo_skb-don-t-crash-the-kernel-if-can_priv-echo_skb-is-accessed-out-of-bounds.patch can-dev-__can_get_echo_skb-print-error-message-if-trying-to-echo-non-existing-skb.patch +acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch +ib-core-fix-for-core-panic.patch +ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch
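
A closing note on the hfi1 change above: the essence of the fix is a single-owner rule for request teardown, so user_sdma_free_request()/pq_update() can never run twice for one request (the negative n_req count and double free described in the changelog). Below is a minimal, self-contained sketch of that rule using hypothetical demo_* names; it is not the driver code, only the invariant it enforces: the completion side finalizes only when every packet was submitted, and the submit-side error path finalizes only when it was not.

#include <stdbool.h>
#include <stdio.h>

struct demo_req {
	unsigned int npkts;        /* packets the request describes    */
	unsigned int seqsubmitted; /* packets handed to the engine     */
	unsigned int seqcomp;      /* sequence number last completed   */
};

/* Completion-callback side: only the final completion finalizes. */
static bool completion_finalizes(const struct demo_req *req)
{
	return req->seqcomp == req->npkts - 1;
}

/*
 * Submit-side error path: finalizes only when fewer than npkts packets
 * were queued, i.e. exactly the case the completion side can never reach.
 * (The real code also waits for the packets it did queue to complete.)
 */
static bool submitter_finalizes(const struct demo_req *req)
{
	return req->seqsubmitted < req->npkts;
}

int main(void)
{
	struct demo_req partial = { .npkts = 8, .seqsubmitted = 3, .seqcomp = 2 };
	struct demo_req full    = { .npkts = 8, .seqsubmitted = 8, .seqcomp = 7 };

	/* In either case exactly one side owns teardown -- never both. */
	printf("partial: submitter=%d completion=%d\n",
	       submitter_finalizes(&partial), completion_finalizes(&partial));
	printf("full:    submitter=%d completion=%d\n",
	       submitter_finalizes(&full), completion_finalizes(&full));
	return 0;
}
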