]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 28 Nov 2018 11:13:33 +0000 (12:13 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 28 Nov 2018 11:13:33 +0000 (12:13 +0100)
added patches:
acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch
ib-core-fix-for-core-panic.patch
ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch

queue-4.9/acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch [new file with mode: 0644]
queue-4.9/ib-core-fix-for-core-panic.patch [new file with mode: 0644]
queue-4.9/ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch [new file with mode: 0644]
queue-4.9/series

diff --git a/queue-4.9/acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch b/queue-4.9/acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch
new file mode 100644 (file)
index 0000000..b319a4d
--- /dev/null
@@ -0,0 +1,46 @@
+From 4abb951b73ff0a8a979113ef185651aa3c8da19b Mon Sep 17 00:00:00 2001
+From: Erik Schmauss <erik.schmauss@intel.com>
+Date: Wed, 17 Oct 2018 14:09:35 -0700
+Subject: ACPICA: AML interpreter: add region addresses in global list during initialization
+
+From: Erik Schmauss <erik.schmauss@intel.com>
+
+commit 4abb951b73ff0a8a979113ef185651aa3c8da19b upstream.
+
+The table load process omitted adding the operation region address
+range to the global list. This omission is problematic because the OS
+queries the global list to check for address range conflicts before
+deciding which drivers to load. This commit may result in warning
+messages that look like the following:
+
+[    7.871761] ACPI Warning: system_IO range 0x00000428-0x0000042F conflicts with op_region 0x00000400-0x0000047F (\PMIO) (20180531/utaddress-213)
+[    7.871769] ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver
+
+However, these messages do not signify regressions. They are a result of
+properly adding address ranges within the global address list.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200011
+Tested-by: Jean-Marc Lenoir <archlinux@jihemel.com>
+Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
+Cc: All applicable <stable@vger.kernel.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Jean Delvare <jdelvare@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/acpica/dsopcode.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/acpi/acpica/dsopcode.c
++++ b/drivers/acpi/acpica/dsopcode.c
+@@ -452,6 +452,10 @@ acpi_ds_eval_region_operands(struct acpi
+                         ACPI_FORMAT_UINT64(obj_desc->region.address),
+                         obj_desc->region.length));
++      status = acpi_ut_add_address_range(obj_desc->region.space_id,
++                                         obj_desc->region.address,
++                                         obj_desc->region.length, node);
++
+       /* Now the address and length are valid for this opregion */
+       obj_desc->region.flags |= AOPOBJ_DATA_VALID;
diff --git a/queue-4.9/ib-core-fix-for-core-panic.patch b/queue-4.9/ib-core-fix-for-core-panic.patch
new file mode 100644 (file)
index 0000000..1fa2ea8
--- /dev/null
@@ -0,0 +1,109 @@
+From e6f9bc34d3779cb7b6a337afed5de8be3f0fab77 Mon Sep 17 00:00:00 2001
+From: Alex Estrin <alex.estrin@intel.com>
+Date: Thu, 31 Aug 2017 09:30:34 -0700
+Subject: IB/core: Fix for core panic
+
+From: Alex Estrin <alex.estrin@intel.com>
+
+commit e6f9bc34d3779cb7b6a337afed5de8be3f0fab77 upstream.
+
+Build with the latest patches resulted in panic:
+[11384.486289] BUG: unable to handle kernel NULL pointer dereference at
+         (null)
+[11384.486293] IP:           (null)
+[11384.486295] PGD 0
+[11384.486295] P4D 0
+[11384.486296]
+[11384.486299] Oops: 0010 [#1] SMP
+......... snip ......
+[11384.486401] CPU: 0 PID: 968 Comm: kworker/0:1H Tainted: G        W  O
+    4.13.0-a-stream-20170825 #1
+[11384.486402] Hardware name: Intel Corporation S2600WT2R/S2600WT2R,
+BIOS SE5C610.86B.01.01.0014.121820151719 12/18/2015
+[11384.486418] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core]
+[11384.486419] task: ffff880850579680 task.stack: ffffc90007fec000
+[11384.486420] RIP: 0010:          (null)
+[11384.486420] RSP: 0018:ffffc90007fef970 EFLAGS: 00010206
+[11384.486421] RAX: ffff88084cfe8000 RBX: ffff88084dce4000 RCX:
+ffffc90007fef978
+[11384.486422] RDX: 0000000000000000 RSI: 0000000000000001 RDI:
+ffff88084cfe8000
+[11384.486422] RBP: ffffc90007fefab0 R08: 0000000000000000 R09:
+ffff88084dce4080
+[11384.486423] R10: ffffffffa02d7f60 R11: 0000000000000000 R12:
+ffff88105af65a00
+[11384.486423] R13: ffff88084dce4000 R14: 000000000000c000 R15:
+000000000000c000
+[11384.486424] FS:  0000000000000000(0000) GS:ffff88085f400000(0000)
+knlGS:0000000000000000
+[11384.486425] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[11384.486425] CR2: 0000000000000000 CR3: 0000000001c09000 CR4:
+00000000001406f0
+[11384.486426] Call Trace:
+[11384.486431]  ? is_valid_mcast_lid.isra.21+0xfb/0x110 [ib_core]
+[11384.486436]  ib_attach_mcast+0x6f/0xa0 [ib_core]
+[11384.486441]  ipoib_mcast_attach+0x81/0x190 [ib_ipoib]
+[11384.486443]  ipoib_mcast_join_complete+0x354/0xb40 [ib_ipoib]
+[11384.486448]  mcast_work_handler+0x330/0x6c0 [ib_core]
+[11384.486452]  join_handler+0x101/0x220 [ib_core]
+[11384.486455]  ib_sa_mcmember_rec_callback+0x54/0x80 [ib_core]
+[11384.486459]  recv_handler+0x3a/0x60 [ib_core]
+[11384.486462]  ib_mad_recv_done+0x423/0x9b0 [ib_core]
+[11384.486466]  __ib_process_cq+0x5d/0xb0 [ib_core]
+[11384.486469]  ib_cq_poll_work+0x20/0x60 [ib_core]
+[11384.486472]  process_one_work+0x149/0x360
+[11384.486474]  worker_thread+0x4d/0x3c0
+[11384.486487]  kthread+0x109/0x140
+[11384.486488]  ? rescuer_thread+0x380/0x380
+[11384.486489]  ? kthread_park+0x60/0x60
+[11384.486490]  ? kthread_park+0x60/0x60
+[11384.486493]  ret_from_fork+0x25/0x30
+[11384.486493] Code:  Bad RIP value.
+[11384.486493] Code:  Bad RIP value.
+[11384.486496] RIP:           (null) RSP: ffffc90007fef970
+[11384.486497] CR2: 0000000000000000
+[11384.486531] ---[ end trace b1acec6fb4ff6e75 ]---
+[11384.532133] Kernel panic - not syncing: Fatal exception
+[11384.536541] Kernel Offset: disabled
+[11384.969491] ---[ end Kernel panic - not syncing: Fatal exception
+[11384.976875] sched: Unexpected reschedule of offline CPU#1!
+[11384.983646] ------------[ cut here ]------------
+
+An RDMA device driver may not have implemented (*get_link_layer)(),
+so it cannot be called directly. Use the appropriate helper function instead.
+
+Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
+Fixes: 523633359224 ("IB/core: Fix the validations of a multicast LID in attach or detach operations")
+Cc: stable@kernel.org # 4.13
+Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Alex Estrin <alex.estrin@intel.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Cc: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/verbs.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/core/verbs.c
++++ b/drivers/infiniband/core/verbs.c
+@@ -1522,7 +1522,7 @@ static bool is_valid_mcast_lid(struct ib
+        */
+       if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
+               if (attr.qp_state >= IB_QPS_INIT) {
+-                      if (qp->device->get_link_layer(qp->device, attr.port_num) !=
++                      if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
+                           IB_LINK_LAYER_INFINIBAND)
+                               return true;
+                       goto lid_check;
+@@ -1531,7 +1531,7 @@ static bool is_valid_mcast_lid(struct ib
+       /* Can't get a quick answer, iterate over all ports */
+       for (port = 0; port < qp->device->phys_port_cnt; port++)
+-              if (qp->device->get_link_layer(qp->device, port) !=
++              if (rdma_port_get_link_layer(qp->device, port) !=
+                   IB_LINK_LAYER_INFINIBAND)
+                       num_eth_ports++;
diff --git a/queue-4.9/ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch b/queue-4.9/ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch
new file mode 100644 (file)
index 0000000..f72dd4f
--- /dev/null
@@ -0,0 +1,290 @@
+From a0e0cb82804a6a21d9067022c2dfdf80d11da429 Mon Sep 17 00:00:00 2001
+From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
+Date: Mon, 10 Sep 2018 09:39:03 -0700
+Subject: IB/hfi1: Eliminate races in the SDMA send error path
+
+From: Michael J. Ruhl <michael.j.ruhl@intel.com>
+
+commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream.
+
+pq_update() can only be called in two places: from the completion
+function when the complete (npkts) sequence of packets has been
+submitted and processed, or from setup function if a subset of the
+packets were submitted (i.e. the error path).
+
+Currently both paths can call pq_update() if an error occurs.  This
+race will cause the n_req value to go negative, hanging file_close(),
+or cause a crash by freeing the txlist more than once.
+
+Several variables are used to determine SDMA send state.  Most of
+these are unnecessary, and have code inspectable races between the
+setup function and the completion function, in both the send path and
+the error path.
+
+The request 'status' value can be set by the setup or by the
+completion function.  This is code inspectably racy.  Since the status
+is not needed in the completion code or by the caller it has been
+removed.
+
+The request 'done' value races between usage by the setup and the
+completion function.  The completion function does not need this.
+When the number of processed packets matches npkts, it is done.
+
+The 'has_error' value races between usage of the setup and the
+completion function.  This can cause incorrect error handling and leave
+the n_req in an incorrect value (i.e. negative).
+
+Simplify the code by removing all of the unneeded state checks and
+variables.
+
+Clean up iovs node when it is freed.
+
+Eliminate race conditions in the error path:
+
+If all packets are submitted, the completion handler will set the
+completion status correctly (ok or aborted).
+
+If all packets are not submitted, the caller must wait until the
+submitted packets have completed, and then set the completion status.
+
+These two changes eliminate the race condition in the error path.
+
+Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
+Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/hfi1/user_sdma.c |  104 +++++++++++++--------------------
+ 1 file changed, 44 insertions(+), 60 deletions(-)
+
+--- a/drivers/infiniband/hw/hfi1/user_sdma.c
++++ b/drivers/infiniband/hw/hfi1/user_sdma.c
+@@ -148,11 +148,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size o
+ #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
+ /* SDMA request flag bits */
+-#define SDMA_REQ_FOR_THREAD 1
+-#define SDMA_REQ_SEND_DONE  2
+-#define SDMA_REQ_HAVE_AHG   3
+-#define SDMA_REQ_HAS_ERROR  4
+-#define SDMA_REQ_DONE_ERROR 5
++#define SDMA_REQ_HAVE_AHG   1
++#define SDMA_REQ_HAS_ERROR  2
+ #define SDMA_PKT_Q_INACTIVE BIT(0)
+ #define SDMA_PKT_Q_ACTIVE   BIT(1)
+@@ -252,8 +249,6 @@ struct user_sdma_request {
+       u64 seqsubmitted;
+       struct list_head txps;
+       unsigned long flags;
+-      /* status of the last txreq completed */
+-      int status;
+ };
+ /*
+@@ -546,7 +541,6 @@ int hfi1_user_sdma_process_request(struc
+       struct sdma_req_info info;
+       struct user_sdma_request *req;
+       u8 opcode, sc, vl;
+-      int req_queued = 0;
+       u16 dlid;
+       u32 selector;
+@@ -611,11 +605,13 @@ int hfi1_user_sdma_process_request(struc
+       req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
+       req->pq = pq;
+       req->cq = cq;
+-      req->status = -1;
+       INIT_LIST_HEAD(&req->txps);
+       memcpy(&req->info, &info, sizeof(info));
++      /* The request is initialized, count it */
++      atomic_inc(&pq->n_reqs);
++
+       if (req_opcode(info.ctrl) == EXPECTED) {
+               /* expected must have a TID info and at least one data vector */
+               if (req->data_iovs < 2) {
+@@ -704,7 +700,7 @@ int hfi1_user_sdma_process_request(struc
+               memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
+               ret = pin_vector_pages(req, &req->iovs[i]);
+               if (ret) {
+-                      req->status = ret;
++                      req->data_iovs = i;
+                       goto free_req;
+               }
+               req->data_len += req->iovs[i].iov.iov_len;
+@@ -772,14 +768,10 @@ int hfi1_user_sdma_process_request(struc
+       }
+       set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
+-      atomic_inc(&pq->n_reqs);
+-      req_queued = 1;
+       /* Send the first N packets in the request to buy us some time */
+       ret = user_sdma_send_pkts(req, pcount);
+-      if (unlikely(ret < 0 && ret != -EBUSY)) {
+-              req->status = ret;
++      if (unlikely(ret < 0 && ret != -EBUSY))
+               goto free_req;
+-      }
+       /*
+        * It is possible that the SDMA engine would have processed all the
+@@ -796,17 +788,11 @@ int hfi1_user_sdma_process_request(struc
+        * request have been submitted to the SDMA engine. However, it
+        * will not wait for send completions.
+        */
+-      while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
++      while (req->seqsubmitted != req->info.npkts) {
+               ret = user_sdma_send_pkts(req, pcount);
+               if (ret < 0) {
+-                      if (ret != -EBUSY) {
+-                              req->status = ret;
+-                              set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+-                              if (ACCESS_ONCE(req->seqcomp) ==
+-                                  req->seqsubmitted - 1)
+-                                      goto free_req;
+-                              return ret;
+-                      }
++                      if (ret != -EBUSY)
++                              goto free_req;
+                       wait_event_interruptible_timeout(
+                               pq->busy.wait_dma,
+                               (pq->state == SDMA_PKT_Q_ACTIVE),
+@@ -817,10 +803,19 @@ int hfi1_user_sdma_process_request(struc
+       *count += idx;
+       return 0;
+ free_req:
+-      user_sdma_free_request(req, true);
+-      if (req_queued)
++      /*
++       * If the submitted seqsubmitted == npkts, the completion routine
+       * controls the final state.  If seqsubmitted < npkts, wait for any
++       * outstanding packets to finish before cleaning up.
++       */
++      if (req->seqsubmitted < req->info.npkts) {
++              if (req->seqsubmitted)
++                      wait_event(pq->busy.wait_dma,
++                                 (req->seqcomp == req->seqsubmitted - 1));
++              user_sdma_free_request(req, true);
+               pq_update(pq);
+-      set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
++              set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
++      }
+       return ret;
+ }
+@@ -903,10 +898,8 @@ static int user_sdma_send_pkts(struct us
+       pq = req->pq;
+       /* If tx completion has reported an error, we are done. */
+-      if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
+-              set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
++      if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags))
+               return -EFAULT;
+-      }
+       /*
+        * Check if we might have sent the entire request already
+@@ -929,10 +922,8 @@ static int user_sdma_send_pkts(struct us
+                * with errors. If so, we are not going to process any
+                * more packets from this request.
+                */
+-              if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
+-                      set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
++              if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags))
+                       return -EFAULT;
+-              }
+               tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
+               if (!tx)
+@@ -1090,7 +1081,6 @@ dosend:
+       ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+       req->seqsubmitted += count;
+       if (req->seqsubmitted == req->info.npkts) {
+-              set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+               /*
+                * The txreq has already been submitted to the HW queue
+                * so we can free the AHG entry now. Corruption will not
+@@ -1489,11 +1479,15 @@ static int set_txreq_header_ahg(struct u
+       return diff;
+ }
+-/*
+- * SDMA tx request completion callback. Called when the SDMA progress
+- * state machine gets notification that the SDMA descriptors for this
+- * tx request have been processed by the DMA engine. Called in
+- * interrupt context.
++/**
++ * user_sdma_txreq_cb() - SDMA tx request completion callback.
++ * @txreq: valid sdma tx request
++ * @status: success/failure of request
++ *
++ * Called when the SDMA progress state machine gets notification that
++ * the SDMA descriptors for this tx request have been processed by the
++ * DMA engine. Called in interrupt context.
++ * Only do work on completed sequences.
+  */
+ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
+ {
+@@ -1502,7 +1496,7 @@ static void user_sdma_txreq_cb(struct sd
+       struct user_sdma_request *req;
+       struct hfi1_user_sdma_pkt_q *pq;
+       struct hfi1_user_sdma_comp_q *cq;
+-      u16 idx;
++      enum hfi1_sdma_comp_state state = COMPLETE;
+       if (!tx->req)
+               return;
+@@ -1515,31 +1509,19 @@ static void user_sdma_txreq_cb(struct sd
+               SDMA_DBG(req, "SDMA completion with error %d",
+                        status);
+               set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
++              state = ERROR;
+       }
+       req->seqcomp = tx->seqnum;
+       kmem_cache_free(pq->txreq_cache, tx);
+-      tx = NULL;
+-      idx = req->info.comp_idx;
+-      if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
+-              if (req->seqcomp == req->info.npkts - 1) {
+-                      req->status = 0;
+-                      user_sdma_free_request(req, false);
+-                      pq_update(pq);
+-                      set_comp_state(pq, cq, idx, COMPLETE, 0);
+-              }
+-      } else {
+-              if (status != SDMA_TXREQ_S_OK)
+-                      req->status = status;
+-              if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
+-                  (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
+-                   test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+-                      user_sdma_free_request(req, false);
+-                      pq_update(pq);
+-                      set_comp_state(pq, cq, idx, ERROR, req->status);
+-              }
+-      }
++      /* sequence isn't complete?  We are done */
++      if (req->seqcomp != req->info.npkts - 1)
++              return;
++
++      user_sdma_free_request(req, false);
++      set_comp_state(pq, cq, req->info.comp_idx, state, status);
++      pq_update(pq);
+ }
+ static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
+@@ -1572,6 +1554,8 @@ static void user_sdma_free_request(struc
+                       if (!node)
+                               continue;
++                      req->iovs[i].node = NULL;
++
+                       if (unpin)
+                               hfi1_mmu_rb_remove(req->pq->handler,
+                                                  &node->rb);
index 04ffc695c196da9e95a6eb80d8da0e4fdbf4046d..1c51d9c8fcecc8d378da2f0a819182033355274c 100644 (file)
@@ -22,3 +22,6 @@ can-dev-can_get_echo_skb-factor-out-non-sending-code-to-__can_get_echo_skb.patch
 can-dev-__can_get_echo_skb-replace-struct-can_frame-by-canfd_frame-to-access-frame-length.patch
 can-dev-__can_get_echo_skb-don-t-crash-the-kernel-if-can_priv-echo_skb-is-accessed-out-of-bounds.patch
 can-dev-__can_get_echo_skb-print-error-message-if-trying-to-echo-non-existing-skb.patch
+acpica-aml-interpreter-add-region-addresses-in-global-list-during-initialization.patch
+ib-core-fix-for-core-panic.patch
+ib-hfi1-eliminate-races-in-the-sdma-send-error-path.patch