git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Feb 2019 13:43:36 +0000 (14:43 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Feb 2019 13:43:36 +0000 (14:43 +0100)
added patches:
ib-hfi1-add-limit-test-for-rc-uc-send-via-loopback.patch
oom-oom_reaper-do-not-enqueue-same-task-twice.patch
pci-vmd-free-up-irqs-on-suspend-path.patch

queue-4.9/ib-hfi1-add-limit-test-for-rc-uc-send-via-loopback.patch [new file with mode: 0644]
queue-4.9/oom-oom_reaper-do-not-enqueue-same-task-twice.patch [new file with mode: 0644]
queue-4.9/pci-vmd-free-up-irqs-on-suspend-path.patch [new file with mode: 0644]
queue-4.9/series

diff --git a/queue-4.9/ib-hfi1-add-limit-test-for-rc-uc-send-via-loopback.patch b/queue-4.9/ib-hfi1-add-limit-test-for-rc-uc-send-via-loopback.patch
new file mode 100644 (file)
index 0000000..bb865d7
--- /dev/null
@@ -0,0 +1,79 @@
+From 09ce351dff8e7636af0beb72cd4a86c3904a0500 Mon Sep 17 00:00:00 2001
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Date: Thu, 17 Jan 2019 12:42:16 -0800
+Subject: IB/hfi1: Add limit test for RC/UC send via loopback
+
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+
+commit 09ce351dff8e7636af0beb72cd4a86c3904a0500 upstream.
+
+Fix potential memory corruption and panic in loopback for IB_WR_SEND
+variants.
+
+The code blindly assumes the posted length will fit in the fetched rwqe,
+which is not a valid assumption.
+
+Fix by adding a limit test, and triggering the appropriate send completion
+and putting the QP in an error state.  This mimics the handling for
+non-loopback QPs.
+
+Fixes: 15703461533a ("IB/{hfi1, qib, rdmavt}: Move ruc_loopback to rdmavt")
+Cc: <stable@vger.kernel.org> #v4.20+
+Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+
+---
+ drivers/infiniband/hw/hfi1/ruc.c    |    7 ++++++-
+ drivers/infiniband/hw/qib/qib_ruc.c |    7 ++++++-
+ 2 files changed, 12 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/hw/hfi1/ruc.c
++++ b/drivers/infiniband/hw/hfi1/ruc.c
+@@ -471,6 +471,8 @@ send:
+                       goto op_err;
+               if (!ret)
+                       goto rnr_nak;
++              if (wqe->length > qp->r_len)
++                      goto inv_err;
+               break;
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+@@ -638,7 +640,10 @@ op_err:
+       goto err;
+ inv_err:
+-      send_status = IB_WC_REM_INV_REQ_ERR;
++      send_status =
++              sqp->ibqp.qp_type == IB_QPT_RC ?
++                      IB_WC_REM_INV_REQ_ERR :
++                      IB_WC_SUCCESS;
+       wc.status = IB_WC_LOC_QP_OP_ERR;
+       goto err;
+--- a/drivers/infiniband/hw/qib/qib_ruc.c
++++ b/drivers/infiniband/hw/qib/qib_ruc.c
+@@ -449,6 +449,8 @@ again:
+                       goto op_err;
+               if (!ret)
+                       goto rnr_nak;
++              if (wqe->length > qp->r_len)
++                      goto inv_err;
+               break;
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+@@ -612,7 +614,10 @@ op_err:
+       goto err;
+ inv_err:
+-      send_status = IB_WC_REM_INV_REQ_ERR;
++      send_status =
++              sqp->ibqp.qp_type == IB_QPT_RC ?
++                      IB_WC_REM_INV_REQ_ERR :
++                      IB_WC_SUCCESS;
+       wc.status = IB_WC_LOC_QP_OP_ERR;
+       goto err;
diff --git a/queue-4.9/oom-oom_reaper-do-not-enqueue-same-task-twice.patch b/queue-4.9/oom-oom_reaper-do-not-enqueue-same-task-twice.patch
new file mode 100644 (file)
index 0000000..86eb78a
--- /dev/null
@@ -0,0 +1,104 @@
+From 9bcdeb51bd7d2ae9fe65ea4d60643d2aeef5bfe3 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Fri, 1 Feb 2019 14:20:31 -0800
+Subject: oom, oom_reaper: do not enqueue same task twice
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+commit 9bcdeb51bd7d2ae9fe65ea4d60643d2aeef5bfe3 upstream.
+
+Arkadiusz reported that enabling memcg's group oom killing causes
+strange memcg statistics where there is no task in a memcg even though
+the number of tasks in that memcg is not 0.  It turned out that there is
+a bug in wake_oom_reaper() which allows enqueuing the same task twice,
+which makes it impossible to decrease the number of tasks in that memcg
+due to a refcount leak.
+
+This bug has existed since the OOM reaper became invokable from the
+task_will_free_mem(current) path in out_of_memory() in Linux 4.7,
+
+  T1@P1     |T2@P1     |T3@P1     |OOM reaper
+  ----------+----------+----------+------------
+                                   # Processing an OOM victim in a different memcg domain.
+                        try_charge()
+                          mem_cgroup_out_of_memory()
+                            mutex_lock(&oom_lock)
+             try_charge()
+               mem_cgroup_out_of_memory()
+                 mutex_lock(&oom_lock)
+  try_charge()
+    mem_cgroup_out_of_memory()
+      mutex_lock(&oom_lock)
+                            out_of_memory()
+                              oom_kill_process(P1)
+                                do_send_sig_info(SIGKILL, @P1)
+                                mark_oom_victim(T1@P1)
+                                wake_oom_reaper(T1@P1) # T1@P1 is enqueued.
+                            mutex_unlock(&oom_lock)
+                 out_of_memory()
+                   mark_oom_victim(T2@P1)
+                   wake_oom_reaper(T2@P1) # T2@P1 is enqueued.
+                 mutex_unlock(&oom_lock)
+      out_of_memory()
+        mark_oom_victim(T1@P1)
+        wake_oom_reaper(T1@P1) # T1@P1 is enqueued again due to oom_reaper_list == T2@P1 && T1@P1->oom_reaper_list == NULL.
+      mutex_unlock(&oom_lock)
+                                   # Completed processing an OOM victim in a different memcg domain.
+                                   spin_lock(&oom_reaper_lock)
+                                   # T1@P1 is dequeued.
+                                   spin_unlock(&oom_reaper_lock)
+
+but memcg's group oom killing made it easier to trigger this bug by
+calling wake_oom_reaper() on the same task from one out_of_memory()
+request.
+
+Fix this bug using an approach used by commit 855b018325737f76 ("oom,
+oom_reaper: disable oom_reaper for oom_kill_allocating_task").  As a
+side effect, this patch also avoids enqueuing multiple threads sharing
+memory via the task_will_free_mem(current) path.
+
+Link: http://lkml.kernel.org/r/e865a044-2c10-9858-f4ef-254bc71d6cc2@i-love.sakura.ne.jp
+Link: http://lkml.kernel.org/r/5ee34fc6-1485-34f8-8790-903ddabaa809@i-love.sakura.ne.jp
+Fixes: af8e15cc85a25315 ("oom, oom_reaper: do not enqueue task if it is on the oom_reaper_list head")
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reported-by: Arkadiusz Miskiewicz <arekm@maven.pl>
+Tested-by: Arkadiusz Miskiewicz <arekm@maven.pl>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Roman Gushchin <guro@fb.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Aleksa Sarai <asarai@suse.de>
+Cc: Jay Kamat <jgkamat@fb.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/sched.h |    1 +
+ mm/oom_kill.c         |    4 ++--
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -527,6 +527,7 @@ static inline int get_dumpable(struct mm
+ #define MMF_OOM_SKIP          21      /* mm is of no interest for the OOM killer */
+ #define MMF_UNSTABLE          22      /* mm is unstable for copy_from_user */
+ #define MMF_HUGE_ZERO_PAGE    23      /* mm has ever used the global huge zero page */
++#define MMF_OOM_REAP_QUEUED   26      /* mm was queued for oom_reaper */
+ #define MMF_INIT_MASK         (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -626,8 +626,8 @@ static void wake_oom_reaper(struct task_
+       if (!oom_reaper_th)
+               return;
+-      /* tsk is already queued? */
+-      if (tsk == oom_reaper_list || tsk->oom_reaper_list)
++      /* mm is already queued? */
++      if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
+               return;
+       get_task_struct(tsk);
diff --git a/queue-4.9/pci-vmd-free-up-irqs-on-suspend-path.patch b/queue-4.9/pci-vmd-free-up-irqs-on-suspend-path.patch
new file mode 100644 (file)
index 0000000..1832585
--- /dev/null
@@ -0,0 +1,61 @@
+From e2b1820bd5d0962d6f271b0d47c3a0e38647df2f Mon Sep 17 00:00:00 2001
+From: Scott Bauer <scott.bauer@intel.com>
+Date: Fri, 11 Aug 2017 14:54:32 -0600
+Subject: PCI: vmd: Free up IRQs on suspend path
+
+From: Scott Bauer <scott.bauer@intel.com>
+
+commit e2b1820bd5d0962d6f271b0d47c3a0e38647df2f upstream.
+
+Free up the IRQs we request on the suspend path and reallocate them on the
+resume path.
+
+Fixes this error:
+
+  CPU 111 disable failed: CPU has 9 vectors assigned and there are only 0 available.
+  Error taking CPU111 down: -34
+  Non-boot CPUs are not disabled
+  Enabling non-boot CPUs ...
+
+Signed-off-by: Scott Bauer <scott.bauer@intel.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Acked-by: Keith Busch <keith.busch@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Jon Derrick <jonathan.derrick@intel.com>
+Signed-off-by: Sushma Kalakota <sushmax.kalakota@intel.com>
+
+---
+ drivers/pci/host/vmd.c |   15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+--- a/drivers/pci/host/vmd.c
++++ b/drivers/pci/host/vmd.c
+@@ -731,6 +731,11 @@ static void vmd_remove(struct pci_dev *d
+ static int vmd_suspend(struct device *dev)
+ {
+       struct pci_dev *pdev = to_pci_dev(dev);
++      struct vmd_dev *vmd = pci_get_drvdata(pdev);
++      int i;
++
++      for (i = 0; i < vmd->msix_count; i++)
++                devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);
+       pci_save_state(pdev);
+       return 0;
+@@ -739,6 +744,16 @@ static int vmd_suspend(struct device *de
+ static int vmd_resume(struct device *dev)
+ {
+       struct pci_dev *pdev = to_pci_dev(dev);
++      struct vmd_dev *vmd = pci_get_drvdata(pdev);
++      int err, i;
++
++      for (i = 0; i < vmd->msix_count; i++) {
++              err = devm_request_irq(dev, pci_irq_vector(pdev, i),
++                                     vmd_irq, IRQF_NO_THREAD,
++                                     "vmd", &vmd->irqs[i]);
++              if (err)
++                      return err;
++      }
+       pci_restore_state(pdev);
+       return 0;
index 04e8a8f416c3b95408fd430a99eea5e2d691acfc..3dc138d1858dd522ffd9ca19f7929470a65e073e 100644 (file)
@@ -131,3 +131,6 @@ x86-mce-initialize-mce.bank-in-the-case-of-a-fatal-error-in-mce_no_way_out.patch
 perf-core-don-t-warn-for-impossible-ring-buffer-sizes.patch
 perf-tests-evsel-tp-sched-fix-bitwise-operator.patch
 serial-fix-race-between-flush_to_ldisc-and-tty_open.patch
+oom-oom_reaper-do-not-enqueue-same-task-twice.patch
+pci-vmd-free-up-irqs-on-suspend-path.patch
+ib-hfi1-add-limit-test-for-rc-uc-send-via-loopback.patch