git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 11 Oct 2018 09:32:27 +0000 (11:32 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 11 Oct 2018 09:32:27 +0000 (11:32 +0200)
added patches:
ath10k-fix-kernel-panic-issue-during-pci-probe.patch
ath10k-fix-use-after-free-in-ath10k_wmi_cmd_send_nowait.patch
cgroup-cpuset-remove-circular-dependency-deadlock.patch
nvme_fc-fix-ctrl-create-failures-racing-with-workq-items.patch

queue-4.14/ath10k-fix-kernel-panic-issue-during-pci-probe.patch [new file with mode: 0644]
queue-4.14/ath10k-fix-use-after-free-in-ath10k_wmi_cmd_send_nowait.patch [new file with mode: 0644]
queue-4.14/cgroup-cpuset-remove-circular-dependency-deadlock.patch [new file with mode: 0644]
queue-4.14/nvme_fc-fix-ctrl-create-failures-racing-with-workq-items.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/ath10k-fix-kernel-panic-issue-during-pci-probe.patch b/queue-4.14/ath10k-fix-kernel-panic-issue-during-pci-probe.patch
new file mode 100644 (file)
index 0000000..d1cf65d
--- /dev/null
@@ -0,0 +1,100 @@
+From 50e79e25250bf928369996277e85b00536b380c7 Mon Sep 17 00:00:00 2001
+From: Yu Wang <yyuwang@codeaurora.org>
+Date: Tue, 30 Jan 2018 14:06:08 +0200
+Subject: ath10k: fix kernel panic issue during pci probe
+
+From: Yu Wang <yyuwang@codeaurora.org>
+
+commit 50e79e25250bf928369996277e85b00536b380c7 upstream.
+
+If the device is gone during chip reset, ar->normal_mode_fw.board is not
+initialized, but ath10k_debug_print_hwfw_info() will try to access its
+member, which will cause 'kernel NULL pointer' issue. This was found
+using a faulty device (pci link went down sometimes) in a random
+insmod/rmmod/other-op test.
+To fix it, check ar->normal_mode_fw.board before accessing the member.
+
+pci 0000:02:00.0: BAR 0: assigned [mem 0xf7400000-0xf75fffff 64bit]
+ath10k_pci 0000:02:00.0: enabling device (0000 -> 0002)
+ath10k_pci 0000:02:00.0: pci irq msi oper_irq_mode 2 irq_mode 0 reset_mode 0
+ath10k_pci 0000:02:00.0: failed to read device register, device is gone
+ath10k_pci 0000:02:00.0: failed to wait for target init: -5
+ath10k_pci 0000:02:00.0: failed to warm reset: -5
+ath10k_pci 0000:02:00.0: firmware crashed during chip reset
+ath10k_pci 0000:02:00.0: firmware crashed! (uuid 5d018951-b8e1-404a-8fde-923078b4423a)
+ath10k_pci 0000:02:00.0: (null) target 0x00000000 chip_id 0x00340aff sub 0000:0000
+ath10k_pci 0000:02:00.0: kconfig debug 1 debugfs 1 tracing 1 dfs 1 testmode 1
+ath10k_pci 0000:02:00.0: firmware ver  api 0 features  crc32 00000000
+...
+BUG: unable to handle kernel NULL pointer dereference at 00000004
+...
+Call Trace:
+ [<fb4e7882>] ath10k_print_driver_info+0x12/0x20 [ath10k_core]
+ [<fb62b7dd>] ath10k_pci_fw_crashed_dump+0x6d/0x4d0 [ath10k_pci]
+ [<fb629f07>] ? ath10k_pci_sleep.part.19+0x57/0xc0 [ath10k_pci]
+ [<fb62c8ee>] ath10k_pci_hif_power_up+0x14e/0x1b0 [ath10k_pci]
+ [<c10477fb>] ? do_page_fault+0xb/0x10
+ [<fb4eb934>] ath10k_core_register_work+0x24/0x840 [ath10k_core]
+ [<c18a00d8>] ? netlbl_unlhsh_remove+0x178/0x410
+ [<c10477f0>] ? __do_page_fault+0x480/0x480
+ [<c1068e44>] process_one_work+0x114/0x3e0
+ [<c1069d07>] worker_thread+0x37/0x4a0
+ [<c106e294>] kthread+0xa4/0xc0
+ [<c1069cd0>] ? create_worker+0x180/0x180
+ [<c106e1f0>] ? kthread_park+0x50/0x50
+ [<c18ab4f7>] ret_from_fork+0x1b/0x28
+ Code: 78 80 b8 50 09 00 00 00 75 5d 8d 75 94 c7 44 24 08 aa d7 52 fb c7 44 24 04 64 00 00 00
+ 89 34 24 e8 82 52 e2 c5 8b 83 dc 08 00 00 <8b> 50 04 8b 08 31 c0 e8 20 57 e3 c5 89 44 24 10 8b 83 58 09 00
+ EIP: [<fb4e7754>]-
+ ath10k_debug_print_board_info+0x34/0xb0 [ath10k_core]
+ SS:ESP 0068:f4921d90
+ CR2: 0000000000000004
+
+Signed-off-by: Yu Wang <yyuwang@codeaurora.org>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+[AmitP: Minor rebasing for 4.14.y and 4.9.y]
+Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/ath/ath10k/debug.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/wireless/ath/ath10k/debug.c
++++ b/drivers/net/wireless/ath/ath10k/debug.c
+@@ -1,6 +1,7 @@
+ /*
+  * Copyright (c) 2005-2011 Atheros Communications Inc.
+  * Copyright (c) 2011-2013 Qualcomm Atheros, Inc.
++ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+  *
+  * Permission to use, copy, modify, and/or distribute this software for any
+  * purpose with or without fee is hereby granted, provided that the above
+@@ -163,6 +164,8 @@ void ath10k_debug_print_hwfw_info(struct
+ void ath10k_debug_print_board_info(struct ath10k *ar)
+ {
+       char boardinfo[100];
++      const struct firmware *board;
++      u32 crc;
+       if (ar->id.bmi_ids_valid)
+               scnprintf(boardinfo, sizeof(boardinfo), "%d:%d",
+@@ -170,11 +173,16 @@ void ath10k_debug_print_board_info(struc
+       else
+               scnprintf(boardinfo, sizeof(boardinfo), "N/A");
++      board = ar->normal_mode_fw.board;
++      if (!IS_ERR_OR_NULL(board))
++              crc = crc32_le(0, board->data, board->size);
++      else
++              crc = 0;
++
+       ath10k_info(ar, "board_file api %d bmi_id %s crc32 %08x",
+                   ar->bd_api,
+                   boardinfo,
+-                  crc32_le(0, ar->normal_mode_fw.board->data,
+-                           ar->normal_mode_fw.board->size));
++                  crc);
+ }
+ void ath10k_debug_print_boot_info(struct ath10k *ar)
diff --git a/queue-4.14/ath10k-fix-use-after-free-in-ath10k_wmi_cmd_send_nowait.patch b/queue-4.14/ath10k-fix-use-after-free-in-ath10k_wmi_cmd_send_nowait.patch
new file mode 100644 (file)
index 0000000..1145fd1
--- /dev/null
@@ -0,0 +1,82 @@
+From 9ef0f58ed7b4a55da4a64641d538e0d9e46579ac Mon Sep 17 00:00:00 2001
+From: Carl Huang <cjhuang@codeaurora.org>
+Date: Mon, 5 Mar 2018 14:44:02 +0800
+Subject: ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait
+
+From: Carl Huang <cjhuang@codeaurora.org>
+
+commit 9ef0f58ed7b4a55da4a64641d538e0d9e46579ac upstream.
+
+The skb may be freed in tx completion context before
+trace_ath10k_wmi_cmd is called. This can be easily captured when
+KASAN(Kernel Address Sanitizer) is enabled. The fix is to move
+trace_ath10k_wmi_cmd before the send operation. Since ret then has no
+meaning in trace_ath10k_wmi_cmd, remove this parameter too.
+
+Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
+Tested-by: Brian Norris <briannorris@chromium.org>
+Reviewed-by: Brian Norris <briannorris@chromium.org>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/ath/ath10k/trace.h |   12 ++++--------
+ drivers/net/wireless/ath/ath10k/wmi.c   |    2 +-
+ 2 files changed, 5 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/wireless/ath/ath10k/trace.h
++++ b/drivers/net/wireless/ath/ath10k/trace.h
+@@ -152,10 +152,9 @@ TRACE_EVENT(ath10k_log_dbg_dump,
+ );
+ TRACE_EVENT(ath10k_wmi_cmd,
+-      TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len,
+-               int ret),
++      TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len),
+-      TP_ARGS(ar, id, buf, buf_len, ret),
++      TP_ARGS(ar, id, buf, buf_len),
+       TP_STRUCT__entry(
+               __string(device, dev_name(ar->dev))
+@@ -163,7 +162,6 @@ TRACE_EVENT(ath10k_wmi_cmd,
+               __field(unsigned int, id)
+               __field(size_t, buf_len)
+               __dynamic_array(u8, buf, buf_len)
+-              __field(int, ret)
+       ),
+       TP_fast_assign(
+@@ -171,17 +169,15 @@ TRACE_EVENT(ath10k_wmi_cmd,
+               __assign_str(driver, dev_driver_string(ar->dev));
+               __entry->id = id;
+               __entry->buf_len = buf_len;
+-              __entry->ret = ret;
+               memcpy(__get_dynamic_array(buf), buf, buf_len);
+       ),
+       TP_printk(
+-              "%s %s id %d len %zu ret %d",
++              "%s %s id %d len %zu",
+               __get_str(driver),
+               __get_str(device),
+               __entry->id,
+-              __entry->buf_len,
+-              __entry->ret
++              __entry->buf_len
+       )
+ );
+--- a/drivers/net/wireless/ath/ath10k/wmi.c
++++ b/drivers/net/wireless/ath/ath10k/wmi.c
+@@ -1741,8 +1741,8 @@ int ath10k_wmi_cmd_send_nowait(struct at
+       cmd_hdr->cmd_id = __cpu_to_le32(cmd);
+       memset(skb_cb, 0, sizeof(*skb_cb));
++      trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len);
+       ret = ath10k_htc_send(&ar->htc, ar->wmi.eid, skb);
+-      trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len, ret);
+       if (ret)
+               goto err_pull;
diff --git a/queue-4.14/cgroup-cpuset-remove-circular-dependency-deadlock.patch b/queue-4.14/cgroup-cpuset-remove-circular-dependency-deadlock.patch
new file mode 100644 (file)
index 0000000..2ec8d0c
--- /dev/null
@@ -0,0 +1,264 @@
+From aa24163b2ee5c92120e32e99b5a93143a0f4258e Mon Sep 17 00:00:00 2001
+From: Prateek Sood <prsood@codeaurora.org>
+Date: Wed, 15 Nov 2017 19:50:14 +0530
+Subject: cgroup/cpuset: remove circular dependency deadlock
+
+From: Prateek Sood <prsood@codeaurora.org>
+
+commit aa24163b2ee5c92120e32e99b5a93143a0f4258e upstream.
+
+Remove a circular dependency deadlock in a scenario where CPU hotplug is
+being done while cgroup and cpuset updates are being triggered from
+userspace.
+
+Process A => kthreadd => Process B => Process C => Process A
+
+Process A
+cpu_subsys_offline();
+  cpu_down();
+    _cpu_down();
+      percpu_down_write(&cpu_hotplug_lock); //held
+      cpuhp_invoke_callback();
+            workqueue_offline_cpu();
+            queue_work_on(); // unbind_work on system_highpri_wq
+               __queue_work();
+                 insert_work();
+                    wake_up_worker();
+            flush_work();
+               wait_for_completion();
+
+worker_thread();
+   manage_workers();
+      create_worker();
+            kthread_create_on_node();
+                   wake_up_process(kthreadd_task);
+
+kthreadd
+kthreadd();
+  kernel_thread();
+    do_fork();
+      copy_process();
+        percpu_down_read(&cgroup_threadgroup_rwsem);
+          __rwsem_down_read_failed_common(); //waiting
+
+Process B
+kernfs_fop_write();
+  cgroup_file_write();
+    cgroup_procs_write();
+      percpu_down_write(&cgroup_threadgroup_rwsem); //held
+      cgroup_attach_task();
+        cgroup_migrate();
+          cgroup_migrate_execute();
+            cpuset_can_attach();
+              mutex_lock(&cpuset_mutex); //waiting
+
+Process C
+kernfs_fop_write();
+  cgroup_file_write();
+    cpuset_write_resmask();
+      mutex_lock(&cpuset_mutex); //held
+      update_cpumask();
+        update_cpumasks_hier();
+          rebuild_sched_domains_locked();
+            get_online_cpus();
+              percpu_down_read(&cpu_hotplug_lock); //waiting
+
+Eliminating deadlock by reversing the locking order for cpuset_mutex and
+cpu_hotplug_lock.
+
+Signed-off-by: Prateek Sood <prsood@codeaurora.org>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cgroup/cpuset.c |   53 +++++++++++++++++++++++++++----------------------
+ 1 file changed, 30 insertions(+), 23 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -817,6 +817,18 @@ done:
+       return ndoms;
+ }
++static void cpuset_sched_change_begin(void)
++{
++      cpus_read_lock();
++      mutex_lock(&cpuset_mutex);
++}
++
++static void cpuset_sched_change_end(void)
++{
++      mutex_unlock(&cpuset_mutex);
++      cpus_read_unlock();
++}
++
+ /*
+  * Rebuild scheduler domains.
+  *
+@@ -826,16 +838,14 @@ done:
+  * 'cpus' is removed, then call this routine to rebuild the
+  * scheduler's dynamic sched domains.
+  *
+- * Call with cpuset_mutex held.  Takes get_online_cpus().
+  */
+-static void rebuild_sched_domains_locked(void)
++static void rebuild_sched_domains_cpuslocked(void)
+ {
+       struct sched_domain_attr *attr;
+       cpumask_var_t *doms;
+       int ndoms;
+       lockdep_assert_held(&cpuset_mutex);
+-      get_online_cpus();
+       /*
+        * We have raced with CPU hotplug. Don't do anything to avoid
+@@ -843,27 +853,25 @@ static void rebuild_sched_domains_locked
+        * Anyways, hotplug work item will rebuild sched domains.
+        */
+       if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
+-              goto out;
++              return;
+       /* Generate domain masks and attrs */
+       ndoms = generate_sched_domains(&doms, &attr);
+       /* Have scheduler rebuild the domains */
+       partition_sched_domains(ndoms, doms, attr);
+-out:
+-      put_online_cpus();
+ }
+ #else /* !CONFIG_SMP */
+-static void rebuild_sched_domains_locked(void)
++static void rebuild_sched_domains_cpuslocked(void)
+ {
+ }
+ #endif /* CONFIG_SMP */
+ void rebuild_sched_domains(void)
+ {
+-      mutex_lock(&cpuset_mutex);
+-      rebuild_sched_domains_locked();
+-      mutex_unlock(&cpuset_mutex);
++      cpuset_sched_change_begin();
++      rebuild_sched_domains_cpuslocked();
++      cpuset_sched_change_end();
+ }
+ /**
+@@ -949,7 +957,7 @@ static void update_cpumasks_hier(struct
+       rcu_read_unlock();
+       if (need_rebuild_sched_domains)
+-              rebuild_sched_domains_locked();
++              rebuild_sched_domains_cpuslocked();
+ }
+ /**
+@@ -1281,7 +1289,7 @@ static int update_relax_domain_level(str
+               cs->relax_domain_level = val;
+               if (!cpumask_empty(cs->cpus_allowed) &&
+                   is_sched_load_balance(cs))
+-                      rebuild_sched_domains_locked();
++                      rebuild_sched_domains_cpuslocked();
+       }
+       return 0;
+@@ -1314,7 +1322,6 @@ static void update_tasks_flags(struct cp
+  *
+  * Call with cpuset_mutex held.
+  */
+-
+ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
+                      int turning_on)
+ {
+@@ -1347,7 +1354,7 @@ static int update_flag(cpuset_flagbits_t
+       spin_unlock_irq(&callback_lock);
+       if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
+-              rebuild_sched_domains_locked();
++              rebuild_sched_domains_cpuslocked();
+       if (spread_flag_changed)
+               update_tasks_flags(cs);
+@@ -1615,7 +1622,7 @@ static int cpuset_write_u64(struct cgrou
+       cpuset_filetype_t type = cft->private;
+       int retval = 0;
+-      mutex_lock(&cpuset_mutex);
++      cpuset_sched_change_begin();
+       if (!is_cpuset_online(cs)) {
+               retval = -ENODEV;
+               goto out_unlock;
+@@ -1651,7 +1658,7 @@ static int cpuset_write_u64(struct cgrou
+               break;
+       }
+ out_unlock:
+-      mutex_unlock(&cpuset_mutex);
++      cpuset_sched_change_end();
+       return retval;
+ }
+@@ -1662,7 +1669,7 @@ static int cpuset_write_s64(struct cgrou
+       cpuset_filetype_t type = cft->private;
+       int retval = -ENODEV;
+-      mutex_lock(&cpuset_mutex);
++      cpuset_sched_change_begin();
+       if (!is_cpuset_online(cs))
+               goto out_unlock;
+@@ -1675,7 +1682,7 @@ static int cpuset_write_s64(struct cgrou
+               break;
+       }
+ out_unlock:
+-      mutex_unlock(&cpuset_mutex);
++      cpuset_sched_change_end();
+       return retval;
+ }
+@@ -1714,7 +1721,7 @@ static ssize_t cpuset_write_resmask(stru
+       kernfs_break_active_protection(of->kn);
+       flush_work(&cpuset_hotplug_work);
+-      mutex_lock(&cpuset_mutex);
++      cpuset_sched_change_begin();
+       if (!is_cpuset_online(cs))
+               goto out_unlock;
+@@ -1738,7 +1745,7 @@ static ssize_t cpuset_write_resmask(stru
+       free_trial_cpuset(trialcs);
+ out_unlock:
+-      mutex_unlock(&cpuset_mutex);
++      cpuset_sched_change_end();
+       kernfs_unbreak_active_protection(of->kn);
+       css_put(&cs->css);
+       flush_workqueue(cpuset_migrate_mm_wq);
+@@ -2039,14 +2046,14 @@ out_unlock:
+ /*
+  * If the cpuset being removed has its flag 'sched_load_balance'
+  * enabled, then simulate turning sched_load_balance off, which
+- * will call rebuild_sched_domains_locked().
++ * will call rebuild_sched_domains_cpuslocked().
+  */
+ static void cpuset_css_offline(struct cgroup_subsys_state *css)
+ {
+       struct cpuset *cs = css_cs(css);
+-      mutex_lock(&cpuset_mutex);
++      cpuset_sched_change_begin();
+       if (is_sched_load_balance(cs))
+               update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
+@@ -2054,7 +2061,7 @@ static void cpuset_css_offline(struct cg
+       cpuset_dec();
+       clear_bit(CS_ONLINE, &cs->flags);
+-      mutex_unlock(&cpuset_mutex);
++      cpuset_sched_change_end();
+ }
+ static void cpuset_css_free(struct cgroup_subsys_state *css)
diff --git a/queue-4.14/nvme_fc-fix-ctrl-create-failures-racing-with-workq-items.patch b/queue-4.14/nvme_fc-fix-ctrl-create-failures-racing-with-workq-items.patch
new file mode 100644 (file)
index 0000000..c133e61
--- /dev/null
@@ -0,0 +1,45 @@
+From cf25809bec2c7df4b45df5b2196845d9a4a3c89b Mon Sep 17 00:00:00 2001
+From: James Smart <jsmart2021@gmail.com>
+Date: Tue, 13 Mar 2018 09:48:07 -0700
+Subject: nvme_fc: fix ctrl create failures racing with workq items
+
+From: James Smart <jsmart2021@gmail.com>
+
+commit cf25809bec2c7df4b45df5b2196845d9a4a3c89b upstream.
+
+If there are errors during initial controller create, the transport
+will teardown the partially initialized controller struct and free
+the ctlr memory.  Trouble is - most of those errors can occur due
+to asynchronous events happening such io timeouts and subsystem
+connectivity failures. Those failures invoke async workq items to
+reset the controller and attempt reconnect.  Those may be in progress
+as the main thread frees the ctrl memory, resulting in NULL ptr oops.
+
+Prevent this from happening by having the main ctrl failure thread
+changing state to DELETING followed by synchronously cancelling any
+pending queued work item. The change of state will prevent the
+scheduling of resets or reconnect events.
+
+Signed-off-by: James Smart <james.smart@broadcom.com>
+Signed-off-by: Keith Busch <keith.busch@intel.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvme/host/fc.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/nvme/host/fc.c
++++ b/drivers/nvme/host/fc.c
+@@ -2868,6 +2868,10 @@ nvme_fc_init_ctrl(struct device *dev, st
+       }
+       if (ret) {
++              nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
++              cancel_work_sync(&ctrl->ctrl.reset_work);
++              cancel_delayed_work_sync(&ctrl->connect_work);
++
+               /* couldn't schedule retry - fail out */
+               dev_err(ctrl->ctrl.device,
+                       "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
index e1f6938efaad6c3680a781e48dd4e84e2dc69ed1..24accf0bd3904d31b5eeaaa5132a132c9462403f 100644 (file)
@@ -26,3 +26,7 @@ tty-drop-tty-count-on-tty_reopen-failure.patch
 of-unittest-disable-interrupt-node-tests-for-old-world-mac-systems.patch
 perf-annotate-use-asprintf-when-formatting-objdump-command-line.patch
 perf-tools-fix-python-extension-build-for-gcc-8.patch
+cgroup-cpuset-remove-circular-dependency-deadlock.patch
+ath10k-fix-use-after-free-in-ath10k_wmi_cmd_send_nowait.patch
+ath10k-fix-kernel-panic-issue-during-pci-probe.patch
+nvme_fc-fix-ctrl-create-failures-racing-with-workq-items.patch