git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 25 Apr 2017 12:20:53 +0000 (13:20 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 25 Apr 2017 12:20:53 +0000 (13:20 +0100)
added patches:
device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch
x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch
x86-mce-make-the-mce-notifier-a-blocking-one.patch

queue-4.9/device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch [new file with mode: 0644]
queue-4.9/x86-mce-make-the-mce-notifier-a-blocking-one.patch [new file with mode: 0644]

diff --git a/queue-4.9/device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch b/queue-4.9/device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch
new file mode 100644
index 0000000..7e60e2b
--- /dev/null
@@ -0,0 +1,127 @@
+From 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Fri, 7 Apr 2017 16:42:08 -0700
+Subject: device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 upstream.
+
+The following warning triggers with a new unit test that stresses the
+device-dax interface.
+
+ ===============================
+ [ ERR: suspicious RCU usage.  ]
+ 4.11.0-rc4+ #1049 Tainted: G           O
+ -------------------------------
+ ./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section!
+
+ other info that might help us debug this:
+
+ rcu_scheduler_active = 2, debug_locks = 0
+ 2 locks held by fio/9070:
+  #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff8d0739d7>] __do_page_fault+0x167/0x4f0
+  #1:  (rcu_read_lock){......}, at: [<ffffffffc03fbd02>] dax_dev_huge_fault+0x32/0x620 [dax]
+
+ Call Trace:
+  dump_stack+0x86/0xc3
+  lockdep_rcu_suspicious+0xd7/0x110
+  ___might_sleep+0xac/0x250
+  __might_sleep+0x4a/0x80
+  __alloc_pages_nodemask+0x23a/0x360
+  alloc_pages_current+0xa1/0x1f0
+  pte_alloc_one+0x17/0x80
+  __pte_alloc+0x1e/0x120
+  __get_locked_pte+0x1bf/0x1d0
+  insert_pfn.isra.70+0x3a/0x100
+  ? lookup_memtype+0xa6/0xd0
+  vm_insert_mixed+0x64/0x90
+  dax_dev_huge_fault+0x520/0x620 [dax]
+  ? dax_dev_huge_fault+0x32/0x620 [dax]
+  dax_dev_fault+0x10/0x20 [dax]
+  __do_fault+0x1e/0x140
+  __handle_mm_fault+0x9af/0x10d0
+  handle_mm_fault+0x16d/0x370
+  ? handle_mm_fault+0x47/0x370
+  __do_page_fault+0x28c/0x4f0
+  trace_do_page_fault+0x58/0x2a0
+  do_async_page_fault+0x1a/0xa0
+  async_page_fault+0x28/0x30
+
+Inserting a page table entry may trigger an allocation while we are
+holding a read lock to keep the device instance alive for the duration
+of the fault. Use srcu for this keep-alive protection.
+
+Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dax/Kconfig |    1 +
+ drivers/dax/dax.c   |   13 +++++++------
+ 2 files changed, 8 insertions(+), 6 deletions(-)
+
+--- a/drivers/dax/Kconfig
++++ b/drivers/dax/Kconfig
+@@ -2,6 +2,7 @@ menuconfig DEV_DAX
+       tristate "DAX: direct access to differentiated memory"
+       default m if NVDIMM_DAX
+       depends on TRANSPARENT_HUGEPAGE
++      select SRCU
+       help
+         Support raw access to differentiated (persistence, bandwidth,
+         latency...) memory via an mmap(2) capable character
+--- a/drivers/dax/dax.c
++++ b/drivers/dax/dax.c
+@@ -24,6 +24,7 @@
+ #include "dax.h"
+ static dev_t dax_devt;
++DEFINE_STATIC_SRCU(dax_srcu);
+ static struct class *dax_class;
+ static DEFINE_IDA(dax_minor_ida);
+ static int nr_dax = CONFIG_NR_DEV_DAX;
+@@ -59,7 +60,7 @@ struct dax_region {
+  * @region - parent region
+  * @dev - device backing the character device
+  * @cdev - core chardev data
+- * @alive - !alive + rcu grace period == no new mappings can be established
++ * @alive - !alive + srcu grace period == no new mappings can be established
+  * @id - child id in the region
+  * @num_resources - number of physical address extents in this device
+  * @res - array of physical address ranges
+@@ -437,7 +438,7 @@ static int __dax_dev_pmd_fault(struct da
+ static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+               pmd_t *pmd, unsigned int flags)
+ {
+-      int rc;
++      int rc, id;
+       struct file *filp = vma->vm_file;
+       struct dax_dev *dax_dev = filp->private_data;
+@@ -445,9 +446,9 @@ static int dax_dev_pmd_fault(struct vm_a
+                       current->comm, (flags & FAULT_FLAG_WRITE)
+                       ? "write" : "read", vma->vm_start, vma->vm_end);
+-      rcu_read_lock();
++      id = srcu_read_lock(&dax_srcu);
+       rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
+-      rcu_read_unlock();
++      srcu_read_unlock(&dax_srcu, id);
+       return rc;
+ }
+@@ -563,11 +564,11 @@ static void unregister_dax_dev(void *dev
+        * Note, rcu is not protecting the liveness of dax_dev, rcu is
+        * ensuring that any fault handlers that might have seen
+        * dax_dev->alive == true, have completed.  Any fault handlers
+-       * that start after synchronize_rcu() has started will abort
++       * that start after synchronize_srcu() has started will abort
+        * upon seeing dax_dev->alive == false.
+        */
+       dax_dev->alive = false;
+-      synchronize_rcu();
++      synchronize_srcu(&dax_srcu);
+       unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
+       cdev_del(cdev);
+       device_unregister(dev);
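The SRCU conversion above boils down to the following pattern. This is an illustrative sketch, not the driver code itself: dev_keepalive_srcu, dev_alive, my_dev_fault() and my_dev_teardown() are made-up stand-ins for dax_srcu, dax_dev->alive, dax_dev_pmd_fault() and unregister_dax_dev(). The point is that an srcu_read_lock() section, unlike an rcu_read_lock() section, is allowed to sleep, so the pte allocation in the fault path becomes legal, while synchronize_srcu() still guarantees that teardown waits for every fault handler that could have observed alive == true.

  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/srcu.h>

  DEFINE_STATIC_SRCU(dev_keepalive_srcu);         /* plays the role of dax_srcu */
  static bool dev_alive = true;                   /* plays the role of dax_dev->alive */

  /* Fault path: may sleep (pte allocation), hence SRCU rather than plain RCU. */
  static int my_dev_fault(void)
  {
          int id, rc;

          id = srcu_read_lock(&dev_keepalive_srcu);
          if (!READ_ONCE(dev_alive)) {
                  srcu_read_unlock(&dev_keepalive_srcu, id);
                  return VM_FAULT_SIGBUS;
          }
          might_sleep();          /* e.g. pte_alloc_one() via vm_insert_mixed() can block here */
          rc = 0;
          srcu_read_unlock(&dev_keepalive_srcu, id);
          return rc;
  }

  /* Teardown: flip alive, then wait for all in-flight readers to drain. */
  static void my_dev_teardown(void)
  {
          WRITE_ONCE(dev_alive, false);
          synchronize_srcu(&dev_keepalive_srcu);
          /* No fault handler can still see dev_alive == true past this point. */
  }
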
diff --git a/queue-4.9/series b/queue-4.9/series
index fb1ee6d46cda53255a98f166fb5c4d13a16fb276..e98293bddcac1ee07249c70a76e37d7c7f5ca6dc 100644
@@ -16,3 +16,6 @@ mac80211-reject-tods-broadcast-data-frames.patch
 mac80211-fix-mu-mimo-follow-mac-mode.patch
 ubi-upd-always-flush-after-prepared-for-an-update.patch
 powerpc-kprobe-fix-oops-when-kprobed-on-stdu-instruction.patch
+x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch
+x86-mce-make-the-mce-notifier-a-blocking-one.patch
+device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch
diff --git a/queue-4.9/x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch b/queue-4.9/x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch
new file mode 100644
index 0000000..072f6b6
--- /dev/null
@@ -0,0 +1,53 @@
+From 29f72ce3e4d18066ec75c79c857bee0618a3504b Mon Sep 17 00:00:00 2001
+From: Yazen Ghannam <yazen.ghannam@amd.com>
+Date: Thu, 30 Mar 2017 13:17:14 +0200
+Subject: x86/mce/AMD: Give a name to MCA bank 3 when accessed with legacy MSRs
+
+From: Yazen Ghannam <yazen.ghannam@amd.com>
+
+commit 29f72ce3e4d18066ec75c79c857bee0618a3504b upstream.
+
+MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name.
+However, MCA bank 3 is defined on Fam17h systems and can be accessed
+using legacy MSRs. Without a name we get a stack trace on Fam17h systems
+when trying to register sysfs files for bank 3 on kernels that don't
+recognize Scalable MCA.
+
+Call MCA bank 3 "decode_unit" since this is what it represents on
+Fam17h. This will allow kernels without SMCA support to see this bank on
+Fam17h+ and prevent the stack trace. This will not affect older systems
+since this bank is reserved on them, i.e. it'll be ignored.
+
+Tested on AMD Fam15h and Fam17h systems.
+
+  WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal
+  kobject: (ffff88085bb256c0): attempted to be registered with empty name!
+  ...
+  Call Trace:
+   kobject_add_internal
+   kobject_add
+   kobject_create_and_add
+   threshold_create_device
+   threshold_init_device
+
+Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce_amd.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
+@@ -59,7 +59,7 @@ static const char * const th_names[] = {
+       "load_store",
+       "insn_fetch",
+       "combined_unit",
+-      "",
++      "decode_unit",
+       "northbridge",
+       "execution_unit",
+ };
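For context on why the empty entry ends in a kobject warning: the legacy threshold sysfs code names each bank's directory after th_names[bank], where the index is the legacy MCA bank number, so an empty string becomes an empty kobject name. A schematic view of the table after the change (bank numbers added as comments; bank_dir_name() is an illustrative helper, not the actual function in mce_amd.c):

  static const char * const th_names[] = {
          "load_store",           /* bank 0 */
          "insn_fetch",           /* bank 1 */
          "combined_unit",        /* bank 2 */
          "decode_unit",          /* bank 3: previously "", but defined on Fam17h */
          "northbridge",          /* bank 4 */
          "execution_unit",       /* bank 5 */
  };

  /*
   * Illustrative only: an empty string returned here is what triggered the
   * "attempted to be registered with empty name!" WARN in kobject_add_internal().
   */
  static const char *bank_dir_name(unsigned int bank)
  {
          return th_names[bank];
  }
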
diff --git a/queue-4.9/x86-mce-make-the-mce-notifier-a-blocking-one.patch b/queue-4.9/x86-mce-make-the-mce-notifier-a-blocking-one.patch
new file mode 100644
index 0000000..6e0f62f
--- /dev/null
@@ -0,0 +1,127 @@
+From 0dc9c639e6553e39c13b2c0d54c8a1b098cb95e2 Mon Sep 17 00:00:00 2001
+From: Vishal Verma <vishal.l.verma@intel.com>
+Date: Tue, 18 Apr 2017 20:42:35 +0200
+Subject: x86/mce: Make the MCE notifier a blocking one
+
+From: Vishal Verma <vishal.l.verma@intel.com>
+
+commit 0dc9c639e6553e39c13b2c0d54c8a1b098cb95e2 upstream.
+
+The NFIT MCE handler callback (for handling media errors on NVDIMMs)
+takes a mutex to add the location of a memory error to a list. But since
+the notifier call chain for machine checks (x86_mce_decoder_chain) is
+atomic, we get a lockdep splat like:
+
+  BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
+  in_atomic(): 1, irqs_disabled(): 0, pid: 4, name: kworker/0:0
+  [..]
+  Call Trace:
+   dump_stack
+   ___might_sleep
+   __might_sleep
+   mutex_lock_nested
+   ? __lock_acquire
+   nfit_handle_mce
+   notifier_call_chain
+   atomic_notifier_call_chain
+   ? atomic_notifier_call_chain
+   mce_gen_pool_process
+
+Convert the notifier to a blocking one which gets to run only in process
+context.
+
+Boris: remove the notifier call in atomic context in print_mce(). For
+now, let's print the MCE on the atomic path so that we can make sure
+they go out and get logged at least.
+
+Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error")
+Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
+Acked-by: Tony Luck <tony.luck@intel.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: linux-edac <linux-edac@vger.kernel.org>
+Cc: x86-ml <x86@kernel.org>
+Link: http://lkml.kernel.org/r/20170411224457.24777-1-vishal.l.verma@intel.com
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce-genpool.c  |    2 +-
+ arch/x86/kernel/cpu/mcheck/mce-internal.h |    2 +-
+ arch/x86/kernel/cpu/mcheck/mce.c          |   16 +++-------------
+ 3 files changed, 5 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
++++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+@@ -85,7 +85,7 @@ void mce_gen_pool_process(void)
+       head = llist_reverse_order(head);
+       llist_for_each_entry_safe(node, tmp, head, llnode) {
+               mce = &node->mce;
+-              atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
++              blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+               gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
+       }
+ }
+--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
++++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
+@@ -13,7 +13,7 @@ enum severity_level {
+       MCE_PANIC_SEVERITY,
+ };
+-extern struct atomic_notifier_head x86_mce_decoder_chain;
++extern struct blocking_notifier_head x86_mce_decoder_chain;
+ #define ATTR_LEN              16
+ #define INITIAL_CHECK_INTERVAL        5 * 60 /* 5 minutes */
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -120,7 +120,7 @@ static void (*quirk_no_way_out)(int bank
+  * CPU/chipset specific EDAC code can register a notifier call here to print
+  * MCE errors in a human-readable form.
+  */
+-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
++BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
+ /* Do initial initialization of a struct mce */
+ void mce_setup(struct mce *m)
+@@ -213,13 +213,13 @@ void mce_register_decode_chain(struct no
+       if (nb != &mce_srao_nb && nb->priority == INT_MAX)
+               nb->priority -= 1;
+-      atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
++      blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
+ }
+ EXPORT_SYMBOL_GPL(mce_register_decode_chain);
+ void mce_unregister_decode_chain(struct notifier_block *nb)
+ {
+-      atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
++      blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
+ }
+ EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
+@@ -272,8 +272,6 @@ struct mca_msr_regs msr_ops = {
+ static void print_mce(struct mce *m)
+ {
+-      int ret = 0;
+-
+       pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
+              m->extcpu, m->mcgstatus, m->bank, m->status);
+@@ -309,14 +307,6 @@ static void print_mce(struct mce *m)
+               m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
+               cpu_data(m->extcpu).microcode);
+-      /*
+-       * Print out human-readable details about the MCE error,
+-       * (if the CPU has an implementation for that)
+-       */
+-      ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+-      if (ret == NOTIFY_STOP)
+-              return;
+-
+       pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
+ }
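
Reduced to just the notifier API involved, the conversion looks roughly like the sketch below. It is illustrative only: decode_chain, record_err() and err_list_lock are stand-ins for x86_mce_decoder_chain, nfit_handle_mce() and the NFIT error-list mutex. An ATOMIC_NOTIFIER_HEAD chain invokes its callbacks in a context where sleeping is forbidden, so a mutex_lock() inside a callback trips might_sleep(); a BLOCKING_NOTIFIER_HEAD chain runs its callbacks in process context, where the mutex is fine. That is also why the chain is now invoked only from mce_gen_pool_process(), which runs in process context (driven from a workqueue), and no longer from the atomic print_mce() path.

  #include <linux/mutex.h>
  #include <linux/notifier.h>

  static BLOCKING_NOTIFIER_HEAD(decode_chain);    /* was ATOMIC_NOTIFIER_HEAD() */
  static DEFINE_MUTEX(err_list_lock);             /* stand-in for the NFIT list mutex */

  /* Consumer callback: taking a mutex is legal now that the chain is blocking. */
  static int record_err(struct notifier_block *nb, unsigned long val, void *data)
  {
          mutex_lock(&err_list_lock);
          /* ... add the reported error location to a list, as nfit_handle_mce() does ... */
          mutex_unlock(&err_list_lock);
          return NOTIFY_OK;
  }

  static struct notifier_block record_err_nb = {
          .notifier_call = record_err,
  };

  /* Registration mirrors mce_register_decode_chain()/mce_unregister_decode_chain(). */
  static void register_decoder(void)
  {
          blocking_notifier_chain_register(&decode_chain, &record_err_nb);
  }

  /* Producer side: must itself be called from process context, e.g. a work item. */
  static void decode_one(void *mce_record)
  {
          blocking_notifier_call_chain(&decode_chain, 0, mce_record);
  }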