From: Greg Kroah-Hartman Date: Tue, 25 Apr 2017 12:20:53 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v4.4.64~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f772adc14fc0089e242cfaaa780b99e86ba4bbe8;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch x86-mce-make-the-mce-notifier-a-blocking-one.patch --- diff --git a/queue-4.9/device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch b/queue-4.9/device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch new file mode 100644 index 00000000000..7e60e2b1f87 --- /dev/null +++ b/queue-4.9/device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch @@ -0,0 +1,127 @@ +From 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Fri, 7 Apr 2017 16:42:08 -0700 +Subject: device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation + +From: Dan Williams + +commit 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 upstream. + +The following warning triggers with a new unit test that stresses the +device-dax interface. + + =============================== + [ ERR: suspicious RCU usage. ] + 4.11.0-rc4+ #1049 Tainted: G O + ------------------------------- + ./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section! + + other info that might help us debug this: + + rcu_scheduler_active = 2, debug_locks = 0 + 2 locks held by fio/9070: + #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x167/0x4f0 + #1: (rcu_read_lock){......}, at: [] dax_dev_huge_fault+0x32/0x620 [dax] + + Call Trace: + dump_stack+0x86/0xc3 + lockdep_rcu_suspicious+0xd7/0x110 + ___might_sleep+0xac/0x250 + __might_sleep+0x4a/0x80 + __alloc_pages_nodemask+0x23a/0x360 + alloc_pages_current+0xa1/0x1f0 + pte_alloc_one+0x17/0x80 + __pte_alloc+0x1e/0x120 + __get_locked_pte+0x1bf/0x1d0 + insert_pfn.isra.70+0x3a/0x100 + ? lookup_memtype+0xa6/0xd0 + vm_insert_mixed+0x64/0x90 + dax_dev_huge_fault+0x520/0x620 [dax] + ? dax_dev_huge_fault+0x32/0x620 [dax] + dax_dev_fault+0x10/0x20 [dax] + __do_fault+0x1e/0x140 + __handle_mm_fault+0x9af/0x10d0 + handle_mm_fault+0x16d/0x370 + ? handle_mm_fault+0x47/0x370 + __do_page_fault+0x28c/0x4f0 + trace_do_page_fault+0x58/0x2a0 + do_async_page_fault+0x1a/0xa0 + async_page_fault+0x28/0x30 + +Inserting a page table entry may trigger an allocation while we are +holding a read lock to keep the device instance alive for the duration +of the fault. Use srcu for this keep-alive protection. + +Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dax/Kconfig | 1 + + drivers/dax/dax.c | 13 +++++++------ + 2 files changed, 8 insertions(+), 6 deletions(-) + +--- a/drivers/dax/Kconfig ++++ b/drivers/dax/Kconfig +@@ -2,6 +2,7 @@ menuconfig DEV_DAX + tristate "DAX: direct access to differentiated memory" + default m if NVDIMM_DAX + depends on TRANSPARENT_HUGEPAGE ++ select SRCU + help + Support raw access to differentiated (persistence, bandwidth, + latency...) memory via an mmap(2) capable character +--- a/drivers/dax/dax.c ++++ b/drivers/dax/dax.c +@@ -24,6 +24,7 @@ + #include "dax.h" + + static dev_t dax_devt; ++DEFINE_STATIC_SRCU(dax_srcu); + static struct class *dax_class; + static DEFINE_IDA(dax_minor_ida); + static int nr_dax = CONFIG_NR_DEV_DAX; +@@ -59,7 +60,7 @@ struct dax_region { + * @region - parent region + * @dev - device backing the character device + * @cdev - core chardev data +- * @alive - !alive + rcu grace period == no new mappings can be established ++ * @alive - !alive + srcu grace period == no new mappings can be established + * @id - child id in the region + * @num_resources - number of physical address extents in this device + * @res - array of physical address ranges +@@ -437,7 +438,7 @@ static int __dax_dev_pmd_fault(struct da + static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, unsigned int flags) + { +- int rc; ++ int rc, id; + struct file *filp = vma->vm_file; + struct dax_dev *dax_dev = filp->private_data; + +@@ -445,9 +446,9 @@ static int dax_dev_pmd_fault(struct vm_a + current->comm, (flags & FAULT_FLAG_WRITE) + ? "write" : "read", vma->vm_start, vma->vm_end); + +- rcu_read_lock(); ++ id = srcu_read_lock(&dax_srcu); + rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags); +- rcu_read_unlock(); ++ srcu_read_unlock(&dax_srcu, id); + + return rc; + } +@@ -563,11 +564,11 @@ static void unregister_dax_dev(void *dev + * Note, rcu is not protecting the liveness of dax_dev, rcu is + * ensuring that any fault handlers that might have seen + * dax_dev->alive == true, have completed. Any fault handlers +- * that start after synchronize_rcu() has started will abort ++ * that start after synchronize_srcu() has started will abort + * upon seeing dax_dev->alive == false. + */ + dax_dev->alive = false; +- synchronize_rcu(); ++ synchronize_srcu(&dax_srcu); + unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); + cdev_del(cdev); + device_unregister(dev); diff --git a/queue-4.9/series b/queue-4.9/series index fb1ee6d46cd..e98293bddca 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -16,3 +16,6 @@ mac80211-reject-tods-broadcast-data-frames.patch mac80211-fix-mu-mimo-follow-mac-mode.patch ubi-upd-always-flush-after-prepared-for-an-update.patch powerpc-kprobe-fix-oops-when-kprobed-on-stdu-instruction.patch +x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch +x86-mce-make-the-mce-notifier-a-blocking-one.patch +device-dax-switch-to-srcu-fix-rcu_read_lock-vs-pte-allocation.patch diff --git a/queue-4.9/x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch b/queue-4.9/x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch new file mode 100644 index 00000000000..072f6b6d5e6 --- /dev/null +++ b/queue-4.9/x86-mce-amd-give-a-name-to-mca-bank-3-when-accessed-with-legacy-msrs.patch @@ -0,0 +1,53 @@ +From 29f72ce3e4d18066ec75c79c857bee0618a3504b Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam +Date: Thu, 30 Mar 2017 13:17:14 +0200 +Subject: x86/mce/AMD: Give a name to MCA bank 3 when accessed with legacy MSRs + +From: Yazen Ghannam + +commit 29f72ce3e4d18066ec75c79c857bee0618a3504b upstream. + +MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name. +However, MCA bank 3 is defined on Fam17h systems and can be accessed +using legacy MSRs. Without a name we get a stack trace on Fam17h systems +when trying to register sysfs files for bank 3 on kernels that don't +recognize Scalable MCA. + +Call MCA bank 3 "decode_unit" since this is what it represents on +Fam17h. This will allow kernels without SMCA support to see this bank on +Fam17h+ and prevent the stack trace. This will not affect older systems +since this bank is reserved on them, i.e. it'll be ignored. + +Tested on AMD Fam15h and Fam17h systems. + + WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal + kobject: (ffff88085bb256c0): attempted to be registered with empty name! + ... + Call Trace: + kobject_add_internal + kobject_add + kobject_create_and_add + threshold_create_device + threshold_init_device + +Signed-off-by: Yazen Ghannam +Signed-off-by: Borislav Petkov +Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com +Signed-off-by: Thomas Gleixner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -59,7 +59,7 @@ static const char * const th_names[] = { + "load_store", + "insn_fetch", + "combined_unit", +- "", ++ "decode_unit", + "northbridge", + "execution_unit", + }; diff --git a/queue-4.9/x86-mce-make-the-mce-notifier-a-blocking-one.patch b/queue-4.9/x86-mce-make-the-mce-notifier-a-blocking-one.patch new file mode 100644 index 00000000000..6e0f62fd3b1 --- /dev/null +++ b/queue-4.9/x86-mce-make-the-mce-notifier-a-blocking-one.patch @@ -0,0 +1,127 @@ +From 0dc9c639e6553e39c13b2c0d54c8a1b098cb95e2 Mon Sep 17 00:00:00 2001 +From: Vishal Verma +Date: Tue, 18 Apr 2017 20:42:35 +0200 +Subject: x86/mce: Make the MCE notifier a blocking one + +From: Vishal Verma + +commit 0dc9c639e6553e39c13b2c0d54c8a1b098cb95e2 upstream. + +The NFIT MCE handler callback (for handling media errors on NVDIMMs) +takes a mutex to add the location of a memory error to a list. But since +the notifier call chain for machine checks (x86_mce_decoder_chain) is +atomic, we get a lockdep splat like: + + BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 + in_atomic(): 1, irqs_disabled(): 0, pid: 4, name: kworker/0:0 + [..] + Call Trace: + dump_stack + ___might_sleep + __might_sleep + mutex_lock_nested + ? __lock_acquire + nfit_handle_mce + notifier_call_chain + atomic_notifier_call_chain + ? atomic_notifier_call_chain + mce_gen_pool_process + +Convert the notifier to a blocking one which gets to run only in process +context. + +Boris: remove the notifier call in atomic context in print_mce(). For +now, let's print the MCE on the atomic path so that we can make sure +they go out and get logged at least. + +Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error") +Reported-by: Ross Zwisler +Signed-off-by: Vishal Verma +Acked-by: Tony Luck +Cc: Dan Williams +Cc: linux-edac +Cc: x86-ml +Link: http://lkml.kernel.org/r/20170411224457.24777-1-vishal.l.verma@intel.com +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/mcheck/mce-genpool.c | 2 +- + arch/x86/kernel/cpu/mcheck/mce-internal.h | 2 +- + arch/x86/kernel/cpu/mcheck/mce.c | 16 +++------------- + 3 files changed, 5 insertions(+), 15 deletions(-) + +--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c ++++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c +@@ -85,7 +85,7 @@ void mce_gen_pool_process(void) + head = llist_reverse_order(head); + llist_for_each_entry_safe(node, tmp, head, llnode) { + mce = &node->mce; +- atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); ++ blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); + } + } +--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h ++++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h +@@ -13,7 +13,7 @@ enum severity_level { + MCE_PANIC_SEVERITY, + }; + +-extern struct atomic_notifier_head x86_mce_decoder_chain; ++extern struct blocking_notifier_head x86_mce_decoder_chain; + + #define ATTR_LEN 16 + #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ +--- a/arch/x86/kernel/cpu/mcheck/mce.c ++++ b/arch/x86/kernel/cpu/mcheck/mce.c +@@ -120,7 +120,7 @@ static void (*quirk_no_way_out)(int bank + * CPU/chipset specific EDAC code can register a notifier call here to print + * MCE errors in a human-readable form. + */ +-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); ++BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain); + + /* Do initial initialization of a struct mce */ + void mce_setup(struct mce *m) +@@ -213,13 +213,13 @@ void mce_register_decode_chain(struct no + if (nb != &mce_srao_nb && nb->priority == INT_MAX) + nb->priority -= 1; + +- atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); ++ blocking_notifier_chain_register(&x86_mce_decoder_chain, nb); + } + EXPORT_SYMBOL_GPL(mce_register_decode_chain); + + void mce_unregister_decode_chain(struct notifier_block *nb) + { +- atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); ++ blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb); + } + EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); + +@@ -272,8 +272,6 @@ struct mca_msr_regs msr_ops = { + + static void print_mce(struct mce *m) + { +- int ret = 0; +- + pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", + m->extcpu, m->mcgstatus, m->bank, m->status); + +@@ -309,14 +307,6 @@ static void print_mce(struct mce *m) + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, + cpu_data(m->extcpu).microcode); + +- /* +- * Print out human-readable details about the MCE error, +- * (if the CPU has an implementation for that) +- */ +- ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); +- if (ret == NOTIFY_STOP) +- return; +- + pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); + } +