From 73211426052071a1c8f23f581b37a0612f4ff1fe Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 20 Jun 2012 09:20:45 -0700 Subject: [PATCH] 3.4-stable patches added patches: edac-avoid-mce-decoding-crash-after-edac-driver-unloaded.patch edac-fix-the-error-about-memory-type-detection-on-sandybridge.patch --- ...ing-crash-after-edac-driver-unloaded.patch | 157 ++++++++++++++++++ ...memory-type-detection-on-sandybridge.patch | 31 ++++ queue-3.4/series | 2 + 3 files changed, 190 insertions(+) create mode 100644 queue-3.4/edac-avoid-mce-decoding-crash-after-edac-driver-unloaded.patch create mode 100644 queue-3.4/edac-fix-the-error-about-memory-type-detection-on-sandybridge.patch diff --git a/queue-3.4/edac-avoid-mce-decoding-crash-after-edac-driver-unloaded.patch b/queue-3.4/edac-avoid-mce-decoding-crash-after-edac-driver-unloaded.patch new file mode 100644 index 00000000000..c0b4d5274ad --- /dev/null +++ b/queue-3.4/edac-avoid-mce-decoding-crash-after-edac-driver-unloaded.patch @@ -0,0 +1,157 @@ +From e35fca4791fcdd43dc1fd769797df40c562ab491 Mon Sep 17 00:00:00 2001 +From: Chen Gong +Date: Tue, 8 May 2012 20:40:12 -0300 +Subject: edac: avoid mce decoding crash after edac driver unloaded + +From: Chen Gong + +commit e35fca4791fcdd43dc1fd769797df40c562ab491 upstream. + +Some edac drivers register themselves as mce decoders via +notifier_chain. But in current notifier_chain implementation logic, +it doesn't accept same notifier registered twice. If so, it will be +wrong when adding/removing the element from the list. For example, +on one SandyBridge platform, remove module sb_edac and then trigger +one error, it will hit oops because it has no mce decoder registered +but related notifier_chain still points to an invalid callback +function. Here is an example: + +Call Trace: + [] atomic_notifier_call_chain+0x1a/0x20 + [] mce_log+0x46/0x180 + [] apei_mce_report_mem_error+0x4a/0x60 + [] ghes_do_proc+0x192/0x210 + [] ghes_proc+0x46/0x70 + [] ghes_notify_sci+0x48/0x80 + [] notifier_call_chain+0x55/0x80 + [] __blocking_notifier_call_chain+0x5a/0x80 + [] ? acpi_os_wait_events_complete+0x23/0x23 + [] blocking_notifier_call_chain+0x16/0x20 + [] acpi_hed_notify+0x19/0x1b + [] acpi_device_notify+0x19/0x1b + [] acpi_ev_notify_dispatch+0x67/0x7f + [] acpi_os_execute_deferred+0x29/0x36 + [] process_one_work+0x132/0x450 + [] worker_thread+0x17b/0x3c0 + [] ? manage_workers+0x120/0x120 + [] kthread+0x9e/0xb0 + [] kernel_thread_helper+0x4/0x10 + [] ? kthread_freezable_should_stop+0x70/0x70 + [] ? gs_change+0x13/0x13 +Code: f3 49 89 d4 45 85 ed 4d 89 c6 48 8b 0f 74 48 48 85 c9 75 17 eb 41 +0f 1f 80 00 00 00 00 41 83 ed 01 4c 89 f9 74 22 4d 85 ff 74 1d <4c> 8b +79 08 4c 89 e2 48 89 de 48 89 cf ff 11 4d 85 f6 74 04 41 +RIP [] notifier_call_chain+0x46/0x80 + RSP +CR2: ffffffffa01af838 +---[ end trace 0100930068e73e6f ]--- +BUG: unable to handle kernel paging request at fffffffffffffff8 +IP: [] kthread_data+0x10/0x20 +PGD 1a0d067 PUD 1a0e067 PMD 0 +Oops: 0000 [#2] SMP + +Only i7core_edac and sb_edac have such issues because they have more +than one memory controller which means they have to register mce +decoder many times. + +Signed-off-by: Chen Gong +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/edac/i7core_edac.c | 15 ++++----------- + drivers/edac/sb_edac.c | 8 ++++---- + 2 files changed, 8 insertions(+), 15 deletions(-) + +--- a/drivers/edac/i7core_edac.c ++++ b/drivers/edac/i7core_edac.c +@@ -1932,12 +1932,6 @@ static int i7core_mce_check_error(struct + if (mce->bank != 8) + return NOTIFY_DONE; + +-#ifdef CONFIG_SMP +- /* Only handle if it is the right mc controller */ +- if (mce->socketid != pvt->i7core_dev->socket) +- return NOTIFY_DONE; +-#endif +- + smp_rmb(); + if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { + smp_wmb(); +@@ -2234,8 +2228,6 @@ static void i7core_unregister_mci(struct + if (pvt->enable_scrub) + disable_sdram_scrub_setting(mci); + +- mce_unregister_decode_chain(&i7_mce_dec); +- + /* Disable EDAC polling */ + i7core_pci_ctl_release(pvt); + +@@ -2336,8 +2328,6 @@ static int i7core_register_mci(struct i7 + /* DCLK for scrub rate setting */ + pvt->dclk_freq = get_dclk_freq(); + +- mce_register_decode_chain(&i7_mce_dec); +- + return 0; + + fail0: +@@ -2481,8 +2471,10 @@ static int __init i7core_init(void) + + pci_rc = pci_register_driver(&i7core_driver); + +- if (pci_rc >= 0) ++ if (pci_rc >= 0) { ++ mce_register_decode_chain(&i7_mce_dec); + return 0; ++ } + + i7core_printk(KERN_ERR, "Failed to register device with error %d.\n", + pci_rc); +@@ -2498,6 +2490,7 @@ static void __exit i7core_exit(void) + { + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&i7core_driver); ++ mce_unregister_decode_chain(&i7_mce_dec); + } + + module_init(i7core_init); +--- a/drivers/edac/sb_edac.c ++++ b/drivers/edac/sb_edac.c +@@ -1669,8 +1669,6 @@ static void sbridge_unregister_mci(struc + debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", + __func__, mci, &sbridge_dev->pdev[0]->dev); + +- mce_unregister_decode_chain(&sbridge_mce_dec); +- + /* Remove MC sysfs nodes */ + edac_mc_del_mc(mci->dev); + +@@ -1738,7 +1736,6 @@ static int sbridge_register_mci(struct s + goto fail0; + } + +- mce_register_decode_chain(&sbridge_mce_dec); + return 0; + + fail0: +@@ -1867,8 +1864,10 @@ static int __init sbridge_init(void) + + pci_rc = pci_register_driver(&sbridge_driver); + +- if (pci_rc >= 0) ++ if (pci_rc >= 0) { ++ mce_register_decode_chain(&sbridge_mce_dec); + return 0; ++ } + + sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", + pci_rc); +@@ -1884,6 +1883,7 @@ static void __exit sbridge_exit(void) + { + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&sbridge_driver); ++ mce_unregister_decode_chain(&sbridge_mce_dec); + } + + module_init(sbridge_init); diff --git a/queue-3.4/edac-fix-the-error-about-memory-type-detection-on-sandybridge.patch b/queue-3.4/edac-fix-the-error-about-memory-type-detection-on-sandybridge.patch new file mode 100644 index 00000000000..4c2f7604b9f --- /dev/null +++ b/queue-3.4/edac-fix-the-error-about-memory-type-detection-on-sandybridge.patch @@ -0,0 +1,31 @@ +From 2cbb587d3bc41a305168e91b4f3c5b6944a12566 Mon Sep 17 00:00:00 2001 +From: Chen Gong +Date: Mon, 14 May 2012 05:51:26 -0300 +Subject: edac: fix the error about memory type detection on SandyBridge + +From: Chen Gong + +commit 2cbb587d3bc41a305168e91b4f3c5b6944a12566 upstream. + +On SandyBridge, DDRIOA(Dev: 17 Func: 0 Offset: 328) is used +to detect whether DIMM is RDIMM/LRDIMM, not TA(Dev: 15 Func: 0). + +Signed-off-by: Chen Gong +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/edac/sb_edac.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/edac/sb_edac.c ++++ b/drivers/edac/sb_edac.c +@@ -599,7 +599,7 @@ static int get_dimm_config(const struct + pvt->is_close_pg = false; + } + +- pci_read_config_dword(pvt->pci_ta, RANK_CFG_A, ®); ++ pci_read_config_dword(pvt->pci_ddrio, RANK_CFG_A, ®); + if (IS_RDIMM_ENABLED(reg)) { + /* FIXME: Can also be LRDIMM */ + debugf0("Memory is registered\n"); diff --git a/queue-3.4/series b/queue-3.4/series index 8e8656cb7e3..f9c468bf996 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -46,3 +46,5 @@ usb-serial-enforce-usb-driver-and-usb-serial-driver-match.patch usb-fix-gathering-of-interface-associations.patch asoc-wm8904-fix-gpio-and-micbias-initialisation-for-regmap-conversion.patch hwrng-atmel-rng-fix-data-valid-check.patch +edac-avoid-mce-decoding-crash-after-edac-driver-unloaded.patch +edac-fix-the-error-about-memory-type-detection-on-sandybridge.patch -- 2.47.3