From 5b32d4042e1f1e16242c734406f8eb817c5f97f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 17 Apr 2018 12:58:33 +0200 Subject: [PATCH] 4.15-stable patches added patches: drivers-hv-vmbus-do-not-mark-hv_pcie-as-perf_device.patch ipmi-fix-some-error-cleanup-issues.patch kvm-ppc-book3s-hv-trace_tlbie-must-not-be-called-in-realmode.patch parisc-fix-hpmc-handler-by-increasing-size-to-multiple-of-16-bytes.patch parisc-fix-out-of-array-access-in-match_pci_device.patch pci-hv-fix-2-hang-issues-in-hv_compose_msi_msg.patch pci-hv-serialize-the-present-and-eject-work-items.patch perf-core-fix-use-after-free-in-uprobe_perf_close.patch x86-mce-amd-edac-mce_amd-enumerate-reserved-smca-bank-type.patch x86-mce-amd-get-address-from-already-initialized-block.patch --- ...s-do-not-mark-hv_pcie-as-perf_device.patch | 35 ++++ .../ipmi-fix-some-error-cleanup-issues.patch | 88 +++++++++ ...tlbie-must-not-be-called-in-realmode.patch | 56 ++++++ ...reasing-size-to-multiple-of-16-bytes.patch | 48 +++++ ...-of-array-access-in-match_pci_device.patch | 49 +++++ ...-2-hang-issues-in-hv_compose_msi_msg.patch | 142 ++++++++++++++ ...ize-the-present-and-eject-work-items.patch | 160 ++++++++++++++++ ...-use-after-free-in-uprobe_perf_close.patch | 174 ++++++++++++++++++ queue-4.15/series | 10 + ...md-enumerate-reserved-smca-bank-type.patch | 123 +++++++++++++ ...dress-from-already-initialized-block.patch | 63 +++++++ 11 files changed, 948 insertions(+) create mode 100644 queue-4.15/drivers-hv-vmbus-do-not-mark-hv_pcie-as-perf_device.patch create mode 100644 queue-4.15/ipmi-fix-some-error-cleanup-issues.patch create mode 100644 queue-4.15/kvm-ppc-book3s-hv-trace_tlbie-must-not-be-called-in-realmode.patch create mode 100644 queue-4.15/parisc-fix-hpmc-handler-by-increasing-size-to-multiple-of-16-bytes.patch create mode 100644 queue-4.15/parisc-fix-out-of-array-access-in-match_pci_device.patch create mode 100644 queue-4.15/pci-hv-fix-2-hang-issues-in-hv_compose_msi_msg.patch create mode 
100644 queue-4.15/pci-hv-serialize-the-present-and-eject-work-items.patch create mode 100644 queue-4.15/perf-core-fix-use-after-free-in-uprobe_perf_close.patch create mode 100644 queue-4.15/x86-mce-amd-edac-mce_amd-enumerate-reserved-smca-bank-type.patch create mode 100644 queue-4.15/x86-mce-amd-get-address-from-already-initialized-block.patch diff --git a/queue-4.15/drivers-hv-vmbus-do-not-mark-hv_pcie-as-perf_device.patch b/queue-4.15/drivers-hv-vmbus-do-not-mark-hv_pcie-as-perf_device.patch new file mode 100644 index 00000000000..16aef98df4f --- /dev/null +++ b/queue-4.15/drivers-hv-vmbus-do-not-mark-hv_pcie-as-perf_device.patch @@ -0,0 +1,35 @@ +From 238064f13d057390a8c5e1a6a80f4f0a0ec46499 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Tue, 27 Mar 2018 15:01:02 -0700 +Subject: Drivers: hv: vmbus: do not mark HV_PCIE as perf_device + +From: Dexuan Cui + +commit 238064f13d057390a8c5e1a6a80f4f0a0ec46499 upstream. + +The pci-hyperv driver's channel callback hv_pci_onchannelcallback() is not +really a hot path, so we don't need to mark it as a perf_device, meaning +with this patch all HV_PCIE channels' target_cpu will be CPU0. + +Signed-off-by: Dexuan Cui +Cc: stable@vger.kernel.org +Cc: Stephen Hemminger +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hv/channel_mgmt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -71,7 +71,7 @@ static const struct vmbus_device vmbus_d + /* PCIE */ + { .dev_type = HV_PCIE, + HV_PCIE_GUID, +- .perf_device = true, ++ .perf_device = false, + }, + + /* Synthetic Frame Buffer */ diff --git a/queue-4.15/ipmi-fix-some-error-cleanup-issues.patch b/queue-4.15/ipmi-fix-some-error-cleanup-issues.patch new file mode 100644 index 00000000000..b6027542e3b --- /dev/null +++ b/queue-4.15/ipmi-fix-some-error-cleanup-issues.patch @@ -0,0 +1,88 @@ +From cc095f0ac1f7c200e51a5c2a78a43c9f42049dbb Mon Sep 17 00:00:00 2001 +From: Corey Minyard +Date: Wed, 28 Feb 2018 08:09:49 -0600 +Subject: ipmi: Fix some error cleanup issues + +From: Corey Minyard + +commit cc095f0ac1f7c200e51a5c2a78a43c9f42049dbb upstream. + +device_remove_group() was called on any cleanup, even if the +device attrs had not been added yet. That can occur in certain +error scenarios, so add a flag to know if it has been added. + +Also make sure we remove the dev if we added it ourselves. + +Signed-off-by: Corey Minyard +Cc: stable@vger.kernel.org # 4.15 +Cc: Laura Abbott +Tested-by: Bill Perkins +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/ipmi/ipmi_si_intf.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/drivers/char/ipmi/ipmi_si_intf.c ++++ b/drivers/char/ipmi/ipmi_si_intf.c +@@ -252,6 +252,9 @@ struct smi_info { + /* Default driver model device. */ + struct platform_device *pdev; + ++ /* Have we added the device group to the device? */ ++ bool dev_group_added; ++ + /* Counters and things for the proc filesystem. 
*/ + atomic_t stats[SI_NUM_STATS]; + +@@ -2025,8 +2028,8 @@ int ipmi_si_add_smi(struct si_sm_io *io) + if (initialized) { + rv = try_smi_init(new_smi); + if (rv) { +- mutex_unlock(&smi_infos_lock); + cleanup_one_si(new_smi); ++ mutex_unlock(&smi_infos_lock); + return rv; + } + } +@@ -2185,6 +2188,7 @@ static int try_smi_init(struct smi_info + rv); + goto out_err_stop_timer; + } ++ new_smi->dev_group_added = true; + + rv = ipmi_register_smi(&handlers, + new_smi, +@@ -2238,7 +2242,10 @@ static int try_smi_init(struct smi_info + return 0; + + out_err_remove_attrs: +- device_remove_group(new_smi->io.dev, &ipmi_si_dev_attr_group); ++ if (new_smi->dev_group_added) { ++ device_remove_group(new_smi->io.dev, &ipmi_si_dev_attr_group); ++ new_smi->dev_group_added = false; ++ } + dev_set_drvdata(new_smi->io.dev, NULL); + + out_err_stop_timer: +@@ -2286,6 +2293,7 @@ out_err: + else + platform_device_put(new_smi->pdev); + new_smi->pdev = NULL; ++ new_smi->io.dev = NULL; + } + + kfree(init_name); +@@ -2382,8 +2390,10 @@ static void cleanup_one_si(struct smi_in + } + } + +- device_remove_group(to_clean->io.dev, &ipmi_si_dev_attr_group); +- dev_set_drvdata(to_clean->io.dev, NULL); ++ if (to_clean->dev_group_added) ++ device_remove_group(to_clean->io.dev, &ipmi_si_dev_attr_group); ++ if (to_clean->io.dev) ++ dev_set_drvdata(to_clean->io.dev, NULL); + + list_del(&to_clean->link); + diff --git a/queue-4.15/kvm-ppc-book3s-hv-trace_tlbie-must-not-be-called-in-realmode.patch b/queue-4.15/kvm-ppc-book3s-hv-trace_tlbie-must-not-be-called-in-realmode.patch new file mode 100644 index 00000000000..b8e1fc3ac19 --- /dev/null +++ b/queue-4.15/kvm-ppc-book3s-hv-trace_tlbie-must-not-be-called-in-realmode.patch @@ -0,0 +1,56 @@ +From 19ce7909ed11c49f7eddf59e7f49cd3062bf83d5 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Fri, 6 Apr 2018 03:56:30 +1000 +Subject: KVM: PPC: Book3S HV: trace_tlbie must not be called in realmode + +From: Nicholas Piggin + +commit 
19ce7909ed11c49f7eddf59e7f49cd3062bf83d5 upstream. + +This crashes with a "Bad real address for load" attempting to load +from the vmalloc region in realmode (faulting address is in DAR). + + Oops: Bad interrupt in KVM entry/exit code, sig: 6 [#1] + LE SMP NR_CPUS=2048 NUMA PowerNV + CPU: 53 PID: 6582 Comm: qemu-system-ppc Not tainted 4.16.0-01530-g43d1859f0994 + NIP: c0000000000155ac LR: c0000000000c2430 CTR: c000000000015580 + REGS: c000000fff76dd80 TRAP: 0200 Not tainted (4.16.0-01530-g43d1859f0994) + MSR: 9000000000201003 CR: 48082222 XER: 00000000 + CFAR: 0000000102900ef0 DAR: d00017fffd941a28 DSISR: 00000040 SOFTE: 3 + NIP [c0000000000155ac] perf_trace_tlbie+0x2c/0x1a0 + LR [c0000000000c2430] do_tlbies+0x230/0x2f0 + +I suspect the reason is the per-cpu data is not in the linear chunk. +This could be restored if that was able to be fixed, but for now, +just remove the tracepoints. + +Fixes: 0428491cba92 ("powerpc/mm: Trace tlbie(l) instructions") +Cc: stable@vger.kernel.org # v4.13+ +Signed-off-by: Nicholas Piggin +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv_rm_mmu.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c ++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c +@@ -470,8 +470,6 @@ static void do_tlbies(struct kvm *kvm, u + for (i = 0; i < npages; ++i) { + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rbvalues[i]), "r" (kvm->arch.lpid)); +- trace_tlbie(kvm->arch.lpid, 0, rbvalues[i], +- kvm->arch.lpid, 0, 0, 0); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; +@@ -481,8 +479,6 @@ static void do_tlbies(struct kvm *kvm, u + for (i = 0; i < npages; ++i) { + asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : : + "r" (rbvalues[i]), "r" (0)); +- trace_tlbie(kvm->arch.lpid, 1, rbvalues[i], +- 0, 0, 0, 0); + } + asm volatile("ptesync" : : : "memory"); + } diff --git 
a/queue-4.15/parisc-fix-hpmc-handler-by-increasing-size-to-multiple-of-16-bytes.patch b/queue-4.15/parisc-fix-hpmc-handler-by-increasing-size-to-multiple-of-16-bytes.patch new file mode 100644 index 00000000000..fbaceee9778 --- /dev/null +++ b/queue-4.15/parisc-fix-hpmc-handler-by-increasing-size-to-multiple-of-16-bytes.patch @@ -0,0 +1,48 @@ +From d5654e156bc4d68a87bbaa6d7e020baceddf6e68 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Sat, 24 Mar 2018 21:18:25 +0100 +Subject: parisc: Fix HPMC handler by increasing size to multiple of 16 bytes + +From: Helge Deller + +commit d5654e156bc4d68a87bbaa6d7e020baceddf6e68 upstream. + +Make sure that the HPMC (High Priority Machine Check) handler is 16-byte +aligned and that its length in the IVT is a multiple of 16 bytes. +Otherwise PDC may decide not to call the HPMC crash handler. + +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/kernel/hpmc.S | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/arch/parisc/kernel/hpmc.S ++++ b/arch/parisc/kernel/hpmc.S +@@ -84,6 +84,7 @@ END(hpmc_pim_data) + .text + + .import intr_save, code ++ .align 16 + ENTRY_CFI(os_hpmc) + .os_hpmc: + +@@ -300,12 +301,15 @@ os_hpmc_6: + + b . 
+ nop ++ .align 16 /* make function length multiple of 16 bytes */ + ENDPROC_CFI(os_hpmc) + .os_hpmc_end: + + + __INITRODATA ++.globl os_hpmc_size + .align 4 +- .export os_hpmc_size ++ .type os_hpmc_size, @object ++ .size os_hpmc_size, 4 + os_hpmc_size: + .word .os_hpmc_end-.os_hpmc diff --git a/queue-4.15/parisc-fix-out-of-array-access-in-match_pci_device.patch b/queue-4.15/parisc-fix-out-of-array-access-in-match_pci_device.patch new file mode 100644 index 00000000000..abc80bfe6d9 --- /dev/null +++ b/queue-4.15/parisc-fix-out-of-array-access-in-match_pci_device.patch @@ -0,0 +1,49 @@ +From 615b2665fd20c327b631ff1e79426775de748094 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Sun, 25 Mar 2018 23:53:22 +0200 +Subject: parisc: Fix out of array access in match_pci_device() + +From: Helge Deller + +commit 615b2665fd20c327b631ff1e79426775de748094 upstream. + +As found by the ubsan checker, the value of the 'index' variable can be +out of range for the bc[] array: + +UBSAN: Undefined behaviour in arch/parisc/kernel/drivers.c:655:21 +index 6 is out of range for type 'char [6]' +Backtrace: + [<104fa850>] __ubsan_handle_out_of_bounds+0x68/0x80 + [<1019d83c>] check_parent+0xc0/0x170 + [<1019d91c>] descend_children+0x30/0x6c + [<1059e164>] device_for_each_child+0x60/0x98 + [<1019cd54>] parse_tree_node+0x40/0x54 + [<1019d86c>] check_parent+0xf0/0x170 + [<1019d91c>] descend_children+0x30/0x6c + [<1059e164>] device_for_each_child+0x60/0x98 + [<1019d938>] descend_children+0x4c/0x6c + [<1059e164>] device_for_each_child+0x60/0x98 + [<1019cd54>] parse_tree_node+0x40/0x54 + [<1019cffc>] hwpath_to_device+0xa4/0xc4 + +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/kernel/drivers.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/parisc/kernel/drivers.c ++++ b/arch/parisc/kernel/drivers.c +@@ -651,6 +651,10 @@ static int match_pci_device(struct devic + (modpath->mod == PCI_FUNC(devfn))); + } + ++ /* index 
might be out of bounds for bc[] */ ++ if (index >= 6) ++ return 0; ++ + id = PCI_SLOT(pdev->devfn) | (PCI_FUNC(pdev->devfn) << 5); + return (modpath->bc[index] == id); + } diff --git a/queue-4.15/pci-hv-fix-2-hang-issues-in-hv_compose_msi_msg.patch b/queue-4.15/pci-hv-fix-2-hang-issues-in-hv_compose_msi_msg.patch new file mode 100644 index 00000000000..f5cc086218b --- /dev/null +++ b/queue-4.15/pci-hv-fix-2-hang-issues-in-hv_compose_msi_msg.patch @@ -0,0 +1,142 @@ +From de0aa7b2f97d348ba7d1e17a00744c989baa0cb6 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Thu, 15 Mar 2018 14:21:08 +0000 +Subject: PCI: hv: Fix 2 hang issues in hv_compose_msi_msg() + +From: Dexuan Cui + +commit de0aa7b2f97d348ba7d1e17a00744c989baa0cb6 upstream. + +1. With the patch "x86/vector/msi: Switch to global reservation mode", +the recent v4.15 and newer kernels always hang for 1-vCPU Hyper-V VM +with SR-IOV. This is because when we reach hv_compose_msi_msg() by +request_irq() -> request_threaded_irq() ->__setup_irq()->irq_startup() +-> __irq_startup() -> irq_domain_activate_irq() -> ... -> +msi_domain_activate() -> ... -> hv_compose_msi_msg(), local irq is +disabled in __setup_irq(). + +Note: when we reach hv_compose_msi_msg() by another code path: +pci_enable_msix_range() -> ... -> irq_domain_activate_irq() -> ... -> +hv_compose_msi_msg(), local irq is not disabled. + +hv_compose_msi_msg() depends on an interrupt from the host. +With interrupts disabled, a UP VM always hangs in the busy loop in +the function, because the interrupt callback hv_pci_onchannelcallback() +can not be called. + +We can do nothing but work it around by polling the channel. This +is ugly, but we don't have any other choice. + +2. If the host is ejecting the VF device before we reach +hv_compose_msi_msg(), in a UP VM, we can hang in hv_compose_msi_msg() +forever, because at this time the host doesn't respond to the +CREATE_INTERRUPT request. 
This issue has existed since the first day the +pci-hyperv driver appeared in the kernel. + +Luckily, this can also be worked around by polling the channel +for the PCI_EJECT message and hpdev->state, and by checking the +PCI vendor ID. + +Note: actually the above 2 issues also happen to a SMP VM, if +"hbus->hdev->channel->target_cpu == smp_processor_id()" is true. + +Fixes: 4900be83602b ("x86/vector/msi: Switch to global reservation mode") +Tested-by: Adrian Suhov +Tested-by: Chris Valean +Signed-off-by: Dexuan Cui +Signed-off-by: Lorenzo Pieralisi +Reviewed-by: Michael Kelley +Acked-by: Haiyang Zhang +Cc: +Cc: Stephen Hemminger +Cc: K. Y. Srinivasan +Cc: Vitaly Kuznetsov +Cc: Jack Morgenstein +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/host/pci-hyperv.c | 58 +++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 57 insertions(+), 1 deletion(-) + +--- a/drivers/pci/host/pci-hyperv.c ++++ b/drivers/pci/host/pci-hyperv.c +@@ -531,6 +531,8 @@ struct hv_pci_compl { + s32 completion_status; + }; + ++static void hv_pci_onchannelcallback(void *context); ++ + /** + * hv_pci_generic_compl() - Invoked for a completion packet + * @context: Set up by the sender of the packet. +@@ -675,6 +677,31 @@ static void _hv_pcifront_read_config(str + } + } + ++static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev) ++{ ++ u16 ret; ++ unsigned long flags; ++ void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + ++ PCI_VENDOR_ID; ++ ++ spin_lock_irqsave(&hpdev->hbus->config_lock, flags); ++ ++ /* Choose the function to be read. (See comment above) */ ++ writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); ++ /* Make sure the function was chosen before we start reading. */ ++ mb(); ++ /* Read from that function's config space. */ ++ ret = readw(addr); ++ /* ++ * mb() is not required here, because the spin_unlock_irqrestore() ++ * is a barrier. 
++ */ ++ ++ spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); ++ ++ return ret; ++} ++ + /** + * _hv_pcifront_write_config() - Internal PCI config write + * @hpdev: The PCI driver's representation of the device +@@ -1117,8 +1144,37 @@ static void hv_compose_msi_msg(struct ir + * Since this function is called with IRQ locks held, can't + * do normal wait for completion; instead poll. + */ +- while (!try_wait_for_completion(&comp.comp_pkt.host_event)) ++ while (!try_wait_for_completion(&comp.comp_pkt.host_event)) { ++ /* 0xFFFF means an invalid PCI VENDOR ID. */ ++ if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) { ++ dev_err_once(&hbus->hdev->device, ++ "the device has gone\n"); ++ goto free_int_desc; ++ } ++ ++ /* ++ * When the higher level interrupt code calls us with ++ * interrupt disabled, we must poll the channel by calling ++ * the channel callback directly when channel->target_cpu is ++ * the current CPU. When the higher level interrupt code ++ * calls us with interrupt enabled, let's add the ++ * local_bh_disable()/enable() to avoid race. 
++ */ ++ local_bh_disable(); ++ ++ if (hbus->hdev->channel->target_cpu == smp_processor_id()) ++ hv_pci_onchannelcallback(hbus); ++ ++ local_bh_enable(); ++ ++ if (hpdev->state == hv_pcichild_ejecting) { ++ dev_err_once(&hbus->hdev->device, ++ "the device is being ejected\n"); ++ goto free_int_desc; ++ } ++ + udelay(100); ++ } + + if (comp.comp_pkt.completion_status < 0) { + dev_err(&hbus->hdev->device, diff --git a/queue-4.15/pci-hv-serialize-the-present-and-eject-work-items.patch b/queue-4.15/pci-hv-serialize-the-present-and-eject-work-items.patch new file mode 100644 index 00000000000..76ab939d198 --- /dev/null +++ b/queue-4.15/pci-hv-serialize-the-present-and-eject-work-items.patch @@ -0,0 +1,160 @@ +From 021ad274d7dc31611d4f47f7dd4ac7a224526f30 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Thu, 15 Mar 2018 14:20:53 +0000 +Subject: PCI: hv: Serialize the present and eject work items + +From: Dexuan Cui + +commit 021ad274d7dc31611d4f47f7dd4ac7a224526f30 upstream. + +When we hot-remove the device, we first receive a PCI_EJECT message and +then receive a PCI_BUS_RELATIONS message with bus_rel->device_count == 0. + +The first message is offloaded to hv_eject_device_work(), and the second +is offloaded to pci_devices_present_work(). Both the paths can be running +list_del(&hpdev->list_entry), causing general protection fault, because +system_wq can run them concurrently. + +The patch eliminates the race condition. + +Since access to present/eject work items is serialized, we do not need the +hbus->enum_sem anymore, so remove it. 
+ +Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs") +Link: https://lkml.kernel.org/r/KL1P15301MB00064DA6B4D221123B5241CFBFD70@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM +Tested-by: Adrian Suhov +Tested-by: Chris Valean +Signed-off-by: Dexuan Cui +[lorenzo.pieralisi@arm.com: squashed semaphore removal patch] +Signed-off-by: Lorenzo Pieralisi +Reviewed-by: Michael Kelley +Acked-by: Haiyang Zhang +Cc: # v4.6+ +Cc: Vitaly Kuznetsov +Cc: Jack Morgenstein +Cc: Stephen Hemminger +Cc: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/host/pci-hyperv.c | 34 ++++++++++++++++------------------ + 1 file changed, 16 insertions(+), 18 deletions(-) + +--- a/drivers/pci/host/pci-hyperv.c ++++ b/drivers/pci/host/pci-hyperv.c +@@ -457,7 +457,6 @@ struct hv_pcibus_device { + spinlock_t device_list_lock; /* Protect lists below */ + void __iomem *cfg_addr; + +- struct semaphore enum_sem; + struct list_head resources_for_children; + + struct list_head children; +@@ -471,6 +470,8 @@ struct hv_pcibus_device { + struct retarget_msi_interrupt retarget_msi_interrupt_params; + + spinlock_t retarget_msi_interrupt_lock; ++ ++ struct workqueue_struct *wq; + }; + + /* +@@ -1600,12 +1601,8 @@ static struct hv_pci_dev *get_pcichild_w + * It must also treat the omission of a previously observed device as + * notification that the device no longer exists. + * +- * Note that this function is a work item, and it may not be +- * invoked in the order that it was queued. Back to back +- * updates of the list of present devices may involve queuing +- * multiple work items, and this one may run before ones that +- * were sent later. As such, this function only does something +- * if is the last one in the queue. ++ * Note that this function is serialized with hv_eject_device_work(), ++ * because both are pushed to the ordered workqueue hbus->wq. 
+ */ + static void pci_devices_present_work(struct work_struct *work) + { +@@ -1626,11 +1623,6 @@ static void pci_devices_present_work(str + + INIT_LIST_HEAD(&removed); + +- if (down_interruptible(&hbus->enum_sem)) { +- put_hvpcibus(hbus); +- return; +- } +- + /* Pull this off the queue and process it if it was the last one. */ + spin_lock_irqsave(&hbus->device_list_lock, flags); + while (!list_empty(&hbus->dr_list)) { +@@ -1647,7 +1639,6 @@ static void pci_devices_present_work(str + spin_unlock_irqrestore(&hbus->device_list_lock, flags); + + if (!dr) { +- up(&hbus->enum_sem); + put_hvpcibus(hbus); + return; + } +@@ -1734,7 +1725,6 @@ static void pci_devices_present_work(str + break; + } + +- up(&hbus->enum_sem); + put_hvpcibus(hbus); + kfree(dr); + } +@@ -1780,7 +1770,7 @@ static void hv_pci_devices_present(struc + spin_unlock_irqrestore(&hbus->device_list_lock, flags); + + get_hvpcibus(hbus); +- schedule_work(&dr_wrk->wrk); ++ queue_work(hbus->wq, &dr_wrk->wrk); + } + + /** +@@ -1858,7 +1848,7 @@ static void hv_pci_eject_device(struct h + get_pcichild(hpdev, hv_pcidev_ref_pnp); + INIT_WORK(&hpdev->wrk, hv_eject_device_work); + get_hvpcibus(hpdev->hbus); +- schedule_work(&hpdev->wrk); ++ queue_work(hpdev->hbus->wq, &hpdev->wrk); + } + + /** +@@ -2471,13 +2461,18 @@ static int hv_pci_probe(struct hv_device + spin_lock_init(&hbus->config_lock); + spin_lock_init(&hbus->device_list_lock); + spin_lock_init(&hbus->retarget_msi_interrupt_lock); +- sema_init(&hbus->enum_sem, 1); + init_completion(&hbus->remove_event); ++ hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, ++ hbus->sysdata.domain); ++ if (!hbus->wq) { ++ ret = -ENOMEM; ++ goto free_bus; ++ } + + ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0, + hv_pci_onchannelcallback, hbus); + if (ret) +- goto free_bus; ++ goto destroy_wq; + + hv_set_drvdata(hdev, hbus); + +@@ -2546,6 +2541,8 @@ free_config: + hv_free_config_window(hbus); + close: + vmbus_close(hdev->channel); ++destroy_wq: ++ 
destroy_workqueue(hbus->wq); + free_bus: + free_page((unsigned long)hbus); + return ret; +@@ -2625,6 +2622,7 @@ static int hv_pci_remove(struct hv_devic + irq_domain_free_fwnode(hbus->sysdata.fwnode); + put_hvpcibus(hbus); + wait_for_completion(&hbus->remove_event); ++ destroy_workqueue(hbus->wq); + free_page((unsigned long)hbus); + return 0; + } diff --git a/queue-4.15/perf-core-fix-use-after-free-in-uprobe_perf_close.patch b/queue-4.15/perf-core-fix-use-after-free-in-uprobe_perf_close.patch new file mode 100644 index 00000000000..359ce239204 --- /dev/null +++ b/queue-4.15/perf-core-fix-use-after-free-in-uprobe_perf_close.patch @@ -0,0 +1,174 @@ +From 621b6d2ea297d0fb6030452c5bcd221f12165fcf Mon Sep 17 00:00:00 2001 +From: Prashant Bhole +Date: Mon, 9 Apr 2018 19:03:46 +0900 +Subject: perf/core: Fix use-after-free in uprobe_perf_close() + +From: Prashant Bhole + +commit 621b6d2ea297d0fb6030452c5bcd221f12165fcf upstream. + +A use-after-free bug was caught by KASAN while running usdt related +code (BCC project. bcc/tests/python/test_usdt2.py): + + ================================================================== + BUG: KASAN: use-after-free in uprobe_perf_close+0x222/0x3b0 + Read of size 4 at addr ffff880384f9b4a4 by task test_usdt2.py/870 + + CPU: 4 PID: 870 Comm: test_usdt2.py Tainted: G W 4.16.0-next-20180409 #215 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + Call Trace: + dump_stack+0xc7/0x15b + ? show_regs_print_info+0x5/0x5 + ? printk+0x9c/0xc3 + ? kmsg_dump_rewind_nolock+0x6e/0x6e + ? uprobe_perf_close+0x222/0x3b0 + print_address_description+0x83/0x3a0 + ? uprobe_perf_close+0x222/0x3b0 + kasan_report+0x1dd/0x460 + ? uprobe_perf_close+0x222/0x3b0 + uprobe_perf_close+0x222/0x3b0 + ? probes_open+0x180/0x180 + ? free_filters_list+0x290/0x290 + trace_uprobe_register+0x1bb/0x500 + ? perf_event_attach_bpf_prog+0x310/0x310 + ? 
probe_event_disable+0x4e0/0x4e0 + perf_uprobe_destroy+0x63/0xd0 + _free_event+0x2bc/0xbd0 + ? lockdep_rcu_suspicious+0x100/0x100 + ? ring_buffer_attach+0x550/0x550 + ? kvm_sched_clock_read+0x1a/0x30 + ? perf_event_release_kernel+0x3e4/0xc00 + ? __mutex_unlock_slowpath+0x12e/0x540 + ? wait_for_completion+0x430/0x430 + ? lock_downgrade+0x3c0/0x3c0 + ? lock_release+0x980/0x980 + ? do_raw_spin_trylock+0x118/0x150 + ? do_raw_spin_unlock+0x121/0x210 + ? do_raw_spin_trylock+0x150/0x150 + perf_event_release_kernel+0x5d4/0xc00 + ? put_event+0x30/0x30 + ? fsnotify+0xd2d/0xea0 + ? sched_clock_cpu+0x18/0x1a0 + ? __fsnotify_update_child_dentry_flags.part.0+0x1b0/0x1b0 + ? pvclock_clocksource_read+0x152/0x2b0 + ? pvclock_read_flags+0x80/0x80 + ? kvm_sched_clock_read+0x1a/0x30 + ? sched_clock_cpu+0x18/0x1a0 + ? pvclock_clocksource_read+0x152/0x2b0 + ? locks_remove_file+0xec/0x470 + ? pvclock_read_flags+0x80/0x80 + ? fcntl_setlk+0x880/0x880 + ? ima_file_free+0x8d/0x390 + ? lockdep_rcu_suspicious+0x100/0x100 + ? ima_file_check+0x110/0x110 + ? fsnotify+0xea0/0xea0 + ? kvm_sched_clock_read+0x1a/0x30 + ? rcu_note_context_switch+0x600/0x600 + perf_release+0x21/0x40 + __fput+0x264/0x620 + ? fput+0xf0/0xf0 + ? do_raw_spin_unlock+0x121/0x210 + ? do_raw_spin_trylock+0x150/0x150 + ? SyS_fchdir+0x100/0x100 + ? fsnotify+0xea0/0xea0 + task_work_run+0x14b/0x1e0 + ? task_work_cancel+0x1c0/0x1c0 + ? copy_fd_bitmaps+0x150/0x150 + ? vfs_read+0xe5/0x260 + exit_to_usermode_loop+0x17b/0x1b0 + ? trace_event_raw_event_sys_exit+0x1a0/0x1a0 + do_syscall_64+0x3f6/0x490 + ? syscall_return_slowpath+0x2c0/0x2c0 + ? lockdep_sys_exit+0x1f/0xaa + ? syscall_return_slowpath+0x1a3/0x2c0 + ? lockdep_sys_exit+0x1f/0xaa + ? prepare_exit_to_usermode+0x11c/0x1e0 + ? enter_from_user_mode+0x30/0x30 + random: crng init done + ? 
__put_user_4+0x1c/0x30 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + RIP: 0033:0x7f41d95f9340 + RSP: 002b:00007fffe71e4268 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 + RAX: 0000000000000000 RBX: 000000000000000d RCX: 00007f41d95f9340 + RDX: 0000000000000000 RSI: 0000000000002401 RDI: 000000000000000d + RBP: 0000000000000000 R08: 00007f41ca8ff700 R09: 00007f41d996dd1f + R10: 00007fffe71e41e0 R11: 0000000000000246 R12: 00007fffe71e4330 + R13: 0000000000000000 R14: fffffffffffffffc R15: 00007fffe71e4290 + + Allocated by task 870: + kasan_kmalloc+0xa0/0xd0 + kmem_cache_alloc_node+0x11a/0x430 + copy_process.part.19+0x11a0/0x41c0 + _do_fork+0x1be/0xa20 + do_syscall_64+0x198/0x490 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + + Freed by task 0: + __kasan_slab_free+0x12e/0x180 + kmem_cache_free+0x102/0x4d0 + free_task+0xfe/0x160 + __put_task_struct+0x189/0x290 + delayed_put_task_struct+0x119/0x250 + rcu_process_callbacks+0xa6c/0x1b60 + __do_softirq+0x238/0x7ae + + The buggy address belongs to the object at ffff880384f9b480 + which belongs to the cache task_struct of size 12928 + +It occurs because task_struct is freed before perf_event which refers +to the task and task flags are checked while teardown of the event. +perf_event_alloc() assigns task_struct to hw.target of perf_event, +but there is no reference counting for it. + +As a fix we get_task_struct() in perf_event_alloc() at above mentioned +assignment and put_task_struct() in _free_event(). 
+ +Signed-off-by: Prashant Bhole +Reviewed-by: Oleg Nesterov +Acked-by: Peter Zijlstra (Intel) +Cc: +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Fixes: 63b6da39bb38e8f1a1ef3180d32a39d6 ("perf: Fix perf_event_exit_task() race") +Link: http://lkml.kernel.org/r/20180409100346.6416-1-bhole_prashant_q7@lab.ntt.co.jp +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/events/core.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -4112,6 +4112,9 @@ static void _free_event(struct perf_even + if (event->ctx) + put_ctx(event->ctx); + ++ if (event->hw.target) ++ put_task_struct(event->hw.target); ++ + exclusive_event_destroy(event); + module_put(event->pmu->module); + +@@ -9456,6 +9459,7 @@ perf_event_alloc(struct perf_event_attr + * and we cannot use the ctx information because we need the + * pmu before we get a ctx. 
+ */ ++ get_task_struct(task); + event->hw.target = task; + } + +@@ -9571,6 +9575,8 @@ err_ns: + perf_detach_cgroup(event); + if (event->ns) + put_pid_ns(event->ns); ++ if (event->hw.target) ++ put_task_struct(event->hw.target); + kfree(event); + + return ERR_PTR(err); diff --git a/queue-4.15/series b/queue-4.15/series index 4db873f680e..3dbc7bcbc4a 100644 --- a/queue-4.15/series +++ b/queue-4.15/series @@ -11,3 +11,13 @@ vhost-fix-vhost_copy_to_user.patch lan78xx-correctly-indicate-invalid-otp.patch media-v4l2-compat-ioctl32-don-t-oops-on-overlay.patch media-v4l-vsp1-fix-header-display-list-status-check-in-continuous-mode.patch +ipmi-fix-some-error-cleanup-issues.patch +parisc-fix-out-of-array-access-in-match_pci_device.patch +parisc-fix-hpmc-handler-by-increasing-size-to-multiple-of-16-bytes.patch +drivers-hv-vmbus-do-not-mark-hv_pcie-as-perf_device.patch +pci-hv-serialize-the-present-and-eject-work-items.patch +pci-hv-fix-2-hang-issues-in-hv_compose_msi_msg.patch +kvm-ppc-book3s-hv-trace_tlbie-must-not-be-called-in-realmode.patch +perf-core-fix-use-after-free-in-uprobe_perf_close.patch +x86-mce-amd-edac-mce_amd-enumerate-reserved-smca-bank-type.patch +x86-mce-amd-get-address-from-already-initialized-block.patch diff --git a/queue-4.15/x86-mce-amd-edac-mce_amd-enumerate-reserved-smca-bank-type.patch b/queue-4.15/x86-mce-amd-edac-mce_amd-enumerate-reserved-smca-bank-type.patch new file mode 100644 index 00000000000..3db33395a34 --- /dev/null +++ b/queue-4.15/x86-mce-amd-edac-mce_amd-enumerate-reserved-smca-bank-type.patch @@ -0,0 +1,123 @@ +From 68627a697c195937672ce07683094c72b1174786 Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam +Date: Wed, 21 Feb 2018 11:18:58 +0100 +Subject: x86/mce/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type + +From: Yazen Ghannam + +commit 68627a697c195937672ce07683094c72b1174786 upstream. + +Currently, bank 4 is reserved on Fam17h, so we chose not to initialize +bank 4 in the smca_banks array. 
This means that when we check if a bank +is initialized, like during boot or resume, we will see that bank 4 is +not initialized and try to initialize it. + +This will cause a call trace, when resuming from suspend, due to +rdmsr_*on_cpu() calls in the init path. The rdmsr_*on_cpu() calls issue +an IPI but we're running with interrupts disabled. This triggers: + + WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0 + ... + +Reserved banks will be read-as-zero, so their MCA_IPID register will be +zero. So, like the smca_banks array, the threshold_banks array will not +have an entry for a reserved bank since all its MCA_MISC* registers will +be zero. + +Enumerate a "Reserved" bank type that matches on a HWID_MCATYPE of 0,0. + +Use the "Reserved" type when checking if a bank is reserved. It's +possible that other bank numbers may be reserved on future systems. + +Don't try to find the block address on reserved banks. + +Signed-off-by: Yazen Ghannam +Signed-off-by: Borislav Petkov +Cc: # 4.14.x +Cc: Borislav Petkov +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: linux-edac +Link: http://lkml.kernel.org/r/20180221101900.10326-7-bp@alien8.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/mce.h | 1 + + arch/x86/kernel/cpu/mcheck/mce_amd.c | 7 +++++++ + drivers/edac/mce_amd.c | 11 +++++++---- + 3 files changed, 15 insertions(+), 4 deletions(-) + +--- a/arch/x86/include/asm/mce.h ++++ b/arch/x86/include/asm/mce.h +@@ -346,6 +346,7 @@ enum smca_bank_types { + SMCA_IF, /* Instruction Fetch */ + SMCA_L2_CACHE, /* L2 Cache */ + SMCA_DE, /* Decoder Unit */ ++ SMCA_RESERVED, /* Reserved */ + SMCA_EX, /* Execution Unit */ + SMCA_FP, /* Floating Point */ + SMCA_L3_CACHE, /* L3 Cache */ +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[ + [SMCA_IF] = { "insn_fetch", "Instruction 
Fetch Unit" }, + [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, + [SMCA_DE] = { "decode_unit", "Decode Unit" }, ++ [SMCA_RESERVED] = { "reserved", "Reserved" }, + [SMCA_EX] = { "execution_unit", "Execution Unit" }, + [SMCA_FP] = { "floating_point", "Floating Point Unit" }, + [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, +@@ -113,6 +114,9 @@ EXPORT_SYMBOL_GPL(smca_get_long_name); + static struct smca_hwid smca_hwid_mcatypes[] = { + /* { bank_type, hwid_mcatype, xec_bitmap } */ + ++ /* Reserved type */ ++ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 }, ++ + /* ZN Core (HWID=0xB0) MCA types */ + { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF }, + { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, +@@ -417,6 +421,9 @@ static u32 get_block_address(unsigned in + u32 addr = 0, offset = 0; + + if (mce_flags.smca) { ++ if (smca_get_bank_type(bank) == SMCA_RESERVED) ++ return addr; ++ + if (!block) { + addr = MSR_AMD64_SMCA_MCx_MISC(bank); + } else { +--- a/drivers/edac/mce_amd.c ++++ b/drivers/edac/mce_amd.c +@@ -854,21 +854,24 @@ static void decode_mc6_mce(struct mce *m + static void decode_smca_error(struct mce *m) + { + struct smca_hwid *hwid; +- unsigned int bank_type; ++ enum smca_bank_types bank_type; + const char *ip_name; + u8 xec = XEC(m->status, xec_mask); + + if (m->bank >= ARRAY_SIZE(smca_banks)) + return; + +- if (x86_family(m->cpuid) >= 0x17 && m->bank == 4) +- pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n"); +- + hwid = smca_banks[m->bank].hwid; + if (!hwid) + return; + + bank_type = hwid->bank_type; ++ ++ if (bank_type == SMCA_RESERVED) { ++ pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank); ++ return; ++ } ++ + ip_name = smca_get_long_name(bank_type); + + pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); diff --git a/queue-4.15/x86-mce-amd-get-address-from-already-initialized-block.patch b/queue-4.15/x86-mce-amd-get-address-from-already-initialized-block.patch new file mode 100644 index 00000000000..ebeb9d5b336 --- /dev/null +++ 
b/queue-4.15/x86-mce-amd-get-address-from-already-initialized-block.patch @@ -0,0 +1,63 @@ +From 27bd59502702fe51d9eb00450a75b727ec6bfcb4 Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam +Date: Wed, 21 Feb 2018 11:18:59 +0100 +Subject: x86/mce/AMD: Get address from already initialized block + +From: Yazen Ghannam + +commit 27bd59502702fe51d9eb00450a75b727ec6bfcb4 upstream. + +The block address is saved after the block is initialized when +threshold_init_device() is called. + +Use the saved block address, if available, rather than trying to +rediscover it. + +This will avoid a call trace, when resuming from suspend, due to the +rdmsr_safe_on_cpu() call in get_block_address(). The rdmsr_safe_on_cpu() +call issues an IPI but we're running with interrupts disabled. This +triggers: + + WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0 + +Signed-off-by: Yazen Ghannam +Signed-off-by: Borislav Petkov +Cc: <stable@vger.kernel.org> # 4.14.x +Cc: Borislav Petkov +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: linux-edac +Link: http://lkml.kernel.org/r/20180221101900.10326-8-bp@alien8.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -420,6 +420,21 @@ static u32 get_block_address(unsigned in + { + u32 addr = 0, offset = 0; + ++ if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) ++ return addr; ++ ++ /* Get address from already initialized block. */ ++ if (per_cpu(threshold_banks, cpu)) { ++ struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank]; ++ ++ if (bankp && bankp->blocks) { ++ struct threshold_block *blockp = &bankp->blocks[block]; ++ ++ if (blockp) ++ return blockp->address; ++ } ++ } ++ + if (mce_flags.smca) { + if (smca_get_bank_type(bank) == SMCA_RESERVED) + return addr; -- 2.47.3