From: Greg Kroah-Hartman Date: Tue, 2 Aug 2016 07:03:51 +0000 (+0200) Subject: 4.4-stable patches X-Git-Tag: v3.14.75~24 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4f84335e17351568f58f448f83ceabbcecaa74eb;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: arc-unwind-ensure-that-.debug_frame-is-generated-vs.-.eh_frame.patch arc-unwind-warn-only-once-if-dw2_unwind-is-disabled.patch dmaengine-at_xdmac-align-descriptors-on-64-bits.patch dmaengine-at_xdmac-double-fifo-flush-needed-to-compute-residue.patch dmaengine-at_xdmac-fix-residue-corruption.patch fs-nilfs2-fix-potential-underflow-in-call-to-crc32_le.patch kernel-sysrq-watchdog-sched-core-reset-watchdog-on-all-cpus-while-processing-sysrq-w.patch mm-compaction-abort-free-scanner-if-split-fails.patch mm-compaction-prevent-vm_bug_on-when-terminating-freeing-scanner.patch mm-meminit-always-return-a-valid-node-from-early_pfn_to_nid.patch mm-meminit-ensure-node-is-online-before-checking-whether-pages-are-uninitialised.patch mm-slb-add-__gfp_atomic-to-the-gfp-reclaim-mask.patch perf-test-ignore-kcore-files-in-the-vmlinux-matches-kallsyms-test.patch pps-do-not-crash-when-failed-to-register.patch vmlinux.lds-account-for-destructor-sections.patch x86-quirks-add-early-quirk-to-reset-apple-airport-card.patch x86-quirks-apply-nvidia_bugs-quirk-only-on-root-bus.patch x86-quirks-reintroduce-scanning-of-secondary-buses.patch xen-pciback-fix-conf_space-read-write-overlap-check.patch xenbus-don-t-bail-early-from-xenbus_dev_request_and_reply.patch xenbus-don-t-bug-on-user-mode-induced-condition.patch --- diff --git a/queue-4.4/arc-unwind-ensure-that-.debug_frame-is-generated-vs.-.eh_frame.patch b/queue-4.4/arc-unwind-ensure-that-.debug_frame-is-generated-vs.-.eh_frame.patch new file mode 100644 index 00000000000..ae9cfaf862d --- /dev/null +++ b/queue-4.4/arc-unwind-ensure-that-.debug_frame-is-generated-vs.-.eh_frame.patch @@ -0,0 +1,45 @@ +From f52e126cc7476196f44f3c313b7d9f0699a881fc Mon Sep 17 00:00:00 2001 +From: Vineet Gupta +Date: Tue, 28 Jun 2016 09:42:25 +0530 +Subject: ARC: unwind: ensure that .debug_frame is generated (vs. .eh_frame) + +From: Vineet Gupta + +commit f52e126cc7476196f44f3c313b7d9f0699a881fc upstream. + +With recent binutils update to support dwarf CFI pseudo-ops in gas, we +now get .eh_frame vs. .debug_frame. Although the call frame info is +exactly the same in both, the CIE differs, which the current kernel +unwinder can't cope with. + +This broke both the kernel unwinder as well as loadable modules (latter +because of a new unhandled relo R_ARC_32_PCREL from .rela.eh_frame in +the module loader) + +The ideal solution would be to switch unwinder to .eh_frame. +For now however we can make do by just ensureing .debug_frame is +generated by removing -fasynchronous-unwind-tables + + .eh_frame generated with -gdwarf-2 -fasynchronous-unwind-tables + .debug_frame generated with -gdwarf-2 + +Fixes STAR 9001058196 + +Signed-off-by: Vineet Gupta +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arc/Makefile | 2 -- + 1 file changed, 2 deletions(-) + +--- a/arch/arc/Makefile ++++ b/arch/arc/Makefile +@@ -48,8 +48,6 @@ endif + + endif + +-cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables +- + # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok + ifeq ($(atleast_gcc48),y) + cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2 diff --git a/queue-4.4/arc-unwind-warn-only-once-if-dw2_unwind-is-disabled.patch b/queue-4.4/arc-unwind-warn-only-once-if-dw2_unwind-is-disabled.patch new file mode 100644 index 00000000000..3a54d1d4e47 --- /dev/null +++ b/queue-4.4/arc-unwind-warn-only-once-if-dw2_unwind-is-disabled.patch @@ -0,0 +1,41 @@ +From 9bd54517ee86cb164c734f72ea95aeba4804f10b Mon Sep 17 00:00:00 2001 +From: Alexey Brodkin +Date: Thu, 23 Jun 2016 11:00:39 +0300 +Subject: arc: unwind: warn only once if DW2_UNWIND is disabled + +From: Alexey Brodkin + +commit 9bd54517ee86cb164c734f72ea95aeba4804f10b upstream. + +If CONFIG_ARC_DW2_UNWIND is disabled every time arc_unwind_core() +gets called following message gets printed in debug console: +----------------->8--------------- +CONFIG_ARC_DW2_UNWIND needs to be enabled +----------------->8--------------- + +That message makes sense if user indeed wants to see a backtrace or +get nice function call-graphs in perf but what if user disabled +unwinder for the purpose? Why pollute his debug console? + +So instead we'll warn user about possibly missing feature once and +let him decide if that was what he or she really wanted. + +Signed-off-by: Alexey Brodkin +Signed-off-by: Vineet Gupta +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arc/kernel/stacktrace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arc/kernel/stacktrace.c ++++ b/arch/arc/kernel/stacktrace.c +@@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, + * prelogue is setup (callee regs saved and then fp set and not other + * way around + */ +- pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); ++ pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); + return 0; + + #endif diff --git a/queue-4.4/dmaengine-at_xdmac-align-descriptors-on-64-bits.patch b/queue-4.4/dmaengine-at_xdmac-align-descriptors-on-64-bits.patch new file mode 100644 index 00000000000..d5fe7164757 --- /dev/null +++ b/queue-4.4/dmaengine-at_xdmac-align-descriptors-on-64-bits.patch @@ -0,0 +1,43 @@ +From 4a9723e8df68cfce4048517ee32e37f78854b6fb Mon Sep 17 00:00:00 2001 +From: Ludovic Desroches +Date: Thu, 12 May 2016 16:54:08 +0200 +Subject: dmaengine: at_xdmac: align descriptors on 64 bits + +From: Ludovic Desroches + +commit 4a9723e8df68cfce4048517ee32e37f78854b6fb upstream. + +Having descriptors aligned on 64 bits allows update CNDA and CUBC in an +atomic way. + +Signed-off-by: Ludovic Desroches +Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel +eXtended DMA Controller driver") +Reviewed-by: Nicolas Ferre +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma/at_xdmac.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -242,7 +242,7 @@ struct at_xdmac_lld { + u32 mbr_dus; /* Destination Microblock Stride Register */ + }; + +- ++/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */ + struct at_xdmac_desc { + struct at_xdmac_lld lld; + enum dma_transfer_direction direction; +@@ -253,7 +253,7 @@ struct at_xdmac_desc { + unsigned int xfer_size; + struct list_head descs_list; + struct list_head xfer_node; +-}; ++} __aligned(sizeof(u64)); + + static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb) + { diff --git a/queue-4.4/dmaengine-at_xdmac-double-fifo-flush-needed-to-compute-residue.patch b/queue-4.4/dmaengine-at_xdmac-double-fifo-flush-needed-to-compute-residue.patch new file mode 100644 index 00000000000..46484ef868c --- /dev/null +++ b/queue-4.4/dmaengine-at_xdmac-double-fifo-flush-needed-to-compute-residue.patch @@ -0,0 +1,65 @@ +From 9295c41d77ca93aac79cfca6fa09fa1ca5cab66f Mon Sep 17 00:00:00 2001 +From: Ludovic Desroches +Date: Thu, 12 May 2016 16:54:10 +0200 +Subject: dmaengine: at_xdmac: double FIFO flush needed to compute residue + +From: Ludovic Desroches + +commit 9295c41d77ca93aac79cfca6fa09fa1ca5cab66f upstream. + +Due to the way CUBC register is updated, a double flush is needed to +compute an accurate residue. First flush aim is to get data from the DMA +FIFO and second one ensures that we won't report data which are not in +memory. + +Signed-off-by: Ludovic Desroches +Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel +eXtended DMA Controller driver") +Reviewed-by: Nicolas Ferre +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma/at_xdmac.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -1413,7 +1413,16 @@ at_xdmac_tx_status(struct dma_chan *chan + residue = desc->xfer_size; + /* + * Flush FIFO: only relevant when the transfer is source peripheral +- * synchronized. ++ * synchronized. Flush is needed before reading CUBC because data in ++ * the FIFO are not reported by CUBC. Reporting a residue of the ++ * transfer length while we have data in FIFO can cause issue. ++ * Usecase: atmel USART has a timeout which means I have received ++ * characters but there is no more character received for a while. On ++ * timeout, it requests the residue. If the data are in the DMA FIFO, ++ * we will return a residue of the transfer length. It means no data ++ * received. If an application is waiting for these data, it will hang ++ * since we won't have another USART timeout without receiving new ++ * data. + */ + mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC; + value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM; +@@ -1469,6 +1478,19 @@ at_xdmac_tx_status(struct dma_chan *chan + } + + /* ++ * Flush FIFO: only relevant when the transfer is source peripheral ++ * synchronized. Another flush is needed here because CUBC is updated ++ * when the controller sends the data write command. It can lead to ++ * report data that are not written in the memory or the device. The ++ * FIFO flush ensures that data are really written. ++ */ ++ if ((desc->lld.mbr_cfg & mask) == value) { ++ at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask); ++ while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS)) ++ cpu_relax(); ++ } ++ ++ /* + * Remove size of all microblocks already transferred and the current + * one. Then add the remaining size to transfer of the current + * microblock. diff --git a/queue-4.4/dmaengine-at_xdmac-fix-residue-corruption.patch b/queue-4.4/dmaengine-at_xdmac-fix-residue-corruption.patch new file mode 100644 index 00000000000..d8d2e43f7e1 --- /dev/null +++ b/queue-4.4/dmaengine-at_xdmac-fix-residue-corruption.patch @@ -0,0 +1,99 @@ +From 53398f488821c2b5b15291e3debec6ad33f75d3d Mon Sep 17 00:00:00 2001 +From: Ludovic Desroches +Date: Thu, 12 May 2016 16:54:09 +0200 +Subject: dmaengine: at_xdmac: fix residue corruption + +From: Ludovic Desroches + +commit 53398f488821c2b5b15291e3debec6ad33f75d3d upstream. + +An unexpected value of CUBC can lead to a corrupted residue. A more +complex sequence is needed to detect an inaccurate value for NCA or CUBC. + +Signed-off-by: Ludovic Desroches +Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel +eXtended DMA Controller driver") +Reviewed-by: Nicolas Ferre +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma/at_xdmac.c | 54 +++++++++++++++++++++++++++++-------------------- + 1 file changed, 32 insertions(+), 22 deletions(-) + +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -1388,6 +1388,7 @@ at_xdmac_tx_status(struct dma_chan *chan + u32 cur_nda, check_nda, cur_ubc, mask, value; + u8 dwidth = 0; + unsigned long flags; ++ bool initd; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) +@@ -1423,34 +1424,43 @@ at_xdmac_tx_status(struct dma_chan *chan + } + + /* +- * When processing the residue, we need to read two registers but we +- * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where +- * we stand in the descriptor list and AT_XDMAC_CUBC is used +- * to know how many data are remaining for the current descriptor. +- * Since the dma channel is not paused to not loose data, between the +- * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of +- * descriptor. +- * For that reason, after reading AT_XDMAC_CUBC, we check if we are +- * still using the same descriptor by reading a second time +- * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to +- * read again AT_XDMAC_CUBC. ++ * The easiest way to compute the residue should be to pause the DMA ++ * but doing this can lead to miss some data as some devices don't ++ * have FIFO. ++ * We need to read several registers because: ++ * - DMA is running therefore a descriptor change is possible while ++ * reading these registers ++ * - When the block transfer is done, the value of the CUBC register ++ * is set to its initial value until the fetch of the next descriptor. ++ * This value will corrupt the residue calculation so we have to skip ++ * it. ++ * ++ * INITD -------- ------------ ++ * |____________________| ++ * _______________________ _______________ ++ * NDA @desc2 \/ @desc3 ++ * _______________________/\_______________ ++ * __________ ___________ _______________ ++ * CUBC 0 \/ MAX desc1 \/ MAX desc2 ++ * __________/\___________/\_______________ ++ * ++ * Since descriptors are aligned on 64 bits, we can assume that ++ * the update of NDA and CUBC is atomic. + * Memory barriers are used to ensure the read order of the registers. +- * A max number of retries is set because unlikely it can never ends if +- * we are transferring a lot of data with small buffers. ++ * A max number of retries is set because unlikely it could never ends. + */ +- cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; +- rmb(); +- cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { +- rmb(); + check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; +- +- if (likely(cur_nda == check_nda)) +- break; +- +- cur_nda = check_nda; ++ rmb(); ++ initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); + rmb(); + cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); ++ rmb(); ++ cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; ++ rmb(); ++ ++ if ((check_nda == cur_nda) && initd) ++ break; + } + + if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) { diff --git a/queue-4.4/fs-nilfs2-fix-potential-underflow-in-call-to-crc32_le.patch b/queue-4.4/fs-nilfs2-fix-potential-underflow-in-call-to-crc32_le.patch new file mode 100644 index 00000000000..9ced3ae1cb7 --- /dev/null +++ b/queue-4.4/fs-nilfs2-fix-potential-underflow-in-call-to-crc32_le.patch @@ -0,0 +1,59 @@ +From 63d2f95d63396059200c391ca87161897b99e74a Mon Sep 17 00:00:00 2001 +From: Torsten Hilbrich +Date: Fri, 24 Jun 2016 14:50:18 -0700 +Subject: fs/nilfs2: fix potential underflow in call to crc32_le + +From: Torsten Hilbrich + +commit 63d2f95d63396059200c391ca87161897b99e74a upstream. + +The value `bytes' comes from the filesystem which is about to be +mounted. We cannot trust that the value is always in the range we +expect it to be. + +Check its value before using it to calculate the length for the crc32_le +call. It value must be larger (or equal) sumoff + 4. + +This fixes a kernel bug when accidentially mounting an image file which +had the nilfs2 magic value 0x3434 at the right offset 0x406 by chance. +The bytes 0x01 0x00 were stored at 0x408 and were interpreted as a +s_bytes value of 1. This caused an underflow when substracting sumoff + +4 (20) in the call to crc32_le. + + BUG: unable to handle kernel paging request at ffff88021e600000 + IP: crc32_le+0x36/0x100 + ... + Call Trace: + nilfs_valid_sb.part.5+0x52/0x60 [nilfs2] + nilfs_load_super_block+0x142/0x300 [nilfs2] + init_nilfs+0x60/0x390 [nilfs2] + nilfs_mount+0x302/0x520 [nilfs2] + mount_fs+0x38/0x160 + vfs_kern_mount+0x67/0x110 + do_mount+0x269/0xe00 + SyS_mount+0x9f/0x100 + entry_SYSCALL_64_fastpath+0x16/0x71 + +Link: http://lkml.kernel.org/r/1466778587-5184-2-git-send-email-konishi.ryusuke@lab.ntt.co.jp +Signed-off-by: Torsten Hilbrich +Tested-by: Torsten Hilbrich +Signed-off-by: Ryusuke Konishi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nilfs2/the_nilfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nilfs2/the_nilfs.c ++++ b/fs/nilfs2/the_nilfs.c +@@ -443,7 +443,7 @@ static int nilfs_valid_sb(struct nilfs_s + if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC) + return 0; + bytes = le16_to_cpu(sbp->s_bytes); +- if (bytes > BLOCK_SIZE) ++ if (bytes < sumoff + 4 || bytes > BLOCK_SIZE) + return 0; + crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp, + sumoff); diff --git a/queue-4.4/kernel-sysrq-watchdog-sched-core-reset-watchdog-on-all-cpus-while-processing-sysrq-w.patch b/queue-4.4/kernel-sysrq-watchdog-sched-core-reset-watchdog-on-all-cpus-while-processing-sysrq-w.patch new file mode 100644 index 00000000000..f8295375ecc --- /dev/null +++ b/queue-4.4/kernel-sysrq-watchdog-sched-core-reset-watchdog-on-all-cpus-while-processing-sysrq-w.patch @@ -0,0 +1,52 @@ +From 57675cb976eff977aefb428e68e4e0236d48a9ff Mon Sep 17 00:00:00 2001 +From: Andrey Ryabinin +Date: Thu, 9 Jun 2016 15:20:05 +0300 +Subject: kernel/sysrq, watchdog, sched/core: Reset watchdog on all CPUs while processing sysrq-w + +From: Andrey Ryabinin + +commit 57675cb976eff977aefb428e68e4e0236d48a9ff upstream. + +Lengthy output of sysrq-w may take a lot of time on slow serial console. + +Currently we reset NMI-watchdog on the current CPU to avoid spurious +lockup messages. Sometimes this doesn't work since softlockup watchdog +might trigger on another CPU which is waiting for an IPI to proceed. +We reset softlockup watchdogs on all CPUs, but we do this only after +listing all tasks, and this may be too late on a busy system. + +So, reset watchdogs CPUs earlier, in for_each_process_thread() loop. + +Signed-off-by: Andrey Ryabinin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/1465474805-14641-1-git-send-email-aryabinin@virtuozzo.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/core.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -4951,14 +4951,16 @@ void show_state_filter(unsigned long sta + /* + * reset the NMI-timeout, listing all files on a slow + * console might take a lot of time: ++ * Also, reset softlockup watchdogs on all CPUs, because ++ * another CPU might be blocked waiting for us to process ++ * an IPI. + */ + touch_nmi_watchdog(); ++ touch_all_softlockup_watchdogs(); + if (!state_filter || (p->state & state_filter)) + sched_show_task(p); + } + +- touch_all_softlockup_watchdogs(); +- + #ifdef CONFIG_SCHED_DEBUG + sysrq_sched_debug_show(); + #endif diff --git a/queue-4.4/mm-compaction-abort-free-scanner-if-split-fails.patch b/queue-4.4/mm-compaction-abort-free-scanner-if-split-fails.patch new file mode 100644 index 00000000000..686865ea692 --- /dev/null +++ b/queue-4.4/mm-compaction-abort-free-scanner-if-split-fails.patch @@ -0,0 +1,127 @@ +From a4f04f2c6955aff5e2c08dcb40aca247ff4d7370 Mon Sep 17 00:00:00 2001 +From: David Rientjes +Date: Fri, 24 Jun 2016 14:50:10 -0700 +Subject: mm, compaction: abort free scanner if split fails + +From: David Rientjes + +commit a4f04f2c6955aff5e2c08dcb40aca247ff4d7370 upstream. + +If the memory compaction free scanner cannot successfully split a free +page (only possible due to per-zone low watermark), terminate the free +scanner rather than continuing to scan memory needlessly. If the +watermark is insufficient for a free page of order <= cc->order, then +terminate the scanner since all future splits will also likely fail. + +This prevents the compaction freeing scanner from scanning all memory on +very large zones (very noticeable for zones > 128GB, for instance) when +all splits will likely fail while holding zone->lock. + +compaction_alloc() iterating a 128GB zone has been benchmarked to take +over 400ms on some systems whereas any free page isolated and ready to +be split ends up failing in split_free_page() because of the low +watermark check and thus the iteration continues. + +The next time compaction occurs, the freeing scanner will likely start +at the end of the zone again since no success was made previously and we +get the same lengthy iteration until the zone is brought above the low +watermark. All thp page faults can take >400ms in such a state without +this fix. + +Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1606211820350.97086@chino.kir.corp.google.com +Signed-off-by: David Rientjes +Acked-by: Vlastimil Babka +Cc: Minchan Kim +Cc: Joonsoo Kim +Cc: Mel Gorman +Cc: Hugh Dickins +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/compaction.c | 39 +++++++++++++++++++++------------------ + 1 file changed, 21 insertions(+), 18 deletions(-) + +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -475,25 +475,23 @@ static unsigned long isolate_freepages_b + + /* Found a free page, break it into order-0 pages */ + isolated = split_free_page(page); ++ if (!isolated) ++ break; ++ + total_isolated += isolated; ++ cc->nr_freepages += isolated; + for (i = 0; i < isolated; i++) { + list_add(&page->lru, freelist); + page++; + } +- +- /* If a page was split, advance to the end of it */ +- if (isolated) { +- cc->nr_freepages += isolated; +- if (!strict && +- cc->nr_migratepages <= cc->nr_freepages) { +- blockpfn += isolated; +- break; +- } +- +- blockpfn += isolated - 1; +- cursor += isolated - 1; +- continue; ++ if (!strict && cc->nr_migratepages <= cc->nr_freepages) { ++ blockpfn += isolated; ++ break; + } ++ /* Advance to the end of split page */ ++ blockpfn += isolated - 1; ++ cursor += isolated - 1; ++ continue; + + isolate_fail: + if (strict) +@@ -503,6 +501,9 @@ isolate_fail: + + } + ++ if (locked) ++ spin_unlock_irqrestore(&cc->zone->lock, flags); ++ + /* + * There is a tiny chance that we have read bogus compound_order(), + * so be careful to not go outside of the pageblock. +@@ -524,9 +525,6 @@ isolate_fail: + if (strict && blockpfn < end_pfn) + total_isolated = 0; + +- if (locked) +- spin_unlock_irqrestore(&cc->zone->lock, flags); +- + /* Update the pageblock-skip if the whole pageblock was scanned */ + if (blockpfn == end_pfn) + update_pageblock_skip(cc, valid_page, total_isolated, false); +@@ -966,6 +964,7 @@ static void isolate_freepages(struct com + block_end_pfn = block_start_pfn, + block_start_pfn -= pageblock_nr_pages, + isolate_start_pfn = block_start_pfn) { ++ unsigned long isolated; + + /* + * This can iterate a massively long zone without finding any +@@ -990,8 +989,12 @@ static void isolate_freepages(struct com + continue; + + /* Found a block suitable for isolating free pages from. */ +- isolate_freepages_block(cc, &isolate_start_pfn, +- block_end_pfn, freelist, false); ++ isolated = isolate_freepages_block(cc, &isolate_start_pfn, ++ block_end_pfn, freelist, false); ++ /* If isolation failed early, do not continue needlessly */ ++ if (!isolated && isolate_start_pfn < block_end_pfn && ++ cc->nr_migratepages > cc->nr_freepages) ++ break; + + /* + * If we isolated enough freepages, or aborted due to async diff --git a/queue-4.4/mm-compaction-prevent-vm_bug_on-when-terminating-freeing-scanner.patch b/queue-4.4/mm-compaction-prevent-vm_bug_on-when-terminating-freeing-scanner.patch new file mode 100644 index 00000000000..a6af2b5d734 --- /dev/null +++ b/queue-4.4/mm-compaction-prevent-vm_bug_on-when-terminating-freeing-scanner.patch @@ -0,0 +1,104 @@ +From a46cbf3bc53b6a93fb84a5ffb288c354fa807954 Mon Sep 17 00:00:00 2001 +From: David Rientjes +Date: Thu, 14 Jul 2016 12:06:50 -0700 +Subject: mm, compaction: prevent VM_BUG_ON when terminating freeing scanner + +From: David Rientjes + +commit a46cbf3bc53b6a93fb84a5ffb288c354fa807954 upstream. + +It's possible to isolate some freepages in a pageblock and then fail +split_free_page() due to the low watermark check. In this case, we hit +VM_BUG_ON() because the freeing scanner terminated early without a +contended lock or enough freepages. + +This should never have been a VM_BUG_ON() since it's not a fatal +condition. It should have been a VM_WARN_ON() at best, or even handled +gracefully. + +Regardless, we need to terminate anytime the full pageblock scan was not +done. The logic belongs in isolate_freepages_block(), so handle its +state gracefully by terminating the pageblock loop and making a note to +restart at the same pageblock next time since it was not possible to +complete the scan this time. + +[rientjes@google.com: don't rescan pages in a pageblock] + Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1607111244150.83138@chino.kir.corp.google.com +Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1606291436300.145590@chino.kir.corp.google.com +Signed-off-by: David Rientjes +Reported-by: Minchan Kim +Tested-by: Minchan Kim +Cc: Joonsoo Kim +Cc: Hugh Dickins +Cc: Mel Gorman +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/compaction.c | 36 ++++++++++++++---------------------- + 1 file changed, 14 insertions(+), 22 deletions(-) + +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -964,8 +964,6 @@ static void isolate_freepages(struct com + block_end_pfn = block_start_pfn, + block_start_pfn -= pageblock_nr_pages, + isolate_start_pfn = block_start_pfn) { +- unsigned long isolated; +- + /* + * This can iterate a massively long zone without finding any + * suitable migration targets, so periodically check if we need +@@ -989,36 +987,30 @@ static void isolate_freepages(struct com + continue; + + /* Found a block suitable for isolating free pages from. */ +- isolated = isolate_freepages_block(cc, &isolate_start_pfn, +- block_end_pfn, freelist, false); +- /* If isolation failed early, do not continue needlessly */ +- if (!isolated && isolate_start_pfn < block_end_pfn && +- cc->nr_migratepages > cc->nr_freepages) +- break; ++ isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn, ++ freelist, false); + + /* +- * If we isolated enough freepages, or aborted due to async +- * compaction being contended, terminate the loop. +- * Remember where the free scanner should restart next time, +- * which is where isolate_freepages_block() left off. +- * But if it scanned the whole pageblock, isolate_start_pfn +- * now points at block_end_pfn, which is the start of the next +- * pageblock. +- * In that case we will however want to restart at the start +- * of the previous pageblock. ++ * If we isolated enough freepages, or aborted due to lock ++ * contention, terminate. + */ + if ((cc->nr_freepages >= cc->nr_migratepages) + || cc->contended) { +- if (isolate_start_pfn >= block_end_pfn) ++ if (isolate_start_pfn >= block_end_pfn) { ++ /* ++ * Restart at previous pageblock if more ++ * freepages can be isolated next time. ++ */ + isolate_start_pfn = + block_start_pfn - pageblock_nr_pages; ++ } + break; +- } else { ++ } else if (isolate_start_pfn < block_end_pfn) { + /* +- * isolate_freepages_block() should not terminate +- * prematurely unless contended, or isolated enough ++ * If isolation failed early, do not continue ++ * needlessly. + */ +- VM_BUG_ON(isolate_start_pfn < block_end_pfn); ++ break; + } + } + diff --git a/queue-4.4/mm-meminit-always-return-a-valid-node-from-early_pfn_to_nid.patch b/queue-4.4/mm-meminit-always-return-a-valid-node-from-early_pfn_to_nid.patch new file mode 100644 index 00000000000..cfcf261bd0f --- /dev/null +++ b/queue-4.4/mm-meminit-always-return-a-valid-node-from-early_pfn_to_nid.patch @@ -0,0 +1,52 @@ +From e4568d3803852d00effd41dcdd489e726b998879 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Thu, 14 Jul 2016 12:07:20 -0700 +Subject: mm, meminit: always return a valid node from early_pfn_to_nid + +From: Mel Gorman + +commit e4568d3803852d00effd41dcdd489e726b998879 upstream. + +early_pfn_to_nid can return node 0 if a PFN is invalid on machines that +has no node 0. A machine with only node 1 was observed to crash with +the following message: + + BUG: unable to handle kernel paging request at 000000000002a3c8 + PGD 0 + Modules linked in: + Hardware name: Supermicro H8DSP-8/H8DSP-8, BIOS 080011 06/30/2006 + task: ffffffff81c0d500 ti: ffffffff81c00000 task.ti: ffffffff81c00000 + RIP: reserve_bootmem_region+0x6a/0xef + CR2: 000000000002a3c8 CR3: 0000000001c06000 CR4: 00000000000006b0 + Call Trace: + free_all_bootmem+0x4b/0x12a + mem_init+0x70/0xa3 + start_kernel+0x25b/0x49b + +The problem is that early_page_uninitialised uses the early_pfn_to_nid +helper which returns node 0 for invalid PFNs. No caller of +early_pfn_to_nid cares except early_page_uninitialised. This patch has +early_pfn_to_nid always return a valid node. + +Link: http://lkml.kernel.org/r/1468008031-3848-3-git-send-email-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Acked-by: David Rientjes +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1057,7 +1057,7 @@ int __meminit early_pfn_to_nid(unsigned + spin_lock(&early_pfn_lock); + nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache); + if (nid < 0) +- nid = 0; ++ nid = first_online_node; + spin_unlock(&early_pfn_lock); + + return nid; diff --git a/queue-4.4/mm-meminit-ensure-node-is-online-before-checking-whether-pages-are-uninitialised.patch b/queue-4.4/mm-meminit-ensure-node-is-online-before-checking-whether-pages-are-uninitialised.patch new file mode 100644 index 00000000000..fc6b0d2351a --- /dev/null +++ b/queue-4.4/mm-meminit-ensure-node-is-online-before-checking-whether-pages-are-uninitialised.patch @@ -0,0 +1,39 @@ +From ef70b6f41cda6270165a6f27b2548ed31cfa3cb2 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Thu, 14 Jul 2016 12:07:23 -0700 +Subject: mm, meminit: ensure node is online before checking whether pages are uninitialised + +From: Mel Gorman + +commit ef70b6f41cda6270165a6f27b2548ed31cfa3cb2 upstream. + +early_page_uninitialised looks up an arbitrary PFN. While a machine +without node 0 will boot with "mm, page_alloc: Always return a valid +node from early_pfn_to_nid", it works because it assumes that nodes are +always in PFN order. This is not guaranteed so this patch adds +robustness by always checking if the node being checked is online. + +Link: http://lkml.kernel.org/r/1468008031-3848-4-git-send-email-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Acked-by: David Rientjes +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -275,7 +275,9 @@ static inline void reset_deferred_memini + /* Returns true if the struct page for the pfn is uninitialised */ + static inline bool __meminit early_page_uninitialised(unsigned long pfn) + { +- if (pfn >= NODE_DATA(early_pfn_to_nid(pfn))->first_deferred_pfn) ++ int nid = early_pfn_to_nid(pfn); ++ ++ if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn) + return true; + + return false; diff --git a/queue-4.4/mm-slb-add-__gfp_atomic-to-the-gfp-reclaim-mask.patch b/queue-4.4/mm-slb-add-__gfp_atomic-to-the-gfp-reclaim-mask.patch new file mode 100644 index 00000000000..67e54645b66 --- /dev/null +++ b/queue-4.4/mm-slb-add-__gfp_atomic-to-the-gfp-reclaim-mask.patch @@ -0,0 +1,49 @@ +From e838a45f9392a5bd2be1cd3ab0b16ae85857461c Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Fri, 24 Jun 2016 14:49:37 -0700 +Subject: mm, sl[au]b: add __GFP_ATOMIC to the GFP reclaim mask + +From: Mel Gorman + +commit e838a45f9392a5bd2be1cd3ab0b16ae85857461c upstream. + +Commit d0164adc89f6 ("mm, page_alloc: distinguish between being unable +to sleep, unwilling to sleep and avoiding waking kswapd") modified +__GFP_WAIT to explicitly identify the difference between atomic callers +and those that were unwilling to sleep. Later the definition was +removed entirely. + +The GFP_RECLAIM_MASK is the set of flags that affect watermark checking +and reclaim behaviour but __GFP_ATOMIC was never added. Without it, +atomic users of the slab allocator strip the __GFP_ATOMIC flag and +cannot access the page allocator atomic reserves. This patch addresses +the problem. + +The user-visible impact depends on the workload but potentially atomic +allocations unnecessarily fail without this path. + +Link: http://lkml.kernel.org/r/20160610093832.GK2527@techsingularity.net +Signed-off-by: Mel Gorman +Reported-by: Marcin Wojtas +Acked-by: Vlastimil Babka +Acked-by: Michal Hocko +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/internal.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -22,7 +22,8 @@ + */ + #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ + __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ +- __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) ++ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ ++ __GFP_ATOMIC) + + /* The GFP flags allowed during early boot */ + #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) diff --git a/queue-4.4/perf-test-ignore-kcore-files-in-the-vmlinux-matches-kallsyms-test.patch b/queue-4.4/perf-test-ignore-kcore-files-in-the-vmlinux-matches-kallsyms-test.patch new file mode 100644 index 00000000000..7b93a4fd2a1 --- /dev/null +++ b/queue-4.4/perf-test-ignore-kcore-files-in-the-vmlinux-matches-kallsyms-test.patch @@ -0,0 +1,91 @@ +From 53d0fe68275dbdaf6a532bb4e87f00db5d36c140 Mon Sep 17 00:00:00 2001 +From: Arnaldo Carvalho de Melo +Date: Tue, 19 Apr 2016 12:16:55 -0300 +Subject: perf test: Ignore kcore files in the "vmlinux matches kallsyms" test + +From: Arnaldo Carvalho de Melo + +commit 53d0fe68275dbdaf6a532bb4e87f00db5d36c140 upstream. + +Before: + + # perf test -v kallsyms + + Maps only in vmlinux: + ffffffff81d5e000-ffffffff81ec3ac8 115e000 [kernel].init.text + ffffffff81ec3ac8-ffffffffa0000000 12c3ac8 [kernel].exit.text + ffffffffa0000000-ffffffffa000c000 0 [fjes] + ffffffffa000c000-ffffffffa0017000 0 [video] + ffffffffa0017000-ffffffffa001c000 0 [grace] + + ffffffffa0a7f000-ffffffffa0ba5000 0 [xfs] + ffffffffa0ba5000-ffffffffffffffff 0 [veth] + Maps in vmlinux with a different name in kallsyms: + Maps only in kallsyms: + ffff880000100000-ffff88001000b000 80000103000 [kernel.kallsyms] + ffff88001000b000-ffff880100000000 8001000e000 [kernel.kallsyms] + ffff880100000000-ffffc90000000000 80100003000 [kernel.kallsyms] + + ffffffffa0000000-ffffffffff600000 7fffa0003000 [kernel.kallsyms] + ffffffffff600000-ffffffffffffffff 7fffff603000 [kernel.kallsyms] + test child finished with -1 + ---- end ---- + vmlinux symtab matches kallsyms: FAILED! + # + +After: + + # perf test -v 1 + 1: vmlinux symtab matches kallsyms : + --- start --- + test child forked, pid 7058 + Looking at the vmlinux_path (8 entries long) + Using /lib/modules/4.6.0-rc1+/build/vmlinux for symbols + 0xffffffff81076870: diff end addr for aesni_gcm_dec v: 0xffffffff810791f2 k: 0xffffffff81076902 + 0xffffffff81079200: diff end addr for aesni_gcm_enc v: 0xffffffff8107bb03 k: 0xffffffff81079292 + 0xffffffff8107e8d0: diff end addr for aesni_gcm_enc_avx_gen2 v: 0xffffffff81083e76 k: 0xffffffff8107e943 + 0xffffffff81083e80: diff end addr for aesni_gcm_dec_avx_gen2 v: 0xffffffff81089611 k: 0xffffffff81083ef3 + 0xffffffff81089990: diff end addr for aesni_gcm_enc_avx_gen4 v: 0xffffffff8108e7c4 k: 0xffffffff81089a03 + 0xffffffff8108e7d0: diff end addr for aesni_gcm_dec_avx_gen4 v: 0xffffffff810937ef k: 0xffffffff8108e843 + Maps only in vmlinux: + ffffffff81d5e000-ffffffff81ec3ac8 115e000 [kernel].init.text + ffffffff81ec3ac8-ffffffffa0000000 12c3ac8 [kernel].exit.text + Maps in vmlinux with a different name in kallsyms: + Maps only in kallsyms: + test child finished with -1 + ---- end ---- + vmlinux symtab matches kallsyms: FAILED! + # + +Cc: Adrian Hunter +Cc: David Ahern +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: Wang Nan +Fixes: 8e0cf965f95e ("perf symbols: Add support for reading from /proc/kcore") +Link: http://lkml.kernel.org/n/tip-n6vrwt9t89w8k769y349govx@git.kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/tests/vmlinux-kallsyms.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/tools/perf/tests/vmlinux-kallsyms.c ++++ b/tools/perf/tests/vmlinux-kallsyms.c +@@ -54,8 +54,14 @@ int test__vmlinux_matches_kallsyms(void) + * Step 3: + * + * Load and split /proc/kallsyms into multiple maps, one per module. ++ * Do not use kcore, as this test was designed before kcore support ++ * and has parts that only make sense if using the non-kcore code. ++ * XXX: extend it to stress the kcorre code as well, hint: the list ++ * of modules extracted from /proc/kcore, in its current form, can't ++ * be compacted against the list of modules found in the "vmlinux" ++ * code and with the one got from /proc/modules from the "kallsyms" code. + */ +- if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, NULL) <= 0) { ++ if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) { + pr_debug("dso__load_kallsyms "); + goto out; + } diff --git a/queue-4.4/pps-do-not-crash-when-failed-to-register.patch b/queue-4.4/pps-do-not-crash-when-failed-to-register.patch new file mode 100644 index 00000000000..3b1fbfeff18 --- /dev/null +++ b/queue-4.4/pps-do-not-crash-when-failed-to-register.patch @@ -0,0 +1,64 @@ +From 368301f2fe4b07e5fb71dba3cc566bc59eb6705f Mon Sep 17 00:00:00 2001 +From: Jiri Slaby +Date: Wed, 20 Jul 2016 15:45:08 -0700 +Subject: pps: do not crash when failed to register + +From: Jiri Slaby + +commit 368301f2fe4b07e5fb71dba3cc566bc59eb6705f upstream. + +With this command sequence: + + modprobe plip + modprobe pps_parport + rmmod pps_parport + +the partport_pps modules causes this crash: + + BUG: unable to handle kernel NULL pointer dereference at (null) + IP: parport_detach+0x1d/0x60 [pps_parport] + Oops: 0000 [#1] SMP + ... + Call Trace: + parport_unregister_driver+0x65/0xc0 [parport] + SyS_delete_module+0x187/0x210 + +The sequence that builds up to this is: + + 1) plip is loaded and takes the parport device for exclusive use: + + plip0: Parallel port at 0x378, using IRQ 7. + + 2) pps_parport then fails to grab the device: + + pps_parport: parallel port PPS client + parport0: cannot grant exclusive access for device pps_parport + pps_parport: couldn't register with parport0 + + 3) rmmod of pps_parport is then killed because it tries to access + pardev->name, but pardev (taken from port->cad) is NULL. + +So add a check for NULL in the test there too. + +Link: http://lkml.kernel.org/r/20160714115245.12651-1-jslaby@suse.cz +Signed-off-by: Jiri Slaby +Acked-by: Rodolfo Giometti +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pps/clients/pps_parport.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/pps/clients/pps_parport.c ++++ b/drivers/pps/clients/pps_parport.c +@@ -195,7 +195,7 @@ static void parport_detach(struct parpor + struct pps_client_pp *device; + + /* FIXME: oooh, this is ugly! */ +- if (strcmp(pardev->name, KBUILD_MODNAME)) ++ if (!pardev || strcmp(pardev->name, KBUILD_MODNAME)) + /* not our port */ + return; + diff --git a/queue-4.4/series b/queue-4.4/series index 54f49665fc7..956f19610e7 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -1 +1,22 @@ usb-ohci-don-t-mark-eds-as-ed_oper-if-scheduling-fails.patch +x86-quirks-apply-nvidia_bugs-quirk-only-on-root-bus.patch +x86-quirks-reintroduce-scanning-of-secondary-buses.patch +x86-quirks-add-early-quirk-to-reset-apple-airport-card.patch +dmaengine-at_xdmac-align-descriptors-on-64-bits.patch +dmaengine-at_xdmac-fix-residue-corruption.patch +dmaengine-at_xdmac-double-fifo-flush-needed-to-compute-residue.patch +mm-slb-add-__gfp_atomic-to-the-gfp-reclaim-mask.patch +mm-compaction-abort-free-scanner-if-split-fails.patch +fs-nilfs2-fix-potential-underflow-in-call-to-crc32_le.patch +mm-compaction-prevent-vm_bug_on-when-terminating-freeing-scanner.patch +mm-meminit-always-return-a-valid-node-from-early_pfn_to_nid.patch +mm-meminit-ensure-node-is-online-before-checking-whether-pages-are-uninitialised.patch +vmlinux.lds-account-for-destructor-sections.patch +perf-test-ignore-kcore-files-in-the-vmlinux-matches-kallsyms-test.patch +pps-do-not-crash-when-failed-to-register.patch +kernel-sysrq-watchdog-sched-core-reset-watchdog-on-all-cpus-while-processing-sysrq-w.patch +arc-unwind-warn-only-once-if-dw2_unwind-is-disabled.patch +arc-unwind-ensure-that-.debug_frame-is-generated-vs.-.eh_frame.patch +xen-pciback-fix-conf_space-read-write-overlap-check.patch +xenbus-don-t-bug-on-user-mode-induced-condition.patch +xenbus-don-t-bail-early-from-xenbus_dev_request_and_reply.patch diff --git a/queue-4.4/vmlinux.lds-account-for-destructor-sections.patch b/queue-4.4/vmlinux.lds-account-for-destructor-sections.patch new file mode 100644 index 00000000000..26cb864195a --- /dev/null +++ b/queue-4.4/vmlinux.lds-account-for-destructor-sections.patch @@ -0,0 +1,81 @@ +From e41f501d391265ff568f3e49d6128cc30856a36f Mon Sep 17 00:00:00 2001 +From: Dmitry Vyukov +Date: Thu, 14 Jul 2016 12:07:29 -0700 +Subject: vmlinux.lds: account for destructor sections + +From: Dmitry Vyukov + +commit e41f501d391265ff568f3e49d6128cc30856a36f upstream. + +If CONFIG_KASAN is enabled and gcc is configured with +--disable-initfini-array and/or gold linker is used, gcc emits +.ctors/.dtors and .text.startup/.text.exit sections instead of +.init_array/.fini_array. .dtors section is not explicitly accounted in +the linker script and messes vvar/percpu layout. + +We want: + ffffffff822bfd80 D _edata + ffffffff822c0000 D __vvar_beginning_hack + ffffffff822c0000 A __vvar_page + ffffffff822c0080 0000000000000098 D vsyscall_gtod_data + ffffffff822c1000 A __init_begin + ffffffff822c1000 D init_per_cpu__irq_stack_union + ffffffff822c1000 A __per_cpu_load + ffffffff822d3000 D init_per_cpu__gdt_page + +We got: + ffffffff8279a600 D _edata + ffffffff8279b000 A __vvar_page + ffffffff8279c000 A __init_begin + ffffffff8279c000 D init_per_cpu__irq_stack_union + ffffffff8279c000 A __per_cpu_load + ffffffff8279e000 D __vvar_beginning_hack + ffffffff8279e080 0000000000000098 D vsyscall_gtod_data + ffffffff827ae000 D init_per_cpu__gdt_page + +This happens because __vvar_page and .vvar get different addresses in +arch/x86/kernel/vmlinux.lds.S: + + . = ALIGN(PAGE_SIZE); + __vvar_page = .; + + .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { + /* work around gold bug 13023 */ + __vvar_beginning_hack = .; + +Discard .dtors/.fini_array/.text.exit, since we don't call dtors. +Merge .text.startup into init text. + +Link: http://lkml.kernel.org/r/1467386363-120030-1-git-send-email-dvyukov@google.com +Signed-off-by: Dmitry Vyukov +Reviewed-by: Andrey Ryabinin +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/asm-generic/vmlinux.lds.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -531,15 +531,19 @@ + + #define INIT_TEXT \ + *(.init.text) \ ++ *(.text.startup) \ + MEM_DISCARD(init.text) + + #define EXIT_DATA \ + *(.exit.data) \ ++ *(.fini_array) \ ++ *(.dtors) \ + MEM_DISCARD(exit.data) \ + MEM_DISCARD(exit.rodata) + + #define EXIT_TEXT \ + *(.exit.text) \ ++ *(.text.exit) \ + MEM_DISCARD(exit.text) + + #define EXIT_CALL \ diff --git a/queue-4.4/x86-quirks-add-early-quirk-to-reset-apple-airport-card.patch b/queue-4.4/x86-quirks-add-early-quirk-to-reset-apple-airport-card.patch new file mode 100644 index 00000000000..cc6a479cb86 --- /dev/null +++ b/queue-4.4/x86-quirks-add-early-quirk-to-reset-apple-airport-card.patch @@ -0,0 +1,246 @@ +From abb2bafd295fe962bbadc329dbfb2146457283ac Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Sun, 12 Jun 2016 12:31:53 +0200 +Subject: x86/quirks: Add early quirk to reset Apple AirPort card +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Lukas Wunner + +commit abb2bafd295fe962bbadc329dbfb2146457283ac upstream. + +The EFI firmware on Macs contains a full-fledged network stack for +downloading OS X images from osrecovery.apple.com. Unfortunately +on Macs introduced 2011 and 2012, EFI brings up the Broadcom 4331 +wireless card on every boot and leaves it enabled even after +ExitBootServices has been called. The card continues to assert its IRQ +line, causing spurious interrupts if the IRQ is shared. It also corrupts +memory by DMAing received packets, allowing for remote code execution +over the air. This only stops when a driver is loaded for the wireless +card, which may be never if the driver is not installed or blacklisted. + +The issue seems to be constrained to the Broadcom 4331. Chris Milsted +has verified that the newer Broadcom 4360 built into the MacBookPro11,3 +(2013/2014) does not exhibit this behaviour. The chances that Apple will +ever supply a firmware fix for the older machines appear to be zero. + +The solution is to reset the card on boot by writing to a reset bit in +its mmio space. This must be done as an early quirk and not as a plain +vanilla PCI quirk to successfully combat memory corruption by DMAed +packets: Matthew Garrett found out in 2012 that the packets are written +to EfiBootServicesData memory (http://mjg59.dreamwidth.org/11235.html). +This type of memory is made available to the page allocator by +efi_free_boot_services(). Plain vanilla PCI quirks run much later, in +subsys initcall level. In-between a time window would be open for memory +corruption. Random crashes occurring in this time window and attributed +to DMAed packets have indeed been observed in the wild by Chris +Bainbridge. + +When Matthew Garrett analyzed the memory corruption issue in 2012, he +sought to fix it with a grub quirk which transitions the card to D3hot: +http://git.savannah.gnu.org/cgit/grub.git/commit/?id=9d34bb85da56 + +This approach does not help users with other bootloaders and while it +may prevent DMAed packets, it does not cure the spurious interrupts +emanating from the card. Unfortunately the card's mmio space is +inaccessible in D3hot, so to reset it, we have to undo the effect of +Matthew's grub patch and transition the card back to D0. + +Note that the quirk takes a few shortcuts to reduce the amount of code: +The size of BAR 0 and the location of the PM capability is identical +on all affected machines and therefore hardcoded. Only the address of +BAR 0 differs between models. Also, it is assumed that the BCMA core +currently mapped is the 802.11 core. The EFI driver seems to always take +care of this. + +Michael Büsch, Bjorn Helgaas and Matt Fleming contributed feedback +towards finding the best solution to this problem. + +The following should be a comprehensive list of affected models: + iMac13,1 2012 21.5" [Root Port 00:1c.3 = 8086:1e16] + iMac13,2 2012 27" [Root Port 00:1c.3 = 8086:1e16] + Macmini5,1 2011 i5 2.3 GHz [Root Port 00:1c.1 = 8086:1c12] + Macmini5,2 2011 i5 2.5 GHz [Root Port 00:1c.1 = 8086:1c12] + Macmini5,3 2011 i7 2.0 GHz [Root Port 00:1c.1 = 8086:1c12] + Macmini6,1 2012 i5 2.5 GHz [Root Port 00:1c.1 = 8086:1e12] + Macmini6,2 2012 i7 2.3 GHz [Root Port 00:1c.1 = 8086:1e12] + MacBookPro8,1 2011 13" [Root Port 00:1c.1 = 8086:1c12] + MacBookPro8,2 2011 15" [Root Port 00:1c.1 = 8086:1c12] + MacBookPro8,3 2011 17" [Root Port 00:1c.1 = 8086:1c12] + MacBookPro9,1 2012 15" [Root Port 00:1c.1 = 8086:1e12] + MacBookPro9,2 2012 13" [Root Port 00:1c.1 = 8086:1e12] + MacBookPro10,1 2012 15" [Root Port 00:1c.1 = 8086:1e12] + MacBookPro10,2 2012 13" [Root Port 00:1c.1 = 8086:1e12] + +For posterity, spurious interrupts caused by the Broadcom 4331 wireless +card resulted in splats like this (stacktrace omitted): + + irq 17: nobody cared (try booting with the "irqpoll" option) + handlers: + [] pcie_isr + [] sdhci_irq [sdhci] threaded [] sdhci_thread_irq [sdhci] + [] azx_interrupt [snd_hda_codec] + Disabling IRQ #17 + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=79301 +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111781 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=728916 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=895951#c16 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1009819 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1098621 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1149632#c5 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1279130 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1332732 +Tested-by: Konstantin Simanov # [MacBookPro8,1] +Tested-by: Lukas Wunner # [MacBookPro9,1] +Tested-by: Bryan Paradis # [MacBookPro9,2] +Tested-by: Andrew Worsley # [MacBookPro10,1] +Tested-by: Chris Bainbridge # [MacBookPro10,2] +Signed-off-by: Lukas Wunner +Acked-by: Rafał Miłecki +Acked-by: Matt Fleming +Cc: Andy Lutomirski +Cc: Bjorn Helgaas +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Chris Milsted +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Matthew Garrett +Cc: Michael Buesch +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Yinghai Lu +Cc: b43-dev@lists.infradead.org +Cc: linux-pci@vger.kernel.org +Cc: linux-wireless@vger.kernel.org +Link: http://lkml.kernel.org/r/48d0972ac82a53d460e5fce77a07b2560db95203.1465690253.git.lukas@wunner.de +[ Did minor readability edits. ] +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/early-quirks.c | 64 +++++++++++++++++++++++++++++++++++++++++ + drivers/bcma/bcma_private.h | 2 - + include/linux/bcma/bcma.h | 1 + 3 files changed, 65 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/early-quirks.c ++++ b/arch/x86/kernel/early-quirks.c +@@ -11,7 +11,11 @@ + + #include + #include ++#include ++#include + #include ++#include ++#include + #include + #include + #include +@@ -21,6 +25,9 @@ + #include + #include + #include ++#include ++ ++#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg) + + static void __init fix_hypertransport_config(int num, int slot, int func) + { +@@ -596,6 +603,61 @@ static void __init force_disable_hpet(in + #endif + } + ++#define BCM4331_MMIO_SIZE 16384 ++#define BCM4331_PM_CAP 0x40 ++#define bcma_aread32(reg) ioread32(mmio + 1 * BCMA_CORE_SIZE + reg) ++#define bcma_awrite32(reg, val) iowrite32(val, mmio + 1 * BCMA_CORE_SIZE + reg) ++ ++static void __init apple_airport_reset(int bus, int slot, int func) ++{ ++ void __iomem *mmio; ++ u16 pmcsr; ++ u64 addr; ++ int i; ++ ++ if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc.")) ++ return; ++ ++ /* Card may have been put into PCI_D3hot by grub quirk */ ++ pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); ++ ++ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { ++ pmcsr &= ~PCI_PM_CTRL_STATE_MASK; ++ write_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL, pmcsr); ++ mdelay(10); ++ ++ pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); ++ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { ++ dev_err("Cannot power up Apple AirPort card\n"); ++ return; ++ } ++ } ++ ++ addr = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); ++ addr |= (u64)read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_1) << 32; ++ addr &= PCI_BASE_ADDRESS_MEM_MASK; ++ ++ mmio = early_ioremap(addr, BCM4331_MMIO_SIZE); ++ if (!mmio) { ++ dev_err("Cannot iomap Apple AirPort card\n"); ++ return; ++ } ++ ++ pr_info("Resetting Apple AirPort card (left enabled by EFI)\n"); ++ ++ for (i = 0; bcma_aread32(BCMA_RESET_ST) && i < 30; i++) ++ udelay(10); ++ ++ bcma_awrite32(BCMA_RESET_CTL, BCMA_RESET_CTL_RESET); ++ bcma_aread32(BCMA_RESET_CTL); ++ udelay(1); ++ ++ bcma_awrite32(BCMA_RESET_CTL, 0); ++ bcma_aread32(BCMA_RESET_CTL); ++ udelay(10); ++ ++ early_iounmap(mmio, BCM4331_MMIO_SIZE); ++} + + #define QFLAG_APPLY_ONCE 0x1 + #define QFLAG_APPLIED 0x2 +@@ -638,6 +700,8 @@ static struct chipset early_qrk[] __init + */ + { PCI_VENDOR_ID_INTEL, 0x0f00, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, ++ { PCI_VENDOR_ID_BROADCOM, 0x4331, ++ PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, + {} + }; + +--- a/drivers/bcma/bcma_private.h ++++ b/drivers/bcma/bcma_private.h +@@ -8,8 +8,6 @@ + #include + #include + +-#define BCMA_CORE_SIZE 0x1000 +- + #define bcma_err(bus, fmt, ...) \ + pr_err("bus%d: " fmt, (bus)->num, ##__VA_ARGS__) + #define bcma_warn(bus, fmt, ...) \ +--- a/include/linux/bcma/bcma.h ++++ b/include/linux/bcma/bcma.h +@@ -156,6 +156,7 @@ struct bcma_host_ops { + #define BCMA_CORE_DEFAULT 0xFFF + + #define BCMA_MAX_NR_CORES 16 ++#define BCMA_CORE_SIZE 0x1000 + + /* Chip IDs of PCIe devices */ + #define BCMA_CHIP_ID_BCM4313 0x4313 diff --git a/queue-4.4/x86-quirks-apply-nvidia_bugs-quirk-only-on-root-bus.patch b/queue-4.4/x86-quirks-apply-nvidia_bugs-quirk-only-on-root-bus.patch new file mode 100644 index 00000000000..bec033c653b --- /dev/null +++ b/queue-4.4/x86-quirks-apply-nvidia_bugs-quirk-only-on-root-bus.patch @@ -0,0 +1,59 @@ +From 447d29d1d3aed839e74c2401ef63387780ac51ed Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Sun, 12 Jun 2016 12:31:53 +0200 +Subject: x86/quirks: Apply nvidia_bugs quirk only on root bus + +From: Lukas Wunner + +commit 447d29d1d3aed839e74c2401ef63387780ac51ed upstream. + +Since the following commit: + + 8659c406ade3 ("x86: only scan the root bus in early PCI quirks") + +... early quirks are only applied to devices on the root bus. + +The motivation was to prevent application of the nvidia_bugs quirk on +secondary buses. + +We're about to reintroduce scanning of secondary buses for a quirk to +reset the Broadcom 4331 wireless card on 2011/2012 Macs. To prevent +regressions, open code the requirement to apply nvidia_bugs only on the +root bus. + +Signed-off-by: Lukas Wunner +Cc: Andy Lutomirski +Cc: Bjorn Helgaas +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Yinghai Lu +Link: http://lkml.kernel.org/r/4d5477c1d76b2f0387a780f2142bbcdd9fee869b.1465690253.git.lukas@wunner.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/early-quirks.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/x86/kernel/early-quirks.c ++++ b/arch/x86/kernel/early-quirks.c +@@ -76,6 +76,13 @@ static void __init nvidia_bugs(int num, + #ifdef CONFIG_ACPI + #ifdef CONFIG_X86_IO_APIC + /* ++ * Only applies to Nvidia root ports (bus 0) and not to ++ * Nvidia graphics cards with PCI ports on secondary buses. ++ */ ++ if (num) ++ return; ++ ++ /* + * All timer overrides on Nvidia are + * wrong unless HPET is enabled. + * Unfortunately that's not true on many Asus boards. diff --git a/queue-4.4/x86-quirks-reintroduce-scanning-of-secondary-buses.patch b/queue-4.4/x86-quirks-reintroduce-scanning-of-secondary-buses.patch new file mode 100644 index 00000000000..1c76a0fac7b --- /dev/null +++ b/queue-4.4/x86-quirks-reintroduce-scanning-of-secondary-buses.patch @@ -0,0 +1,151 @@ +From 850c321027c2e31d0afc71588974719a4b565550 Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Sun, 12 Jun 2016 12:31:53 +0200 +Subject: x86/quirks: Reintroduce scanning of secondary buses + +From: Lukas Wunner + +commit 850c321027c2e31d0afc71588974719a4b565550 upstream. + +We used to scan secondary buses until the following commit that +was applied in 2009: + + 8659c406ade3 ("x86: only scan the root bus in early PCI quirks") + +which commit constrained early quirks to the root bus only. Its +motivation was to prevent application of the nvidia_bugs quirk +on secondary buses. + +We're about to add a quirk to reset the Broadcom 4331 wireless card on +2011/2012 Macs, which is located on a secondary bus behind a PCIe root +port. To facilitate that, reintroduce scanning of secondary buses. + +The commit message of 8659c406ade3 notes that scanning only the root bus +"saves quite some unnecessary scanning work". The algorithm used prior +to 8659c406ade3 was particularly time consuming because it scanned +buses 0 to 31 brute force. To avoid lengthening boot time, employ a +recursive strategy which only scans buses that are actually reachable +from the root bus. + +Yinghai Lu pointed out that the secondary bus number read from a +bridge's config space may be invalid, in particular a value of 0 would +cause an infinite loop. The PCI core goes beyond that and recurses to a +child bus only if its bus number is greater than the parent bus number +(see pci_scan_bridge()). Since the root bus is numbered 0, this implies +that secondary buses may not be 0. Do the same on early scanning. + +If this algorithm is found to significantly impact boot time or cause +infinite loops on broken hardware, it would be possible to limit its +recursion depth: The Broadcom 4331 quirk applies at depth 1, all others +at depth 0, so the bus need not be scanned deeper than that for now. An +alternative approach would be to revert to scanning only the root bus, +and apply the Broadcom 4331 quirk to the root ports 8086:1c12, 8086:1e12 +and 8086:1e16. Apple always positioned the card behind either of these +three ports. The quirk would then check presence of the card in slot 0 +below the root port and do its deed. + +Signed-off-by: Lukas Wunner +Cc: Andy Lutomirski +Cc: Bjorn Helgaas +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Yinghai Lu +Cc: linux-pci@vger.kernel.org +Link: http://lkml.kernel.org/r/f0daa70dac1a9b2483abdb31887173eb6ab77bdf.1465690253.git.lukas@wunner.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/early-quirks.c | 34 +++++++++++++++++++++------------- + 1 file changed, 21 insertions(+), 13 deletions(-) + +--- a/arch/x86/kernel/early-quirks.c ++++ b/arch/x86/kernel/early-quirks.c +@@ -609,12 +609,6 @@ struct chipset { + void (*f)(int num, int slot, int func); + }; + +-/* +- * Only works for devices on the root bus. If you add any devices +- * not on bus 0 readd another loop level in early_quirks(). But +- * be careful because at least the Nvidia quirk here relies on +- * only matching on bus 0. +- */ + static struct chipset early_qrk[] __initdata = { + { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, + PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, +@@ -647,6 +641,8 @@ static struct chipset early_qrk[] __init + {} + }; + ++static void __init early_pci_scan_bus(int bus); ++ + /** + * check_dev_quirk - apply early quirks to a given PCI device + * @num: bus number +@@ -655,7 +651,7 @@ static struct chipset early_qrk[] __init + * + * Check the vendor & device ID against the early quirks table. + * +- * If the device is single function, let early_quirks() know so we don't ++ * If the device is single function, let early_pci_scan_bus() know so we don't + * poke at this device again. + */ + static int __init check_dev_quirk(int num, int slot, int func) +@@ -664,6 +660,7 @@ static int __init check_dev_quirk(int nu + u16 vendor; + u16 device; + u8 type; ++ u8 sec; + int i; + + class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); +@@ -691,25 +688,36 @@ static int __init check_dev_quirk(int nu + + type = read_pci_config_byte(num, slot, func, + PCI_HEADER_TYPE); ++ ++ if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) { ++ sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS); ++ if (sec > num) ++ early_pci_scan_bus(sec); ++ } ++ + if (!(type & 0x80)) + return -1; + + return 0; + } + +-void __init early_quirks(void) ++static void __init early_pci_scan_bus(int bus) + { + int slot, func; + +- if (!early_pci_allowed()) +- return; +- + /* Poor man's PCI discovery */ +- /* Only scan the root bus */ + for (slot = 0; slot < 32; slot++) + for (func = 0; func < 8; func++) { + /* Only probe function 0 on single fn devices */ +- if (check_dev_quirk(0, slot, func)) ++ if (check_dev_quirk(bus, slot, func)) + break; + } + } ++ ++void __init early_quirks(void) ++{ ++ if (!early_pci_allowed()) ++ return; ++ ++ early_pci_scan_bus(0); ++} diff --git a/queue-4.4/xen-pciback-fix-conf_space-read-write-overlap-check.patch b/queue-4.4/xen-pciback-fix-conf_space-read-write-overlap-check.patch new file mode 100644 index 00000000000..331f342203b --- /dev/null +++ b/queue-4.4/xen-pciback-fix-conf_space-read-write-overlap-check.patch @@ -0,0 +1,55 @@ +From 02ef871ecac290919ea0c783d05da7eedeffc10e Mon Sep 17 00:00:00 2001 +From: Andrey Grodzovsky +Date: Tue, 21 Jun 2016 14:26:36 -0400 +Subject: xen/pciback: Fix conf_space read/write overlap check. + +From: Andrey Grodzovsky + +commit 02ef871ecac290919ea0c783d05da7eedeffc10e upstream. + +Current overlap check is evaluating to false a case where a filter +field is fully contained (proper subset) of a r/w request. This +change applies classical overlap check instead to include all the +scenarios. + +More specifically, for (Hilscher GmbH CIFX 50E-DP(M/S)) device driver +the logic is such that the entire confspace is read and written in 4 +byte chunks. In this case as an example, CACHE_LINE_SIZE, +LATENCY_TIMER and PCI_BIST are arriving together in one call to +xen_pcibk_config_write() with offset == 0xc and size == 4. With the +exsisting overlap check the LATENCY_TIMER field (offset == 0xd, length +== 1) is fully contained in the write request and hence is excluded +from write, which is incorrect. + +Signed-off-by: Andrey Grodzovsky +Reviewed-by: Boris Ostrovsky +Reviewed-by: Jan Beulich +Signed-off-by: David Vrabel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/xen-pciback/conf_space.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/drivers/xen/xen-pciback/conf_space.c ++++ b/drivers/xen/xen-pciback/conf_space.c +@@ -183,8 +183,7 @@ int xen_pcibk_config_read(struct pci_dev + field_start = OFFSET(cfg_entry); + field_end = OFFSET(cfg_entry) + field->size; + +- if ((req_start >= field_start && req_start < field_end) +- || (req_end > field_start && req_end <= field_end)) { ++ if (req_end > field_start && field_end > req_start) { + err = conf_space_read(dev, cfg_entry, field_start, + &tmp_val); + if (err) +@@ -230,8 +229,7 @@ int xen_pcibk_config_write(struct pci_de + field_start = OFFSET(cfg_entry); + field_end = OFFSET(cfg_entry) + field->size; + +- if ((req_start >= field_start && req_start < field_end) +- || (req_end > field_start && req_end <= field_end)) { ++ if (req_end > field_start && field_end > req_start) { + tmp_val = 0; + + err = xen_pcibk_config_read(dev, field_start, diff --git a/queue-4.4/xenbus-don-t-bail-early-from-xenbus_dev_request_and_reply.patch b/queue-4.4/xenbus-don-t-bail-early-from-xenbus_dev_request_and_reply.patch new file mode 100644 index 00000000000..c5edacc6aef --- /dev/null +++ b/queue-4.4/xenbus-don-t-bail-early-from-xenbus_dev_request_and_reply.patch @@ -0,0 +1,51 @@ +From 7469be95a487319514adce2304ad2af3553d2fc9 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Thu, 7 Jul 2016 01:32:04 -0600 +Subject: xenbus: don't bail early from xenbus_dev_request_and_reply() + +From: Jan Beulich + +commit 7469be95a487319514adce2304ad2af3553d2fc9 upstream. + +xenbus_dev_request_and_reply() needs to track whether a transaction is +open. For XS_TRANSACTION_START messages it calls transaction_start() +and for XS_TRANSACTION_END messages it calls transaction_end(). + +If sending an XS_TRANSACTION_START message fails or responds with an +an error, the transaction is not open and transaction_end() must be +called. + +If sending an XS_TRANSACTION_END message fails, the transaction is +still open, but if an error response is returned the transaction is +closed. + +Commit 027bd7e89906 ("xen/xenbus: Avoid synchronous wait on XenBus +stalling shutdown/restart") introduced a regression where failed +XS_TRANSACTION_START messages were leaving the transaction open. This +can cause problems with suspend (and migration) as all transactions +must be closed before suspending. + +It appears that the problematic change was added accidentally, so just +remove it. + +Signed-off-by: Jan Beulich +Cc: Konrad Rzeszutek Wilk +Signed-off-by: David Vrabel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/xenbus/xenbus_xs.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/drivers/xen/xenbus/xenbus_xs.c ++++ b/drivers/xen/xenbus/xenbus_xs.c +@@ -250,9 +250,6 @@ void *xenbus_dev_request_and_reply(struc + + mutex_unlock(&xs_state.request_mutex); + +- if (IS_ERR(ret)) +- return ret; +- + if ((msg->type == XS_TRANSACTION_END) || + ((req_msg.type == XS_TRANSACTION_START) && + (msg->type == XS_ERROR))) diff --git a/queue-4.4/xenbus-don-t-bug-on-user-mode-induced-condition.patch b/queue-4.4/xenbus-don-t-bug-on-user-mode-induced-condition.patch new file mode 100644 index 00000000000..c8e42611595 --- /dev/null +++ b/queue-4.4/xenbus-don-t-bug-on-user-mode-induced-condition.patch @@ -0,0 +1,57 @@ +From 0beef634b86a1350c31da5fcc2992f0d7c8a622b Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Thu, 7 Jul 2016 01:23:57 -0600 +Subject: xenbus: don't BUG() on user mode induced condition + +From: Jan Beulich + +commit 0beef634b86a1350c31da5fcc2992f0d7c8a622b upstream. + +Inability to locate a user mode specified transaction ID should not +lead to a kernel crash. For other than XS_TRANSACTION_START also +don't issue anything to xenbus if the specified ID doesn't match that +of any active transaction. + +Signed-off-by: Jan Beulich +Signed-off-by: David Vrabel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/xenbus/xenbus_dev_frontend.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/drivers/xen/xenbus/xenbus_dev_frontend.c ++++ b/drivers/xen/xenbus/xenbus_dev_frontend.c +@@ -316,11 +316,18 @@ static int xenbus_write_transaction(unsi + rc = -ENOMEM; + goto out; + } ++ } else { ++ list_for_each_entry(trans, &u->transactions, list) ++ if (trans->handle.id == u->u.msg.tx_id) ++ break; ++ if (&trans->list == &u->transactions) ++ return -ESRCH; + } + + reply = xenbus_dev_request_and_reply(&u->u.msg); + if (IS_ERR(reply)) { +- kfree(trans); ++ if (msg_type == XS_TRANSACTION_START) ++ kfree(trans); + rc = PTR_ERR(reply); + goto out; + } +@@ -333,12 +340,7 @@ static int xenbus_write_transaction(unsi + list_add(&trans->list, &u->transactions); + } + } else if (u->u.msg.type == XS_TRANSACTION_END) { +- list_for_each_entry(trans, &u->transactions, list) +- if (trans->handle.id == u->u.msg.tx_id) +- break; +- BUG_ON(&trans->list == &u->transactions); + list_del(&trans->list); +- + kfree(trans); + } +