From: Greg Kroah-Hartman Date: Tue, 13 Oct 2015 21:49:40 +0000 (-0700) Subject: 4.2-stable patches X-Git-Tag: v3.10.91~78 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1b6cc1102b01e90f6edae976acf8209b8fb599cb;p=thirdparty%2Fkernel%2Fstable-queue.git 4.2-stable patches added patches: block-blkg_destroy_all-should-clear-q-root_blkg-and-root_rl.blkg.patch blockdev-don-t-set-s_dax-for-misaligned-partitions.patch dax-fix-o_direct-i-o-to-the-last-block-of-a-blockdev.patch dmaengine-at_xdmac-change-block-increment-addressing-mode.patch dmaengine-at_xdmac-clean-used-descriptor.patch dmaengine-at_xdmac-fix-bug-in-prep_dma_cyclic.patch dmaengine-dw-properly-read-dwc_params-register.patch dmaengine-pxa_dma-fix-initial-list-move.patch pmem-add-proper-fencing-to-pmem_rw_page.patch x86-alternatives-make-optimize_nops-interrupt-safe-and-synced.patch x86-apic-serialize-lvtt-and-tsc_deadline-writes.patch x86-ioapic-force-affinity-setting-in-setup_ioapic_dest.patch x86-pci-intel_mid_pci-work-around-for-irq0-assignment.patch x86-platform-fix-geode-lx-timekeeping-in-the-generic-x86-build.patch --- diff --git a/queue-4.2/block-blkg_destroy_all-should-clear-q-root_blkg-and-root_rl.blkg.patch b/queue-4.2/block-blkg_destroy_all-should-clear-q-root_blkg-and-root_rl.blkg.patch new file mode 100644 index 00000000000..927377702ab --- /dev/null +++ b/queue-4.2/block-blkg_destroy_all-should-clear-q-root_blkg-and-root_rl.blkg.patch @@ -0,0 +1,65 @@ +From 6fe810bda0bd9a5d7674fc671fac27b8aa8ec243 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Sat, 5 Sep 2015 15:47:36 -0400 +Subject: block: blkg_destroy_all() should clear q->root_blkg and ->root_rl.blkg + +From: Tejun Heo + +commit 6fe810bda0bd9a5d7674fc671fac27b8aa8ec243 upstream. + +While making the root blkg unconditional, ec13b1d6f0a0 ("blkcg: always +create the blkcg_gq for the root blkcg") removed the part which clears +q->root_blkg and ->root_rl.blkg during q exit. This leaves the two +pointers dangling after blkg_destroy_all(). 
blk-throttle exit path +performs blkg traversals and dereferences ->root_blkg and can lead to +the following oops. + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000558 + IP: [] __blkg_lookup+0x26/0x70 + ... + task: ffff88001b4e2580 ti: ffff88001ac0c000 task.ti: ffff88001ac0c000 + RIP: 0010:[] [] __blkg_lookup+0x26/0x70 + ... + Call Trace: + [] blk_throtl_drain+0x5a/0x110 + [] blkcg_drain_queue+0x18/0x20 + [] __blk_drain_queue+0xc0/0x170 + [] blk_queue_bypass_start+0x61/0x80 + [] blkcg_deactivate_policy+0x39/0x100 + [] blk_throtl_exit+0x38/0x50 + [] blkcg_exit_queue+0x3e/0x50 + [] blk_release_queue+0x1e/0xc0 + ... + +While the bug is a straightforward use-after-free bug, it is tricky to +reproduce because blkg release is RCU protected and the rest of exit +path usually finishes before RCU grace period. + +This patch fixes the bug by updating blkg_destroy_all() to clear +q->root_blkg and ->root_rl.blkg. + +Signed-off-by: Tejun Heo +Reported-by: "Richard W.M. Jones" +Reported-by: Josh Boyer +Link: http://lkml.kernel.org/g/CA+5PVA5rzQ0s4723n5rHBcxQa9t0cW8BPPBekr_9aMRoWt2aYg@mail.gmail.com +Fixes: ec13b1d6f0a0 ("blkcg: always create the blkcg_gq for the root blkcg") +Tested-by: Richard W.M. 
Jones +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-cgroup.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/block/blk-cgroup.c ++++ b/block/blk-cgroup.c +@@ -387,6 +387,9 @@ static void blkg_destroy_all(struct requ + blkg_destroy(blkg); + spin_unlock(&blkcg->lock); + } ++ ++ q->root_blkg = NULL; ++ q->root_rl.blkg = NULL; + } + + /* diff --git a/queue-4.2/blockdev-don-t-set-s_dax-for-misaligned-partitions.patch b/queue-4.2/blockdev-don-t-set-s_dax-for-misaligned-partitions.patch new file mode 100644 index 00000000000..75fa4e9b0cc --- /dev/null +++ b/queue-4.2/blockdev-don-t-set-s_dax-for-misaligned-partitions.patch @@ -0,0 +1,38 @@ +From f0b2e563bc419df7c1b3d2f494574c25125f6aed Mon Sep 17 00:00:00 2001 +From: Jeff Moyer +Date: Fri, 14 Aug 2015 16:15:32 -0400 +Subject: blockdev: don't set S_DAX for misaligned partitions + +From: Jeff Moyer + +commit f0b2e563bc419df7c1b3d2f494574c25125f6aed upstream. + +The dax code doesn't currently support misaligned partitions, +so disable O_DIRECT via dax until such time as that support +materializes. + +Suggested-by: Boaz Harrosh +Signed-off-by: Jeff Moyer +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman + +--- + fs/block_dev.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/block_dev.c ++++ b/fs/block_dev.c +@@ -1241,6 +1241,13 @@ static int __blkdev_get(struct block_dev + goto out_clear; + } + bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); ++ /* ++ * If the partition is not aligned on a page ++ * boundary, we can't do dax I/O to it. 
++ */ ++ if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || ++ (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) ++ bdev->bd_inode->i_flags &= ~S_DAX; + } + } else { + if (bdev->bd_contains == bdev) { diff --git a/queue-4.2/dax-fix-o_direct-i-o-to-the-last-block-of-a-blockdev.patch b/queue-4.2/dax-fix-o_direct-i-o-to-the-last-block-of-a-blockdev.patch new file mode 100644 index 00000000000..a359edfe912 --- /dev/null +++ b/queue-4.2/dax-fix-o_direct-i-o-to-the-last-block-of-a-blockdev.patch @@ -0,0 +1,43 @@ +From e94f5a2285fc94202a9efb2c687481f29b64132c Mon Sep 17 00:00:00 2001 +From: Jeff Moyer +Date: Fri, 14 Aug 2015 16:15:31 -0400 +Subject: dax: fix O_DIRECT I/O to the last block of a blockdev + +From: Jeff Moyer + +commit e94f5a2285fc94202a9efb2c687481f29b64132c upstream. + +commit bbab37ddc20b (block: Add support for DAX reads/writes to +block devices) caused a regression in mkfs.xfs. That utility +sets the block size of the device to the logical block size +using the BLKBSZSET ioctl, and then issues a single sector read +from the last sector of the device. This results in the dax_io +code trying to do a page-sized read from 512 bytes from the end +of the device. The result is -ERANGE being returned to userspace. + +The fix is to align the block to the page size before calling +get_block. + +Thanks to willy for simplifying my original patch. 
+ +Signed-off-by: Jeff Moyer +Tested-by: Linda Knippers +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman + +--- + fs/dax.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -116,7 +116,8 @@ static ssize_t dax_io(struct inode *inod + unsigned len; + if (pos == max) { + unsigned blkbits = inode->i_blkbits; +- sector_t block = pos >> blkbits; ++ long page = pos >> PAGE_SHIFT; ++ sector_t block = page << (PAGE_SHIFT - blkbits); + unsigned first = pos - (block << blkbits); + long size; + diff --git a/queue-4.2/dmaengine-at_xdmac-change-block-increment-addressing-mode.patch b/queue-4.2/dmaengine-at_xdmac-change-block-increment-addressing-mode.patch new file mode 100644 index 00000000000..7953d91bb39 --- /dev/null +++ b/queue-4.2/dmaengine-at_xdmac-change-block-increment-addressing-mode.patch @@ -0,0 +1,45 @@ +From a1cf09031e641d3cceaca4a4dd20ef6a785bc9b3 Mon Sep 17 00:00:00 2001 +From: Maxime Ripard +Date: Tue, 15 Sep 2015 15:36:00 +0200 +Subject: dmaengine: at_xdmac: change block increment addressing mode + +From: Maxime Ripard + +commit a1cf09031e641d3cceaca4a4dd20ef6a785bc9b3 upstream. + +The addressing mode we were using was not only incrementing the address at +each microblock, but also at each data boundary, which was severely slowing +the transfer, without any benefit since we were not using the data stride. + +Switch to the micro block increment only in order to get back to an +acceptable performance level. 
+ +Signed-off-by: Maxime Ripard +Signed-off-by: Ludovic Desroches +Fixes: 6007ccb57744 ("dmaengine: xdmac: Add interleaved transfer support") +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma/at_xdmac.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -878,14 +878,14 @@ at_xdmac_interleaved_queue_desc(struct d + + if (xt->src_inc) { + if (xt->src_sgl) +- chan_cc |= AT_XDMAC_CC_SAM_UBS_DS_AM; ++ chan_cc |= AT_XDMAC_CC_SAM_UBS_AM; + else + chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM; + } + + if (xt->dst_inc) { + if (xt->dst_sgl) +- chan_cc |= AT_XDMAC_CC_DAM_UBS_DS_AM; ++ chan_cc |= AT_XDMAC_CC_DAM_UBS_AM; + else + chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM; + } diff --git a/queue-4.2/dmaengine-at_xdmac-clean-used-descriptor.patch b/queue-4.2/dmaengine-at_xdmac-clean-used-descriptor.patch new file mode 100644 index 00000000000..9f3b821f3ab --- /dev/null +++ b/queue-4.2/dmaengine-at_xdmac-clean-used-descriptor.patch @@ -0,0 +1,50 @@ +From 0be2136b67067617b36c70e525d7534108361e36 Mon Sep 17 00:00:00 2001 +From: Ludovic Desroches +Date: Tue, 15 Sep 2015 15:39:11 +0200 +Subject: dmaengine: at_xdmac: clean used descriptor + +From: Ludovic Desroches + +commit 0be2136b67067617b36c70e525d7534108361e36 upstream. + +When putting back a descriptor to the free descs list, some fields are +not set to 0, it can cause bugs if someone uses it without having this +in mind. +Descriptor are not put back one by one so it is easier to clean +descriptors when we request them. 
+ +Signed-off-by: Ludovic Desroches +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma/at_xdmac.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -455,6 +455,15 @@ static struct at_xdmac_desc *at_xdmac_al + return desc; + } + ++void at_xdmac_init_used_desc(struct at_xdmac_desc *desc) ++{ ++ memset(&desc->lld, 0, sizeof(desc->lld)); ++ INIT_LIST_HEAD(&desc->descs_list); ++ desc->direction = DMA_TRANS_NONE; ++ desc->xfer_size = 0; ++ desc->active_xfer = false; ++} ++ + /* Call must be protected by lock. */ + static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) + { +@@ -466,7 +475,7 @@ static struct at_xdmac_desc *at_xdmac_ge + desc = list_first_entry(&atchan->free_descs_list, + struct at_xdmac_desc, desc_node); + list_del(&desc->desc_node); +- desc->active_xfer = false; ++ at_xdmac_init_used_desc(desc); + } + + return desc; diff --git a/queue-4.2/dmaengine-at_xdmac-fix-bug-in-prep_dma_cyclic.patch b/queue-4.2/dmaengine-at_xdmac-fix-bug-in-prep_dma_cyclic.patch new file mode 100644 index 00000000000..655fc4a82c0 --- /dev/null +++ b/queue-4.2/dmaengine-at_xdmac-fix-bug-in-prep_dma_cyclic.patch @@ -0,0 +1,37 @@ +From e900c30dc1bb0cbc07708e9be1188f531632b2ef Mon Sep 17 00:00:00 2001 +From: Ludovic Desroches +Date: Wed, 22 Jul 2015 16:12:29 +0200 +Subject: dmaengine: at_xdmac: fix bug in prep_dma_cyclic + +From: Ludovic Desroches + +commit e900c30dc1bb0cbc07708e9be1188f531632b2ef upstream. + +In cyclic mode, the round chaining has been broken by the introduction +of at_xdmac_queue_desc(): AT_XDMAC_MBR_UBC_NDE is set for all descriptors +excepted for the last one. at_xdmac_queue_desc() has to be called one +more time to chain the last and the first descriptors. 
+ +Signed-off-by: Ludovic Desroches +Fixes: 0d0ee751f7f7 ("dmaengine: xdmac: Rework the chaining logic") +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma/at_xdmac.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -806,10 +806,7 @@ at_xdmac_prep_dma_cyclic(struct dma_chan + list_add_tail(&desc->desc_node, &first->descs_list); + } + +- prev->lld.mbr_nda = first->tx_dma_desc.phys; +- dev_dbg(chan2dev(chan), +- "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", +- __func__, prev, &prev->lld.mbr_nda); ++ at_xdmac_queue_desc(chan, prev, first); + first->tx_dma_desc.flags = flags; + first->xfer_size = buf_len; + first->direction = direction; diff --git a/queue-4.2/dmaengine-dw-properly-read-dwc_params-register.patch b/queue-4.2/dmaengine-dw-properly-read-dwc_params-register.patch new file mode 100644 index 00000000000..c29b8eae232 --- /dev/null +++ b/queue-4.2/dmaengine-dw-properly-read-dwc_params-register.patch @@ -0,0 +1,52 @@ +From 6bea0f6d1c47b07be88dfd93f013ae05fcb3d8bf Mon Sep 17 00:00:00 2001 +From: Andy Shevchenko +Date: Mon, 28 Sep 2015 18:57:03 +0300 +Subject: dmaengine: dw: properly read DWC_PARAMS register + +From: Andy Shevchenko + +commit 6bea0f6d1c47b07be88dfd93f013ae05fcb3d8bf upstream. + +In case we have less than maximum allowed channels (8) and autoconfiguration is +enabled the DWC_PARAMS read is wrong because it uses different arithmetic to +what is needed for channel priority setup. + +Re-do the calculations properly. This now works on AVR32 board well. 
+ +Fixes: fed2574b3c9f (dw_dmac: introduce software emulation of LLP transfers) +Cc: yitian.bu@tangramtek.com +Signed-off-by: Andy Shevchenko +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma/dw/core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/dma/dw/core.c ++++ b/drivers/dma/dw/core.c +@@ -1591,7 +1591,6 @@ int dw_dma_probe(struct dw_dma_chip *chi + INIT_LIST_HEAD(&dw->dma.channels); + for (i = 0; i < nr_channels; i++) { + struct dw_dma_chan *dwc = &dw->chan[i]; +- int r = nr_channels - i - 1; + + dwc->chan.device = &dw->dma; + dma_cookie_init(&dwc->chan); +@@ -1603,7 +1602,7 @@ int dw_dma_probe(struct dw_dma_chip *chi + + /* 7 is highest priority & 0 is lowest. */ + if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING) +- dwc->priority = r; ++ dwc->priority = nr_channels - i - 1; + else + dwc->priority = i; + +@@ -1622,6 +1621,7 @@ int dw_dma_probe(struct dw_dma_chip *chi + /* Hardware configuration */ + if (autocfg) { + unsigned int dwc_params; ++ unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; + void __iomem *addr = chip->regs + r * sizeof(u32); + + dwc_params = dma_read_byaddr(addr, DWC_PARAMS); diff --git a/queue-4.2/dmaengine-pxa_dma-fix-initial-list-move.patch b/queue-4.2/dmaengine-pxa_dma-fix-initial-list-move.patch new file mode 100644 index 00000000000..366d9dfdf2f --- /dev/null +++ b/queue-4.2/dmaengine-pxa_dma-fix-initial-list-move.patch @@ -0,0 +1,35 @@ +From aebf5a67db8dbacbc624b9c652b81f5460b15eff Mon Sep 17 00:00:00 2001 +From: Robert Jarzmik +Date: Mon, 21 Sep 2015 11:06:32 +0200 +Subject: dmaengine: pxa_dma: fix initial list move + +From: Robert Jarzmik + +commit aebf5a67db8dbacbc624b9c652b81f5460b15eff upstream. + +Since the commit to have an allocated list of virtual descriptors was +reverted, the pxa_dma driver is broken, as it assumes the descriptor is +placed on the allocated list upon allocation. 
+ +Fix the issue in pxa_dma by making an allocated virtual descriptor a +singleton. + +Fixes: 8c8fe97b2b8a ("Revert "dmaengine: virt-dma: don't always free descriptor upon completion"") +Signed-off-by: Robert Jarzmik +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma/pxa_dma.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/dma/pxa_dma.c ++++ b/drivers/dma/pxa_dma.c +@@ -888,6 +888,7 @@ pxad_tx_prep(struct virt_dma_chan *vc, s + struct dma_async_tx_descriptor *tx; + struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc); + ++ INIT_LIST_HEAD(&vd->node); + tx = vchan_tx_prep(vc, vd, tx_flags); + tx->tx_submit = pxad_tx_submit; + dev_dbg(&chan->vc.chan.dev->device, diff --git a/queue-4.2/pmem-add-proper-fencing-to-pmem_rw_page.patch b/queue-4.2/pmem-add-proper-fencing-to-pmem_rw_page.patch new file mode 100644 index 00000000000..2746c2ec961 --- /dev/null +++ b/queue-4.2/pmem-add-proper-fencing-to-pmem_rw_page.patch @@ -0,0 +1,37 @@ +From ba8fe0f85e15d047686caf8a42463b592c63c98c Mon Sep 17 00:00:00 2001 +From: Ross Zwisler +Date: Wed, 16 Sep 2015 14:52:21 -0600 +Subject: pmem: add proper fencing to pmem_rw_page() + +From: Ross Zwisler + +commit ba8fe0f85e15d047686caf8a42463b592c63c98c upstream. + +pmem_rw_page() needs to call wmb_pmem() on writes to make sure that the +newly written data is durable. This flow was added to pmem_rw_bytes() +and pmem_make_request() with this commit: + +commit 61031952f4c8 ("arch, x86: pmem api for ensuring durability of + persistent memory updates") + +...the pmem_rw_page() path was missed. 
+ +Signed-off-by: Ross Zwisler +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/nvdimm/pmem.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/nvdimm/pmem.c ++++ b/drivers/nvdimm/pmem.c +@@ -86,6 +86,8 @@ static int pmem_rw_page(struct block_dev + struct pmem_device *pmem = bdev->bd_disk->private_data; + + pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector); ++ if (rw & WRITE) ++ wmb_pmem(); + page_endio(page, rw & WRITE, 0); + + return 0; diff --git a/queue-4.2/series b/queue-4.2/series index 45974f6b62a..c697fc52353 100644 --- a/queue-4.2/series +++ b/queue-4.2/series @@ -48,3 +48,17 @@ arm-exynos-reset-little-cores-when-cpu-is-up.patch arm-dts-sunxi-raise-minimum-cpu-voltage-for-sun7i-a20-to-meet-soc-specifications.patch arm-dts-fix-wrong-clock-binding-for-sysmmu_fimd1_1-on-exynos5420.patch arm-dts-fix-usb-pin-control-for-imx-rex-dts.patch +dax-fix-o_direct-i-o-to-the-last-block-of-a-blockdev.patch +blockdev-don-t-set-s_dax-for-misaligned-partitions.patch +block-blkg_destroy_all-should-clear-q-root_blkg-and-root_rl.blkg.patch +dmaengine-at_xdmac-change-block-increment-addressing-mode.patch +dmaengine-at_xdmac-clean-used-descriptor.patch +dmaengine-dw-properly-read-dwc_params-register.patch +dmaengine-at_xdmac-fix-bug-in-prep_dma_cyclic.patch +dmaengine-pxa_dma-fix-initial-list-move.patch +pmem-add-proper-fencing-to-pmem_rw_page.patch +x86-apic-serialize-lvtt-and-tsc_deadline-writes.patch +x86-alternatives-make-optimize_nops-interrupt-safe-and-synced.patch +x86-platform-fix-geode-lx-timekeeping-in-the-generic-x86-build.patch +x86-ioapic-force-affinity-setting-in-setup_ioapic_dest.patch +x86-pci-intel_mid_pci-work-around-for-irq0-assignment.patch diff --git a/queue-4.2/x86-alternatives-make-optimize_nops-interrupt-safe-and-synced.patch b/queue-4.2/x86-alternatives-make-optimize_nops-interrupt-safe-and-synced.patch new file mode 100644 index 00000000000..2f4aceda0b7 --- /dev/null +++ 
b/queue-4.2/x86-alternatives-make-optimize_nops-interrupt-safe-and-synced.patch @@ -0,0 +1,70 @@ +From 66c117d7fa2ae429911e60d84bf31a90b2b96189 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 3 Sep 2015 12:34:55 +0200 +Subject: x86/alternatives: Make optimize_nops() interrupt safe and synced + +From: Thomas Gleixner + +commit 66c117d7fa2ae429911e60d84bf31a90b2b96189 upstream. + +Richard reported the following crash: + +[ 0.036000] BUG: unable to handle kernel paging request at 55501e06 +[ 0.036000] IP: [] common_interrupt+0xb/0x38 +[ 0.036000] Call Trace: +[ 0.036000] [] ? add_nops+0x90/0xa0 +[ 0.036000] [] apply_alternatives+0x274/0x630 + +Chuck decoded: + + " 0: 8d 90 90 83 04 24 lea 0x24048390(%eax),%edx + 6: 80 fc 0f cmp $0xf,%ah + 9: a8 0f test $0xf,%al + >> b: a0 06 1e 50 55 mov 0x55501e06,%al + 10: 57 push %edi + 11: 56 push %esi + + Interrupt 0x30 occurred while the alternatives code was replacing the + initial 0x90,0x90,0x90 NOPs (from the ASM_CLAC macro) with the + optimized version, 0x8d,0x76,0x00. Only the first byte has been + replaced so far, and it makes a mess out of the insn decoding." + +optimize_nops() is buggy in two aspects: + +- It's not disabling interrupts across the modification +- It's lacking a sync_core() call + +Add both. + +Fixes: 4fd4b6e5537c 'x86/alternatives: Use optimized NOPs for padding' +Reported-and-tested-by: "Richard W.M. Jones" +Signed-off-by: Thomas Gleixner +Cc: Richard W.M. 
Jones +Cc: Chuck Ebbert +Cc: Borislav Petkov +Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1509031232340.15006@nanos +Signed-off-by: Thomas Gleixner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/alternative.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -338,10 +338,15 @@ done: + + static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) + { ++ unsigned long flags; ++ + if (instr[0] != 0x90) + return; + ++ local_irq_save(flags); + add_nops(instr + (a->instrlen - a->padlen), a->padlen); ++ sync_core(); ++ local_irq_restore(flags); + + DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", + instr, a->instrlen - a->padlen, a->padlen); diff --git a/queue-4.2/x86-apic-serialize-lvtt-and-tsc_deadline-writes.patch b/queue-4.2/x86-apic-serialize-lvtt-and-tsc_deadline-writes.patch new file mode 100644 index 00000000000..32f35049de7 --- /dev/null +++ b/queue-4.2/x86-apic-serialize-lvtt-and-tsc_deadline-writes.patch @@ -0,0 +1,77 @@ +From 5d7c631d926b59aa16f3c56eaeb83f1036c81dc7 Mon Sep 17 00:00:00 2001 +From: Shaohua Li +Date: Thu, 30 Jul 2015 16:24:43 -0700 +Subject: x86/apic: Serialize LVTT and TSC_DEADLINE writes + +From: Shaohua Li + +commit 5d7c631d926b59aa16f3c56eaeb83f1036c81dc7 upstream. + +The APIC LVTT register is MMIO mapped but the TSC_DEADLINE register is an +MSR. The write to the TSC_DEADLINE MSR is not serializing, so it's not +guaranteed that the write to LVTT has reached the APIC before the +TSC_DEADLINE MSR is written. In such a case the write to the MSR is +ignored and as a consequence the local timer interrupt never fires. + +The SDM describes this issue for xAPIC and x2APIC modes. The +serialization methods recommended by the SDM differ. + +xAPIC: + "1. Memory-mapped write to LVT Timer Register, setting bits 18:17 to 10b. + 2. WRMSR to the IA32_TSC_DEADLINE MSR a value much larger than current time-stamp counter. + 3. 
If RDMSR of the IA32_TSC_DEADLINE MSR returns zero, go to step 2. + 4. WRMSR to the IA32_TSC_DEADLINE MSR the desired deadline." + +x2APIC: + "To allow for efficient access to the APIC registers in x2APIC mode, + the serializing semantics of WRMSR are relaxed when writing to the + APIC registers. Thus, system software should not use 'WRMSR to APIC + registers in x2APIC mode' as a serializing instruction. Read and write + accesses to the APIC registers will occur in program order. A WRMSR to + an APIC register may complete before all preceding stores are globally + visible; software can prevent this by inserting a serializing + instruction, an SFENCE, or an MFENCE before the WRMSR." + +The xAPIC method is to just wait for the memory mapped write to hit +the LVTT by checking whether the MSR write has reached the hardware. +There is no reason why a proper MFENCE after the memory mapped write would +not do the same. Andi Kleen confirmed that MFENCE is sufficient for the +xAPIC case as well. + +Issue MFENCE before writing to the TSC_DEADLINE MSR. This can be done +unconditionally as all CPUs which have TSC_DEADLINE also have MFENCE +support. + +[ tglx: Massaged the changelog ] + +Signed-off-by: Shaohua Li +Reviewed-by: Ingo Molnar +Cc: +Cc: +Cc: +Cc: Andi Kleen +Cc: H. Peter Anvin +Link: http://lkml.kernel.org/r/20150909041352.GA2059853@devbig257.prn2.facebook.com +Signed-off-by: Thomas Gleixner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/apic/apic.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/x86/kernel/apic/apic.c ++++ b/arch/x86/kernel/apic/apic.c +@@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned i + apic_write(APIC_LVTT, lvtt_value); + + if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { ++ /* ++ * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, ++ * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. ++ * According to Intel, MFENCE can do the serialization here. 
++ */ ++ asm volatile("mfence" : : : "memory"); ++ + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); + return; + } diff --git a/queue-4.2/x86-ioapic-force-affinity-setting-in-setup_ioapic_dest.patch b/queue-4.2/x86-ioapic-force-affinity-setting-in-setup_ioapic_dest.patch new file mode 100644 index 00000000000..12fc81453a1 --- /dev/null +++ b/queue-4.2/x86-ioapic-force-affinity-setting-in-setup_ioapic_dest.patch @@ -0,0 +1,63 @@ +From 4857c91f0d195f05908fff296ba1ec5fca87066c Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 14 Sep 2015 12:00:55 +0200 +Subject: x86/ioapic: Force affinity setting in setup_ioapic_dest() + +From: Thomas Gleixner + +commit 4857c91f0d195f05908fff296ba1ec5fca87066c upstream. + +The recent ioapic cleanups changed the affinity setting in +setup_ioapic_dest() from a direct write to the hardware to the delayed +affinity setup via irq_set_affinity(). + +That results in a warning from chained_irq_exit(): +WARNING: CPU: 0 PID: 5 at kernel/irq/migration.c:32 irq_move_masked_irq +[] irq_move_masked_irq+0xb8/0xc0 +[] ioapic_ack_level+0x111/0x130 +[] intel_gpio_irq_handler+0x148/0x1c0 + +The reason is that irq_set_affinity() does not write directly to the +hardware. It marks the affinity setting as pending and executes it +from the next interrupt. The chained handler infrastructure does not +take the irq descriptor lock for performance reasons because such a +chained interrupt is not visible to any interfaces. So the delayed +affinity setting triggers the warning in irq_move_masked_irq(). + +Restore the old behaviour by calling the set_affinity function of the +ioapic chip in setup_ioapic_dest(). This is safe as none of the +interrupts can be on the fly at this point. 
+ +Fixes: aa5cb97f14a2 'x86/irq: Remove x86_io_apic_ops.set_affinity and related interfaces' +Reported-and-tested-by: Mika Westerberg +Signed-off-by: Thomas Gleixner +Cc: Jiang Liu +Cc: jarkko.nikula@linux.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/apic/io_apic.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/apic/io_apic.c ++++ b/arch/x86/kernel/apic/io_apic.c +@@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void) + int pin, ioapic, irq, irq_entry; + const struct cpumask *mask; + struct irq_data *idata; ++ struct irq_chip *chip; + + if (skip_ioapic_setup == 1) + return; +@@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void) + else + mask = apic->target_cpus(); + +- irq_set_affinity(irq, mask); ++ chip = irq_data_get_irq_chip(idata); ++ chip->irq_set_affinity(idata, mask, false); + } +- + } + #endif + diff --git a/queue-4.2/x86-pci-intel_mid_pci-work-around-for-irq0-assignment.patch b/queue-4.2/x86-pci-intel_mid_pci-work-around-for-irq0-assignment.patch new file mode 100644 index 00000000000..d4a1c034fdd --- /dev/null +++ b/queue-4.2/x86-pci-intel_mid_pci-work-around-for-irq0-assignment.patch @@ -0,0 +1,75 @@ +From 39d9b77b8debb4746e189aa5b61ae6e81ec5eab8 Mon Sep 17 00:00:00 2001 +From: Andy Shevchenko +Date: Wed, 29 Jul 2015 12:16:47 +0300 +Subject: x86/pci/intel_mid_pci: Work around for IRQ0 assignment + +From: Andy Shevchenko + +commit 39d9b77b8debb4746e189aa5b61ae6e81ec5eab8 upstream. + +On Intel Tangier the MMC host controller is wired up to irq 0. But +several other devices have irq 0 associated as well due to a bogus PCI +configuration. + +The first initialized driver will acquire irq 0 and make it +unavailable for other devices. If the sdhci driver is not the first +one it will fail to acquire the interrupt and therefore be non +functional. + +Add a quirk to the pci irq enable function which denies irq 0 to +anything else than the MMC host controller driver on Tangier +platforms. 
+ +Fixes: 90b9aacf912a (serial: 8250_pci: add Intel Tangier support) +Signed-off-by: Andy Shevchenko +Cc: Bjorn Helgaas +Link: http://lkml.kernel.org/r/1438161409-4671-2-git-send-email-andriy.shevchenko@linux.intel.com +Signed-off-by: Thomas Gleixner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/pci/intel_mid_pci.c | 24 ++++++++++++++++++++++-- + 1 file changed, 22 insertions(+), 2 deletions(-) + +--- a/arch/x86/pci/intel_mid_pci.c ++++ b/arch/x86/pci/intel_mid_pci.c +@@ -35,6 +35,9 @@ + + #define PCIE_CAP_OFFSET 0x100 + ++/* Quirks for the listed devices */ ++#define PCI_DEVICE_ID_INTEL_MRFL_MMC 0x1190 ++ + /* Fixed BAR fields */ + #define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ + #define PCI_FIXED_BAR_0_SIZE 0x04 +@@ -214,10 +217,27 @@ static int intel_mid_pci_irq_enable(stru + if (dev->irq_managed && dev->irq > 0) + return 0; + +- if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) ++ switch (intel_mid_identify_cpu()) { ++ case INTEL_MID_CPU_CHIP_TANGIER: + polarity = 0; /* active high */ +- else ++ ++ /* Special treatment for IRQ0 */ ++ if (dev->irq == 0) { ++ /* ++ * TNG has IRQ0 assigned to eMMC controller. But there ++ * are also other devices with bogus PCI configuration ++ * that have IRQ0 assigned. This check ensures that ++ * eMMC gets it. 
++ */ ++ if (dev->device != PCI_DEVICE_ID_INTEL_MRFL_MMC) ++ return -EBUSY; ++ } ++ break; ++ default: + polarity = 1; /* active low */ ++ break; ++ } ++ + ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity); + + /* diff --git a/queue-4.2/x86-platform-fix-geode-lx-timekeeping-in-the-generic-x86-build.patch b/queue-4.2/x86-platform-fix-geode-lx-timekeeping-in-the-generic-x86-build.patch new file mode 100644 index 00000000000..430c1abc0fe --- /dev/null +++ b/queue-4.2/x86-platform-fix-geode-lx-timekeeping-in-the-generic-x86-build.patch @@ -0,0 +1,74 @@ +From 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Wed, 16 Sep 2015 14:10:03 +0100 +Subject: x86/platform: Fix Geode LX timekeeping in the generic x86 build + +From: David Woodhouse + +commit 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d upstream. + +In 2007, commit 07190a08eef36 ("Mark TSC on GeodeLX reliable") +bypassed verification of the TSC on Geode LX. However, this code +(now in the check_system_tsc_reliable() function in +arch/x86/kernel/tsc.c) was only present if CONFIG_MGEODE_LX was +set. + +OpenWRT has recently started building its generic Geode target +for Geode GX, not LX, to include support for additional +platforms. This broke the timekeeping on LX-based devices, +because the TSC wasn't marked as reliable: +https://dev.openwrt.org/ticket/20531 + +By adding a runtime check on is_geode_lx(), we can also include +the fix if CONFIG_MGEODEGX1 or CONFIG_X86_GENERIC are set, thus +fixing the problem. 
+ +Signed-off-by: David Woodhouse +Cc: Andres Salomon +Cc: Linus Torvalds +Cc: Marcelo Tosatti +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/1442409003.131189.87.camel@infradead.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/tsc.c | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + +--- a/arch/x86/kernel/tsc.c ++++ b/arch/x86/kernel/tsc.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ + EXPORT_SYMBOL(cpu_khz); +@@ -1013,15 +1014,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); + + static void __init check_system_tsc_reliable(void) + { +-#ifdef CONFIG_MGEODE_LX +- /* RTSC counts during suspend */ ++#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) ++ if (is_geode_lx()) { ++ /* RTSC counts during suspend */ + #define RTSC_SUSP 0x100 +- unsigned long res_low, res_high; ++ unsigned long res_low, res_high; + +- rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); +- /* Geode_LX - the OLPC CPU has a very reliable TSC */ +- if (res_low & RTSC_SUSP) +- tsc_clocksource_reliable = 1; ++ rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); ++ /* Geode_LX - the OLPC CPU has a very reliable TSC */ ++ if (res_low & RTSC_SUSP) ++ tsc_clocksource_reliable = 1; ++ } + #endif + if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) + tsc_clocksource_reliable = 1;