--- /dev/null
+From 6fe810bda0bd9a5d7674fc671fac27b8aa8ec243 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Sat, 5 Sep 2015 15:47:36 -0400
+Subject: block: blkg_destroy_all() should clear q->root_blkg and ->root_rl.blkg
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 6fe810bda0bd9a5d7674fc671fac27b8aa8ec243 upstream.
+
+While making the root blkg unconditional, ec13b1d6f0a0 ("blkcg: always
+create the blkcg_gq for the root blkcg") removed the part which clears
+q->root_blkg and ->root_rl.blkg during q exit. This leaves the two
+pointers dangling after blkg_destroy_all(). The blk-throttle exit path
+performs blkg traversals that dereference ->root_blkg, which can lead to
+the following oops.
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000558
+ IP: [<ffffffff81389746>] __blkg_lookup+0x26/0x70
+ ...
+ task: ffff88001b4e2580 ti: ffff88001ac0c000 task.ti: ffff88001ac0c000
+ RIP: 0010:[<ffffffff81389746>] [<ffffffff81389746>] __blkg_lookup+0x26/0x70
+ ...
+ Call Trace:
+ [<ffffffff8138d14a>] blk_throtl_drain+0x5a/0x110
+ [<ffffffff8138a108>] blkcg_drain_queue+0x18/0x20
+ [<ffffffff81369a70>] __blk_drain_queue+0xc0/0x170
+ [<ffffffff8136a101>] blk_queue_bypass_start+0x61/0x80
+ [<ffffffff81388c59>] blkcg_deactivate_policy+0x39/0x100
+ [<ffffffff8138d328>] blk_throtl_exit+0x38/0x50
+ [<ffffffff8138a14e>] blkcg_exit_queue+0x3e/0x50
+ [<ffffffff8137016e>] blk_release_queue+0x1e/0xc0
+ ...
+
+While the bug is a straightforward use-after-free bug, it is tricky to
+reproduce because blkg release is RCU protected and the rest of the exit
+path usually finishes before the RCU grace period.
+
+This patch fixes the bug by updating blkg_destroy_all() to clear
+q->root_blkg and ->root_rl.blkg.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: "Richard W.M. Jones" <rjones@redhat.com>
+Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
+Link: http://lkml.kernel.org/g/CA+5PVA5rzQ0s4723n5rHBcxQa9t0cW8BPPBekr_9aMRoWt2aYg@mail.gmail.com
+Fixes: ec13b1d6f0a0 ("blkcg: always create the blkcg_gq for the root blkcg")
+Tested-by: Richard W.M. Jones <rjones@redhat.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-cgroup.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/block/blk-cgroup.c
++++ b/block/blk-cgroup.c
+@@ -387,6 +387,9 @@ static void blkg_destroy_all(struct requ
+ blkg_destroy(blkg);
+ spin_unlock(&blkcg->lock);
+ }
++
++ q->root_blkg = NULL;
++ q->root_rl.blkg = NULL;
+ }
+
+ /*
--- /dev/null
+From f0b2e563bc419df7c1b3d2f494574c25125f6aed Mon Sep 17 00:00:00 2001
+From: Jeff Moyer <jmoyer@redhat.com>
+Date: Fri, 14 Aug 2015 16:15:32 -0400
+Subject: blockdev: don't set S_DAX for misaligned partitions
+
+From: Jeff Moyer <jmoyer@redhat.com>
+
+commit f0b2e563bc419df7c1b3d2f494574c25125f6aed upstream.
+
+The dax code doesn't currently support misaligned partitions,
+so disable O_DIRECT via dax until that support materializes.
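+
+As a rough illustration (assuming 4 KiB pages, so PAGE_SIZE / 512 == 8,
+and made-up partition geometries):
+
+  start_sect = 63,   nr_sects = 204800  ->  63 % 8 != 0, S_DAX is cleared
+  start_sect = 2048, nr_sects = 204800  ->  both % 8 == 0, S_DAX is kept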
+
+Suggested-by: Boaz Harrosh <boaz@plexistor.com>
+Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/block_dev.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -1241,6 +1241,13 @@ static int __blkdev_get(struct block_dev
+ goto out_clear;
+ }
+ bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
++ /*
++ * If the partition is not aligned on a page
++ * boundary, we can't do dax I/O to it.
++ */
++ if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
++ (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
++ bdev->bd_inode->i_flags &= ~S_DAX;
+ }
+ } else {
+ if (bdev->bd_contains == bdev) {
--- /dev/null
+From e94f5a2285fc94202a9efb2c687481f29b64132c Mon Sep 17 00:00:00 2001
+From: Jeff Moyer <jmoyer@redhat.com>
+Date: Fri, 14 Aug 2015 16:15:31 -0400
+Subject: dax: fix O_DIRECT I/O to the last block of a blockdev
+
+From: Jeff Moyer <jmoyer@redhat.com>
+
+commit e94f5a2285fc94202a9efb2c687481f29b64132c upstream.
+
+commit bbab37ddc20b (block: Add support for DAX reads/writes to
+block devices) caused a regression in mkfs.xfs. That utility
+sets the block size of the device to the logical block size
+using the BLKBSZSET ioctl, and then issues a single sector read
+from the last sector of the device. This results in the dax_io
+code trying to do a page-sized read starting 512 bytes before the end
+of the device. The result is -ERANGE being returned to userspace.
+
+The fix is to align the block to the page size before calling
+get_block.
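+
+For instance (a sketch assuming a 1 MiB block device, a 512-byte block
+size set via BLKBSZSET and 4 KiB pages), reading the last sector gives
+pos = 1048064 and blkbits = 9:
+
+  before: block = pos >> blkbits = 2047
+          -> the page-sized access at byte 1048064 runs 3584 bytes past the end
+  after:  block = (pos >> PAGE_SHIFT) << (PAGE_SHIFT - blkbits) = 2040
+          -> the page-sized access at byte 1044480 ends exactly at the 1 MiB boundary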
+
+Thanks to willy for simplifying my original patch.
+
+Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
+Tested-by: Linda Knippers <linda.knippers@hp.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/dax.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/dax.c
++++ b/fs/dax.c
+@@ -116,7 +116,8 @@ static ssize_t dax_io(struct inode *inod
+ unsigned len;
+ if (pos == max) {
+ unsigned blkbits = inode->i_blkbits;
+- sector_t block = pos >> blkbits;
++ long page = pos >> PAGE_SHIFT;
++ sector_t block = page << (PAGE_SHIFT - blkbits);
+ unsigned first = pos - (block << blkbits);
+ long size;
+
--- /dev/null
+From a1cf09031e641d3cceaca4a4dd20ef6a785bc9b3 Mon Sep 17 00:00:00 2001
+From: Maxime Ripard <maxime.ripard@free-electrons.com>
+Date: Tue, 15 Sep 2015 15:36:00 +0200
+Subject: dmaengine: at_xdmac: change block increment addressing mode
+
+From: Maxime Ripard <maxime.ripard@free-electrons.com>
+
+commit a1cf09031e641d3cceaca4a4dd20ef6a785bc9b3 upstream.
+
+The addressing mode we were using was not only incrementing the address at
+each microblock, but also at each data boundary, which was severely slowing
+the transfer, without any benefit since we were not using the data stride.
+
+Switch to the microblock increment only, in order to get back to an
+acceptable performance level.
+
+Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
+Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
+Fixes: 6007ccb57744 ("dmaengine: xdmac: Add interleaved transfer support")
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma/at_xdmac.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/dma/at_xdmac.c
++++ b/drivers/dma/at_xdmac.c
+@@ -878,14 +878,14 @@ at_xdmac_interleaved_queue_desc(struct d
+
+ if (xt->src_inc) {
+ if (xt->src_sgl)
+- chan_cc |= AT_XDMAC_CC_SAM_UBS_DS_AM;
++ chan_cc |= AT_XDMAC_CC_SAM_UBS_AM;
+ else
+ chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM;
+ }
+
+ if (xt->dst_inc) {
+ if (xt->dst_sgl)
+- chan_cc |= AT_XDMAC_CC_DAM_UBS_DS_AM;
++ chan_cc |= AT_XDMAC_CC_DAM_UBS_AM;
+ else
+ chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM;
+ }
--- /dev/null
+From 0be2136b67067617b36c70e525d7534108361e36 Mon Sep 17 00:00:00 2001
+From: Ludovic Desroches <ludovic.desroches@atmel.com>
+Date: Tue, 15 Sep 2015 15:39:11 +0200
+Subject: dmaengine: at_xdmac: clean used descriptor
+
+From: Ludovic Desroches <ludovic.desroches@atmel.com>
+
+commit 0be2136b67067617b36c70e525d7534108361e36 upstream.
+
+When putting a descriptor back on the free descs list, some fields are
+not set to 0, which can cause bugs if someone reuses it without keeping
+this in mind.
+Descriptors are not put back one by one, so it is easier to clean
+descriptors when we request them.
+
+Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma/at_xdmac.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/dma/at_xdmac.c
++++ b/drivers/dma/at_xdmac.c
+@@ -455,6 +455,15 @@ static struct at_xdmac_desc *at_xdmac_al
+ return desc;
+ }
+
++void at_xdmac_init_used_desc(struct at_xdmac_desc *desc)
++{
++ memset(&desc->lld, 0, sizeof(desc->lld));
++ INIT_LIST_HEAD(&desc->descs_list);
++ desc->direction = DMA_TRANS_NONE;
++ desc->xfer_size = 0;
++ desc->active_xfer = false;
++}
++
+ /* Call must be protected by lock. */
+ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan)
+ {
+@@ -466,7 +475,7 @@ static struct at_xdmac_desc *at_xdmac_ge
+ desc = list_first_entry(&atchan->free_descs_list,
+ struct at_xdmac_desc, desc_node);
+ list_del(&desc->desc_node);
+- desc->active_xfer = false;
++ at_xdmac_init_used_desc(desc);
+ }
+
+ return desc;
--- /dev/null
+From e900c30dc1bb0cbc07708e9be1188f531632b2ef Mon Sep 17 00:00:00 2001
+From: Ludovic Desroches <ludovic.desroches@atmel.com>
+Date: Wed, 22 Jul 2015 16:12:29 +0200
+Subject: dmaengine: at_xdmac: fix bug in prep_dma_cyclic
+
+From: Ludovic Desroches <ludovic.desroches@atmel.com>
+
+commit e900c30dc1bb0cbc07708e9be1188f531632b2ef upstream.
+
+In cyclic mode, the round chaining has been broken by the introduction
+of at_xdmac_queue_desc(): AT_XDMAC_MBR_UBC_NDE is set for all descriptors
+except for the last one. at_xdmac_queue_desc() has to be called one
+more time to chain the last and the first descriptors.
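+
+In other words, the descriptor chain of a cyclic transfer has to form a
+ring (rough sketch for a transfer of N periods):
+
+  desc[0] -> desc[1] -> ... -> desc[N-1]
+     ^                            |
+     +----------------------------+      (this closing link was missing)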
+
+Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
+Fixes: 0d0ee751f7f7 ("dmaengine: xdmac: Rework the chaining logic")
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma/at_xdmac.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/dma/at_xdmac.c
++++ b/drivers/dma/at_xdmac.c
+@@ -806,10 +806,7 @@ at_xdmac_prep_dma_cyclic(struct dma_chan
+ list_add_tail(&desc->desc_node, &first->descs_list);
+ }
+
+- prev->lld.mbr_nda = first->tx_dma_desc.phys;
+- dev_dbg(chan2dev(chan),
+- "%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+- __func__, prev, &prev->lld.mbr_nda);
++ at_xdmac_queue_desc(chan, prev, first);
+ first->tx_dma_desc.flags = flags;
+ first->xfer_size = buf_len;
+ first->direction = direction;
--- /dev/null
+From 6bea0f6d1c47b07be88dfd93f013ae05fcb3d8bf Mon Sep 17 00:00:00 2001
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Date: Mon, 28 Sep 2015 18:57:03 +0300
+Subject: dmaengine: dw: properly read DWC_PARAMS register
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+commit 6bea0f6d1c47b07be88dfd93f013ae05fcb3d8bf upstream.
+
+If we have fewer than the maximum allowed channels (8) and
+autoconfiguration is enabled, the DWC_PARAMS read is wrong because it
+needs different arithmetic from the channel priority setup, with which
+it currently shares an index.
+
+Re-do the calculations properly. This now works well on an AVR32 board.
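+
+As a worked example (assuming DW_DMA_MAX_NR_CHANNELS == 8 and a
+controller that autoconfigures nr_channels == 6): the DWC_PARAMS slot
+for channel i sits at index DW_DMA_MAX_NR_CHANNELS - i - 1, so channel 0
+must be read from slot 7, while the old code's shared index
+(nr_channels - i - 1) would read slot 5 -- another channel's parameters.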
+
+Fixes: fed2574b3c9f (dw_dmac: introduce software emulation of LLP transfers)
+Cc: yitian.bu@tangramtek.com
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma/dw/core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/dma/dw/core.c
++++ b/drivers/dma/dw/core.c
+@@ -1591,7 +1591,6 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ INIT_LIST_HEAD(&dw->dma.channels);
+ for (i = 0; i < nr_channels; i++) {
+ struct dw_dma_chan *dwc = &dw->chan[i];
+- int r = nr_channels - i - 1;
+
+ dwc->chan.device = &dw->dma;
+ dma_cookie_init(&dwc->chan);
+@@ -1603,7 +1602,7 @@ int dw_dma_probe(struct dw_dma_chip *chi
+
+ /* 7 is highest priority & 0 is lowest. */
+ if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
+- dwc->priority = r;
++ dwc->priority = nr_channels - i - 1;
+ else
+ dwc->priority = i;
+
+@@ -1622,6 +1621,7 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ /* Hardware configuration */
+ if (autocfg) {
+ unsigned int dwc_params;
++ unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1;
+ void __iomem *addr = chip->regs + r * sizeof(u32);
+
+ dwc_params = dma_read_byaddr(addr, DWC_PARAMS);
--- /dev/null
+From aebf5a67db8dbacbc624b9c652b81f5460b15eff Mon Sep 17 00:00:00 2001
+From: Robert Jarzmik <robert.jarzmik@free.fr>
+Date: Mon, 21 Sep 2015 11:06:32 +0200
+Subject: dmaengine: pxa_dma: fix initial list move
+
+From: Robert Jarzmik <robert.jarzmik@free.fr>
+
+commit aebf5a67db8dbacbc624b9c652b81f5460b15eff upstream.
+
+Since the commit to have an allocated list of virtual descriptors was
+reverted, the pxa_dma driver is broken, as it assumes the descriptor is
+placed on the allocated list upon allocation.
+
+Fix the issue in pxa_dma by making an allocated virtual descriptor a
+singleton.
+
+Fixes: 8c8fe97b2b8a ("Revert "dmaengine: virt-dma: don't always free descriptor upon completion"")
+Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma/pxa_dma.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/dma/pxa_dma.c
++++ b/drivers/dma/pxa_dma.c
+@@ -888,6 +888,7 @@ pxad_tx_prep(struct virt_dma_chan *vc, s
+ struct dma_async_tx_descriptor *tx;
+ struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc);
+
++ INIT_LIST_HEAD(&vd->node);
+ tx = vchan_tx_prep(vc, vd, tx_flags);
+ tx->tx_submit = pxad_tx_submit;
+ dev_dbg(&chan->vc.chan.dev->device,
--- /dev/null
+From ba8fe0f85e15d047686caf8a42463b592c63c98c Mon Sep 17 00:00:00 2001
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+Date: Wed, 16 Sep 2015 14:52:21 -0600
+Subject: pmem: add proper fencing to pmem_rw_page()
+
+From: Ross Zwisler <ross.zwisler@linux.intel.com>
+
+commit ba8fe0f85e15d047686caf8a42463b592c63c98c upstream.
+
+pmem_rw_page() needs to call wmb_pmem() on writes to make sure that the
+newly written data is durable. This flow was added to pmem_rw_bytes()
+and pmem_make_request() with this commit:
+
+commit 61031952f4c8 ("arch, x86: pmem api for ensuring durability of
+ persistent memory updates")
+
+...the pmem_rw_page() path was missed.
+
+Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/pmem.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/nvdimm/pmem.c
++++ b/drivers/nvdimm/pmem.c
+@@ -86,6 +86,8 @@ static int pmem_rw_page(struct block_dev
+ struct pmem_device *pmem = bdev->bd_disk->private_data;
+
+ pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
++ if (rw & WRITE)
++ wmb_pmem();
+ page_endio(page, rw & WRITE, 0);
+
+ return 0;
arm-dts-sunxi-raise-minimum-cpu-voltage-for-sun7i-a20-to-meet-soc-specifications.patch
arm-dts-fix-wrong-clock-binding-for-sysmmu_fimd1_1-on-exynos5420.patch
arm-dts-fix-usb-pin-control-for-imx-rex-dts.patch
+dax-fix-o_direct-i-o-to-the-last-block-of-a-blockdev.patch
+blockdev-don-t-set-s_dax-for-misaligned-partitions.patch
+block-blkg_destroy_all-should-clear-q-root_blkg-and-root_rl.blkg.patch
+dmaengine-at_xdmac-change-block-increment-addressing-mode.patch
+dmaengine-at_xdmac-clean-used-descriptor.patch
+dmaengine-dw-properly-read-dwc_params-register.patch
+dmaengine-at_xdmac-fix-bug-in-prep_dma_cyclic.patch
+dmaengine-pxa_dma-fix-initial-list-move.patch
+pmem-add-proper-fencing-to-pmem_rw_page.patch
+x86-apic-serialize-lvtt-and-tsc_deadline-writes.patch
+x86-alternatives-make-optimize_nops-interrupt-safe-and-synced.patch
+x86-platform-fix-geode-lx-timekeeping-in-the-generic-x86-build.patch
+x86-ioapic-force-affinity-setting-in-setup_ioapic_dest.patch
+x86-pci-intel_mid_pci-work-around-for-irq0-assignment.patch
--- /dev/null
+From 66c117d7fa2ae429911e60d84bf31a90b2b96189 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 3 Sep 2015 12:34:55 +0200
+Subject: x86/alternatives: Make optimize_nops() interrupt safe and synced
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 66c117d7fa2ae429911e60d84bf31a90b2b96189 upstream.
+
+Richard reported the following crash:
+
+[ 0.036000] BUG: unable to handle kernel paging request at 55501e06
+[ 0.036000] IP: [<c0aae48b>] common_interrupt+0xb/0x38
+[ 0.036000] Call Trace:
+[ 0.036000] [<c0409c80>] ? add_nops+0x90/0xa0
+[ 0.036000] [<c040a054>] apply_alternatives+0x274/0x630
+
+Chuck decoded:
+
+ " 0: 8d 90 90 83 04 24 lea 0x24048390(%eax),%edx
+ 6: 80 fc 0f cmp $0xf,%ah
+ 9: a8 0f test $0xf,%al
+ >> b: a0 06 1e 50 55 mov 0x55501e06,%al
+ 10: 57 push %edi
+ 11: 56 push %esi
+
+ Interrupt 0x30 occurred while the alternatives code was replacing the
+ initial 0x90,0x90,0x90 NOPs (from the ASM_CLAC macro) with the
+ optimized version, 0x8d,0x76,0x00. Only the first byte has been
+ replaced so far, and it makes a mess out of the insn decoding."
+
+optimize_nops() is buggy in two aspects:
+
+- It's not disabling interrupts across the modification
+- It's lacking a sync_core() call
+
+Add both.
+
+Fixes: 4fd4b6e5537c 'x86/alternatives: Use optimized NOPs for padding'
+Reported-and-tested-by: "Richard W.M. Jones" <rjones@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Richard W.M. Jones <rjones@redhat.com>
+Cc: Chuck Ebbert <cebbert.lkml@gmail.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1509031232340.15006@nanos
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/alternative.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -338,10 +338,15 @@ done:
+
+ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+ {
++ unsigned long flags;
++
+ if (instr[0] != 0x90)
+ return;
+
++ local_irq_save(flags);
+ add_nops(instr + (a->instrlen - a->padlen), a->padlen);
++ sync_core();
++ local_irq_restore(flags);
+
+ DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+ instr, a->instrlen - a->padlen, a->padlen);
--- /dev/null
+From 5d7c631d926b59aa16f3c56eaeb83f1036c81dc7 Mon Sep 17 00:00:00 2001
+From: Shaohua Li <shli@fb.com>
+Date: Thu, 30 Jul 2015 16:24:43 -0700
+Subject: x86/apic: Serialize LVTT and TSC_DEADLINE writes
+
+From: Shaohua Li <shli@fb.com>
+
+commit 5d7c631d926b59aa16f3c56eaeb83f1036c81dc7 upstream.
+
+The APIC LVTT register is MMIO mapped but the TSC_DEADLINE register is an
+MSR. The write to the TSC_DEADLINE MSR is not serializing, so it's not
+guaranteed that the write to LVTT has reached the APIC before the
+TSC_DEADLINE MSR is written. In such a case the write to the MSR is
+ignored and as a consequence the local timer interrupt never fires.
+
+The SDM describes this issue for xAPIC and x2APIC modes. The
+serialization methods recommended by the SDM differ.
+
+xAPIC:
+ "1. Memory-mapped write to LVT Timer Register, setting bits 18:17 to 10b.
+ 2. WRMSR to the IA32_TSC_DEADLINE MSR a value much larger than current time-stamp counter.
+ 3. If RDMSR of the IA32_TSC_DEADLINE MSR returns zero, go to step 2.
+ 4. WRMSR to the IA32_TSC_DEADLINE MSR the desired deadline."
+
+x2APIC:
+ "To allow for efficient access to the APIC registers in x2APIC mode,
+ the serializing semantics of WRMSR are relaxed when writing to the
+ APIC registers. Thus, system software should not use 'WRMSR to APIC
+ registers in x2APIC mode' as a serializing instruction. Read and write
+ accesses to the APIC registers will occur in program order. A WRMSR to
+ an APIC register may complete before all preceding stores are globally
+ visible; software can prevent this by inserting a serializing
+ instruction, an SFENCE, or an MFENCE before the WRMSR."
+
+The xAPIC method is to just wait for the memory mapped write to hit
+the LVTT by checking whether the MSR write has reached the hardware.
+There is no reason why a proper MFENCE after the memory mapped write would
+not do the same. Andi Kleen confirmed that MFENCE is sufficient for the
+xAPIC case as well.
+
+Issue MFENCE before writing to the TSC_DEADLINE MSR. This can be done
+unconditionally as all CPUs which have TSC_DEADLINE also have MFENCE
+support.
+
+[ tglx: Massaged the changelog ]
+
+Signed-off-by: Shaohua Li <shli@fb.com>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Cc: <Kernel-team@fb.com>
+Cc: <lenb@kernel.org>
+Cc: <fenghua.yu@intel.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Link: http://lkml.kernel.org/r/20150909041352.GA2059853@devbig257.prn2.facebook.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/apic/apic.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned i
+ apic_write(APIC_LVTT, lvtt_value);
+
+ if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
++ /*
++ * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
++ * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
++ * According to Intel, MFENCE can do the serialization here.
++ */
++ asm volatile("mfence" : : : "memory");
++
+ printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
+ return;
+ }
--- /dev/null
+From 4857c91f0d195f05908fff296ba1ec5fca87066c Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 14 Sep 2015 12:00:55 +0200
+Subject: x86/ioapic: Force affinity setting in setup_ioapic_dest()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 4857c91f0d195f05908fff296ba1ec5fca87066c upstream.
+
+The recent ioapic cleanups changed the affinity setting in
+setup_ioapic_dest() from a direct write to the hardware to the delayed
+affinity setup via irq_set_affinity().
+
+That results in a warning from chained_irq_exit():
+WARNING: CPU: 0 PID: 5 at kernel/irq/migration.c:32 irq_move_masked_irq
+[<ffffffff810a0a88>] irq_move_masked_irq+0xb8/0xc0
+[<ffffffff8103c161>] ioapic_ack_level+0x111/0x130
+[<ffffffff812bbfe8>] intel_gpio_irq_handler+0x148/0x1c0
+
+The reason is that irq_set_affinity() does not write directly to the
+hardware. It marks the affinity setting as pending and executes it
+from the next interrupt. The chained handler infrastructure does not
+take the irq descriptor lock for performance reasons because such a
+chained interrupt is not visible to any interfaces. So the delayed
+affinity setting triggers the warning in irq_move_masked_irq().
+
+Restore the old behaviour by calling the set_affinity function of the
+ioapic chip in setup_ioapic_dest(). This is safe as none of the
+interrupts can be on the fly at this point.
+
+Fixes: aa5cb97f14a2 'x86/irq: Remove x86_io_apic_ops.set_affinity and related interfaces'
+Reported-and-tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Jiang Liu <jiang.liu@linux.intel.com>
+Cc: jarkko.nikula@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/apic/io_apic.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void)
+ int pin, ioapic, irq, irq_entry;
+ const struct cpumask *mask;
+ struct irq_data *idata;
++ struct irq_chip *chip;
+
+ if (skip_ioapic_setup == 1)
+ return;
+@@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void)
+ else
+ mask = apic->target_cpus();
+
+- irq_set_affinity(irq, mask);
++ chip = irq_data_get_irq_chip(idata);
++ chip->irq_set_affinity(idata, mask, false);
+ }
+-
+ }
+ #endif
+
--- /dev/null
+From 39d9b77b8debb4746e189aa5b61ae6e81ec5eab8 Mon Sep 17 00:00:00 2001
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Date: Wed, 29 Jul 2015 12:16:47 +0300
+Subject: x86/pci/intel_mid_pci: Work around for IRQ0 assignment
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+commit 39d9b77b8debb4746e189aa5b61ae6e81ec5eab8 upstream.
+
+On Intel Tangier the MMC host controller is wired up to irq 0. But
+several other devices have irq 0 associated as well due to a bogus PCI
+configuration.
+
+The first initialized driver will acquire irq 0 and make it
+unavailable for other devices. If the sdhci driver is not the first
+one it will fail to acquire the interrupt and therefor be non
+functional.
+
+Add a quirk to the pci irq enable function which denies irq 0 to
+anything else than the MMC host controller driver on Tangier
+platforms.
+
+Fixes: 90b9aacf912a (serial: 8250_pci: add Intel Tangier support)
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: Bjorn Helgaas <bhelgaas@google.com>
+Link: http://lkml.kernel.org/r/1438161409-4671-2-git-send-email-andriy.shevchenko@linux.intel.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/pci/intel_mid_pci.c | 24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/pci/intel_mid_pci.c
++++ b/arch/x86/pci/intel_mid_pci.c
+@@ -35,6 +35,9 @@
+
+ #define PCIE_CAP_OFFSET 0x100
+
++/* Quirks for the listed devices */
++#define PCI_DEVICE_ID_INTEL_MRFL_MMC 0x1190
++
+ /* Fixed BAR fields */
+ #define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */
+ #define PCI_FIXED_BAR_0_SIZE 0x04
+@@ -214,10 +217,27 @@ static int intel_mid_pci_irq_enable(stru
+ if (dev->irq_managed && dev->irq > 0)
+ return 0;
+
+- if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
++ switch (intel_mid_identify_cpu()) {
++ case INTEL_MID_CPU_CHIP_TANGIER:
+ polarity = 0; /* active high */
+- else
++
++ /* Special treatment for IRQ0 */
++ if (dev->irq == 0) {
++ /*
++ * TNG has IRQ0 assigned to eMMC controller. But there
++ * are also other devices with bogus PCI configuration
++ * that have IRQ0 assigned. This check ensures that
++ * eMMC gets it.
++ */
++ if (dev->device != PCI_DEVICE_ID_INTEL_MRFL_MMC)
++ return -EBUSY;
++ }
++ break;
++ default:
+ polarity = 1; /* active low */
++ break;
++ }
++
+ ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity);
+
+ /*
--- /dev/null
+From 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw2@infradead.org>
+Date: Wed, 16 Sep 2015 14:10:03 +0100
+Subject: x86/platform: Fix Geode LX timekeeping in the generic x86 build
+
+From: David Woodhouse <dwmw2@infradead.org>
+
+commit 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d upstream.
+
+In 2007, commit 07190a08eef36 ("Mark TSC on GeodeLX reliable")
+bypassed verification of the TSC on Geode LX. However, this code
+(now in the check_system_tsc_reliable() function in
+arch/x86/kernel/tsc.c) was only present if CONFIG_MGEODE_LX was
+set.
+
+OpenWRT has recently started building its generic Geode target
+for Geode GX, not LX, to include support for additional
+platforms. This broke the timekeeping on LX-based devices,
+because the TSC wasn't marked as reliable:
+https://dev.openwrt.org/ticket/20531
+
+By adding a runtime check on is_geode_lx(), we can also include
+the fix if CONFIG_MGEODEGX1 or CONFIG_X86_GENERIC are set, thus
+fixing the problem.
+
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+Cc: Andres Salomon <dilinger@queued.net>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Marcelo Tosatti <marcelo@kvack.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1442409003.131189.87.camel@infradead.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/tsc.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kernel/tsc.c
++++ b/arch/x86/kernel/tsc.c
+@@ -21,6 +21,7 @@
+ #include <asm/hypervisor.h>
+ #include <asm/nmi.h>
+ #include <asm/x86_init.h>
++#include <asm/geode.h>
+
+ unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
+ EXPORT_SYMBOL(cpu_khz);
+@@ -1013,15 +1014,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+ static void __init check_system_tsc_reliable(void)
+ {
+-#ifdef CONFIG_MGEODE_LX
+- /* RTSC counts during suspend */
++#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
++ if (is_geode_lx()) {
++ /* RTSC counts during suspend */
+ #define RTSC_SUSP 0x100
+- unsigned long res_low, res_high;
++ unsigned long res_low, res_high;
+
+- rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+- /* Geode_LX - the OLPC CPU has a very reliable TSC */
+- if (res_low & RTSC_SUSP)
+- tsc_clocksource_reliable = 1;
++ rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
++ /* Geode_LX - the OLPC CPU has a very reliable TSC */
++ if (res_low & RTSC_SUSP)
++ tsc_clocksource_reliable = 1;
++ }
+ #endif
+ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
+ tsc_clocksource_reliable = 1;