--- /dev/null
+From 2ff396be602f10b5eab8e73b24f20348fa2de159 Mon Sep 17 00:00:00 2001
+From: Jeff Moyer <jmoyer@redhat.com>
+Date: Tue, 2 Sep 2014 13:17:00 -0400
+Subject: aio: add missing smp_rmb() in read_events_ring
+
+From: Jeff Moyer <jmoyer@redhat.com>
+
+commit 2ff396be602f10b5eab8e73b24f20348fa2de159 upstream.
+
+We ran into a case on ppc64 running mariadb where io_getevents would
+return zeroed out I/O events. After adding instrumentation, it became
+clear that there was some missing synchronization between reading the
+tail pointer and the events themselves. This small patch fixes the
+problem in testing.
+
+Thanks to Zach for helping to look into this, and suggesting the fix.
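+
+A minimal userspace analogue of the pairing this relies on, written with
+C11 atomics instead of the kernel's barriers and a simplified ring
+layout (not the real fs/aio.c structures): the consumer must order its
+read of the tail before its reads of the events, matching the producer's
+ordering of the event stores before the tail store.
+
+    #include <stdatomic.h>
+
+    struct ring { _Atomic unsigned tail; unsigned long ev[128]; };
+
+    static void produce(struct ring *r, unsigned t, unsigned long v)
+    {
+        r->ev[t % 128] = v;
+        /* publish the event before publishing the new tail */
+        atomic_store_explicit(&r->tail, t + 1, memory_order_release);
+    }
+
+    static int consume(struct ring *r, unsigned h, unsigned long *v)
+    {
+        /* order the tail read before the event read (cf. smp_rmb()) */
+        unsigned t = atomic_load_explicit(&r->tail, memory_order_acquire);
+
+        if (h == t)
+            return 0;    /* ring empty */
+        *v = r->ev[h % 128];
+        return 1;
+    }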
+
+Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
+Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/aio.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -1134,6 +1134,12 @@ static long aio_read_events_ring(struct
+ tail = ring->tail;
+ kunmap_atomic(ring);
+
++ /*
++ * Ensure that once we've read the current tail pointer, that
++ * we also see the events that were stored up to the tail.
++ */
++ smp_rmb();
++
+ pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
+
+ if (head == tail)
--- /dev/null
+From d856f32a86b2b015ab180ab7a55e455ed8d3ccc5 Mon Sep 17 00:00:00 2001
+From: Benjamin LaHaise <bcrl@kvack.org>
+Date: Sun, 24 Aug 2014 13:14:05 -0400
+Subject: aio: fix reqs_available handling
+
+From: Benjamin LaHaise <bcrl@kvack.org>
+
+commit d856f32a86b2b015ab180ab7a55e455ed8d3ccc5 upstream.
+
+As reported by Dan Aloni, commit f8567a3845ac ("aio: fix aio request
+leak when events are reaped by userspace") introduces a regression when
+user code attempts to perform io_submit() with more events than are
+available in the ring buffer. Reverting that commit would reintroduce a
+regression when user space event reaping is used.
+
+Fixing this bug is a bit more involved than the previous attempts to fix
+this regression. Since we do not have a single point at which we can
+count events as being reaped by user space and by io_getevents(), we have
+to track event completion by looking at the number of events left in the
+event ring. So long as there are as many events in the ring buffer as
+there have been completion events generated, we cannot call
+put_reqs_available(). The code to check for this is now placed in
+refill_reqs_available().
+
+A test program from Dan, modified by me for verifying this bug, is
+available at http://www.kvack.org/~bcrl/20140824-aio_bug.c .
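+
+A standalone sketch of the accounting introduced below (simplified,
+hypothetical helper names, not the kernel code): completed requests may
+only be handed back to reqs_available once their events have actually
+left the ring. For example, with nr_events=128, head=120 and tail=5
+there are still 13 events in the ring.
+
+    /* how many completion events are still sitting in the ring */
+    static unsigned events_in_ring(unsigned head, unsigned tail,
+                                   unsigned nr_events)
+    {
+        head %= nr_events;  /* clamp: userland can write head */
+        return head <= tail ? tail - head
+                            : nr_events - (head - tail);
+    }
+
+    /* completions whose slots may be released via put_reqs_available() */
+    static unsigned reclaimable(unsigned completed_events, unsigned head,
+                                unsigned tail, unsigned nr_events)
+    {
+        unsigned in_ring = events_in_ring(head, tail, nr_events);
+
+        return completed_events > in_ring ? completed_events - in_ring : 0;
+    }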
+
+Reported-by: Dan Aloni <dan@kernelim.com>
+Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
+Acked-by: Dan Aloni <dan@kernelim.com>
+Cc: Kent Overstreet <kmo@daterainc.com>
+Cc: Mateusz Guzik <mguzik@redhat.com>
+Cc: Petr Matousek <pmatouse@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/aio.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 73 insertions(+), 4 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -141,6 +141,7 @@ struct kioctx {
+
+ struct {
+ unsigned tail;
++ unsigned completed_events;
+ spinlock_t completion_lock;
+ } ____cacheline_aligned_in_smp;
+
+@@ -880,6 +881,68 @@ out:
+ return ret;
+ }
+
++/* refill_reqs_available
++ * Updates the reqs_available reference counts used for tracking the
++ * number of free slots in the completion ring. This can be called
++ * from aio_complete() (to optimistically update reqs_available) or
++ * from aio_get_req() (the we're out of events case). It must be
++ * called holding ctx->completion_lock.
++ */
++static void refill_reqs_available(struct kioctx *ctx, unsigned head,
++ unsigned tail)
++{
++ unsigned events_in_ring, completed;
++
++ /* Clamp head since userland can write to it. */
++ head %= ctx->nr_events;
++ if (head <= tail)
++ events_in_ring = tail - head;
++ else
++ events_in_ring = ctx->nr_events - (head - tail);
++
++ completed = ctx->completed_events;
++ if (events_in_ring < completed)
++ completed -= events_in_ring;
++ else
++ completed = 0;
++
++ if (!completed)
++ return;
++
++ ctx->completed_events -= completed;
++ put_reqs_available(ctx, completed);
++}
++
++/* user_refill_reqs_available
++ * Called to refill reqs_available when aio_get_req() encounters an
++ * out of space in the completion ring.
++ */
++static void user_refill_reqs_available(struct kioctx *ctx)
++{
++ spin_lock_irq(&ctx->completion_lock);
++ if (ctx->completed_events) {
++ struct aio_ring *ring;
++ unsigned head;
++
++ /* Access of ring->head may race with aio_read_events_ring()
++ * here, but that's okay since whether we read the old version
++ * or the new version, and either will be valid. The important
++ * part is that head cannot pass tail since we prevent
++ * aio_complete() from updating tail by holding
++ * ctx->completion_lock. Even if head is invalid, the check
++ * against ctx->completed_events below will make sure we do the
++ * safe/right thing.
++ */
++ ring = kmap_atomic(ctx->ring_pages[0]);
++ head = ring->head;
++ kunmap_atomic(ring);
++
++ refill_reqs_available(ctx, head, ctx->tail);
++ }
++
++ spin_unlock_irq(&ctx->completion_lock);
++}
++
+ /* aio_get_req
+ * Allocate a slot for an aio request.
+ * Returns NULL if no requests are free.
+@@ -888,8 +951,11 @@ static inline struct kiocb *aio_get_req(
+ {
+ struct kiocb *req;
+
+- if (!get_reqs_available(ctx))
+- return NULL;
++ if (!get_reqs_available(ctx)) {
++ user_refill_reqs_available(ctx);
++ if (!get_reqs_available(ctx))
++ return NULL;
++ }
+
+ req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
+ if (unlikely(!req))
+@@ -948,8 +1014,8 @@ void aio_complete(struct kiocb *iocb, lo
+ struct kioctx *ctx = iocb->ki_ctx;
+ struct aio_ring *ring;
+ struct io_event *ev_page, *event;
++ unsigned tail, pos, head;
+ unsigned long flags;
+- unsigned tail, pos;
+
+ /*
+ * Special case handling for sync iocbs:
+@@ -1010,10 +1076,14 @@ void aio_complete(struct kiocb *iocb, lo
+ ctx->tail = tail;
+
+ ring = kmap_atomic(ctx->ring_pages[0]);
++ head = ring->head;
+ ring->tail = tail;
+ kunmap_atomic(ring);
+ flush_dcache_page(ctx->ring_pages[0]);
+
++ ctx->completed_events++;
++ if (ctx->completed_events > 1)
++ refill_reqs_available(ctx, head, tail);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+ pr_debug("added to ring %p at [%u]\n", iocb, tail);
+@@ -1028,7 +1098,6 @@ void aio_complete(struct kiocb *iocb, lo
+
+ /* everything turned out well, dispose of the aiocb. */
+ kiocb_free(iocb);
+- put_reqs_available(ctx, 1);
+
+ /*
+ * We have to order our ring_info tail store above and test
--- /dev/null
+From 05e0127f9e362b36aa35f17b1a3d52bca9322a3a Mon Sep 17 00:00:00 2001
+From: Christoffer Dall <christoffer.dall@linaro.org>
+Date: Tue, 26 Aug 2014 14:33:02 +0200
+Subject: arm/arm64: KVM: Complete WFI/WFE instructions
+
+From: Christoffer Dall <christoffer.dall@linaro.org>
+
+commit 05e0127f9e362b36aa35f17b1a3d52bca9322a3a upstream.
+
+The architecture specifies that when the processor wakes up from a WFE
+or WFI instruction, the instruction is considered complete; however, we
+currently return to EL1 (or EL0) at the WFI/WFE instruction itself.
+
+While most guests may not be affected by this, because their local
+exception handler performs an exception return (setting the event bit)
+or returns with an interrupt pending, some guests like UEFI will get
+wedged due to this little mishap.
+
+Simply skip the instruction when we have completed the emulation.
+
+Acked-by: Marc Zyngier <marc.zyngier@arm.com>
+Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/kvm/handle_exit.c | 2 ++
+ arch/arm64/kvm/handle_exit.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+--- a/arch/arm/kvm/handle_exit.c
++++ b/arch/arm/kvm/handle_exit.c
+@@ -89,6 +89,8 @@ static int kvm_handle_wfx(struct kvm_vcp
+ else
+ kvm_vcpu_block(vcpu);
+
++ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
++
+ return 1;
+ }
+
+--- a/arch/arm64/kvm/handle_exit.c
++++ b/arch/arm64/kvm/handle_exit.c
+@@ -62,6 +62,8 @@ static int kvm_handle_wfx(struct kvm_vcp
+ else
+ kvm_vcpu_block(vcpu);
+
++ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
++
+ return 1;
+ }
+
--- /dev/null
+From f6edbbf36da3a27b298b66c7955fc84e1dcca305 Mon Sep 17 00:00:00 2001
+From: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
+Date: Thu, 31 Jul 2014 12:23:23 +0530
+Subject: ARM/ARM64: KVM: Nuke Hyp-mode tlbs before enabling MMU
+
+From: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
+
+commit f6edbbf36da3a27b298b66c7955fc84e1dcca305 upstream.
+
+X-Gene u-boot runs in EL2 mode with the MMU enabled, hence we might
+have stale EL2 TLB entries when we enable the EL2 MMU on each host CPU.
+
+This can happen on any ARM/ARM64 board running bootloader in
+Hyp-mode (or EL2-mode) with MMU enabled.
+
+This patch ensures that we flush all Hyp-mode (or EL2-mode) TLBs
+on each host CPU before enabling Hyp-mode (or EL2-mode) MMU.
+
+Tested-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
+Signed-off-by: Anup Patel <anup.patel@linaro.org>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/kvm/init.S | 4 ++++
+ arch/arm64/kvm/hyp-init.S | 4 ++++
+ 2 files changed, 8 insertions(+)
+
+--- a/arch/arm/kvm/init.S
++++ b/arch/arm/kvm/init.S
+@@ -98,6 +98,10 @@ __do_hyp_init:
+ mrc p15, 0, r0, c10, c2, 1
+ mcr p15, 4, r0, c10, c2, 1
+
++ @ Invalidate the stale TLBs from Bootloader
++ mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
++ dsb ish
++
+ @ Set the HSCTLR to:
+ @ - ARM/THUMB exceptions: Kernel config (Thumb-2 kernel)
+ @ - Endianness: Kernel config
+--- a/arch/arm64/kvm/hyp-init.S
++++ b/arch/arm64/kvm/hyp-init.S
+@@ -74,6 +74,10 @@ __do_hyp_init:
+ msr mair_el2, x4
+ isb
+
++ /* Invalidate the stale TLBs from Bootloader */
++ tlbi alle2
++ dsb sy
++
+ mrs x4, sctlr_el2
+ and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2
+ ldr x5, =SCTLR_EL2_FLAGS
--- /dev/null
+From eb35bdd7bca29a13c8ecd44e6fd747a84ce675db Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Thu, 11 Sep 2014 14:38:16 +0100
+Subject: arm64: flush TLS registers during exec
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit eb35bdd7bca29a13c8ecd44e6fd747a84ce675db upstream.
+
+Nathan reports that we leak TLS information from the parent context
+during an exec, as we don't clear the TLS registers when flushing the
+thread state.
+
+This patch updates the flushing code so that we:
+
+ (1) Unconditionally zero the tpidr_el0 register (since this is fully
+ context switched for native tasks and zeroed for compat tasks)
+
+ (2) Zero the tp_value state in thread_info before clearing the
+      tpidrro_el0 register for compat tasks (since this is only writable
+ by the set_tls compat syscall and therefore not fully switched).
+
+A missing compiler barrier is also added to the compat set_tls syscall.
+
+Acked-by: Nathan Lynch <Nathan_Lynch@mentor.com>
+Reported-by: Nathan Lynch <Nathan_Lynch@mentor.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/process.c | 18 ++++++++++++++++++
+ arch/arm64/kernel/sys_compat.c | 6 ++++++
+ 2 files changed, 24 insertions(+)
+
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -187,9 +187,27 @@ void exit_thread(void)
+ {
+ }
+
++static void tls_thread_flush(void)
++{
++ asm ("msr tpidr_el0, xzr");
++
++ if (is_compat_task()) {
++ current->thread.tp_value = 0;
++
++ /*
++ * We need to ensure ordering between the shadow state and the
++ * hardware state, so that we don't corrupt the hardware state
++ * with a stale shadow state during context switch.
++ */
++ barrier();
++ asm ("msr tpidrro_el0, xzr");
++ }
++}
++
+ void flush_thread(void)
+ {
+ fpsimd_flush_thread();
++ tls_thread_flush();
+ flush_ptrace_hw_breakpoint(current);
+ }
+
+--- a/arch/arm64/kernel/sys_compat.c
++++ b/arch/arm64/kernel/sys_compat.c
+@@ -79,6 +79,12 @@ long compat_arm_syscall(struct pt_regs *
+
+ case __ARM_NR_compat_set_tls:
+ current->thread.tp_value = regs->regs[0];
++
++ /*
++ * Protect against register corruption from context switch.
++ * See comment in tls_thread_flush.
++ */
++ barrier();
+ asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0]));
+ return 0;
+
--- /dev/null
+From 3d8afe3099ebc602848aa7f09235cce3a9a023ce Mon Sep 17 00:00:00 2001
+From: Sudeep Holla <sudeep.holla@arm.com>
+Date: Tue, 2 Sep 2014 11:35:24 +0100
+Subject: arm64: use irq_set_affinity with force=false when migrating irqs
+
+From: Sudeep Holla <sudeep.holla@arm.com>
+
+commit 3d8afe3099ebc602848aa7f09235cce3a9a023ce upstream.
+
+The arm64 interrupt migration code on cpu offline calls
+irqchip.irq_set_affinity() with the argument force=true. Originally
+this argument had no effect because it was not used by any interrupt
+chip driver and there were no semantics defined.
+
+This changed with commit 01f8fa4f01d8 ("genirq: Allow forcing cpu
+affinity of interrupts") which made the force argument useful to route
+interrupts to not yet online cpus without checking the target cpu
+against the cpu online mask. The following commit ffde1de64012
+("irqchip: gic: Support forced affinity setting") implemented this for
+the GIC interrupt controller.
+
+As a consequence the cpu offline irq migration fails if CPU0 is
+offlined, because CPU0 is still set in the affinity mask and the
+validation against the cpu online mask is skipped due to the force argument
+being true. The following first_cpu(mask) selection always selects
+CPU0 as the target.
+
+Commit 601c942176d8 ("arm64: use cpu_online_mask when using forced
+irq_set_affinity") intended to fix the above mentioned issue but
+introduced another issue where affinity can be migrated to a wrong
+CPU due to the unconditional copy of cpu_online_mask.
+
+As with arm, solve the issue by calling irq_set_affinity() with
+force=false from the CPU offline irq migration code so the GIC driver
+validates the affinity mask against the CPU online mask and therefore
+removes CPU0 from the possible target candidates. Also revert the
+changes done in the commit 601c942176d8 as it's no longer needed.
+
+Tested on Juno platform.
+
+Fixes: 601c942176d8 ("arm64: use cpu_online_mask when using forced
+ irq_set_affinity")
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/irq.c | 12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+--- a/arch/arm64/kernel/irq.c
++++ b/arch/arm64/kernel/irq.c
+@@ -97,19 +97,15 @@ static bool migrate_one_irq(struct irq_d
+ if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
+ return false;
+
+- if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids)
++ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
++ affinity = cpu_online_mask;
+ ret = true;
++ }
+
+- /*
+- * when using forced irq_set_affinity we must ensure that the cpu
+- * being offlined is not present in the affinity mask, it may be
+- * selected as the target CPU otherwise
+- */
+- affinity = cpu_online_mask;
+ c = irq_data_get_irq_chip(d);
+ if (!c->irq_set_affinity)
+ pr_debug("IRQ%u: unable to set affinity\n", d->irq);
+- else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret)
++ else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
+ cpumask_copy(d->affinity, affinity);
+
+ return ret;
--- /dev/null
+From cbd5228199d8be45d895d9d0cc2b8ce53835fc21 Mon Sep 17 00:00:00 2001
+From: Anton Blanchard <anton@samba.org>
+Date: Fri, 22 Aug 2014 11:36:52 +1000
+Subject: ibmveth: Fix endian issues with rx_no_buffer statistic
+
+From: Anton Blanchard <anton@samba.org>
+
+commit cbd5228199d8be45d895d9d0cc2b8ce53835fc21 upstream.
+
+Hidden away in the last 8 bytes of the buffer_list page is a solitary
+statistic. It needs to be byte swapped or else ethtool -S will
+produce numbers that terrify the user.
+
+Since we do this in multiple places, create a helper function with a
+comment explaining what is going on.
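+
+A userspace illustration of the failure mode (standard <endian.h>
+helpers, not the driver code): reading the big-endian counter on a
+little-endian host without a swap yields a huge bogus value.
+
+    #include <endian.h>
+    #include <inttypes.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        /* the counter as the hypervisor stores it: big-endian 42 */
+        uint64_t wire = htobe64(42);
+
+        printf("raw : %" PRIu64 "\n", wire);          /* 3026418949592973312 on LE */
+        printf("swap: %" PRIu64 "\n", be64toh(wire)); /* 42 */
+        return 0;
+    }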
+
+Signed-off-by: Anton Blanchard <anton@samba.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/ibm/ibmveth.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/ibmveth.c
++++ b/drivers/net/ethernet/ibm/ibmveth.c
+@@ -292,6 +292,18 @@ failure:
+ atomic_add(buffers_added, &(pool->available));
+ }
+
++/*
++ * The final 8 bytes of the buffer list is a counter of frames dropped
++ * because there was not a buffer in the buffer list capable of holding
++ * the frame.
++ */
++static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
++{
++ __be64 *p = adapter->buffer_list_addr + 4096 - 8;
++
++ adapter->rx_no_buffer = be64_to_cpup(p);
++}
++
+ /* replenish routine */
+ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
+ {
+@@ -307,8 +319,7 @@ static void ibmveth_replenish_task(struc
+ ibmveth_replenish_buffer_pool(adapter, pool);
+ }
+
+- adapter->rx_no_buffer = *(u64 *)(((char*)adapter->buffer_list_addr) +
+- 4096 - 8);
++ ibmveth_update_rx_no_buffer(adapter);
+ }
+
+ /* empty and free ana buffer pool - also used to do cleanup in error paths */
+@@ -698,8 +709,7 @@ static int ibmveth_close(struct net_devi
+
+ free_irq(netdev->irq, netdev);
+
+- adapter->rx_no_buffer = *(u64 *)(((char *)adapter->buffer_list_addr) +
+- 4096 - 8);
++ ibmveth_update_rx_no_buffer(adapter);
+
+ ibmveth_cleanup(adapter);
+
pata_scc-propagate-return-value-of-scc_wait_after_reset.patch
ahci-add-device-ids-for-intel-9-series-pch.patch
ahci-add-pcid-for-marvel-0x9182-controller.patch
+ibmveth-fix-endian-issues-with-rx_no_buffer-statistic.patch
+aio-fix-reqs_available-handling.patch
+aio-add-missing-smp_rmb-in-read_events_ring.patch
+arm64-flush-tls-registers-during-exec.patch
+arm64-use-irq_set_affinity-with-force-false-when-migrating-irqs.patch
+arm-arm64-kvm-complete-wfi-wfe-instructions.patch
+arm-arm64-kvm-nuke-hyp-mode-tlbs-before-enabling-mmu.patch