--- /dev/null
+From ae705930fca6322600690df9dc1c7d0516145a93 Mon Sep 17 00:00:00 2001
+From: Christoffer Dall <christoffer.dall@linaro.org>
+Date: Fri, 13 Mar 2015 17:02:56 +0000
+Subject: arm/arm64: KVM: Keep elrsr/aisr in sync with software model
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christoffer Dall <christoffer.dall@linaro.org>
+
+commit ae705930fca6322600690df9dc1c7d0516145a93 upstream.
+
+There is an interesting bug in the vgic code, which manifests itself
+when the KVM run loop has a signal pending or needs a vmid generation
+rollover after having disabled interrupts but before actually switching
+to the guest.
+
+In this case, we flush the vgic as usual, but we sync back the vgic
+state and exit to userspace before entering the guest. The consequence
+is that we will be syncing the list registers back to the software model
+using the GICH_ELRSR and GICH_EISR from the last execution of the guest,
+potentially overwriting a list register containing an interrupt.
+
+This showed up during migration testing where we would capture a state
+where the VM has masked the arch timer but there were no interrupts,
+resulting in a hung test.
+
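+A rough user-space model of the fixed bookkeeping (a sketch, not the
+kernel code; LR_STATE_MASK stands for the pending/active bits as in the
+vgic code) shows why the software ELRSR now tracks LR occupancy in both
+directions:
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  #define LR_STATE_MASK 0x3            /* pending | active */
+
+  /* mirrors vgic_v2_sync_lr_elrsr() with the fix applied */
+  static void sync_lr_elrsr(uint64_t *elrsr, int lr, uint32_t lr_state)
+  {
+          if (!(lr_state & LR_STATE_MASK))
+                  *elrsr |= 1ULL << lr;    /* LR empty: mark it free */
+          else
+                  *elrsr &= ~(1ULL << lr); /* LR in use: mark it busy */
+  }
+
+  int main(void)
+  {
+          uint64_t elrsr = ~0ULL;          /* all LRs start out empty */
+
+          sync_lr_elrsr(&elrsr, 2, 0x1);   /* queue a pending IRQ in LR2 */
+          printf("LR2 free? %d\n", (int)((elrsr >> 2) & 1)); /* 0: busy */
+          return 0;
+  }
+
+Without the else branch, a sync performed before the guest ever ran
+would still see the stale "free" bit and could overwrite LR2.
+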
+Cc: Marc Zyngier <marc.zyngier@arm.com>
+Reported-by: Alex Bennee <alex.bennee@linaro.org>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
+Acked-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/kvm/arm_vgic.h | 1 +
+ virt/kvm/arm/vgic-v2.c | 8 ++++++++
+ virt/kvm/arm/vgic-v3.c | 8 ++++++++
+ virt/kvm/arm/vgic.c | 16 ++++++++++++++++
+ 4 files changed, 33 insertions(+)
+
+--- a/include/kvm/arm_vgic.h
++++ b/include/kvm/arm_vgic.h
+@@ -113,6 +113,7 @@ struct vgic_ops {
+ void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
+ u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
+ u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
++ void (*clear_eisr)(struct kvm_vcpu *vcpu);
+ u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
+ void (*enable_underflow)(struct kvm_vcpu *vcpu);
+ void (*disable_underflow)(struct kvm_vcpu *vcpu);
+--- a/virt/kvm/arm/vgic-v2.c
++++ b/virt/kvm/arm/vgic-v2.c
+@@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct
+ {
+ if (!(lr_desc.state & LR_STATE_MASK))
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
++ else
++ vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
+ }
+
+ static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+@@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct
+ return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+ }
+
++static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
++{
++ vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
++}
++
+ static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+ {
+ u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+@@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops
+ .sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
+ .get_elrsr = vgic_v2_get_elrsr,
+ .get_eisr = vgic_v2_get_eisr,
++ .clear_eisr = vgic_v2_clear_eisr,
+ .get_interrupt_status = vgic_v2_get_interrupt_status,
+ .enable_underflow = vgic_v2_enable_underflow,
+ .disable_underflow = vgic_v2_disable_underflow,
+--- a/virt/kvm/arm/vgic-v3.c
++++ b/virt/kvm/arm/vgic-v3.c
+@@ -86,6 +86,8 @@ static void vgic_v3_sync_lr_elrsr(struct
+ {
+ if (!(lr_desc.state & LR_STATE_MASK))
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
++ else
++ vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
+ }
+
+ static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
+@@ -98,6 +100,11 @@ static u64 vgic_v3_get_eisr(const struct
+ return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
+ }
+
++static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
++{
++ vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
++}
++
+ static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
+ {
+ u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
+@@ -162,6 +169,7 @@ static const struct vgic_ops vgic_v3_ops
+ .sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
+ .get_elrsr = vgic_v3_get_elrsr,
+ .get_eisr = vgic_v3_get_eisr,
++ .clear_eisr = vgic_v3_clear_eisr,
+ .get_interrupt_status = vgic_v3_get_interrupt_status,
+ .enable_underflow = vgic_v3_enable_underflow,
+ .disable_underflow = vgic_v3_disable_underflow,
+--- a/virt/kvm/arm/vgic.c
++++ b/virt/kvm/arm/vgic.c
+@@ -1219,6 +1219,11 @@ static inline u64 vgic_get_eisr(struct k
+ return vgic_ops->get_eisr(vcpu);
+ }
+
++static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
++{
++ vgic_ops->clear_eisr(vcpu);
++}
++
+ static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
+ {
+ return vgic_ops->get_interrupt_status(vcpu);
+@@ -1258,6 +1263,7 @@ static void vgic_retire_lr(int lr_nr, in
+ vgic_set_lr(vcpu, lr_nr, vlr);
+ clear_bit(lr_nr, vgic_cpu->lr_used);
+ vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
++ vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
+ }
+
+ /*
+@@ -1313,6 +1319,7 @@ static bool vgic_queue_irq(struct kvm_vc
+ BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
+ vlr.state |= LR_STATE_PENDING;
+ vgic_set_lr(vcpu, lr, vlr);
++ vgic_sync_lr_elrsr(vcpu, lr, vlr);
+ return true;
+ }
+ }
+@@ -1334,6 +1341,7 @@ static bool vgic_queue_irq(struct kvm_vc
+ vlr.state |= LR_EOI_INT;
+
+ vgic_set_lr(vcpu, lr, vlr);
++ vgic_sync_lr_elrsr(vcpu, lr, vlr);
+
+ return true;
+ }
+@@ -1502,6 +1510,14 @@ static bool vgic_process_maintenance(str
+ if (status & INT_STATUS_UNDERFLOW)
+ vgic_disable_underflow(vcpu);
+
++ /*
++ * In the next iterations of the vcpu loop, if we sync the vgic state
++ * after flushing it, but before entering the guest (this happens for
++ * pending signals and vmid rollovers), then make sure we don't pick
++ * up any old maintenance interrupts here.
++ */
++ vgic_clear_eisr(vcpu);
++
+ return level_pending;
+ }
+
--- /dev/null
+From a050dfb21cc22ac0c666d52531040c1bc48184cc Mon Sep 17 00:00:00 2001
+From: Jan Kiszka <jan.kiszka@siemens.com>
+Date: Sat, 7 Feb 2015 22:21:20 +0100
+Subject: ARM: KVM: Fix size check in __coherent_cache_guest_page
+
+From: Jan Kiszka <jan.kiszka@siemens.com>
+
+commit a050dfb21cc22ac0c666d52531040c1bc48184cc upstream.
+
+The check is supposed to catch page-unaligned sizes, not the inverse.
+
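+A small user-space illustration, assuming 4kB pages, of why the
+inverted mask matters:
+
+  #include <stdio.h>
+
+  #define PAGE_SIZE 4096UL
+  #define PAGE_MASK (~(PAGE_SIZE - 1))
+
+  int main(void)
+  {
+          unsigned long sizes[] = { 4096, 8192, 4100 };
+          int i;
+
+          for (i = 0; i < 3; i++)
+                  printf("size=%5lu  &PAGE_MASK=%#lx  &~PAGE_MASK=%#lx\n",
+                         sizes[i], sizes[i] & PAGE_MASK,
+                         sizes[i] & ~PAGE_MASK);
+          return 0;
+  }
+
+'size & PAGE_MASK' is non-zero for every size of one page or more, so
+the old assertion fired on perfectly valid page-multiple sizes, while
+'size & ~PAGE_MASK' is non-zero only for the genuinely unaligned 4100.
+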
+Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/include/asm/kvm_mmu.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/include/asm/kvm_mmu.h
++++ b/arch/arm/include/asm/kvm_mmu.h
+@@ -186,7 +186,7 @@ static inline void __coherent_cache_gues
+
+ bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
+
+- VM_BUG_ON(size & PAGE_MASK);
++ VM_BUG_ON(size & ~PAGE_MASK);
+
+ if (!need_flush && !icache_is_pipt())
+ goto vipt_cache;
--- /dev/null
+From 04b8dc85bf4a64517e3cf20e409eeaa503b15cc1 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <marc.zyngier@arm.com>
+Date: Tue, 10 Mar 2015 19:07:00 +0000
+Subject: arm64: KVM: Do not use pgd_index to index stage-2 pgd
+
+From: Marc Zyngier <marc.zyngier@arm.com>
+
+commit 04b8dc85bf4a64517e3cf20e409eeaa503b15cc1 upstream.
+
+The kernel's pgd_index macro is designed to index a normal, page
+sized array. KVM is a bit different, as we can use concatenated
+pages to have a bigger address space (for example a 40-bit IPA with
+4kB pages gives us an 8kB PGD).
+
+In the above case, the use of pgd_index will always return an index
+inside the first 4kB, which makes a guest that has memory above
+0x8000000000 rather unhappy, as it spins forever in a page fault,
+whilst the host happily corrupts the lower pgd.
+
+The obvious fix is to get our own kvm_pgd_index that does the right
+thing(tm).
+
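+A user-space sketch of the arithmetic (assuming 4kB pages, a 40-bit IPA
+and PGDIR_SHIFT == 30, i.e. a 1024-entry / 8kB concatenated stage-2 PGD
+on top of a 512-entry host PGD):
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  #define PGDIR_SHIFT     30
+  #define PTRS_PER_PGD    512UL   /* what pgd_index() masks with     */
+  #define PTRS_PER_S2_PGD 1024UL  /* what kvm_pgd_index() masks with */
+
+  int main(void)
+  {
+          uint64_t ipa = 0x8000000000ULL;  /* guest memory above 512GB */
+
+          printf("pgd_index:     %llu\n", (unsigned long long)
+                 ((ipa >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)));
+          printf("kvm_pgd_index: %llu\n", (unsigned long long)
+                 ((ipa >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)));
+          return 0;
+  }
+
+pgd_index() wraps back to entry 0 (inside the first 4kB page), while
+kvm_pgd_index() lands on entry 512 in the second, concatenated page.
+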
+Tested on X-Gene with a hacked kvmtool that put memory at a stupidly
+high address.
+
+Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/include/asm/kvm_mmu.h | 3 ++-
+ arch/arm/kvm/mmu.c | 6 +++---
+ arch/arm64/include/asm/kvm_mmu.h | 2 ++
+ 3 files changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/arm/include/asm/kvm_mmu.h
++++ b/arch/arm/include/asm/kvm_mmu.h
+@@ -128,13 +128,14 @@ static inline void kvm_set_s2pmd_writabl
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+ })
+
++#define kvm_pgd_index(addr) pgd_index(addr)
++
+ static inline bool kvm_page_empty(void *ptr)
+ {
+ struct page *ptr_page = virt_to_page(ptr);
+ return page_count(ptr_page) == 1;
+ }
+
+-
+ #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+ #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
+ #define kvm_pud_table_empty(kvm, pudp) (0)
+--- a/arch/arm/kvm/mmu.c
++++ b/arch/arm/kvm/mmu.c
+@@ -251,7 +251,7 @@ static void unmap_range(struct kvm *kvm,
+ phys_addr_t addr = start, end = start + size;
+ phys_addr_t next;
+
+- pgd = pgdp + pgd_index(addr);
++ pgd = pgdp + kvm_pgd_index(addr);
+ do {
+ next = kvm_pgd_addr_end(addr, end);
+ if (!pgd_none(*pgd))
+@@ -316,7 +316,7 @@ static void stage2_flush_memslot(struct
+ phys_addr_t next;
+ pgd_t *pgd;
+
+- pgd = kvm->arch.pgd + pgd_index(addr);
++ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+ do {
+ next = kvm_pgd_addr_end(addr, end);
+ stage2_flush_puds(kvm, pgd, addr, next);
+@@ -791,7 +791,7 @@ static pud_t *stage2_get_pud(struct kvm
+ pgd_t *pgd;
+ pud_t *pud;
+
+- pgd = kvm->arch.pgd + pgd_index(addr);
++ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+ if (WARN_ON(pgd_none(*pgd))) {
+ if (!cache)
+ return NULL;
+--- a/arch/arm64/include/asm/kvm_mmu.h
++++ b/arch/arm64/include/asm/kvm_mmu.h
+@@ -137,6 +137,8 @@ static inline void kvm_set_s2pmd_writabl
+ #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
+ #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
++#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
++
+ /*
+ * If we are concatenating first level stage-2 page tables, we would have less
+ * than or equal to 16 pointers in the fake PGD, because that's what the
--- /dev/null
+From a987370f8e7a1677ae385042644326d9cd145a20 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <marc.zyngier@arm.com>
+Date: Tue, 10 Mar 2015 19:06:59 +0000
+Subject: arm64: KVM: Fix stage-2 PGD allocation to have per-page refcounting
+
+From: Marc Zyngier <marc.zyngier@arm.com>
+
+commit a987370f8e7a1677ae385042644326d9cd145a20 upstream.
+
+We're using __get_free_pages to allocate the guest's stage-2
+PGD. The standard behaviour of this function is to return a set of
+pages where only the head page has a valid refcount.
+
+This behaviour gets us into trouble when we're trying to increment
+the refcount on a non-head page:
+
+page:ffff7c00cfb693c0 count:0 mapcount:0 mapping: (null) index:0x0
+flags: 0x4000000000000000()
+page dumped because: VM_BUG_ON_PAGE((*({ __attribute__((unused)) typeof((&page->_count)->counter) __var = ( typeof((&page->_count)->counter)) 0; (volatile typeof((&page->_count)->counter) *)&((&page->_count)->counter); })) <= 0)
+BUG: failure at include/linux/mm.h:548/get_page()!
+Kernel panic - not syncing: BUG!
+CPU: 1 PID: 1695 Comm: kvm-vcpu-0 Not tainted 4.0.0-rc1+ #3825
+Hardware name: APM X-Gene Mustang board (DT)
+Call trace:
+[<ffff80000008a09c>] dump_backtrace+0x0/0x13c
+[<ffff80000008a1e8>] show_stack+0x10/0x1c
+[<ffff800000691da8>] dump_stack+0x74/0x94
+[<ffff800000690d78>] panic+0x100/0x240
+[<ffff8000000a0bc4>] stage2_get_pmd+0x17c/0x2bc
+[<ffff8000000a1dc4>] kvm_handle_guest_abort+0x4b4/0x6b0
+[<ffff8000000a420c>] handle_exit+0x58/0x180
+[<ffff80000009e7a4>] kvm_arch_vcpu_ioctl_run+0x114/0x45c
+[<ffff800000099df4>] kvm_vcpu_ioctl+0x2e0/0x754
+[<ffff8000001c0a18>] do_vfs_ioctl+0x424/0x5c8
+[<ffff8000001c0bfc>] SyS_ioctl+0x40/0x78
+CPU0: stopping
+
+A possible approach for this is to split the compound page using
+split_page() at allocation time, and change the teardown path to
+free one page at a time. It turns out that alloc_pages_exact() and
+free_pages_exact() do exactly that.
+
+While we're at it, the PGD allocation code is reworked to reduce
+duplication.
+
+This has been tested on an X-Gene platform with a 4kB/48bit-VA host
+kernel, and kvmtool hacked to place memory in the second page of
+the hardware PGD (PUD for the host kernel). Also regression-tested
+on a Cubietruck (Cortex-A7).
+
+ [ Reworked to use alloc_pages_exact() and free_pages_exact() and to
+ return pointers directly instead of by reference as arguments
+ - Christoffer ]
+
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/include/asm/kvm_mmu.h | 10 ++---
+ arch/arm/kvm/mmu.c | 67 ++++++++++++++++++++++++++++-----------
+ arch/arm64/include/asm/kvm_mmu.h | 46 ++------------------------
+ 3 files changed, 57 insertions(+), 66 deletions(-)
+
+--- a/arch/arm/include/asm/kvm_mmu.h
++++ b/arch/arm/include/asm/kvm_mmu.h
+@@ -141,16 +141,14 @@ static inline bool kvm_page_empty(void *
+
+ #define KVM_PREALLOC_LEVEL 0
+
+-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
++static inline void *kvm_get_hwpgd(struct kvm *kvm)
+ {
+- return 0;
++ return kvm->arch.pgd;
+ }
+
+-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
+-
+-static inline void *kvm_get_hwpgd(struct kvm *kvm)
++static inline unsigned int kvm_get_hwpgd_size(void)
+ {
+- return kvm->arch.pgd;
++ return PTRS_PER_S2_PGD * sizeof(pgd_t);
+ }
+
+ struct kvm;
+--- a/arch/arm/kvm/mmu.c
++++ b/arch/arm/kvm/mmu.c
+@@ -593,6 +593,20 @@ int create_hyp_io_mappings(void *from, v
+ __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
+ }
+
++/* Free the HW pgd, one page at a time */
++static void kvm_free_hwpgd(void *hwpgd)
++{
++ free_pages_exact(hwpgd, kvm_get_hwpgd_size());
++}
++
++/* Allocate the HW PGD, making sure that each page gets its own refcount */
++static void *kvm_alloc_hwpgd(void)
++{
++ unsigned int size = kvm_get_hwpgd_size();
++
++ return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
++}
++
+ /**
+ * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
+ * @kvm: The KVM struct pointer for the VM.
+@@ -606,15 +620,31 @@ int create_hyp_io_mappings(void *from, v
+ */
+ int kvm_alloc_stage2_pgd(struct kvm *kvm)
+ {
+- int ret;
+ pgd_t *pgd;
++ void *hwpgd;
+
+ if (kvm->arch.pgd != NULL) {
+ kvm_err("kvm_arch already initialized?\n");
+ return -EINVAL;
+ }
+
++ hwpgd = kvm_alloc_hwpgd();
++ if (!hwpgd)
++ return -ENOMEM;
++
++ /* When the kernel uses more levels of page tables than the
++ * guest, we allocate a fake PGD and pre-populate it to point
++ * to the next-level page table, which will be the real
++ * initial page table pointed to by the VTTBR.
++ *
++ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
++ * the PMD and the kernel will use folded pud.
++ * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
++ * pages.
++ */
+ if (KVM_PREALLOC_LEVEL > 0) {
++ int i;
++
+ /*
+ * Allocate fake pgd for the page table manipulation macros to
+ * work. This is not used by the hardware and we have no
+@@ -622,30 +652,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm
+ */
+ pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+ GFP_KERNEL | __GFP_ZERO);
++
++ if (!pgd) {
++ kvm_free_hwpgd(hwpgd);
++ return -ENOMEM;
++ }
++
++ /* Plug the HW PGD into the fake one. */
++ for (i = 0; i < PTRS_PER_S2_PGD; i++) {
++ if (KVM_PREALLOC_LEVEL == 1)
++ pgd_populate(NULL, pgd + i,
++ (pud_t *)hwpgd + i * PTRS_PER_PUD);
++ else if (KVM_PREALLOC_LEVEL == 2)
++ pud_populate(NULL, pud_offset(pgd, 0) + i,
++ (pmd_t *)hwpgd + i * PTRS_PER_PMD);
++ }
+ } else {
+ /*
+ * Allocate actual first-level Stage-2 page table used by the
+ * hardware for Stage-2 page table walks.
+ */
+- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
++ pgd = (pgd_t *)hwpgd;
+ }
+
+- if (!pgd)
+- return -ENOMEM;
+-
+- ret = kvm_prealloc_hwpgd(kvm, pgd);
+- if (ret)
+- goto out_err;
+-
+ kvm_clean_pgd(pgd);
+ kvm->arch.pgd = pgd;
+ return 0;
+-out_err:
+- if (KVM_PREALLOC_LEVEL > 0)
+- kfree(pgd);
+- else
+- free_pages((unsigned long)pgd, S2_PGD_ORDER);
+- return ret;
+ }
+
+ /**
+@@ -746,11 +778,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm
+ return;
+
+ unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+- kvm_free_hwpgd(kvm);
++ kvm_free_hwpgd(kvm_get_hwpgd(kvm));
+ if (KVM_PREALLOC_LEVEL > 0)
+ kfree(kvm->arch.pgd);
+- else
+- free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
++
+ kvm->arch.pgd = NULL;
+ }
+
+--- a/arch/arm64/include/asm/kvm_mmu.h
++++ b/arch/arm64/include/asm/kvm_mmu.h
+@@ -150,43 +150,6 @@ static inline void kvm_set_s2pmd_writabl
+ #define KVM_PREALLOC_LEVEL (0)
+ #endif
+
+-/**
+- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
+- * @kvm: The KVM struct pointer for the VM.
+- * @pgd: The kernel pseudo pgd
+- *
+- * When the kernel uses more levels of page tables than the guest, we allocate
+- * a fake PGD and pre-populate it to point to the next-level page table, which
+- * will be the real initial page table pointed to by the VTTBR.
+- *
+- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
+- * the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we
+- * allocate 2 consecutive PUD pages.
+- */
+-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+-{
+- unsigned int i;
+- unsigned long hwpgd;
+-
+- if (KVM_PREALLOC_LEVEL == 0)
+- return 0;
+-
+- hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
+- if (!hwpgd)
+- return -ENOMEM;
+-
+- for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+- if (KVM_PREALLOC_LEVEL == 1)
+- pgd_populate(NULL, pgd + i,
+- (pud_t *)hwpgd + i * PTRS_PER_PUD);
+- else if (KVM_PREALLOC_LEVEL == 2)
+- pud_populate(NULL, pud_offset(pgd, 0) + i,
+- (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+- }
+-
+- return 0;
+-}
+-
+ static inline void *kvm_get_hwpgd(struct kvm *kvm)
+ {
+ pgd_t *pgd = kvm->arch.pgd;
+@@ -203,12 +166,11 @@ static inline void *kvm_get_hwpgd(struct
+ return pmd_offset(pud, 0);
+ }
+
+-static inline void kvm_free_hwpgd(struct kvm *kvm)
++static inline unsigned int kvm_get_hwpgd_size(void)
+ {
+- if (KVM_PREALLOC_LEVEL > 0) {
+- unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
+- free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
+- }
++ if (KVM_PREALLOC_LEVEL > 0)
++ return PTRS_PER_S2_PGD * PAGE_SIZE;
++ return PTRS_PER_S2_PGD * sizeof(pgd_t);
+ }
+
+ static inline bool kvm_page_empty(void *ptr)
--- /dev/null
+From fd1d0ddf2ae92fb3df42ed476939861806c5d785 Mon Sep 17 00:00:00 2001
+From: Andre Przywara <andre.przywara@arm.com>
+Date: Fri, 10 Apr 2015 16:17:59 +0100
+Subject: KVM: arm/arm64: check IRQ number on userland injection
+
+From: Andre Przywara <andre.przywara@arm.com>
+
+commit fd1d0ddf2ae92fb3df42ed476939861806c5d785 upstream.
+
+When userland injects a SPI via the KVM_IRQ_LINE ioctl we currently
+only check it against a fixed limit, which historically is set
+to 127. With the new dynamic IRQ allocation the effective limit may
+actually be smaller (64).
+So when a malicious or buggy userland now injects an SPI in that
+range, we write past the end of our VGIC bitmaps and bytemaps.
+I could trigger a host kernel NULL pointer dereference with current
+mainline by injecting some bogus IRQ number from a hacked kvmtool:
+-----------------
+....
+DEBUG: kvm_vgic_inject_irq(kvm, cpu=0, irq=114, level=1)
+DEBUG: vgic_update_irq_pending(kvm, cpu=0, irq=114, level=1)
+DEBUG: IRQ #114 still in the game, writing to bytemap now...
+Unable to handle kernel NULL pointer dereference at virtual address 00000000
+pgd = ffffffc07652e000
+[00000000] *pgd=00000000f658b003, *pud=00000000f658b003, *pmd=0000000000000000
+Internal error: Oops: 96000006 [#1] PREEMPT SMP
+Modules linked in:
+CPU: 1 PID: 1053 Comm: lkvm-msi-irqinj Not tainted 4.0.0-rc7+ #3027
+Hardware name: FVP Base (DT)
+task: ffffffc0774e9680 ti: ffffffc0765a8000 task.ti: ffffffc0765a8000
+PC is at kvm_vgic_inject_irq+0x234/0x310
+LR is at kvm_vgic_inject_irq+0x30c/0x310
+pc : [<ffffffc0000ae0a8>] lr : [<ffffffc0000ae180>] pstate: 80000145
+.....
+
+So this patch fixes this by checking the SPI number against the
+actual limit. Also we remove the former legacy hard limit of
+127 in the ioctl code.
+
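+The resulting checks can be summarised in a small sketch (illustrative
+only; VGIC_NR_PRIVATE_IRQS is 32, covering the SGIs and PPIs):
+
+  /* irq_num as passed to kvm_vgic_inject_irq() for a userland SPI */
+  static int spi_in_range(unsigned int irq_num, unsigned int nr_irqs)
+  {
+          if (irq_num < 32)        /* SGIs/PPIs: rejected in the ioctl */
+                  return -EINVAL;
+          if (irq_num >= nr_irqs)  /* beyond the VGIC sized via        */
+                  return -EINVAL;  /* KVM_DEV_ARM_VGIC_GRP_NR_IRQS     */
+          return 0;
+  }
+
+With nr_irqs == 64, the IRQ 114 from the log above is now refused with
+-EINVAL instead of indexing past the end of the bitmaps.
+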
+Signed-off-by: Andre Przywara <andre.przywara@arm.com>
+Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
+[maz: wrap KVM_ARM_IRQ_GIC_MAX with #ifndef __KERNEL__,
+as suggested by Christopher Covington]
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/include/uapi/asm/kvm.h | 8 +++++++-
+ arch/arm/kvm/arm.c | 3 +--
+ arch/arm64/include/uapi/asm/kvm.h | 8 +++++++-
+ virt/kvm/arm/vgic.c | 3 +++
+ 4 files changed, 18 insertions(+), 4 deletions(-)
+
+--- a/arch/arm/include/uapi/asm/kvm.h
++++ b/arch/arm/include/uapi/asm/kvm.h
+@@ -193,8 +193,14 @@ struct kvm_arch_memory_slot {
+ #define KVM_ARM_IRQ_CPU_IRQ 0
+ #define KVM_ARM_IRQ_CPU_FIQ 1
+
+-/* Highest supported SPI, from VGIC_NR_IRQS */
++/*
++ * This used to hold the highest supported SPI, but it is now obsolete
++ * and only here to provide source code level compatibility with older
++ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
++ */
++#ifndef __KERNEL__
+ #define KVM_ARM_IRQ_GIC_MAX 127
++#endif
+
+ /* PSCI interface */
+ #define KVM_PSCI_FN_BASE 0x95c1ba5e
+--- a/arch/arm/kvm/arm.c
++++ b/arch/arm/kvm/arm.c
+@@ -644,8 +644,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kv
+ if (!irqchip_in_kernel(kvm))
+ return -ENXIO;
+
+- if (irq_num < VGIC_NR_PRIVATE_IRQS ||
+- irq_num > KVM_ARM_IRQ_GIC_MAX)
++ if (irq_num < VGIC_NR_PRIVATE_IRQS)
+ return -EINVAL;
+
+ return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
+--- a/arch/arm64/include/uapi/asm/kvm.h
++++ b/arch/arm64/include/uapi/asm/kvm.h
+@@ -179,8 +179,14 @@ struct kvm_arch_memory_slot {
+ #define KVM_ARM_IRQ_CPU_IRQ 0
+ #define KVM_ARM_IRQ_CPU_FIQ 1
+
+-/* Highest supported SPI, from VGIC_NR_IRQS */
++/*
++ * This used to hold the highest supported SPI, but it is now obsolete
++ * and only here to provide source code level compatibility with older
++ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
++ */
++#ifndef __KERNEL__
+ #define KVM_ARM_IRQ_GIC_MAX 127
++#endif
+
+ /* PSCI interface */
+ #define KVM_PSCI_FN_BASE 0x95c1ba5e
+--- a/virt/kvm/arm/vgic.c
++++ b/virt/kvm/arm/vgic.c
+@@ -1706,6 +1706,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm,
+ goto out;
+ }
+
++ if (irq_num >= kvm->arch.vgic.nr_irqs)
++ return -EINVAL;
++
+ vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
+ if (vcpu_id >= 0) {
+ /* kick the specified vcpu */
--- /dev/null
+From 66b030e48af68fd4c22d343908bc057207a0a31e Mon Sep 17 00:00:00 2001
+From: Eric Auger <eric.auger@linaro.org>
+Date: Mon, 15 Dec 2014 18:43:32 +0100
+Subject: KVM: arm/arm64: vgic: vgic_init returns -ENODEV when no online vcpu
+
+From: Eric Auger <eric.auger@linaro.org>
+
+commit 66b030e48af68fd4c22d343908bc057207a0a31e upstream.
+
+To be more explicit on vgic initialization failure, -ENODEV is
+returned by vgic_init when no online vcpus can be found at init.
+
+Signed-off-by: Eric Auger <eric.auger@linaro.org>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ virt/kvm/arm/vgic.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/virt/kvm/arm/vgic.c
++++ b/virt/kvm/arm/vgic.c
+@@ -1812,7 +1812,7 @@ static int vgic_init(struct kvm *kvm)
+
+ nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
+ if (!nr_cpus) /* No vcpus? Can't be good... */
+- return -EINVAL;
++ return -ENODEV;
+
+ /*
+ * If nobody configured the number of interrupts, use the
--- /dev/null
+From 94aa033efcac47b09db22cb561e135baf37b7887 Mon Sep 17 00:00:00 2001
+From: Jens Freimann <jfrei@linux.vnet.ibm.com>
+Date: Mon, 16 Mar 2015 12:17:13 +0100
+Subject: KVM: s390: fix get_all_floating_irqs
+
+From: Jens Freimann <jfrei@linux.vnet.ibm.com>
+
+commit 94aa033efcac47b09db22cb561e135baf37b7887 upstream.
+
+This fixes a bug introduced with commit c05c4186bbe4 ("KVM: s390:
+add floating irq controller").
+
+get_all_floating_irqs() does copy_to_user() while holding
+a spin lock. Let's fix this by filling a temporary buffer
+first and copying it to userspace after giving up the lock.
+
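+A minimal sketch of the general pattern (illustrative names, kernel
+context assumed; the real code lives in get_all_floating_irqs()):
+
+  static long dump_irqs_to_user(void __user *ubuf, size_t len)
+  {
+          void *kbuf = vzalloc(len);         /* may sleep: allocate unlocked */
+          long n;
+
+          if (!kbuf)
+                  return -ENOBUFS;
+
+          spin_lock(&fi_lock);               /* fi_lock: illustrative name */
+          n = fill_from_irq_list(kbuf, len); /* bytes used, no faulting    */
+          spin_unlock(&fi_lock);
+
+          if (n > 0 && copy_to_user(ubuf, kbuf, n)) /* may fault: unlocked */
+                  n = -EFAULT;
+
+          vfree(kbuf);
+          return n;
+  }
+
+copy_to_user() can fault and sleep, which is why it must not be called
+with fi->lock (a spinlock) held.
+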
+Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
+Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/virtual/kvm/devices/s390_flic.txt | 3 +
+ arch/s390/kvm/interrupt.c | 58 +++++++++++++-----------
+ 2 files changed, 35 insertions(+), 26 deletions(-)
+
+--- a/Documentation/virtual/kvm/devices/s390_flic.txt
++++ b/Documentation/virtual/kvm/devices/s390_flic.txt
+@@ -27,6 +27,9 @@ Groups:
+ Copies all floating interrupts into a buffer provided by userspace.
+ When the buffer is too small it returns -ENOMEM, which is the indication
+ for userspace to try again with a bigger buffer.
++ -ENOBUFS is returned when the allocation of a kernelspace buffer has
++ failed.
++ -EFAULT is returned when copying data to userspace failed.
+ All interrupts remain pending, i.e. are not deleted from the list of
+ currently pending interrupts.
+ attr->addr contains the userspace address of the buffer into which all
+--- a/arch/s390/kvm/interrupt.c
++++ b/arch/s390/kvm/interrupt.c
+@@ -17,6 +17,7 @@
+ #include <linux/signal.h>
+ #include <linux/slab.h>
+ #include <linux/bitmap.h>
++#include <linux/vmalloc.h>
+ #include <asm/asm-offsets.h>
+ #include <asm/uaccess.h>
+ #include "kvm-s390.h"
+@@ -1386,61 +1387,66 @@ void kvm_s390_clear_float_irqs(struct kv
+ spin_unlock(&fi->lock);
+ }
+
+-static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
+- u8 *addr)
++static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
++ struct kvm_s390_irq *irq)
+ {
+- struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+- struct kvm_s390_irq irq = {0};
+-
+- irq.type = inti->type;
++ irq->type = inti->type;
+ switch (inti->type) {
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
+ case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_SERVICE:
+- irq.u.ext = inti->ext;
++ irq->u.ext = inti->ext;
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+- irq.u.io = inti->io;
++ irq->u.io = inti->io;
+ break;
+ case KVM_S390_MCHK:
+- irq.u.mchk = inti->mchk;
++ irq->u.mchk = inti->mchk;
+ break;
+- default:
+- return -EINVAL;
+ }
+-
+- if (copy_to_user(uptr, &irq, sizeof(irq)))
+- return -EFAULT;
+-
+- return 0;
+ }
+
+-static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
++static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
+ {
+ struct kvm_s390_interrupt_info *inti;
+ struct kvm_s390_float_interrupt *fi;
++ struct kvm_s390_irq *buf;
++ int max_irqs;
+ int ret = 0;
+ int n = 0;
+
++ if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0)
++ return -EINVAL;
++
++ /*
++ * We are already using -ENOMEM to signal
++ * userspace it may retry with a bigger buffer,
++ * so we need to use something else for this case
++ */
++ buf = vzalloc(len);
++ if (!buf)
++ return -ENOBUFS;
++
++ max_irqs = len / sizeof(struct kvm_s390_irq);
++
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+-
+ list_for_each_entry(inti, &fi->list, list) {
+- if (len < sizeof(struct kvm_s390_irq)) {
++ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ break;
+ }
+- ret = copy_irq_to_user(inti, buf);
+- if (ret)
+- break;
+- buf += sizeof(struct kvm_s390_irq);
+- len -= sizeof(struct kvm_s390_irq);
++ inti_to_irq(inti, &buf[n]);
+ n++;
+ }
+-
+ spin_unlock(&fi->lock);
++ if (!ret && n > 0) {
++ if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
++ ret = -EFAULT;
++ }
++ vfree(buf);
+
+ return ret < 0 ? ret : n;
+ }
+@@ -1451,7 +1457,7 @@ static int flic_get_attr(struct kvm_devi
+
+ switch (attr->group) {
+ case KVM_DEV_FLIC_GET_ALL_IRQS:
+- r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
++ r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
+ attr->attr);
+ break;
+ default:
--- /dev/null
+From 261520dcfcba93ca5dfe671b88ffab038cd940c8 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <dahi@linux.vnet.ibm.com>
+Date: Wed, 4 Feb 2015 15:53:42 +0100
+Subject: KVM: s390: fix handling of write errors in the tpi handler
+
+From: David Hildenbrand <dahi@linux.vnet.ibm.com>
+
+commit 261520dcfcba93ca5dfe671b88ffab038cd940c8 upstream.
+
+If the I/O interrupt could not be written to the guest-provided
+area (e.g. access exception), a program exception was injected into the
+guest but "inti" wasn't freed, resulting in a memory leak.
+
+In addition, the I/O interrupt wasn't reinjected. Therefore the dequeued
+interrupt is lost.
+
+This patch fixes the problem while cleaning up the function and making the
+cc and rc logic easier to handle.
+
+Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/priv.c | 40 +++++++++++++++++++++++-----------------
+ 1 file changed, 23 insertions(+), 17 deletions(-)
+
+--- a/arch/s390/kvm/priv.c
++++ b/arch/s390/kvm/priv.c
+@@ -229,18 +229,19 @@ static int handle_tpi(struct kvm_vcpu *v
+ struct kvm_s390_interrupt_info *inti;
+ unsigned long len;
+ u32 tpi_data[3];
+- int cc, rc;
++ int rc;
+ u64 addr;
+
+- rc = 0;
+ addr = kvm_s390_get_base_disp_s(vcpu);
+ if (addr & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+- cc = 0;
++
+ inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
+- if (!inti)
+- goto no_interrupt;
+- cc = 1;
++ if (!inti) {
++ kvm_s390_set_psw_cc(vcpu, 0);
++ return 0;
++ }
++
+ tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
+ tpi_data[1] = inti->io.io_int_parm;
+ tpi_data[2] = inti->io.io_int_word;
+@@ -251,30 +252,35 @@ static int handle_tpi(struct kvm_vcpu *v
+ */
+ len = sizeof(tpi_data) - 4;
+ rc = write_guest(vcpu, addr, &tpi_data, len);
+- if (rc)
+- return kvm_s390_inject_prog_cond(vcpu, rc);
++ if (rc) {
++ rc = kvm_s390_inject_prog_cond(vcpu, rc);
++ goto reinject_interrupt;
++ }
+ } else {
+ /*
+ * Store the three-word I/O interruption code into
+ * the appropriate lowcore area.
+ */
+ len = sizeof(tpi_data);
+- if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
++ if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
++ /* failed writes to the low core are not recoverable */
+ rc = -EFAULT;
++ goto reinject_interrupt;
++ }
+ }
++
++ /* irq was successfully handed to the guest */
++ kfree(inti);
++ kvm_s390_set_psw_cc(vcpu, 1);
++ return 0;
++reinject_interrupt:
+ /*
+ * If we encounter a problem storing the interruption code, the
+ * instruction is suppressed from the guest's view: reinject the
+ * interrupt.
+ */
+- if (!rc)
+- kfree(inti);
+- else
+- kvm_s390_reinject_io_int(vcpu->kvm, inti);
+-no_interrupt:
+- /* Set condition code and we're done. */
+- if (!rc)
+- kvm_s390_set_psw_cc(vcpu, cc);
++ kvm_s390_reinject_io_int(vcpu->kvm, inti);
++ /* don't set the cc, a pgm irq was injected or we drop to user space */
+ return rc ? -EFAULT : 0;
+ }
+
--- /dev/null
+From 69a8d456263849152826542c7cb0a164b90e68a8 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Wed, 17 Dec 2014 10:36:04 +0100
+Subject: KVM: s390: no need to hold the kvm->mutex for floating interrupts
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit 69a8d456263849152826542c7cb0a164b90e68a8 upstream.
+
+The kvm mutex was (probably) used to protect against cpu hotplug.
+The current code no longer needs to protect against that, as we only
+rely on CPU data structures that are guaranteed to be available
+if we can access the CPU. (e.g. vcpu_create will put the cpu
+in the array AFTER the cpu is ready).
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
+Reviewed-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/interrupt.c | 8 --------
+ 1 file changed, 8 deletions(-)
+
+--- a/arch/s390/kvm/interrupt.c
++++ b/arch/s390/kvm/interrupt.c
+@@ -1131,7 +1131,6 @@ struct kvm_s390_interrupt_info *kvm_s390
+
+ if ((!schid && !cr6) || (schid && cr6))
+ return NULL;
+- mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ inti = NULL;
+@@ -1159,7 +1158,6 @@ struct kvm_s390_interrupt_info *kvm_s390
+ if (list_empty(&fi->list))
+ atomic_set(&fi->active, 0);
+ spin_unlock(&fi->lock);
+- mutex_unlock(&kvm->lock);
+ return inti;
+ }
+
+@@ -1172,7 +1170,6 @@ static int __inject_vm(struct kvm *kvm,
+ int sigcpu;
+ int rc = 0;
+
+- mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
+@@ -1225,7 +1222,6 @@ static int __inject_vm(struct kvm *kvm,
+ kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
+ unlock_fi:
+ spin_unlock(&fi->lock);
+- mutex_unlock(&kvm->lock);
+ return rc;
+ }
+
+@@ -1379,7 +1375,6 @@ void kvm_s390_clear_float_irqs(struct kv
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_interrupt_info *n, *inti = NULL;
+
+- mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ list_for_each_entry_safe(inti, n, &fi->list, list) {
+@@ -1389,7 +1384,6 @@ void kvm_s390_clear_float_irqs(struct kv
+ fi->irq_count = 0;
+ atomic_set(&fi->active, 0);
+ spin_unlock(&fi->lock);
+- mutex_unlock(&kvm->lock);
+ }
+
+ static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
+@@ -1429,7 +1423,6 @@ static int get_all_floating_irqs(struct
+ int ret = 0;
+ int n = 0;
+
+- mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+
+@@ -1448,7 +1441,6 @@ static int get_all_floating_irqs(struct
+ }
+
+ spin_unlock(&fi->lock);
+- mutex_unlock(&kvm->lock);
+
+ return ret < 0 ? ret : n;
+ }
--- /dev/null
+From 15462e37ca848abac7477dece65f8af25febd744 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <dahi@linux.vnet.ibm.com>
+Date: Wed, 4 Feb 2015 15:59:11 +0100
+Subject: KVM: s390: reinjection of irqs can fail in the tpi handler
+
+From: David Hildenbrand <dahi@linux.vnet.ibm.com>
+
+commit 15462e37ca848abac7477dece65f8af25febd744 upstream.
+
+The reinjection of an I/O interrupt can fail if the list is at the limit
+and between the dequeue and the reinjection, another I/O interrupt is
+injected (e.g. if user space floods kvm with I/O interrupts).
+
+This patch avoids this memory leak and returns -EFAULT in this special
+case. This error is not recoverable, so let's fail hard. This can later
+be avoided by not dequeuing the interrupt but working directly on the
+locked list.
+
+Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/interrupt.c | 4 ++--
+ arch/s390/kvm/kvm-s390.h | 4 ++--
+ arch/s390/kvm/priv.c | 5 ++++-
+ 3 files changed, 8 insertions(+), 5 deletions(-)
+
+--- a/arch/s390/kvm/interrupt.c
++++ b/arch/s390/kvm/interrupt.c
+@@ -1287,10 +1287,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
+ return rc;
+ }
+
+-void kvm_s390_reinject_io_int(struct kvm *kvm,
++int kvm_s390_reinject_io_int(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+ {
+- __inject_vm(kvm, inti);
++ return __inject_vm(kvm, inti);
+ }
+
+ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+--- a/arch/s390/kvm/kvm-s390.h
++++ b/arch/s390/kvm/kvm-s390.h
+@@ -146,8 +146,8 @@ int __must_check kvm_s390_inject_vcpu(st
+ int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+ u64 cr6, u64 schid);
+-void kvm_s390_reinject_io_int(struct kvm *kvm,
+- struct kvm_s390_interrupt_info *inti);
++int kvm_s390_reinject_io_int(struct kvm *kvm,
++ struct kvm_s390_interrupt_info *inti);
+ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
+
+ /* implemented in intercept.c */
+--- a/arch/s390/kvm/priv.c
++++ b/arch/s390/kvm/priv.c
+@@ -279,7 +279,10 @@ reinject_interrupt:
+ * instruction is suppressed from the guest's view: reinject the
+ * interrupt.
+ */
+- kvm_s390_reinject_io_int(vcpu->kvm, inti);
++ if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
++ kfree(inti);
++ rc = -EFAULT;
++ }
+ /* don't set the cc, a pgm irq was injected or we drop to user space */
+ return rc ? -EFAULT : 0;
+ }
--- /dev/null
+From b75f4c9afac2604feb971441116c07a24ecca1ec Mon Sep 17 00:00:00 2001
+From: Ekaterina Tumanova <tumanova@linux.vnet.ibm.com>
+Date: Tue, 3 Mar 2015 09:54:41 +0100
+Subject: KVM: s390: Zero out current VMDB of STSI before including level3 data.
+
+From: Ekaterina Tumanova <tumanova@linux.vnet.ibm.com>
+
+commit b75f4c9afac2604feb971441116c07a24ecca1ec upstream.
+
+s390 documentation requires words 0 and 10-15 to be reserved and stored as
+zeros. As we fill out all other fields, we can memset the full structure.
+
+Signed-off-by: Ekaterina Tumanova <tumanova@linux.vnet.ibm.com>
+Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/priv.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/kvm/priv.c
++++ b/arch/s390/kvm/priv.c
+@@ -471,6 +471,7 @@ static void handle_stsi_3_2_2(struct kvm
+ for (n = mem->count - 1; n > 0 ; n--)
+ memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+
++ memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
+ mem->vm[0].cpus_total = cpus;
+ mem->vm[0].cpus_configured = cpus;
+ mem->vm[0].cpus_standby = 0;
--- /dev/null
+From ca3f0874723fad81d0c701b63ae3a17a408d5f25 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
+Date: Wed, 8 Apr 2015 14:16:48 +0200
+Subject: KVM: use slowpath for cross page cached accesses
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
+
+commit ca3f0874723fad81d0c701b63ae3a17a408d5f25 upstream.
+
+kvm_write_guest_cached() does not mark all written pages as dirty and
+code comments in kvm_gfn_to_hva_cache_init() talk about NULL memslot
+with cross page accesses. Fix both issues the easy way.
+
+The check is '<= 1' to have the same result for 'len = 0' cache anywhere
+in the page. (nr_pages_needed is 0 on page boundary.)
+
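+The page count in question is computed as in
+kvm_gfn_to_hva_cache_init(); a user-space toy (4kB pages assumed) shows
+which cases now take the slow path:
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  #define PAGE_SHIFT 12
+
+  static uint64_t nr_pages_needed(uint64_t gpa, unsigned long len)
+  {
+          uint64_t start_gfn = gpa >> PAGE_SHIFT;
+          uint64_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT;
+
+          return end_gfn - start_gfn + 1;
+  }
+
+  int main(void)
+  {
+          /* 1: within one page, cached fast path */
+          printf("%llu\n", (unsigned long long)nr_pages_needed(0x1000, 16));
+          /* 2: crosses a page boundary, slow path */
+          printf("%llu\n", (unsigned long long)nr_pages_needed(0x1ff8, 16));
+          /* 0: len = 0 on a page boundary, still fast path */
+          printf("%llu\n", (unsigned long long)nr_pages_needed(0x2000, 0));
+          return 0;
+  }
+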
+Fixes: 8f964525a121 ("KVM: Allow cross page reads and writes from cached translations.")
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Message-Id: <20150408121648.GA3519@potion.brq.redhat.com>
+Reviewed-by: Wanpeng Li <wanpeng.li@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ virt/kvm/kvm_main.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1608,8 +1608,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm
+ ghc->generation = slots->generation;
+ ghc->len = len;
+ ghc->memslot = gfn_to_memslot(kvm, start_gfn);
+- ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail);
+- if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) {
++ ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
++ if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
+ ghc->hva += offset;
+ } else {
+ /*
--- /dev/null
+From 60cd7e08e453bc6828ac4b539f949e4acd80f143 Mon Sep 17 00:00:00 2001
+From: Markos Chandras <markos.chandras@imgtec.com>
+Date: Mon, 9 Mar 2015 14:54:49 +0000
+Subject: MIPS: asm: asm-eva: Introduce kernel load/store variants
+
+From: Markos Chandras <markos.chandras@imgtec.com>
+
+commit 60cd7e08e453bc6828ac4b539f949e4acd80f143 upstream.
+
+Introduce new macros for kernel load/store variants which will be
+used to perform regular kernel space load/store operations in EVA
+mode.
+
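+For reference, the C-string variants are meant to be pasted into
+extended asm templates; an illustrative use of the new kernel_lw()
+(MIPS-only, assumes <asm/asm-eva.h> is in scope, not part of this
+patch) would be:
+
+  static inline int demo_kernel_lw(int *p)
+  {
+          int val;
+
+          asm volatile(
+                  kernel_lw("%0", "0(%1)")   /* expands to "lw %0, 0(%1)\n" */
+                  : "=r" (val)
+                  : "r" (p)
+                  : "memory");
+          return val;
+  }
+
+In EVA mode the user_*() macros emit the EVA instructions (e.g. lwe)
+while the kernel_*() macros keep the regular ones for kernel addresses.
+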
+Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
+Cc: linux-mips@linux-mips.org
+Patchwork: https://patchwork.linux-mips.org/patch/9500/
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/include/asm/asm-eva.h | 137 +++++++++++++++++++++++++++-------------
+ 1 file changed, 93 insertions(+), 44 deletions(-)
+
+--- a/arch/mips/include/asm/asm-eva.h
++++ b/arch/mips/include/asm/asm-eva.h
+@@ -11,6 +11,36 @@
+ #define __ASM_ASM_EVA_H
+
+ #ifndef __ASSEMBLY__
++
++/* Kernel variants */
++
++#define kernel_cache(op, base) "cache " op ", " base "\n"
++#define kernel_ll(reg, addr) "ll " reg ", " addr "\n"
++#define kernel_sc(reg, addr) "sc " reg ", " addr "\n"
++#define kernel_lw(reg, addr) "lw " reg ", " addr "\n"
++#define kernel_lwl(reg, addr) "lwl " reg ", " addr "\n"
++#define kernel_lwr(reg, addr) "lwr " reg ", " addr "\n"
++#define kernel_lh(reg, addr) "lh " reg ", " addr "\n"
++#define kernel_lb(reg, addr) "lb " reg ", " addr "\n"
++#define kernel_lbu(reg, addr) "lbu " reg ", " addr "\n"
++#define kernel_sw(reg, addr) "sw " reg ", " addr "\n"
++#define kernel_swl(reg, addr) "swl " reg ", " addr "\n"
++#define kernel_swr(reg, addr) "swr " reg ", " addr "\n"
++#define kernel_sh(reg, addr) "sh " reg ", " addr "\n"
++#define kernel_sb(reg, addr) "sb " reg ", " addr "\n"
++
++#ifdef CONFIG_32BIT
++/*
++ * No 'sd' or 'ld' instructions in 32-bit but the code will
++ * do the correct thing
++ */
++#define kernel_sd(reg, addr) user_sw(reg, addr)
++#define kernel_ld(reg, addr) user_lw(reg, addr)
++#else
++#define kernel_sd(reg, addr) "sd " reg", " addr "\n"
++#define kernel_ld(reg, addr) "ld " reg", " addr "\n"
++#endif /* CONFIG_32BIT */
++
+ #ifdef CONFIG_EVA
+
+ #define __BUILD_EVA_INSN(insn, reg, addr) \
+@@ -41,37 +71,60 @@
+
+ #else
+
+-#define user_cache(op, base) "cache " op ", " base "\n"
+-#define user_ll(reg, addr) "ll " reg ", " addr "\n"
+-#define user_sc(reg, addr) "sc " reg ", " addr "\n"
+-#define user_lw(reg, addr) "lw " reg ", " addr "\n"
+-#define user_lwl(reg, addr) "lwl " reg ", " addr "\n"
+-#define user_lwr(reg, addr) "lwr " reg ", " addr "\n"
+-#define user_lh(reg, addr) "lh " reg ", " addr "\n"
+-#define user_lb(reg, addr) "lb " reg ", " addr "\n"
+-#define user_lbu(reg, addr) "lbu " reg ", " addr "\n"
+-#define user_sw(reg, addr) "sw " reg ", " addr "\n"
+-#define user_swl(reg, addr) "swl " reg ", " addr "\n"
+-#define user_swr(reg, addr) "swr " reg ", " addr "\n"
+-#define user_sh(reg, addr) "sh " reg ", " addr "\n"
+-#define user_sb(reg, addr) "sb " reg ", " addr "\n"
++#define user_cache(op, base) kernel_cache(op, base)
++#define user_ll(reg, addr) kernel_ll(reg, addr)
++#define user_sc(reg, addr) kernel_sc(reg, addr)
++#define user_lw(reg, addr) kernel_lw(reg, addr)
++#define user_lwl(reg, addr) kernel_lwl(reg, addr)
++#define user_lwr(reg, addr) kernel_lwr(reg, addr)
++#define user_lh(reg, addr) kernel_lh(reg, addr)
++#define user_lb(reg, addr) kernel_lb(reg, addr)
++#define user_lbu(reg, addr) kernel_lbu(reg, addr)
++#define user_sw(reg, addr) kernel_sw(reg, addr)
++#define user_swl(reg, addr) kernel_swl(reg, addr)
++#define user_swr(reg, addr) kernel_swr(reg, addr)
++#define user_sh(reg, addr) kernel_sh(reg, addr)
++#define user_sb(reg, addr) kernel_sb(reg, addr)
+
+ #ifdef CONFIG_32BIT
+-/*
+- * No 'sd' or 'ld' instructions in 32-bit but the code will
+- * do the correct thing
+- */
+-#define user_sd(reg, addr) user_sw(reg, addr)
+-#define user_ld(reg, addr) user_lw(reg, addr)
++#define user_sd(reg, addr) kernel_sw(reg, addr)
++#define user_ld(reg, addr) kernel_lw(reg, addr)
+ #else
+-#define user_sd(reg, addr) "sd " reg", " addr "\n"
+-#define user_ld(reg, addr) "ld " reg", " addr "\n"
++#define user_sd(reg, addr) kernel_sd(reg, addr)
++#define user_ld(reg, addr) kernel_ld(reg, addr)
+ #endif /* CONFIG_32BIT */
+
+ #endif /* CONFIG_EVA */
+
+ #else /* __ASSEMBLY__ */
+
++#define kernel_cache(op, base) cache op, base
++#define kernel_ll(reg, addr) ll reg, addr
++#define kernel_sc(reg, addr) sc reg, addr
++#define kernel_lw(reg, addr) lw reg, addr
++#define kernel_lwl(reg, addr) lwl reg, addr
++#define kernel_lwr(reg, addr) lwr reg, addr
++#define kernel_lh(reg, addr) lh reg, addr
++#define kernel_lb(reg, addr) lb reg, addr
++#define kernel_lbu(reg, addr) lbu reg, addr
++#define kernel_sw(reg, addr) sw reg, addr
++#define kernel_swl(reg, addr) swl reg, addr
++#define kernel_swr(reg, addr) swr reg, addr
++#define kernel_sh(reg, addr) sh reg, addr
++#define kernel_sb(reg, addr) sb reg, addr
++
++#ifdef CONFIG_32BIT
++/*
++ * No 'sd' or 'ld' instructions in 32-bit but the code will
++ * do the correct thing
++ */
++#define kernel_sd(reg, addr) user_sw(reg, addr)
++#define kernel_ld(reg, addr) user_lw(reg, addr)
++#else
++#define kernel_sd(reg, addr) sd reg, addr
++#define kernel_ld(reg, addr) ld reg, addr
++#endif /* CONFIG_32BIT */
++
+ #ifdef CONFIG_EVA
+
+ #define __BUILD_EVA_INSN(insn, reg, addr) \
+@@ -101,31 +154,27 @@
+ #define user_sd(reg, addr) user_sw(reg, addr)
+ #else
+
+-#define user_cache(op, base) cache op, base
+-#define user_ll(reg, addr) ll reg, addr
+-#define user_sc(reg, addr) sc reg, addr
+-#define user_lw(reg, addr) lw reg, addr
+-#define user_lwl(reg, addr) lwl reg, addr
+-#define user_lwr(reg, addr) lwr reg, addr
+-#define user_lh(reg, addr) lh reg, addr
+-#define user_lb(reg, addr) lb reg, addr
+-#define user_lbu(reg, addr) lbu reg, addr
+-#define user_sw(reg, addr) sw reg, addr
+-#define user_swl(reg, addr) swl reg, addr
+-#define user_swr(reg, addr) swr reg, addr
+-#define user_sh(reg, addr) sh reg, addr
+-#define user_sb(reg, addr) sb reg, addr
++#define user_cache(op, base) kernel_cache(op, base)
++#define user_ll(reg, addr) kernel_ll(reg, addr)
++#define user_sc(reg, addr) kernel_sc(reg, addr)
++#define user_lw(reg, addr) kernel_lw(reg, addr)
++#define user_lwl(reg, addr) kernel_lwl(reg, addr)
++#define user_lwr(reg, addr) kernel_lwr(reg, addr)
++#define user_lh(reg, addr) kernel_lh(reg, addr)
++#define user_lb(reg, addr) kernel_lb(reg, addr)
++#define user_lbu(reg, addr) kernel_lbu(reg, addr)
++#define user_sw(reg, addr) kernel_sw(reg, addr)
++#define user_swl(reg, addr) kernel_swl(reg, addr)
++#define user_swr(reg, addr) kernel_swr(reg, addr)
++#define user_sh(reg, addr) kernel_sh(reg, addr)
++#define user_sb(reg, addr) kernel_sb(reg, addr)
+
+ #ifdef CONFIG_32BIT
+-/*
+- * No 'sd' or 'ld' instructions in 32-bit but the code will
+- * do the correct thing
+- */
+-#define user_sd(reg, addr) user_sw(reg, addr)
+-#define user_ld(reg, addr) user_lw(reg, addr)
++#define user_sd(reg, addr) kernel_sw(reg, addr)
++#define user_ld(reg, addr) kernel_lw(reg, addr)
+ #else
+-#define user_sd(reg, addr) sd reg, addr
+-#define user_ld(reg, addr) ld reg, addr
++#define user_sd(reg, addr) kernel_sd(reg, addr)
++#define user_ld(reg, addr) kernel_sd(reg, addr)
+ #endif /* CONFIG_32BIT */
+
+ #endif /* CONFIG_EVA */
--- /dev/null
+From a843d00d038b11267279e3b5388222320f9ddc1d Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhc@lemote.com>
+Date: Sun, 29 Mar 2015 10:54:05 +0800
+Subject: MIPS: Hibernate: flush TLB entries earlier
+
+From: Huacai Chen <chenhc@lemote.com>
+
+commit a843d00d038b11267279e3b5388222320f9ddc1d upstream.
+
+We found that a TLB mismatch not only happens after kernel resume, but
+also happens during snapshot restore. So move the TLB flush to the
+beginning of swsusp_arch_resume().
+
+Signed-off-by: Huacai Chen <chenhc@lemote.com>
+Cc: Steven J. Hill <Steven.Hill@imgtec.com>
+Cc: linux-mips@linux-mips.org
+Cc: Fuxin Zhang <zhangfx@lemote.com>
+Cc: Zhangjin Wu <wuzhangjin@gmail.com>
+Patchwork: https://patchwork.linux-mips.org/patch/9621/
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/power/hibernate.S | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/mips/power/hibernate.S
++++ b/arch/mips/power/hibernate.S
+@@ -30,6 +30,8 @@ LEAF(swsusp_arch_suspend)
+ END(swsusp_arch_suspend)
+
+ LEAF(swsusp_arch_resume)
++ /* Avoid TLB mismatch during and after kernel resume */
++ jal local_flush_tlb_all
+ PTR_L t0, restore_pblist
+ 0:
+ PTR_L t1, PBE_ADDRESS(t0) /* source */
+@@ -43,7 +45,6 @@ LEAF(swsusp_arch_resume)
+ bne t1, t3, 1b
+ PTR_L t0, PBE_NEXT(t0)
+ bnez t0, 0b
+- jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */
+ PTR_LA t0, saved_regs
+ PTR_L ra, PT_R31(t0)
+ PTR_L sp, PT_R29(t0)
--- /dev/null
+From 98119ad53376885819d93dfb8737b6a9a61ca0ba Mon Sep 17 00:00:00 2001
+From: James Hogan <james.hogan@imgtec.com>
+Date: Fri, 6 Feb 2015 11:11:56 +0000
+Subject: MIPS: KVM: Handle MSA Disabled exceptions from guest
+
+From: James Hogan <james.hogan@imgtec.com>
+
+commit 98119ad53376885819d93dfb8737b6a9a61ca0ba upstream.
+
+Guest user mode can generate a guest MSA Disabled exception on an MSA
+capable core by simply trying to execute an MSA instruction. Since this
+exception is unknown to KVM it will be passed on to the guest kernel.
+However guest Linux kernels prior to v3.15 do not set up an exception
+handler for the MSA Disabled exception as they don't support any MSA
+capable cores. This results in a guest OS panic.
+
+Since an older processor ID may be being emulated, and MSA support is
+not advertised to the guest, the correct behaviour is to generate a
+Reserved Instruction exception in the guest kernel so it can send the
+guest process an illegal instruction signal (SIGILL), as would happen
+with a non-MSA-capable core.
+
+Fix this as minimally as reasonably possible by preventing
+kvm_mips_check_privilege() from relaying MSA Disabled exceptions from
+guest user mode to the guest kernel, and handling the MSA Disabled
+exception by emulating a Reserved Instruction exception in the guest,
+via a new handle_msa_disabled() KVM callback.
+
+Signed-off-by: James Hogan <james.hogan@imgtec.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Paul Burton <paul.burton@imgtec.com>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: Gleb Natapov <gleb@kernel.org>
+Cc: linux-mips@linux-mips.org
+Cc: kvm@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/include/asm/kvm_host.h | 2 ++
+ arch/mips/kvm/emulate.c | 1 +
+ arch/mips/kvm/mips.c | 4 ++++
+ arch/mips/kvm/trap_emul.c | 28 ++++++++++++++++++++++++++++
+ 4 files changed, 35 insertions(+)
+
+--- a/arch/mips/include/asm/kvm_host.h
++++ b/arch/mips/include/asm/kvm_host.h
+@@ -321,6 +321,7 @@ enum mips_mmu_types {
+ #define T_TRAP 13 /* Trap instruction */
+ #define T_VCEI 14 /* Virtual coherency exception */
+ #define T_FPE 15 /* Floating point exception */
++#define T_MSADIS 21 /* MSA disabled exception */
+ #define T_WATCH 23 /* Watch address reference */
+ #define T_VCED 31 /* Virtual coherency data */
+
+@@ -577,6 +578,7 @@ struct kvm_mips_callbacks {
+ int (*handle_syscall)(struct kvm_vcpu *vcpu);
+ int (*handle_res_inst)(struct kvm_vcpu *vcpu);
+ int (*handle_break)(struct kvm_vcpu *vcpu);
++ int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
+ int (*vm_init)(struct kvm *kvm);
+ int (*vcpu_init)(struct kvm_vcpu *vcpu);
+ int (*vcpu_setup)(struct kvm_vcpu *vcpu);
+--- a/arch/mips/kvm/emulate.c
++++ b/arch/mips/kvm/emulate.c
+@@ -2176,6 +2176,7 @@ enum emulation_result kvm_mips_check_pri
+ case T_SYSCALL:
+ case T_BREAK:
+ case T_RES_INST:
++ case T_MSADIS:
+ break;
+
+ case T_COP_UNUSABLE:
+--- a/arch/mips/kvm/mips.c
++++ b/arch/mips/kvm/mips.c
+@@ -1119,6 +1119,10 @@ int kvm_mips_handle_exit(struct kvm_run
+ ret = kvm_mips_callbacks->handle_break(vcpu);
+ break;
+
++ case T_MSADIS:
++ ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
++ break;
++
+ default:
+ kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n",
+ exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
+--- a/arch/mips/kvm/trap_emul.c
++++ b/arch/mips/kvm/trap_emul.c
+@@ -330,6 +330,33 @@ static int kvm_trap_emul_handle_break(st
+ return ret;
+ }
+
++static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
++{
++ struct kvm_run *run = vcpu->run;
++ uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
++ unsigned long cause = vcpu->arch.host_cp0_cause;
++ enum emulation_result er = EMULATE_DONE;
++ int ret = RESUME_GUEST;
++
++ /* No MSA supported in guest, guest reserved instruction exception */
++ er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
++
++ switch (er) {
++ case EMULATE_DONE:
++ ret = RESUME_GUEST;
++ break;
++
++ case EMULATE_FAIL:
++ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
++ ret = RESUME_HOST;
++ break;
++
++ default:
++ BUG();
++ }
++ return ret;
++}
++
+ static int kvm_trap_emul_vm_init(struct kvm *kvm)
+ {
+ return 0;
+@@ -470,6 +497,7 @@ static struct kvm_mips_callbacks kvm_tra
+ .handle_syscall = kvm_trap_emul_handle_syscall,
+ .handle_res_inst = kvm_trap_emul_handle_res_inst,
+ .handle_break = kvm_trap_emul_handle_break,
++ .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
+
+ .vm_init = kvm_trap_emul_vm_init,
+ .vcpu_init = kvm_trap_emul_vcpu_init,
--- /dev/null
+From 0add9c2f1cff9f3f1f2eb7e9babefa872a9d14b9 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhc@lemote.com>
+Date: Thu, 12 Mar 2015 11:51:06 +0800
+Subject: MIPS: Loongson-3: Add IRQF_NO_SUSPEND to Cascade irqaction
+
+From: Huacai Chen <chenhc@lemote.com>
+
+commit 0add9c2f1cff9f3f1f2eb7e9babefa872a9d14b9 upstream.
+
+HPET irq is routed to i8259 and then to MIPS CPU irq (cascade). After
+commit a3e6c1eff5 (MIPS: IRQ: Fix disable_irq on CPU IRQs), without
+IRQF_NO_SUSPEND in cascade_irqaction, HPET interrupts will be lost during
+suspend. As a result, the machine cannot be woken up.
+
+Signed-off-by: Huacai Chen <chenhc@lemote.com>
+Cc: Steven J. Hill <Steven.Hill@imgtec.com>
+Cc: linux-mips@linux-mips.org
+Cc: Fuxin Zhang <zhangfx@lemote.com>
+Cc: Zhangjin Wu <wuzhangjin@gmail.com>
+Patchwork: https://patchwork.linux-mips.org/patch/9528/
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/loongson/loongson-3/irq.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/mips/loongson/loongson-3/irq.c
++++ b/arch/mips/loongson/loongson-3/irq.c
+@@ -44,6 +44,7 @@ void mach_irq_dispatch(unsigned int pend
+
+ static struct irqaction cascade_irqaction = {
+ .handler = no_action,
++ .flags = IRQF_NO_SUSPEND,
+ .name = "cascade",
+ };
+
--- /dev/null
+From acaf6a97d623af123314c2f8ce4cf7254f6b2fc1 Mon Sep 17 00:00:00 2001
+From: James Hogan <james.hogan@imgtec.com>
+Date: Wed, 25 Feb 2015 13:08:05 +0000
+Subject: MIPS: lose_fpu(): Disable FPU when MSA enabled
+
+From: James Hogan <james.hogan@imgtec.com>
+
+commit acaf6a97d623af123314c2f8ce4cf7254f6b2fc1 upstream.
+
+The lose_fpu() function only disables the FPU in CP0_Status.CU1 if the
+FPU is in use and MSA isn't enabled.
+
+This isn't necessarily a problem because KSTK_STATUS(current), the
+version of CP0_Status stored on the kernel stack on entry from user
+mode, does always get updated and gets restored when returning to user
+mode, but I don't think it was intended, and it is inconsistent with the
+case of only the FPU being in use. Sometimes leaving the FPU enabled may
+also mask kernel bugs where FPU operations are executed when the FPU
+might not be enabled.
+
+So let's disable the FPU in the MSA case too.
+
+Fixes: 33c771ba5c5d ("MIPS: save/disable MSA in lose_fpu")
+Signed-off-by: James Hogan <james.hogan@imgtec.com>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: Paul Burton <paul.burton@imgtec.com>
+Cc: linux-mips@linux-mips.org
+Patchwork: https://patchwork.linux-mips.org/patch/9323/
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/include/asm/fpu.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/mips/include/asm/fpu.h
++++ b/arch/mips/include/asm/fpu.h
+@@ -169,6 +169,7 @@ static inline void lose_fpu(int save)
+ }
+ disable_msa();
+ clear_thread_flag(TIF_USEDMSA);
++ __disable_fpu();
+ } else if (is_fpu_owner()) {
+ if (save)
+ _save_fp(current);
--- /dev/null
+From f7f8aea4b97c4d48e42f02cb37026bee445f239f Mon Sep 17 00:00:00 2001
+From: Markos Chandras <markos.chandras@imgtec.com>
+Date: Fri, 27 Feb 2015 07:51:32 +0000
+Subject: MIPS: Malta: Detect and fix bad memsize values
+
+From: Markos Chandras <markos.chandras@imgtec.com>
+
+commit f7f8aea4b97c4d48e42f02cb37026bee445f239f upstream.
+
+memsize denotes the amount of RAM we can access from kseg{0,1}, and
+that should be at most 256M. If the bootloader reports a value higher
+than that (perhaps reporting all the available RAM), it's best to fix
+it ourselves and just warn the user about it. This is usually a
+problem with the bootloader and/or its environment.
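+
+A minimal sketch of the clamp this patch adds (MAX_KSEG_MEMSIZE is an
+illustrative name, not one used by the patch):
+
+	#define MAX_KSEG_MEMSIZE	(256 << 20)	/* 256M == 0x10000000 */
+
+	if (memsize > MAX_KSEG_MEMSIZE) {
+		pr_warn("Unsupported memsize value (0x%lx) detected! "
+			"Using 0x10000000 (256M) instead\n", memsize);
+		memsize = MAX_KSEG_MEMSIZE;
+	}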
+
+[ralf@linux-mips.org: Remove useless parens as suggested by Sergei.
+Reformat long pr_warn statement to fit into 80 column limit.]
+
+Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
+Cc: linux-mips@linux-mips.org
+Patchwork: https://patchwork.linux-mips.org/patch/9362/
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/mti-malta/malta-memory.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/mips/mti-malta/malta-memory.c
++++ b/arch/mips/mti-malta/malta-memory.c
+@@ -53,6 +53,12 @@ fw_memblock_t * __init fw_getmdesc(int e
+ pr_warn("memsize not set in YAMON, set to default (32Mb)\n");
+ physical_memsize = 0x02000000;
+ } else {
++ if (memsize > (256 << 20)) { /* memsize should be capped to 256M */
++ pr_warn("Unsupported memsize value (0x%lx) detected! "
++ "Using 0x10000000 (256M) instead\n",
++ memsize);
++ memsize = 256 << 20;
++ }
+ /* If ememsize is set, then set physical_memsize to that */
+ physical_memsize = ememsize ? : memsize;
+ }
--- /dev/null
+From 6eae35485b26f9e51ab896eb8a936bed9908fdf6 Mon Sep 17 00:00:00 2001
+From: Markos Chandras <markos.chandras@imgtec.com>
+Date: Mon, 9 Mar 2015 14:54:52 +0000
+Subject: MIPS: unaligned: Fix regular load/store instruction emulation for EVA
+
+From: Markos Chandras <markos.chandras@imgtec.com>
+
+commit 6eae35485b26f9e51ab896eb8a936bed9908fdf6 upstream.
+
+When emulating a regular lh/lw/lhu/sh/sw we need to use the appropriate
+instruction variant if we are in EVA mode. This is necessary for userspace
+applications that trigger alignment exceptions. In such cases, the
+userspace load/store instruction needs to be emulated with the correct
+EVA/non-EVA instruction by the kernel emulator.
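+
+The hunks below repeat the same selection pattern for each opcode; in
+condensed form (illustrative only, using the lh case), the decision is:
+
+	/* Use the EVA variant only when the access targets the user
+	 * address space, i.e. get_fs() is not the kernel segment. */
+	if (config_enabled(CONFIG_EVA) && !segment_eq(get_fs(), get_ds()))
+		LoadHWE(addr, value, res);	/* EVA user-space load */
+	else
+		LoadHW(addr, value, res);	/* regular load */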
+
+Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
+Fixes: c1771216ab48 ("MIPS: kernel: unaligned: Handle unaligned accesses for EVA")
+Cc: linux-mips@linux-mips.org
+Patchwork: https://patchwork.linux-mips.org/patch/9503/
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/kernel/unaligned.c | 52 ++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 47 insertions(+), 5 deletions(-)
+
+--- a/arch/mips/kernel/unaligned.c
++++ b/arch/mips/kernel/unaligned.c
+@@ -564,7 +564,15 @@ static void emulate_load_store_insn(stru
+ if (!access_ok(VERIFY_READ, addr, 2))
+ goto sigbus;
+
+- LoadHW(addr, value, res);
++ if (config_enabled(CONFIG_EVA)) {
++ if (segment_eq(get_fs(), get_ds()))
++ LoadHW(addr, value, res);
++ else
++ LoadHWE(addr, value, res);
++ } else {
++ LoadHW(addr, value, res);
++ }
++
+ if (res)
+ goto fault;
+ compute_return_epc(regs);
+@@ -575,7 +583,15 @@ static void emulate_load_store_insn(stru
+ if (!access_ok(VERIFY_READ, addr, 4))
+ goto sigbus;
+
+- LoadW(addr, value, res);
++ if (config_enabled(CONFIG_EVA)) {
++ if (segment_eq(get_fs(), get_ds()))
++ LoadW(addr, value, res);
++ else
++ LoadWE(addr, value, res);
++ } else {
++ LoadW(addr, value, res);
++ }
++
+ if (res)
+ goto fault;
+ compute_return_epc(regs);
+@@ -586,7 +602,15 @@ static void emulate_load_store_insn(stru
+ if (!access_ok(VERIFY_READ, addr, 2))
+ goto sigbus;
+
+- LoadHWU(addr, value, res);
++ if (config_enabled(CONFIG_EVA)) {
++ if (segment_eq(get_fs(), get_ds()))
++ LoadHWU(addr, value, res);
++ else
++ LoadHWUE(addr, value, res);
++ } else {
++ LoadHWU(addr, value, res);
++ }
++
+ if (res)
+ goto fault;
+ compute_return_epc(regs);
+@@ -645,7 +669,16 @@ static void emulate_load_store_insn(stru
+
+ compute_return_epc(regs);
+ value = regs->regs[insn.i_format.rt];
+- StoreHW(addr, value, res);
++
++ if (config_enabled(CONFIG_EVA)) {
++ if (segment_eq(get_fs(), get_ds()))
++ StoreHW(addr, value, res);
++ else
++ StoreHWE(addr, value, res);
++ } else {
++ StoreHW(addr, value, res);
++ }
++
+ if (res)
+ goto fault;
+ break;
+@@ -656,7 +689,16 @@ static void emulate_load_store_insn(stru
+
+ compute_return_epc(regs);
+ value = regs->regs[insn.i_format.rt];
+- StoreW(addr, value, res);
++
++ if (config_enabled(CONFIG_EVA)) {
++ if (segment_eq(get_fs(), get_ds()))
++ StoreW(addr, value, res);
++ else
++ StoreWE(addr, value, res);
++ } else {
++ StoreW(addr, value, res);
++ }
++
+ if (res)
+ goto fault;
+ break;
--- /dev/null
+From d74419495633493c9cd3f2bbeb7f3529d0edded6 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+Date: Wed, 25 Mar 2015 10:13:33 +0100
+Subject: s390/hibernate: fix save and restore of kernel text section
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+commit d74419495633493c9cd3f2bbeb7f3529d0edded6 upstream.
+
+Sebastian reported a crash caused by a jump label mismatch after resume.
+This happens because we do not save the kernel text section during suspend
+and therefore also do not restore it during resume, but use the kernel image
+that restores the old system.
+
+This means that after a suspend/resume cycle we lose all modifications made
+to the kernel text section.
+The reason for this is the pfn_is_nosave() function, which assumes that
+read-only pages don't need to be saved. This is incorrect since we mark
+the kernel text section read-only.
+We still need to make sure not to save and restore pages contained within
+NSS and DCSS segments.
+To fix this, add an extra case for the kernel text section and only save
+those pages if they are not contained within an NSS segment.
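+
+In sketch form (simplified; variable setup omitted), the resulting check
+order in pfn_is_nosave() is:
+
+	if (pfn <= LC_PAGES)
+		return 0;		/* always save lowcore pages */
+	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
+		return 1;		/* explicit nosave region */
+	/* Kernel text (_stext.._eshared) is read-only but must be saved,
+	 * unless it is backed by a shared NSS segment. */
+	if (pfn >= stext_pfn && pfn <= eshared_pfn)
+		return ipl_info.type == IPL_TYPE_NSS ? 1 : 0;
+	if (tprot(PFN_PHYS(pfn)))
+		return 1;		/* read-only: memory hole / DCSS */
+	return 0;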
+
+Fixes the following crash (and the above bugs as well):
+
+Jump label code mismatch at netif_receive_skb_internal+0x28/0xd0
+Found: c0 04 00 00 00 00
+Expected: c0 f4 00 00 00 11
+New: c0 04 00 00 00 00
+Kernel panic - not syncing: Corrupted kernel text
+CPU: 0 PID: 9 Comm: migration/0 Not tainted 3.19.0-01975-gb1b096e70f23 #4
+Call Trace:
+ [<0000000000113972>] show_stack+0x72/0xf0
+ [<000000000081f15e>] dump_stack+0x6e/0x90
+ [<000000000081c4e8>] panic+0x108/0x2b0
+ [<000000000081be64>] jump_label_bug.isra.2+0x104/0x108
+ [<0000000000112176>] __jump_label_transform+0x9e/0xd0
+ [<00000000001121e6>] __sm_arch_jump_label_transform+0x3e/0x50
+ [<00000000001d1136>] multi_cpu_stop+0x12e/0x170
+ [<00000000001d1472>] cpu_stopper_thread+0xb2/0x168
+ [<000000000015d2ac>] smpboot_thread_fn+0x134/0x1b0
+ [<0000000000158baa>] kthread+0x10a/0x110
+ [<0000000000824a86>] kernel_thread_starter+0x6/0xc
+
+Reported-and-tested-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/suspend.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/s390/kernel/suspend.c
++++ b/arch/s390/kernel/suspend.c
+@@ -138,6 +138,8 @@ int pfn_is_nosave(unsigned long pfn)
+ {
+ unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
+ unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
++ unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
++ unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
+
+ /* Always save lowcore pages (LC protection might be enabled). */
+ if (pfn <= LC_PAGES)
+@@ -145,6 +147,8 @@ int pfn_is_nosave(unsigned long pfn)
+ if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
+ return 1;
+ /* Skip memory holes and read-only pages (NSS, DCSS, ...). */
++ if (pfn >= stext_pfn && pfn <= eshared_pfn)
++ return ipl_info.type == IPL_TYPE_NSS ? 1 : 0;
+ if (tprot(PFN_PHYS(pfn)))
+ return 1;
+ return 0;
btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch
btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch
usb-gadget-printer-enqueue-printer-s-response-for-setup-request.patch
+kvm-s390-fix-handling-of-write-errors-in-the-tpi-handler.patch
+kvm-s390-reinjection-of-irqs-can-fail-in-the-tpi-handler.patch
+kvm-s390-zero-out-current-vmdb-of-stsi-before-including-level3-data.patch
+kvm-s390-no-need-to-hold-the-kvm-mutex-for-floating-interrupts.patch
+kvm-s390-fix-get_all_floating_irqs.patch
+s390-hibernate-fix-save-and-restore-of-kernel-text-section.patch
+kvm-use-slowpath-for-cross-page-cached-accesses.patch
+kvm-arm-arm64-check-irq-number-on-userland-injection.patch
+kvm-arm-arm64-vgic-vgic_init-returns-enodev-when-no-online-vcpu.patch
+arm-kvm-fix-size-check-in-__coherent_cache_guest_page.patch
+arm64-kvm-fix-stage-2-pgd-allocation-to-have-per-page-refcounting.patch
+arm64-kvm-do-not-use-pgd_index-to-index-stage-2-pgd.patch
+arm-arm64-kvm-keep-elrsr-aisr-in-sync-with-software-model.patch
+mips-kvm-handle-msa-disabled-exceptions-from-guest.patch
+mips-lose_fpu-disable-fpu-when-msa-enabled.patch
+mips-malta-detect-and-fix-bad-memsize-values.patch
+mips-asm-asm-eva-introduce-kernel-load-store-variants.patch
+mips-unaligned-fix-regular-load-store-instruction-emulation-for-eva.patch
+mips-loongson-3-add-irqf_no_suspend-to-cascade-irqaction.patch
+mips-hibernate-flush-tlb-entries-earlier.patch