From: Greg Kroah-Hartman
Date: Sat, 2 May 2015 12:25:31 +0000 (+0200)
Subject: 3.19-stable patches
X-Git-Tag: v3.10.77~53
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c9c77b0b1ea2e29f9aabeab2dc4290ef0634ffae;p=thirdparty%2Fkernel%2Fstable-queue.git

3.19-stable patches

added patches:
    arm-arm64-kvm-keep-elrsr-aisr-in-sync-with-software-model.patch
    arm-kvm-fix-size-check-in-__coherent_cache_guest_page.patch
    arm64-kvm-do-not-use-pgd_index-to-index-stage-2-pgd.patch
    arm64-kvm-fix-stage-2-pgd-allocation-to-have-per-page-refcounting.patch
    kvm-arm-arm64-check-irq-number-on-userland-injection.patch
    kvm-arm-arm64-vgic-vgic_init-returns-enodev-when-no-online-vcpu.patch
    kvm-s390-fix-get_all_floating_irqs.patch
    kvm-s390-fix-handling-of-write-errors-in-the-tpi-handler.patch
    kvm-s390-no-need-to-hold-the-kvm-mutex-for-floating-interrupts.patch
    kvm-s390-reinjection-of-irqs-can-fail-in-the-tpi-handler.patch
    kvm-s390-zero-out-current-vmdb-of-stsi-before-including-level3-data.patch
    kvm-use-slowpath-for-cross-page-cached-accesses.patch
    mips-asm-asm-eva-introduce-kernel-load-store-variants.patch
    mips-hibernate-flush-tlb-entries-earlier.patch
    mips-kvm-handle-msa-disabled-exceptions-from-guest.patch
    mips-loongson-3-add-irqf_no_suspend-to-cascade-irqaction.patch
    mips-lose_fpu-disable-fpu-when-msa-enabled.patch
    mips-malta-detect-and-fix-bad-memsize-values.patch
    mips-unaligned-fix-regular-load-store-instruction-emulation-for-eva.patch
    s390-hibernate-fix-save-and-restore-of-kernel-text-section.patch
---

diff --git a/queue-3.19/arm-arm64-kvm-keep-elrsr-aisr-in-sync-with-software-model.patch b/queue-3.19/arm-arm64-kvm-keep-elrsr-aisr-in-sync-with-software-model.patch
new file mode 100644
index 00000000000..39c3732bd14
--- /dev/null
+++ b/queue-3.19/arm-arm64-kvm-keep-elrsr-aisr-in-sync-with-software-model.patch
@@ -0,0 +1,167 @@
+From ae705930fca6322600690df9dc1c7d0516145a93 Mon Sep 17 00:00:00 2001
+From: Christoffer Dall
+Date: Fri, 13 Mar 2015 17:02:56 +0000
+Subject: arm/arm64: KVM: Keep elrsr/aisr in sync with software model
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christoffer Dall
+
+commit ae705930fca6322600690df9dc1c7d0516145a93 upstream.
+
+There is an interesting bug in the vgic code, which manifests itself
+when the KVM run loop has a signal pending or needs a vmid generation
+rollover after having disabled interrupts but before actually switching
+to the guest.
+
+In this case, we flush the vgic as usual, but we sync back the vgic
+state and exit to userspace before entering the guest. The consequence
+is that we will be syncing the list registers back to the software model
+using the GICH_ELRSR and GICH_EISR from the last execution of the guest,
+potentially overwriting a list register containing an interrupt.
+
+This showed up during migration testing where we would capture a state
+where the VM has masked the arch timer but there were no interrupts,
+resulting in a hung test.
+ +Cc: Marc Zyngier +Reported-by: Alex Bennee +Signed-off-by: Christoffer Dall +Signed-off-by: Alex Bennée +Acked-by: Marc Zyngier +Signed-off-by: Christoffer Dall +Signed-off-by: Shannon Zhao +Signed-off-by: Greg Kroah-Hartman +--- + include/kvm/arm_vgic.h | 1 + + virt/kvm/arm/vgic-v2.c | 8 ++++++++ + virt/kvm/arm/vgic-v3.c | 8 ++++++++ + virt/kvm/arm/vgic.c | 16 ++++++++++++++++ + 4 files changed, 33 insertions(+) + +--- a/include/kvm/arm_vgic.h ++++ b/include/kvm/arm_vgic.h +@@ -113,6 +113,7 @@ struct vgic_ops { + void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); + u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); + u64 (*get_eisr)(const struct kvm_vcpu *vcpu); ++ void (*clear_eisr)(struct kvm_vcpu *vcpu); + u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); + void (*enable_underflow)(struct kvm_vcpu *vcpu); + void (*disable_underflow)(struct kvm_vcpu *vcpu); +--- a/virt/kvm/arm/vgic-v2.c ++++ b/virt/kvm/arm/vgic-v2.c +@@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct + { + if (!(lr_desc.state & LR_STATE_MASK)) + vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); ++ else ++ vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr); + } + + static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) +@@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct + return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; + } + ++static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0; ++} ++ + static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) + { + u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; +@@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops + .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, + .get_elrsr = vgic_v2_get_elrsr, + .get_eisr = vgic_v2_get_eisr, ++ .clear_eisr = vgic_v2_clear_eisr, + .get_interrupt_status = vgic_v2_get_interrupt_status, + .enable_underflow = vgic_v2_enable_underflow, + .disable_underflow = vgic_v2_disable_underflow, +--- a/virt/kvm/arm/vgic-v3.c ++++ b/virt/kvm/arm/vgic-v3.c +@@ -86,6 +86,8 @@ static void vgic_v3_sync_lr_elrsr(struct + { + if (!(lr_desc.state & LR_STATE_MASK)) + vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); ++ else ++ vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr); + } + + static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu) +@@ -98,6 +100,11 @@ static u64 vgic_v3_get_eisr(const struct + return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr; + } + ++static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0; ++} ++ + static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu) + { + u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr; +@@ -162,6 +169,7 @@ static const struct vgic_ops vgic_v3_ops + .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, + .get_elrsr = vgic_v3_get_elrsr, + .get_eisr = vgic_v3_get_eisr, ++ .clear_eisr = vgic_v3_clear_eisr, + .get_interrupt_status = vgic_v3_get_interrupt_status, + .enable_underflow = vgic_v3_enable_underflow, + .disable_underflow = vgic_v3_disable_underflow, +--- a/virt/kvm/arm/vgic.c ++++ b/virt/kvm/arm/vgic.c +@@ -1219,6 +1219,11 @@ static inline u64 vgic_get_eisr(struct k + return vgic_ops->get_eisr(vcpu); + } + ++static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu) ++{ ++ vgic_ops->clear_eisr(vcpu); ++} ++ + static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) + { + return vgic_ops->get_interrupt_status(vcpu); +@@ -1258,6 +1263,7 @@ static void vgic_retire_lr(int lr_nr, in + vgic_set_lr(vcpu, lr_nr, vlr); + clear_bit(lr_nr, vgic_cpu->lr_used); + 
vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; ++ vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); + } + + /* +@@ -1313,6 +1319,7 @@ static bool vgic_queue_irq(struct kvm_vc + BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); + vlr.state |= LR_STATE_PENDING; + vgic_set_lr(vcpu, lr, vlr); ++ vgic_sync_lr_elrsr(vcpu, lr, vlr); + return true; + } + } +@@ -1334,6 +1341,7 @@ static bool vgic_queue_irq(struct kvm_vc + vlr.state |= LR_EOI_INT; + + vgic_set_lr(vcpu, lr, vlr); ++ vgic_sync_lr_elrsr(vcpu, lr, vlr); + + return true; + } +@@ -1502,6 +1510,14 @@ static bool vgic_process_maintenance(str + if (status & INT_STATUS_UNDERFLOW) + vgic_disable_underflow(vcpu); + ++ /* ++ * In the next iterations of the vcpu loop, if we sync the vgic state ++ * after flushing it, but before entering the guest (this happens for ++ * pending signals and vmid rollovers), then make sure we don't pick ++ * up any old maintenance interrupts here. ++ */ ++ vgic_clear_eisr(vcpu); ++ + return level_pending; + } + diff --git a/queue-3.19/arm-kvm-fix-size-check-in-__coherent_cache_guest_page.patch b/queue-3.19/arm-kvm-fix-size-check-in-__coherent_cache_guest_page.patch new file mode 100644 index 00000000000..87357abe640 --- /dev/null +++ b/queue-3.19/arm-kvm-fix-size-check-in-__coherent_cache_guest_page.patch @@ -0,0 +1,31 @@ +From a050dfb21cc22ac0c666d52531040c1bc48184cc Mon Sep 17 00:00:00 2001 +From: Jan Kiszka +Date: Sat, 7 Feb 2015 22:21:20 +0100 +Subject: ARM: KVM: Fix size check in __coherent_cache_guest_page + +From: Jan Kiszka + +commit a050dfb21cc22ac0c666d52531040c1bc48184cc upstream. + +The check is supposed to catch page-unaligned sizes, not the inverse. + +Signed-off-by: Jan Kiszka +Signed-off-by: Christoffer Dall +Signed-off-by: Shannon Zhao +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/include/asm/kvm_mmu.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm/include/asm/kvm_mmu.h ++++ b/arch/arm/include/asm/kvm_mmu.h +@@ -186,7 +186,7 @@ static inline void __coherent_cache_gues + + bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached; + +- VM_BUG_ON(size & PAGE_MASK); ++ VM_BUG_ON(size & ~PAGE_MASK); + + if (!need_flush && !icache_is_pipt()) + goto vipt_cache; diff --git a/queue-3.19/arm64-kvm-do-not-use-pgd_index-to-index-stage-2-pgd.patch b/queue-3.19/arm64-kvm-do-not-use-pgd_index-to-index-stage-2-pgd.patch new file mode 100644 index 00000000000..871ec28a275 --- /dev/null +++ b/queue-3.19/arm64-kvm-do-not-use-pgd_index-to-index-stage-2-pgd.patch @@ -0,0 +1,94 @@ +From 04b8dc85bf4a64517e3cf20e409eeaa503b15cc1 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Tue, 10 Mar 2015 19:07:00 +0000 +Subject: arm64: KVM: Do not use pgd_index to index stage-2 pgd + +From: Marc Zyngier + +commit 04b8dc85bf4a64517e3cf20e409eeaa503b15cc1 upstream. + +The kernel's pgd_index macro is designed to index a normal, page +sized array. KVM is a bit diffferent, as we can use concatenated +pages to have a bigger address space (for example 40bit IPA with +4kB pages gives us an 8kB PGD. + +In the above case, the use of pgd_index will always return an index +inside the first 4kB, which makes a guest that has memory above +0x8000000000 rather unhappy, as it spins forever in a page fault, +whist the host happilly corrupts the lower pgd. + +The obvious fix is to get our own kvm_pgd_index that does the right +thing(tm). + +Tested on X-Gene with a hacked kvmtool that put memory at a stupidly +high address. 
+ +Reviewed-by: Christoffer Dall +Signed-off-by: Marc Zyngier +Signed-off-by: Christoffer Dall +Signed-off-by: Shannon Zhao +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/include/asm/kvm_mmu.h | 3 ++- + arch/arm/kvm/mmu.c | 6 +++--- + arch/arm64/include/asm/kvm_mmu.h | 2 ++ + 3 files changed, 7 insertions(+), 4 deletions(-) + +--- a/arch/arm/include/asm/kvm_mmu.h ++++ b/arch/arm/include/asm/kvm_mmu.h +@@ -128,13 +128,14 @@ static inline void kvm_set_s2pmd_writabl + (__boundary - 1 < (end) - 1)? __boundary: (end); \ + }) + ++#define kvm_pgd_index(addr) pgd_index(addr) ++ + static inline bool kvm_page_empty(void *ptr) + { + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; + } + +- + #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) + #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) + #define kvm_pud_table_empty(kvm, pudp) (0) +--- a/arch/arm/kvm/mmu.c ++++ b/arch/arm/kvm/mmu.c +@@ -251,7 +251,7 @@ static void unmap_range(struct kvm *kvm, + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + +- pgd = pgdp + pgd_index(addr); ++ pgd = pgdp + kvm_pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + if (!pgd_none(*pgd)) +@@ -316,7 +316,7 @@ static void stage2_flush_memslot(struct + phys_addr_t next; + pgd_t *pgd; + +- pgd = kvm->arch.pgd + pgd_index(addr); ++ pgd = kvm->arch.pgd + kvm_pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + stage2_flush_puds(kvm, pgd, addr, next); +@@ -791,7 +791,7 @@ static pud_t *stage2_get_pud(struct kvm + pgd_t *pgd; + pud_t *pud; + +- pgd = kvm->arch.pgd + pgd_index(addr); ++ pgd = kvm->arch.pgd + kvm_pgd_index(addr); + if (WARN_ON(pgd_none(*pgd))) { + if (!cache) + return NULL; +--- a/arch/arm64/include/asm/kvm_mmu.h ++++ b/arch/arm64/include/asm/kvm_mmu.h +@@ -137,6 +137,8 @@ static inline void kvm_set_s2pmd_writabl + #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) + #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) + ++#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) ++ + /* + * If we are concatenating first level stage-2 page tables, we would have less + * than or equal to 16 pointers in the fake PGD, because that's what the diff --git a/queue-3.19/arm64-kvm-fix-stage-2-pgd-allocation-to-have-per-page-refcounting.patch b/queue-3.19/arm64-kvm-fix-stage-2-pgd-allocation-to-have-per-page-refcounting.patch new file mode 100644 index 00000000000..ecaf9e94000 --- /dev/null +++ b/queue-3.19/arm64-kvm-fix-stage-2-pgd-allocation-to-have-per-page-refcounting.patch @@ -0,0 +1,269 @@ +From a987370f8e7a1677ae385042644326d9cd145a20 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Tue, 10 Mar 2015 19:06:59 +0000 +Subject: arm64: KVM: Fix stage-2 PGD allocation to have per-page refcounting + +From: Marc Zyngier + +commit a987370f8e7a1677ae385042644326d9cd145a20 upstream. + +We're using __get_free_pages with to allocate the guest's stage-2 +PGD. The standard behaviour of this function is to return a set of +pages where only the head page has a valid refcount. 
+ +This behaviour gets us into trouble when we're trying to increment +the refcount on a non-head page: + +page:ffff7c00cfb693c0 count:0 mapcount:0 mapping: (null) index:0x0 +flags: 0x4000000000000000() +page dumped because: VM_BUG_ON_PAGE((*({ __attribute__((unused)) typeof((&page->_count)->counter) __var = ( typeof((&page->_count)->counter)) 0; (volatile typeof((&page->_count)->counter) *)&((&page->_count)->counter); })) <= 0) +BUG: failure at include/linux/mm.h:548/get_page()! +Kernel panic - not syncing: BUG! +CPU: 1 PID: 1695 Comm: kvm-vcpu-0 Not tainted 4.0.0-rc1+ #3825 +Hardware name: APM X-Gene Mustang board (DT) +Call trace: +[] dump_backtrace+0x0/0x13c +[] show_stack+0x10/0x1c +[] dump_stack+0x74/0x94 +[] panic+0x100/0x240 +[] stage2_get_pmd+0x17c/0x2bc +[] kvm_handle_guest_abort+0x4b4/0x6b0 +[] handle_exit+0x58/0x180 +[] kvm_arch_vcpu_ioctl_run+0x114/0x45c +[] kvm_vcpu_ioctl+0x2e0/0x754 +[] do_vfs_ioctl+0x424/0x5c8 +[] SyS_ioctl+0x40/0x78 +CPU0: stopping + +A possible approach for this is to split the compound page using +split_page() at allocation time, and change the teardown path to +free one page at a time. It turns out that alloc_pages_exact() and +free_pages_exact() does exactly that. + +While we're at it, the PGD allocation code is reworked to reduce +duplication. + +This has been tested on an X-Gene platform with a 4kB/48bit-VA host +kernel, and kvmtool hacked to place memory in the second page of +the hardware PGD (PUD for the host kernel). Also regression-tested +on a Cubietruck (Cortex-A7). + + [ Reworked to use alloc_pages_exact() and free_pages_exact() and to + return pointers directly instead of by reference as arguments + - Christoffer ] + +Reported-by: Mark Rutland +Signed-off-by: Marc Zyngier +Signed-off-by: Christoffer Dall +Signed-off-by: Shannon Zhao +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/include/asm/kvm_mmu.h | 10 ++--- + arch/arm/kvm/mmu.c | 67 ++++++++++++++++++++++++++++----------- + arch/arm64/include/asm/kvm_mmu.h | 46 ++------------------------ + 3 files changed, 57 insertions(+), 66 deletions(-) + +--- a/arch/arm/include/asm/kvm_mmu.h ++++ b/arch/arm/include/asm/kvm_mmu.h +@@ -141,16 +141,14 @@ static inline bool kvm_page_empty(void * + + #define KVM_PREALLOC_LEVEL 0 + +-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd) ++static inline void *kvm_get_hwpgd(struct kvm *kvm) + { +- return 0; ++ return kvm->arch.pgd; + } + +-static inline void kvm_free_hwpgd(struct kvm *kvm) { } +- +-static inline void *kvm_get_hwpgd(struct kvm *kvm) ++static inline unsigned int kvm_get_hwpgd_size(void) + { +- return kvm->arch.pgd; ++ return PTRS_PER_S2_PGD * sizeof(pgd_t); + } + + struct kvm; +--- a/arch/arm/kvm/mmu.c ++++ b/arch/arm/kvm/mmu.c +@@ -593,6 +593,20 @@ int create_hyp_io_mappings(void *from, v + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); + } + ++/* Free the HW pgd, one page at a time */ ++static void kvm_free_hwpgd(void *hwpgd) ++{ ++ free_pages_exact(hwpgd, kvm_get_hwpgd_size()); ++} ++ ++/* Allocate the HW PGD, making sure that each page gets its own refcount */ ++static void *kvm_alloc_hwpgd(void) ++{ ++ unsigned int size = kvm_get_hwpgd_size(); ++ ++ return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); ++} ++ + /** + * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. + * @kvm: The KVM struct pointer for the VM. 
+@@ -606,15 +620,31 @@ int create_hyp_io_mappings(void *from, v + */ + int kvm_alloc_stage2_pgd(struct kvm *kvm) + { +- int ret; + pgd_t *pgd; ++ void *hwpgd; + + if (kvm->arch.pgd != NULL) { + kvm_err("kvm_arch already initialized?\n"); + return -EINVAL; + } + ++ hwpgd = kvm_alloc_hwpgd(); ++ if (!hwpgd) ++ return -ENOMEM; ++ ++ /* When the kernel uses more levels of page tables than the ++ * guest, we allocate a fake PGD and pre-populate it to point ++ * to the next-level page table, which will be the real ++ * initial page table pointed to by the VTTBR. ++ * ++ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for ++ * the PMD and the kernel will use folded pud. ++ * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD ++ * pages. ++ */ + if (KVM_PREALLOC_LEVEL > 0) { ++ int i; ++ + /* + * Allocate fake pgd for the page table manipulation macros to + * work. This is not used by the hardware and we have no +@@ -622,30 +652,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm + */ + pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), + GFP_KERNEL | __GFP_ZERO); ++ ++ if (!pgd) { ++ kvm_free_hwpgd(hwpgd); ++ return -ENOMEM; ++ } ++ ++ /* Plug the HW PGD into the fake one. */ ++ for (i = 0; i < PTRS_PER_S2_PGD; i++) { ++ if (KVM_PREALLOC_LEVEL == 1) ++ pgd_populate(NULL, pgd + i, ++ (pud_t *)hwpgd + i * PTRS_PER_PUD); ++ else if (KVM_PREALLOC_LEVEL == 2) ++ pud_populate(NULL, pud_offset(pgd, 0) + i, ++ (pmd_t *)hwpgd + i * PTRS_PER_PMD); ++ } + } else { + /* + * Allocate actual first-level Stage-2 page table used by the + * hardware for Stage-2 page table walks. + */ +- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER); ++ pgd = (pgd_t *)hwpgd; + } + +- if (!pgd) +- return -ENOMEM; +- +- ret = kvm_prealloc_hwpgd(kvm, pgd); +- if (ret) +- goto out_err; +- + kvm_clean_pgd(pgd); + kvm->arch.pgd = pgd; + return 0; +-out_err: +- if (KVM_PREALLOC_LEVEL > 0) +- kfree(pgd); +- else +- free_pages((unsigned long)pgd, S2_PGD_ORDER); +- return ret; + } + + /** +@@ -746,11 +778,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm + return; + + unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); +- kvm_free_hwpgd(kvm); ++ kvm_free_hwpgd(kvm_get_hwpgd(kvm)); + if (KVM_PREALLOC_LEVEL > 0) + kfree(kvm->arch.pgd); +- else +- free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER); ++ + kvm->arch.pgd = NULL; + } + +--- a/arch/arm64/include/asm/kvm_mmu.h ++++ b/arch/arm64/include/asm/kvm_mmu.h +@@ -150,43 +150,6 @@ static inline void kvm_set_s2pmd_writabl + #define KVM_PREALLOC_LEVEL (0) + #endif + +-/** +- * kvm_prealloc_hwpgd - allocate inital table for VTTBR +- * @kvm: The KVM struct pointer for the VM. +- * @pgd: The kernel pseudo pgd +- * +- * When the kernel uses more levels of page tables than the guest, we allocate +- * a fake PGD and pre-populate it to point to the next-level page table, which +- * will be the real initial page table pointed to by the VTTBR. +- * +- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and +- * the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we +- * allocate 2 consecutive PUD pages. 
+- */ +-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd) +-{ +- unsigned int i; +- unsigned long hwpgd; +- +- if (KVM_PREALLOC_LEVEL == 0) +- return 0; +- +- hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT); +- if (!hwpgd) +- return -ENOMEM; +- +- for (i = 0; i < PTRS_PER_S2_PGD; i++) { +- if (KVM_PREALLOC_LEVEL == 1) +- pgd_populate(NULL, pgd + i, +- (pud_t *)hwpgd + i * PTRS_PER_PUD); +- else if (KVM_PREALLOC_LEVEL == 2) +- pud_populate(NULL, pud_offset(pgd, 0) + i, +- (pmd_t *)hwpgd + i * PTRS_PER_PMD); +- } +- +- return 0; +-} +- + static inline void *kvm_get_hwpgd(struct kvm *kvm) + { + pgd_t *pgd = kvm->arch.pgd; +@@ -203,12 +166,11 @@ static inline void *kvm_get_hwpgd(struct + return pmd_offset(pud, 0); + } + +-static inline void kvm_free_hwpgd(struct kvm *kvm) ++static inline unsigned int kvm_get_hwpgd_size(void) + { +- if (KVM_PREALLOC_LEVEL > 0) { +- unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm); +- free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT); +- } ++ if (KVM_PREALLOC_LEVEL > 0) ++ return PTRS_PER_S2_PGD * PAGE_SIZE; ++ return PTRS_PER_S2_PGD * sizeof(pgd_t); + } + + static inline bool kvm_page_empty(void *ptr) diff --git a/queue-3.19/kvm-arm-arm64-check-irq-number-on-userland-injection.patch b/queue-3.19/kvm-arm-arm64-check-irq-number-on-userland-injection.patch new file mode 100644 index 00000000000..27d2089172c --- /dev/null +++ b/queue-3.19/kvm-arm-arm64-check-irq-number-on-userland-injection.patch @@ -0,0 +1,113 @@ +From fd1d0ddf2ae92fb3df42ed476939861806c5d785 Mon Sep 17 00:00:00 2001 +From: Andre Przywara +Date: Fri, 10 Apr 2015 16:17:59 +0100 +Subject: KVM: arm/arm64: check IRQ number on userland injection + +From: Andre Przywara + +commit fd1d0ddf2ae92fb3df42ed476939861806c5d785 upstream. + +When userland injects a SPI via the KVM_IRQ_LINE ioctl we currently +only check it against a fixed limit, which historically is set +to 127. With the new dynamic IRQ allocation the effective limit may +actually be smaller (64). +So when now a malicious or buggy userland injects a SPI in that +range, we spill over on our VGIC bitmaps and bytemaps memory. +I could trigger a host kernel NULL pointer dereference with current +mainline by injecting some bogus IRQ number from a hacked kvmtool: +----------------- +.... +DEBUG: kvm_vgic_inject_irq(kvm, cpu=0, irq=114, level=1) +DEBUG: vgic_update_irq_pending(kvm, cpu=0, irq=114, level=1) +DEBUG: IRQ #114 still in the game, writing to bytemap now... +Unable to handle kernel NULL pointer dereference at virtual address 00000000 +pgd = ffffffc07652e000 +[00000000] *pgd=00000000f658b003, *pud=00000000f658b003, *pmd=0000000000000000 +Internal error: Oops: 96000006 [#1] PREEMPT SMP +Modules linked in: +CPU: 1 PID: 1053 Comm: lkvm-msi-irqinj Not tainted 4.0.0-rc7+ #3027 +Hardware name: FVP Base (DT) +task: ffffffc0774e9680 ti: ffffffc0765a8000 task.ti: ffffffc0765a8000 +PC is at kvm_vgic_inject_irq+0x234/0x310 +LR is at kvm_vgic_inject_irq+0x30c/0x310 +pc : [] lr : [] pstate: 80000145 +..... + +So this patch fixes this by checking the SPI number against the +actual limit. Also we remove the former legacy hard limit of +127 in the ioctl code. 
+ +Signed-off-by: Andre Przywara +Reviewed-by: Christoffer Dall +[maz: wrap KVM_ARM_IRQ_GIC_MAX with #ifndef __KERNEL__, +as suggested by Christopher Covington] +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/include/uapi/asm/kvm.h | 8 +++++++- + arch/arm/kvm/arm.c | 3 +-- + arch/arm64/include/uapi/asm/kvm.h | 8 +++++++- + virt/kvm/arm/vgic.c | 3 +++ + 4 files changed, 18 insertions(+), 4 deletions(-) + +--- a/arch/arm/include/uapi/asm/kvm.h ++++ b/arch/arm/include/uapi/asm/kvm.h +@@ -193,8 +193,14 @@ struct kvm_arch_memory_slot { + #define KVM_ARM_IRQ_CPU_IRQ 0 + #define KVM_ARM_IRQ_CPU_FIQ 1 + +-/* Highest supported SPI, from VGIC_NR_IRQS */ ++/* ++ * This used to hold the highest supported SPI, but it is now obsolete ++ * and only here to provide source code level compatibility with older ++ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS. ++ */ ++#ifndef __KERNEL__ + #define KVM_ARM_IRQ_GIC_MAX 127 ++#endif + + /* PSCI interface */ + #define KVM_PSCI_FN_BASE 0x95c1ba5e +--- a/arch/arm/kvm/arm.c ++++ b/arch/arm/kvm/arm.c +@@ -644,8 +644,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kv + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + +- if (irq_num < VGIC_NR_PRIVATE_IRQS || +- irq_num > KVM_ARM_IRQ_GIC_MAX) ++ if (irq_num < VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + return kvm_vgic_inject_irq(kvm, 0, irq_num, level); +--- a/arch/arm64/include/uapi/asm/kvm.h ++++ b/arch/arm64/include/uapi/asm/kvm.h +@@ -179,8 +179,14 @@ struct kvm_arch_memory_slot { + #define KVM_ARM_IRQ_CPU_IRQ 0 + #define KVM_ARM_IRQ_CPU_FIQ 1 + +-/* Highest supported SPI, from VGIC_NR_IRQS */ ++/* ++ * This used to hold the highest supported SPI, but it is now obsolete ++ * and only here to provide source code level compatibility with older ++ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS. ++ */ ++#ifndef __KERNEL__ + #define KVM_ARM_IRQ_GIC_MAX 127 ++#endif + + /* PSCI interface */ + #define KVM_PSCI_FN_BASE 0x95c1ba5e +--- a/virt/kvm/arm/vgic.c ++++ b/virt/kvm/arm/vgic.c +@@ -1706,6 +1706,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, + goto out; + } + ++ if (irq_num >= kvm->arch.vgic.nr_irqs) ++ return -EINVAL; ++ + vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); + if (vcpu_id >= 0) { + /* kick the specified vcpu */ diff --git a/queue-3.19/kvm-arm-arm64-vgic-vgic_init-returns-enodev-when-no-online-vcpu.patch b/queue-3.19/kvm-arm-arm64-vgic-vgic_init-returns-enodev-when-no-online-vcpu.patch new file mode 100644 index 00000000000..12ab3b350d9 --- /dev/null +++ b/queue-3.19/kvm-arm-arm64-vgic-vgic_init-returns-enodev-when-no-online-vcpu.patch @@ -0,0 +1,32 @@ +From 66b030e48af68fd4c22d343908bc057207a0a31e Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 15 Dec 2014 18:43:32 +0100 +Subject: KVM: arm/arm64: vgic: vgic_init returns -ENODEV when no online vcpu + +From: Eric Auger + +commit 66b030e48af68fd4c22d343908bc057207a0a31e upstream. + +To be more explicit on vgic initialization failure, -ENODEV is +returned by vgic_init when no online vcpus can be found at init. + +Signed-off-by: Eric Auger +Signed-off-by: Christoffer Dall +Signed-off-by: Shannon Zhao +Signed-off-by: Greg Kroah-Hartman + +--- + virt/kvm/arm/vgic.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/virt/kvm/arm/vgic.c ++++ b/virt/kvm/arm/vgic.c +@@ -1812,7 +1812,7 @@ static int vgic_init(struct kvm *kvm) + + nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); + if (!nr_cpus) /* No vcpus? Can't be good... 
*/ +- return -EINVAL; ++ return -ENODEV; + + /* + * If nobody configured the number of interrupts, use the diff --git a/queue-3.19/kvm-s390-fix-get_all_floating_irqs.patch b/queue-3.19/kvm-s390-fix-get_all_floating_irqs.patch new file mode 100644 index 00000000000..b20318f9afb --- /dev/null +++ b/queue-3.19/kvm-s390-fix-get_all_floating_irqs.patch @@ -0,0 +1,150 @@ +From 94aa033efcac47b09db22cb561e135baf37b7887 Mon Sep 17 00:00:00 2001 +From: Jens Freimann +Date: Mon, 16 Mar 2015 12:17:13 +0100 +Subject: KVM: s390: fix get_all_floating_irqs + +From: Jens Freimann + +commit 94aa033efcac47b09db22cb561e135baf37b7887 upstream. + +This fixes a bug introduced with commit c05c4186bbe4 ("KVM: s390: +add floating irq controller"). + +get_all_floating_irqs() does copy_to_user() while holding +a spin lock. Let's fix this by filling a temporary buffer +first and copy it to userspace after giving up the lock. + +Reviewed-by: David Hildenbrand +Signed-off-by: Jens Freimann +Signed-off-by: Christian Borntraeger +Acked-by: Cornelia Huck +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/virtual/kvm/devices/s390_flic.txt | 3 + + arch/s390/kvm/interrupt.c | 58 +++++++++++++----------- + 2 files changed, 35 insertions(+), 26 deletions(-) + +--- a/Documentation/virtual/kvm/devices/s390_flic.txt ++++ b/Documentation/virtual/kvm/devices/s390_flic.txt +@@ -27,6 +27,9 @@ Groups: + Copies all floating interrupts into a buffer provided by userspace. + When the buffer is too small it returns -ENOMEM, which is the indication + for userspace to try again with a bigger buffer. ++ -ENOBUFS is returned when the allocation of a kernelspace buffer has ++ failed. ++ -EFAULT is returned when copying data to userspace failed. + All interrupts remain pending, i.e. are not deleted from the list of + currently pending interrupts. 
+ attr->addr contains the userspace address of the buffer into which all +--- a/arch/s390/kvm/interrupt.c ++++ b/arch/s390/kvm/interrupt.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include "kvm-s390.h" +@@ -1386,61 +1387,66 @@ void kvm_s390_clear_float_irqs(struct kv + spin_unlock(&fi->lock); + } + +-static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, +- u8 *addr) ++static void inti_to_irq(struct kvm_s390_interrupt_info *inti, ++ struct kvm_s390_irq *irq) + { +- struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; +- struct kvm_s390_irq irq = {0}; +- +- irq.type = inti->type; ++ irq->type = inti->type; + switch (inti->type) { + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: +- irq.u.ext = inti->ext; ++ irq->u.ext = inti->ext; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: +- irq.u.io = inti->io; ++ irq->u.io = inti->io; + break; + case KVM_S390_MCHK: +- irq.u.mchk = inti->mchk; ++ irq->u.mchk = inti->mchk; + break; +- default: +- return -EINVAL; + } +- +- if (copy_to_user(uptr, &irq, sizeof(irq))) +- return -EFAULT; +- +- return 0; + } + +-static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) ++static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) + { + struct kvm_s390_interrupt_info *inti; + struct kvm_s390_float_interrupt *fi; ++ struct kvm_s390_irq *buf; ++ int max_irqs; + int ret = 0; + int n = 0; + ++ if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0) ++ return -EINVAL; ++ ++ /* ++ * We are already using -ENOMEM to signal ++ * userspace it may retry with a bigger buffer, ++ * so we need to use something else for this case ++ */ ++ buf = vzalloc(len); ++ if (!buf) ++ return -ENOBUFS; ++ ++ max_irqs = len / sizeof(struct kvm_s390_irq); ++ + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); +- + list_for_each_entry(inti, &fi->list, list) { +- if (len < sizeof(struct kvm_s390_irq)) { ++ if (n == max_irqs) { + /* signal userspace to try again */ + ret = -ENOMEM; + break; + } +- ret = copy_irq_to_user(inti, buf); +- if (ret) +- break; +- buf += sizeof(struct kvm_s390_irq); +- len -= sizeof(struct kvm_s390_irq); ++ inti_to_irq(inti, &buf[n]); + n++; + } +- + spin_unlock(&fi->lock); ++ if (!ret && n > 0) { ++ if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n)) ++ ret = -EFAULT; ++ } ++ vfree(buf); + + return ret < 0 ? ret : n; + } +@@ -1451,7 +1457,7 @@ static int flic_get_attr(struct kvm_devi + + switch (attr->group) { + case KVM_DEV_FLIC_GET_ALL_IRQS: +- r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr, ++ r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr, + attr->attr); + break; + default: diff --git a/queue-3.19/kvm-s390-fix-handling-of-write-errors-in-the-tpi-handler.patch b/queue-3.19/kvm-s390-fix-handling-of-write-errors-in-the-tpi-handler.patch new file mode 100644 index 00000000000..e874e925fd3 --- /dev/null +++ b/queue-3.19/kvm-s390-fix-handling-of-write-errors-in-the-tpi-handler.patch @@ -0,0 +1,102 @@ +From 261520dcfcba93ca5dfe671b88ffab038cd940c8 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 4 Feb 2015 15:53:42 +0100 +Subject: KVM: s390: fix handling of write errors in the tpi handler + +From: David Hildenbrand + +commit 261520dcfcba93ca5dfe671b88ffab038cd940c8 upstream. + +If the I/O interrupt could not be written to the guest provided +area (e.g. 
access exception), a program exception was injected into the +guest but "inti" wasn't freed, therefore resulting in a memory leak. + +In addition, the I/O interrupt wasn't reinjected. Therefore the dequeued +interrupt is lost. + +This patch fixes the problem while cleaning up the function and making the +cc and rc logic easier to handle. + +Signed-off-by: David Hildenbrand +Signed-off-by: Christian Borntraeger +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kvm/priv.c | 40 +++++++++++++++++++++++----------------- + 1 file changed, 23 insertions(+), 17 deletions(-) + +--- a/arch/s390/kvm/priv.c ++++ b/arch/s390/kvm/priv.c +@@ -229,18 +229,19 @@ static int handle_tpi(struct kvm_vcpu *v + struct kvm_s390_interrupt_info *inti; + unsigned long len; + u32 tpi_data[3]; +- int cc, rc; ++ int rc; + u64 addr; + +- rc = 0; + addr = kvm_s390_get_base_disp_s(vcpu); + if (addr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); +- cc = 0; ++ + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0); +- if (!inti) +- goto no_interrupt; +- cc = 1; ++ if (!inti) { ++ kvm_s390_set_psw_cc(vcpu, 0); ++ return 0; ++ } ++ + tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr; + tpi_data[1] = inti->io.io_int_parm; + tpi_data[2] = inti->io.io_int_word; +@@ -251,30 +252,35 @@ static int handle_tpi(struct kvm_vcpu *v + */ + len = sizeof(tpi_data) - 4; + rc = write_guest(vcpu, addr, &tpi_data, len); +- if (rc) +- return kvm_s390_inject_prog_cond(vcpu, rc); ++ if (rc) { ++ rc = kvm_s390_inject_prog_cond(vcpu, rc); ++ goto reinject_interrupt; ++ } + } else { + /* + * Store the three-word I/O interruption code into + * the appropriate lowcore area. + */ + len = sizeof(tpi_data); +- if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) ++ if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) { ++ /* failed writes to the low core are not recoverable */ + rc = -EFAULT; ++ goto reinject_interrupt; ++ } + } ++ ++ /* irq was successfully handed to the guest */ ++ kfree(inti); ++ kvm_s390_set_psw_cc(vcpu, 1); ++ return 0; ++reinject_interrupt: + /* + * If we encounter a problem storing the interruption code, the + * instruction is suppressed from the guest's view: reinject the + * interrupt. + */ +- if (!rc) +- kfree(inti); +- else +- kvm_s390_reinject_io_int(vcpu->kvm, inti); +-no_interrupt: +- /* Set condition code and we're done. */ +- if (!rc) +- kvm_s390_set_psw_cc(vcpu, cc); ++ kvm_s390_reinject_io_int(vcpu->kvm, inti); ++ /* don't set the cc, a pgm irq was injected or we drop to user space */ + return rc ? -EFAULT : 0; + } + diff --git a/queue-3.19/kvm-s390-no-need-to-hold-the-kvm-mutex-for-floating-interrupts.patch b/queue-3.19/kvm-s390-no-need-to-hold-the-kvm-mutex-for-floating-interrupts.patch new file mode 100644 index 00000000000..de5c894739f --- /dev/null +++ b/queue-3.19/kvm-s390-no-need-to-hold-the-kvm-mutex-for-floating-interrupts.patch @@ -0,0 +1,90 @@ +From 69a8d456263849152826542c7cb0a164b90e68a8 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Wed, 17 Dec 2014 10:36:04 +0100 +Subject: KVM: s390: no need to hold the kvm->mutex for floating interrupts + +From: Christian Borntraeger + +commit 69a8d456263849152826542c7cb0a164b90e68a8 upstream. + +The kvm mutex was (probably) used to protect against cpu hotplug. +The current code no longer needs to protect against that, as we only +rely on CPU data structures that are guaranteed to be available +if we can access the CPU. (e.g. 
vcpu_create will put the cpu +in the array AFTER the cpu is ready). + +Signed-off-by: Christian Borntraeger +Acked-by: Cornelia Huck +Reviewed-by: Jens Freimann +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kvm/interrupt.c | 8 -------- + 1 file changed, 8 deletions(-) + +--- a/arch/s390/kvm/interrupt.c ++++ b/arch/s390/kvm/interrupt.c +@@ -1131,7 +1131,6 @@ struct kvm_s390_interrupt_info *kvm_s390 + + if ((!schid && !cr6) || (schid && cr6)) + return NULL; +- mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + inti = NULL; +@@ -1159,7 +1158,6 @@ struct kvm_s390_interrupt_info *kvm_s390 + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); +- mutex_unlock(&kvm->lock); + return inti; + } + +@@ -1172,7 +1170,6 @@ static int __inject_vm(struct kvm *kvm, + int sigcpu; + int rc = 0; + +- mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { +@@ -1225,7 +1222,6 @@ static int __inject_vm(struct kvm *kvm, + kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); + unlock_fi: + spin_unlock(&fi->lock); +- mutex_unlock(&kvm->lock); + return rc; + } + +@@ -1379,7 +1375,6 @@ void kvm_s390_clear_float_irqs(struct kv + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *n, *inti = NULL; + +- mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { +@@ -1389,7 +1384,6 @@ void kvm_s390_clear_float_irqs(struct kv + fi->irq_count = 0; + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); +- mutex_unlock(&kvm->lock); + } + + static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, +@@ -1429,7 +1423,6 @@ static int get_all_floating_irqs(struct + int ret = 0; + int n = 0; + +- mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + +@@ -1448,7 +1441,6 @@ static int get_all_floating_irqs(struct + } + + spin_unlock(&fi->lock); +- mutex_unlock(&kvm->lock); + + return ret < 0 ? ret : n; + } diff --git a/queue-3.19/kvm-s390-reinjection-of-irqs-can-fail-in-the-tpi-handler.patch b/queue-3.19/kvm-s390-reinjection-of-irqs-can-fail-in-the-tpi-handler.patch new file mode 100644 index 00000000000..141eb531619 --- /dev/null +++ b/queue-3.19/kvm-s390-reinjection-of-irqs-can-fail-in-the-tpi-handler.patch @@ -0,0 +1,70 @@ +From 15462e37ca848abac7477dece65f8af25febd744 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 4 Feb 2015 15:59:11 +0100 +Subject: KVM: s390: reinjection of irqs can fail in the tpi handler + +From: David Hildenbrand + +commit 15462e37ca848abac7477dece65f8af25febd744 upstream. + +The reinjection of an I/O interrupt can fail if the list is at the limit +and between the dequeue and the reinjection, another I/O interrupt is +injected (e.g. if user space floods kvm with I/O interrupts). + +This patch avoids this memory leak and returns -EFAULT in this special +case. This error is not recoverable, so let's fail hard. This can later +be avoided by not dequeuing the interrupt but working directly on the +locked list. 
+ +Signed-off-by: David Hildenbrand +Signed-off-by: Christian Borntraeger +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kvm/interrupt.c | 4 ++-- + arch/s390/kvm/kvm-s390.h | 4 ++-- + arch/s390/kvm/priv.c | 5 ++++- + 3 files changed, 8 insertions(+), 5 deletions(-) + +--- a/arch/s390/kvm/interrupt.c ++++ b/arch/s390/kvm/interrupt.c +@@ -1287,10 +1287,10 @@ int kvm_s390_inject_vm(struct kvm *kvm, + return rc; + } + +-void kvm_s390_reinject_io_int(struct kvm *kvm, ++int kvm_s390_reinject_io_int(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) + { +- __inject_vm(kvm, inti); ++ return __inject_vm(kvm, inti); + } + + int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, +--- a/arch/s390/kvm/kvm-s390.h ++++ b/arch/s390/kvm/kvm-s390.h +@@ -146,8 +146,8 @@ int __must_check kvm_s390_inject_vcpu(st + int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); + struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, + u64 cr6, u64 schid); +-void kvm_s390_reinject_io_int(struct kvm *kvm, +- struct kvm_s390_interrupt_info *inti); ++int kvm_s390_reinject_io_int(struct kvm *kvm, ++ struct kvm_s390_interrupt_info *inti); + int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); + + /* implemented in intercept.c */ +--- a/arch/s390/kvm/priv.c ++++ b/arch/s390/kvm/priv.c +@@ -279,7 +279,10 @@ reinject_interrupt: + * instruction is suppressed from the guest's view: reinject the + * interrupt. + */ +- kvm_s390_reinject_io_int(vcpu->kvm, inti); ++ if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) { ++ kfree(inti); ++ rc = -EFAULT; ++ } + /* don't set the cc, a pgm irq was injected or we drop to user space */ + return rc ? -EFAULT : 0; + } diff --git a/queue-3.19/kvm-s390-zero-out-current-vmdb-of-stsi-before-including-level3-data.patch b/queue-3.19/kvm-s390-zero-out-current-vmdb-of-stsi-before-including-level3-data.patch new file mode 100644 index 00000000000..55c5185d66f --- /dev/null +++ b/queue-3.19/kvm-s390-zero-out-current-vmdb-of-stsi-before-including-level3-data.patch @@ -0,0 +1,31 @@ +From b75f4c9afac2604feb971441116c07a24ecca1ec Mon Sep 17 00:00:00 2001 +From: Ekaterina Tumanova +Date: Tue, 3 Mar 2015 09:54:41 +0100 +Subject: KVM: s390: Zero out current VMDB of STSI before including level3 data. + +From: Ekaterina Tumanova + +commit b75f4c9afac2604feb971441116c07a24ecca1ec upstream. + +s390 documentation requires words 0 and 10-15 to be reserved and stored as +zeros. As we fill out all other fields, we can memset the full structure. 
+ +Signed-off-by: Ekaterina Tumanova +Reviewed-by: David Hildenbrand +Signed-off-by: Christian Borntraeger +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kvm/priv.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/s390/kvm/priv.c ++++ b/arch/s390/kvm/priv.c +@@ -471,6 +471,7 @@ static void handle_stsi_3_2_2(struct kvm + for (n = mem->count - 1; n > 0 ; n--) + memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0])); + ++ memset(&mem->vm[0], 0, sizeof(mem->vm[0])); + mem->vm[0].cpus_total = cpus; + mem->vm[0].cpus_configured = cpus; + mem->vm[0].cpus_standby = 0; diff --git a/queue-3.19/kvm-use-slowpath-for-cross-page-cached-accesses.patch b/queue-3.19/kvm-use-slowpath-for-cross-page-cached-accesses.patch new file mode 100644 index 00000000000..6e0af97375e --- /dev/null +++ b/queue-3.19/kvm-use-slowpath-for-cross-page-cached-accesses.patch @@ -0,0 +1,43 @@ +From ca3f0874723fad81d0c701b63ae3a17a408d5f25 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= +Date: Wed, 8 Apr 2015 14:16:48 +0200 +Subject: KVM: use slowpath for cross page cached accesses +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= + +commit ca3f0874723fad81d0c701b63ae3a17a408d5f25 upstream. + +kvm_write_guest_cached() does not mark all written pages as dirty and +code comments in kvm_gfn_to_hva_cache_init() talk about NULL memslot +with cross page accesses. Fix all the easy way. + +The check is '<= 1' to have the same result for 'len = 0' cache anywhere +in the page. (nr_pages_needed is 0 on page boundary.) + +Fixes: 8f964525a121 ("KVM: Allow cross page reads and writes from cached translations.") +Signed-off-by: Radim Krčmář +Message-Id: <20150408121648.GA3519@potion.brq.redhat.com> +Reviewed-by: Wanpeng Li +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + virt/kvm/kvm_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1608,8 +1608,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm + ghc->generation = slots->generation; + ghc->len = len; + ghc->memslot = gfn_to_memslot(kvm, start_gfn); +- ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); +- if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) { ++ ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); ++ if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { + ghc->hva += offset; + } else { + /* diff --git a/queue-3.19/mips-asm-asm-eva-introduce-kernel-load-store-variants.patch b/queue-3.19/mips-asm-asm-eva-introduce-kernel-load-store-variants.patch new file mode 100644 index 00000000000..2feddac73b8 --- /dev/null +++ b/queue-3.19/mips-asm-asm-eva-introduce-kernel-load-store-variants.patch @@ -0,0 +1,195 @@ +From 60cd7e08e453bc6828ac4b539f949e4acd80f143 Mon Sep 17 00:00:00 2001 +From: Markos Chandras +Date: Mon, 9 Mar 2015 14:54:49 +0000 +Subject: MIPS: asm: asm-eva: Introduce kernel load/store variants + +From: Markos Chandras + +commit 60cd7e08e453bc6828ac4b539f949e4acd80f143 upstream. + +Introduce new macros for kernel load/store variants which will be +used to perform regular kernel space load/store operations in EVA +mode. 
+ +Signed-off-by: Markos Chandras +Cc: linux-mips@linux-mips.org +Patchwork: https://patchwork.linux-mips.org/patch/9500/ +Signed-off-by: Ralf Baechle +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/include/asm/asm-eva.h | 137 +++++++++++++++++++++++++++------------- + 1 file changed, 93 insertions(+), 44 deletions(-) + +--- a/arch/mips/include/asm/asm-eva.h ++++ b/arch/mips/include/asm/asm-eva.h +@@ -11,6 +11,36 @@ + #define __ASM_ASM_EVA_H + + #ifndef __ASSEMBLY__ ++ ++/* Kernel variants */ ++ ++#define kernel_cache(op, base) "cache " op ", " base "\n" ++#define kernel_ll(reg, addr) "ll " reg ", " addr "\n" ++#define kernel_sc(reg, addr) "sc " reg ", " addr "\n" ++#define kernel_lw(reg, addr) "lw " reg ", " addr "\n" ++#define kernel_lwl(reg, addr) "lwl " reg ", " addr "\n" ++#define kernel_lwr(reg, addr) "lwr " reg ", " addr "\n" ++#define kernel_lh(reg, addr) "lh " reg ", " addr "\n" ++#define kernel_lb(reg, addr) "lb " reg ", " addr "\n" ++#define kernel_lbu(reg, addr) "lbu " reg ", " addr "\n" ++#define kernel_sw(reg, addr) "sw " reg ", " addr "\n" ++#define kernel_swl(reg, addr) "swl " reg ", " addr "\n" ++#define kernel_swr(reg, addr) "swr " reg ", " addr "\n" ++#define kernel_sh(reg, addr) "sh " reg ", " addr "\n" ++#define kernel_sb(reg, addr) "sb " reg ", " addr "\n" ++ ++#ifdef CONFIG_32BIT ++/* ++ * No 'sd' or 'ld' instructions in 32-bit but the code will ++ * do the correct thing ++ */ ++#define kernel_sd(reg, addr) user_sw(reg, addr) ++#define kernel_ld(reg, addr) user_lw(reg, addr) ++#else ++#define kernel_sd(reg, addr) "sd " reg", " addr "\n" ++#define kernel_ld(reg, addr) "ld " reg", " addr "\n" ++#endif /* CONFIG_32BIT */ ++ + #ifdef CONFIG_EVA + + #define __BUILD_EVA_INSN(insn, reg, addr) \ +@@ -41,37 +71,60 @@ + + #else + +-#define user_cache(op, base) "cache " op ", " base "\n" +-#define user_ll(reg, addr) "ll " reg ", " addr "\n" +-#define user_sc(reg, addr) "sc " reg ", " addr "\n" +-#define user_lw(reg, addr) "lw " reg ", " addr "\n" +-#define user_lwl(reg, addr) "lwl " reg ", " addr "\n" +-#define user_lwr(reg, addr) "lwr " reg ", " addr "\n" +-#define user_lh(reg, addr) "lh " reg ", " addr "\n" +-#define user_lb(reg, addr) "lb " reg ", " addr "\n" +-#define user_lbu(reg, addr) "lbu " reg ", " addr "\n" +-#define user_sw(reg, addr) "sw " reg ", " addr "\n" +-#define user_swl(reg, addr) "swl " reg ", " addr "\n" +-#define user_swr(reg, addr) "swr " reg ", " addr "\n" +-#define user_sh(reg, addr) "sh " reg ", " addr "\n" +-#define user_sb(reg, addr) "sb " reg ", " addr "\n" ++#define user_cache(op, base) kernel_cache(op, base) ++#define user_ll(reg, addr) kernel_ll(reg, addr) ++#define user_sc(reg, addr) kernel_sc(reg, addr) ++#define user_lw(reg, addr) kernel_lw(reg, addr) ++#define user_lwl(reg, addr) kernel_lwl(reg, addr) ++#define user_lwr(reg, addr) kernel_lwr(reg, addr) ++#define user_lh(reg, addr) kernel_lh(reg, addr) ++#define user_lb(reg, addr) kernel_lb(reg, addr) ++#define user_lbu(reg, addr) kernel_lbu(reg, addr) ++#define user_sw(reg, addr) kernel_sw(reg, addr) ++#define user_swl(reg, addr) kernel_swl(reg, addr) ++#define user_swr(reg, addr) kernel_swr(reg, addr) ++#define user_sh(reg, addr) kernel_sh(reg, addr) ++#define user_sb(reg, addr) kernel_sb(reg, addr) + + #ifdef CONFIG_32BIT +-/* +- * No 'sd' or 'ld' instructions in 32-bit but the code will +- * do the correct thing +- */ +-#define user_sd(reg, addr) user_sw(reg, addr) +-#define user_ld(reg, addr) user_lw(reg, addr) ++#define user_sd(reg, addr) kernel_sw(reg, addr) ++#define user_ld(reg, 
addr) kernel_lw(reg, addr) + #else +-#define user_sd(reg, addr) "sd " reg", " addr "\n" +-#define user_ld(reg, addr) "ld " reg", " addr "\n" ++#define user_sd(reg, addr) kernel_sd(reg, addr) ++#define user_ld(reg, addr) kernel_ld(reg, addr) + #endif /* CONFIG_32BIT */ + + #endif /* CONFIG_EVA */ + + #else /* __ASSEMBLY__ */ + ++#define kernel_cache(op, base) cache op, base ++#define kernel_ll(reg, addr) ll reg, addr ++#define kernel_sc(reg, addr) sc reg, addr ++#define kernel_lw(reg, addr) lw reg, addr ++#define kernel_lwl(reg, addr) lwl reg, addr ++#define kernel_lwr(reg, addr) lwr reg, addr ++#define kernel_lh(reg, addr) lh reg, addr ++#define kernel_lb(reg, addr) lb reg, addr ++#define kernel_lbu(reg, addr) lbu reg, addr ++#define kernel_sw(reg, addr) sw reg, addr ++#define kernel_swl(reg, addr) swl reg, addr ++#define kernel_swr(reg, addr) swr reg, addr ++#define kernel_sh(reg, addr) sh reg, addr ++#define kernel_sb(reg, addr) sb reg, addr ++ ++#ifdef CONFIG_32BIT ++/* ++ * No 'sd' or 'ld' instructions in 32-bit but the code will ++ * do the correct thing ++ */ ++#define kernel_sd(reg, addr) user_sw(reg, addr) ++#define kernel_ld(reg, addr) user_lw(reg, addr) ++#else ++#define kernel_sd(reg, addr) sd reg, addr ++#define kernel_ld(reg, addr) ld reg, addr ++#endif /* CONFIG_32BIT */ ++ + #ifdef CONFIG_EVA + + #define __BUILD_EVA_INSN(insn, reg, addr) \ +@@ -101,31 +154,27 @@ + #define user_sd(reg, addr) user_sw(reg, addr) + #else + +-#define user_cache(op, base) cache op, base +-#define user_ll(reg, addr) ll reg, addr +-#define user_sc(reg, addr) sc reg, addr +-#define user_lw(reg, addr) lw reg, addr +-#define user_lwl(reg, addr) lwl reg, addr +-#define user_lwr(reg, addr) lwr reg, addr +-#define user_lh(reg, addr) lh reg, addr +-#define user_lb(reg, addr) lb reg, addr +-#define user_lbu(reg, addr) lbu reg, addr +-#define user_sw(reg, addr) sw reg, addr +-#define user_swl(reg, addr) swl reg, addr +-#define user_swr(reg, addr) swr reg, addr +-#define user_sh(reg, addr) sh reg, addr +-#define user_sb(reg, addr) sb reg, addr ++#define user_cache(op, base) kernel_cache(op, base) ++#define user_ll(reg, addr) kernel_ll(reg, addr) ++#define user_sc(reg, addr) kernel_sc(reg, addr) ++#define user_lw(reg, addr) kernel_lw(reg, addr) ++#define user_lwl(reg, addr) kernel_lwl(reg, addr) ++#define user_lwr(reg, addr) kernel_lwr(reg, addr) ++#define user_lh(reg, addr) kernel_lh(reg, addr) ++#define user_lb(reg, addr) kernel_lb(reg, addr) ++#define user_lbu(reg, addr) kernel_lbu(reg, addr) ++#define user_sw(reg, addr) kernel_sw(reg, addr) ++#define user_swl(reg, addr) kernel_swl(reg, addr) ++#define user_swr(reg, addr) kernel_swr(reg, addr) ++#define user_sh(reg, addr) kernel_sh(reg, addr) ++#define user_sb(reg, addr) kernel_sb(reg, addr) + + #ifdef CONFIG_32BIT +-/* +- * No 'sd' or 'ld' instructions in 32-bit but the code will +- * do the correct thing +- */ +-#define user_sd(reg, addr) user_sw(reg, addr) +-#define user_ld(reg, addr) user_lw(reg, addr) ++#define user_sd(reg, addr) kernel_sw(reg, addr) ++#define user_ld(reg, addr) kernel_lw(reg, addr) + #else +-#define user_sd(reg, addr) sd reg, addr +-#define user_ld(reg, addr) ld reg, addr ++#define user_sd(reg, addr) kernel_sd(reg, addr) ++#define user_ld(reg, addr) kernel_sd(reg, addr) + #endif /* CONFIG_32BIT */ + + #endif /* CONFIG_EVA */ diff --git a/queue-3.19/mips-hibernate-flush-tlb-entries-earlier.patch b/queue-3.19/mips-hibernate-flush-tlb-entries-earlier.patch new file mode 100644 index 00000000000..764694c63a0 --- /dev/null +++ 
b/queue-3.19/mips-hibernate-flush-tlb-entries-earlier.patch @@ -0,0 +1,45 @@ +From a843d00d038b11267279e3b5388222320f9ddc1d Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Sun, 29 Mar 2015 10:54:05 +0800 +Subject: MIPS: Hibernate: flush TLB entries earlier + +From: Huacai Chen + +commit a843d00d038b11267279e3b5388222320f9ddc1d upstream. + +We found that TLB mismatch not only happens after kernel resume, but +also happens during snapshot restore. So move it to the beginning of +swsusp_arch_suspend(). + +Signed-off-by: Huacai Chen +Cc: Steven J. Hill +Cc: linux-mips@linux-mips.org +Cc: Fuxin Zhang +Cc: Zhangjin Wu +Patchwork: https://patchwork.linux-mips.org/patch/9621/ +Signed-off-by: Ralf Baechle +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/power/hibernate.S | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/mips/power/hibernate.S ++++ b/arch/mips/power/hibernate.S +@@ -30,6 +30,8 @@ LEAF(swsusp_arch_suspend) + END(swsusp_arch_suspend) + + LEAF(swsusp_arch_resume) ++ /* Avoid TLB mismatch during and after kernel resume */ ++ jal local_flush_tlb_all + PTR_L t0, restore_pblist + 0: + PTR_L t1, PBE_ADDRESS(t0) /* source */ +@@ -43,7 +45,6 @@ LEAF(swsusp_arch_resume) + bne t1, t3, 1b + PTR_L t0, PBE_NEXT(t0) + bnez t0, 0b +- jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */ + PTR_LA t0, saved_regs + PTR_L ra, PT_R31(t0) + PTR_L sp, PT_R29(t0) diff --git a/queue-3.19/mips-kvm-handle-msa-disabled-exceptions-from-guest.patch b/queue-3.19/mips-kvm-handle-msa-disabled-exceptions-from-guest.patch new file mode 100644 index 00000000000..04c5bd0e72c --- /dev/null +++ b/queue-3.19/mips-kvm-handle-msa-disabled-exceptions-from-guest.patch @@ -0,0 +1,129 @@ +From 98119ad53376885819d93dfb8737b6a9a61ca0ba Mon Sep 17 00:00:00 2001 +From: James Hogan +Date: Fri, 6 Feb 2015 11:11:56 +0000 +Subject: MIPS: KVM: Handle MSA Disabled exceptions from guest + +From: James Hogan + +commit 98119ad53376885819d93dfb8737b6a9a61ca0ba upstream. + +Guest user mode can generate a guest MSA Disabled exception on an MSA +capable core by simply trying to execute an MSA instruction. Since this +exception is unknown to KVM it will be passed on to the guest kernel. +However guest Linux kernels prior to v3.15 do not set up an exception +handler for the MSA Disabled exception as they don't support any MSA +capable cores. This results in a guest OS panic. + +Since an older processor ID may be being emulated, and MSA support is +not advertised to the guest, the correct behaviour is to generate a +Reserved Instruction exception in the guest kernel so it can send the +guest process an illegal instruction signal (SIGILL), as would happen +with a non-MSA-capable core. + +Fix this as minimally as reasonably possible by preventing +kvm_mips_check_privilege() from relaying MSA Disabled exceptions from +guest user mode to the guest kernel, and handling the MSA Disabled +exception by emulating a Reserved Instruction exception in the guest, +via a new handle_msa_disabled() KVM callback. 
+ +Signed-off-by: James Hogan +Cc: Paolo Bonzini +Cc: Paul Burton +Cc: Ralf Baechle +Cc: Gleb Natapov +Cc: linux-mips@linux-mips.org +Cc: kvm@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/include/asm/kvm_host.h | 2 ++ + arch/mips/kvm/emulate.c | 1 + + arch/mips/kvm/mips.c | 4 ++++ + arch/mips/kvm/trap_emul.c | 28 ++++++++++++++++++++++++++++ + 4 files changed, 35 insertions(+) + +--- a/arch/mips/include/asm/kvm_host.h ++++ b/arch/mips/include/asm/kvm_host.h +@@ -321,6 +321,7 @@ enum mips_mmu_types { + #define T_TRAP 13 /* Trap instruction */ + #define T_VCEI 14 /* Virtual coherency exception */ + #define T_FPE 15 /* Floating point exception */ ++#define T_MSADIS 21 /* MSA disabled exception */ + #define T_WATCH 23 /* Watch address reference */ + #define T_VCED 31 /* Virtual coherency data */ + +@@ -577,6 +578,7 @@ struct kvm_mips_callbacks { + int (*handle_syscall)(struct kvm_vcpu *vcpu); + int (*handle_res_inst)(struct kvm_vcpu *vcpu); + int (*handle_break)(struct kvm_vcpu *vcpu); ++ int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); + int (*vm_init)(struct kvm *kvm); + int (*vcpu_init)(struct kvm_vcpu *vcpu); + int (*vcpu_setup)(struct kvm_vcpu *vcpu); +--- a/arch/mips/kvm/emulate.c ++++ b/arch/mips/kvm/emulate.c +@@ -2176,6 +2176,7 @@ enum emulation_result kvm_mips_check_pri + case T_SYSCALL: + case T_BREAK: + case T_RES_INST: ++ case T_MSADIS: + break; + + case T_COP_UNUSABLE: +--- a/arch/mips/kvm/mips.c ++++ b/arch/mips/kvm/mips.c +@@ -1119,6 +1119,10 @@ int kvm_mips_handle_exit(struct kvm_run + ret = kvm_mips_callbacks->handle_break(vcpu); + break; + ++ case T_MSADIS: ++ ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); ++ break; ++ + default: + kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", + exccode, opc, kvm_get_inst(opc, vcpu), badvaddr, +--- a/arch/mips/kvm/trap_emul.c ++++ b/arch/mips/kvm/trap_emul.c +@@ -330,6 +330,33 @@ static int kvm_trap_emul_handle_break(st + return ret; + } + ++static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_run *run = vcpu->run; ++ uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; ++ unsigned long cause = vcpu->arch.host_cp0_cause; ++ enum emulation_result er = EMULATE_DONE; ++ int ret = RESUME_GUEST; ++ ++ /* No MSA supported in guest, guest reserved instruction exception */ ++ er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); ++ ++ switch (er) { ++ case EMULATE_DONE: ++ ret = RESUME_GUEST; ++ break; ++ ++ case EMULATE_FAIL: ++ run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ ret = RESUME_HOST; ++ break; ++ ++ default: ++ BUG(); ++ } ++ return ret; ++} ++ + static int kvm_trap_emul_vm_init(struct kvm *kvm) + { + return 0; +@@ -470,6 +497,7 @@ static struct kvm_mips_callbacks kvm_tra + .handle_syscall = kvm_trap_emul_handle_syscall, + .handle_res_inst = kvm_trap_emul_handle_res_inst, + .handle_break = kvm_trap_emul_handle_break, ++ .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, + + .vm_init = kvm_trap_emul_vm_init, + .vcpu_init = kvm_trap_emul_vcpu_init, diff --git a/queue-3.19/mips-loongson-3-add-irqf_no_suspend-to-cascade-irqaction.patch b/queue-3.19/mips-loongson-3-add-irqf_no_suspend-to-cascade-irqaction.patch new file mode 100644 index 00000000000..c17e49b28f5 --- /dev/null +++ b/queue-3.19/mips-loongson-3-add-irqf_no_suspend-to-cascade-irqaction.patch @@ -0,0 +1,37 @@ +From 0add9c2f1cff9f3f1f2eb7e9babefa872a9d14b9 Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Thu, 12 Mar 2015 11:51:06 +0800 +Subject: MIPS: 
Loongson-3: Add IRQF_NO_SUSPEND to Cascade irqaction
+
+From: Huacai Chen
+
+commit 0add9c2f1cff9f3f1f2eb7e9babefa872a9d14b9 upstream.
+
+HPET irq is routed to i8259 and then to MIPS CPU irq (cascade). After
+commit a3e6c1eff5 (MIPS: IRQ: Fix disable_irq on CPU IRQs), if
+IRQF_NO_SUSPEND is not set in cascade_irqaction, HPET interrupts will be
+lost during suspend. As a result, the machine cannot be woken up.
+
+Signed-off-by: Huacai Chen
+Cc: Steven J. Hill
+Cc: linux-mips@linux-mips.org
+Cc: Fuxin Zhang
+Cc: Zhangjin Wu
+Patchwork: https://patchwork.linux-mips.org/patch/9528/
+Signed-off-by: Ralf Baechle
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/mips/loongson/loongson-3/irq.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/mips/loongson/loongson-3/irq.c
++++ b/arch/mips/loongson/loongson-3/irq.c
+@@ -44,6 +44,7 @@ void mach_irq_dispatch(unsigned int pend
+
+ static struct irqaction cascade_irqaction = {
+ .handler = no_action,
++ .flags = IRQF_NO_SUSPEND,
+ .name = "cascade",
+ };
+
diff --git a/queue-3.19/mips-lose_fpu-disable-fpu-when-msa-enabled.patch b/queue-3.19/mips-lose_fpu-disable-fpu-when-msa-enabled.patch
new file mode 100644
index 00000000000..f51ccdc02a7
--- /dev/null
+++ b/queue-3.19/mips-lose_fpu-disable-fpu-when-msa-enabled.patch
@@ -0,0 +1,45 @@
+From acaf6a97d623af123314c2f8ce4cf7254f6b2fc1 Mon Sep 17 00:00:00 2001
+From: James Hogan
+Date: Wed, 25 Feb 2015 13:08:05 +0000
+Subject: MIPS: lose_fpu(): Disable FPU when MSA enabled
+
+From: James Hogan
+
+commit acaf6a97d623af123314c2f8ce4cf7254f6b2fc1 upstream.
+
+The lose_fpu() function only disables the FPU in CP0_Status.CU1 if the
+FPU is in use and MSA isn't enabled.
+
+This isn't necessarily a problem because KSTK_STATUS(current), the
+version of CP0_Status stored on the kernel stack on entry from user
+mode, does always get updated and gets restored when returning to user
+mode, but I don't think it was intended, and it is inconsistent with the
+case of only the FPU being in use. Sometimes leaving the FPU enabled may
+also mask kernel bugs where FPU operations are executed when the FPU
+might not be enabled.
+
+So let's disable the FPU in the MSA case too.
+
+Fixes: 33c771ba5c5d ("MIPS: save/disable MSA in lose_fpu")
+Signed-off-by: James Hogan
+Cc: Ralf Baechle
+Cc: Paul Burton
+Cc: linux-mips@linux-mips.org
+Patchwork: https://patchwork.linux-mips.org/patch/9323/
+Signed-off-by: Ralf Baechle
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/mips/include/asm/fpu.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/mips/include/asm/fpu.h
++++ b/arch/mips/include/asm/fpu.h
+@@ -169,6 +169,7 @@ static inline void lose_fpu(int save)
+ }
+ disable_msa();
+ clear_thread_flag(TIF_USEDMSA);
++ __disable_fpu();
+ } else if (is_fpu_owner()) {
+ if (save)
+ _save_fp(current);
diff --git a/queue-3.19/mips-malta-detect-and-fix-bad-memsize-values.patch b/queue-3.19/mips-malta-detect-and-fix-bad-memsize-values.patch
new file mode 100644
index 00000000000..bced60afbcd
--- /dev/null
+++ b/queue-3.19/mips-malta-detect-and-fix-bad-memsize-values.patch
@@ -0,0 +1,43 @@
+From f7f8aea4b97c4d48e42f02cb37026bee445f239f Mon Sep 17 00:00:00 2001
+From: Markos Chandras
+Date: Fri, 27 Feb 2015 07:51:32 +0000
+Subject: MIPS: Malta: Detect and fix bad memsize values
+
+From: Markos Chandras
+
+commit f7f8aea4b97c4d48e42f02cb37026bee445f239f upstream.
+
+memsize denotes the amount of RAM we can access from kseg{0,1} and
+that should be up to 256M. 
In case the bootloader reports a value +higher than that (perhaps reporting all the available RAM) it's best +if we fix it ourselves and just warn the user about that. This is +usually a problem with the bootloader and/or its environment. + +[ralf@linux-mips.org: Remove useless parens as suggested bei Sergei. +Reformat long pr_warn statement to fit into 80 column limit.] + +Signed-off-by: Markos Chandras +Cc: linux-mips@linux-mips.org +Patchwork: https://patchwork.linux-mips.org/patch/9362/ +Signed-off-by: Ralf Baechle +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/mti-malta/malta-memory.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/arch/mips/mti-malta/malta-memory.c ++++ b/arch/mips/mti-malta/malta-memory.c +@@ -53,6 +53,12 @@ fw_memblock_t * __init fw_getmdesc(int e + pr_warn("memsize not set in YAMON, set to default (32Mb)\n"); + physical_memsize = 0x02000000; + } else { ++ if (memsize > (256 << 20)) { /* memsize should be capped to 256M */ ++ pr_warn("Unsupported memsize value (0x%lx) detected! " ++ "Using 0x10000000 (256M) instead\n", ++ memsize); ++ memsize = 256 << 20; ++ } + /* If ememsize is set, then set physical_memsize to that */ + physical_memsize = ememsize ? : memsize; + } diff --git a/queue-3.19/mips-unaligned-fix-regular-load-store-instruction-emulation-for-eva.patch b/queue-3.19/mips-unaligned-fix-regular-load-store-instruction-emulation-for-eva.patch new file mode 100644 index 00000000000..78f2e99c5ae --- /dev/null +++ b/queue-3.19/mips-unaligned-fix-regular-load-store-instruction-emulation-for-eva.patch @@ -0,0 +1,115 @@ +From 6eae35485b26f9e51ab896eb8a936bed9908fdf6 Mon Sep 17 00:00:00 2001 +From: Markos Chandras +Date: Mon, 9 Mar 2015 14:54:52 +0000 +Subject: MIPS: unaligned: Fix regular load/store instruction emulation for EVA + +From: Markos Chandras + +commit 6eae35485b26f9e51ab896eb8a936bed9908fdf6 upstream. + +When emulating a regular lh/lw/lhu/sh/sw we need to use the appropriate +instruction if we are in EVA mode. This is necessary for userspace +applications which trigger alignment exceptions. In such case, the +userspace load/store instruction needs to be emulated with the correct +eva/non-eva instruction by the kernel emulator. 
+ +Signed-off-by: Markos Chandras +Fixes: c1771216ab48 ("MIPS: kernel: unaligned: Handle unaligned accesses for EVA") +Cc: linux-mips@linux-mips.org +Patchwork: https://patchwork.linux-mips.org/patch/9503/ +Signed-off-by: Ralf Baechle +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/kernel/unaligned.c | 52 ++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 47 insertions(+), 5 deletions(-) + +--- a/arch/mips/kernel/unaligned.c ++++ b/arch/mips/kernel/unaligned.c +@@ -564,7 +564,15 @@ static void emulate_load_store_insn(stru + if (!access_ok(VERIFY_READ, addr, 2)) + goto sigbus; + +- LoadHW(addr, value, res); ++ if (config_enabled(CONFIG_EVA)) { ++ if (segment_eq(get_fs(), get_ds())) ++ LoadHW(addr, value, res); ++ else ++ LoadHWE(addr, value, res); ++ } else { ++ LoadHW(addr, value, res); ++ } ++ + if (res) + goto fault; + compute_return_epc(regs); +@@ -575,7 +583,15 @@ static void emulate_load_store_insn(stru + if (!access_ok(VERIFY_READ, addr, 4)) + goto sigbus; + +- LoadW(addr, value, res); ++ if (config_enabled(CONFIG_EVA)) { ++ if (segment_eq(get_fs(), get_ds())) ++ LoadW(addr, value, res); ++ else ++ LoadWE(addr, value, res); ++ } else { ++ LoadW(addr, value, res); ++ } ++ + if (res) + goto fault; + compute_return_epc(regs); +@@ -586,7 +602,15 @@ static void emulate_load_store_insn(stru + if (!access_ok(VERIFY_READ, addr, 2)) + goto sigbus; + +- LoadHWU(addr, value, res); ++ if (config_enabled(CONFIG_EVA)) { ++ if (segment_eq(get_fs(), get_ds())) ++ LoadHWU(addr, value, res); ++ else ++ LoadHWUE(addr, value, res); ++ } else { ++ LoadHWU(addr, value, res); ++ } ++ + if (res) + goto fault; + compute_return_epc(regs); +@@ -645,7 +669,16 @@ static void emulate_load_store_insn(stru + + compute_return_epc(regs); + value = regs->regs[insn.i_format.rt]; +- StoreHW(addr, value, res); ++ ++ if (config_enabled(CONFIG_EVA)) { ++ if (segment_eq(get_fs(), get_ds())) ++ StoreHW(addr, value, res); ++ else ++ StoreHWE(addr, value, res); ++ } else { ++ StoreHW(addr, value, res); ++ } ++ + if (res) + goto fault; + break; +@@ -656,7 +689,16 @@ static void emulate_load_store_insn(stru + + compute_return_epc(regs); + value = regs->regs[insn.i_format.rt]; +- StoreW(addr, value, res); ++ ++ if (config_enabled(CONFIG_EVA)) { ++ if (segment_eq(get_fs(), get_ds())) ++ StoreW(addr, value, res); ++ else ++ StoreWE(addr, value, res); ++ } else { ++ StoreW(addr, value, res); ++ } ++ + if (res) + goto fault; + break; diff --git a/queue-3.19/s390-hibernate-fix-save-and-restore-of-kernel-text-section.patch b/queue-3.19/s390-hibernate-fix-save-and-restore-of-kernel-text-section.patch new file mode 100644 index 00000000000..8fc8daacf9b --- /dev/null +++ b/queue-3.19/s390-hibernate-fix-save-and-restore-of-kernel-text-section.patch @@ -0,0 +1,74 @@ +From d74419495633493c9cd3f2bbeb7f3529d0edded6 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Wed, 25 Mar 2015 10:13:33 +0100 +Subject: s390/hibernate: fix save and restore of kernel text section + +From: Heiko Carstens + +commit d74419495633493c9cd3f2bbeb7f3529d0edded6 upstream. + +Sebastian reported a crash caused by a jump label mismatch after resume. +This happens because we do not save the kernel text section during suspend +and therefore also do not restore it during resume, but use the kernel image +that restores the old system. + +This means that after a suspend/resume cycle we lost all modifications done +to the kernel text section. 
+The reason for this is the pfn_is_nosave() function, which incorrectly +returns that read-only pages don't need to be saved. This is incorrect since +we mark the kernel text section read-only. +We still need to make sure to not save and restore pages contained within +NSS and DCSS segment. +To fix this add an extra case for the kernel text section and only save +those pages if they are not contained within an NSS segment. + +Fixes the following crash (and the above bugs as well): + +Jump label code mismatch at netif_receive_skb_internal+0x28/0xd0 +Found: c0 04 00 00 00 00 +Expected: c0 f4 00 00 00 11 +New: c0 04 00 00 00 00 +Kernel panic - not syncing: Corrupted kernel text +CPU: 0 PID: 9 Comm: migration/0 Not tainted 3.19.0-01975-gb1b096e70f23 #4 +Call Trace: + [<0000000000113972>] show_stack+0x72/0xf0 + [<000000000081f15e>] dump_stack+0x6e/0x90 + [<000000000081c4e8>] panic+0x108/0x2b0 + [<000000000081be64>] jump_label_bug.isra.2+0x104/0x108 + [<0000000000112176>] __jump_label_transform+0x9e/0xd0 + [<00000000001121e6>] __sm_arch_jump_label_transform+0x3e/0x50 + [<00000000001d1136>] multi_cpu_stop+0x12e/0x170 + [<00000000001d1472>] cpu_stopper_thread+0xb2/0x168 + [<000000000015d2ac>] smpboot_thread_fn+0x134/0x1b0 + [<0000000000158baa>] kthread+0x10a/0x110 + [<0000000000824a86>] kernel_thread_starter+0x6/0xc + +Reported-and-tested-by: Sebastian Ott +Signed-off-by: Heiko Carstens +Signed-off-by: Martin Schwidefsky +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kernel/suspend.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/s390/kernel/suspend.c ++++ b/arch/s390/kernel/suspend.c +@@ -138,6 +138,8 @@ int pfn_is_nosave(unsigned long pfn) + { + unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); + unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end)); ++ unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1; ++ unsigned long stext_pfn = PFN_DOWN(__pa(&_stext)); + + /* Always save lowcore pages (LC protection might be enabled). */ + if (pfn <= LC_PAGES) +@@ -145,6 +147,8 @@ int pfn_is_nosave(unsigned long pfn) + if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) + return 1; + /* Skip memory holes and read-only pages (NSS, DCSS, ...). */ ++ if (pfn >= stext_pfn && pfn <= eshared_pfn) ++ return ipl_info.type == IPL_TYPE_NSS ? 
1 : 0; + if (tprot(PFN_PHYS(pfn))) + return 1; + return 0; diff --git a/queue-3.19/series b/queue-3.19/series index 33aa3174078..c94b87e2cdc 100644 --- a/queue-3.19/series +++ b/queue-3.19/series @@ -18,3 +18,23 @@ btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch usb-gadget-printer-enqueue-printer-s-response-for-setup-request.patch +kvm-s390-fix-handling-of-write-errors-in-the-tpi-handler.patch +kvm-s390-reinjection-of-irqs-can-fail-in-the-tpi-handler.patch +kvm-s390-zero-out-current-vmdb-of-stsi-before-including-level3-data.patch +kvm-s390-no-need-to-hold-the-kvm-mutex-for-floating-interrupts.patch +kvm-s390-fix-get_all_floating_irqs.patch +s390-hibernate-fix-save-and-restore-of-kernel-text-section.patch +kvm-use-slowpath-for-cross-page-cached-accesses.patch +kvm-arm-arm64-check-irq-number-on-userland-injection.patch +kvm-arm-arm64-vgic-vgic_init-returns-enodev-when-no-online-vcpu.patch +arm-kvm-fix-size-check-in-__coherent_cache_guest_page.patch +arm64-kvm-fix-stage-2-pgd-allocation-to-have-per-page-refcounting.patch +arm64-kvm-do-not-use-pgd_index-to-index-stage-2-pgd.patch +arm-arm64-kvm-keep-elrsr-aisr-in-sync-with-software-model.patch +mips-kvm-handle-msa-disabled-exceptions-from-guest.patch +mips-lose_fpu-disable-fpu-when-msa-enabled.patch +mips-malta-detect-and-fix-bad-memsize-values.patch +mips-asm-asm-eva-introduce-kernel-load-store-variants.patch +mips-unaligned-fix-regular-load-store-instruction-emulation-for-eva.patch +mips-loongson-3-add-irqf_no_suspend-to-cascade-irqaction.patch +mips-hibernate-flush-tlb-entries-earlier.patch