From 1ebf5b52ff8a93e369ca7bbe66668dcd6cc66ac3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 29 Jan 2022 15:48:53 +0100 Subject: [PATCH] 5.16-stable patches added patches: arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch block-add-bio_start_io_acct_time-to-control-start_time.patch dm-properly-fix-redundant-bio-based-io-accounting.patch dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch security-lsm-dentry_init_security-handle-multi-lsm-registration.patch --- ...x-load_unaligned_zeropad-reg-indices.patch | 68 +++++++ ...t_io_acct_time-to-control-start_time.patch | 93 ++++++++++ ...ix-redundant-bio-based-io-accounting.patch | 56 ++++++ ...or-redundant-bio-based-io-accounting.patch | 53 ++++++ ...-being-clobbered-with-multiple-vcpus.patch | 80 ++++++++ ...-.flags-in-kvm_cpuid_check_equal-too.patch | 33 ++++ ...ested-virt-when-smm-state-is-toggled.patch | 173 ++++++++++++++++++ ...rray-on-post-kvm_run-kvm_set_cpuid-2.patch | 66 +++++++ ...keep-msr_ia32_xss-unchanged-for-init.patch | 42 +++++ ...ngling-to-__kvm_update_cpuid_runtime.patch | 107 +++++++++++ ...e-with-the-xcr0-ia32_xss-at-any-time.patch | 42 +++++ ...ntime-cpuid-on-write-to-msr_ia32_xss.patch | 34 ++++ ...curity-handle-multi-lsm-registration.patch | 158 ++++++++++++++++ queue-5.16/series | 13 ++ 14 files changed, 1018 insertions(+) create mode 100644 queue-5.16/arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch create 
mode 100644 queue-5.16/block-add-bio_start_io_acct_time-to-control-start_time.patch create mode 100644 queue-5.16/dm-properly-fix-redundant-bio-based-io-accounting.patch create mode 100644 queue-5.16/dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch create mode 100644 queue-5.16/kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch create mode 100644 queue-5.16/kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch create mode 100644 queue-5.16/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch create mode 100644 queue-5.16/kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch create mode 100644 queue-5.16/kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch create mode 100644 queue-5.16/kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch create mode 100644 queue-5.16/kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch create mode 100644 queue-5.16/kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch create mode 100644 queue-5.16/security-lsm-dentry_init_security-handle-multi-lsm-registration.patch diff --git a/queue-5.16/arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch b/queue-5.16/arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch new file mode 100644 index 00000000000..d2de9fb1f20 --- /dev/null +++ b/queue-5.16/arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch @@ -0,0 +1,68 @@ +From 3758a6c74e08bdc15ccccd6872a6ad37d165239a Mon Sep 17 00:00:00 2001 +From: Evgenii Stepanov +Date: Tue, 25 Jan 2022 10:22:17 -0800 +Subject: arm64: extable: fix load_unaligned_zeropad() reg indices + +From: Evgenii Stepanov + +commit 3758a6c74e08bdc15ccccd6872a6ad37d165239a upstream. + +In ex_handler_load_unaligned_zeropad() we erroneously extract the data and +addr register indices from ex->type rather than ex->data. As ex->type will +contain EX_TYPE_LOAD_UNALIGNED_ZEROPAD (i.e. 
4): + * We'll always treat X0 as the address register, since EX_DATA_REG_ADDR is + extracted from bits [9:5]. Thus, we may attempt to dereference an + arbitrary address as X0 may hold an arbitrary value. + * We'll always treat X4 as the data register, since EX_DATA_REG_DATA is + extracted from bits [4:0]. Thus we will corrupt X4 and cause arbitrary + behaviour within load_unaligned_zeropad() and its caller. + +Fix this by extracting both values from ex->data as originally intended. + +On an MTE-enabled QEMU image we are hitting the following crash: + Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 + Call trace: + fixup_exception+0xc4/0x108 + __do_kernel_fault+0x3c/0x268 + do_tag_check_fault+0x3c/0x104 + do_mem_abort+0x44/0xf4 + el1_abort+0x40/0x64 + el1h_64_sync_handler+0x60/0xa0 + el1h_64_sync+0x7c/0x80 + link_path_walk+0x150/0x344 + path_openat+0xa0/0x7dc + do_filp_open+0xb8/0x168 + do_sys_openat2+0x88/0x17c + __arm64_sys_openat+0x74/0xa0 + invoke_syscall+0x48/0x148 + el0_svc_common+0xb8/0xf8 + do_el0_svc+0x28/0x88 + el0_svc+0x24/0x84 + el0t_64_sync_handler+0x88/0xec + el0t_64_sync+0x1b4/0x1b8 + Code: f8695a69 71007d1f 540000e0 927df12a (f940014a) + +Fixes: 753b32368705 ("arm64: extable: add load_unaligned_zeropad() handler") +Cc: # 5.16.x +Reviewed-by: Mark Rutland +Signed-off-by: Evgenii Stepanov +Link: https://lore.kernel.org/r/20220125182217.2605202-1-eugenis@google.com +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/mm/extable.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/arm64/mm/extable.c ++++ b/arch/arm64/mm/extable.c +@@ -43,8 +43,8 @@ static bool + ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, + struct pt_regs *regs) + { +- int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type); +- int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type); ++ int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data); ++ int reg_addr = 
FIELD_GET(EX_DATA_REG_ADDR, ex->data); + unsigned long data, addr, offset; + + addr = pt_regs_read_reg(regs, reg_addr); diff --git a/queue-5.16/block-add-bio_start_io_acct_time-to-control-start_time.patch b/queue-5.16/block-add-bio_start_io_acct_time-to-control-start_time.patch new file mode 100644 index 00000000000..738392bcf7b --- /dev/null +++ b/queue-5.16/block-add-bio_start_io_acct_time-to-control-start_time.patch @@ -0,0 +1,93 @@ +From e45c47d1f94e0cc7b6b079fdb4bcce2995e2adc4 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Fri, 28 Jan 2022 10:58:39 -0500 +Subject: block: add bio_start_io_acct_time() to control start_time + +From: Mike Snitzer + +commit e45c47d1f94e0cc7b6b079fdb4bcce2995e2adc4 upstream. + +bio_start_io_acct_time() interface is like bio_start_io_acct() that +allows start_time to be passed in. This gives drivers the ability to +defer starting accounting until after IO is issued (but possibly not +entirely due to bio splitting). + +Reviewed-by: Christoph Hellwig +Signed-off-by: Mike Snitzer +Link: https://lore.kernel.org/r/20220128155841.39644-2-snitzer@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-core.c | 25 +++++++++++++++++++------ + include/linux/blkdev.h | 1 + + 2 files changed, 20 insertions(+), 6 deletions(-) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -1258,22 +1258,34 @@ void __blk_account_io_start(struct reque + } + + static unsigned long __part_start_io_acct(struct block_device *part, +- unsigned int sectors, unsigned int op) ++ unsigned int sectors, unsigned int op, ++ unsigned long start_time) + { + const int sgrp = op_stat_group(op); +- unsigned long now = READ_ONCE(jiffies); + + part_stat_lock(); +- update_io_ticks(part, now, false); ++ update_io_ticks(part, start_time, false); + part_stat_inc(part, ios[sgrp]); + part_stat_add(part, sectors[sgrp], sectors); + part_stat_local_inc(part, in_flight[op_is_write(op)]); + part_stat_unlock(); + +- return now; ++ return start_time; 
+ } + + /** ++ * bio_start_io_acct_time - start I/O accounting for bio based drivers ++ * @bio: bio to start account for ++ * @start_time: start time that should be passed back to bio_end_io_acct(). ++ */ ++void bio_start_io_acct_time(struct bio *bio, unsigned long start_time) ++{ ++ __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), ++ bio_op(bio), start_time); ++} ++EXPORT_SYMBOL_GPL(bio_start_io_acct_time); ++ ++/** + * bio_start_io_acct - start I/O accounting for bio based drivers + * @bio: bio to start account for + * +@@ -1281,14 +1293,15 @@ static unsigned long __part_start_io_acc + */ + unsigned long bio_start_io_acct(struct bio *bio) + { +- return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio)); ++ return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), ++ bio_op(bio), jiffies); + } + EXPORT_SYMBOL_GPL(bio_start_io_acct); + + unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, + unsigned int op) + { +- return __part_start_io_acct(disk->part0, sectors, op); ++ return __part_start_io_acct(disk->part0, sectors, op, jiffies); + } + EXPORT_SYMBOL(disk_start_io_acct); + +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -1254,6 +1254,7 @@ unsigned long disk_start_io_acct(struct + void disk_end_io_acct(struct gendisk *disk, unsigned int op, + unsigned long start_time); + ++void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); + unsigned long bio_start_io_acct(struct bio *bio); + void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, + struct block_device *orig_bdev); diff --git a/queue-5.16/dm-properly-fix-redundant-bio-based-io-accounting.patch b/queue-5.16/dm-properly-fix-redundant-bio-based-io-accounting.patch new file mode 100644 index 00000000000..dfb7d44aa6c --- /dev/null +++ b/queue-5.16/dm-properly-fix-redundant-bio-based-io-accounting.patch @@ -0,0 +1,56 @@ +From b879f915bc48a18d4f4462729192435bb0f17052 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: 
Fri, 28 Jan 2022 10:58:41 -0500 +Subject: dm: properly fix redundant bio-based IO accounting + +From: Mike Snitzer + +commit b879f915bc48a18d4f4462729192435bb0f17052 upstream. + +Record the start_time for a bio but defer the starting block core's IO +accounting until after IO is submitted using bio_start_io_acct_time(). + +This approach avoids the need to mess around with any of the +individual IO stats in response to a bio_split() that follows bio +submission. + +Reported-by: Bud Brown +Reviewed-by: Christoph Hellwig +Cc: stable@vger.kernel.org +Depends-on: e45c47d1f94e ("block: add bio_start_io_acct_time() to control start_time") +Signed-off-by: Mike Snitzer +Link: https://lore.kernel.org/r/20220128155841.39644-4-snitzer@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -489,7 +489,7 @@ static void start_io_acct(struct dm_io * + struct mapped_device *md = io->md; + struct bio *bio = io->orig_bio; + +- io->start_time = bio_start_io_acct(bio); ++ bio_start_io_acct_time(bio, io->start_time); + if (unlikely(dm_stats_used(&md->stats))) + dm_stats_account_io(&md->stats, bio_data_dir(bio), + bio->bi_iter.bi_sector, bio_sectors(bio), +@@ -535,7 +535,7 @@ static struct dm_io *alloc_io(struct map + io->md = md; + spin_lock_init(&io->endio_lock); + +- start_io_acct(io); ++ io->start_time = jiffies; + + return io; + } +@@ -1550,6 +1550,7 @@ static void __split_and_process_bio(stru + submit_bio_noacct(bio); + } + } ++ start_io_acct(ci.io); + + /* drop the extra reference count */ + dm_io_dec_pending(ci.io, errno_to_blk_status(error)); diff --git a/queue-5.16/dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch b/queue-5.16/dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch new file mode 100644 index 00000000000..29c023039a3 --- /dev/null +++ 
b/queue-5.16/dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch @@ -0,0 +1,53 @@ +From f524d9c95fab54783d0038f7a3e8c014d5b56857 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Fri, 28 Jan 2022 10:58:40 -0500 +Subject: dm: revert partial fix for redundant bio-based IO accounting + +From: Mike Snitzer + +commit f524d9c95fab54783d0038f7a3e8c014d5b56857 upstream. + +Reverts a1e1cb72d9649 ("dm: fix redundant IO accounting for bios that +need splitting") because it was too narrow in scope (only addressed +redundant 'sectors[]' accounting and not ios, nsecs[], etc). + +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Link: https://lore.kernel.org/r/20220128155841.39644-3-snitzer@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm.c | 15 --------------- + 1 file changed, 15 deletions(-) + +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -1510,9 +1510,6 @@ static void init_clone_info(struct clone + ci->sector = bio->bi_iter.bi_sector; + } + +-#define __dm_part_stat_sub(part, field, subnd) \ +- (part_stat_get(part, field) -= (subnd)) +- + /* + * Entry point to split a bio into clones and submit them to the targets. + */ +@@ -1548,18 +1545,6 @@ static void __split_and_process_bio(stru + GFP_NOIO, &md->queue->bio_split); + ci.io->orig_bio = b; + +- /* +- * Adjust IO stats for each split, otherwise upon queue +- * reentry there will be redundant IO accounting. 
+- * NOTE: this is a stop-gap fix, a proper fix involves +- * significant refactoring of DM core's bio splitting +- * (by eliminating DM's splitting and just using bio_split) +- */ +- part_stat_lock(); +- __dm_part_stat_sub(dm_disk(md)->part0, +- sectors[op_stat_group(bio_op(bio))], ci.sector_count); +- part_stat_unlock(); +- + bio_chain(b, bio); + trace_block_split(b, bio->bi_iter.bi_sector); + submit_bio_noacct(bio); diff --git a/queue-5.16/kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch b/queue-5.16/kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch new file mode 100644 index 00000000000..91599b7145c --- /dev/null +++ b/queue-5.16/kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch @@ -0,0 +1,80 @@ +From 22f7ff0dea9491e90b6fe808ed40c30bd791e5c2 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Sat, 22 Jan 2022 20:55:30 +1000 +Subject: KVM: PPC: Book3S HV Nested: Fix nested HFSCR being clobbered with multiple vCPUs + +From: Nicholas Piggin + +commit 22f7ff0dea9491e90b6fe808ed40c30bd791e5c2 upstream. + +The L0 is storing HFSCR requested by the L1 for the L2 in struct +kvm_nested_guest when the L1 requests a vCPU enter L2. kvm_nested_guest +is not a per-vCPU structure. Hilarity ensues. + +Fix it by moving the nested hfscr into the vCPU structure together with +the other per-vCPU nested fields. 
+ +Fixes: 8b210a880b35 ("KVM: PPC: Book3S HV Nested: Make nested HFSCR state accessible") +Cc: stable@vger.kernel.org # v5.15+ +Signed-off-by: Nicholas Piggin +Reviewed-by: Fabiano Rosas +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20220122105530.3477250-1-npiggin@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/include/asm/kvm_book3s_64.h | 1 - + arch/powerpc/include/asm/kvm_host.h | 1 + + arch/powerpc/kvm/book3s_hv.c | 3 +-- + arch/powerpc/kvm/book3s_hv_nested.c | 2 +- + 4 files changed, 3 insertions(+), 4 deletions(-) + +--- a/arch/powerpc/include/asm/kvm_book3s_64.h ++++ b/arch/powerpc/include/asm/kvm_book3s_64.h +@@ -39,7 +39,6 @@ struct kvm_nested_guest { + pgd_t *shadow_pgtable; /* our page table for this guest */ + u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */ + u64 process_table; /* process table entry for this guest */ +- u64 hfscr; /* HFSCR that the L1 requested for this nested guest */ + long refcnt; /* number of pointers to this struct */ + struct mutex tlb_lock; /* serialize page faults and tlbies */ + struct kvm_nested_guest *next; +--- a/arch/powerpc/include/asm/kvm_host.h ++++ b/arch/powerpc/include/asm/kvm_host.h +@@ -814,6 +814,7 @@ struct kvm_vcpu_arch { + + /* For support of nested guests */ + struct kvm_nested_guest *nested; ++ u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */ + u32 nested_vcpu_id; + gpa_t nested_io_gpr; + #endif +--- a/arch/powerpc/kvm/book3s_hv.c ++++ b/arch/powerpc/kvm/book3s_hv.c +@@ -1731,7 +1731,6 @@ static int kvmppc_handle_exit_hv(struct + + static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) + { +- struct kvm_nested_guest *nested = vcpu->arch.nested; + int r; + int srcu_idx; + +@@ -1831,7 +1830,7 @@ static int kvmppc_handle_nested_exit(str + * it into a HEAI. 
+ */ + if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) || +- (nested->hfscr & (1UL << cause))) { ++ (vcpu->arch.nested_hfscr & (1UL << cause))) { + vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST; + + /* +--- a/arch/powerpc/kvm/book3s_hv_nested.c ++++ b/arch/powerpc/kvm/book3s_hv_nested.c +@@ -362,7 +362,7 @@ long kvmhv_enter_nested_guest(struct kvm + /* set L1 state to L2 state */ + vcpu->arch.nested = l2; + vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; +- l2->hfscr = l2_hv.hfscr; ++ vcpu->arch.nested_hfscr = l2_hv.hfscr; + vcpu->arch.regs = l2_regs; + + /* Guest must always run with ME enabled, HV disabled. */ diff --git a/queue-5.16/kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch b/queue-5.16/kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch new file mode 100644 index 00000000000..41176400fd9 --- /dev/null +++ b/queue-5.16/kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch @@ -0,0 +1,33 @@ +From 033a3ea59a19df63edb4db6bfdbb357cd028258a Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Wed, 26 Jan 2022 14:18:04 +0100 +Subject: KVM: x86: Check .flags in kvm_cpuid_check_equal() too + +From: Vitaly Kuznetsov + +commit 033a3ea59a19df63edb4db6bfdbb357cd028258a upstream. + +kvm_cpuid_check_equal() checks for the (full) equality of the supplied +CPUID data so .flags need to be checked too. 
+ +Reported-by: Sean Christopherson +Fixes: c6617c61e8fe ("KVM: x86: Partially allow KVM_SET_CPUID{,2} after KVM_RUN") +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20220126131804.2839410-1-vkuznets@redhat.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -113,6 +113,7 @@ static int kvm_cpuid_check_equal(struct + orig = &vcpu->arch.cpuid_entries[i]; + if (e2[i].function != orig->function || + e2[i].index != orig->index || ++ e2[i].flags != orig->flags || + e2[i].eax != orig->eax || e2[i].ebx != orig->ebx || + e2[i].ecx != orig->ecx || e2[i].edx != orig->edx) + return -EINVAL; diff --git a/queue-5.16/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch b/queue-5.16/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch new file mode 100644 index 00000000000..c92b50ba08a --- /dev/null +++ b/queue-5.16/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch @@ -0,0 +1,173 @@ +From f7e570780efc5cec9b2ed1e0472a7da14e864fdb Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 25 Jan 2022 22:03:58 +0000 +Subject: KVM: x86: Forcibly leave nested virt when SMM state is toggled + +From: Sean Christopherson + +commit f7e570780efc5cec9b2ed1e0472a7da14e864fdb upstream. + +Forcibly leave nested virtualization operation if userspace toggles SMM +state via KVM_SET_VCPU_EVENTS or KVM_SYNC_X86_EVENTS. If userspace +forces the vCPU out of SMM while it's post-VMXON and then injects an SMI, +vmx_enter_smm() will overwrite vmx->nested.smm.vmxon and end up with both +vmxon=false and smm.vmxon=false, but all other nVMX state allocated. + +Don't attempt to gracefully handle the transition as (a) most transitions +are nonsensical, e.g. forcing SMM while L2 is running, (b) there isn't +sufficient information to handle all transitions, e.g. 
SVM wants access +to the SMRAM save state, and (c) KVM_SET_VCPU_EVENTS must precede +KVM_SET_NESTED_STATE during state restore as the latter disallows putting +the vCPU into L2 if SMM is active, and disallows tagging the vCPU as +being post-VMXON in SMM if SMM is not active. + +Abuse of KVM_SET_VCPU_EVENTS manifests as a WARN and memory leak in nVMX +due to failure to free vmcs01's shadow VMCS, but the bug goes far beyond +just a memory leak, e.g. toggling SMM on while L2 is active puts the vCPU +in an architecturally impossible state. + + WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline] + WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656 + Modules linked in: + CPU: 1 PID: 3606 Comm: syz-executor725 Not tainted 5.17.0-rc1-syzkaller #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + RIP: 0010:free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline] + RIP: 0010:free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656 + Code: <0f> 0b eb b3 e8 8f 4d 9f 00 e9 f7 fe ff ff 48 89 df e8 92 4d 9f 00 + Call Trace: + + kvm_arch_vcpu_destroy+0x72/0x2f0 arch/x86/kvm/x86.c:11123 + kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline] + kvm_destroy_vcpus+0x11f/0x290 arch/x86/kvm/../../../virt/kvm/kvm_main.c:460 + kvm_free_vcpus arch/x86/kvm/x86.c:11564 [inline] + kvm_arch_destroy_vm+0x2e8/0x470 arch/x86/kvm/x86.c:11676 + kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1217 [inline] + kvm_put_kvm+0x4fa/0xb00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1250 + kvm_vm_release+0x3f/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1273 + __fput+0x286/0x9f0 fs/file_table.c:311 + task_work_run+0xdd/0x1a0 kernel/task_work.c:164 + exit_task_work include/linux/task_work.h:32 [inline] + do_exit+0xb29/0x2a30 kernel/exit.c:806 + do_group_exit+0xd2/0x2f0 kernel/exit.c:935 + get_signal+0x4b0/0x28c0 kernel/signal.c:2862 + arch_do_signal_or_restart+0x2a9/0x1c40 
arch/x86/kernel/signal.c:868 + handle_signal_work kernel/entry/common.c:148 [inline] + exit_to_user_mode_loop kernel/entry/common.c:172 [inline] + exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:207 + __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] + syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300 + do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 + entry_SYSCALL_64_after_hwframe+0x44/0xae + + +Cc: stable@vger.kernel.org +Reported-by: syzbot+8112db3ab20e70d50c31@syzkaller.appspotmail.com +Signed-off-by: Sean Christopherson +Message-Id: <20220125220358.2091737-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 1 + + arch/x86/kvm/svm/nested.c | 9 +++++---- + arch/x86/kvm/svm/svm.c | 2 +- + arch/x86/kvm/svm/svm.h | 2 +- + arch/x86/kvm/vmx/nested.c | 1 + + arch/x86/kvm/x86.c | 4 +++- + 6 files changed, 12 insertions(+), 7 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1497,6 +1497,7 @@ struct kvm_x86_ops { + }; + + struct kvm_x86_nested_ops { ++ void (*leave_nested)(struct kvm_vcpu *vcpu); + int (*check_events)(struct kvm_vcpu *vcpu); + bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); + void (*triple_fault)(struct kvm_vcpu *vcpu); +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -964,9 +964,9 @@ void svm_free_nested(struct vcpu_svm *sv + /* + * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
+ */ +-void svm_leave_nested(struct vcpu_svm *svm) ++void svm_leave_nested(struct kvm_vcpu *vcpu) + { +- struct kvm_vcpu *vcpu = &svm->vcpu; ++ struct vcpu_svm *svm = to_svm(vcpu); + + if (is_guest_mode(vcpu)) { + svm->nested.nested_run_pending = 0; +@@ -1345,7 +1345,7 @@ static int svm_set_nested_state(struct k + return -EINVAL; + + if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { +- svm_leave_nested(svm); ++ svm_leave_nested(vcpu); + svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); + return 0; + } +@@ -1410,7 +1410,7 @@ static int svm_set_nested_state(struct k + */ + + if (is_guest_mode(vcpu)) +- svm_leave_nested(svm); ++ svm_leave_nested(vcpu); + else + svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; + +@@ -1464,6 +1464,7 @@ static bool svm_get_nested_state_pages(s + } + + struct kvm_x86_nested_ops svm_nested_ops = { ++ .leave_nested = svm_leave_nested, + .check_events = svm_check_nested_events, + .triple_fault = nested_svm_triple_fault, + .get_nested_state_pages = svm_get_nested_state_pages, +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -290,7 +290,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, + + if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { + if (!(efer & EFER_SVME)) { +- svm_leave_nested(svm); ++ svm_leave_nested(vcpu); + svm_set_gif(svm, true); + /* #GP intercept is still needed for vmware backdoor */ + if (!enable_vmware_backdoor) +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ -470,7 +470,7 @@ static inline bool nested_exit_on_nmi(st + + int enter_svm_guest_mode(struct kvm_vcpu *vcpu, + u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun); +-void svm_leave_nested(struct vcpu_svm *svm); ++void svm_leave_nested(struct kvm_vcpu *vcpu); + void svm_free_nested(struct vcpu_svm *svm); + int svm_allocate_nested(struct vcpu_svm *svm); + int nested_svm_vmrun(struct kvm_vcpu *vcpu); +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -6744,6 +6744,7 @@ __init int 
nested_vmx_hardware_setup(int + } + + struct kvm_x86_nested_ops vmx_nested_ops = { ++ .leave_nested = vmx_leave_nested, + .check_events = vmx_check_nested_events, + .hv_timer_pending = nested_vmx_preemption_timer_pending, + .triple_fault = nested_vmx_triple_fault, +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4784,8 +4784,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_e + vcpu->arch.apic->sipi_vector = events->sipi_vector; + + if (events->flags & KVM_VCPUEVENT_VALID_SMM) { +- if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) ++ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { ++ kvm_x86_ops.nested_ops->leave_nested(vcpu); + kvm_smm_changed(vcpu, events->smi.smm); ++ } + + vcpu->arch.smi_pending = events->smi.pending; + diff --git a/queue-5.16/kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch b/queue-5.16/kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch new file mode 100644 index 00000000000..83c9d78812c --- /dev/null +++ b/queue-5.16/kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch @@ -0,0 +1,66 @@ +From 811f95ff95270e6048197821434d9301e3d7f07c Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 25 Jan 2022 21:04:45 +0000 +Subject: KVM: x86: Free kvm_cpuid_entry2 array on post-KVM_RUN KVM_SET_CPUID{,2} + +From: Sean Christopherson + +commit 811f95ff95270e6048197821434d9301e3d7f07c upstream. + +Free the "struct kvm_cpuid_entry2" array on successful post-KVM_RUN +KVM_SET_CPUID{,2} to fix a memory leak, the callers of kvm_set_cpuid() +free the array only on failure. + + BUG: memory leak + unreferenced object 0xffff88810963a800 (size 2048): + comm "syz-executor025", pid 3610, jiffies 4294944928 (age 8.080s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 0d 00 00 00 ................ + 47 65 6e 75 6e 74 65 6c 69 6e 65 49 00 00 00 00 GenuntelineI.... 
+ backtrace: + [] kmalloc_node include/linux/slab.h:604 [inline] + [] kvmalloc_node+0x3e/0x100 mm/util.c:580 + [] kvmalloc include/linux/slab.h:732 [inline] + [] vmemdup_user+0x22/0x100 mm/util.c:199 + [] kvm_vcpu_ioctl_set_cpuid2+0x8f/0xf0 arch/x86/kvm/cpuid.c:423 + [] kvm_arch_vcpu_ioctl+0xb99/0x1e60 arch/x86/kvm/x86.c:5251 + [] kvm_vcpu_ioctl+0x4ad/0x950 arch/x86/kvm/../../../virt/kvm/kvm_main.c:4066 + [] vfs_ioctl fs/ioctl.c:51 [inline] + [] __do_sys_ioctl fs/ioctl.c:874 [inline] + [] __se_sys_ioctl fs/ioctl.c:860 [inline] + [] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:860 + [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] + [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + [] entry_SYSCALL_64_after_hwframe+0x44/0xae + +Fixes: c6617c61e8fe ("KVM: x86: Partially allow KVM_SET_CPUID{,2} after KVM_RUN") +Cc: stable@vger.kernel.org +Reported-by: syzbot+be576ad7655690586eec@syzkaller.appspotmail.com +Signed-off-by: Sean Christopherson +Message-Id: <20220125210445.2053429-1-seanjc@google.com> +Reviewed-by: Vitaly Kuznetsov +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -338,8 +338,14 @@ static int kvm_set_cpuid(struct kvm_vcpu + * KVM_SET_CPUID{,2} again. To support this legacy behavior, check + * whether the supplied CPUID data is equal to what's already set. 
+ */ +- if (vcpu->arch.last_vmentry_cpu != -1) +- return kvm_cpuid_check_equal(vcpu, e2, nent); ++ if (vcpu->arch.last_vmentry_cpu != -1) { ++ r = kvm_cpuid_check_equal(vcpu, e2, nent); ++ if (r) ++ return r; ++ ++ kvfree(e2); ++ return 0; ++ } + + r = kvm_check_cpuid(e2, nent); + if (r) diff --git a/queue-5.16/kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch b/queue-5.16/kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch new file mode 100644 index 00000000000..9c8f1cef432 --- /dev/null +++ b/queue-5.16/kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch @@ -0,0 +1,42 @@ +From be4f3b3f82271c3193ce200a996dc70682c8e622 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 26 Jan 2022 17:22:24 +0000 +Subject: KVM: x86: Keep MSR_IA32_XSS unchanged for INIT + +From: Xiaoyao Li + +commit be4f3b3f82271c3193ce200a996dc70682c8e622 upstream. + +It has been corrected from SDM version 075 that MSR_IA32_XSS is reset to +zero on Power up and Reset but keeps unchanged on INIT. + +Fixes: a554d207dc46 ("KVM: X86: Processor States following Reset or INIT") +Cc: stable@vger.kernel.org +Signed-off-by: Xiaoyao Li +Signed-off-by: Sean Christopherson +Message-Id: <20220126172226.2298529-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -11065,6 +11065,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp + vcpu->arch.msr_misc_features_enables = 0; + + vcpu->arch.xcr0 = XFEATURE_MASK_FP; ++ vcpu->arch.ia32_xss = 0; + } + + /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ +@@ -11081,8 +11082,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp + cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0); + kvm_rdx_write(vcpu, cpuid_0x1 ? 
cpuid_0x1->eax : 0x600); + +- vcpu->arch.ia32_xss = 0; +- + static_call(kvm_x86_vcpu_reset)(vcpu, init_event); + + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); diff --git a/queue-5.16/kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch b/queue-5.16/kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch new file mode 100644 index 00000000000..5bcf888391b --- /dev/null +++ b/queue-5.16/kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch @@ -0,0 +1,107 @@ +From 5c89be1dd5cfb697614bc13626ba3bd0781aa160 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Mon, 24 Jan 2022 11:36:05 +0100 +Subject: KVM: x86: Move CPUID.(EAX=0x12,ECX=1) mangling to __kvm_update_cpuid_runtime() + +From: Vitaly Kuznetsov + +commit 5c89be1dd5cfb697614bc13626ba3bd0781aa160 upstream. + +Full equality check of CPUID data on update (kvm_cpuid_check_equal()) may +fail for SGX enabled CPUs as CPUID.(EAX=0x12,ECX=1) is currently being +mangled in kvm_vcpu_after_set_cpuid(). Move it to +__kvm_update_cpuid_runtime() and split off cpuid_get_supported_xcr0() +helper as 'vcpu->arch.guest_supported_xcr0' update needs (logically) +to stay in kvm_vcpu_after_set_cpuid(). + +Cc: stable@vger.kernel.org +Fixes: feb627e8d6f6 ("KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN") +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20220124103606.2630588-2-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 54 +++++++++++++++++++++++++++++++-------------------- + 1 file changed, 33 insertions(+), 21 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -176,10 +176,26 @@ void kvm_update_pv_runtime(struct kvm_vc + vcpu->arch.pv_cpuid.features = best->eax; + } + ++/* ++ * Calculate guest's supported XCR0 taking into account guest CPUID data and ++ * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0). 
++ */ ++static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = cpuid_entry2_find(entries, nent, 0xd, 0); ++ if (!best) ++ return 0; ++ ++ return (best->eax | ((u64)best->edx << 32)) & supported_xcr0; ++} ++ + static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries, + int nent) + { + struct kvm_cpuid_entry2 *best; ++ u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent); + + best = cpuid_entry2_find(entries, nent, 1, 0); + if (best) { +@@ -218,6 +234,21 @@ static void __kvm_update_cpuid_runtime(s + vcpu->arch.ia32_misc_enable_msr & + MSR_IA32_MISC_ENABLE_MWAIT); + } ++ ++ /* ++ * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate ++ * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's ++ * requested XCR0 value. The enclave's XFRM must be a subset of XCRO ++ * at the time of EENTER, thus adjust the allowed XFRM by the guest's ++ * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to ++ * '1' even on CPUs that don't support XSAVE. ++ */ ++ best = cpuid_entry2_find(entries, nent, 0x12, 0x1); ++ if (best) { ++ best->ecx &= guest_supported_xcr0 & 0xffffffff; ++ best->edx &= guest_supported_xcr0 >> 32; ++ best->ecx |= XFEATURE_MASK_FPSSE; ++ } + } + + void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) +@@ -241,27 +272,8 @@ static void kvm_vcpu_after_set_cpuid(str + kvm_apic_set_version(vcpu); + } + +- best = kvm_find_cpuid_entry(vcpu, 0xD, 0); +- if (!best) +- vcpu->arch.guest_supported_xcr0 = 0; +- else +- vcpu->arch.guest_supported_xcr0 = +- (best->eax | ((u64)best->edx << 32)) & supported_xcr0; +- +- /* +- * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate +- * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's +- * requested XCR0 value. 
The enclave's XFRM must be a subset of XCRO +- * at the time of EENTER, thus adjust the allowed XFRM by the guest's +- * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to +- * '1' even on CPUs that don't support XSAVE. +- */ +- best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1); +- if (best) { +- best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff; +- best->edx &= vcpu->arch.guest_supported_xcr0 >> 32; +- best->ecx |= XFEATURE_MASK_FPSSE; +- } ++ vcpu->arch.guest_supported_xcr0 = ++ cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); + + kvm_update_pv_runtime(vcpu); + diff --git a/queue-5.16/kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch b/queue-5.16/kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch new file mode 100644 index 00000000000..78f260c8e0d --- /dev/null +++ b/queue-5.16/kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch @@ -0,0 +1,42 @@ +From 05a9e065059e566f218f8778c4d17ee75db56c55 Mon Sep 17 00:00:00 2001 +From: Like Xu +Date: Wed, 26 Jan 2022 17:22:26 +0000 +Subject: KVM: x86: Sync the states size with the XCR0/IA32_XSS at, any time + +From: Like Xu + +commit 05a9e065059e566f218f8778c4d17ee75db56c55 upstream. + +XCR0 is reset to 1 by RESET but not INIT and IA32_XSS is zeroed by +both RESET and INIT. The kvm_set_msr_common()'s handling of MSR_IA32_XSS +also needs to update kvm_update_cpuid_runtime(). In the above cases, the +size in bytes of the XSAVE area containing all states enabled by XCR0 or +(XCRO | IA32_XSS) needs to be updated. + +For simplicity and consistency, existing helpers are used to write values +and call kvm_update_cpuid_runtime(), and it's not exactly a fast path. 
+ +Fixes: a554d207dc46 ("KVM: X86: Processor States following Reset or INIT") +Cc: stable@vger.kernel.org +Signed-off-by: Like Xu +Signed-off-by: Sean Christopherson +Message-Id: <20220126172226.2298529-4-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -11065,8 +11065,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp + + vcpu->arch.msr_misc_features_enables = 0; + +- vcpu->arch.xcr0 = XFEATURE_MASK_FP; +- vcpu->arch.ia32_xss = 0; ++ __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP); ++ __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true); + } + + /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ diff --git a/queue-5.16/kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch b/queue-5.16/kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch new file mode 100644 index 00000000000..d45a678dcc9 --- /dev/null +++ b/queue-5.16/kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch @@ -0,0 +1,34 @@ +From 4c282e51e4450b94680d6ca3b10f830483b1f243 Mon Sep 17 00:00:00 2001 +From: Like Xu +Date: Wed, 26 Jan 2022 17:22:25 +0000 +Subject: KVM: x86: Update vCPU's runtime CPUID on write to MSR_IA32_XSS + +From: Like Xu + +commit 4c282e51e4450b94680d6ca3b10f830483b1f243 upstream. + +Do a runtime CPUID update for a vCPU if MSR_IA32_XSS is written, as the +size in bytes of the XSAVE area is affected by the states enabled in XSS. 
+ +Fixes: 203000993de5 ("kvm: vmx: add MSR logic for XSAVES") +Cc: stable@vger.kernel.org +Signed-off-by: Like Xu +[sean: split out as a separate patch, adjust Fixes tag] +Signed-off-by: Sean Christopherson +Message-Id: <20220126172226.2298529-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3508,6 +3508,7 @@ int kvm_set_msr_common(struct kvm_vcpu * + if (data & ~supported_xss) + return 1; + vcpu->arch.ia32_xss = data; ++ kvm_update_cpuid_runtime(vcpu); + break; + case MSR_SMI_COUNT: + if (!msr_info->host_initiated) diff --git a/queue-5.16/security-lsm-dentry_init_security-handle-multi-lsm-registration.patch b/queue-5.16/security-lsm-dentry_init_security-handle-multi-lsm-registration.patch new file mode 100644 index 00000000000..392c5196a88 --- /dev/null +++ b/queue-5.16/security-lsm-dentry_init_security-handle-multi-lsm-registration.patch @@ -0,0 +1,158 @@ +From 7f5056b9e7b71149bf11073f00a57fa1ac2921a9 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Wed, 26 Jan 2022 15:35:14 -0500 +Subject: security, lsm: dentry_init_security() Handle multi LSM registration + +From: Vivek Goyal + +commit 7f5056b9e7b71149bf11073f00a57fa1ac2921a9 upstream. + +A ceph user has reported that ceph is crashing with kernel NULL pointer +dereference. Following is the backtrace. 
+ +/proc/version: Linux version 5.16.2-arch1-1 (linux@archlinux) (gcc (GCC) +11.1.0, GNU ld (GNU Binutils) 2.36.1) #1 SMP PREEMPT Thu, 20 Jan 2022 +16:18:29 +0000 +distro / arch: Arch Linux / x86_64 +SELinux is not enabled +ceph cluster version: 16.2.7 (dd0603118f56ab514f133c8d2e3adfc983942503) + +relevant dmesg output: +[ 30.947129] BUG: kernel NULL pointer dereference, address: +0000000000000000 +[ 30.947206] #PF: supervisor read access in kernel mode +[ 30.947258] #PF: error_code(0x0000) - not-present page +[ 30.947310] PGD 0 P4D 0 +[ 30.947342] Oops: 0000 [#1] PREEMPT SMP PTI +[ 30.947388] CPU: 5 PID: 778 Comm: touch Not tainted 5.16.2-arch1-1 #1 +86fbf2c313cc37a553d65deb81d98e9dcc2a3659 +[ 30.947486] Hardware name: Gigabyte Technology Co., Ltd. B365M +DS3H/B365M DS3H, BIOS F5 08/13/2019 +[ 30.947569] RIP: 0010:strlen+0x0/0x20 +[ 30.947616] Code: b6 07 38 d0 74 16 48 83 c7 01 84 c0 74 05 48 39 f7 75 +ec 31 c0 31 d2 89 d6 89 d7 c3 48 89 f8 31 d2 89 d6 89 d7 c3 0 +f 1f 40 00 <80> 3f 00 74 12 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 31 +ff +[ 30.947782] RSP: 0018:ffffa4ed80ffbbb8 EFLAGS: 00010246 +[ 30.947836] RAX: 0000000000000000 RBX: ffffa4ed80ffbc60 RCX: +0000000000000000 +[ 30.947904] RDX: 0000000000000000 RSI: 0000000000000000 RDI: +0000000000000000 +[ 30.947971] RBP: ffff94b0d15c0ae0 R08: 0000000000000000 R09: +0000000000000000 +[ 30.948040] R10: 0000000000000000 R11: 0000000000000000 R12: +0000000000000000 +[ 30.948106] R13: 0000000000000001 R14: ffffa4ed80ffbc60 R15: +0000000000000000 +[ 30.948174] FS: 00007fc7520f0740(0000) GS:ffff94b7ced40000(0000) +knlGS:0000000000000000 +[ 30.948252] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 30.948308] CR2: 0000000000000000 CR3: 0000000104a40001 CR4: +00000000003706e0 +[ 30.948376] Call Trace: +[ 30.948404] +[ 30.948431] ceph_security_init_secctx+0x7b/0x240 [ceph +49f9c4b9bf5be8760f19f1747e26da33920bce4b] +[ 30.948582] ceph_atomic_open+0x51e/0x8a0 [ceph +49f9c4b9bf5be8760f19f1747e26da33920bce4b] +[ 
30.948708] ? get_cached_acl+0x4d/0xa0 +[ 30.948759] path_openat+0x60d/0x1030 +[ 30.948809] do_filp_open+0xa5/0x150 +[ 30.948859] do_sys_openat2+0xc4/0x190 +[ 30.948904] __x64_sys_openat+0x53/0xa0 +[ 30.948948] do_syscall_64+0x5c/0x90 +[ 30.948989] ? exc_page_fault+0x72/0x180 +[ 30.949034] entry_SYSCALL_64_after_hwframe+0x44/0xae +[ 30.949091] RIP: 0033:0x7fc7521e25bb +[ 30.950849] Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00 +00 00 85 c0 75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 0 +0 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 91 00 00 00 48 8b 54 24 28 64 48 2b 14 +25 + +Core of the problem is that ceph checks for return code from +security_dentry_init_security() and if return code is 0, it assumes +everything is fine and continues to call strlen(name), which crashes. + +Typically SELinux LSM returns 0 and sets name to "security.selinux" and +it is not a problem. Or if selinux is not compiled in or disabled, it +returns -EOPNOTSUP and ceph deals with it. + +But somehow in this configuration, 0 is being returned and "name" is +not being initialized and that's creating the problem. + +Our suspicion is that BPF LSM is registering a hook for +dentry_init_security() and returns hook default of 0. + +LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry,...) + +I have not been able to reproduce it just by doing CONFIG_BPF_LSM=y. +Stephen has tested the patch though and confirms it solves the problem +for him. + +dentry_init_security() is written in such a way that it expects only one +LSM to register the hook. Atleast that's the expectation with current code. + +If another LSM returns a hook and returns default, it will simply return +0 as of now and that will break ceph. + +Hence, suggestion is that change semantics of this hook a bit. If there +are no LSMs or no LSM is taking ownership and initializing security context, +then return -EOPNOTSUP. Also allow at max one LSM to initialize security +context. 
This hook can't deal with multiple LSMs trying to init security +context. This patch implements this new behavior. + +Reported-by: Stephen Muth +Tested-by: Stephen Muth +Suggested-by: Casey Schaufler +Acked-by: Casey Schaufler +Reviewed-by: Serge Hallyn +Cc: Jeff Layton +Cc: Christian Brauner +Cc: Paul Moore +Cc: # 5.16.0 +Signed-off-by: Vivek Goyal +Reviewed-by: Jeff Layton +Acked-by: Paul Moore +Acked-by: Christian Brauner +Signed-off-by: James Morris +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/lsm_hook_defs.h | 2 +- + security/security.c | 15 +++++++++++++-- + 2 files changed, 14 insertions(+), 3 deletions(-) + +--- a/include/linux/lsm_hook_defs.h ++++ b/include/linux/lsm_hook_defs.h +@@ -82,7 +82,7 @@ LSM_HOOK(int, 0, sb_add_mnt_opt, const c + int len, void **mnt_opts) + LSM_HOOK(int, 0, move_mount, const struct path *from_path, + const struct path *to_path) +-LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, ++LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, + int mode, const struct qstr *name, const char **xattr_name, + void **ctx, u32 *ctxlen) + LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, +--- a/security/security.c ++++ b/security/security.c +@@ -1056,8 +1056,19 @@ int security_dentry_init_security(struct + const char **xattr_name, void **ctx, + u32 *ctxlen) + { +- return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode, +- name, xattr_name, ctx, ctxlen); ++ struct security_hook_list *hp; ++ int rc; ++ ++ /* ++ * Only one module will provide a security context. 
++ */ ++ hlist_for_each_entry(hp, &security_hook_heads.dentry_init_security, list) { ++ rc = hp->hook.dentry_init_security(dentry, mode, name, ++ xattr_name, ctx, ctxlen); ++ if (rc != LSM_RET_DEFAULT(dentry_init_security)) ++ return rc; ++ } ++ return LSM_RET_DEFAULT(dentry_init_security); + } + EXPORT_SYMBOL(security_dentry_init_security); + diff --git a/queue-5.16/series b/queue-5.16/series index 498ce57b7d0..9525a8c78f4 100644 --- a/queue-5.16/series +++ b/queue-5.16/series @@ -49,3 +49,16 @@ kvm-lapic-also-cancel-preemption-timer-during-set_lapic.patch kvm-svm-never-reject-emulation-due-to-smap-errata-for-sev-guests.patch kvm-svm-don-t-intercept-gp-for-sev-guests.patch kvm-x86-nsvm-skip-eax-alignment-check-for-non-svm-instructions.patch +kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch +kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch +kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch +kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch +kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch +kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch +kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch +kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch +security-lsm-dentry_init_security-handle-multi-lsm-registration.patch +arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch +dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch +block-add-bio_start_io_acct_time-to-control-start_time.patch +dm-properly-fix-redundant-bio-based-io-accounting.patch -- 2.47.2