5.16-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 29 Jan 2022 14:48:53 +0000 (15:48 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 29 Jan 2022 14:48:53 +0000 (15:48 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 29 Jan 2022 14:48:53 +0000 (15:48 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 29 Jan 2022 14:48:53 +0000 (15:48 +0100)
diff --git a/queue-5.16/arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch b/queue-5.16/arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch

new file mode 100644 (file)

index 0000000..d2de9fb
--- /dev/null
+++ b/queue-5.16/arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch
@@ -0,0 +1,68 @@
+From 3758a6c74e08bdc15ccccd6872a6ad37d165239a Mon Sep 17 00:00:00 2001
+From: Evgenii Stepanov <eugenis@google.com>
+Date: Tue, 25 Jan 2022 10:22:17 -0800
+Subject: arm64: extable: fix load_unaligned_zeropad() reg indices
+
+From: Evgenii Stepanov <eugenis@google.com>
+
+commit 3758a6c74e08bdc15ccccd6872a6ad37d165239a upstream.
+
+In ex_handler_load_unaligned_zeropad() we erroneously extract the data and
+addr register indices from ex->type rather than ex->data. As ex->type will
+contain EX_TYPE_LOAD_UNALIGNED_ZEROPAD (i.e. 4):
+ * We'll always treat X0 as the address register, since EX_DATA_REG_ADDR is
+   extracted from bits [9:5]. Thus, we may attempt to dereference an
+   arbitrary address as X0 may hold an arbitrary value.
+ * We'll always treat X4 as the data register, since EX_DATA_REG_DATA is
+   extracted from bits [4:0]. Thus we will corrupt X4 and cause arbitrary
+   behaviour within load_unaligned_zeropad() and its caller.
+
+Fix this by extracting both values from ex->data as originally intended.
+
+On an MTE-enabled QEMU image we are hitting the following crash:
+ Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
+ Call trace:
+  fixup_exception+0xc4/0x108
+  __do_kernel_fault+0x3c/0x268
+  do_tag_check_fault+0x3c/0x104
+  do_mem_abort+0x44/0xf4
+  el1_abort+0x40/0x64
+  el1h_64_sync_handler+0x60/0xa0
+  el1h_64_sync+0x7c/0x80
+  link_path_walk+0x150/0x344
+  path_openat+0xa0/0x7dc
+  do_filp_open+0xb8/0x168
+  do_sys_openat2+0x88/0x17c
+  __arm64_sys_openat+0x74/0xa0
+  invoke_syscall+0x48/0x148
+  el0_svc_common+0xb8/0xf8
+  do_el0_svc+0x28/0x88
+  el0_svc+0x24/0x84
+  el0t_64_sync_handler+0x88/0xec
+  el0t_64_sync+0x1b4/0x1b8
+ Code: f8695a69 71007d1f 540000e0 927df12a (f940014a)
+
+Fixes: 753b32368705 ("arm64: extable: add load_unaligned_zeropad() handler")
+Cc: <stable@vger.kernel.org> # 5.16.x
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Evgenii Stepanov <eugenis@google.com>
+Link: https://lore.kernel.org/r/20220125182217.2605202-1-eugenis@google.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/mm/extable.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/mm/extable.c
++++ b/arch/arm64/mm/extable.c
+@@ -43,8 +43,8 @@ static bool
+ ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
+                                 struct pt_regs *regs)
+ {
+-      int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type);
+-      int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type);
++      int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
++      int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+       unsigned long data, addr, offset;
+ 
+       addr = pt_regs_read_reg(regs, reg_addr);
diff --git a/queue-5.16/block-add-bio_start_io_acct_time-to-control-start_time.patch b/queue-5.16/block-add-bio_start_io_acct_time-to-control-start_time.patch

new file mode 100644 (file)

index 0000000..738392b
--- /dev/null
+++ b/queue-5.16/block-add-bio_start_io_acct_time-to-control-start_time.patch
@@ -0,0 +1,93 @@
+From e45c47d1f94e0cc7b6b079fdb4bcce2995e2adc4 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Fri, 28 Jan 2022 10:58:39 -0500
+Subject: block: add bio_start_io_acct_time() to control start_time
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit e45c47d1f94e0cc7b6b079fdb4bcce2995e2adc4 upstream.
+
+The bio_start_io_acct_time() interface is like bio_start_io_acct() but
+allows start_time to be passed in. This gives drivers the ability to
+defer starting accounting until after IO is issued (but possibly not
+entirely due to bio splitting).
+
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Link: https://lore.kernel.org/r/20220128155841.39644-2-snitzer@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-core.c       |   25 +++++++++++++++++++------
+ include/linux/blkdev.h |    1 +
+ 2 files changed, 20 insertions(+), 6 deletions(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -1258,22 +1258,34 @@ void __blk_account_io_start(struct reque
+ }
+ 
+ static unsigned long __part_start_io_acct(struct block_device *part,
+-                                        unsigned int sectors, unsigned int op)
++                                        unsigned int sectors, unsigned int op,
++                                        unsigned long start_time)
+ {
+       const int sgrp = op_stat_group(op);
+-      unsigned long now = READ_ONCE(jiffies);
+ 
+       part_stat_lock();
+-      update_io_ticks(part, now, false);
++      update_io_ticks(part, start_time, false);
+       part_stat_inc(part, ios[sgrp]);
+       part_stat_add(part, sectors[sgrp], sectors);
+       part_stat_local_inc(part, in_flight[op_is_write(op)]);
+       part_stat_unlock();
+ 
+-      return now;
++      return start_time;
+ }
+ 
+ /**
++ * bio_start_io_acct_time - start I/O accounting for bio based drivers
++ * @bio:      bio to start account for
++ * @start_time:       start time that should be passed back to bio_end_io_acct().
++ */
++void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
++{
++      __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
++                           bio_op(bio), start_time);
++}
++EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
++
++/**
+  * bio_start_io_acct - start I/O accounting for bio based drivers
+  * @bio:      bio to start account for
+  *
+@@ -1281,14 +1293,15 @@ static unsigned long __part_start_io_acc
+  */
+ unsigned long bio_start_io_acct(struct bio *bio)
+ {
+-      return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio));
++      return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
++                                  bio_op(bio), jiffies);
+ }
+ EXPORT_SYMBOL_GPL(bio_start_io_acct);
+ 
+ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+                                unsigned int op)
+ {
+-      return __part_start_io_acct(disk->part0, sectors, op);
++      return __part_start_io_acct(disk->part0, sectors, op, jiffies);
+ }
+ EXPORT_SYMBOL(disk_start_io_acct);
+ 
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1254,6 +1254,7 @@ unsigned long disk_start_io_acct(struct
+ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+               unsigned long start_time);
+ 
++void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
+ unsigned long bio_start_io_acct(struct bio *bio);
+ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
+               struct block_device *orig_bdev);
diff --git a/queue-5.16/dm-properly-fix-redundant-bio-based-io-accounting.patch b/queue-5.16/dm-properly-fix-redundant-bio-based-io-accounting.patch

new file mode 100644 (file)

index 0000000..dfb7d44
--- /dev/null
+++ b/queue-5.16/dm-properly-fix-redundant-bio-based-io-accounting.patch
@@ -0,0 +1,56 @@
+From b879f915bc48a18d4f4462729192435bb0f17052 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Fri, 28 Jan 2022 10:58:41 -0500
+Subject: dm: properly fix redundant bio-based IO accounting
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit b879f915bc48a18d4f4462729192435bb0f17052 upstream.
+
+Record the start_time for a bio but defer the starting block core's IO
+accounting until after IO is submitted using bio_start_io_acct_time().
+
+This approach avoids the need to mess around with any of the
+individual IO stats in response to a bio_split() that follows bio
+submission.
+
+Reported-by: Bud Brown <bubrown@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: stable@vger.kernel.org
+Depends-on: e45c47d1f94e ("block: add bio_start_io_acct_time() to control start_time")
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Link: https://lore.kernel.org/r/20220128155841.39644-4-snitzer@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -489,7 +489,7 @@ static void start_io_acct(struct dm_io *
+       struct mapped_device *md = io->md;
+       struct bio *bio = io->orig_bio;
+ 
+-      io->start_time = bio_start_io_acct(bio);
++      bio_start_io_acct_time(bio, io->start_time);
+       if (unlikely(dm_stats_used(&md->stats)))
+               dm_stats_account_io(&md->stats, bio_data_dir(bio),
+                                   bio->bi_iter.bi_sector, bio_sectors(bio),
+@@ -535,7 +535,7 @@ static struct dm_io *alloc_io(struct map
+       io->md = md;
+       spin_lock_init(&io->endio_lock);
+ 
+-      start_io_acct(io);
++      io->start_time = jiffies;
+ 
+       return io;
+ }
+@@ -1550,6 +1550,7 @@ static void __split_and_process_bio(stru
+                       submit_bio_noacct(bio);
+               }
+       }
++      start_io_acct(ci.io);
+ 
+       /* drop the extra reference count */
+       dm_io_dec_pending(ci.io, errno_to_blk_status(error));
diff --git a/queue-5.16/dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch b/queue-5.16/dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch

new file mode 100644 (file)

index 0000000..29c0230
--- /dev/null
+++ b/queue-5.16/dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch
@@ -0,0 +1,53 @@
+From f524d9c95fab54783d0038f7a3e8c014d5b56857 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Fri, 28 Jan 2022 10:58:40 -0500
+Subject: dm: revert partial fix for redundant bio-based IO accounting
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit f524d9c95fab54783d0038f7a3e8c014d5b56857 upstream.
+
+Reverts a1e1cb72d9649 ("dm: fix redundant IO accounting for bios that
+need splitting") because it was too narrow in scope (only addressed
+redundant 'sectors[]' accounting and not ios, nsecs[], etc).
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Link: https://lore.kernel.org/r/20220128155841.39644-3-snitzer@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm.c |   15 ---------------
+ 1 file changed, 15 deletions(-)
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1510,9 +1510,6 @@ static void init_clone_info(struct clone
+       ci->sector = bio->bi_iter.bi_sector;
+ }
+ 
+-#define __dm_part_stat_sub(part, field, subnd)        \
+-      (part_stat_get(part, field) -= (subnd))
+-
+ /*
+  * Entry point to split a bio into clones and submit them to the targets.
+  */
+@@ -1548,18 +1545,6 @@ static void __split_and_process_bio(stru
+                                                 GFP_NOIO, &md->queue->bio_split);
+                       ci.io->orig_bio = b;
+ 
+-                      /*
+-                       * Adjust IO stats for each split, otherwise upon queue
+-                       * reentry there will be redundant IO accounting.
+-                       * NOTE: this is a stop-gap fix, a proper fix involves
+-                       * significant refactoring of DM core's bio splitting
+-                       * (by eliminating DM's splitting and just using bio_split)
+-                       */
+-                      part_stat_lock();
+-                      __dm_part_stat_sub(dm_disk(md)->part0,
+-                                         sectors[op_stat_group(bio_op(bio))], ci.sector_count);
+-                      part_stat_unlock();
+-
+                       bio_chain(b, bio);
+                       trace_block_split(b, bio->bi_iter.bi_sector);
+                       submit_bio_noacct(bio);
diff --git a/queue-5.16/kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch b/queue-5.16/kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch

new file mode 100644 (file)

index 0000000..91599b7
--- /dev/null
+++ b/queue-5.16/kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch
@@ -0,0 +1,80 @@
+From 22f7ff0dea9491e90b6fe808ed40c30bd791e5c2 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Sat, 22 Jan 2022 20:55:30 +1000
+Subject: KVM: PPC: Book3S HV Nested: Fix nested HFSCR being clobbered with multiple vCPUs
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit 22f7ff0dea9491e90b6fe808ed40c30bd791e5c2 upstream.
+
+The L0 is storing HFSCR requested by the L1 for the L2 in struct
+kvm_nested_guest when the L1 requests a vCPU enter L2. kvm_nested_guest
+is not a per-vCPU structure. Hilarity ensues.
+
+Fix it by moving the nested hfscr into the vCPU structure together with
+the other per-vCPU nested fields.
+
+Fixes: 8b210a880b35 ("KVM: PPC: Book3S HV Nested: Make nested HFSCR state accessible")
+Cc: stable@vger.kernel.org # v5.15+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220122105530.3477250-1-npiggin@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/include/asm/kvm_book3s_64.h |    1 -
+ arch/powerpc/include/asm/kvm_host.h      |    1 +
+ arch/powerpc/kvm/book3s_hv.c             |    3 +--
+ arch/powerpc/kvm/book3s_hv_nested.c      |    2 +-
+ 4 files changed, 3 insertions(+), 4 deletions(-)
+
+--- a/arch/powerpc/include/asm/kvm_book3s_64.h
++++ b/arch/powerpc/include/asm/kvm_book3s_64.h
+@@ -39,7 +39,6 @@ struct kvm_nested_guest {
+       pgd_t *shadow_pgtable;          /* our page table for this guest */
+       u64 l1_gr_to_hr;                /* L1's addr of part'n-scoped table */
+       u64 process_table;              /* process table entry for this guest */
+-      u64 hfscr;                      /* HFSCR that the L1 requested for this nested guest */
+       long refcnt;                    /* number of pointers to this struct */
+       struct mutex tlb_lock;          /* serialize page faults and tlbies */
+       struct kvm_nested_guest *next;
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -814,6 +814,7 @@ struct kvm_vcpu_arch {
+ 
+       /* For support of nested guests */
+       struct kvm_nested_guest *nested;
++      u64 nested_hfscr;       /* HFSCR that the L1 requested for the nested guest */
+       u32 nested_vcpu_id;
+       gpa_t nested_io_gpr;
+ #endif
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -1731,7 +1731,6 @@ static int kvmppc_handle_exit_hv(struct
+ 
+ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+ {
+-      struct kvm_nested_guest *nested = vcpu->arch.nested;
+       int r;
+       int srcu_idx;
+ 
+@@ -1831,7 +1830,7 @@ static int kvmppc_handle_nested_exit(str
+                * it into a HEAI.
+                */
+               if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
+-                                      (nested->hfscr & (1UL << cause))) {
++                              (vcpu->arch.nested_hfscr & (1UL << cause))) {
+                       vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
+ 
+                       /*
+--- a/arch/powerpc/kvm/book3s_hv_nested.c
++++ b/arch/powerpc/kvm/book3s_hv_nested.c
+@@ -362,7 +362,7 @@ long kvmhv_enter_nested_guest(struct kvm
+       /* set L1 state to L2 state */
+       vcpu->arch.nested = l2;
+       vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+-      l2->hfscr = l2_hv.hfscr;
++      vcpu->arch.nested_hfscr = l2_hv.hfscr;
+       vcpu->arch.regs = l2_regs;
+ 
+       /* Guest must always run with ME enabled, HV disabled. */
diff --git a/queue-5.16/kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch b/queue-5.16/kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch

new file mode 100644 (file)

index 0000000..4117640
--- /dev/null
+++ b/queue-5.16/kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch
@@ -0,0 +1,33 @@
+From 033a3ea59a19df63edb4db6bfdbb357cd028258a Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Wed, 26 Jan 2022 14:18:04 +0100
+Subject: KVM: x86: Check .flags in kvm_cpuid_check_equal() too
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 033a3ea59a19df63edb4db6bfdbb357cd028258a upstream.
+
+kvm_cpuid_check_equal() checks for the (full) equality of the supplied
+CPUID data so .flags need to be checked too.
+
+Reported-by: Sean Christopherson <seanjc@google.com>
+Fixes: c6617c61e8fe ("KVM: x86: Partially allow KVM_SET_CPUID{,2} after KVM_RUN")
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20220126131804.2839410-1-vkuznets@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -113,6 +113,7 @@ static int kvm_cpuid_check_equal(struct
+               orig = &vcpu->arch.cpuid_entries[i];
+               if (e2[i].function != orig->function ||
+                   e2[i].index != orig->index ||
++                  e2[i].flags != orig->flags ||
+                   e2[i].eax != orig->eax || e2[i].ebx != orig->ebx ||
+                   e2[i].ecx != orig->ecx || e2[i].edx != orig->edx)
+                       return -EINVAL;
diff --git a/queue-5.16/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch b/queue-5.16/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch

new file mode 100644 (file)

index 0000000..c92b50b
--- /dev/null
+++ b/queue-5.16/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch
@@ -0,0 +1,173 @@
+From f7e570780efc5cec9b2ed1e0472a7da14e864fdb Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 25 Jan 2022 22:03:58 +0000
+Subject: KVM: x86: Forcibly leave nested virt when SMM state is toggled
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f7e570780efc5cec9b2ed1e0472a7da14e864fdb upstream.
+
+Forcibly leave nested virtualization operation if userspace toggles SMM
+state via KVM_SET_VCPU_EVENTS or KVM_SYNC_X86_EVENTS.  If userspace
+forces the vCPU out of SMM while it's post-VMXON and then injects an SMI,
+vmx_enter_smm() will overwrite vmx->nested.smm.vmxon and end up with both
+vmxon=false and smm.vmxon=false, but all other nVMX state allocated.
+
+Don't attempt to gracefully handle the transition as (a) most transitions
+are nonsensical, e.g. forcing SMM while L2 is running, (b) there isn't
+sufficient information to handle all transitions, e.g. SVM wants access
+to the SMRAM save state, and (c) KVM_SET_VCPU_EVENTS must precede
+KVM_SET_NESTED_STATE during state restore as the latter disallows putting
+the vCPU into L2 if SMM is active, and disallows tagging the vCPU as
+being post-VMXON in SMM if SMM is not active.
+
+Abuse of KVM_SET_VCPU_EVENTS manifests as a WARN and memory leak in nVMX
+due to failure to free vmcs01's shadow VMCS, but the bug goes far beyond
+just a memory leak, e.g. toggling SMM on while L2 is active puts the vCPU
+in an architecturally impossible state.
+
+  WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
+  WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
+  Modules linked in:
+  CPU: 1 PID: 3606 Comm: syz-executor725 Not tainted 5.17.0-rc1-syzkaller #0
+  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+  RIP: 0010:free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
+  RIP: 0010:free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
+  Code: <0f> 0b eb b3 e8 8f 4d 9f 00 e9 f7 fe ff ff 48 89 df e8 92 4d 9f 00
+  Call Trace:
+   <TASK>
+   kvm_arch_vcpu_destroy+0x72/0x2f0 arch/x86/kvm/x86.c:11123
+   kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline]
+   kvm_destroy_vcpus+0x11f/0x290 arch/x86/kvm/../../../virt/kvm/kvm_main.c:460
+   kvm_free_vcpus arch/x86/kvm/x86.c:11564 [inline]
+   kvm_arch_destroy_vm+0x2e8/0x470 arch/x86/kvm/x86.c:11676
+   kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1217 [inline]
+   kvm_put_kvm+0x4fa/0xb00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1250
+   kvm_vm_release+0x3f/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1273
+   __fput+0x286/0x9f0 fs/file_table.c:311
+   task_work_run+0xdd/0x1a0 kernel/task_work.c:164
+   exit_task_work include/linux/task_work.h:32 [inline]
+   do_exit+0xb29/0x2a30 kernel/exit.c:806
+   do_group_exit+0xd2/0x2f0 kernel/exit.c:935
+   get_signal+0x4b0/0x28c0 kernel/signal.c:2862
+   arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:868
+   handle_signal_work kernel/entry/common.c:148 [inline]
+   exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
+   exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:207
+   __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline]
+   syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300
+   do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+   </TASK>
+
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+8112db3ab20e70d50c31@syzkaller.appspotmail.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220125220358.2091737-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h |    1 +
+ arch/x86/kvm/svm/nested.c       |    9 +++++----
+ arch/x86/kvm/svm/svm.c          |    2 +-
+ arch/x86/kvm/svm/svm.h          |    2 +-
+ arch/x86/kvm/vmx/nested.c       |    1 +
+ arch/x86/kvm/x86.c              |    4 +++-
+ 6 files changed, 12 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1497,6 +1497,7 @@ struct kvm_x86_ops {
+ };
+ 
+ struct kvm_x86_nested_ops {
++      void (*leave_nested)(struct kvm_vcpu *vcpu);
+       int (*check_events)(struct kvm_vcpu *vcpu);
+       bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+       void (*triple_fault)(struct kvm_vcpu *vcpu);
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -964,9 +964,9 @@ void svm_free_nested(struct vcpu_svm *sv
+ /*
+  * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+  */
+-void svm_leave_nested(struct vcpu_svm *svm)
++void svm_leave_nested(struct kvm_vcpu *vcpu)
+ {
+-      struct kvm_vcpu *vcpu = &svm->vcpu;
++      struct vcpu_svm *svm = to_svm(vcpu);
+ 
+       if (is_guest_mode(vcpu)) {
+               svm->nested.nested_run_pending = 0;
+@@ -1345,7 +1345,7 @@ static int svm_set_nested_state(struct k
+               return -EINVAL;
+ 
+       if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
+-              svm_leave_nested(svm);
++              svm_leave_nested(vcpu);
+               svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+               return 0;
+       }
+@@ -1410,7 +1410,7 @@ static int svm_set_nested_state(struct k
+        */
+ 
+       if (is_guest_mode(vcpu))
+-              svm_leave_nested(svm);
++              svm_leave_nested(vcpu);
+       else
+               svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+ 
+@@ -1464,6 +1464,7 @@ static bool svm_get_nested_state_pages(s
+ }
+ 
+ struct kvm_x86_nested_ops svm_nested_ops = {
++      .leave_nested = svm_leave_nested,
+       .check_events = svm_check_nested_events,
+       .triple_fault = nested_svm_triple_fault,
+       .get_nested_state_pages = svm_get_nested_state_pages,
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -290,7 +290,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu,
+ 
+       if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+               if (!(efer & EFER_SVME)) {
+-                      svm_leave_nested(svm);
++                      svm_leave_nested(vcpu);
+                       svm_set_gif(svm, true);
+                       /* #GP intercept is still needed for vmware backdoor */
+                       if (!enable_vmware_backdoor)
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -470,7 +470,7 @@ static inline bool nested_exit_on_nmi(st
+ 
+ int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
+                        u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
+-void svm_leave_nested(struct vcpu_svm *svm);
++void svm_leave_nested(struct kvm_vcpu *vcpu);
+ void svm_free_nested(struct vcpu_svm *svm);
+ int svm_allocate_nested(struct vcpu_svm *svm);
+ int nested_svm_vmrun(struct kvm_vcpu *vcpu);
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -6744,6 +6744,7 @@ __init int nested_vmx_hardware_setup(int
+ }
+ 
+ struct kvm_x86_nested_ops vmx_nested_ops = {
++      .leave_nested = vmx_leave_nested,
+       .check_events = vmx_check_nested_events,
+       .hv_timer_pending = nested_vmx_preemption_timer_pending,
+       .triple_fault = nested_vmx_triple_fault,
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4784,8 +4784,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_e
+               vcpu->arch.apic->sipi_vector = events->sipi_vector;
+ 
+       if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+-              if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
++              if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
++                      kvm_x86_ops.nested_ops->leave_nested(vcpu);
+                       kvm_smm_changed(vcpu, events->smi.smm);
++              }
+ 
+               vcpu->arch.smi_pending = events->smi.pending;
+ 
diff --git a/queue-5.16/kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch b/queue-5.16/kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch

new file mode 100644 (file)

index 0000000..83c9d78
--- /dev/null
+++ b/queue-5.16/kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch
@@ -0,0 +1,66 @@
+From 811f95ff95270e6048197821434d9301e3d7f07c Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 25 Jan 2022 21:04:45 +0000
+Subject: KVM: x86: Free kvm_cpuid_entry2 array on post-KVM_RUN KVM_SET_CPUID{,2}
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 811f95ff95270e6048197821434d9301e3d7f07c upstream.
+
+Free the "struct kvm_cpuid_entry2" array on successful post-KVM_RUN
+KVM_SET_CPUID{,2} to fix a memory leak, the callers of kvm_set_cpuid()
+free the array only on failure.
+
+ BUG: memory leak
+ unreferenced object 0xffff88810963a800 (size 2048):
+  comm "syz-executor025", pid 3610, jiffies 4294944928 (age 8.080s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 0d 00 00 00  ................
+    47 65 6e 75 6e 74 65 6c 69 6e 65 49 00 00 00 00  GenuntelineI....
+  backtrace:
+    [<ffffffff814948ee>] kmalloc_node include/linux/slab.h:604 [inline]
+    [<ffffffff814948ee>] kvmalloc_node+0x3e/0x100 mm/util.c:580
+    [<ffffffff814950f2>] kvmalloc include/linux/slab.h:732 [inline]
+    [<ffffffff814950f2>] vmemdup_user+0x22/0x100 mm/util.c:199
+    [<ffffffff8109f5ff>] kvm_vcpu_ioctl_set_cpuid2+0x8f/0xf0 arch/x86/kvm/cpuid.c:423
+    [<ffffffff810711b9>] kvm_arch_vcpu_ioctl+0xb99/0x1e60 arch/x86/kvm/x86.c:5251
+    [<ffffffff8103e92d>] kvm_vcpu_ioctl+0x4ad/0x950 arch/x86/kvm/../../../virt/kvm/kvm_main.c:4066
+    [<ffffffff815afacc>] vfs_ioctl fs/ioctl.c:51 [inline]
+    [<ffffffff815afacc>] __do_sys_ioctl fs/ioctl.c:874 [inline]
+    [<ffffffff815afacc>] __se_sys_ioctl fs/ioctl.c:860 [inline]
+    [<ffffffff815afacc>] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:860
+    [<ffffffff844a3335>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+    [<ffffffff844a3335>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+    [<ffffffff84600068>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Fixes: c6617c61e8fe ("KVM: x86: Partially allow KVM_SET_CPUID{,2} after KVM_RUN")
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+be576ad7655690586eec@syzkaller.appspotmail.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220125210445.2053429-1-seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -338,8 +338,14 @@ static int kvm_set_cpuid(struct kvm_vcpu
+        * KVM_SET_CPUID{,2} again. To support this legacy behavior, check
+        * whether the supplied CPUID data is equal to what's already set.
+        */
+-      if (vcpu->arch.last_vmentry_cpu != -1)
+-              return kvm_cpuid_check_equal(vcpu, e2, nent);
++      if (vcpu->arch.last_vmentry_cpu != -1) {
++              r = kvm_cpuid_check_equal(vcpu, e2, nent);
++              if (r)
++                      return r;
++
++              kvfree(e2);
++              return 0;
++      }
+ 
+       r = kvm_check_cpuid(e2, nent);
+       if (r)
diff --git a/queue-5.16/kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch b/queue-5.16/kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch

new file mode 100644 (file)

index 0000000..9c8f1ce
--- /dev/null
+++ b/queue-5.16/kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch
@@ -0,0 +1,42 @@
+From be4f3b3f82271c3193ce200a996dc70682c8e622 Mon Sep 17 00:00:00 2001
+From: Xiaoyao Li <xiaoyao.li@intel.com>
+Date: Wed, 26 Jan 2022 17:22:24 +0000
+Subject: KVM: x86: Keep MSR_IA32_XSS unchanged for INIT
+
+From: Xiaoyao Li <xiaoyao.li@intel.com>
+
+commit be4f3b3f82271c3193ce200a996dc70682c8e622 upstream.
+
+The SDM was corrected as of version 075 to state that MSR_IA32_XSS is
+reset to zero on Power up and Reset but remains unchanged on INIT.
+
+Fixes: a554d207dc46 ("KVM: X86: Processor States following Reset or INIT")
+Cc: stable@vger.kernel.org
+Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220126172226.2298529-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -11065,6 +11065,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp
+               vcpu->arch.msr_misc_features_enables = 0;
+ 
+               vcpu->arch.xcr0 = XFEATURE_MASK_FP;
++              vcpu->arch.ia32_xss = 0;
+       }
+ 
+       /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */
+@@ -11081,8 +11082,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp
+       cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
+       kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
+ 
+-      vcpu->arch.ia32_xss = 0;
+-
+       static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
+ 
+       kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
diff --git a/queue-5.16/kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch b/queue-5.16/kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch

new file mode 100644 (file)

index 0000000..5bcf888
--- /dev/null
+++ b/queue-5.16/kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch
@@ -0,0 +1,107 @@
+From 5c89be1dd5cfb697614bc13626ba3bd0781aa160 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Mon, 24 Jan 2022 11:36:05 +0100
+Subject: KVM: x86: Move CPUID.(EAX=0x12,ECX=1) mangling to __kvm_update_cpuid_runtime()
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 5c89be1dd5cfb697614bc13626ba3bd0781aa160 upstream.
+
+Full equality check of CPUID data on update (kvm_cpuid_check_equal()) may
+fail for SGX enabled CPUs as CPUID.(EAX=0x12,ECX=1) is currently being
+mangled in kvm_vcpu_after_set_cpuid(). Move it to
+__kvm_update_cpuid_runtime() and split off cpuid_get_supported_xcr0()
+helper as 'vcpu->arch.guest_supported_xcr0' update needs (logically)
+to stay in kvm_vcpu_after_set_cpuid().
+
+Cc: stable@vger.kernel.org
+Fixes: feb627e8d6f6 ("KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN")
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20220124103606.2630588-2-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |   54 +++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 33 insertions(+), 21 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -176,10 +176,26 @@ void kvm_update_pv_runtime(struct kvm_vc
+               vcpu->arch.pv_cpuid.features = best->eax;
+ }
+ 
++/*
++ * Calculate guest's supported XCR0 taking into account guest CPUID data and
++ * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0).
++ */
++static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
++{
++      struct kvm_cpuid_entry2 *best;
++
++      best = cpuid_entry2_find(entries, nent, 0xd, 0);
++      if (!best)
++              return 0;
++
++      return (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
++}
++
+ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
+                                      int nent)
+ {
+       struct kvm_cpuid_entry2 *best;
++      u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
+ 
+       best = cpuid_entry2_find(entries, nent, 1, 0);
+       if (best) {
+@@ -218,6 +234,21 @@ static void __kvm_update_cpuid_runtime(s
+                                          vcpu->arch.ia32_misc_enable_msr &
+                                          MSR_IA32_MISC_ENABLE_MWAIT);
+       }
++
++      /*
++       * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
++       * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
++       * requested XCR0 value.  The enclave's XFRM must be a subset of XCRO
++       * at the time of EENTER, thus adjust the allowed XFRM by the guest's
++       * supported XCR0.  Similar to XCR0 handling, FP and SSE are forced to
++       * '1' even on CPUs that don't support XSAVE.
++       */
++      best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
++      if (best) {
++              best->ecx &= guest_supported_xcr0 & 0xffffffff;
++              best->edx &= guest_supported_xcr0 >> 32;
++              best->ecx |= XFEATURE_MASK_FPSSE;
++      }
+ }
+ 
+ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
+@@ -241,27 +272,8 @@ static void kvm_vcpu_after_set_cpuid(str
+               kvm_apic_set_version(vcpu);
+       }
+ 
+-      best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+-      if (!best)
+-              vcpu->arch.guest_supported_xcr0 = 0;
+-      else
+-              vcpu->arch.guest_supported_xcr0 =
+-                      (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
+-
+-      /*
+-       * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
+-       * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
+-       * requested XCR0 value.  The enclave's XFRM must be a subset of XCRO
+-       * at the time of EENTER, thus adjust the allowed XFRM by the guest's
+-       * supported XCR0.  Similar to XCR0 handling, FP and SSE are forced to
+-       * '1' even on CPUs that don't support XSAVE.
+-       */
+-      best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1);
+-      if (best) {
+-              best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff;
+-              best->edx &= vcpu->arch.guest_supported_xcr0 >> 32;
+-              best->ecx |= XFEATURE_MASK_FPSSE;
+-      }
++      vcpu->arch.guest_supported_xcr0 =
++              cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+ 
+       kvm_update_pv_runtime(vcpu);
+ 
diff --git a/queue-5.16/kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch b/queue-5.16/kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch

new file mode 100644 (file)

index 0000000..78f260c
--- /dev/null
+++ b/queue-5.16/kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch
@@ -0,0 +1,42 @@
+From 05a9e065059e566f218f8778c4d17ee75db56c55 Mon Sep 17 00:00:00 2001
+From: Like Xu <likexu@tencent.com>
+Date: Wed, 26 Jan 2022 17:22:26 +0000
+Subject: KVM: x86: Sync the states size with the XCR0/IA32_XSS at, any time
+
+From: Like Xu <likexu@tencent.com>
+
+commit 05a9e065059e566f218f8778c4d17ee75db56c55 upstream.
+
+XCR0 is reset to 1 by RESET but not INIT and IA32_XSS is zeroed by
+both RESET and INIT. The kvm_set_msr_common()'s handling of MSR_IA32_XSS
+also needs to update kvm_update_cpuid_runtime(). In the above cases, the
+size in bytes of the XSAVE area containing all states enabled by XCR0 or
+(XCR0 | IA32_XSS) needs to be updated.
+
+For simplicity and consistency, existing helpers are used to write values
+and call kvm_update_cpuid_runtime(), and it's not exactly a fast path.
+
+Fixes: a554d207dc46 ("KVM: X86: Processor States following Reset or INIT")
+Cc: stable@vger.kernel.org
+Signed-off-by: Like Xu <likexu@tencent.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220126172226.2298529-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -11065,8 +11065,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp
+ 
+               vcpu->arch.msr_misc_features_enables = 0;
+ 
+-              vcpu->arch.xcr0 = XFEATURE_MASK_FP;
+-              vcpu->arch.ia32_xss = 0;
++              __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
++              __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
+       }
+ 
+       /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */
diff --git a/queue-5.16/kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch b/queue-5.16/kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch

new file mode 100644 (file)

index 0000000..d45a678
--- /dev/null
+++ b/queue-5.16/kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch
@@ -0,0 +1,34 @@
+From 4c282e51e4450b94680d6ca3b10f830483b1f243 Mon Sep 17 00:00:00 2001
+From: Like Xu <likexu@tencent.com>
+Date: Wed, 26 Jan 2022 17:22:25 +0000
+Subject: KVM: x86: Update vCPU's runtime CPUID on write to MSR_IA32_XSS
+
+From: Like Xu <likexu@tencent.com>
+
+commit 4c282e51e4450b94680d6ca3b10f830483b1f243 upstream.
+
+Do a runtime CPUID update for a vCPU if MSR_IA32_XSS is written, as the
+size in bytes of the XSAVE area is affected by the states enabled in XSS.
+
+Fixes: 203000993de5 ("kvm: vmx: add MSR logic for XSAVES")
+Cc: stable@vger.kernel.org
+Signed-off-by: Like Xu <likexu@tencent.com>
+[sean: split out as a separate patch, adjust Fixes tag]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220126172226.2298529-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3508,6 +3508,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
+               if (data & ~supported_xss)
+                       return 1;
+               vcpu->arch.ia32_xss = data;
++              kvm_update_cpuid_runtime(vcpu);
+               break;
+       case MSR_SMI_COUNT:
+               if (!msr_info->host_initiated)
diff --git a/queue-5.16/security-lsm-dentry_init_security-handle-multi-lsm-registration.patch b/queue-5.16/security-lsm-dentry_init_security-handle-multi-lsm-registration.patch

new file mode 100644 (file)

index 0000000..392c519
--- /dev/null
+++ b/queue-5.16/security-lsm-dentry_init_security-handle-multi-lsm-registration.patch
@@ -0,0 +1,158 @@
+From 7f5056b9e7b71149bf11073f00a57fa1ac2921a9 Mon Sep 17 00:00:00 2001
+From: Vivek Goyal <vgoyal@redhat.com>
+Date: Wed, 26 Jan 2022 15:35:14 -0500
+Subject: security, lsm: dentry_init_security() Handle multi LSM registration
+
+From: Vivek Goyal <vgoyal@redhat.com>
+
+commit 7f5056b9e7b71149bf11073f00a57fa1ac2921a9 upstream.
+
+A ceph user has reported that ceph is crashing with kernel NULL pointer
+dereference. Following is the backtrace.
+
+/proc/version: Linux version 5.16.2-arch1-1 (linux@archlinux) (gcc (GCC)
+11.1.0, GNU ld (GNU Binutils) 2.36.1) #1 SMP PREEMPT Thu, 20 Jan 2022
+16:18:29 +0000
+distro / arch: Arch Linux / x86_64
+SELinux is not enabled
+ceph cluster version: 16.2.7 (dd0603118f56ab514f133c8d2e3adfc983942503)
+
+relevant dmesg output:
+[   30.947129] BUG: kernel NULL pointer dereference, address:
+0000000000000000
+[   30.947206] #PF: supervisor read access in kernel mode
+[   30.947258] #PF: error_code(0x0000) - not-present page
+[   30.947310] PGD 0 P4D 0
+[   30.947342] Oops: 0000 [#1] PREEMPT SMP PTI
+[   30.947388] CPU: 5 PID: 778 Comm: touch Not tainted 5.16.2-arch1-1 #1
+86fbf2c313cc37a553d65deb81d98e9dcc2a3659
+[   30.947486] Hardware name: Gigabyte Technology Co., Ltd. B365M
+DS3H/B365M DS3H, BIOS F5 08/13/2019
+[   30.947569] RIP: 0010:strlen+0x0/0x20
+[   30.947616] Code: b6 07 38 d0 74 16 48 83 c7 01 84 c0 74 05 48 39 f7 75
+ec 31 c0 31 d2 89 d6 89 d7 c3 48 89 f8 31 d2 89 d6 89 d7 c3 0
+f 1f 40 00 <80> 3f 00 74 12 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 31
+ff
+[   30.947782] RSP: 0018:ffffa4ed80ffbbb8 EFLAGS: 00010246
+[   30.947836] RAX: 0000000000000000 RBX: ffffa4ed80ffbc60 RCX:
+0000000000000000
+[   30.947904] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
+0000000000000000
+[   30.947971] RBP: ffff94b0d15c0ae0 R08: 0000000000000000 R09:
+0000000000000000
+[   30.948040] R10: 0000000000000000 R11: 0000000000000000 R12:
+0000000000000000
+[   30.948106] R13: 0000000000000001 R14: ffffa4ed80ffbc60 R15:
+0000000000000000
+[   30.948174] FS:  00007fc7520f0740(0000) GS:ffff94b7ced40000(0000)
+knlGS:0000000000000000
+[   30.948252] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   30.948308] CR2: 0000000000000000 CR3: 0000000104a40001 CR4:
+00000000003706e0
+[   30.948376] Call Trace:
+[   30.948404]  <TASK>
+[   30.948431]  ceph_security_init_secctx+0x7b/0x240 [ceph
+49f9c4b9bf5be8760f19f1747e26da33920bce4b]
+[   30.948582]  ceph_atomic_open+0x51e/0x8a0 [ceph
+49f9c4b9bf5be8760f19f1747e26da33920bce4b]
+[   30.948708]  ? get_cached_acl+0x4d/0xa0
+[   30.948759]  path_openat+0x60d/0x1030
+[   30.948809]  do_filp_open+0xa5/0x150
+[   30.948859]  do_sys_openat2+0xc4/0x190
+[   30.948904]  __x64_sys_openat+0x53/0xa0
+[   30.948948]  do_syscall_64+0x5c/0x90
+[   30.948989]  ? exc_page_fault+0x72/0x180
+[   30.949034]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+[   30.949091] RIP: 0033:0x7fc7521e25bb
+[   30.950849] Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00
+00 00 85 c0 75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 0
+0 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 91 00 00 00 48 8b 54 24 28 64 48 2b 14
+25
+
+Core of the problem is that ceph checks for return code from
+security_dentry_init_security() and if return code is 0, it assumes
+everything is fine and continues to call strlen(name), which crashes.
+
+Typically SELinux LSM returns 0 and sets name to "security.selinux" and
+it is not a problem. Or if selinux is not compiled in or disabled, it
+returns -EOPNOTSUPP and ceph deals with it.
+
+But somehow in this configuration, 0 is being returned and "name" is
+not being initialized and that's creating the problem.
+
+Our suspicion is that BPF LSM is registering a hook for
+dentry_init_security() and returns hook default of 0.
+
+LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry,...)
+
+I have not been able to reproduce it just by doing CONFIG_BPF_LSM=y.
+Stephen has tested the patch though and confirms it solves the problem
+for him.
+
+dentry_init_security() is written in such a way that it expects only one
+LSM to register the hook. At least that's the expectation with the current code.
+
+If another LSM registers a hook and returns the default, it will simply return
+0 as of now and that will break ceph.
+
+Hence, the suggestion is to change the semantics of this hook a bit. If there
+are no LSMs or no LSM is taking ownership and initializing security context,
+then return -EOPNOTSUPP. Also allow at most one LSM to initialize security
+context. This hook can't deal with multiple LSMs trying to init security
+context. This patch implements this new behavior.
+
+Reported-by: Stephen Muth <smuth4@gmail.com>
+Tested-by: Stephen Muth <smuth4@gmail.com>
+Suggested-by: Casey Schaufler <casey@schaufler-ca.com>
+Acked-by: Casey Schaufler <casey@schaufler-ca.com>
+Reviewed-by: Serge Hallyn <serge@hallyn.com>
+Cc: Jeff Layton <jlayton@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Paul Moore <paul@paul-moore.com>
+Cc: <stable@vger.kernel.org> # 5.16.0
+Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Paul Moore <paul@paul-moore.com>
+Acked-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: James Morris <jmorris@namei.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/lsm_hook_defs.h |    2 +-
+ security/security.c           |   15 +++++++++++++--
+ 2 files changed, 14 insertions(+), 3 deletions(-)
+
+--- a/include/linux/lsm_hook_defs.h
++++ b/include/linux/lsm_hook_defs.h
+@@ -82,7 +82,7 @@ LSM_HOOK(int, 0, sb_add_mnt_opt, const c
+        int len, void **mnt_opts)
+ LSM_HOOK(int, 0, move_mount, const struct path *from_path,
+        const struct path *to_path)
+-LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry,
++LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry,
+        int mode, const struct qstr *name, const char **xattr_name,
+        void **ctx, u32 *ctxlen)
+ LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode,
+--- a/security/security.c
++++ b/security/security.c
+@@ -1056,8 +1056,19 @@ int security_dentry_init_security(struct
+                                 const char **xattr_name, void **ctx,
+                                 u32 *ctxlen)
+ {
+-      return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode,
+-                              name, xattr_name, ctx, ctxlen);
++      struct security_hook_list *hp;
++      int rc;
++
++      /*
++       * Only one module will provide a security context.
++       */
++      hlist_for_each_entry(hp, &security_hook_heads.dentry_init_security, list) {
++              rc = hp->hook.dentry_init_security(dentry, mode, name,
++                                                 xattr_name, ctx, ctxlen);
++              if (rc != LSM_RET_DEFAULT(dentry_init_security))
++                      return rc;
++      }
++      return LSM_RET_DEFAULT(dentry_init_security);
+ }
+ EXPORT_SYMBOL(security_dentry_init_security);
+ 
diff --git a/queue-5.16/series b/queue-5.16/series

index 498ce57b7d0df1fb9c5f534da6512ee39f9947ae..9525a8c78f49eead8c3da056706390f56959cd3a 100644 (file)
--- a/queue-5.16/series
+++ b/queue-5.16/series
@@ -49,3 +49,16 @@ kvm-lapic-also-cancel-preemption-timer-during-set_lapic.patch
  kvm-svm-never-reject-emulation-due-to-smap-errata-for-sev-guests.patch
  kvm-svm-don-t-intercept-gp-for-sev-guests.patch
  kvm-x86-nsvm-skip-eax-alignment-check-for-non-svm-instructions.patch
+kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch
+kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch
+kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch
+kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch
+kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch
+kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch
+kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch
+kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch
+security-lsm-dentry_init_security-handle-multi-lsm-registration.patch
+arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch
+dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch
+block-add-bio_start_io_acct_time-to-control-start_time.patch
+dm-properly-fix-redundant-bio-based-io-accounting.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 29 Jan 2022 14:48:53 +0000 (15:48 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 29 Jan 2022 14:48:53 +0000 (15:48 +0100)
queue-5.16/arm64-extable-fix-load_unaligned_zeropad-reg-indices.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/block-add-bio_start_io_acct_time-to-control-start_time.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/dm-properly-fix-redundant-bio-based-io-accounting.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/dm-revert-partial-fix-for-redundant-bio-based-io-accounting.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/kvm-ppc-book3s-hv-nested-fix-nested-hfscr-being-clobbered-with-multiple-vcpus.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/kvm-x86-check-.flags-in-kvm_cpuid_check_equal-too.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/kvm-x86-forcibly-leave-nested-virt-when-smm-state-is-toggled.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/kvm-x86-free-kvm_cpuid_entry2-array-on-post-kvm_run-kvm_set_cpuid-2.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/kvm-x86-keep-msr_ia32_xss-unchanged-for-init.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/kvm-x86-move-cpuid.-eax-0x12-ecx-1-mangling-to-__kvm_update_cpuid_runtime.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/kvm-x86-sync-the-states-size-with-the-xcr0-ia32_xss-at-any-time.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/kvm-x86-update-vcpu-s-runtime-cpuid-on-write-to-msr_ia32_xss.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/security-lsm-dentry_init_security-handle-multi-lsm-registration.patch	[new file with mode: 0644]	patch \| blob
queue-5.16/series		patch \| blob \| blame \| history