From a151210dc4c5c6f69a299f766d40566fbe183945 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 8 Nov 2022 09:11:04 +0100 Subject: [PATCH] 6.0-stable patches added patches: ext4-fix-bug_on-when-directory-entry-has-invalid-rec_len.patch ext4-fix-warning-in-ext4_da_release_space.patch ext4-update-the-backup-superblock-s-at-the-end-of-the-online-resize.patch kvm-arm64-fix-bad-dereference-on-mte-enabled-systems.patch kvm-arm64-fix-smpri_el1-tpidr2_el0-trapping-on-vhe.patch kvm-initialize-gfn_to_pfn_cache-locks-in-dedicated-helper.patch kvm-reject-attempts-to-consume-or-refresh-inactive-gfn_to_pfn_cache.patch kvm-vmx-advertise-pmu-lbrs-if-and-only-if-perf-supports-lbrs.patch kvm-vmx-fold-vmx_supported_debugctl-into-vcpu_supported_debugctl.patch kvm-vmx-fully-disable-sgx-if-secondary_exec_encls_exiting-unavailable.patch kvm-vmx-ignore-guest-cpuid-for-host-userspace-writes-to-debugctl.patch kvm-x86-emulator-em_sysexit-should-update-ctxt-mode.patch kvm-x86-emulator-introduce-emulator_recalc_and_set_mode.patch kvm-x86-emulator-update-the-emulation-mode-after-cr0-write.patch kvm-x86-emulator-update-the-emulation-mode-after-rsm.patch kvm-x86-mask-off-reserved-bits-in-cpuid.80000001h.patch kvm-x86-mask-off-reserved-bits-in-cpuid.80000006h.patch kvm-x86-mask-off-reserved-bits-in-cpuid.80000008h.patch kvm-x86-mask-off-reserved-bits-in-cpuid.8000001ah.patch kvm-x86-mask-off-reserved-bits-in-cpuid.8000001fh.patch kvm-x86-smm-number-of-gprs-in-the-smram-image-depends-on-the-image-format.patch parisc-avoid-printing-the-hardware-path-twice.patch parisc-export-iosapic_serial_irq-symbol-for-serial-port-driver.patch parisc-make-8250_gsc-driver-dependend-on-config_parisc.patch x86-syscall-include-asm-ptrace.h-in-syscall_wrapper-header.patch x86-tdx-panic-on-bad-configs-that-ve-on-private-memory-access.patch x86-tdx-prepare-for-using-info-call-for-a-second-purpose.patch --- ...-directory-entry-has-invalid-rec_len.patch | 69 ++++ ...fix-warning-in-ext4_da_release_space.patch | 102 ++++++ ...ck-s-at-the-end-of-the-online-resize.patch | 80 +++++ ...d-dereference-on-mte-enabled-systems.patch | 49 +++ ...smpri_el1-tpidr2_el0-trapping-on-vhe.patch | 143 ++++++++ ..._pfn_cache-locks-in-dedicated-helper.patch | 315 ++++++++++++++++++ ...or-refresh-inactive-gfn_to_pfn_cache.patch | 197 +++++++++++ ...rs-if-and-only-if-perf-supports-lbrs.patch | 45 +++ ...ebugctl-into-vcpu_supported_debugctl.patch | 85 +++++ ...ndary_exec_encls_exiting-unavailable.patch | 58 ++++ ...or-host-userspace-writes-to-debugctl.patch | 68 ++++ ...r-em_sysexit-should-update-ctxt-mode.patch | 36 ++ ...troduce-emulator_recalc_and_set_mode.patch | 163 +++++++++ ...e-the-emulation-mode-after-cr0-write.patch | 55 +++ ...-update-the-emulation-mode-after-rsm.patch | 36 ++ ...off-reserved-bits-in-cpuid.80000001h.patch | 32 ++ ...off-reserved-bits-in-cpuid.80000006h.patch | 35 ++ ...off-reserved-bits-in-cpuid.80000008h.patch | 38 +++ ...off-reserved-bits-in-cpuid.8000001ah.patch | 35 ++ ...off-reserved-bits-in-cpuid.8000001fh.patch | 36 ++ ...am-image-depends-on-the-image-format.patch | 48 +++ ...oid-printing-the-hardware-path-twice.patch | 77 +++++ ...al_irq-symbol-for-serial-port-driver.patch | 30 ++ ...sc-driver-dependend-on-config_parisc.patch | 36 ++ queue-6.0/series | 27 ++ ...m-ptrace.h-in-syscall_wrapper-header.patch | 64 ++++ ...igs-that-ve-on-private-memory-access.patch | 98 ++++++ ...using-info-call-for-a-second-purpose.patch | 61 ++++ 28 files changed, 2118 insertions(+) create mode 100644 
queue-6.0/ext4-fix-bug_on-when-directory-entry-has-invalid-rec_len.patch create mode 100644 queue-6.0/ext4-fix-warning-in-ext4_da_release_space.patch create mode 100644 queue-6.0/ext4-update-the-backup-superblock-s-at-the-end-of-the-online-resize.patch create mode 100644 queue-6.0/kvm-arm64-fix-bad-dereference-on-mte-enabled-systems.patch create mode 100644 queue-6.0/kvm-arm64-fix-smpri_el1-tpidr2_el0-trapping-on-vhe.patch create mode 100644 queue-6.0/kvm-initialize-gfn_to_pfn_cache-locks-in-dedicated-helper.patch create mode 100644 queue-6.0/kvm-reject-attempts-to-consume-or-refresh-inactive-gfn_to_pfn_cache.patch create mode 100644 queue-6.0/kvm-vmx-advertise-pmu-lbrs-if-and-only-if-perf-supports-lbrs.patch create mode 100644 queue-6.0/kvm-vmx-fold-vmx_supported_debugctl-into-vcpu_supported_debugctl.patch create mode 100644 queue-6.0/kvm-vmx-fully-disable-sgx-if-secondary_exec_encls_exiting-unavailable.patch create mode 100644 queue-6.0/kvm-vmx-ignore-guest-cpuid-for-host-userspace-writes-to-debugctl.patch create mode 100644 queue-6.0/kvm-x86-emulator-em_sysexit-should-update-ctxt-mode.patch create mode 100644 queue-6.0/kvm-x86-emulator-introduce-emulator_recalc_and_set_mode.patch create mode 100644 queue-6.0/kvm-x86-emulator-update-the-emulation-mode-after-cr0-write.patch create mode 100644 queue-6.0/kvm-x86-emulator-update-the-emulation-mode-after-rsm.patch create mode 100644 queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000001h.patch create mode 100644 queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000006h.patch create mode 100644 queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000008h.patch create mode 100644 queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.8000001ah.patch create mode 100644 queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.8000001fh.patch create mode 100644 queue-6.0/kvm-x86-smm-number-of-gprs-in-the-smram-image-depends-on-the-image-format.patch create mode 100644 queue-6.0/parisc-avoid-printing-the-hardware-path-twice.patch create mode 100644 queue-6.0/parisc-export-iosapic_serial_irq-symbol-for-serial-port-driver.patch create mode 100644 queue-6.0/parisc-make-8250_gsc-driver-dependend-on-config_parisc.patch create mode 100644 queue-6.0/x86-syscall-include-asm-ptrace.h-in-syscall_wrapper-header.patch create mode 100644 queue-6.0/x86-tdx-panic-on-bad-configs-that-ve-on-private-memory-access.patch create mode 100644 queue-6.0/x86-tdx-prepare-for-using-info-call-for-a-second-purpose.patch diff --git a/queue-6.0/ext4-fix-bug_on-when-directory-entry-has-invalid-rec_len.patch b/queue-6.0/ext4-fix-bug_on-when-directory-entry-has-invalid-rec_len.patch new file mode 100644 index 00000000000..56a4ae0891f --- /dev/null +++ b/queue-6.0/ext4-fix-bug_on-when-directory-entry-has-invalid-rec_len.patch @@ -0,0 +1,69 @@ +From 17a0bc9bd697f75cfdf9b378d5eb2d7409c91340 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= +Date: Wed, 12 Oct 2022 14:13:30 +0100 +Subject: ext4: fix BUG_ON() when directory entry has invalid rec_len +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Luís Henriques + +commit 17a0bc9bd697f75cfdf9b378d5eb2d7409c91340 upstream. + +The rec_len field in the directory entry has to be a multiple of 4. A +corrupted filesystem image can be used to hit a BUG() in +ext4_rec_len_to_disk(), called from make_indexed_dir(). + + ------------[ cut here ]------------ + kernel BUG at fs/ext4/ext4.h:2413! + ... + RIP: 0010:make_indexed_dir+0x53f/0x5f0 + ... + Call Trace: + + ? 
add_dirent_to_buf+0x1b2/0x200 + ext4_add_entry+0x36e/0x480 + ext4_add_nondir+0x2b/0xc0 + ext4_create+0x163/0x200 + path_openat+0x635/0xe90 + do_filp_open+0xb4/0x160 + ? __create_object.isra.0+0x1de/0x3b0 + ? _raw_spin_unlock+0x12/0x30 + do_sys_openat2+0x91/0x150 + __x64_sys_open+0x6c/0xa0 + do_syscall_64+0x3c/0x80 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +The fix simply adds a call to ext4_check_dir_entry() to validate the +directory entry, returning -EFSCORRUPTED if the entry is invalid. + +CC: stable@kernel.org +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216540 +Signed-off-by: Luís Henriques +Link: https://lore.kernel.org/r/20221012131330.32456-1-lhenriques@suse.de +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/namei.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -2259,8 +2259,16 @@ static int make_indexed_dir(handle_t *ha + memset(de, 0, len); /* wipe old data */ + de = (struct ext4_dir_entry_2 *) data2; + top = data2 + len; +- while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) ++ while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) { ++ if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len, ++ (data2 + (blocksize - csum_size) - ++ (char *) de))) { ++ brelse(bh2); ++ brelse(bh); ++ return -EFSCORRUPTED; ++ } + de = de2; ++ } + de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - + (char *) de, blocksize); + diff --git a/queue-6.0/ext4-fix-warning-in-ext4_da_release_space.patch b/queue-6.0/ext4-fix-warning-in-ext4_da_release_space.patch new file mode 100644 index 00000000000..5ae9e23276a --- /dev/null +++ b/queue-6.0/ext4-fix-warning-in-ext4_da_release_space.patch @@ -0,0 +1,102 @@ +From 1b8f787ef547230a3249bcf897221ef0cc78481b Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: Tue, 18 Oct 2022 10:27:01 +0800 +Subject: ext4: fix warning in 'ext4_da_release_space' + +From: Ye Bin + +commit 1b8f787ef547230a3249bcf897221ef0cc78481b upstream. 
+ +Syzkaller reported the following issue: +EXT4-fs (loop0): Free/Dirty block details +EXT4-fs (loop0): free_blocks=0 +EXT4-fs (loop0): dirty_blocks=0 +EXT4-fs (loop0): Block reservation details +EXT4-fs (loop0): i_reserved_data_blocks=0 +EXT4-fs warning (device loop0): ext4_da_release_space:1527: ext4_da_release_space: ino 18, to_free 1 with only 0 reserved data blocks +------------[ cut here ]------------ +WARNING: CPU: 0 PID: 92 at fs/ext4/inode.c:1528 ext4_da_release_space+0x25e/0x370 fs/ext4/inode.c:1524 +Modules linked in: +CPU: 0 PID: 92 Comm: kworker/u4:4 Not tainted 6.0.0-syzkaller-09423-g493ffd6605b2 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 +Workqueue: writeback wb_workfn (flush-7:0) +RIP: 0010:ext4_da_release_space+0x25e/0x370 fs/ext4/inode.c:1528 +RSP: 0018:ffffc900015f6c90 EFLAGS: 00010296 +RAX: 42215896cd52ea00 RBX: 0000000000000000 RCX: 42215896cd52ea00 +RDX: 0000000000000000 RSI: 0000000080000001 RDI: 0000000000000000 +RBP: 1ffff1100e907d96 R08: ffffffff816aa79d R09: fffff520002bece5 +R10: fffff520002bece5 R11: 1ffff920002bece4 R12: ffff888021fd2000 +R13: ffff88807483ecb0 R14: 0000000000000001 R15: ffff88807483e740 +FS: 0000000000000000(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005555569ba628 CR3: 000000000c88e000 CR4: 00000000003506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + ext4_es_remove_extent+0x1ab/0x260 fs/ext4/extents_status.c:1461 + mpage_release_unused_pages+0x24d/0xef0 fs/ext4/inode.c:1589 + ext4_writepages+0x12eb/0x3be0 fs/ext4/inode.c:2852 + do_writepages+0x3c3/0x680 mm/page-writeback.c:2469 + __writeback_single_inode+0xd1/0x670 fs/fs-writeback.c:1587 + writeback_sb_inodes+0xb3b/0x18f0 fs/fs-writeback.c:1870 + wb_writeback+0x41f/0x7b0 fs/fs-writeback.c:2044 + wb_do_writeback fs/fs-writeback.c:2187 [inline] + wb_workfn+0x3cb/0xef0 fs/fs-writeback.c:2227 + process_one_work+0x877/0xdb0 kernel/workqueue.c:2289 + worker_thread+0xb14/0x1330 kernel/workqueue.c:2436 + kthread+0x266/0x300 kernel/kthread.c:376 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 + + +The above issue may happen as follows: +ext4_da_write_begin + ext4_create_inline_data + ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); + ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); +__ext4_ioctl + ext4_ext_migrate -> will lead to eh->eh_entries not zero, and set extent flag +ext4_da_write_begin + ext4_da_convert_inline_data_to_extent + ext4_da_write_inline_data_begin + ext4_da_map_blocks + ext4_insert_delayed_block + if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) + if (!ext4_es_scan_clu(inode, &ext4_es_is_mapped, lblk)) + ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk)); -> will return 1 + allocated = true; + ext4_es_insert_delayed_block(inode, lblk, allocated); +ext4_writepages + mpage_map_and_submit_extent(handle, &mpd, &give_up_on_write); -> return -ENOSPC + mpage_release_unused_pages(&mpd, give_up_on_write); -> give_up_on_write == 1 + ext4_es_remove_extent + ext4_da_release_space(inode, reserved); + if (unlikely(to_free > ei->i_reserved_data_blocks)) + -> to_free == 1 but ei->i_reserved_data_blocks == 0 + -> then trigger warning as above +To solve the above issue, forbid migrating an inode that has inline data.
+ +Cc: stable@kernel.org +Reported-by: syzbot+c740bb18df70ad00952e@syzkaller.appspotmail.com +Signed-off-by: Ye Bin +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20221018022701.683489-1-yebin10@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/migrate.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/ext4/migrate.c ++++ b/fs/ext4/migrate.c +@@ -425,7 +425,8 @@ int ext4_ext_migrate(struct inode *inode + * already is extent-based, error out. + */ + if (!ext4_has_feature_extents(inode->i_sb) || +- (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) ++ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || ++ ext4_has_inline_data(inode)) + return -EINVAL; + + if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) diff --git a/queue-6.0/ext4-update-the-backup-superblock-s-at-the-end-of-the-online-resize.patch b/queue-6.0/ext4-update-the-backup-superblock-s-at-the-end-of-the-online-resize.patch new file mode 100644 index 00000000000..40ea1eb01de --- /dev/null +++ b/queue-6.0/ext4-update-the-backup-superblock-s-at-the-end-of-the-online-resize.patch @@ -0,0 +1,80 @@ +From 9a8c5b0d061554fedd7dbe894e63aa34d0bac7c4 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Thu, 27 Oct 2022 16:04:36 -0400 +Subject: ext4: update the backup superblock's at the end of the online resize + +From: Theodore Ts'o + +commit 9a8c5b0d061554fedd7dbe894e63aa34d0bac7c4 upstream. + +When expanding a file system using online resize, various fields in +the superblock (e.g., s_blocks_count, s_inodes_count, etc.) change. +To update the backup superblocks, the online resize uses the function +update_backups() in fs/ext4/resize.c. This function was not updating +the checksum field in the backup superblocks. This wasn't a big deal +previously, because e2fsck didn't care about the checksum field in the +backup superblock. (And indeed, update_backups() goes all the way +back to the ext3 days, well before we had support for metadata +checksums.) + +However, there is an alternate, more general way of updating +superblock fields, ext4_update_primary_sb() in fs/ext4/ioctl.c. This +function does check the checksum of the backup superblock, and if it +doesn't match will mark the file system as corrupted. That was +clearly not the intent, so avoid aborting the resize when a bad +superblock is found. + +In addition, teach update_backups() to properly update the checksum in +the backup superblocks. We will eventually want to unify +update_backups() with the infrastructure in ext4_update_primary_sb(), but +that's for another day. + +Note: The problem has been around for a while; it just didn't really +matter until ext4_update_primary_sb() was added by commit bbc605cdb1e1 +("ext4: implement support for get/set fs label"). And it became +trivially easy to reproduce after commit 827891a38acc ("ext4: update +the s_overhead_clusters in the backup sb's when resizing") in v6.0.
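[ Illustrative sketch, not part of the patch: the checksum update_backups() now recomputes is a crc32c over the superblock up to, and excluding, the trailing s_checksum field, so rewriting s_block_group_nr in a backup copy forces a recompute. The layout below is a simplification, crc32c() is assumed to be supplied by a library, and this is not ext4's exact code. ]

#include <stdint.h>
#include <stddef.h>

extern uint32_t crc32c(uint32_t seed, const void *buf, size_t len); /* assumed */

struct sb_sketch {              /* stand-in for struct ext4_super_block */
	uint8_t  fields[0x3fc]; /* everything preceding the checksum */
	uint32_t s_checksum;    /* last 4 bytes of the 1 KiB superblock */
};

static void update_backup_csum(struct sb_sketch *es)
{
	/* Any field change (e.g. the group number) invalidates the old value. */
	es->s_checksum = crc32c(~0U, es, offsetof(struct sb_sketch, s_checksum));
}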
+ +Cc: stable@kernel.org # 5.17+ +Fixes: bbc605cdb1e1 ("ext4: implement support for get/set fs label") +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ioctl.c | 3 +-- + fs/ext4/resize.c | 5 +++++ + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -145,9 +145,8 @@ static int ext4_update_backup_sb(struct + if (ext4_has_metadata_csum(sb) && + es->s_checksum != ext4_superblock_csum(sb, es)) { + ext4_msg(sb, KERN_ERR, "Invalid checksum for backup " +- "superblock %llu\n", sb_block); ++ "superblock %llu", sb_block); + unlock_buffer(bh); +- err = -EFSBADCRC; + goto out_bh; + } + func(es, arg); +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -1158,6 +1158,7 @@ static void update_backups(struct super_ + while (group < sbi->s_groups_count) { + struct buffer_head *bh; + ext4_fsblk_t backup_block; ++ struct ext4_super_block *es; + + /* Out of journal space, and can't get more - abort - so sad */ + err = ext4_resize_ensure_credits_batch(handle, 1); +@@ -1186,6 +1187,10 @@ static void update_backups(struct super_ + memcpy(bh->b_data, data, size); + if (rest) + memset(bh->b_data + size, 0, rest); ++ es = (struct ext4_super_block *) bh->b_data; ++ es->s_block_group_nr = cpu_to_le16(group); ++ if (ext4_has_metadata_csum(sb)) ++ es->s_checksum = ext4_superblock_csum(sb, es); + set_buffer_uptodate(bh); + unlock_buffer(bh); + err = ext4_handle_dirty_metadata(handle, NULL, bh); diff --git a/queue-6.0/kvm-arm64-fix-bad-dereference-on-mte-enabled-systems.patch b/queue-6.0/kvm-arm64-fix-bad-dereference-on-mte-enabled-systems.patch new file mode 100644 index 00000000000..077d8346064 --- /dev/null +++ b/queue-6.0/kvm-arm64-fix-bad-dereference-on-mte-enabled-systems.patch @@ -0,0 +1,49 @@ +From b6bcdc9f6b8321e4471ff45413b6410e16762a8d Mon Sep 17 00:00:00 2001 +From: Ryan Roberts +Date: Thu, 27 Oct 2022 13:09:45 +0100 +Subject: KVM: arm64: Fix bad dereference on MTE-enabled systems + +From: Ryan Roberts + +commit b6bcdc9f6b8321e4471ff45413b6410e16762a8d upstream. + +enter_exception64() performs an MTE check, which involves dereferencing +vcpu->kvm. While vcpu has already been fixed up to be a HYP VA pointer, +kvm is still a pointer in the kernel VA space. + +This only affects nVHE configurations with MTE enabled, as in other +cases, the pointer is either valid (VHE) or not dereferenced (!MTE). + +Fix this by first converting kvm to a HYP VA pointer. + +Fixes: ea7fc1bb1cd1 ("KVM: arm64: Introduce MTE VM feature") +Signed-off-by: Ryan Roberts +Reviewed-by: Steven Price +[maz: commit message tidy-up] +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20221027120945.29679-1-ryan.roberts@arm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/hyp/exception.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/arm64/kvm/hyp/exception.c ++++ b/arch/arm64/kvm/hyp/exception.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__) + #error Hypervisor code only! 
+@@ -115,7 +116,7 @@ static void enter_exception64(struct kvm + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + +- if (kvm_has_mte(vcpu->kvm)) ++ if (kvm_has_mte(kern_hyp_va(vcpu->kvm))) + new |= PSR_TCO_BIT; + + new |= (old & PSR_DIT_BIT); diff --git a/queue-6.0/kvm-arm64-fix-smpri_el1-tpidr2_el0-trapping-on-vhe.patch b/queue-6.0/kvm-arm64-fix-smpri_el1-tpidr2_el0-trapping-on-vhe.patch new file mode 100644 index 00000000000..2e136974cbf --- /dev/null +++ b/queue-6.0/kvm-arm64-fix-smpri_el1-tpidr2_el0-trapping-on-vhe.patch @@ -0,0 +1,143 @@ +From 4151bb636acf32bb2e6126cec8216b023117c0e9 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Tue, 1 Nov 2022 12:19:51 +0000 +Subject: KVM: arm64: Fix SMPRI_EL1/TPIDR2_EL0 trapping on VHE + +From: Marc Zyngier + +commit 4151bb636acf32bb2e6126cec8216b023117c0e9 upstream. + +The trapping of SMPRI_EL1 and TPIDR2_EL0 currently only really +work on nVHE, as only this mode uses the fine-grained trapping +that controls these two registers. + +Move the trapping enable/disable code into +__{de,}activate_traps_common(), allowing it to be called when it +actually matters on VHE, and remove the flipping of EL2 control +for TPIDR2_EL0, which only affects the host access of this +register. + +Fixes: 861262ab8627 ("KVM: arm64: Handle SME host state when running guests") +Reported-by: Mark Brown +Reviewed-by: Mark Brown +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/86bkpqer4z.wl-maz@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/hyp/include/hyp/switch.h | 20 +++++++++++++++++++ + arch/arm64/kvm/hyp/nvhe/switch.c | 26 ------------------------- + arch/arm64/kvm/hyp/vhe/switch.c | 8 -------- + 3 files changed, 20 insertions(+), 34 deletions(-) + +diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h +index 6cbbb6c02f66..3330d1b76bdd 100644 +--- a/arch/arm64/kvm/hyp/include/hyp/switch.h ++++ b/arch/arm64/kvm/hyp/include/hyp/switch.h +@@ -87,6 +87,17 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) + + vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); ++ ++ if (cpus_have_final_cap(ARM64_SME)) { ++ sysreg_clear_set_s(SYS_HFGRTR_EL2, ++ HFGxTR_EL2_nSMPRI_EL1_MASK | ++ HFGxTR_EL2_nTPIDR2_EL0_MASK, ++ 0); ++ sysreg_clear_set_s(SYS_HFGWTR_EL2, ++ HFGxTR_EL2_nSMPRI_EL1_MASK | ++ HFGxTR_EL2_nTPIDR2_EL0_MASK, ++ 0); ++ } + } + + static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) +@@ -96,6 +107,15 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) + write_sysreg(0, hstr_el2); + if (kvm_arm_support_pmu_v3()) + write_sysreg(0, pmuserenr_el0); ++ ++ if (cpus_have_final_cap(ARM64_SME)) { ++ sysreg_clear_set_s(SYS_HFGRTR_EL2, 0, ++ HFGxTR_EL2_nSMPRI_EL1_MASK | ++ HFGxTR_EL2_nTPIDR2_EL0_MASK); ++ sysreg_clear_set_s(SYS_HFGWTR_EL2, 0, ++ HFGxTR_EL2_nSMPRI_EL1_MASK | ++ HFGxTR_EL2_nTPIDR2_EL0_MASK); ++ } + } + + static inline void ___activate_traps(struct kvm_vcpu *vcpu) +diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c +index 8e9d49a964be..c2cb46ca4fb6 100644 +--- a/arch/arm64/kvm/hyp/nvhe/switch.c ++++ b/arch/arm64/kvm/hyp/nvhe/switch.c +@@ -55,18 +55,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu) + write_sysreg(val, cptr_el2); + write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); + +- if (cpus_have_final_cap(ARM64_SME)) { +- val = read_sysreg_s(SYS_HFGRTR_EL2); +- val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK | +- 
HFGxTR_EL2_nSMPRI_EL1_MASK); +- write_sysreg_s(val, SYS_HFGRTR_EL2); +- +- val = read_sysreg_s(SYS_HFGWTR_EL2); +- val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK | +- HFGxTR_EL2_nSMPRI_EL1_MASK); +- write_sysreg_s(val, SYS_HFGWTR_EL2); +- } +- + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + +@@ -110,20 +98,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) + + write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); + +- if (cpus_have_final_cap(ARM64_SME)) { +- u64 val; +- +- val = read_sysreg_s(SYS_HFGRTR_EL2); +- val |= HFGxTR_EL2_nTPIDR2_EL0_MASK | +- HFGxTR_EL2_nSMPRI_EL1_MASK; +- write_sysreg_s(val, SYS_HFGRTR_EL2); +- +- val = read_sysreg_s(SYS_HFGWTR_EL2); +- val |= HFGxTR_EL2_nTPIDR2_EL0_MASK | +- HFGxTR_EL2_nSMPRI_EL1_MASK; +- write_sysreg_s(val, SYS_HFGWTR_EL2); +- } +- + cptr = CPTR_EL2_DEFAULT; + if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)) + cptr |= CPTR_EL2_TZ; +diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c +index 7acb87eaa092..1a97391fedd2 100644 +--- a/arch/arm64/kvm/hyp/vhe/switch.c ++++ b/arch/arm64/kvm/hyp/vhe/switch.c +@@ -63,10 +63,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu) + __activate_traps_fpsimd32(vcpu); + } + +- if (cpus_have_final_cap(ARM64_SME)) +- write_sysreg(read_sysreg(sctlr_el2) & ~SCTLR_ELx_ENTP2, +- sctlr_el2); +- + write_sysreg(val, cpacr_el1); + + write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1); +@@ -88,10 +84,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) + */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); + +- if (cpus_have_final_cap(ARM64_SME)) +- write_sysreg(read_sysreg(sctlr_el2) | SCTLR_ELx_ENTP2, +- sctlr_el2); +- + write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); + + if (!arm64_kernel_unmapped_at_el0()) +-- +2.38.1 + diff --git a/queue-6.0/kvm-initialize-gfn_to_pfn_cache-locks-in-dedicated-helper.patch b/queue-6.0/kvm-initialize-gfn_to_pfn_cache-locks-in-dedicated-helper.patch new file mode 100644 index 00000000000..3f4135e37ce --- /dev/null +++ b/queue-6.0/kvm-initialize-gfn_to_pfn_cache-locks-in-dedicated-helper.patch @@ -0,0 +1,315 @@ +From 52491a38b2c2411f3f0229dc6ad610349c704a41 Mon Sep 17 00:00:00 2001 +From: Michal Luczaj +Date: Thu, 13 Oct 2022 21:12:19 +0000 +Subject: KVM: Initialize gfn_to_pfn_cache locks in dedicated helper + +From: Michal Luczaj + +commit 52491a38b2c2411f3f0229dc6ad610349c704a41 upstream. + +Move the gfn_to_pfn_cache lock initialization to another helper and +call the new helper during VM/vCPU creation. There are race +conditions possible due to kvm_gfn_to_pfn_cache_init()'s +ability to re-initialize the cache's locks. + +For example: a race between ioctl(KVM_XEN_HVM_EVTCHN_SEND) and +kvm_gfn_to_pfn_cache_init() leads to a corrupted shinfo gpc lock. + + (thread 1) | (thread 2) + | + kvm_xen_set_evtchn_fast | + read_lock_irqsave(&gpc->lock, ...) | + | kvm_gfn_to_pfn_cache_init + | rwlock_init(&gpc->lock) + read_unlock_irqrestore(&gpc->lock, ...) | + +Rename "cache_init" and "cache_destroy" to activate+deactivate to +avoid implying that the cache really is destroyed/freed. + +Note, there more races in the newly named kvm_gpc_activate() that will +be addressed separately. 
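[ Illustrative sketch, not part of the patch: the corruption above is the generic hazard of re-initializing a lock that a concurrent reader may already hold. A minimal userspace analogue with POSIX rwlocks; the names are invented for the sketch and this is not KVM code. ]

#include <pthread.h>

static pthread_rwlock_t gpc_lock = PTHREAD_RWLOCK_INITIALIZER;

static void *consumer(void *arg)
{
	pthread_rwlock_rdlock(&gpc_lock);  /* thread 1: kvm_xen_set_evtchn_fast */
	/* ... consume the cached mapping ... */
	pthread_rwlock_unlock(&gpc_lock);  /* may unlock a re-initialized lock */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, consumer, NULL);
	/* thread 2: the old cache_init path. Re-initializing a possibly held
	 * lock is undefined behavior; initializing exactly once at VM/vCPU
	 * creation, as this patch does, removes the window entirely. */
	pthread_rwlock_init(&gpc_lock, NULL);
	pthread_join(t, NULL);
	return 0;
}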
+ +Fixes: 982ed0de4753 ("KVM: Reinstate gfn_to_pfn_cache with invalidation support") +Cc: stable@vger.kernel.org +Suggested-by: Sean Christopherson +Signed-off-by: Michal Luczaj +[sean: call out that this is a bug fix] +Signed-off-by: Sean Christopherson +Message-Id: <20221013211234.1318131-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 12 +++++---- + arch/x86/kvm/xen.c | 57 ++++++++++++++++++++++++----------------------- + include/linux/kvm_host.h | 24 ++++++++++++++----- + virt/kvm/pfncache.c | 21 +++++++++-------- + 4 files changed, 66 insertions(+), 48 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -2304,11 +2304,11 @@ static void kvm_write_system_time(struct + + /* we verify if the enable bit is set... */ + if (system_time & 1) { +- kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu, +- KVM_HOST_USES_PFN, system_time & ~1ULL, +- sizeof(struct pvclock_vcpu_time_info)); ++ kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu, ++ KVM_HOST_USES_PFN, system_time & ~1ULL, ++ sizeof(struct pvclock_vcpu_time_info)); + } else { +- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time); ++ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time); + } + + return; +@@ -3377,7 +3377,7 @@ static int kvm_pv_enable_async_pf_int(st + + static void kvmclock_reset(struct kvm_vcpu *vcpu) + { +- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time); ++ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time); + vcpu->arch.time = 0; + } + +@@ -11629,6 +11629,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu + vcpu->arch.regs_avail = ~0; + vcpu->arch.regs_dirty = ~0; + ++ kvm_gpc_init(&vcpu->arch.pv_time); ++ + if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + else +--- a/arch/x86/kvm/xen.c ++++ b/arch/x86/kvm/xen.c +@@ -42,13 +42,13 @@ static int kvm_xen_shared_info_init(stru + int idx = srcu_read_lock(&kvm->srcu); + + if (gfn == GPA_INVALID) { +- kvm_gfn_to_pfn_cache_destroy(kvm, gpc); ++ kvm_gpc_deactivate(kvm, gpc); + goto out; + } + + do { +- ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN, +- gpa, PAGE_SIZE); ++ ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa, ++ PAGE_SIZE); + if (ret) + goto out; + +@@ -554,15 +554,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcp + offsetof(struct compat_vcpu_info, time)); + + if (data->u.gpa == GPA_INVALID) { +- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); ++ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); + r = 0; + break; + } + +- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm, +- &vcpu->arch.xen.vcpu_info_cache, +- NULL, KVM_HOST_USES_PFN, data->u.gpa, +- sizeof(struct vcpu_info)); ++ r = kvm_gpc_activate(vcpu->kvm, ++ &vcpu->arch.xen.vcpu_info_cache, NULL, ++ KVM_HOST_USES_PFN, data->u.gpa, ++ sizeof(struct vcpu_info)); + if (!r) + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + +@@ -570,16 +570,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcp + + case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: + if (data->u.gpa == GPA_INVALID) { +- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, +- &vcpu->arch.xen.vcpu_time_info_cache); ++ kvm_gpc_deactivate(vcpu->kvm, ++ &vcpu->arch.xen.vcpu_time_info_cache); + r = 0; + break; + } + +- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm, +- &vcpu->arch.xen.vcpu_time_info_cache, +- NULL, KVM_HOST_USES_PFN, data->u.gpa, +- sizeof(struct pvclock_vcpu_time_info)); ++ r = kvm_gpc_activate(vcpu->kvm, ++ 
&vcpu->arch.xen.vcpu_time_info_cache, ++ NULL, KVM_HOST_USES_PFN, data->u.gpa, ++ sizeof(struct pvclock_vcpu_time_info)); + if (!r) + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + break; +@@ -590,16 +590,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcp + break; + } + if (data->u.gpa == GPA_INVALID) { +- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, +- &vcpu->arch.xen.runstate_cache); ++ kvm_gpc_deactivate(vcpu->kvm, ++ &vcpu->arch.xen.runstate_cache); + r = 0; + break; + } + +- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm, +- &vcpu->arch.xen.runstate_cache, +- NULL, KVM_HOST_USES_PFN, data->u.gpa, +- sizeof(struct vcpu_runstate_info)); ++ r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache, ++ NULL, KVM_HOST_USES_PFN, data->u.gpa, ++ sizeof(struct vcpu_runstate_info)); + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: +@@ -1817,7 +1816,12 @@ void kvm_xen_init_vcpu(struct kvm_vcpu * + { + vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx; + vcpu->arch.xen.poll_evtchn = 0; ++ + timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); ++ ++ kvm_gpc_init(&vcpu->arch.xen.runstate_cache); ++ kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache); ++ kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache); + } + + void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) +@@ -1825,18 +1829,17 @@ void kvm_xen_destroy_vcpu(struct kvm_vcp + if (kvm_xen_timer_enabled(vcpu)) + kvm_xen_stop_timer(vcpu); + +- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, +- &vcpu->arch.xen.runstate_cache); +- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, +- &vcpu->arch.xen.vcpu_info_cache); +- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, +- &vcpu->arch.xen.vcpu_time_info_cache); ++ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache); ++ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); ++ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache); ++ + del_timer_sync(&vcpu->arch.xen.poll_timer); + } + + void kvm_xen_init_vm(struct kvm *kvm) + { + idr_init(&kvm->arch.xen.evtchn_ports); ++ kvm_gpc_init(&kvm->arch.xen.shinfo_cache); + } + + void kvm_xen_destroy_vm(struct kvm *kvm) +@@ -1844,7 +1847,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm) + struct evtchnfd *evtchnfd; + int i; + +- kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache); ++ kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache); + + idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { + if (!evtchnfd->deliver.port.port) +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -1241,8 +1241,18 @@ int kvm_vcpu_write_guest(struct kvm_vcpu + void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); + + /** +- * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a +- * given guest physical address. ++ * kvm_gpc_init - initialize gfn_to_pfn_cache. ++ * ++ * @gpc: struct gfn_to_pfn_cache object. ++ * ++ * This sets up a gfn_to_pfn_cache by initializing locks. Note, the cache must ++ * be zero-allocated (or zeroed by the caller before init). ++ */ ++void kvm_gpc_init(struct gfn_to_pfn_cache *gpc); ++ ++/** ++ * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest ++ * physical address. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. +@@ -1266,9 +1276,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm + * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before + * accessing the target page. 
+ */ +-int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, +- struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, +- gpa_t gpa, unsigned long len); ++int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, ++ struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, ++ gpa_t gpa, unsigned long len); + + /** + * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache. +@@ -1325,7 +1335,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct + void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); + + /** +- * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache. ++ * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. +@@ -1333,7 +1343,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + * This removes a cache from the @kvm's list to be processed on MMU notifier + * invocation. + */ +-void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); ++void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); + + void kvm_sigset_activate(struct kvm_vcpu *vcpu); + void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -346,17 +346,20 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + } + EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap); + ++void kvm_gpc_init(struct gfn_to_pfn_cache *gpc) ++{ ++ rwlock_init(&gpc->lock); ++ mutex_init(&gpc->refresh_lock); ++} ++EXPORT_SYMBOL_GPL(kvm_gpc_init); + +-int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, +- struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, +- gpa_t gpa, unsigned long len) ++int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, ++ struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, ++ gpa_t gpa, unsigned long len) + { + WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage); + + if (!gpc->active) { +- rwlock_init(&gpc->lock); +- mutex_init(&gpc->refresh_lock); +- + gpc->khva = NULL; + gpc->pfn = KVM_PFN_ERR_FAULT; + gpc->uhva = KVM_HVA_ERR_BAD; +@@ -371,9 +374,9 @@ int kvm_gfn_to_pfn_cache_init(struct kvm + } + return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len); + } +-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init); ++EXPORT_SYMBOL_GPL(kvm_gpc_activate); + +-void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) ++void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) + { + if (gpc->active) { + spin_lock(&kvm->gpc_lock); +@@ -384,4 +387,4 @@ void kvm_gfn_to_pfn_cache_destroy(struct + gpc->active = false; + } + } +-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_destroy); ++EXPORT_SYMBOL_GPL(kvm_gpc_deactivate); diff --git a/queue-6.0/kvm-reject-attempts-to-consume-or-refresh-inactive-gfn_to_pfn_cache.patch b/queue-6.0/kvm-reject-attempts-to-consume-or-refresh-inactive-gfn_to_pfn_cache.patch new file mode 100644 index 00000000000..4959d46ed6f --- /dev/null +++ b/queue-6.0/kvm-reject-attempts-to-consume-or-refresh-inactive-gfn_to_pfn_cache.patch @@ -0,0 +1,197 @@ +From ecbcf030b45666ad11bc98565e71dfbcb7be4393 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 13 Oct 2022 21:12:20 +0000 +Subject: KVM: Reject attempts to consume or refresh inactive gfn_to_pfn_cache + +From: Sean Christopherson + +commit ecbcf030b45666ad11bc98565e71dfbcb7be4393 upstream. + +Reject kvm_gpc_check() and kvm_gpc_refresh() if the cache is inactive. 
+Not checking the active flag during refresh is particularly egregious, as +KVM can end up with a valid, inactive cache, which can lead to a variety +of use-after-free bugs, e.g. consuming a NULL kernel pointer or missing +an mmu_notifier invalidation due to the cache not being on the list of +gfns to invalidate. + +Note, "active" needs to be set if and only if the cache is on the list +of caches, i.e. is reachable via mmu_notifier events. If a relevant +mmu_notifier event occurs while the cache is "active" but not on the +list, KVM will not acquire the cache's lock and so will not serailize +the mmu_notifier event with active users and/or kvm_gpc_refresh(). + +A race between KVM_XEN_ATTR_TYPE_SHARED_INFO and KVM_XEN_HVM_EVTCHN_SEND +can be exploited to trigger the bug. + +1. Deactivate shinfo cache: + +kvm_xen_hvm_set_attr +case KVM_XEN_ATTR_TYPE_SHARED_INFO + kvm_gpc_deactivate + kvm_gpc_unmap + gpc->valid = false + gpc->khva = NULL + gpc->active = false + +Result: active = false, valid = false + +2. Cause cache refresh: + +kvm_arch_vm_ioctl +case KVM_XEN_HVM_EVTCHN_SEND + kvm_xen_hvm_evtchn_send + kvm_xen_set_evtchn + kvm_xen_set_evtchn_fast + kvm_gpc_check + return -EWOULDBLOCK because !gpc->valid + kvm_xen_set_evtchn_fast + return -EWOULDBLOCK + kvm_gpc_refresh + hva_to_pfn_retry + gpc->valid = true + gpc->khva = not NULL + +Result: active = false, valid = true + +3. Race ioctl KVM_XEN_HVM_EVTCHN_SEND against ioctl +KVM_XEN_ATTR_TYPE_SHARED_INFO: + +kvm_arch_vm_ioctl +case KVM_XEN_HVM_EVTCHN_SEND + kvm_xen_hvm_evtchn_send + kvm_xen_set_evtchn + kvm_xen_set_evtchn_fast + read_lock gpc->lock + kvm_xen_hvm_set_attr case + KVM_XEN_ATTR_TYPE_SHARED_INFO + mutex_lock kvm->lock + kvm_xen_shared_info_init + kvm_gpc_activate + gpc->khva = NULL + kvm_gpc_check + [ Check passes because gpc->valid is + still true, even though gpc->khva + is already NULL. ] + shinfo = gpc->khva + pending_bits = shinfo->evtchn_pending + CRASH: test_and_set_bit(..., pending_bits) + +Fixes: 982ed0de4753 ("KVM: Reinstate gfn_to_pfn_cache with invalidation support") +Cc: stable@vger.kernel.org +Reported-by: : Michal Luczaj +Signed-off-by: Sean Christopherson +Message-Id: <20221013211234.1318131-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/pfncache.c | 41 ++++++++++++++++++++++++++++++++++------- + 1 file changed, 34 insertions(+), 7 deletions(-) + +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -81,6 +81,9 @@ bool kvm_gfn_to_pfn_cache_check(struct k + { + struct kvm_memslots *slots = kvm_memslots(kvm); + ++ if (!gpc->active) ++ return false; ++ + if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE) + return false; + +@@ -240,10 +243,11 @@ int kvm_gfn_to_pfn_cache_refresh(struct + { + struct kvm_memslots *slots = kvm_memslots(kvm); + unsigned long page_offset = gpa & ~PAGE_MASK; +- kvm_pfn_t old_pfn, new_pfn; ++ bool unmap_old = false; + unsigned long old_uhva; ++ kvm_pfn_t old_pfn; + void *old_khva; +- int ret = 0; ++ int ret; + + /* + * If must fit within a single page. The 'len' argument is +@@ -261,6 +265,11 @@ int kvm_gfn_to_pfn_cache_refresh(struct + + write_lock_irq(&gpc->lock); + ++ if (!gpc->active) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ + old_pfn = gpc->pfn; + old_khva = gpc->khva - offset_in_page(gpc->khva); + old_uhva = gpc->uhva; +@@ -291,6 +300,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct + /* If the HVA→PFN mapping was already valid, don't unmap it. 
*/ + old_pfn = KVM_PFN_ERR_FAULT; + old_khva = NULL; ++ ret = 0; + } + + out: +@@ -305,14 +315,15 @@ int kvm_gfn_to_pfn_cache_refresh(struct + gpc->khva = NULL; + } + +- /* Snapshot the new pfn before dropping the lock! */ +- new_pfn = gpc->pfn; ++ /* Detect a pfn change before dropping the lock! */ ++ unmap_old = (old_pfn != gpc->pfn); + ++out_unlock: + write_unlock_irq(&gpc->lock); + + mutex_unlock(&gpc->refresh_lock); + +- if (old_pfn != new_pfn) ++ if (unmap_old) + gpc_unmap_khva(kvm, old_pfn, old_khva); + + return ret; +@@ -366,11 +377,19 @@ int kvm_gpc_activate(struct kvm *kvm, st + gpc->vcpu = vcpu; + gpc->usage = usage; + gpc->valid = false; +- gpc->active = true; + + spin_lock(&kvm->gpc_lock); + list_add(&gpc->list, &kvm->gpc_list); + spin_unlock(&kvm->gpc_lock); ++ ++ /* ++ * Activate the cache after adding it to the list, a concurrent ++ * refresh must not establish a mapping until the cache is ++ * reachable by mmu_notifier events. ++ */ ++ write_lock_irq(&gpc->lock); ++ gpc->active = true; ++ write_unlock_irq(&gpc->lock); + } + return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len); + } +@@ -379,12 +398,20 @@ EXPORT_SYMBOL_GPL(kvm_gpc_activate); + void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) + { + if (gpc->active) { ++ /* ++ * Deactivate the cache before removing it from the list, KVM ++ * must stall mmu_notifier events until all users go away, i.e. ++ * until gpc->lock is dropped and refresh is guaranteed to fail. ++ */ ++ write_lock_irq(&gpc->lock); ++ gpc->active = false; ++ write_unlock_irq(&gpc->lock); ++ + spin_lock(&kvm->gpc_lock); + list_del(&gpc->list); + spin_unlock(&kvm->gpc_lock); + + kvm_gfn_to_pfn_cache_unmap(kvm, gpc); +- gpc->active = false; + } + } + EXPORT_SYMBOL_GPL(kvm_gpc_deactivate); diff --git a/queue-6.0/kvm-vmx-advertise-pmu-lbrs-if-and-only-if-perf-supports-lbrs.patch b/queue-6.0/kvm-vmx-advertise-pmu-lbrs-if-and-only-if-perf-supports-lbrs.patch new file mode 100644 index 00000000000..1b626d8b7b2 --- /dev/null +++ b/queue-6.0/kvm-vmx-advertise-pmu-lbrs-if-and-only-if-perf-supports-lbrs.patch @@ -0,0 +1,45 @@ +From 145dfad998eac74abc59219d936e905766ba2d98 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 6 Oct 2022 00:03:08 +0000 +Subject: KVM: VMX: Advertise PMU LBRs if and only if perf supports LBRs + +From: Sean Christopherson + +commit 145dfad998eac74abc59219d936e905766ba2d98 upstream. + +Advertise LBR support to userspace via MSR_IA32_PERF_CAPABILITIES if and +only if perf fully supports LBRs. Perf may disable LBRs (by zeroing the +number of LBRs) even on platforms the allegedly support LBRs, e.g. if +probing any LBR MSRs during setup fails. 
+ +Fixes: be635e34c284 ("KVM: vmx/pmu: Expose LBR_FMT in the MSR_IA32_PERF_CAPABILITIES") +Reported-by: Like Xu +Signed-off-by: Sean Christopherson +Message-Id: <20221006000314.73240-3-seanjc@google.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/capabilities.h | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/vmx/capabilities.h ++++ b/arch/x86/kvm/vmx/capabilities.h +@@ -404,6 +404,7 @@ static inline bool vmx_pebs_supported(vo + static inline u64 vmx_get_perf_capabilities(void) + { + u64 perf_cap = PMU_CAP_FW_WRITES; ++ struct x86_pmu_lbr lbr; + u64 host_perf_cap = 0; + + if (!enable_pmu) +@@ -412,7 +413,8 @@ static inline u64 vmx_get_perf_capabilit + if (boot_cpu_has(X86_FEATURE_PDCM)) + rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); + +- perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; ++ if (x86_perf_get_lbr(&lbr) >= 0 && lbr.nr) ++ perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; + + if (vmx_pebs_supported()) { + perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; diff --git a/queue-6.0/kvm-vmx-fold-vmx_supported_debugctl-into-vcpu_supported_debugctl.patch b/queue-6.0/kvm-vmx-fold-vmx_supported_debugctl-into-vcpu_supported_debugctl.patch new file mode 100644 index 00000000000..3ef0f551470 --- /dev/null +++ b/queue-6.0/kvm-vmx-fold-vmx_supported_debugctl-into-vcpu_supported_debugctl.patch @@ -0,0 +1,85 @@ +From 18e897d213cb152c786abab14919196bd9dc3a9f Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 6 Oct 2022 00:03:09 +0000 +Subject: KVM: VMX: Fold vmx_supported_debugctl() into vcpu_supported_debugctl() + +From: Sean Christopherson + +commit 18e897d213cb152c786abab14919196bd9dc3a9f upstream. + +Fold vmx_supported_debugctl() into vcpu_supported_debugctl(), its only +caller. Setting bits only to clear them a few instructions later is +rather silly, and splitting the logic makes things seem more complicated +than they actually are. + +Opportunistically drop DEBUGCTLMSR_LBR_MASK now that there's a single +reference to the pair of bits. The extra layer of indirection provides +no meaningful value and makes it unnecessarily tedious to understand +what KVM is doing. + +No functional change. + +Signed-off-by: Sean Christopherson +Message-Id: <20221006000314.73240-4-seanjc@google.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/capabilities.h | 15 --------------- + arch/x86/kvm/vmx/vmx.c | 12 +++++++----- + 2 files changed, 7 insertions(+), 20 deletions(-) + +--- a/arch/x86/kvm/vmx/capabilities.h ++++ b/arch/x86/kvm/vmx/capabilities.h +@@ -24,8 +24,6 @@ extern int __read_mostly pt_mode; + #define PMU_CAP_FW_WRITES (1ULL << 13) + #define PMU_CAP_LBR_FMT 0x3f + +-#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) +- + struct nested_vmx_msrs { + /* + * We only store the "true" versions of the VMX capability MSRs. 
We +@@ -425,19 +423,6 @@ static inline u64 vmx_get_perf_capabilit + return perf_cap; + } + +-static inline u64 vmx_supported_debugctl(void) +-{ +- u64 debugctl = 0; +- +- if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) +- debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; +- +- if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) +- debugctl |= DEBUGCTLMSR_LBR_MASK; +- +- return debugctl; +-} +- + static inline bool cpu_has_notify_vmexit(void) + { + return vmcs_config.cpu_based_2nd_exec_ctrl & +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -2018,13 +2018,15 @@ static u64 nested_vmx_truncate_sysenter_ + + static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) + { +- u64 debugctl = vmx_supported_debugctl(); ++ u64 debugctl = 0; + +- if (!intel_pmu_lbr_is_enabled(vcpu)) +- debugctl &= ~DEBUGCTLMSR_LBR_MASK; ++ if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && ++ guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) ++ debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; + +- if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) +- debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; ++ if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) && ++ intel_pmu_lbr_is_enabled(vcpu)) ++ debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; + + return debugctl; + } diff --git a/queue-6.0/kvm-vmx-fully-disable-sgx-if-secondary_exec_encls_exiting-unavailable.patch b/queue-6.0/kvm-vmx-fully-disable-sgx-if-secondary_exec_encls_exiting-unavailable.patch new file mode 100644 index 00000000000..eb068a6568a --- /dev/null +++ b/queue-6.0/kvm-vmx-fully-disable-sgx-if-secondary_exec_encls_exiting-unavailable.patch @@ -0,0 +1,58 @@ +From 1c1a41497ab879ac9608f3047f230af833eeef3d Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Tue, 25 Oct 2022 08:37:49 -0400 +Subject: KVM: VMX: fully disable SGX if SECONDARY_EXEC_ENCLS_EXITING unavailable + +From: Emanuele Giuseppe Esposito + +commit 1c1a41497ab879ac9608f3047f230af833eeef3d upstream. + +Clear enable_sgx if ENCLS-exiting is not supported, i.e. if SGX cannot be +virtualized. When KVM is loaded, adjust_vmx_controls checks that the +bit is available before enabling the feature; however, other parts of the +code check enable_sgx and not clearing the variable caused two different +bugs, mostly affecting nested virtualization scenarios. + +First, because enable_sgx remained true, SECONDARY_EXEC_ENCLS_EXITING +would be marked available in the capability MSR that are accessed by a +nested hypervisor. KVM would then propagate the control from vmcs12 +to vmcs02 even if it isn't supported by the processor, thus causing an +unexpected VM-Fail (exit code 0x7) in L1. + +Second, vmx_set_cpu_caps() would not clear the SGX bits when hardware +support is unavailable. This is a much less problematic bug as it only +happens if SGX is soft-disabled (available in the processor but hidden +in CPUID) or if SGX is supported for bare metal but not in the VMCS +(will never happen when running on bare metal, but can theoertically +happen when running in a VM). + +Last but not least, this ensures that module params in sysfs reflect +KVM's actual configuration. 
+ +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=2127128 +Fixes: 72add915fbd5 ("KVM: VMX: Enable SGX virtualization for SGX1, SGX2 and LC") +Cc: stable@vger.kernel.org +Suggested-by: Sean Christopherson +Suggested-by: Bandan Das +Signed-off-by: Emanuele Giuseppe Esposito +Message-Id: <20221025123749.2201649-1-eesposit@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -8281,6 +8281,11 @@ static __init int hardware_setup(void) + if (!cpu_has_virtual_nmis()) + enable_vnmi = 0; + ++#ifdef CONFIG_X86_SGX_KVM ++ if (!cpu_has_vmx_encls_vmexit()) ++ enable_sgx = false; ++#endif ++ + /* + * set_apic_access_page_addr() is used to reload apic access + * page upon invalidation. No need to do anything if not diff --git a/queue-6.0/kvm-vmx-ignore-guest-cpuid-for-host-userspace-writes-to-debugctl.patch b/queue-6.0/kvm-vmx-ignore-guest-cpuid-for-host-userspace-writes-to-debugctl.patch new file mode 100644 index 00000000000..bc80aa5186b --- /dev/null +++ b/queue-6.0/kvm-vmx-ignore-guest-cpuid-for-host-userspace-writes-to-debugctl.patch @@ -0,0 +1,68 @@ +From b333b8ebb85d62469f32b52fa03fd7d1522afc03 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 6 Oct 2022 00:03:10 +0000 +Subject: KVM: VMX: Ignore guest CPUID for host userspace writes to DEBUGCTL + +From: Sean Christopherson + +commit b333b8ebb85d62469f32b52fa03fd7d1522afc03 upstream. + +Ignore guest CPUID for host userspace writes to the DEBUGCTL MSR, KVM's +ABI is that setting CPUID vs. state can be done in any order, i.e. KVM +allows userspace to stuff MSRs prior to setting the guest's CPUID that +makes the new MSR "legal". + +Keep the vmx_get_perf_capabilities() check for guest writes, even though +it's technically unnecessary since the vCPU's PERF_CAPABILITIES is +consulted when refreshing LBR support. A future patch will clean up +vmx_get_perf_capabilities() to avoid the RDMSR on every call, at which +point the paranoia will incur no meaningful overhead. + +Note, prior to vmx_get_perf_capabilities() checking that the host fully +supports LBRs via x86_perf_get_lbr(), KVM effectively relied on +intel_pmu_lbr_is_enabled() to guard against host userspace enabling LBRs +on platforms without full support. 
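[ Illustrative sketch, not part of the patch: the ordering the ABI permits is easiest to see from the userspace side. KVM_SET_MSRS and KVM_SET_CPUID2 are the real ioctls, but the helper and the migration scenario are assumptions of the sketch, and error handling is elided. ]

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* A migration target may restore MSRs before it restores guest CPUID via
 * KVM_SET_CPUID2, so this host-initiated write must not be filtered by
 * guest CPUID. */
static int restore_debugctl(int vcpu_fd, __u64 debugctl)
{
	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} m;

	memset(&m, 0, sizeof(m));
	m.hdr.nmsrs = 1;
	m.entry.index = 0x1d9;	/* MSR_IA32_DEBUGCTLMSR */
	m.entry.data = debugctl;

	return ioctl(vcpu_fd, KVM_SET_MSRS, &m);
}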
+ +Fixes: c646236344e9 ("KVM: vmx/pmu: Add PMU_CAP_LBR_FMT check when guest LBR is enabled") +Signed-off-by: Sean Christopherson +Message-Id: <20221006000314.73240-5-seanjc@google.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -2016,16 +2016,16 @@ static u64 nested_vmx_truncate_sysenter_ + return (unsigned long)data; + } + +-static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) ++static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) + { + u64 debugctl = 0; + + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && +- guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) ++ (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))) + debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; + + if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) && +- intel_pmu_lbr_is_enabled(vcpu)) ++ (host_initiated || intel_pmu_lbr_is_enabled(vcpu))) + debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; + + return debugctl; +@@ -2100,7 +2100,9 @@ static int vmx_set_msr(struct kvm_vcpu * + vmcs_writel(GUEST_SYSENTER_ESP, data); + break; + case MSR_IA32_DEBUGCTLMSR: { +- u64 invalid = data & ~vcpu_supported_debugctl(vcpu); ++ u64 invalid; ++ ++ invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated); + if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) { + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "%s: BTF|LBR in IA32_DEBUGCTLMSR 0x%llx, nop\n", diff --git a/queue-6.0/kvm-x86-emulator-em_sysexit-should-update-ctxt-mode.patch b/queue-6.0/kvm-x86-emulator-em_sysexit-should-update-ctxt-mode.patch new file mode 100644 index 00000000000..a137cf2f695 --- /dev/null +++ b/queue-6.0/kvm-x86-emulator-em_sysexit-should-update-ctxt-mode.patch @@ -0,0 +1,36 @@ +From 5015bb89b58225f97df6ac44383e7e8c8662c8c9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 25 Oct 2022 15:47:28 +0300 +Subject: KVM: x86: emulator: em_sysexit should update ctxt->mode + +From: Maxim Levitsky + +commit 5015bb89b58225f97df6ac44383e7e8c8662c8c9 upstream. + +SYSEXIT is one of the instructions that can change the +processor mode, thus ctxt->mode should be updated after it. + +Note that this is likely a benign bug, because the only problematic +mode change is from 32 bit to 64 bit which can lead to truncation of RIP, +and it is not possible to do with sysexit, +since sysexit running in 32 bit mode will be limited to 32 bit version. 
+ +Signed-off-by: Maxim Levitsky +Message-Id: <20221025124741.228045-11-mlevitsk@redhat.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -2874,6 +2874,7 @@ static int em_sysexit(struct x86_emulate + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); + + ctxt->_eip = rdx; ++ ctxt->mode = usermode; + *reg_write(ctxt, VCPU_REGS_RSP) = rcx; + + return X86EMUL_CONTINUE; diff --git a/queue-6.0/kvm-x86-emulator-introduce-emulator_recalc_and_set_mode.patch b/queue-6.0/kvm-x86-emulator-introduce-emulator_recalc_and_set_mode.patch new file mode 100644 index 00000000000..608b5a22179 --- /dev/null +++ b/queue-6.0/kvm-x86-emulator-introduce-emulator_recalc_and_set_mode.patch @@ -0,0 +1,163 @@ +From d087e0f79fa0dd336a9a6b2f79ec23120f5eff73 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 25 Oct 2022 15:47:29 +0300 +Subject: KVM: x86: emulator: introduce emulator_recalc_and_set_mode + +From: Maxim Levitsky + +commit d087e0f79fa0dd336a9a6b2f79ec23120f5eff73 upstream. + +Some instructions update the cpu execution mode, which needs to update the +emulation mode. + +Extract this code, and make assign_eip_far use it. + +assign_eip_far now reads CS, instead of getting it via a parameter, +which is ok, because callers always assign CS to the same value +before calling this function. + +No functional change is intended. + +Signed-off-by: Maxim Levitsky +Message-Id: <20221025124741.228045-12-mlevitsk@redhat.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 85 ++++++++++++++++++++++++++++++++----------------- + 1 file changed, 57 insertions(+), 28 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -791,8 +791,7 @@ static int linearize(struct x86_emulate_ + ctxt->mode, linear); + } + +-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, +- enum x86emul_mode mode) ++static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst) + { + ulong linear; + int rc; +@@ -802,41 +801,71 @@ static inline int assign_eip(struct x86_ + + if (ctxt->op_bytes != sizeof(unsigned long)) + addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); +- rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear); ++ rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear); + if (rc == X86EMUL_CONTINUE) + ctxt->_eip = addr.ea; + return rc; + } + ++static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt) ++{ ++ u64 efer; ++ struct desc_struct cs; ++ u16 selector; ++ u32 base3; ++ ++ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); ++ ++ if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) { ++ /* Real mode. cpu must not have long mode active */ ++ if (efer & EFER_LMA) ++ return X86EMUL_UNHANDLEABLE; ++ ctxt->mode = X86EMUL_MODE_REAL; ++ return X86EMUL_CONTINUE; ++ } ++ ++ if (ctxt->eflags & X86_EFLAGS_VM) { ++ /* Protected/VM86 mode. 
cpu must not have long mode active */ ++ if (efer & EFER_LMA) ++ return X86EMUL_UNHANDLEABLE; ++ ctxt->mode = X86EMUL_MODE_VM86; ++ return X86EMUL_CONTINUE; ++ } ++ ++ if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS)) ++ return X86EMUL_UNHANDLEABLE; ++ ++ if (efer & EFER_LMA) { ++ if (cs.l) { ++ /* Proper long mode */ ++ ctxt->mode = X86EMUL_MODE_PROT64; ++ } else if (cs.d) { ++ /* 32 bit compatibility mode*/ ++ ctxt->mode = X86EMUL_MODE_PROT32; ++ } else { ++ ctxt->mode = X86EMUL_MODE_PROT16; ++ } ++ } else { ++ /* Legacy 32 bit / 16 bit mode */ ++ ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; ++ } ++ ++ return X86EMUL_CONTINUE; ++} ++ + static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) + { +- return assign_eip(ctxt, dst, ctxt->mode); ++ return assign_eip(ctxt, dst); + } + +-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, +- const struct desc_struct *cs_desc) ++static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst) + { +- enum x86emul_mode mode = ctxt->mode; +- int rc; ++ int rc = emulator_recalc_and_set_mode(ctxt); + +-#ifdef CONFIG_X86_64 +- if (ctxt->mode >= X86EMUL_MODE_PROT16) { +- if (cs_desc->l) { +- u64 efer = 0; ++ if (rc != X86EMUL_CONTINUE) ++ return rc; + +- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); +- if (efer & EFER_LMA) +- mode = X86EMUL_MODE_PROT64; +- } else +- mode = X86EMUL_MODE_PROT32; /* temporary value */ +- } +-#endif +- if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) +- mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; +- rc = assign_eip(ctxt, dst, mode); +- if (rc == X86EMUL_CONTINUE) +- ctxt->mode = mode; +- return rc; ++ return assign_eip(ctxt, dst); + } + + static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) +@@ -2170,7 +2199,7 @@ static int em_jmp_far(struct x86_emulate + if (rc != X86EMUL_CONTINUE) + return rc; + +- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); ++ rc = assign_eip_far(ctxt, ctxt->src.val); + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; +@@ -2248,7 +2277,7 @@ static int em_ret_far(struct x86_emulate + &new_desc); + if (rc != X86EMUL_CONTINUE) + return rc; +- rc = assign_eip_far(ctxt, eip, &new_desc); ++ rc = assign_eip_far(ctxt, eip); + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; +@@ -3468,7 +3497,7 @@ static int em_call_far(struct x86_emulat + if (rc != X86EMUL_CONTINUE) + return rc; + +- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); ++ rc = assign_eip_far(ctxt, ctxt->src.val); + if (rc != X86EMUL_CONTINUE) + goto fail; + diff --git a/queue-6.0/kvm-x86-emulator-update-the-emulation-mode-after-cr0-write.patch b/queue-6.0/kvm-x86-emulator-update-the-emulation-mode-after-cr0-write.patch new file mode 100644 index 00000000000..abde3aa4c55 --- /dev/null +++ b/queue-6.0/kvm-x86-emulator-update-the-emulation-mode-after-cr0-write.patch @@ -0,0 +1,55 @@ +From ad8f9e69942c7db90758d9d774157e53bce94840 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 25 Oct 2022 15:47:31 +0300 +Subject: KVM: x86: emulator: update the emulation mode after CR0 write + +From: Maxim Levitsky + +commit ad8f9e69942c7db90758d9d774157e53bce94840 upstream. + +Update the emulation mode when handling writes to CR0, because +toggling CR0.PE switches between Real and Protected Mode, and toggling +CR0.PG when EFER.LME=1 switches between Long and Protected Mode. 
+
+This is likely a benign bug because there is no writeback of state,
+other than the RIP increment, and when toggling CR0.PE, the CPU has
+to execute code from a very low memory address.
+
+Signed-off-by: Maxim Levitsky
+Message-Id: <20221025124741.228045-14-mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/emulate.c | 16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -3639,11 +3639,25 @@ static int em_movbe(struct x86_emulate_c
+
+ static int em_cr_write(struct x86_emulate_ctxt *ctxt)
+ {
+-        if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
++        int cr_num = ctxt->modrm_reg;
++        int r;
++
++        if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
+                 return emulate_gp(ctxt, 0);
+
+         /* Disable writeback. */
+         ctxt->dst.type = OP_NONE;
++
++        if (cr_num == 0) {
++                /*
++                 * CR0 write might have updated CR0.PE and/or CR0.PG
++                 * which can affect the cpu's execution mode.
++                 */
++                r = emulator_recalc_and_set_mode(ctxt);
++                if (r != X86EMUL_CONTINUE)
++                        return r;
++        }
++
+         return X86EMUL_CONTINUE;
+ }
+
diff --git a/queue-6.0/kvm-x86-emulator-update-the-emulation-mode-after-rsm.patch b/queue-6.0/kvm-x86-emulator-update-the-emulation-mode-after-rsm.patch
new file mode 100644
index 00000000000..1f20993eaa8
--- /dev/null
+++ b/queue-6.0/kvm-x86-emulator-update-the-emulation-mode-after-rsm.patch
@@ -0,0 +1,36 @@
+From 055f37f84e304e59c046d1accfd8f08462f52c4c Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky
+Date: Tue, 25 Oct 2022 15:47:30 +0300
+Subject: KVM: x86: emulator: update the emulation mode after rsm
+
+From: Maxim Levitsky
+
+commit 055f37f84e304e59c046d1accfd8f08462f52c4c upstream.
+
+Update the emulation mode after RSM so that RIP will be correctly
+written back, because the RSM instruction can switch the CPU mode from
+32-bit (or less) to 64-bit.
+
+This fixes a guest crash when an #SMI is received while the guest
+runs code from an address above the 32-bit boundary.
+
+Signed-off-by: Maxim Levitsky
+Message-Id: <20221025124741.228045-13-mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/emulate.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2660,7 +2660,7 @@ static int em_rsm(struct x86_emulate_ctx
+          * those side effects need to be explicitly handled for both success
+          * and shutdown.
+          */
+-        return X86EMUL_CONTINUE;
++        return emulator_recalc_and_set_mode(ctxt);
+
+ emulate_shutdown:
+         ctxt->ops->triple_fault(ctxt);
diff --git a/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000001h.patch b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000001h.patch
new file mode 100644
index 00000000000..8ddf68683db
--- /dev/null
+++ b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000001h.patch
@@ -0,0 +1,32 @@
+From 0469e56a14bf8cfb80507e51b7aeec0332cdbc13 Mon Sep 17 00:00:00 2001
+From: Jim Mattson
+Date: Fri, 30 Sep 2022 00:51:58 +0200
+Subject: KVM: x86: Mask off reserved bits in CPUID.80000001H
+
+From: Jim Mattson
+
+commit 0469e56a14bf8cfb80507e51b7aeec0332cdbc13 upstream.
+
+KVM_GET_SUPPORTED_CPUID should only enumerate features that KVM
+actually supports. CPUID.80000001:EBX[27:16] are reserved bits and
+should be masked off.
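The mask itself comes from the kernel's GENMASK() macro (linux/bits.h), which builds a contiguous run of set bits between two bit positions. A quick userspace sanity check of the exact mask used below, with GENMASK re-implemented for 32-bit values since this is only an illustration, not the kernel macro:

    #include <assert.h>
    #include <stdint.h>

    /* 32-bit re-implementation of the kernel's GENMASK(h, l), illustration only */
    #define GENMASK32(h, l) ((~0u >> (31 - (h))) & (~0u << (l)))

    int main(void)
    {
            uint32_t ebx = 0xffffffff;

            assert(GENMASK32(27, 16) == 0x0fff0000u); /* bits 27..16 set */
            ebx &= ~GENMASK32(27, 16);                /* the patch's masking */
            assert(ebx == 0xf000ffffu);               /* [31:28] and [15:0] survive */
            return 0;
    }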
+ +Fixes: 0771671749b5 ("KVM: Enhance guest cpuid management") +Signed-off-by: Jim Mattson +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -1117,6 +1117,7 @@ static inline int __do_cpuid_func(struct + entry->eax = max(entry->eax, 0x80000021); + break; + case 0x80000001: ++ entry->ebx &= ~GENMASK(27, 16); + cpuid_entry_override(entry, CPUID_8000_0001_EDX); + cpuid_entry_override(entry, CPUID_8000_0001_ECX); + break; diff --git a/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000006h.patch b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000006h.patch new file mode 100644 index 00000000000..858eacd1e5d --- /dev/null +++ b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000006h.patch @@ -0,0 +1,35 @@ +From eeb69eab57c6604ac90b3fd8e5ac43f24a5535b1 Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Thu, 29 Sep 2022 15:51:59 -0700 +Subject: KVM: x86: Mask off reserved bits in CPUID.80000006H + +From: Jim Mattson + +commit eeb69eab57c6604ac90b3fd8e5ac43f24a5535b1 upstream. + +KVM_GET_SUPPORTED_CPUID should only enumerate features that KVM +actually supports. CPUID.80000006H:EDX[17:16] are reserved bits and +should be masked off. + +Fixes: 43d05de2bee7 ("KVM: pass through CPUID(0x80000006)") +Signed-off-by: Jim Mattson +Message-Id: <20220929225203.2234702-2-jmattson@google.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -1121,7 +1121,8 @@ static inline int __do_cpuid_func(struct + cpuid_entry_override(entry, CPUID_8000_0001_ECX); + break; + case 0x80000006: +- /* L2 cache and TLB: pass through host info. */ ++ /* Drop reserved bits, pass host L2 cache and TLB info. */ ++ entry->edx &= ~GENMASK(17, 16); + break; + case 0x80000007: /* Advanced power management */ + /* invariant TSC is CPUID.80000007H:EDX[8] */ diff --git a/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000008h.patch b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000008h.patch new file mode 100644 index 00000000000..47e4a4628b3 --- /dev/null +++ b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.80000008h.patch @@ -0,0 +1,38 @@ +From 7030d8530e533844e2f4b0e7476498afcd324634 Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Thu, 29 Sep 2022 15:52:00 -0700 +Subject: KVM: x86: Mask off reserved bits in CPUID.80000008H + +From: Jim Mattson + +commit 7030d8530e533844e2f4b0e7476498afcd324634 upstream. + +KVM_GET_SUPPORTED_CPUID should only enumerate features that KVM +actually supports. The following ranges of CPUID.80000008H are reserved +and should be masked off: + ECX[31:18] + ECX[11:8] + +In addition, the PerfTscSize field at ECX[17:16] should also be zero +because KVM does not set the PERFTSC bit at CPUID.80000001H.ECX[27]. 
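Numerically, ~(GENMASK(31, 16) | GENMASK(11, 8)) is ~0xffff0f00 = 0x0000f0ff, so only ECX[15:12] and ECX[7:0] survive the masking shown in the diff below. A hedged userspace probe of the leaf (x86-only sketch; GENMASK32 re-implements the kernel macro for illustration):

    #include <stdint.h>
    #include <stdio.h>

    #define GENMASK32(h, l) ((~0u >> (31 - (h))) & (~0u << (l)))

    /* Query a CPUID leaf directly (x86 only). */
    static void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b,
                      uint32_t *c, uint32_t *d)
    {
            __asm__ volatile("cpuid"
                             : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
                             : "a"(leaf), "c"(0));
    }

    int main(void)
    {
            uint32_t eax, ebx, ecx, edx;

            cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
            /* Same mask the patch applies: only ECX[15:12] and ECX[7:0] remain. */
            ecx &= ~(GENMASK32(31, 16) | GENMASK32(11, 8));
            printf("masked ECX: %#010x\n", ecx);
            return 0;
    }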
+ +Fixes: 24c82e576b78 ("KVM: Sanitize cpuid") +Signed-off-by: Jim Mattson +Message-Id: <20220929225203.2234702-3-jmattson@google.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -1152,6 +1152,7 @@ static inline int __do_cpuid_func(struct + g_phys_as = phys_as; + + entry->eax = g_phys_as | (virt_as << 8); ++ entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8)); + entry->edx = 0; + cpuid_entry_override(entry, CPUID_8000_0008_EBX); + break; diff --git a/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.8000001ah.patch b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.8000001ah.patch new file mode 100644 index 00000000000..ea3b007b9bb --- /dev/null +++ b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.8000001ah.patch @@ -0,0 +1,35 @@ +From 079f6889818dd07903fb36c252532ab47ebb6d48 Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Thu, 29 Sep 2022 15:52:01 -0700 +Subject: KVM: x86: Mask off reserved bits in CPUID.8000001AH + +From: Jim Mattson + +commit 079f6889818dd07903fb36c252532ab47ebb6d48 upstream. + +KVM_GET_SUPPORTED_CPUID should only enumerate features that KVM +actually supports. In the case of CPUID.8000001AH, only three bits are +currently defined. The 125 reserved bits should be masked off. + +Fixes: 24c82e576b78 ("KVM: Sanitize cpuid") +Signed-off-by: Jim Mattson +Message-Id: <20220929225203.2234702-4-jmattson@google.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -1171,6 +1171,9 @@ static inline int __do_cpuid_func(struct + entry->ecx = entry->edx = 0; + break; + case 0x8000001a: ++ entry->eax &= GENMASK(2, 0); ++ entry->ebx = entry->ecx = entry->edx = 0; ++ break; + case 0x8000001e: + break; + case 0x8000001F: diff --git a/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.8000001fh.patch b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.8000001fh.patch new file mode 100644 index 00000000000..3227e3eacfe --- /dev/null +++ b/queue-6.0/kvm-x86-mask-off-reserved-bits-in-cpuid.8000001fh.patch @@ -0,0 +1,36 @@ +From 86c4f0d547f6460d0426ebb3ba0614f1134b8cda Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Thu, 29 Sep 2022 15:52:03 -0700 +Subject: KVM: x86: Mask off reserved bits in CPUID.8000001FH + +From: Jim Mattson + +commit 86c4f0d547f6460d0426ebb3ba0614f1134b8cda upstream. + +KVM_GET_SUPPORTED_CPUID should only enumerate features that KVM +actually supports. CPUID.8000001FH:EBX[31:16] are reserved bits and +should be masked off. + +Fixes: 8765d75329a3 ("KVM: X86: Extend CPUID range to include new leaf") +Signed-off-by: Jim Mattson +Message-Id: <20220929225203.2234702-6-jmattson@google.com> +Cc: stable@vger.kernel.org +[Clear NumVMPL too. - Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -1183,7 +1183,8 @@ static inline int __do_cpuid_func(struct + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + } else { + cpuid_entry_override(entry, CPUID_8000_001F_EAX); +- ++ /* Clear NumVMPL since KVM does not support VMPL. 
*/
++        entry->ebx &= ~GENMASK(31, 12);
+         /*
+          * Enumerate '0' for "PA bits reduction", the adjusted
+          * MAXPHYADDR is enumerated directly (see 0x80000008).
diff --git a/queue-6.0/kvm-x86-smm-number-of-gprs-in-the-smram-image-depends-on-the-image-format.patch b/queue-6.0/kvm-x86-smm-number-of-gprs-in-the-smram-image-depends-on-the-image-format.patch
new file mode 100644
index 00000000000..23afdd7ff8d
--- /dev/null
+++ b/queue-6.0/kvm-x86-smm-number-of-gprs-in-the-smram-image-depends-on-the-image-format.patch
@@ -0,0 +1,48 @@
+From 696db303e54f7352623d9f640e6c51d8fa9d5588 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky
+Date: Tue, 25 Oct 2022 15:47:32 +0300
+Subject: KVM: x86: smm: number of GPRs in the SMRAM image depends on the image format
+
+From: Maxim Levitsky
+
+commit 696db303e54f7352623d9f640e6c51d8fa9d5588 upstream.
+
+On a 64-bit host, if the guest doesn't have X86_FEATURE_LM, KVM will
+access 16 GPRs in the 32-bit SMRAM image, causing an out-of-bounds RAM
+access.
+
+On a 32-bit host, rsm_load_state_64/enter_smm_save_state_64
+is compiled out, thus the overflowing access can't happen.
+
+Fixes: b443183a25ab61 ("KVM: x86: Reduce the number of emulator GPRs to '8' for 32-bit KVM")
+
+Signed-off-by: Maxim Levitsky
+Reviewed-by: Sean Christopherson
+Message-Id: <20221025124741.228045-15-mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/emulate.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2430,7 +2430,7 @@ static int rsm_load_state_32(struct x86_
+         ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
+         ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
+
+-        for (i = 0; i < NR_EMULATOR_GPRS; i++)
++        for (i = 0; i < 8; i++)
+                 *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
+
+         val = GET_SMSTATE(u32, smstate, 0x7fcc);
+@@ -2487,7 +2487,7 @@ static int rsm_load_state_64(struct x86_
+         u16 selector;
+         int i, r;
+
+-        for (i = 0; i < NR_EMULATOR_GPRS; i++)
++        for (i = 0; i < 16; i++)
+                 *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
+
+         ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
diff --git a/queue-6.0/parisc-avoid-printing-the-hardware-path-twice.patch b/queue-6.0/parisc-avoid-printing-the-hardware-path-twice.patch
new file mode 100644
index 00000000000..cc3eaeaae7d
--- /dev/null
+++ b/queue-6.0/parisc-avoid-printing-the-hardware-path-twice.patch
@@ -0,0 +1,77 @@
+From 2b6ae0962b421103feb41a80406732944b0665b3 Mon Sep 17 00:00:00 2001
+From: Helge Deller
+Date: Fri, 28 Oct 2022 18:12:49 +0200
+Subject: parisc: Avoid printing the hardware path twice
+
+From: Helge Deller
+
+commit 2b6ae0962b421103feb41a80406732944b0665b3 upstream.
+
+Avoid printing the hardware path twice in the kernel log, and clean up
+the output of the version numbers so they appear in the same order as
+they are listed in the hardware database in the hardware.c file.
+Additionally, optimize the memory footprint of the hardware database
+and mark some code as init code.
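The footprint saving can be eyeballed with a quick host-side sizeof() comparison mirroring the two struct hp_hardware layouts in the diff below (a sketch only; the old size depends on the compiler's bitfield padding, while the new __packed layout is exactly 4 + 1 + 59 = 64 bytes; __packed is spelled out as the GCC attribute here):

    #include <stdio.h>

    struct hp_hardware_old {
            unsigned short hw_type:5;
            unsigned short hversion;
            unsigned long sversion:28;
            unsigned short opt;
            const char name[80];
    };

    struct hp_hardware_new {
            unsigned int hw_type:8;
            unsigned int hversion:12;
            unsigned int sversion:12;   /* 8 + 12 + 12 = one 32-bit unit */
            unsigned char opt;
            unsigned char name[59];
    } __attribute__((packed));

    int main(void)
    {
            printf("old: %zu bytes, new: %zu bytes\n",
                   sizeof(struct hp_hardware_old),
                   sizeof(struct hp_hardware_new));
            return 0;
    }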
+ +Fixes: cab56b51ec0e ("parisc: Fix device names in /proc/iomem") +Signed-off-by: Helge Deller +Cc: # v4.9+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/asm/hardware.h | 12 ++++++------ + arch/parisc/kernel/drivers.c | 14 ++++++-------- + 2 files changed, 12 insertions(+), 14 deletions(-) + +--- a/arch/parisc/include/asm/hardware.h ++++ b/arch/parisc/include/asm/hardware.h +@@ -10,12 +10,12 @@ + #define SVERSION_ANY_ID PA_SVERSION_ANY_ID + + struct hp_hardware { +- unsigned short hw_type:5; /* HPHW_xxx */ +- unsigned short hversion; +- unsigned long sversion:28; +- unsigned short opt; +- const char name[80]; /* The hardware description */ +-}; ++ unsigned int hw_type:8; /* HPHW_xxx */ ++ unsigned int hversion:12; ++ unsigned int sversion:12; ++ unsigned char opt; ++ unsigned char name[59]; /* The hardware description */ ++} __packed; + + struct parisc_device; + +--- a/arch/parisc/kernel/drivers.c ++++ b/arch/parisc/kernel/drivers.c +@@ -882,15 +882,13 @@ void __init walk_central_bus(void) + &root); + } + +-static void print_parisc_device(struct parisc_device *dev) ++static __init void print_parisc_device(struct parisc_device *dev) + { +- char hw_path[64]; +- static int count; ++ static int count __initdata; + +- print_pa_hwpath(dev, hw_path); +- pr_info("%d. %s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", +- ++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type, +- dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); ++ pr_info("%d. %s at %pap { type:%d, hv:%#x, sv:%#x, rev:%#x }", ++ ++count, dev->name, &(dev->hpa.start), dev->id.hw_type, ++ dev->id.hversion, dev->id.sversion, dev->id.hversion_rev); + + if (dev->num_addrs) { + int k; +@@ -1079,7 +1077,7 @@ static __init int qemu_print_iodc_data(s + + + +-static int print_one_device(struct device * dev, void * data) ++static __init int print_one_device(struct device * dev, void * data) + { + struct parisc_device * pdev = to_parisc_device(dev); + diff --git a/queue-6.0/parisc-export-iosapic_serial_irq-symbol-for-serial-port-driver.patch b/queue-6.0/parisc-export-iosapic_serial_irq-symbol-for-serial-port-driver.patch new file mode 100644 index 00000000000..f0b6cc2d8e8 --- /dev/null +++ b/queue-6.0/parisc-export-iosapic_serial_irq-symbol-for-serial-port-driver.patch @@ -0,0 +1,30 @@ +From a0c9f1f2e53b8eb2ae43987a30e547ba56b4fa18 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Thu, 27 Oct 2022 09:12:05 +0200 +Subject: parisc: Export iosapic_serial_irq() symbol for serial port driver + +From: Helge Deller + +commit a0c9f1f2e53b8eb2ae43987a30e547ba56b4fa18 upstream. + +The parisc serial port driver needs this symbol when it's compiled +as module. 
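Without the export, building 8250_gsc (or any other module that references the symbol) fails at modpost with an "iosapic_serial_irq undefined!" error, since modules can only link against symbols that built-in code explicitly exports. A minimal, hypothetical module illustrating the load-time lookup that EXPORT_SYMBOL() enables (sketch only, not part of the patch):

    #include <linux/module.h>

    struct parisc_device;                     /* opaque forward declaration */
    extern int iosapic_serial_irq(struct parisc_device *dev);

    static int __init demo_init(void)
    {
            /* Referencing the symbol forces a module-load-time lookup. */
            pr_info("resolved iosapic_serial_irq at %ps\n", iosapic_serial_irq);
            return 0;
    }

    static void __exit demo_exit(void)
    {
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");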
+ +Signed-off-by: Helge Deller +Reported-by: kernel test robot +Cc: +Signed-off-by: Greg Kroah-Hartman +--- + drivers/parisc/iosapic.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/parisc/iosapic.c ++++ b/drivers/parisc/iosapic.c +@@ -866,6 +866,7 @@ int iosapic_serial_irq(struct parisc_dev + + return vi->txn_irq; + } ++EXPORT_SYMBOL(iosapic_serial_irq); + #endif + + diff --git a/queue-6.0/parisc-make-8250_gsc-driver-dependend-on-config_parisc.patch b/queue-6.0/parisc-make-8250_gsc-driver-dependend-on-config_parisc.patch new file mode 100644 index 00000000000..a8493200ef7 --- /dev/null +++ b/queue-6.0/parisc-make-8250_gsc-driver-dependend-on-config_parisc.patch @@ -0,0 +1,36 @@ +From e8a18e3f00f3ee8d07c17ab1ea3ad4df4a3b6fe0 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Fri, 21 Oct 2022 07:44:49 +0200 +Subject: parisc: Make 8250_gsc driver dependend on CONFIG_PARISC + +From: Helge Deller + +commit e8a18e3f00f3ee8d07c17ab1ea3ad4df4a3b6fe0 upstream. + +Although the name of the driver 8250_gsc.c suggests that it handles +only serial ports on the GSC bus, it does handle serial ports listed +in the parisc machine inventory as well, e.g. the serial ports in a +C8000 PCI-only workstation. + +Change the dependency to CONFIG_PARISC, so that the driver gets included +in the kernel even if CONFIG_GSC isn't set. + +Reported-by: Mikulas Patocka +Cc: +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/8250/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/tty/serial/8250/Kconfig ++++ b/drivers/tty/serial/8250/Kconfig +@@ -118,7 +118,7 @@ config SERIAL_8250_CONSOLE + + config SERIAL_8250_GSC + tristate +- depends on SERIAL_8250 && GSC ++ depends on SERIAL_8250 && PARISC + default SERIAL_8250 + + config SERIAL_8250_DMA diff --git a/queue-6.0/series b/queue-6.0/series index 5516c2be964..ef58b4d7a39 100644 --- a/queue-6.0/series +++ b/queue-6.0/series @@ -160,3 +160,30 @@ perf-x86-intel-add-cooper-lake-stepping-to-isolation_ucodes.patch perf-x86-intel-fix-pebs-event-constraints-for-spr.patch net-remove-sock_support_zc-from-sockmap.patch net-also-flag-accepted-sockets-supporting-msghdr-originated-zerocopy.patch +parisc-make-8250_gsc-driver-dependend-on-config_parisc.patch +parisc-export-iosapic_serial_irq-symbol-for-serial-port-driver.patch +parisc-avoid-printing-the-hardware-path-twice.patch +ext4-fix-warning-in-ext4_da_release_space.patch +ext4-fix-bug_on-when-directory-entry-has-invalid-rec_len.patch +ext4-update-the-backup-superblock-s-at-the-end-of-the-online-resize.patch +x86-tdx-prepare-for-using-info-call-for-a-second-purpose.patch +x86-tdx-panic-on-bad-configs-that-ve-on-private-memory-access.patch +x86-syscall-include-asm-ptrace.h-in-syscall_wrapper-header.patch +kvm-x86-mask-off-reserved-bits-in-cpuid.80000006h.patch +kvm-x86-mask-off-reserved-bits-in-cpuid.8000001ah.patch +kvm-x86-mask-off-reserved-bits-in-cpuid.80000008h.patch +kvm-x86-mask-off-reserved-bits-in-cpuid.80000001h.patch +kvm-x86-mask-off-reserved-bits-in-cpuid.8000001fh.patch +kvm-vmx-advertise-pmu-lbrs-if-and-only-if-perf-supports-lbrs.patch +kvm-vmx-fold-vmx_supported_debugctl-into-vcpu_supported_debugctl.patch +kvm-vmx-ignore-guest-cpuid-for-host-userspace-writes-to-debugctl.patch +kvm-vmx-fully-disable-sgx-if-secondary_exec_encls_exiting-unavailable.patch +kvm-initialize-gfn_to_pfn_cache-locks-in-dedicated-helper.patch +kvm-reject-attempts-to-consume-or-refresh-inactive-gfn_to_pfn_cache.patch 
+kvm-arm64-fix-bad-dereference-on-mte-enabled-systems.patch
+kvm-arm64-fix-smpri_el1-tpidr2_el0-trapping-on-vhe.patch
+kvm-x86-smm-number-of-gprs-in-the-smram-image-depends-on-the-image-format.patch
+kvm-x86-emulator-em_sysexit-should-update-ctxt-mode.patch
+kvm-x86-emulator-introduce-emulator_recalc_and_set_mode.patch
+kvm-x86-emulator-update-the-emulation-mode-after-rsm.patch
+kvm-x86-emulator-update-the-emulation-mode-after-cr0-write.patch
diff --git a/queue-6.0/x86-syscall-include-asm-ptrace.h-in-syscall_wrapper-header.patch b/queue-6.0/x86-syscall-include-asm-ptrace.h-in-syscall_wrapper-header.patch
new file mode 100644
index 00000000000..64d500069c2
--- /dev/null
+++ b/queue-6.0/x86-syscall-include-asm-ptrace.h-in-syscall_wrapper-header.patch
@@ -0,0 +1,64 @@
+From 9440c42941606af4c379afa3cf8624f0dc43a629 Mon Sep 17 00:00:00 2001
+From: Jiri Olsa
+Date: Tue, 18 Oct 2022 14:27:08 +0200
+Subject: x86/syscall: Include asm/ptrace.h in syscall_wrapper header
+
+From: Jiri Olsa
+
+commit 9440c42941606af4c379afa3cf8624f0dc43a629 upstream.
+
+With just the forward declaration of the 'struct pt_regs' in
+syscall_wrapper.h, the syscall stub functions:
+
+   __[x64|ia32]_sys_*(struct pt_regs *regs)
+
+will have different definition of 'regs' argument in BTF data
+based on which object file they are defined in.
+
+If the syscall's object includes 'struct pt_regs' definition,
+the BTF argument data will point to a 'struct pt_regs' record,
+like:
+
+   [226] STRUCT 'pt_regs' size=168 vlen=21
+      'r15' type_id=1 bits_offset=0
+      'r14' type_id=1 bits_offset=64
+      'r13' type_id=1 bits_offset=128
+      ...
+
+If not, it will point to a fwd declaration record:
+
+   [15439] FWD 'pt_regs' fwd_kind=struct
+
+and makes BPF tracing programs that hook those functions unable
+to access fields from 'struct pt_regs'.
+
+Include asm/ptrace.h directly in syscall_wrapper.h to make sure all
+syscalls see 'struct pt_regs' definition. This then results in BTF for
+'__*_sys_*(struct pt_regs *regs)' functions to point to the actual
+struct, not just the forward declaration.
+
+ [ bp: No Fixes tag as this is not really a bug fix but "adjustment" so
+   that BTF is happy. ]
+
+Reported-by: Akihiro HARAI
+Signed-off-by: Jiri Olsa
+Signed-off-by: Borislav Petkov
+Acked-by: Andrii Nakryiko
+Cc: # this is needed only for BTF so kernels >= 5.15
+Link: https://lore.kernel.org/r/20221018122708.823792-1-jolsa@kernel.org
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/include/asm/syscall_wrapper.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/syscall_wrapper.h
++++ b/arch/x86/include/asm/syscall_wrapper.h
+@@ -6,7 +6,7 @@
+ #ifndef _ASM_X86_SYSCALL_WRAPPER_H
+ #define _ASM_X86_SYSCALL_WRAPPER_H
+
+-struct pt_regs;
++#include <asm/ptrace.h>
+
+ extern long __x64_sys_ni_syscall(const struct pt_regs *regs);
+ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
diff --git a/queue-6.0/x86-tdx-panic-on-bad-configs-that-ve-on-private-memory-access.patch b/queue-6.0/x86-tdx-panic-on-bad-configs-that-ve-on-private-memory-access.patch
new file mode 100644
index 00000000000..ea39a48fd7d
--- /dev/null
+++ b/queue-6.0/x86-tdx-panic-on-bad-configs-that-ve-on-private-memory-access.patch
@@ -0,0 +1,98 @@
+From 373e715e31bf4e0f129befe87613a278fac228d3 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov"
+Date: Fri, 28 Oct 2022 17:12:20 +0300
+Subject: x86/tdx: Panic on bad configs that #VE on "private" memory access
+
+From: Kirill A. Shutemov
+
+commit 373e715e31bf4e0f129befe87613a278fac228d3 upstream.
+
+All normal kernel memory is "TDX private memory". This includes
+everything from kernel stacks to kernel text. Handling
+exceptions on arbitrary accesses to kernel memory is essentially
+impossible because they can happen in horribly nasty places like
+kernel entry/exit. But, TDX hardware can theoretically _deliver_
+a virtualization exception (#VE) on any access to private memory.
+
+But, it's not as bad as it sounds. TDX can be configured to never
+deliver these exceptions on private memory with a "TD attribute"
+called ATTR_SEPT_VE_DISABLE. The guest has no way to *set* this
+attribute, but it can check it.
+
+Ensure ATTR_SEPT_VE_DISABLE is set in early boot. panic() if it
+is unset. There is no sane way for Linux to run with this
+attribute clear so a panic() is appropriate.
+
+There's a small window during boot, before the check, where the
+kernel has an early #VE handler. But the handler is only for port
+I/O and will also panic() as soon as it sees any other #VE, such
+as one generated by a private memory access.
+
+[ dhansen: Rewrite changelog and rebase on new tdx_parse_tdinfo().
+	   Add Kirill's tested-by because I made changes since
+	   he wrote this. ]
+
+Fixes: 9a22bf6debbf ("x86/traps: Add #VE support for TDX guest")
+Reported-by: ruogui.ygr@alibaba-inc.com
+Signed-off-by: Kirill A. Shutemov
+Signed-off-by: Dave Hansen
+Tested-by: Kirill A. Shutemov
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20221028141220.29217-3-kirill.shutemov%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/coco/tdx/tdx.c | 21 ++++++++++++++++-----
+ 1 file changed, 16 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/coco/tdx/tdx.c
++++ b/arch/x86/coco/tdx/tdx.c
+@@ -34,6 +34,8 @@
+ #define VE_GET_PORT_NUM(e) ((e) >> 16)
+ #define VE_IS_IO_STRING(e) ((e) & BIT(4))
+
++#define ATTR_SEPT_VE_DISABLE BIT(28)
++
+ /*
+  * Wrapper for standard use of __tdx_hypercall with no output aside from
+  * return code.
+  */
+@@ -102,6 +104,7 @@ static void tdx_parse_tdinfo(u64 *cc_mas
+ {
+         struct tdx_module_output out;
+         unsigned int gpa_width;
++        u64 td_attr;
+
+         /*
+          * TDINFO TDX module call is used to get the TD execution environment
+@@ -109,19 +112,27 @@ static void tdx_parse_tdinfo(u64 *cc_mas
+          * information, etc. More details about the ABI can be found in TDX
+          * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
+          * [TDG.VP.INFO].
+-         *
+-         * The GPA width that comes out of this call is critical. TDX guests
+-         * can not meaningfully run without it.
+          */
+         tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);
+
+-        gpa_width = out.rcx & GENMASK(5, 0);
+-
+         /*
+          * The highest bit of a guest physical address is the "sharing" bit.
+          * Set it for shared pages and clear it for private pages.
++         *
++         * The GPA width that comes out of this call is critical. TDX guests
++         * can not meaningfully run without it.
+          */
++        gpa_width = out.rcx & GENMASK(5, 0);
+         *cc_mask = BIT_ULL(gpa_width - 1);
++
++        /*
++         * The kernel can not handle #VE's when accessing normal kernel
++         * memory. Ensure that no #VE will be delivered for accesses to
++         * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
++ */ ++ td_attr = out.rdx; ++ if (!(td_attr & ATTR_SEPT_VE_DISABLE)) ++ panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n"); + } + + /* diff --git a/queue-6.0/x86-tdx-prepare-for-using-info-call-for-a-second-purpose.patch b/queue-6.0/x86-tdx-prepare-for-using-info-call-for-a-second-purpose.patch new file mode 100644 index 00000000000..f6567e42ce1 --- /dev/null +++ b/queue-6.0/x86-tdx-prepare-for-using-info-call-for-a-second-purpose.patch @@ -0,0 +1,61 @@ +From a6dd6f39008bb3ef7c73ef0a2acc2a4209555bd8 Mon Sep 17 00:00:00 2001 +From: Dave Hansen +Date: Fri, 28 Oct 2022 17:12:19 +0300 +Subject: x86/tdx: Prepare for using "INFO" call for a second purpose + +From: Dave Hansen + +commit a6dd6f39008bb3ef7c73ef0a2acc2a4209555bd8 upstream. + +The TDG.VP.INFO TDCALL provides the guest with various details about +the TDX system that the guest needs to run. Only one field is currently +used: 'gpa_width' which tells the guest which PTE bits mark pages shared +or private. + +A second field is now needed: the guest "TD attributes" to tell if +virtualization exceptions are configured in a way that can harm the guest. + +Make the naming and calling convention more generic and discrete from the +mask-centric one. + +Thanks to Sathya for the inspiration here, but there's no code, comments +or changelogs left from where he started. + +Signed-off-by: Dave Hansen +Acked-by: Kirill A. Shutemov +Tested-by: Kirill A. Shutemov +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/coco/tdx/tdx.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/coco/tdx/tdx.c ++++ b/arch/x86/coco/tdx/tdx.c +@@ -98,7 +98,7 @@ static inline void tdx_module_call(u64 f + panic("TDCALL %lld failed (Buggy TDX module!)\n", fn); + } + +-static u64 get_cc_mask(void) ++static void tdx_parse_tdinfo(u64 *cc_mask) + { + struct tdx_module_output out; + unsigned int gpa_width; +@@ -121,7 +121,7 @@ static u64 get_cc_mask(void) + * The highest bit of a guest physical address is the "sharing" bit. + * Set it for shared pages and clear it for private pages. + */ +- return BIT_ULL(gpa_width - 1); ++ *cc_mask = BIT_ULL(gpa_width - 1); + } + + /* +@@ -758,7 +758,7 @@ void __init tdx_early_init(void) + setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); + + cc_set_vendor(CC_VENDOR_INTEL); +- cc_mask = get_cc_mask(); ++ tdx_parse_tdinfo(&cc_mask); + cc_set_mask(cc_mask); + + /* -- 2.47.3