From f05a31ba870f80abc02228ce6f67345770335c50 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 8 Jun 2021 18:08:14 +0200 Subject: [PATCH] 5.10-stable patches added patches: btrfs-fix-unmountable-seed-device-after-fstrim.patch kvm-arm64-fix-debug-register-indexing.patch kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch lib-lz4-explicitly-support-in-place-decompression.patch x86-kvm-disable-all-pv-features-on-crash.patch x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch --- ...unmountable-seed-device-after-fstrim.patch | 111 ++++++++++ ...vm-arm64-fix-debug-register-indexing.patch | 208 ++++++++++++++++++ ...or-dr-and-cr-accesses-in-64-bit-mode.patch | 65 ++++++ ...citly-support-in-place-decompression.patch | 85 +++++++ queue-5.10/series | 7 + ...kvm-disable-all-pv-features-on-crash.patch | 201 +++++++++++++++++ ...ble-kvmclock-on-all-cpus-on-shutdown.patch | 86 ++++++++ ...down-pv-features-on-boot-cpu-as-well.patch | 139 ++++++++++++ 8 files changed, 902 insertions(+) create mode 100644 queue-5.10/btrfs-fix-unmountable-seed-device-after-fstrim.patch create mode 100644 queue-5.10/kvm-arm64-fix-debug-register-indexing.patch create mode 100644 queue-5.10/kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch create mode 100644 queue-5.10/lib-lz4-explicitly-support-in-place-decompression.patch create mode 100644 queue-5.10/x86-kvm-disable-all-pv-features-on-crash.patch create mode 100644 queue-5.10/x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch create mode 100644 queue-5.10/x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch diff --git a/queue-5.10/btrfs-fix-unmountable-seed-device-after-fstrim.patch b/queue-5.10/btrfs-fix-unmountable-seed-device-after-fstrim.patch new file mode 100644 index 00000000000..c6e80579648 --- /dev/null +++ b/queue-5.10/btrfs-fix-unmountable-seed-device-after-fstrim.patch @@ -0,0 +1,111 @@ +From foo@baz Tue Jun 8 05:36:29 PM 
CEST 2021 +From: Anand Jain +Date: Fri, 30 Apr 2021 19:59:51 +0800 +Subject: btrfs: fix unmountable seed device after fstrim + +From: Anand Jain + +commit 5e753a817b2d5991dfe8a801b7b1e8e79a1c5a20 upstream. + +The following test case reproduces an issue of wrongly freeing in-use +blocks on the readonly seed device when fstrim is called on the rw sprout +device. As shown below. + +Create a seed device and add a sprout device to it: + + $ mkfs.btrfs -fq -dsingle -msingle /dev/loop0 + $ btrfstune -S 1 /dev/loop0 + $ mount /dev/loop0 /btrfs + $ btrfs dev add -f /dev/loop1 /btrfs + BTRFS info (device loop0): relocating block group 290455552 flags system + BTRFS info (device loop0): relocating block group 1048576 flags system + BTRFS info (device loop0): disk added /dev/loop1 + $ umount /btrfs + +Mount the sprout device and run fstrim: + + $ mount /dev/loop1 /btrfs + $ fstrim /btrfs + $ umount /btrfs + +Now try to mount the seed device, and it fails: + + $ mount /dev/loop0 /btrfs + mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error. + +Block 5292032 is missing on the readonly seed device: + + $ dmesg -kt | tail + + BTRFS error (device loop0): bad tree block start, want 5292032 have 0 + BTRFS warning (device loop0): couldn't read-tree root + BTRFS error (device loop0): open_ctree failed + +>From the dump-tree of the seed device (taken before the fstrim). Block +5292032 belonged to the block group starting at 5242880: + + $ btrfs inspect dump-tree -e /dev/loop0 | grep -A1 BLOCK_GROUP + + item 3 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16169 itemsize 24 + block group used 114688 chunk_objectid 256 flags METADATA + + +>From the dump-tree of the sprout device (taken before the fstrim). 
+fstrim used block-group 5242880 to find the related free space to free: + + $ btrfs inspect dump-tree -e /dev/loop1 | grep -A1 BLOCK_GROUP + + item 1 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16226 itemsize 24 + block group used 32768 chunk_objectid 256 flags METADATA + + +BPF kernel tracing the fstrim command finds the missing block 5292032 +within the range of the discarded blocks as below: + + kprobe:btrfs_discard_extent { + printf("freeing start %llu end %llu num_bytes %llu:\n", + arg1, arg1+arg2, arg2); + } + + freeing start 5259264 end 5406720 num_bytes 147456 + + +Fix this by avoiding the discard command to the readonly seed device. + +Reported-by: Chris Murphy +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent-tree.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -1297,16 +1297,20 @@ int btrfs_discard_extent(struct btrfs_fs + for (i = 0; i < bbio->num_stripes; i++, stripe++) { + u64 bytes; + struct request_queue *req_q; ++ struct btrfs_device *device = stripe->dev; + +- if (!stripe->dev->bdev) { ++ if (!device->bdev) { + ASSERT(btrfs_test_opt(fs_info, DEGRADED)); + continue; + } +- req_q = bdev_get_queue(stripe->dev->bdev); ++ req_q = bdev_get_queue(device->bdev); + if (!blk_queue_discard(req_q)) + continue; + +- ret = btrfs_issue_discard(stripe->dev->bdev, ++ if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) ++ continue; ++ ++ ret = btrfs_issue_discard(device->bdev, + stripe->physical, + stripe->length, + &bytes); diff --git a/queue-5.10/kvm-arm64-fix-debug-register-indexing.patch b/queue-5.10/kvm-arm64-fix-debug-register-indexing.patch new file mode 100644 index 00000000000..99bc59d358e --- /dev/null +++ b/queue-5.10/kvm-arm64-fix-debug-register-indexing.patch @@ -0,0 +1,208 @@ +From 
cb853ded1d25e5b026ce115dbcde69e3d7e2e831 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Fri, 14 May 2021 09:05:41 +0100 +Subject: KVM: arm64: Fix debug register indexing + +From: Marc Zyngier + +commit cb853ded1d25e5b026ce115dbcde69e3d7e2e831 upstream. + +Commit 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on +reset") flipped the register number to 0 for all the debug registers +in the sysreg table, hereby indicating that these registers live +in a separate shadow structure. + +However, the author of this patch failed to realise that all the +accessors are using that particular index instead of the register +encoding, resulting in all the registers hitting index 0. Not quite +a valid implementation of the architecture... + +Address the issue by fixing all the accessors to use the CRm field +of the encoding, which contains the debug register index. + +Fixes: 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on reset") +Reported-by: Ricardo Koller +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/sys_regs.c | 42 +++++++++++++++++++++--------------------- + 1 file changed, 21 insertions(+), 21 deletions(-) + +--- a/arch/arm64/kvm/sys_regs.c ++++ b/arch/arm64/kvm/sys_regs.c +@@ -464,14 +464,14 @@ static bool trap_bvr(struct kvm_vcpu *vc + struct sys_reg_params *p, + const struct sys_reg_desc *rd) + { +- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; ++ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + +- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); ++ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); + + return true; + } +@@ -479,7 +479,7 @@ static bool trap_bvr(struct kvm_vcpu *vc + static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) + { +- __u64 *r = 
&vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; ++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; + + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; +@@ -489,7 +489,7 @@ static int set_bvr(struct kvm_vcpu *vcpu + static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) + { +- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; ++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; +@@ -499,21 +499,21 @@ static int get_bvr(struct kvm_vcpu *vcpu + static void reset_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) + { +- vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; ++ vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val; + } + + static bool trap_bcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) + { +- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; ++ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + +- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); ++ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); + + return true; + } +@@ -521,7 +521,7 @@ static bool trap_bcr(struct kvm_vcpu *vc + static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) + { +- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; ++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; + + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; +@@ -532,7 +532,7 @@ static int set_bcr(struct kvm_vcpu *vcpu + static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) + { +- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; ++ __u64 *r = 
&vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; +@@ -542,22 +542,22 @@ static int get_bcr(struct kvm_vcpu *vcpu + static void reset_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) + { +- vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; ++ vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val; + } + + static bool trap_wvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) + { +- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; ++ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + +- trace_trap_reg(__func__, rd->reg, p->is_write, +- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); ++ trace_trap_reg(__func__, rd->CRm, p->is_write, ++ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]); + + return true; + } +@@ -565,7 +565,7 @@ static bool trap_wvr(struct kvm_vcpu *vc + static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) + { +- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; ++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; + + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; +@@ -575,7 +575,7 @@ static int set_wvr(struct kvm_vcpu *vcpu + static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) + { +- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; ++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; +@@ -585,21 +585,21 @@ static int get_wvr(struct kvm_vcpu *vcpu + static void reset_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) + { +- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; ++ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = 
rd->val; + } + + static bool trap_wcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) + { +- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; ++ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + +- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); ++ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); + + return true; + } +@@ -607,7 +607,7 @@ static bool trap_wcr(struct kvm_vcpu *vc + static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) + { +- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; ++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; + + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; +@@ -617,7 +617,7 @@ static int set_wcr(struct kvm_vcpu *vcpu + static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) + { +- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; ++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; +@@ -627,7 +627,7 @@ static int get_wcr(struct kvm_vcpu *vcpu + static void reset_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) + { +- vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; ++ vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val; + } + + static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) diff --git a/queue-5.10/kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch b/queue-5.10/kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch new file mode 100644 index 00000000000..a415e8cd607 --- /dev/null +++ b/queue-5.10/kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch @@ -0,0 +1,65 @@ +From foo@baz 
Tue Jun 8 05:37:28 PM CEST 2021 +From: Sean Christopherson +Date: Wed, 21 Apr 2021 19:21:22 -0700 +Subject: KVM: SVM: Truncate GPR value for DR and CR accesses in !64-bit mode + +From: Sean Christopherson + +commit 0884335a2e653b8a045083aa1d57ce74269ac81d upstream. + +Drop bits 63:32 on loads/stores to/from DRs and CRs when the vCPU is not +in 64-bit mode. The APM states bits 63:32 are dropped for both DRs and +CRs: + + In 64-bit mode, the operand size is fixed at 64 bits without the need + for a REX prefix. In non-64-bit mode, the operand size is fixed at 32 + bits and the upper 32 bits of the destination are forced to 0. + +Fixes: 7ff76d58a9dc ("KVM: SVM: enhance MOV CR intercept handler") +Fixes: cae3797a4639 ("KVM: SVM: enhance mov DR intercept handler") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20210422022128.3464144-4-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -2362,7 +2362,7 @@ static int cr_interception(struct vcpu_s + err = 0; + if (cr >= 16) { /* mov to cr */ + cr -= 16; +- val = kvm_register_read(&svm->vcpu, reg); ++ val = kvm_register_readl(&svm->vcpu, reg); + trace_kvm_cr_write(cr, val); + switch (cr) { + case 0: +@@ -2408,7 +2408,7 @@ static int cr_interception(struct vcpu_s + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; + } +- kvm_register_write(&svm->vcpu, reg, val); ++ kvm_register_writel(&svm->vcpu, reg, val); + trace_kvm_cr_read(cr, val); + } + return kvm_complete_insn_gp(&svm->vcpu, err); +@@ -2439,13 +2439,13 @@ static int dr_interception(struct vcpu_s + if (dr >= 16) { /* mov to DRn */ + if (!kvm_require_dr(&svm->vcpu, dr - 16)) + return 1; +- val = kvm_register_read(&svm->vcpu, reg); ++ val = kvm_register_readl(&svm->vcpu, reg); + kvm_set_dr(&svm->vcpu, dr - 16, val); 
+ } else { + if (!kvm_require_dr(&svm->vcpu, dr)) + return 1; + kvm_get_dr(&svm->vcpu, dr, &val); +- kvm_register_write(&svm->vcpu, reg, val); ++ kvm_register_writel(&svm->vcpu, reg, val); + } + + return kvm_skip_emulated_instruction(&svm->vcpu); diff --git a/queue-5.10/lib-lz4-explicitly-support-in-place-decompression.patch b/queue-5.10/lib-lz4-explicitly-support-in-place-decompression.patch new file mode 100644 index 00000000000..a44f02f4a52 --- /dev/null +++ b/queue-5.10/lib-lz4-explicitly-support-in-place-decompression.patch @@ -0,0 +1,85 @@ +From 89b158635ad79574bde8e94d45dad33f8cf09549 Mon Sep 17 00:00:00 2001 +From: Gao Xiang +Date: Tue, 15 Dec 2020 20:44:03 -0800 +Subject: lib/lz4: explicitly support in-place decompression + +From: Gao Xiang + +commit 89b158635ad79574bde8e94d45dad33f8cf09549 upstream. + +LZ4 final literal copy could be overlapped when doing +in-place decompression, so it's unsafe to just use memcpy() +on an optimized memcpy approach but memmove() instead. + +Upstream LZ4 has updated this years ago [1] (and the impact +is non-sensible [2] plus only a few bytes remain), this commit +just synchronizes LZ4 upstream code to the kernel side as well. + +It can be observed as EROFS in-place decompression failure +on specific files when X86_FEATURE_ERMS is unsupported, +memcpy() optimization of commit 59daa706fbec ("x86, mem: +Optimize memcpy by avoiding memory false dependece") will +be enabled then. + +Currently most modern x86-CPUs support ERMS, these CPUs just +use "rep movsb" approach so no problem at all. However, it can +still be verified with forcely disabling ERMS feature... 
+ +arch/x86/lib/memcpy_64.S: + ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ +- "jmp memcpy_erms", X86_FEATURE_ERMS ++ "jmp memcpy_orig", X86_FEATURE_ERMS + +We didn't observe anything strange on arm64/arm/x86 platforms before +since most memcpy() would behave in an increasing address order +("copy upwards" [3]) and it's the correct order of in-place +decompression but it really needs an update to memmove() for sure +considering it's an undefined behavior according to the standard +and some unique optimization already exists in the kernel. + +[1] https://github.com/lz4/lz4/commit/33cb8518ac385835cc17be9a770b27b40cd0e15b +[2] https://github.com/lz4/lz4/pull/717#issuecomment-497818921 +[3] https://sourceware.org/bugzilla/show_bug.cgi?id=12518 + +Link: https://lkml.kernel.org/r/20201122030749.2698994-1-hsiangkao@redhat.com +Signed-off-by: Gao Xiang +Reviewed-by: Nick Terrell +Cc: Yann Collet +Cc: Miao Xie +Cc: Chao Yu +Cc: Li Guifu +Cc: Guo Xuenan +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Gao Xiang +Signed-off-by: Greg Kroah-Hartman +--- + lib/lz4/lz4_decompress.c | 6 +++++- + lib/lz4/lz4defs.h | 1 + + 2 files changed, 6 insertions(+), 1 deletion(-) + +--- a/lib/lz4/lz4_decompress.c ++++ b/lib/lz4/lz4_decompress.c +@@ -263,7 +263,11 @@ static FORCE_INLINE int LZ4_decompress_g + } + } + +- LZ4_memcpy(op, ip, length); ++ /* ++ * supports overlapping memory regions; only matters ++ * for in-place decompression scenarios ++ */ ++ LZ4_memmove(op, ip, length); + ip += length; + op += length; + +--- a/lib/lz4/lz4defs.h ++++ b/lib/lz4/lz4defs.h +@@ -146,6 +146,7 @@ static FORCE_INLINE void LZ4_writeLE16(v + * environments. This is needed when decompressing the Linux Kernel, for example. 
+ */ + #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) ++#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size) + + static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) + { diff --git a/queue-5.10/series b/queue-5.10/series index 1f72e9f841e..4ebffb0b359 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -124,3 +124,10 @@ btrfs-abort-in-rename_exchange-if-we-fail-to-insert-the-second-ref.patch btrfs-fix-deadlock-when-cloning-inline-extents-and-low-on-available-space.patch mm-hugetlb-fix-simple-resv_huge_pages-underflow-on-u.patch drm-msm-dpu-always-use-mdp-device-to-scale-bandwidth.patch +btrfs-fix-unmountable-seed-device-after-fstrim.patch +kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch +kvm-arm64-fix-debug-register-indexing.patch +x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch +x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch +x86-kvm-disable-all-pv-features-on-crash.patch +lib-lz4-explicitly-support-in-place-decompression.patch diff --git a/queue-5.10/x86-kvm-disable-all-pv-features-on-crash.patch b/queue-5.10/x86-kvm-disable-all-pv-features-on-crash.patch new file mode 100644 index 00000000000..191f0119ea1 --- /dev/null +++ b/queue-5.10/x86-kvm-disable-all-pv-features-on-crash.patch @@ -0,0 +1,201 @@ +From foo@baz Tue Jun 8 05:44:24 PM CEST 2021 +From: Krzysztof Kozlowski +Date: Mon, 31 May 2021 16:05:26 +0200 +Subject: x86/kvm: Disable all PV features on crash +To: stable@vger.kernel.org +Cc: Andrea Righi , Paolo Bonzini , Vitaly Kuznetsov , Krzysztof Kozlowski +Message-ID: <20210531140526.42932-4-krzysztof.kozlowski@canonical.com> + +From: Vitaly Kuznetsov + +commit 3d6b84132d2a57b5a74100f6923a8feb679ac2ce upstream. + +Crash shutdown handler only disables kvmclock and steal time, other PV +features remain active so we risk corrupting memory or getting some +side-effects in kdump kernel. Move crash handler to kvm.c and unify +with CPU offline. 
+ +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20210414123544.1060604-5-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_para.h | 6 ----- + arch/x86/kernel/kvm.c | 44 +++++++++++++++++++++++++++++----------- + arch/x86/kernel/kvmclock.c | 21 ------------------- + 3 files changed, 32 insertions(+), 39 deletions(-) + +--- a/arch/x86/include/asm/kvm_para.h ++++ b/arch/x86/include/asm/kvm_para.h +@@ -92,7 +92,6 @@ unsigned int kvm_arch_para_hints(void); + void kvm_async_pf_task_wait_schedule(u32 token); + void kvm_async_pf_task_wake(u32 token); + u32 kvm_read_and_reset_apf_flags(void); +-void kvm_disable_steal_time(void); + bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); + + DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); +@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf + return 0; + } + +-static inline void kvm_disable_steal_time(void) +-{ +- return; +-} +- + static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) + { + return false; +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + + DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); +@@ -375,6 +376,14 @@ static void kvm_pv_disable_apf(void) + pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id()); + } + ++static void kvm_disable_steal_time(void) ++{ ++ if (!has_steal_clock) ++ return; ++ ++ wrmsr(MSR_KVM_STEAL_TIME, 0, 0); ++} ++ + static void kvm_pv_guest_cpu_reboot(void *unused) + { + /* +@@ -417,14 +426,6 @@ static u64 kvm_steal_clock(int cpu) + return steal; + } + +-void kvm_disable_steal_time(void) +-{ +- if (!has_steal_clock) +- return; +- +- wrmsr(MSR_KVM_STEAL_TIME, 0, 0); +-} +- + static inline void __set_percpu_decrypted(void *ptr, unsigned long size) + { + early_set_memory_decrypted((unsigned long) ptr, size); +@@ -461,13 +462,14 @@ static bool 
pv_tlb_flush_supported(void) + + static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); + +-static void kvm_guest_cpu_offline(void) ++static void kvm_guest_cpu_offline(bool shutdown) + { + kvm_disable_steal_time(); + if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) + wrmsrl(MSR_KVM_PV_EOI_EN, 0); + kvm_pv_disable_apf(); +- apf_task_wake_all(); ++ if (!shutdown) ++ apf_task_wake_all(); + kvmclock_disable(); + } + +@@ -613,7 +615,7 @@ static int kvm_cpu_down_prepare(unsigned + unsigned long flags; + + local_irq_save(flags); +- kvm_guest_cpu_offline(); ++ kvm_guest_cpu_offline(false); + local_irq_restore(flags); + return 0; + } +@@ -622,7 +624,7 @@ static int kvm_cpu_down_prepare(unsigned + + static int kvm_suspend(void) + { +- kvm_guest_cpu_offline(); ++ kvm_guest_cpu_offline(false); + + return 0; + } +@@ -637,6 +639,20 @@ static struct syscore_ops kvm_syscore_op + .resume = kvm_resume, + }; + ++/* ++ * After a PV feature is registered, the host will keep writing to the ++ * registered memory location. If the guest happens to shutdown, this memory ++ * won't be valid. In cases like kexec, in which you install a new kernel, this ++ * means a random memory location will be kept being written. 
++ */ ++#ifdef CONFIG_KEXEC_CORE ++static void kvm_crash_shutdown(struct pt_regs *regs) ++{ ++ kvm_guest_cpu_offline(true); ++ native_machine_crash_shutdown(regs); ++} ++#endif ++ + static void kvm_flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) + { +@@ -705,6 +721,10 @@ static void __init kvm_guest_init(void) + kvm_guest_cpu_init(); + #endif + ++#ifdef CONFIG_KEXEC_CORE ++ machine_ops.crash_shutdown = kvm_crash_shutdown; ++#endif ++ + register_syscore_ops(&kvm_syscore_ops); + + /* +--- a/arch/x86/kernel/kvmclock.c ++++ b/arch/x86/kernel/kvmclock.c +@@ -20,7 +20,6 @@ + #include + #include + #include +-#include + #include + + static int kvmclock __initdata = 1; +@@ -204,23 +203,6 @@ static void kvm_setup_secondary_clock(vo + } + #endif + +-/* +- * After the clock is registered, the host will keep writing to the +- * registered memory location. If the guest happens to shutdown, this memory +- * won't be valid. In cases like kexec, in which you install a new kernel, this +- * means a random memory location will be kept being written. 
So before any +- * kind of shutdown from our side, we unregister the clock by writing anything +- * that does not have the 'enable' bit set in the msr +- */ +-#ifdef CONFIG_KEXEC_CORE +-static void kvm_crash_shutdown(struct pt_regs *regs) +-{ +- native_write_msr(msr_kvm_system_time, 0, 0); +- kvm_disable_steal_time(); +- native_machine_crash_shutdown(regs); +-} +-#endif +- + void kvmclock_disable(void) + { + native_write_msr(msr_kvm_system_time, 0, 0); +@@ -350,9 +332,6 @@ void __init kvmclock_init(void) + #endif + x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; + x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; +-#ifdef CONFIG_KEXEC_CORE +- machine_ops.crash_shutdown = kvm_crash_shutdown; +-#endif + kvm_get_preset_lpj(); + + /* diff --git a/queue-5.10/x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch b/queue-5.10/x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch new file mode 100644 index 00000000000..88d13c89ff9 --- /dev/null +++ b/queue-5.10/x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch @@ -0,0 +1,86 @@ +From foo@baz Tue Jun 8 05:44:24 PM CEST 2021 +From: Krzysztof Kozlowski +Date: Mon, 31 May 2021 16:05:25 +0200 +Subject: x86/kvm: Disable kvmclock on all CPUs on shutdown +To: stable@vger.kernel.org +Cc: Andrea Righi , Paolo Bonzini , Vitaly Kuznetsov , Krzysztof Kozlowski +Message-ID: <20210531140526.42932-3-krzysztof.kozlowski@canonical.com> + +From: Vitaly Kuznetsov + +commit c02027b5742b5aa804ef08a4a9db433295533046 upstream. + +Currently, we disable kvmclock from machine_shutdown() hook and this +only happens for boot CPU. We need to disable it for all CPUs to +guard against memory corruption e.g. on restore from hibernate. + +Note, writing '0' to kvmclock MSR doesn't clear memory location, it +just prevents hypervisor from updating the location so for the short +while after write and while CPU is still alive, the clock remains usable +and correct so we don't need to switch to some other clocksource. 
+ +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20210414123544.1060604-4-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Andrea Righi +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_para.h | 4 ++-- + arch/x86/kernel/kvm.c | 1 + + arch/x86/kernel/kvmclock.c | 5 +---- + 3 files changed, 4 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/kvm_para.h ++++ b/arch/x86/include/asm/kvm_para.h +@@ -7,8 +7,6 @@ + #include + #include + +-extern void kvmclock_init(void); +- + #ifdef CONFIG_KVM_GUEST + bool kvm_check_and_clear_guest_paused(void); + #else +@@ -86,6 +84,8 @@ static inline long kvm_hypercall4(unsign + } + + #ifdef CONFIG_KVM_GUEST ++void kvmclock_init(void); ++void kvmclock_disable(void); + bool kvm_para_available(void); + unsigned int kvm_arch_para_features(void); + unsigned int kvm_arch_para_hints(void); +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -468,6 +468,7 @@ static void kvm_guest_cpu_offline(void) + wrmsrl(MSR_KVM_PV_EOI_EN, 0); + kvm_pv_disable_apf(); + apf_task_wake_all(); ++ kvmclock_disable(); + } + + static int kvm_cpu_online(unsigned int cpu) +--- a/arch/x86/kernel/kvmclock.c ++++ b/arch/x86/kernel/kvmclock.c +@@ -221,11 +221,9 @@ static void kvm_crash_shutdown(struct pt + } + #endif + +-static void kvm_shutdown(void) ++void kvmclock_disable(void) + { + native_write_msr(msr_kvm_system_time, 0, 0); +- kvm_disable_steal_time(); +- native_machine_shutdown(); + } + + static void __init kvmclock_init_mem(void) +@@ -352,7 +350,6 @@ void __init kvmclock_init(void) + #endif + x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; + x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; +- machine_ops.shutdown = kvm_shutdown; + #ifdef CONFIG_KEXEC_CORE + machine_ops.crash_shutdown = kvm_crash_shutdown; + #endif diff --git a/queue-5.10/x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch 
b/queue-5.10/x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch new file mode 100644 index 00000000000..3daa41cf40a --- /dev/null +++ b/queue-5.10/x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch @@ -0,0 +1,139 @@ +From foo@baz Tue Jun 8 05:44:24 PM CEST 2021 +From: Krzysztof Kozlowski +Date: Mon, 31 May 2021 16:05:24 +0200 +Subject: x86/kvm: Teardown PV features on boot CPU as well +To: stable@vger.kernel.org +Cc: Andrea Righi , Paolo Bonzini , Vitaly Kuznetsov , Krzysztof Kozlowski +Message-ID: <20210531140526.42932-2-krzysztof.kozlowski@canonical.com> + +From: Vitaly Kuznetsov + +commit 8b79feffeca28c5459458fe78676b081e87c93a4 upstream. + +Various PV features (Async PF, PV EOI, steal time) work through memory +shared with hypervisor and when we restore from hibernation we must +properly teardown all these features to make sure hypervisor doesn't +write to stale locations after we jump to the previously hibernated kernel +(which can try to place anything there). For secondary CPUs the job is +already done by kvm_cpu_down_prepare(), register syscore ops to do +the same for boot CPU. 
+ +Krzysztof: +This fixes memory corruption visible after second resume from +hibernation: + + BUG: Bad page state in process dbus-daemon pfn:18b01 + page:ffffea000062c040 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 compound_mapcount: -30591 + flags: 0xfffffc0078141(locked|error|workingset|writeback|head|mappedtodisk|reclaim) + raw: 000fffffc0078141 dead0000000002d0 dead000000000100 0000000000000000 + raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 + page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set + bad because of flags: 0x78141(locked|error|workingset|writeback|head|mappedtodisk|reclaim) + +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Andrea Righi +[krzysztof: Extend the commit message, adjust for v5.10 context] +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kvm.c | 57 +++++++++++++++++++++++++++++++++++--------------- + 1 file changed, 41 insertions(+), 16 deletions(-) + +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -460,6 +461,25 @@ static bool pv_tlb_flush_supported(void) + + static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); + ++static void kvm_guest_cpu_offline(void) ++{ ++ kvm_disable_steal_time(); ++ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) ++ wrmsrl(MSR_KVM_PV_EOI_EN, 0); ++ kvm_pv_disable_apf(); ++ apf_task_wake_all(); ++} ++ ++static int kvm_cpu_online(unsigned int cpu) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ kvm_guest_cpu_init(); ++ local_irq_restore(flags); ++ return 0; ++} ++ + #ifdef CONFIG_SMP + + static bool pv_ipi_supported(void) +@@ -587,31 +607,34 @@ static void __init kvm_smp_prepare_boot_ + kvm_spinlock_init(); + } + +-static void kvm_guest_cpu_offline(void) ++static int kvm_cpu_down_prepare(unsigned int cpu) + { +- 
kvm_disable_steal_time(); +- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) +- wrmsrl(MSR_KVM_PV_EOI_EN, 0); +- kvm_pv_disable_apf(); +- apf_task_wake_all(); +-} ++ unsigned long flags; + +-static int kvm_cpu_online(unsigned int cpu) +-{ +- local_irq_disable(); +- kvm_guest_cpu_init(); +- local_irq_enable(); ++ local_irq_save(flags); ++ kvm_guest_cpu_offline(); ++ local_irq_restore(flags); + return 0; + } + +-static int kvm_cpu_down_prepare(unsigned int cpu) ++#endif ++ ++static int kvm_suspend(void) + { +- local_irq_disable(); + kvm_guest_cpu_offline(); +- local_irq_enable(); ++ + return 0; + } +-#endif ++ ++static void kvm_resume(void) ++{ ++ kvm_cpu_online(raw_smp_processor_id()); ++} ++ ++static struct syscore_ops kvm_syscore_ops = { ++ .suspend = kvm_suspend, ++ .resume = kvm_resume, ++}; + + static void kvm_flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) +@@ -681,6 +704,8 @@ static void __init kvm_guest_init(void) + kvm_guest_cpu_init(); + #endif + ++ register_syscore_ops(&kvm_syscore_ops); ++ + /* + * Hard lockup detection is enabled by default. Disable it, as guests + * can get false positives too easily, for example if the host is -- 2.47.3