--- /dev/null
+From foo@baz Tue Jun 8 05:36:29 PM CEST 2021
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 30 Apr 2021 19:59:51 +0800
+Subject: btrfs: fix unmountable seed device after fstrim
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 5e753a817b2d5991dfe8a801b7b1e8e79a1c5a20 upstream.
+
+The following test case reproduces an issue of wrongly freeing in-use
+blocks on the readonly seed device when fstrim is called on the rw sprout
+device, as shown below.
+
+Create a seed device and add a sprout device to it:
+
+ $ mkfs.btrfs -fq -dsingle -msingle /dev/loop0
+ $ btrfstune -S 1 /dev/loop0
+ $ mount /dev/loop0 /btrfs
+ $ btrfs dev add -f /dev/loop1 /btrfs
+ BTRFS info (device loop0): relocating block group 290455552 flags system
+ BTRFS info (device loop0): relocating block group 1048576 flags system
+ BTRFS info (device loop0): disk added /dev/loop1
+ $ umount /btrfs
+
+Mount the sprout device and run fstrim:
+
+ $ mount /dev/loop1 /btrfs
+ $ fstrim /btrfs
+ $ umount /btrfs
+
+Now try to mount the seed device, and it fails:
+
+ $ mount /dev/loop0 /btrfs
+ mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error.
+
+Block 5292032 is missing on the readonly seed device:
+
+ $ dmesg -kt | tail
+ <snip>
+ BTRFS error (device loop0): bad tree block start, want 5292032 have 0
+ BTRFS warning (device loop0): couldn't read-tree root
+ BTRFS error (device loop0): open_ctree failed
+
+>From the dump-tree of the seed device (taken before the fstrim). Block
+5292032 belonged to the block group starting at 5242880:
+
+ $ btrfs inspect dump-tree -e /dev/loop0 | grep -A1 BLOCK_GROUP
+ <snip>
+ item 3 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16169 itemsize 24
+ block group used 114688 chunk_objectid 256 flags METADATA
+ <snip>
+
+>From the dump-tree of the sprout device (taken before the fstrim).
+fstrim used block-group 5242880 to find the related free space to free:
+
+ $ btrfs inspect dump-tree -e /dev/loop1 | grep -A1 BLOCK_GROUP
+ <snip>
+ item 1 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16226 itemsize 24
+ block group used 32768 chunk_objectid 256 flags METADATA
+ <snip>
+
+BPF kernel tracing the fstrim command finds the missing block 5292032
+within the range of the discarded blocks as below:
+
+ kprobe:btrfs_discard_extent {
+ printf("freeing start %llu end %llu num_bytes %llu:\n",
+ arg1, arg1+arg2, arg2);
+ }
+
+ freeing start 5259264 end 5406720 num_bytes 147456
+ <snip>
+
+Fix this by not issuing the discard command to the readonly seed device.
+
+Reported-by: Chris Murphy <lists@colorremedies.com>
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -1297,16 +1297,20 @@ int btrfs_discard_extent(struct btrfs_fs
+ for (i = 0; i < bbio->num_stripes; i++, stripe++) {
+ u64 bytes;
+ struct request_queue *req_q;
++ struct btrfs_device *device = stripe->dev;
+
+- if (!stripe->dev->bdev) {
++ if (!device->bdev) {
+ ASSERT(btrfs_test_opt(fs_info, DEGRADED));
+ continue;
+ }
+- req_q = bdev_get_queue(stripe->dev->bdev);
++ req_q = bdev_get_queue(device->bdev);
+ if (!blk_queue_discard(req_q))
+ continue;
+
+- ret = btrfs_issue_discard(stripe->dev->bdev,
++ if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
++ continue;
++
++ ret = btrfs_issue_discard(device->bdev,
+ stripe->physical,
+ stripe->length,
+ &bytes);
--- /dev/null
+From cb853ded1d25e5b026ce115dbcde69e3d7e2e831 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Fri, 14 May 2021 09:05:41 +0100
+Subject: KVM: arm64: Fix debug register indexing
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit cb853ded1d25e5b026ce115dbcde69e3d7e2e831 upstream.
+
+Commit 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on
+reset") flipped the register number to 0 for all the debug registers
+in the sysreg table, thereby indicating that these registers live
+in a separate shadow structure.
+
+However, the author of this patch failed to realise that all the
+accessors are using that particular index instead of the register
+encoding, resulting in all the registers hitting index 0. Not quite
+a valid implementation of the architecture...
+
+Address the issue by fixing all the accessors to use the CRm field
+of the encoding, which contains the debug register index.
+
+Fixes: 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on reset")
+Reported-by: Ricardo Koller <ricarkol@google.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/sys_regs.c | 42 +++++++++++++++++++++---------------------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+--- a/arch/arm64/kvm/sys_regs.c
++++ b/arch/arm64/kvm/sys_regs.c
+@@ -464,14 +464,14 @@ static bool trap_bvr(struct kvm_vcpu *vc
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+ {
+- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
++ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
++ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+
+ return true;
+ }
+@@ -479,7 +479,7 @@ static bool trap_bvr(struct kvm_vcpu *vc
+ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+
+ if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+@@ -489,7 +489,7 @@ static int set_bvr(struct kvm_vcpu *vcpu
+ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+@@ -499,21 +499,21 @@ static int get_bvr(struct kvm_vcpu *vcpu
+ static void reset_bvr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+ {
+- vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
++ vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
+ }
+
+ static bool trap_bcr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+ {
+- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
++ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
++ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+
+ return true;
+ }
+@@ -521,7 +521,7 @@ static bool trap_bcr(struct kvm_vcpu *vc
+ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+
+ if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+@@ -532,7 +532,7 @@ static int set_bcr(struct kvm_vcpu *vcpu
+ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+@@ -542,22 +542,22 @@ static int get_bcr(struct kvm_vcpu *vcpu
+ static void reset_bcr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+ {
+- vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
++ vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
+ }
+
+ static bool trap_wvr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+ {
+- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
++ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+- trace_trap_reg(__func__, rd->reg, p->is_write,
+- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
++ trace_trap_reg(__func__, rd->CRm, p->is_write,
++ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]);
+
+ return true;
+ }
+@@ -565,7 +565,7 @@ static bool trap_wvr(struct kvm_vcpu *vc
+ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+
+ if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+@@ -575,7 +575,7 @@ static int set_wvr(struct kvm_vcpu *vcpu
+ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+@@ -585,21 +585,21 @@ static int get_wvr(struct kvm_vcpu *vcpu
+ static void reset_wvr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+ {
+- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
++ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
+ }
+
+ static bool trap_wcr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+ {
+- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
++ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
++ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+
+ return true;
+ }
+@@ -607,7 +607,7 @@ static bool trap_wcr(struct kvm_vcpu *vc
+ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+
+ if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+@@ -617,7 +617,7 @@ static int set_wcr(struct kvm_vcpu *vcpu
+ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
++ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+@@ -627,7 +627,7 @@ static int get_wcr(struct kvm_vcpu *vcpu
+ static void reset_wcr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+ {
+- vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
++ vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val;
+ }
+
+ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
--- /dev/null
+From foo@baz Tue Jun 8 05:37:28 PM CEST 2021
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 21 Apr 2021 19:21:22 -0700
+Subject: KVM: SVM: Truncate GPR value for DR and CR accesses in !64-bit mode
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 0884335a2e653b8a045083aa1d57ce74269ac81d upstream.
+
+Drop bits 63:32 on loads/stores to/from DRs and CRs when the vCPU is not
+in 64-bit mode. The APM states bits 63:32 are dropped for both DRs and
+CRs:
+
+ In 64-bit mode, the operand size is fixed at 64 bits without the need
+ for a REX prefix. In non-64-bit mode, the operand size is fixed at 32
+ bits and the upper 32 bits of the destination are forced to 0.
+
+Fixes: 7ff76d58a9dc ("KVM: SVM: enhance MOV CR intercept handler")
+Fixes: cae3797a4639 ("KVM: SVM: enhance mov DR intercept handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210422022128.3464144-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2362,7 +2362,7 @@ static int cr_interception(struct vcpu_s
+ err = 0;
+ if (cr >= 16) { /* mov to cr */
+ cr -= 16;
+- val = kvm_register_read(&svm->vcpu, reg);
++ val = kvm_register_readl(&svm->vcpu, reg);
+ trace_kvm_cr_write(cr, val);
+ switch (cr) {
+ case 0:
+@@ -2408,7 +2408,7 @@ static int cr_interception(struct vcpu_s
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+ }
+- kvm_register_write(&svm->vcpu, reg, val);
++ kvm_register_writel(&svm->vcpu, reg, val);
+ trace_kvm_cr_read(cr, val);
+ }
+ return kvm_complete_insn_gp(&svm->vcpu, err);
+@@ -2439,13 +2439,13 @@ static int dr_interception(struct vcpu_s
+ if (dr >= 16) { /* mov to DRn */
+ if (!kvm_require_dr(&svm->vcpu, dr - 16))
+ return 1;
+- val = kvm_register_read(&svm->vcpu, reg);
++ val = kvm_register_readl(&svm->vcpu, reg);
+ kvm_set_dr(&svm->vcpu, dr - 16, val);
+ } else {
+ if (!kvm_require_dr(&svm->vcpu, dr))
+ return 1;
+ kvm_get_dr(&svm->vcpu, dr, &val);
+- kvm_register_write(&svm->vcpu, reg, val);
++ kvm_register_writel(&svm->vcpu, reg, val);
+ }
+
+ return kvm_skip_emulated_instruction(&svm->vcpu);
--- /dev/null
+From 89b158635ad79574bde8e94d45dad33f8cf09549 Mon Sep 17 00:00:00 2001
+From: Gao Xiang <hsiangkao@redhat.com>
+Date: Tue, 15 Dec 2020 20:44:03 -0800
+Subject: lib/lz4: explicitly support in-place decompression
+
+From: Gao Xiang <hsiangkao@redhat.com>
+
+commit 89b158635ad79574bde8e94d45dad33f8cf09549 upstream.
+
+The LZ4 final literal copy can overlap when doing in-place
+decompression, so it is unsafe to use memcpy() there, particularly
+with an optimized memcpy() implementation; memmove() must be used
+instead.
+
+Upstream LZ4 made this change years ago [1] (the impact is
+negligible [2], and only a few bytes remain); this commit
+just synchronizes the kernel side with the upstream LZ4 code.
+
+This can be observed as an EROFS in-place decompression failure
+on specific files when X86_FEATURE_ERMS is unsupported, because the
+memcpy() optimization of commit 59daa706fbec ("x86, mem:
+Optimize memcpy by avoiding memory false dependece") is
+enabled in that case.
+
+Currently, most modern x86 CPUs support ERMS and just use the
+"rep movsb" approach, so they have no problem at all. However, the
+issue can still be verified by forcibly disabling the ERMS feature...
+
+arch/x86/lib/memcpy_64.S:
+ ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+- "jmp memcpy_erms", X86_FEATURE_ERMS
++ "jmp memcpy_orig", X86_FEATURE_ERMS
+
+We had not observed anything strange on arm64/arm/x86 platforms before,
+since most memcpy() implementations copy in increasing address order
+("copy upwards" [3]), which is the correct order for in-place
+decompression. Still, the code really needs to be updated to memmove(),
+considering that this is undefined behavior according to the standard
+and that some unique optimizations already exist in the kernel.
+
+[1] https://github.com/lz4/lz4/commit/33cb8518ac385835cc17be9a770b27b40cd0e15b
+[2] https://github.com/lz4/lz4/pull/717#issuecomment-497818921
+[3] https://sourceware.org/bugzilla/show_bug.cgi?id=12518
+
+Link: https://lkml.kernel.org/r/20201122030749.2698994-1-hsiangkao@redhat.com
+Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
+Reviewed-by: Nick Terrell <terrelln@fb.com>
+Cc: Yann Collet <yann.collet.73@gmail.com>
+Cc: Miao Xie <miaoxie@huawei.com>
+Cc: Chao Yu <yuchao0@huawei.com>
+Cc: Li Guifu <bluce.liguifu@huawei.com>
+Cc: Guo Xuenan <guoxuenan@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/lz4/lz4_decompress.c | 6 +++++-
+ lib/lz4/lz4defs.h | 1 +
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+--- a/lib/lz4/lz4_decompress.c
++++ b/lib/lz4/lz4_decompress.c
+@@ -263,7 +263,11 @@ static FORCE_INLINE int LZ4_decompress_g
+ }
+ }
+
+- LZ4_memcpy(op, ip, length);
++ /*
++ * supports overlapping memory regions; only matters
++ * for in-place decompression scenarios
++ */
++ LZ4_memmove(op, ip, length);
+ ip += length;
+ op += length;
+
+--- a/lib/lz4/lz4defs.h
++++ b/lib/lz4/lz4defs.h
+@@ -146,6 +146,7 @@ static FORCE_INLINE void LZ4_writeLE16(v
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+ #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
++#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size)
+
+ static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
+ {
btrfs-fix-deadlock-when-cloning-inline-extents-and-low-on-available-space.patch
mm-hugetlb-fix-simple-resv_huge_pages-underflow-on-u.patch
drm-msm-dpu-always-use-mdp-device-to-scale-bandwidth.patch
+btrfs-fix-unmountable-seed-device-after-fstrim.patch
+kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch
+kvm-arm64-fix-debug-register-indexing.patch
+x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch
+x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch
+x86-kvm-disable-all-pv-features-on-crash.patch
+lib-lz4-explicitly-support-in-place-decompression.patch
--- /dev/null
+From foo@baz Tue Jun 8 05:44:24 PM CEST 2021
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Date: Mon, 31 May 2021 16:05:26 +0200
+Subject: x86/kvm: Disable all PV features on crash
+To: stable@vger.kernel.org
+Cc: Andrea Righi <andrea.righi@canonical.com>, Paolo Bonzini <pbonzini@redhat.com>, Vitaly Kuznetsov <vkuznets@redhat.com>, Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Message-ID: <20210531140526.42932-4-krzysztof.kozlowski@canonical.com>
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 3d6b84132d2a57b5a74100f6923a8feb679ac2ce upstream.
+
+The crash shutdown handler only disables kvmclock and steal time; other
+PV features remain active, so we risk corrupting memory or getting some
+side effects in the kdump kernel. Move the crash handler to kvm.c and
+unify it with CPU offline.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20210414123544.1060604-5-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_para.h | 6 -----
+ arch/x86/kernel/kvm.c | 44 +++++++++++++++++++++++++++++-----------
+ arch/x86/kernel/kvmclock.c | 21 -------------------
+ 3 files changed, 32 insertions(+), 39 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_para.h
++++ b/arch/x86/include/asm/kvm_para.h
+@@ -92,7 +92,6 @@ unsigned int kvm_arch_para_hints(void);
+ void kvm_async_pf_task_wait_schedule(u32 token);
+ void kvm_async_pf_task_wake(u32 token);
+ u32 kvm_read_and_reset_apf_flags(void);
+-void kvm_disable_steal_time(void);
+ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
+
+ DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
+@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf
+ return 0;
+ }
+
+-static inline void kvm_disable_steal_time(void)
+-{
+- return;
+-}
+-
+ static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+ {
+ return false;
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -38,6 +38,7 @@
+ #include <asm/tlb.h>
+ #include <asm/cpuidle_haltpoll.h>
+ #include <asm/ptrace.h>
++#include <asm/reboot.h>
+ #include <asm/svm.h>
+
+ DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
+@@ -375,6 +376,14 @@ static void kvm_pv_disable_apf(void)
+ pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+ }
+
++static void kvm_disable_steal_time(void)
++{
++ if (!has_steal_clock)
++ return;
++
++ wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
++}
++
+ static void kvm_pv_guest_cpu_reboot(void *unused)
+ {
+ /*
+@@ -417,14 +426,6 @@ static u64 kvm_steal_clock(int cpu)
+ return steal;
+ }
+
+-void kvm_disable_steal_time(void)
+-{
+- if (!has_steal_clock)
+- return;
+-
+- wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+-}
+-
+ static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
+ {
+ early_set_memory_decrypted((unsigned long) ptr, size);
+@@ -461,13 +462,14 @@ static bool pv_tlb_flush_supported(void)
+
+ static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
+
+-static void kvm_guest_cpu_offline(void)
++static void kvm_guest_cpu_offline(bool shutdown)
+ {
+ kvm_disable_steal_time();
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+ wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ kvm_pv_disable_apf();
+- apf_task_wake_all();
++ if (!shutdown)
++ apf_task_wake_all();
+ kvmclock_disable();
+ }
+
+@@ -613,7 +615,7 @@ static int kvm_cpu_down_prepare(unsigned
+ unsigned long flags;
+
+ local_irq_save(flags);
+- kvm_guest_cpu_offline();
++ kvm_guest_cpu_offline(false);
+ local_irq_restore(flags);
+ return 0;
+ }
+@@ -622,7 +624,7 @@ static int kvm_cpu_down_prepare(unsigned
+
+ static int kvm_suspend(void)
+ {
+- kvm_guest_cpu_offline();
++ kvm_guest_cpu_offline(false);
+
+ return 0;
+ }
+@@ -637,6 +639,20 @@ static struct syscore_ops kvm_syscore_op
+ .resume = kvm_resume,
+ };
+
++/*
++ * After a PV feature is registered, the host will keep writing to the
++ * registered memory location. If the guest happens to shutdown, this memory
++ * won't be valid. In cases like kexec, in which you install a new kernel, this
++ * means a random memory location will be kept being written.
++ */
++#ifdef CONFIG_KEXEC_CORE
++static void kvm_crash_shutdown(struct pt_regs *regs)
++{
++ kvm_guest_cpu_offline(true);
++ native_machine_crash_shutdown(regs);
++}
++#endif
++
+ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info)
+ {
+@@ -705,6 +721,10 @@ static void __init kvm_guest_init(void)
+ kvm_guest_cpu_init();
+ #endif
+
++#ifdef CONFIG_KEXEC_CORE
++ machine_ops.crash_shutdown = kvm_crash_shutdown;
++#endif
++
+ register_syscore_ops(&kvm_syscore_ops);
+
+ /*
+--- a/arch/x86/kernel/kvmclock.c
++++ b/arch/x86/kernel/kvmclock.c
+@@ -20,7 +20,6 @@
+ #include <asm/hypervisor.h>
+ #include <asm/mem_encrypt.h>
+ #include <asm/x86_init.h>
+-#include <asm/reboot.h>
+ #include <asm/kvmclock.h>
+
+ static int kvmclock __initdata = 1;
+@@ -204,23 +203,6 @@ static void kvm_setup_secondary_clock(vo
+ }
+ #endif
+
+-/*
+- * After the clock is registered, the host will keep writing to the
+- * registered memory location. If the guest happens to shutdown, this memory
+- * won't be valid. In cases like kexec, in which you install a new kernel, this
+- * means a random memory location will be kept being written. So before any
+- * kind of shutdown from our side, we unregister the clock by writing anything
+- * that does not have the 'enable' bit set in the msr
+- */
+-#ifdef CONFIG_KEXEC_CORE
+-static void kvm_crash_shutdown(struct pt_regs *regs)
+-{
+- native_write_msr(msr_kvm_system_time, 0, 0);
+- kvm_disable_steal_time();
+- native_machine_crash_shutdown(regs);
+-}
+-#endif
+-
+ void kvmclock_disable(void)
+ {
+ native_write_msr(msr_kvm_system_time, 0, 0);
+@@ -350,9 +332,6 @@ void __init kvmclock_init(void)
+ #endif
+ x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
+ x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
+-#ifdef CONFIG_KEXEC_CORE
+- machine_ops.crash_shutdown = kvm_crash_shutdown;
+-#endif
+ kvm_get_preset_lpj();
+
+ /*
--- /dev/null
+From foo@baz Tue Jun 8 05:44:24 PM CEST 2021
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Date: Mon, 31 May 2021 16:05:25 +0200
+Subject: x86/kvm: Disable kvmclock on all CPUs on shutdown
+To: stable@vger.kernel.org
+Cc: Andrea Righi <andrea.righi@canonical.com>, Paolo Bonzini <pbonzini@redhat.com>, Vitaly Kuznetsov <vkuznets@redhat.com>, Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Message-ID: <20210531140526.42932-3-krzysztof.kozlowski@canonical.com>
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit c02027b5742b5aa804ef08a4a9db433295533046 upstream.
+
+Currently, we disable kvmclock from the machine_shutdown() hook and this
+only happens for the boot CPU. We need to disable it for all CPUs to
+guard against memory corruption, e.g. on restore from hibernation.
+
+Note that writing '0' to the kvmclock MSR doesn't clear the memory
+location; it just prevents the hypervisor from updating the location.
+So for the short while after the write, while the CPU is still alive,
+the clock remains usable and correct, and we don't need to switch to
+some other clocksource.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20210414123544.1060604-4-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_para.h | 4 ++--
+ arch/x86/kernel/kvm.c | 1 +
+ arch/x86/kernel/kvmclock.c | 5 +----
+ 3 files changed, 4 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_para.h
++++ b/arch/x86/include/asm/kvm_para.h
+@@ -7,8 +7,6 @@
+ #include <linux/interrupt.h>
+ #include <uapi/asm/kvm_para.h>
+
+-extern void kvmclock_init(void);
+-
+ #ifdef CONFIG_KVM_GUEST
+ bool kvm_check_and_clear_guest_paused(void);
+ #else
+@@ -86,6 +84,8 @@ static inline long kvm_hypercall4(unsign
+ }
+
+ #ifdef CONFIG_KVM_GUEST
++void kvmclock_init(void);
++void kvmclock_disable(void);
+ bool kvm_para_available(void);
+ unsigned int kvm_arch_para_features(void);
+ unsigned int kvm_arch_para_hints(void);
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -468,6 +468,7 @@ static void kvm_guest_cpu_offline(void)
+ wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ kvm_pv_disable_apf();
+ apf_task_wake_all();
++ kvmclock_disable();
+ }
+
+ static int kvm_cpu_online(unsigned int cpu)
+--- a/arch/x86/kernel/kvmclock.c
++++ b/arch/x86/kernel/kvmclock.c
+@@ -221,11 +221,9 @@ static void kvm_crash_shutdown(struct pt
+ }
+ #endif
+
+-static void kvm_shutdown(void)
++void kvmclock_disable(void)
+ {
+ native_write_msr(msr_kvm_system_time, 0, 0);
+- kvm_disable_steal_time();
+- native_machine_shutdown();
+ }
+
+ static void __init kvmclock_init_mem(void)
+@@ -352,7 +350,6 @@ void __init kvmclock_init(void)
+ #endif
+ x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
+ x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
+- machine_ops.shutdown = kvm_shutdown;
+ #ifdef CONFIG_KEXEC_CORE
+ machine_ops.crash_shutdown = kvm_crash_shutdown;
+ #endif
--- /dev/null
+From foo@baz Tue Jun 8 05:44:24 PM CEST 2021
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Date: Mon, 31 May 2021 16:05:24 +0200
+Subject: x86/kvm: Teardown PV features on boot CPU as well
+To: stable@vger.kernel.org
+Cc: Andrea Righi <andrea.righi@canonical.com>, Paolo Bonzini <pbonzini@redhat.com>, Vitaly Kuznetsov <vkuznets@redhat.com>, Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Message-ID: <20210531140526.42932-2-krzysztof.kozlowski@canonical.com>
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 8b79feffeca28c5459458fe78676b081e87c93a4 upstream.
+
+Various PV features (Async PF, PV EOI, steal time) work through memory
+shared with the hypervisor, and when we restore from hibernation we must
+properly tear down all these features to make sure the hypervisor doesn't
+write to stale locations after we jump to the previously hibernated kernel
+(which can try to place anything there). For secondary CPUs the job is
+already done by kvm_cpu_down_prepare(); register syscore ops to do
+the same for the boot CPU.
+
+Krzysztof:
+This fixes memory corruption visible after second resume from
+hibernation:
+
+ BUG: Bad page state in process dbus-daemon pfn:18b01
+ page:ffffea000062c040 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 compound_mapcount: -30591
+ flags: 0xfffffc0078141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
+ raw: 000fffffc0078141 dead0000000002d0 dead000000000100 0000000000000000
+ raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
+ page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set
+ bad because of flags: 0x78141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
+[krzysztof: Extend the commit message, adjust for v5.10 context]
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kvm.c | 57 +++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 41 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -26,6 +26,7 @@
+ #include <linux/kprobes.h>
+ #include <linux/nmi.h>
+ #include <linux/swait.h>
++#include <linux/syscore_ops.h>
+ #include <asm/timer.h>
+ #include <asm/cpu.h>
+ #include <asm/traps.h>
+@@ -460,6 +461,25 @@ static bool pv_tlb_flush_supported(void)
+
+ static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
+
++static void kvm_guest_cpu_offline(void)
++{
++ kvm_disable_steal_time();
++ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
++ wrmsrl(MSR_KVM_PV_EOI_EN, 0);
++ kvm_pv_disable_apf();
++ apf_task_wake_all();
++}
++
++static int kvm_cpu_online(unsigned int cpu)
++{
++ unsigned long flags;
++
++ local_irq_save(flags);
++ kvm_guest_cpu_init();
++ local_irq_restore(flags);
++ return 0;
++}
++
+ #ifdef CONFIG_SMP
+
+ static bool pv_ipi_supported(void)
+@@ -587,31 +607,34 @@ static void __init kvm_smp_prepare_boot_
+ kvm_spinlock_init();
+ }
+
+-static void kvm_guest_cpu_offline(void)
++static int kvm_cpu_down_prepare(unsigned int cpu)
+ {
+- kvm_disable_steal_time();
+- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+- wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+- kvm_pv_disable_apf();
+- apf_task_wake_all();
+-}
++ unsigned long flags;
+
+-static int kvm_cpu_online(unsigned int cpu)
+-{
+- local_irq_disable();
+- kvm_guest_cpu_init();
+- local_irq_enable();
++ local_irq_save(flags);
++ kvm_guest_cpu_offline();
++ local_irq_restore(flags);
+ return 0;
+ }
+
+-static int kvm_cpu_down_prepare(unsigned int cpu)
++#endif
++
++static int kvm_suspend(void)
+ {
+- local_irq_disable();
+ kvm_guest_cpu_offline();
+- local_irq_enable();
++
+ return 0;
+ }
+-#endif
++
++static void kvm_resume(void)
++{
++ kvm_cpu_online(raw_smp_processor_id());
++}
++
++static struct syscore_ops kvm_syscore_ops = {
++ .suspend = kvm_suspend,
++ .resume = kvm_resume,
++};
+
+ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info)
+@@ -681,6 +704,8 @@ static void __init kvm_guest_init(void)
+ kvm_guest_cpu_init();
+ #endif
+
++ register_syscore_ops(&kvm_syscore_ops);
++
+ /*
+ * Hard lockup detection is enabled by default. Disable it, as guests
+ * can get false positives too easily, for example if the host is