5.10-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 8 Jun 2021 16:08:14 +0000 (18:08 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 8 Jun 2021 16:08:14 +0000 (18:08 +0200)
added patches:
btrfs-fix-unmountable-seed-device-after-fstrim.patch
kvm-arm64-fix-debug-register-indexing.patch
kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch
lib-lz4-explicitly-support-in-place-decompression.patch
x86-kvm-disable-all-pv-features-on-crash.patch
x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch
x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch

queue-5.10/btrfs-fix-unmountable-seed-device-after-fstrim.patch [new file with mode: 0644]
queue-5.10/kvm-arm64-fix-debug-register-indexing.patch [new file with mode: 0644]
queue-5.10/kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch [new file with mode: 0644]
queue-5.10/lib-lz4-explicitly-support-in-place-decompression.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/x86-kvm-disable-all-pv-features-on-crash.patch [new file with mode: 0644]
queue-5.10/x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch [new file with mode: 0644]
queue-5.10/x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch [new file with mode: 0644]

diff --git a/queue-5.10/btrfs-fix-unmountable-seed-device-after-fstrim.patch b/queue-5.10/btrfs-fix-unmountable-seed-device-after-fstrim.patch
new file mode 100644 (file)
index 0000000..c6e8057
--- /dev/null
@@ -0,0 +1,111 @@
+From foo@baz Tue Jun  8 05:36:29 PM CEST 2021
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 30 Apr 2021 19:59:51 +0800
+Subject: btrfs: fix unmountable seed device after fstrim
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 5e753a817b2d5991dfe8a801b7b1e8e79a1c5a20 upstream.
+
+The following test case reproduces an issue where in-use blocks on the
+readonly seed device are wrongly freed when fstrim is called on the rw
+sprout device.
+
+Create a seed device and add a sprout device to it:
+
+  $ mkfs.btrfs -fq -dsingle -msingle /dev/loop0
+  $ btrfstune -S 1 /dev/loop0
+  $ mount /dev/loop0 /btrfs
+  $ btrfs dev add -f /dev/loop1 /btrfs
+  BTRFS info (device loop0): relocating block group 290455552 flags system
+  BTRFS info (device loop0): relocating block group 1048576 flags system
+  BTRFS info (device loop0): disk added /dev/loop1
+  $ umount /btrfs
+
+Mount the sprout device and run fstrim:
+
+  $ mount /dev/loop1 /btrfs
+  $ fstrim /btrfs
+  $ umount /btrfs
+
+Now try to mount the seed device, and it fails:
+
+  $ mount /dev/loop0 /btrfs
+  mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error.
+
+Block 5292032 is missing on the readonly seed device:
+
+ $ dmesg -kt | tail
+ <snip>
+ BTRFS error (device loop0): bad tree block start, want 5292032 have 0
+ BTRFS warning (device loop0): couldn't read tree root
+ BTRFS error (device loop0): open_ctree failed
+
+From the dump-tree of the seed device (taken before the fstrim), block
+5292032 belonged to the block group starting at 5242880:
+
+  $ btrfs inspect dump-tree -e /dev/loop0 | grep -A1 BLOCK_GROUP
+  <snip>
+  item 3 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16169 itemsize 24
+       block group used 114688 chunk_objectid 256 flags METADATA
+  <snip>
+
+From the dump-tree of the sprout device (taken before the fstrim),
+fstrim used block group 5242880 to find the related free space to free:
+
+  $ btrfs inspect dump-tree -e /dev/loop1 | grep -A1 BLOCK_GROUP
+  <snip>
+  item 1 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16226 itemsize 24
+       block group used 32768 chunk_objectid 256 flags METADATA
+  <snip>
+
+Tracing the fstrim command in the kernel with BPF shows that the missing
+block 5292032 falls within the range of the discarded blocks:
+
+  kprobe:btrfs_discard_extent {
+       printf("freeing start %llu end %llu num_bytes %llu:\n",
+               arg1, arg1+arg2, arg2);
+  }
+
+  freeing start 5259264 end 5406720 num_bytes 147456
+  <snip>
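+
+(The probe above is bpftrace syntax; assuming bpftrace is available, it
+can be saved to a file and run as "bpftrace trim.bt" while the fstrim
+command executes.)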
+
+Fix this by not issuing the discard command to the readonly seed device.
+
+Reported-by: Chris Murphy <lists@colorremedies.com>
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -1297,16 +1297,20 @@ int btrfs_discard_extent(struct btrfs_fs
+               for (i = 0; i < bbio->num_stripes; i++, stripe++) {
+                       u64 bytes;
+                       struct request_queue *req_q;
++                      struct btrfs_device *device = stripe->dev;
+-                      if (!stripe->dev->bdev) {
++                      if (!device->bdev) {
+                               ASSERT(btrfs_test_opt(fs_info, DEGRADED));
+                               continue;
+                       }
+-                      req_q = bdev_get_queue(stripe->dev->bdev);
++                      req_q = bdev_get_queue(device->bdev);
+                       if (!blk_queue_discard(req_q))
+                               continue;
+-                      ret = btrfs_issue_discard(stripe->dev->bdev,
++                      if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
++                              continue;
++
++                      ret = btrfs_issue_discard(device->bdev,
+                                                 stripe->physical,
+                                                 stripe->length,
+                                                 &bytes);
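
Read as a whole rather than as hunks, the patched discard loop behaves
as in the following condensed sketch (illustrative only, not a verbatim
copy of fs/btrfs/extent-tree.c):

  for (i = 0; i < bbio->num_stripes; i++, stripe++) {
          u64 bytes;
          struct request_queue *req_q;
          struct btrfs_device *device = stripe->dev;

          if (!device->bdev) {
                  /* a missing device is only legal when degraded */
                  ASSERT(btrfs_test_opt(fs_info, DEGRADED));
                  continue;
          }

          req_q = bdev_get_queue(device->bdev);
          if (!blk_queue_discard(req_q))
                  continue;       /* device has no discard support */

          /* the fix: never issue discard to a readonly (seed) device */
          if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
                  continue;

          ret = btrfs_issue_discard(device->bdev, stripe->physical,
                                    stripe->length, &bytes);
  }
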
diff --git a/queue-5.10/kvm-arm64-fix-debug-register-indexing.patch b/queue-5.10/kvm-arm64-fix-debug-register-indexing.patch
new file mode 100644 (file)
index 0000000..99bc59d
--- /dev/null
@@ -0,0 +1,208 @@
+From cb853ded1d25e5b026ce115dbcde69e3d7e2e831 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Fri, 14 May 2021 09:05:41 +0100
+Subject: KVM: arm64: Fix debug register indexing
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit cb853ded1d25e5b026ce115dbcde69e3d7e2e831 upstream.
+
+Commit 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on
+reset") flipped the register number to 0 for all the debug registers
+in the sysreg table, thereby indicating that these registers live
+in a separate shadow structure.
+
+However, the author of this patch failed to realise that all the
+accessors are using that particular index instead of the register
+encoding, resulting in all the registers hitting index 0. Not quite
+a valid implementation of the architecture...
+
+Address the issue by fixing all the accessors to use the CRm field
+of the encoding, which contains the debug register index.
+
+Fixes: 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on reset")
+Reported-by: Ricardo Koller <ricarkol@google.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/sys_regs.c |   42 +++++++++++++++++++++---------------------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+--- a/arch/arm64/kvm/sys_regs.c
++++ b/arch/arm64/kvm/sys_regs.c
+@@ -464,14 +464,14 @@ static bool trap_bvr(struct kvm_vcpu *vc
+                    struct sys_reg_params *p,
+                    const struct sys_reg_desc *rd)
+ {
+-      u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
++      u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+       if (p->is_write)
+               reg_to_dbg(vcpu, p, dbg_reg);
+       else
+               dbg_to_reg(vcpu, p, dbg_reg);
+-      trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
++      trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+       return true;
+ }
+@@ -479,7 +479,7 @@ static bool trap_bvr(struct kvm_vcpu *vc
+ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+               const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+-      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
++      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+       if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+               return -EFAULT;
+@@ -489,7 +489,7 @@ static int set_bvr(struct kvm_vcpu *vcpu
+ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+       const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+-      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
++      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+       if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+               return -EFAULT;
+@@ -499,21 +499,21 @@ static int get_bvr(struct kvm_vcpu *vcpu
+ static void reset_bvr(struct kvm_vcpu *vcpu,
+                     const struct sys_reg_desc *rd)
+ {
+-      vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
++      vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
+ }
+ static bool trap_bcr(struct kvm_vcpu *vcpu,
+                    struct sys_reg_params *p,
+                    const struct sys_reg_desc *rd)
+ {
+-      u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
++      u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+       if (p->is_write)
+               reg_to_dbg(vcpu, p, dbg_reg);
+       else
+               dbg_to_reg(vcpu, p, dbg_reg);
+-      trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
++      trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+       return true;
+ }
+@@ -521,7 +521,7 @@ static bool trap_bcr(struct kvm_vcpu *vc
+ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+               const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+-      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
++      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+       if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+               return -EFAULT;
+@@ -532,7 +532,7 @@ static int set_bcr(struct kvm_vcpu *vcpu
+ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+       const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+-      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
++      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+       if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+               return -EFAULT;
+@@ -542,22 +542,22 @@ static int get_bcr(struct kvm_vcpu *vcpu
+ static void reset_bcr(struct kvm_vcpu *vcpu,
+                     const struct sys_reg_desc *rd)
+ {
+-      vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
++      vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
+ }
+ static bool trap_wvr(struct kvm_vcpu *vcpu,
+                    struct sys_reg_params *p,
+                    const struct sys_reg_desc *rd)
+ {
+-      u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
++      u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+       if (p->is_write)
+               reg_to_dbg(vcpu, p, dbg_reg);
+       else
+               dbg_to_reg(vcpu, p, dbg_reg);
+-      trace_trap_reg(__func__, rd->reg, p->is_write,
+-              vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
++      trace_trap_reg(__func__, rd->CRm, p->is_write,
++              vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]);
+       return true;
+ }
+@@ -565,7 +565,7 @@ static bool trap_wvr(struct kvm_vcpu *vc
+ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+               const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+-      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
++      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+       if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+               return -EFAULT;
+@@ -575,7 +575,7 @@ static int set_wvr(struct kvm_vcpu *vcpu
+ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+       const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+-      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
++      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+       if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+               return -EFAULT;
+@@ -585,21 +585,21 @@ static int get_wvr(struct kvm_vcpu *vcpu
+ static void reset_wvr(struct kvm_vcpu *vcpu,
+                     const struct sys_reg_desc *rd)
+ {
+-      vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
++      vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
+ }
+ static bool trap_wcr(struct kvm_vcpu *vcpu,
+                    struct sys_reg_params *p,
+                    const struct sys_reg_desc *rd)
+ {
+-      u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
++      u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+       if (p->is_write)
+               reg_to_dbg(vcpu, p, dbg_reg);
+       else
+               dbg_to_reg(vcpu, p, dbg_reg);
+-      trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
++      trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+       return true;
+ }
+@@ -607,7 +607,7 @@ static bool trap_wcr(struct kvm_vcpu *vc
+ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+               const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+-      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
++      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+       if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+               return -EFAULT;
+@@ -617,7 +617,7 @@ static int set_wcr(struct kvm_vcpu *vcpu
+ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+       const struct kvm_one_reg *reg, void __user *uaddr)
+ {
+-      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
++      __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+       if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+               return -EFAULT;
+@@ -627,7 +627,7 @@ static int get_wcr(struct kvm_vcpu *vcpu
+ static void reset_wcr(struct kvm_vcpu *vcpu,
+                     const struct sys_reg_desc *rd)
+ {
+-      vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
++      vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val;
+ }
+ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
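
For context on why CRm is the right index: in the AArch64 system
register encoding, DBGBVR<n>_EL1 is { Op0=2, Op1=0, CRn=0, CRm=n,
Op2=4 } (and similarly for DBGBCR/DBGWVR/DBGWCR), so the CRm field of
the trapped encoding directly carries the debug register number n,
while ->reg is deliberately 0 for these entries. A minimal sketch of
the corrected indexing (simplified types, not the full sys_regs.c code):

  /* sketch: per-register descriptor; debug state lives in a shadow array */
  struct sys_reg_desc_sketch {
          u8  CRm;        /* for DBGBVRn_EL1, CRm == n */
          u8  reg;        /* 0 here: value is not in the sysreg file */
          u64 val;        /* architectural reset value */
  };

  static void reset_bvr_sketch(struct kvm_vcpu *vcpu,
                               const struct sys_reg_desc_sketch *rd)
  {
          /* index the shadow array by the encoding's CRm, not ->reg */
          vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
  }
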
diff --git a/queue-5.10/kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch b/queue-5.10/kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch
new file mode 100644 (file)
index 0000000..a415e8c
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Tue Jun  8 05:37:28 PM CEST 2021
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 21 Apr 2021 19:21:22 -0700
+Subject: KVM: SVM: Truncate GPR value for DR and CR accesses in !64-bit mode
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 0884335a2e653b8a045083aa1d57ce74269ac81d upstream.
+
+Drop bits 63:32 on loads/stores to/from DRs and CRs when the vCPU is not
+in 64-bit mode.  The APM states bits 63:32 are dropped for both DRs and
+CRs:
+
+  In 64-bit mode, the operand size is fixed at 64 bits without the need
+  for a REX prefix. In non-64-bit mode, the operand size is fixed at 32
+  bits and the upper 32 bits of the destination are forced to 0.
+
+Fixes: 7ff76d58a9dc ("KVM: SVM: enhance MOV CR intercept handler")
+Fixes: cae3797a4639 ("KVM: SVM: enhance mov DR intercept handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210422022128.3464144-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2362,7 +2362,7 @@ static int cr_interception(struct vcpu_s
+       err = 0;
+       if (cr >= 16) { /* mov to cr */
+               cr -= 16;
+-              val = kvm_register_read(&svm->vcpu, reg);
++              val = kvm_register_readl(&svm->vcpu, reg);
+               trace_kvm_cr_write(cr, val);
+               switch (cr) {
+               case 0:
+@@ -2408,7 +2408,7 @@ static int cr_interception(struct vcpu_s
+                       kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+                       return 1;
+               }
+-              kvm_register_write(&svm->vcpu, reg, val);
++              kvm_register_writel(&svm->vcpu, reg, val);
+               trace_kvm_cr_read(cr, val);
+       }
+       return kvm_complete_insn_gp(&svm->vcpu, err);
+@@ -2439,13 +2439,13 @@ static int dr_interception(struct vcpu_s
+       if (dr >= 16) { /* mov to DRn */
+               if (!kvm_require_dr(&svm->vcpu, dr - 16))
+                       return 1;
+-              val = kvm_register_read(&svm->vcpu, reg);
++              val = kvm_register_readl(&svm->vcpu, reg);
+               kvm_set_dr(&svm->vcpu, dr - 16, val);
+       } else {
+               if (!kvm_require_dr(&svm->vcpu, dr))
+                       return 1;
+               kvm_get_dr(&svm->vcpu, dr, &val);
+-              kvm_register_write(&svm->vcpu, reg, val);
++              kvm_register_writel(&svm->vcpu, reg, val);
+       }
+       return kvm_skip_emulated_instruction(&svm->vcpu);
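
The "l" accessors the patch switches to truncate according to the
current mode. Their semantics are roughly the following sketch (the
real helpers live in KVM's x86 headers; this is illustrative only):

  static inline u64 register_readl_sketch(struct kvm_vcpu *vcpu, int reg)
  {
          u64 val = kvm_register_read(vcpu, reg);

          /*
           * Per the APM, outside 64-bit mode the operand size is fixed
           * at 32 bits and the upper 32 bits are forced to zero.
           */
          if (!is_64_bit_mode(vcpu))
                  val = (u32)val;
          return val;
  }
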
diff --git a/queue-5.10/lib-lz4-explicitly-support-in-place-decompression.patch b/queue-5.10/lib-lz4-explicitly-support-in-place-decompression.patch
new file mode 100644 (file)
index 0000000..a44f02f
--- /dev/null
@@ -0,0 +1,85 @@
+From 89b158635ad79574bde8e94d45dad33f8cf09549 Mon Sep 17 00:00:00 2001
+From: Gao Xiang <hsiangkao@redhat.com>
+Date: Tue, 15 Dec 2020 20:44:03 -0800
+Subject: lib/lz4: explicitly support in-place decompression
+
+From: Gao Xiang <hsiangkao@redhat.com>
+
+commit 89b158635ad79574bde8e94d45dad33f8cf09549 upstream.
+
+The LZ4 final literal copy can overlap its destination when doing
+in-place decompression, so it's unsafe to use an optimized memcpy()
+for it; memmove() must be used instead.
+
+Upstream LZ4 fixed this years ago [1] (the performance impact is
+negligible [2] and only the last few bytes are involved); this commit
+just synchronizes the upstream LZ4 code to the kernel side.
+
+The issue shows up as an EROFS in-place decompression failure on
+specific files when X86_FEATURE_ERMS is unsupported, because the
+memcpy() optimization of commit 59daa706fbec ("x86, mem:
+Optimize memcpy by avoiding memory false dependece") is enabled in
+that case.
+
+Most modern x86 CPUs support ERMS and just use the "rep movsb"
+approach, so they are not affected. However, the failure can still be
+verified by forcibly disabling the ERMS feature:
+
+arch/x86/lib/memcpy_64.S:
+        ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+-                     "jmp memcpy_erms", X86_FEATURE_ERMS
++                     "jmp memcpy_orig", X86_FEATURE_ERMS
+
+We never observed anything strange on arm64/arm/x86 platforms before,
+since most memcpy() implementations copy in increasing address order
+("copy upwards" [3]), which happens to be the correct order for
+in-place decompression. The switch to memmove() is still needed,
+though: overlapping memcpy() is undefined behavior according to the
+standard, and the kernel carries its own memcpy() optimizations.
+
+[1] https://github.com/lz4/lz4/commit/33cb8518ac385835cc17be9a770b27b40cd0e15b
+[2] https://github.com/lz4/lz4/pull/717#issuecomment-497818921
+[3] https://sourceware.org/bugzilla/show_bug.cgi?id=12518
+
+Link: https://lkml.kernel.org/r/20201122030749.2698994-1-hsiangkao@redhat.com
+Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
+Reviewed-by: Nick Terrell <terrelln@fb.com>
+Cc: Yann Collet <yann.collet.73@gmail.com>
+Cc: Miao Xie <miaoxie@huawei.com>
+Cc: Chao Yu <yuchao0@huawei.com>
+Cc: Li Guifu <bluce.liguifu@huawei.com>
+Cc: Guo Xuenan <guoxuenan@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/lz4/lz4_decompress.c |    6 +++++-
+ lib/lz4/lz4defs.h        |    1 +
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+--- a/lib/lz4/lz4_decompress.c
++++ b/lib/lz4/lz4_decompress.c
+@@ -263,7 +263,11 @@ static FORCE_INLINE int LZ4_decompress_g
+                               }
+                       }
+-                      LZ4_memcpy(op, ip, length);
++                      /*
++                       * supports overlapping memory regions; only matters
++                       * for in-place decompression scenarios
++                       */
++                      LZ4_memmove(op, ip, length);
+                       ip += length;
+                       op += length;
+--- a/lib/lz4/lz4defs.h
++++ b/lib/lz4/lz4defs.h
+@@ -146,6 +146,7 @@ static FORCE_INLINE void LZ4_writeLE16(v
+  * environments. This is needed when decompressing the Linux Kernel, for example.
+  */
+ #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
++#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size)
+ static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
+ {
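
The memcpy()/memmove() distinction can be demonstrated in isolation
with a small userspace program (illustrative only; the kernel case is
the final literal copy landing on a region that overlaps its source):

  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          char a[16] = "abcdefgh";

          /* Overlapping copy, 4 bytes forward: defined for memmove(). */
          memmove(a + 4, a, 8);
          printf("%s\n", a);      /* prints "abcdabcdefgh" */

          /*
           * The same call through memcpy() is undefined behavior: a
           * naive forward byte-by-byte copy would re-read bytes it has
           * just written and produce "abcdabcdabcd" instead.
           */
          return 0;
  }
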
diff --git a/queue-5.10/series b/queue-5.10/series
index 1f72e9f841e7e6cb7b6bf99361ab5613cf47f61d..4ebffb0b3598b57ff3c1b72828f34fa6be61dda4 100644 (file)
--- a/queue-5.10/series
@@ -124,3 +124,10 @@ btrfs-abort-in-rename_exchange-if-we-fail-to-insert-the-second-ref.patch
 btrfs-fix-deadlock-when-cloning-inline-extents-and-low-on-available-space.patch
 mm-hugetlb-fix-simple-resv_huge_pages-underflow-on-u.patch
 drm-msm-dpu-always-use-mdp-device-to-scale-bandwidth.patch
+btrfs-fix-unmountable-seed-device-after-fstrim.patch
+kvm-svm-truncate-gpr-value-for-dr-and-cr-accesses-in-64-bit-mode.patch
+kvm-arm64-fix-debug-register-indexing.patch
+x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch
+x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch
+x86-kvm-disable-all-pv-features-on-crash.patch
+lib-lz4-explicitly-support-in-place-decompression.patch
diff --git a/queue-5.10/x86-kvm-disable-all-pv-features-on-crash.patch b/queue-5.10/x86-kvm-disable-all-pv-features-on-crash.patch
new file mode 100644 (file)
index 0000000..191f011
--- /dev/null
@@ -0,0 +1,201 @@
+From foo@baz Tue Jun  8 05:44:24 PM CEST 2021
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Date: Mon, 31 May 2021 16:05:26 +0200
+Subject: x86/kvm: Disable all PV features on crash
+To: stable@vger.kernel.org
+Cc: Andrea Righi <andrea.righi@canonical.com>, Paolo Bonzini <pbonzini@redhat.com>, Vitaly Kuznetsov <vkuznets@redhat.com>, Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Message-ID: <20210531140526.42932-4-krzysztof.kozlowski@canonical.com>
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 3d6b84132d2a57b5a74100f6923a8feb679ac2ce upstream.
+
+The crash shutdown handler only disables kvmclock and steal time; other
+PV features remain active, so we risk corrupting memory or getting
+side effects in the kdump kernel. Move the crash handler to kvm.c and
+unify it with the CPU offline path.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20210414123544.1060604-5-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_para.h |    6 -----
+ arch/x86/kernel/kvm.c           |   44 +++++++++++++++++++++++++++++-----------
+ arch/x86/kernel/kvmclock.c      |   21 -------------------
+ 3 files changed, 32 insertions(+), 39 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_para.h
++++ b/arch/x86/include/asm/kvm_para.h
+@@ -92,7 +92,6 @@ unsigned int kvm_arch_para_hints(void);
+ void kvm_async_pf_task_wait_schedule(u32 token);
+ void kvm_async_pf_task_wake(u32 token);
+ u32 kvm_read_and_reset_apf_flags(void);
+-void kvm_disable_steal_time(void);
+ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
+ DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
+@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf
+       return 0;
+ }
+-static inline void kvm_disable_steal_time(void)
+-{
+-      return;
+-}
+-
+ static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+ {
+       return false;
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -38,6 +38,7 @@
+ #include <asm/tlb.h>
+ #include <asm/cpuidle_haltpoll.h>
+ #include <asm/ptrace.h>
++#include <asm/reboot.h>
+ #include <asm/svm.h>
+ DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
+@@ -375,6 +376,14 @@ static void kvm_pv_disable_apf(void)
+       pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+ }
++static void kvm_disable_steal_time(void)
++{
++      if (!has_steal_clock)
++              return;
++
++      wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
++}
++
+ static void kvm_pv_guest_cpu_reboot(void *unused)
+ {
+       /*
+@@ -417,14 +426,6 @@ static u64 kvm_steal_clock(int cpu)
+       return steal;
+ }
+-void kvm_disable_steal_time(void)
+-{
+-      if (!has_steal_clock)
+-              return;
+-
+-      wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+-}
+-
+ static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
+ {
+       early_set_memory_decrypted((unsigned long) ptr, size);
+@@ -461,13 +462,14 @@ static bool pv_tlb_flush_supported(void)
+ static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
+-static void kvm_guest_cpu_offline(void)
++static void kvm_guest_cpu_offline(bool shutdown)
+ {
+       kvm_disable_steal_time();
+       if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+               wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+       kvm_pv_disable_apf();
+-      apf_task_wake_all();
++      if (!shutdown)
++              apf_task_wake_all();
+       kvmclock_disable();
+ }
+@@ -613,7 +615,7 @@ static int kvm_cpu_down_prepare(unsigned
+       unsigned long flags;
+       local_irq_save(flags);
+-      kvm_guest_cpu_offline();
++      kvm_guest_cpu_offline(false);
+       local_irq_restore(flags);
+       return 0;
+ }
+@@ -622,7 +624,7 @@ static int kvm_cpu_down_prepare(unsigned
+ static int kvm_suspend(void)
+ {
+-      kvm_guest_cpu_offline();
++      kvm_guest_cpu_offline(false);
+       return 0;
+ }
+@@ -637,6 +639,20 @@ static struct syscore_ops kvm_syscore_op
+       .resume         = kvm_resume,
+ };
++/*
++ * After a PV feature is registered, the host will keep writing to the
++ * registered memory location. If the guest happens to shutdown, this memory
++ * won't be valid. In cases like kexec, in which you install a new kernel, this
++ * means a random memory location will be kept being written.
++ */
++#ifdef CONFIG_KEXEC_CORE
++static void kvm_crash_shutdown(struct pt_regs *regs)
++{
++      kvm_guest_cpu_offline(true);
++      native_machine_crash_shutdown(regs);
++}
++#endif
++
+ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+                       const struct flush_tlb_info *info)
+ {
+@@ -705,6 +721,10 @@ static void __init kvm_guest_init(void)
+       kvm_guest_cpu_init();
+ #endif
++#ifdef CONFIG_KEXEC_CORE
++      machine_ops.crash_shutdown = kvm_crash_shutdown;
++#endif
++
+       register_syscore_ops(&kvm_syscore_ops);
+       /*
+--- a/arch/x86/kernel/kvmclock.c
++++ b/arch/x86/kernel/kvmclock.c
+@@ -20,7 +20,6 @@
+ #include <asm/hypervisor.h>
+ #include <asm/mem_encrypt.h>
+ #include <asm/x86_init.h>
+-#include <asm/reboot.h>
+ #include <asm/kvmclock.h>
+ static int kvmclock __initdata = 1;
+@@ -204,23 +203,6 @@ static void kvm_setup_secondary_clock(vo
+ }
+ #endif
+-/*
+- * After the clock is registered, the host will keep writing to the
+- * registered memory location. If the guest happens to shutdown, this memory
+- * won't be valid. In cases like kexec, in which you install a new kernel, this
+- * means a random memory location will be kept being written. So before any
+- * kind of shutdown from our side, we unregister the clock by writing anything
+- * that does not have the 'enable' bit set in the msr
+- */
+-#ifdef CONFIG_KEXEC_CORE
+-static void kvm_crash_shutdown(struct pt_regs *regs)
+-{
+-      native_write_msr(msr_kvm_system_time, 0, 0);
+-      kvm_disable_steal_time();
+-      native_machine_crash_shutdown(regs);
+-}
+-#endif
+-
+ void kvmclock_disable(void)
+ {
+       native_write_msr(msr_kvm_system_time, 0, 0);
+@@ -350,9 +332,6 @@ void __init kvmclock_init(void)
+ #endif
+       x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
+       x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
+-#ifdef CONFIG_KEXEC_CORE
+-      machine_ops.crash_shutdown  = kvm_crash_shutdown;
+-#endif
+       kvm_get_preset_lpj();
+       /*
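
Taken together with the companion patches in this series, every exit
path (CPU hotplug, suspend, shutdown/reboot, crash/kexec) now funnels
through one teardown helper. Condensed from the hunks above (a sketch,
not a verbatim copy of arch/x86/kernel/kvm.c):

  static void kvm_guest_cpu_offline(bool shutdown)
  {
          kvm_disable_steal_time();               /* steal time MSR */
          if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                  wrmsrl(MSR_KVM_PV_EOI_EN, 0);   /* PV EOI */
          kvm_pv_disable_apf();                   /* async page faults */
          if (!shutdown)
                  apf_task_wake_all();    /* skip in the crash path */
          kvmclock_disable();                     /* kvmclock MSR */
  }
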
diff --git a/queue-5.10/x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch b/queue-5.10/x86-kvm-disable-kvmclock-on-all-cpus-on-shutdown.patch
new file mode 100644 (file)
index 0000000..88d13c8
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Tue Jun  8 05:44:24 PM CEST 2021
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Date: Mon, 31 May 2021 16:05:25 +0200
+Subject: x86/kvm: Disable kvmclock on all CPUs on shutdown
+To: stable@vger.kernel.org
+Cc: Andrea Righi <andrea.righi@canonical.com>, Paolo Bonzini <pbonzini@redhat.com>, Vitaly Kuznetsov <vkuznets@redhat.com>, Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Message-ID: <20210531140526.42932-3-krzysztof.kozlowski@canonical.com>
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit c02027b5742b5aa804ef08a4a9db433295533046 upstream.
+
+Currently, we disable kvmclock from the machine_shutdown() hook and this
+only happens for the boot CPU. We need to disable it for all CPUs to
+guard against memory corruption, e.g. on restore from hibernation.
+
+Note: writing '0' to the kvmclock MSR doesn't clear the memory location,
+it just prevents the hypervisor from updating the location. So for a
+short while after the write, while the CPU is still alive, the clock
+remains usable and correct, and we don't need to switch to some other
+clocksource.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20210414123544.1060604-4-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_para.h |    4 ++--
+ arch/x86/kernel/kvm.c           |    1 +
+ arch/x86/kernel/kvmclock.c      |    5 +----
+ 3 files changed, 4 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_para.h
++++ b/arch/x86/include/asm/kvm_para.h
+@@ -7,8 +7,6 @@
+ #include <linux/interrupt.h>
+ #include <uapi/asm/kvm_para.h>
+-extern void kvmclock_init(void);
+-
+ #ifdef CONFIG_KVM_GUEST
+ bool kvm_check_and_clear_guest_paused(void);
+ #else
+@@ -86,6 +84,8 @@ static inline long kvm_hypercall4(unsign
+ }
+ #ifdef CONFIG_KVM_GUEST
++void kvmclock_init(void);
++void kvmclock_disable(void);
+ bool kvm_para_available(void);
+ unsigned int kvm_arch_para_features(void);
+ unsigned int kvm_arch_para_hints(void);
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -468,6 +468,7 @@ static void kvm_guest_cpu_offline(void)
+               wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+       kvm_pv_disable_apf();
+       apf_task_wake_all();
++      kvmclock_disable();
+ }
+ static int kvm_cpu_online(unsigned int cpu)
+--- a/arch/x86/kernel/kvmclock.c
++++ b/arch/x86/kernel/kvmclock.c
+@@ -221,11 +221,9 @@ static void kvm_crash_shutdown(struct pt
+ }
+ #endif
+-static void kvm_shutdown(void)
++void kvmclock_disable(void)
+ {
+       native_write_msr(msr_kvm_system_time, 0, 0);
+-      kvm_disable_steal_time();
+-      native_machine_shutdown();
+ }
+ static void __init kvmclock_init_mem(void)
+@@ -352,7 +350,6 @@ void __init kvmclock_init(void)
+ #endif
+       x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
+       x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
+-      machine_ops.shutdown  = kvm_shutdown;
+ #ifdef CONFIG_KEXEC_CORE
+       machine_ops.crash_shutdown  = kvm_crash_shutdown;
+ #endif
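
For reference, kvmclock registration and unregistration are each a
single MSR write per CPU. Roughly (a sketch based on the surrounding
kvmclock.c code, not a verbatim copy):

  /* enable: publish the per-CPU pvclock page; bit 0 is the enable bit */
  pa = slow_virt_to_phys(&src->pvti) | 0x01ULL;
  wrmsrl(msr_kvm_system_time, pa);

  /* disable: clear the enable bit.  The host stops writing the page,
   * but the last published values stay readable, so the clock remains
   * correct for the short remaining lifetime of this CPU. */
  native_write_msr(msr_kvm_system_time, 0, 0);
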
diff --git a/queue-5.10/x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch b/queue-5.10/x86-kvm-teardown-pv-features-on-boot-cpu-as-well.patch
new file mode 100644 (file)
index 0000000..3daa41c
--- /dev/null
@@ -0,0 +1,139 @@
+From foo@baz Tue Jun  8 05:44:24 PM CEST 2021
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Date: Mon, 31 May 2021 16:05:24 +0200
+Subject: x86/kvm: Teardown PV features on boot CPU as well
+To: stable@vger.kernel.org
+Cc: Andrea Righi <andrea.righi@canonical.com>, Paolo Bonzini <pbonzini@redhat.com>, Vitaly Kuznetsov <vkuznets@redhat.com>, Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Message-ID: <20210531140526.42932-2-krzysztof.kozlowski@canonical.com>
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 8b79feffeca28c5459458fe78676b081e87c93a4 upstream.
+
+Various PV features (Async PF, PV EOI, steal time) work through memory
+shared with the hypervisor, and when we restore from hibernation we must
+properly tear down all these features to make sure the hypervisor doesn't
+write to stale locations after we jump to the previously hibernated kernel
+(which can try to place anything there). For secondary CPUs the job is
+already done by kvm_cpu_down_prepare(); register syscore ops to do
+the same for the boot CPU.
+
+Krzysztof:
+This fixes memory corruption visible after second resume from
+hibernation:
+
+  BUG: Bad page state in process dbus-daemon  pfn:18b01
+  page:ffffea000062c040 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 compound_mapcount: -30591
+  flags: 0xfffffc0078141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
+  raw: 000fffffc0078141 dead0000000002d0 dead000000000100 0000000000000000
+  raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
+  page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set
+  bad because of flags: 0x78141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
+
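+The mechanism chosen is the syscore layer: syscore suspend hooks run on
+the boot CPU, with interrupts disabled, after all other CPUs have been
+offlined (and resume runs before they come back). A minimal sketch of
+the pattern with invented names (the actual hook bodies are in the diff
+below):
+
+  #include <linux/syscore_ops.h>
+
+  static int pv_suspend(void)           /* boot CPU, IRQs off */
+  {
+          /* tear down shared-memory PV features here */
+          return 0;
+  }
+
+  static void pv_resume(void)           /* boot CPU, IRQs off */
+  {
+          /* re-register PV features here */
+  }
+
+  static struct syscore_ops pv_syscore_ops = {
+          .suspend        = pv_suspend,
+          .resume         = pv_resume,
+  };
+
+  /* during guest init: */
+  register_syscore_ops(&pv_syscore_ops);
+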
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
+[krzysztof: Extend the commit message, adjust for v5.10 context]
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kvm.c |   57 +++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 41 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -26,6 +26,7 @@
+ #include <linux/kprobes.h>
+ #include <linux/nmi.h>
+ #include <linux/swait.h>
++#include <linux/syscore_ops.h>
+ #include <asm/timer.h>
+ #include <asm/cpu.h>
+ #include <asm/traps.h>
+@@ -460,6 +461,25 @@ static bool pv_tlb_flush_supported(void)
+ static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
++static void kvm_guest_cpu_offline(void)
++{
++      kvm_disable_steal_time();
++      if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
++              wrmsrl(MSR_KVM_PV_EOI_EN, 0);
++      kvm_pv_disable_apf();
++      apf_task_wake_all();
++}
++
++static int kvm_cpu_online(unsigned int cpu)
++{
++      unsigned long flags;
++
++      local_irq_save(flags);
++      kvm_guest_cpu_init();
++      local_irq_restore(flags);
++      return 0;
++}
++
+ #ifdef CONFIG_SMP
+ static bool pv_ipi_supported(void)
+@@ -587,31 +607,34 @@ static void __init kvm_smp_prepare_boot_
+       kvm_spinlock_init();
+ }
+-static void kvm_guest_cpu_offline(void)
++static int kvm_cpu_down_prepare(unsigned int cpu)
+ {
+-      kvm_disable_steal_time();
+-      if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+-              wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+-      kvm_pv_disable_apf();
+-      apf_task_wake_all();
+-}
++      unsigned long flags;
+-static int kvm_cpu_online(unsigned int cpu)
+-{
+-      local_irq_disable();
+-      kvm_guest_cpu_init();
+-      local_irq_enable();
++      local_irq_save(flags);
++      kvm_guest_cpu_offline();
++      local_irq_restore(flags);
+       return 0;
+ }
+-static int kvm_cpu_down_prepare(unsigned int cpu)
++#endif
++
++static int kvm_suspend(void)
+ {
+-      local_irq_disable();
+       kvm_guest_cpu_offline();
+-      local_irq_enable();
++
+       return 0;
+ }
+-#endif
++
++static void kvm_resume(void)
++{
++      kvm_cpu_online(raw_smp_processor_id());
++}
++
++static struct syscore_ops kvm_syscore_ops = {
++      .suspend        = kvm_suspend,
++      .resume         = kvm_resume,
++};
+ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+                       const struct flush_tlb_info *info)
+@@ -681,6 +704,8 @@ static void __init kvm_guest_init(void)
+       kvm_guest_cpu_init();
+ #endif
++      register_syscore_ops(&kvm_syscore_ops);
++
+       /*
+        * Hard lockup detection is enabled by default. Disable it, as guests
+        * can get false positives too easily, for example if the host is