--- /dev/null
+From a6862e6708c15995bc10614b2ef34ca35b4b9078 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Tue, 1 Jun 2021 08:13:38 -0400
+Subject: ceph: add some lockdep assertions around snaprealm handling
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit a6862e6708c15995bc10614b2ef34ca35b4b9078 upstream.
+
+Turn some comments into lockdep asserts.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/snap.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/fs/ceph/snap.c
++++ b/fs/ceph/snap.c
+@@ -65,6 +65,8 @@
+ void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm)
+ {
++ lockdep_assert_held_write(&mdsc->snap_rwsem);
++
+ dout("get_realm %p %d -> %d\n", realm,
+ atomic_read(&realm->nref), atomic_read(&realm->nref)+1);
+ /*
+@@ -113,6 +115,8 @@ static struct ceph_snap_realm *ceph_crea
+ {
+ struct ceph_snap_realm *realm;
+
++ lockdep_assert_held_write(&mdsc->snap_rwsem);
++
+ realm = kzalloc(sizeof(*realm), GFP_NOFS);
+ if (!realm)
+ return ERR_PTR(-ENOMEM);
+@@ -143,6 +147,8 @@ static struct ceph_snap_realm *__lookup_
+ struct rb_node *n = mdsc->snap_realms.rb_node;
+ struct ceph_snap_realm *r;
+
++ lockdep_assert_held_write(&mdsc->snap_rwsem);
++
+ while (n) {
+ r = rb_entry(n, struct ceph_snap_realm, node);
+ if (ino < r->ino)
+@@ -176,6 +182,8 @@ static void __put_snap_realm(struct ceph
+ static void __destroy_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm)
+ {
++ lockdep_assert_held_write(&mdsc->snap_rwsem);
++
+ dout("__destroy_snap_realm %p %llx\n", realm, realm->ino);
+
+ rb_erase(&realm->node, &mdsc->snap_realms);
+@@ -198,6 +206,8 @@ static void __destroy_snap_realm(struct
+ static void __put_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm)
+ {
++ lockdep_assert_held_write(&mdsc->snap_rwsem);
++
+ dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
+ atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
+ if (atomic_dec_and_test(&realm->nref))
+@@ -236,6 +246,8 @@ static void __cleanup_empty_realms(struc
+ {
+ struct ceph_snap_realm *realm;
+
++ lockdep_assert_held_write(&mdsc->snap_rwsem);
++
+ spin_lock(&mdsc->snap_empty_lock);
+ while (!list_empty(&mdsc->snap_empty)) {
+ realm = list_first_entry(&mdsc->snap_empty,
+@@ -269,6 +281,8 @@ static int adjust_snap_realm_parent(stru
+ {
+ struct ceph_snap_realm *parent;
+
++ lockdep_assert_held_write(&mdsc->snap_rwsem);
++
+ if (realm->parent_ino == parentino)
+ return 0;
+
+@@ -696,6 +710,8 @@ int ceph_update_snap_trace(struct ceph_m
+ int err = -ENOMEM;
+ LIST_HEAD(dirty_realms);
+
++ lockdep_assert_held_write(&mdsc->snap_rwsem);
++
+ dout("update_snap_trace deletion=%d\n", deletion);
+ more:
+ ceph_decode_need(&p, e, sizeof(*ri), bad);
--- /dev/null
+From df2c0cb7f8e8c83e495260ad86df8c5da947f2a7 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Tue, 1 Jun 2021 09:24:38 -0400
+Subject: ceph: clean up locking annotation for ceph_get_snap_realm and __lookup_snap_realm
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit df2c0cb7f8e8c83e495260ad86df8c5da947f2a7 upstream.
+
+They both say that the snap_rwsem must be held for write, but I don't
+see any real reason for it, and it's not currently always called that
+way.
+
+The lookup is just walking the rbtree, so holding it for read should be
+fine there. The "get" is bumping the refcount and (possibly) removing
+it from the empty list. I see no need to hold the snap_rwsem for write
+for that.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/snap.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/ceph/snap.c
++++ b/fs/ceph/snap.c
+@@ -60,12 +60,12 @@
+ /*
+ * increase ref count for the realm
+ *
+- * caller must hold snap_rwsem for write.
++ * caller must hold snap_rwsem.
+ */
+ void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm)
+ {
+- lockdep_assert_held_write(&mdsc->snap_rwsem);
++ lockdep_assert_held(&mdsc->snap_rwsem);
+
+ dout("get_realm %p %d -> %d\n", realm,
+ atomic_read(&realm->nref), atomic_read(&realm->nref)+1);
+@@ -139,7 +139,7 @@ static struct ceph_snap_realm *ceph_crea
+ /*
+ * lookup the realm rooted at @ino.
+ *
+- * caller must hold snap_rwsem for write.
++ * caller must hold snap_rwsem.
+ */
+ static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc,
+ u64 ino)
+@@ -147,7 +147,7 @@ static struct ceph_snap_realm *__lookup_
+ struct rb_node *n = mdsc->snap_realms.rb_node;
+ struct ceph_snap_realm *r;
+
+- lockdep_assert_held_write(&mdsc->snap_rwsem);
++ lockdep_assert_held(&mdsc->snap_rwsem);
+
+ while (n) {
+ r = rb_entry(n, struct ceph_snap_realm, node);
--- /dev/null
+From 8434ffe71c874b9c4e184b88d25de98c2bf5fe3f Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Tue, 3 Aug 2021 12:47:34 -0400
+Subject: ceph: take snap_empty_lock atomically with snaprealm refcount change
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 8434ffe71c874b9c4e184b88d25de98c2bf5fe3f upstream.
+
+There is a race in ceph_put_snap_realm. The change to the nref and the
+spinlock acquisition are not done atomically, so you could decrement
+nref, and before you take the spinlock, the nref is incremented again.
+At that point, you end up putting it on the empty list when it
+shouldn't be there. Eventually __cleanup_empty_realms runs and frees
+it when it's still in-use.
+
+Fix this by protecting the 1->0 transition with atomic_dec_and_lock,
+and just drop the spinlock if we can get the rwsem.
+
+Because these objects can also undergo a 0->1 refcount transition, we
+must protect that change as well with the spinlock. Increment locklessly
+unless the value is at 0, in which case we take the spinlock, increment
+and then take it off the empty list if it did the 0->1 transition.
+
+With these changes, I'm removing the dout() messages from these
+functions, as well as in __put_snap_realm. They've always been racy, and
+it's better to not print values that may be misleading.
+
+Cc: stable@vger.kernel.org
+URL: https://tracker.ceph.com/issues/46419
+Reported-by: Mark Nelson <mnelson@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Luis Henriques <lhenriques@suse.de>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/snap.c | 34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/fs/ceph/snap.c
++++ b/fs/ceph/snap.c
+@@ -67,19 +67,19 @@ void ceph_get_snap_realm(struct ceph_mds
+ {
+ lockdep_assert_held(&mdsc->snap_rwsem);
+
+- dout("get_realm %p %d -> %d\n", realm,
+- atomic_read(&realm->nref), atomic_read(&realm->nref)+1);
+ /*
+- * since we _only_ increment realm refs or empty the empty
+- * list with snap_rwsem held, adjusting the empty list here is
+- * safe. we do need to protect against concurrent empty list
+- * additions, however.
++ * The 0->1 and 1->0 transitions must take the snap_empty_lock
++ * atomically with the refcount change. Go ahead and bump the
++ * nref here, unless it's 0, in which case we take the spinlock
++ * and then do the increment and remove it from the list.
+ */
+- if (atomic_inc_return(&realm->nref) == 1) {
+- spin_lock(&mdsc->snap_empty_lock);
++ if (atomic_inc_not_zero(&realm->nref))
++ return;
++
++ spin_lock(&mdsc->snap_empty_lock);
++ if (atomic_inc_return(&realm->nref) == 1)
+ list_del_init(&realm->empty_item);
+- spin_unlock(&mdsc->snap_empty_lock);
+- }
++ spin_unlock(&mdsc->snap_empty_lock);
+ }
+
+ static void __insert_snap_realm(struct rb_root *root,
+@@ -208,28 +208,28 @@ static void __put_snap_realm(struct ceph
+ {
+ lockdep_assert_held_write(&mdsc->snap_rwsem);
+
+- dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
+- atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
++ /*
++ * We do not require the snap_empty_lock here, as any caller that
++ * increments the value must hold the snap_rwsem.
++ */
+ if (atomic_dec_and_test(&realm->nref))
+ __destroy_snap_realm(mdsc, realm);
+ }
+
+ /*
+- * caller needn't hold any locks
++ * See comments in ceph_get_snap_realm. Caller needn't hold any locks.
+ */
+ void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm)
+ {
+- dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
+- atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
+- if (!atomic_dec_and_test(&realm->nref))
++ if (!atomic_dec_and_lock(&realm->nref, &mdsc->snap_empty_lock))
+ return;
+
+ if (down_write_trylock(&mdsc->snap_rwsem)) {
++ spin_unlock(&mdsc->snap_empty_lock);
+ __destroy_snap_realm(mdsc, realm);
+ up_write(&mdsc->snap_rwsem);
+ } else {
+- spin_lock(&mdsc->snap_empty_lock);
+ list_add(&realm->empty_item, &mdsc->snap_empty);
+ spin_unlock(&mdsc->snap_empty_lock);
+ }
kvm-nvmx-use-vmx_need_pf_intercept-when-deciding-if-l0-wants-a-pf.patch
kvm-x86-mmu-don-t-leak-non-leaf-sptes-when-zapping-all-sptes.patch
kvm-x86-mmu-protect-marking-sps-unsync-when-using-tdp-mmu-with-spinlock.patch
+ceph-add-some-lockdep-assertions-around-snaprealm-handling.patch
+ceph-clean-up-locking-annotation-for-ceph_get_snap_realm-and-__lookup_snap_realm.patch
+ceph-take-snap_empty_lock-atomically-with-snaprealm-refcount-change.patch