--- /dev/null
+From 69c7b2fe4c9cc1d3b1186d1c5606627ecf0de883 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Mon, 8 Jul 2024 22:37:29 +0200
+Subject: libceph: fix race between delayed_work() and ceph_monc_stop()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 69c7b2fe4c9cc1d3b1186d1c5606627ecf0de883 upstream.
+
+The way the delayed work is handled in ceph_monc_stop() is prone to
+races with mon_fault() and possibly also finish_hunting(). Both of
+these can requeue the delayed work which wouldn't be canceled by any of
+the following code in case that happens after cancel_delayed_work_sync()
+runs -- __close_session() doesn't mess with the delayed work in order
+to avoid interfering with the hunting interval logic. This part was
+missed in commit b5d91704f53e ("libceph: behave in mon_fault() if
+cur_mon < 0") and use-after-free can still ensue on monc and objects
+that hang off of it, with monc->auth and monc->monmap being
+particularly susceptible to quickly being reused.
+
+To fix this:
+
+- clear monc->cur_mon and monc->hunting as part of closing the session
+ in ceph_monc_stop()
+- bail from delayed_work() if monc->cur_mon is cleared, similar to how
+ it's done in mon_fault() and finish_hunting() (based on monc->hunting)
+- call cancel_delayed_work_sync() after the session is closed
+
+Cc: stable@vger.kernel.org
+Link: https://tracker.ceph.com/issues/66857
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/mon_client.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -975,13 +975,19 @@ static void delayed_work(struct work_str
+ struct ceph_mon_client *monc =
+ container_of(work, struct ceph_mon_client, delayed_work.work);
+
+- dout("monc delayed_work\n");
+ mutex_lock(&monc->mutex);
++ dout("%s mon%d\n", __func__, monc->cur_mon);
++ if (monc->cur_mon < 0) {
++ goto out;
++ }
++
+ if (monc->hunting) {
+ dout("%s continuing hunt\n", __func__);
+ reopen_session(monc);
+ } else {
+ int is_auth = ceph_auth_is_authenticated(monc->auth);
++
++ dout("%s is_authed %d\n", __func__, is_auth);
+ if (ceph_con_keepalive_expired(&monc->con,
+ CEPH_MONC_PING_TIMEOUT)) {
+ dout("monc keepalive timeout\n");
+@@ -1006,6 +1012,8 @@ static void delayed_work(struct work_str
+ }
+ }
+ __schedule_delayed(monc);
++
++out:
+ mutex_unlock(&monc->mutex);
+ }
+
+@@ -1118,13 +1126,15 @@ EXPORT_SYMBOL(ceph_monc_init);
+ void ceph_monc_stop(struct ceph_mon_client *monc)
+ {
+ dout("stop\n");
+- cancel_delayed_work_sync(&monc->delayed_work);
+
+ mutex_lock(&monc->mutex);
+ __close_session(monc);
++ monc->hunting = false;
+ monc->cur_mon = -1;
+ mutex_unlock(&monc->mutex);
+
++ cancel_delayed_work_sync(&monc->delayed_work);
++
+ /*
+ * flush msgr queue before we destroy ourselves to ensure that:
+ * - any work that references our embedded con is finished.