git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
ceph: check session state after bumping session->s_seq
author: Jeff Layton <jlayton@kernel.org>
Mon, 12 Oct 2020 13:39:06 +0000 (09:39 -0400)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 18 Nov 2020 18:22:07 +0000 (19:22 +0100)
[ Upstream commit 62575e270f661aba64778cbc5f354511cf9abb21 ]

Some messages sent by the MDS entail a session sequence number
increment, and the MDS will drop certain types of requests on the floor
when the sequence numbers don't match.

In particular, a REQUEST_CLOSE message can cross with one of the
sequence morphing messages from the MDS which can cause the client to
stall, waiting for a response that will never come.

Originally, this meant a delay of up to 5s before the recurring workqueue
job kicked in and resent the request, but a recent change made it so
that the client would never resend, causing a 60s stall when unmounting
and sometimes a blocklisting event.

Add a new helper for incrementing the session sequence and then testing
to see whether a REQUEST_CLOSE needs to be resent, and move the handling
of CEPH_MDS_SESSION_CLOSING into that function. Change all of the
bare sequence counter increments to use the new helper.

Reorganize check_session_state with a switch statement.  It should no
longer be called when the session is CLOSING, so throw a warning if it
ever is (but still handle that case sanely).

[ idryomov: whitespace, pr_err() call fixup ]

URL: https://tracker.ceph.com/issues/47563
Fixes: fa9967734227 ("ceph: fix potential mdsc use-after-free crash")
Reported-by: Patrick Donnelly <pdonnell@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/ceph/caps.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/quota.c
fs/ceph/snap.c

index 034b3f4fdd3a7f201900298f47b295d5cce7b957..64a64a29f5c794537814c2ef613ede230a499aeb 100644 (file)
@@ -4064,7 +4064,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
             vino.snap, inode);
 
        mutex_lock(&session->s_mutex);
-       session->s_seq++;
+       inc_session_sequence(session);
        dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
             (unsigned)seq);
 
index 76d8d9495d1d4b4467a8b3b4b23d7a9554e01990..b2214679baf4e3bcbfa7520528d1bbd7c5eb72bf 100644 (file)
@@ -4227,7 +4227,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
             dname.len, dname.name);
 
        mutex_lock(&session->s_mutex);
-       session->s_seq++;
+       inc_session_sequence(session);
 
        if (!inode) {
                dout("handle_lease no inode %llx\n", vino.ino);
@@ -4381,28 +4381,48 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
 
 bool check_session_state(struct ceph_mds_session *s)
 {
-       if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
-               dout("resending session close request for mds%d\n",
-                               s->s_mds);
-               request_close_session(s);
-               return false;
-       }
-       if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
-               if (s->s_state == CEPH_MDS_SESSION_OPEN) {
+       switch (s->s_state) {
+       case CEPH_MDS_SESSION_OPEN:
+               if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
                        s->s_state = CEPH_MDS_SESSION_HUNG;
                        pr_info("mds%d hung\n", s->s_mds);
                }
-       }
-       if (s->s_state == CEPH_MDS_SESSION_NEW ||
-           s->s_state == CEPH_MDS_SESSION_RESTARTING ||
-           s->s_state == CEPH_MDS_SESSION_CLOSED ||
-           s->s_state == CEPH_MDS_SESSION_REJECTED)
-               /* this mds is failed or recovering, just wait */
+               break;
+       case CEPH_MDS_SESSION_CLOSING:
+               /* Should never reach this when we're unmounting */
+               WARN_ON_ONCE(true);
+               fallthrough;
+       case CEPH_MDS_SESSION_NEW:
+       case CEPH_MDS_SESSION_RESTARTING:
+       case CEPH_MDS_SESSION_CLOSED:
+       case CEPH_MDS_SESSION_REJECTED:
                return false;
+       }
 
        return true;
 }
 
+/*
+ * If the sequence is incremented while we're waiting on a REQUEST_CLOSE reply,
+ * then we need to retransmit that request.
+ */
+void inc_session_sequence(struct ceph_mds_session *s)
+{
+       lockdep_assert_held(&s->s_mutex);
+
+       s->s_seq++;
+
+       if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
+               int ret;
+
+               dout("resending session close request for mds%d\n", s->s_mds);
+               ret = request_close_session(s);
+               if (ret < 0)
+                       pr_err("unable to close session to mds%d: %d\n",
+                              s->s_mds, ret);
+       }
+}
+
 /*
  * delayed work -- periodically trim expired leases, renew caps with mds
  */
index 658800605bfb48f2e43fca54e6ef9d82225f8e93..11f20a4d36bc532d2f24ad20d0b160f5f198cc99 100644 (file)
@@ -480,6 +480,7 @@ struct ceph_mds_client {
 extern const char *ceph_mds_op_name(int op);
 
 extern bool check_session_state(struct ceph_mds_session *s);
+void inc_session_sequence(struct ceph_mds_session *s);
 
 extern struct ceph_mds_session *
 __ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
index cc2c4d40b0222f77114666410c3fcc30350dd1b0..2b213f864c564c04b96d83bb7f142305830893e6 100644 (file)
@@ -53,7 +53,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
 
        /* increment msg sequence number */
        mutex_lock(&session->s_mutex);
-       session->s_seq++;
+       inc_session_sequence(session);
        mutex_unlock(&session->s_mutex);
 
        /* lookup inode */
index 923be9399b21ce7a7feb48d3c471d5fa77fed1fd..cc9a9bfc790a3275c122b3694226db21b643a8fc 100644 (file)
@@ -873,7 +873,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
             ceph_snap_op_name(op), split, trace_len);
 
        mutex_lock(&session->s_mutex);
-       session->s_seq++;
+       inc_session_sequence(session);
        mutex_unlock(&session->s_mutex);
 
        down_write(&mdsc->snap_rwsem);