git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
libceph: make calc_target() set t->paused, not just clear it
author: Ilya Dryomov <idryomov@gmail.com>
Mon, 5 Jan 2026 18:23:19 +0000 (19:23 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 19 Jan 2026 12:10:14 +0000 (13:10 +0100)
commit c0fe2994f9a9d0a2ec9e42441ea5ba74b6a16176 upstream.

Currently calc_target() clears t->paused if the request shouldn't be
paused anymore, but doesn't ever set t->paused even though it's able to
determine when the request should be paused.  Setting t->paused is left
to __submit_request() which is fine for regular requests but doesn't
work for linger requests -- since __submit_request() doesn't operate
on linger requests, there is nowhere for lreq->t.paused to be set.
One consequence of this is that watches don't get reestablished on
paused -> unpaused transitions in cases where requests have been paused
long enough for the (paused) unwatch request to time out and for the
subsequent (re)watch request to enter the paused state.  On top of the
watch not getting reestablished, rbd_reregister_watch() gets stuck with
rbd_dev->watch_mutex held:

  rbd_register_watch
    __rbd_register_watch
      ceph_osdc_watch
        linger_reg_commit_wait

It's waiting for lreq->reg_commit_wait to be completed, but for that to
happen the respective request needs to end up on need_resend_linger list
and be kicked when requests are unpaused.  There is no chance for that
if the request in question is never marked paused in the first place.

The fact that rbd_dev->watch_mutex remains taken out forever then
prevents the image from getting unmapped -- "rbd unmap" would inevitably
hang in D state on an attempt to grab the mutex.

Cc: stable@vger.kernel.org
Reported-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
net/ceph/osd_client.c

index 0c5e0d2c609e3c9947892785d56b42c70c8508ca..94edea03b0cb2aabb3c2a20d6a36e3f69f5e550f 100644 (file)
@@ -1529,6 +1529,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
        struct ceph_pg_pool_info *pi;
        struct ceph_pg pgid, last_pgid;
        struct ceph_osds up, acting;
+       bool should_be_paused;
        bool is_read = t->flags & CEPH_OSD_FLAG_READ;
        bool is_write = t->flags & CEPH_OSD_FLAG_WRITE;
        bool force_resend = false;
@@ -1597,10 +1598,16 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
                                 &last_pgid))
                force_resend = true;
 
-       if (t->paused && !target_should_be_paused(osdc, t, pi)) {
-               t->paused = false;
+       should_be_paused = target_should_be_paused(osdc, t, pi);
+       if (t->paused && !should_be_paused) {
                unpaused = true;
        }
+       if (t->paused != should_be_paused) {
+               dout("%s t %p paused %d -> %d\n", __func__, t, t->paused,
+                    should_be_paused);
+               t->paused = should_be_paused;
+       }
+
        legacy_change = ceph_pg_compare(&t->pgid, &pgid) ||
                        ceph_osds_changed(&t->acting, &acting,
                                          t->used_replica || any_change);