rbd: check for EOD after exclusive lock is ensured to be held
author Ilya Dryomov <idryomov@gmail.com>
Wed, 7 Jan 2026 21:37:55 +0000 (22:37 +0100)
committer Ilya Dryomov <idryomov@gmail.com>
Tue, 3 Feb 2026 20:00:22 +0000 (21:00 +0100)
Similar to commit 870611e4877e ("rbd: get snapshot context after
exclusive lock is ensured to be held"), move the "beyond EOD" check
into the image request state machine so that it's performed after
exclusive lock is ensured to be held.  This avoids various race
conditions which can arise when the image is shrunk under I/O (in
practice, mostly readahead).  In one such scenario

    rbd_assert(objno < rbd_dev->object_map_size);

can be triggered if a close-to-EOD read gets queued right before the
shrink is initiated and the EOD check is performed against an outdated
mapping_size.  After the resize is done on the server side and exclusive
lock is (re)acquired bringing along the new (now shrunk) object map, the
read starts going through the state machine and rbd_obj_may_exist() gets
invoked on an object that is out of bounds of rbd_dev->object_map array.

Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang@linux.dev>
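
For illustration only (a stand-alone user-space sketch, not kernel code), the shrink-under-readahead scenario above can be reduced to the following; the struct and sizes are made-up stand-ins for rbd_dev->mapping.size and the object map. It shows how an EOD check done against the pre-shrink mapping size passes, while the object number later derived from the same read falls outside the refreshed, smaller object map:

    #include <stdint.h>
    #include <stdio.h>

    #define OBJ_ORDER 22                       /* 4 MiB objects (RBD default) */
    #define MiB (1024ULL * 1024)

    /* made-up stand-ins for rbd_dev->mapping.size and the object map size */
    struct fake_rbd_dev {
            uint64_t mapping_size;             /* image size seen by the block layer */
            uint64_t object_map_size;          /* number of object map entries       */
    };

    int main(void)
    {
            struct fake_rbd_dev dev = {
                    .mapping_size = 1024 * MiB,
                    .object_map_size = (1024 * MiB) >> OBJ_ORDER,   /* 256 */
            };

            /* readahead close to EOD is queued; the old code checks EOD here */
            uint64_t off = 1020 * MiB, len = 4 * MiB;
            uint64_t stale_size = dev.mapping_size;
            printf("EOD check vs stale size: %s\n",
                   off + len > stale_size ? "rejected" : "passed");

            /* image is shrunk; (re)acquiring the lock refreshes both fields */
            dev.mapping_size = 512 * MiB;
            dev.object_map_size = (512 * MiB) >> OBJ_ORDER;         /* 128 */

            /* the state machine now derives the object number for that read */
            uint64_t objno = off >> OBJ_ORDER;                      /* 255 */
            printf("objno %llu vs object_map_size %llu: %s\n",
                   (unsigned long long)objno,
                   (unsigned long long)dev.object_map_size,
                   objno < dev.object_map_size ? "ok" : "assert would trip");
            return 0;
    }

With the check moved into the state machine it runs only after the lock (and with it the current object map and mapping size) is held, so the second step can no longer observe an out-of-range object number.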
drivers/block/rbd.c

index af0e21149dbc9e2230ccdc624ae3616448c23d51..8f441eb8b19265a0ab7658124f0e99ddaa48ffc3 100644
@@ -3495,11 +3495,29 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
        rbd_assert(!need_exclusive_lock(img_req) ||
                   __rbd_is_lock_owner(rbd_dev));
 
-       if (rbd_img_is_write(img_req)) {
-               rbd_assert(!img_req->snapc);
+       if (test_bit(IMG_REQ_CHILD, &img_req->flags)) {
+               rbd_assert(!rbd_img_is_write(img_req));
+       } else {
+               struct request *rq = blk_mq_rq_from_pdu(img_req);
+               u64 off = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
+               u64 len = blk_rq_bytes(rq);
+               u64 mapping_size;
+
                down_read(&rbd_dev->header_rwsem);
-               img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+               mapping_size = rbd_dev->mapping.size;
+               if (rbd_img_is_write(img_req)) {
+                       rbd_assert(!img_req->snapc);
+                       img_req->snapc =
+                           ceph_get_snap_context(rbd_dev->header.snapc);
+               }
                up_read(&rbd_dev->header_rwsem);
+
+               if (unlikely(off + len > mapping_size)) {
+                       rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)",
+                                off, len, mapping_size);
+                       img_req->pending.result = -EIO;
+                       return;
+               }
        }
 
        for_each_obj_request(img_req, obj_req) {
@@ -4725,7 +4743,6 @@ static void rbd_queue_workfn(struct work_struct *work)
        struct request *rq = blk_mq_rq_from_pdu(img_request);
        u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
        u64 length = blk_rq_bytes(rq);
-       u64 mapping_size;
        int result;
 
        /* Ignore/skip any zero-length requests */
@@ -4738,17 +4755,9 @@ static void rbd_queue_workfn(struct work_struct *work)
        blk_mq_start_request(rq);
 
        down_read(&rbd_dev->header_rwsem);
-       mapping_size = rbd_dev->mapping.size;
        rbd_img_capture_header(img_request);
        up_read(&rbd_dev->header_rwsem);
 
-       if (offset + length > mapping_size) {
-               rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset,
-                        length, mapping_size);
-               result = -EIO;
-               goto err_img_request;
-       }
-
        dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev,
             img_request, obj_op_name(op_type), offset, length);
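
As a quick sanity check of the arithmetic in the new hunk (a stand-alone sketch under assumed values, not the driver code), the request offset is the 512-byte sector position shifted by SECTOR_SHIFT, and the request fails with -EIO once off + len exceeds the mapping size captured under header_rwsem; blk_rq_pos()/blk_rq_bytes() are replaced here by plain parameters:

    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_SHIFT 9                     /* 512-byte sectors, as in the kernel */
    #define EIO 5

    /* stand-in for the check the patch moves into rbd_img_object_requests() */
    static int check_eod(uint64_t rq_pos_sectors, uint64_t rq_bytes,
                         uint64_t mapping_size)
    {
            uint64_t off = rq_pos_sectors << SECTOR_SHIFT;   /* blk_rq_pos()   */
            uint64_t len = rq_bytes;                         /* blk_rq_bytes() */

            if (off + len > mapping_size) {
                    fprintf(stderr, "beyond EOD (%llu~%llu > %llu)\n",
                            (unsigned long long)off, (unsigned long long)len,
                            (unsigned long long)mapping_size);
                    return -EIO;
            }
            return 0;
    }

    int main(void)
    {
            uint64_t mapping_size = 512ULL << 20;            /* shrunk to 512 MiB */

            /* a read that was in bounds before the shrink now fails cleanly */
            printf("result = %d\n",
                   check_eod(1020ULL << 11, 4ULL << 20, mapping_size));
            return 0;
    }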