git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
author Sasha Levin <sashal@kernel.org>
Fri, 6 Oct 2023 14:12:49 +0000 (10:12 -0400)
committer Sasha Levin <sashal@kernel.org>
Fri, 6 Oct 2023 14:12:49 +0000 (10:12 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.10/rbd-decouple-header-read-in-from-updating-rbd_dev-he.patch [new file with mode: 0644]
queue-5.10/rbd-decouple-parent-info-read-in-from-updating-rbd_d.patch [new file with mode: 0644]
queue-5.10/rbd-move-rbd_dev_refresh-definition.patch [new file with mode: 0644]
queue-5.10/rbd-take-header_rwsem-in-rbd_dev_refresh-only-when-u.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/rbd-decouple-header-read-in-from-updating-rbd_dev-he.patch b/queue-5.10/rbd-decouple-header-read-in-from-updating-rbd_dev-he.patch
new file mode 100644 (file)
index 0000000..c4db121
--- /dev/null
@@ -0,0 +1,452 @@
+From 4028c624cfd276e86e1898139e99a6917b9fed46 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Oct 2023 11:59:33 +0200
+Subject: rbd: decouple header read-in from updating rbd_dev->header
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 510a7330c82a7754d5df0117a8589e8a539067c7 upstream.
+
+Make rbd_dev_header_info() populate a passed struct rbd_image_header
+instead of rbd_dev->header and introduce rbd_dev_update_header() for
+updating mutable fields in rbd_dev->header upon refresh.  The initial
+read-in of both mutable and immutable fields in rbd_dev_image_probe()
+passes in rbd_dev->header so no update step is required there.
+
+rbd_init_layout() is now called directly from rbd_dev_image_probe()
+instead of individually in format 1 and format 2 implementations.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/rbd.c | 206 ++++++++++++++++++++++++--------------------
+ 1 file changed, 114 insertions(+), 92 deletions(-)
+
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index 82cf9be4badc5..73f917a429f38 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -632,7 +632,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
+ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev);
+-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
++static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
++                                   struct rbd_image_header *header);
+ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+                                       u64 snap_id);
+ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+@@ -1045,15 +1046,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev)
+       RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
+ }
++static void rbd_image_header_cleanup(struct rbd_image_header *header)
++{
++      kfree(header->object_prefix);
++      ceph_put_snap_context(header->snapc);
++      kfree(header->snap_sizes);
++      kfree(header->snap_names);
++
++      memset(header, 0, sizeof(*header));
++}
++
+ /*
+  * Fill an rbd image header with information from the given format 1
+  * on-disk header.
+  */
+-static int rbd_header_from_disk(struct rbd_device *rbd_dev,
+-                               struct rbd_image_header_ondisk *ondisk)
++static int rbd_header_from_disk(struct rbd_image_header *header,
++                              struct rbd_image_header_ondisk *ondisk,
++                              bool first_time)
+ {
+-      struct rbd_image_header *header = &rbd_dev->header;
+-      bool first_time = header->object_prefix == NULL;
+       struct ceph_snap_context *snapc;
+       char *object_prefix = NULL;
+       char *snap_names = NULL;
+@@ -1120,11 +1130,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev,
+       if (first_time) {
+               header->object_prefix = object_prefix;
+               header->obj_order = ondisk->options.order;
+-              rbd_init_layout(rbd_dev);
+-      } else {
+-              ceph_put_snap_context(header->snapc);
+-              kfree(header->snap_names);
+-              kfree(header->snap_sizes);
+       }
+       /* The remaining fields always get updated (when we refresh) */
+@@ -4914,7 +4919,9 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
+  * return, the rbd_dev->header field will contain up-to-date
+  * information about the image.
+  */
+-static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
++static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev,
++                                struct rbd_image_header *header,
++                                bool first_time)
+ {
+       struct rbd_image_header_ondisk *ondisk = NULL;
+       u32 snap_count = 0;
+@@ -4962,7 +4969,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
+               snap_count = le32_to_cpu(ondisk->snap_count);
+       } while (snap_count != want_count);
+-      ret = rbd_header_from_disk(rbd_dev, ondisk);
++      ret = rbd_header_from_disk(header, ondisk, first_time);
+ out:
+       kfree(ondisk);
+@@ -5541,17 +5548,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+       return 0;
+ }
+-static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
+-{
+-      return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
+-                                      &rbd_dev->header.obj_order,
+-                                      &rbd_dev->header.image_size);
+-}
+-
+-static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev,
++                                  char **pobject_prefix)
+ {
+       size_t size;
+       void *reply_buf;
++      char *object_prefix;
+       int ret;
+       void *p;
+@@ -5569,16 +5571,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
+               goto out;
+       p = reply_buf;
+-      rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
+-                                              p + ret, NULL, GFP_NOIO);
++      object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL,
++                                                  GFP_NOIO);
++      if (IS_ERR(object_prefix)) {
++              ret = PTR_ERR(object_prefix);
++              goto out;
++      }
+       ret = 0;
+-      if (IS_ERR(rbd_dev->header.object_prefix)) {
+-              ret = PTR_ERR(rbd_dev->header.object_prefix);
+-              rbd_dev->header.object_prefix = NULL;
+-      } else {
+-              dout("  object_prefix = %s\n", rbd_dev->header.object_prefix);
+-      }
++      *pobject_prefix = object_prefix;
++      dout("  object_prefix = %s\n", object_prefix);
+ out:
+       kfree(reply_buf);
+@@ -5629,13 +5631,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+       return 0;
+ }
+-static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
+-{
+-      return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
+-                                       rbd_is_ro(rbd_dev),
+-                                       &rbd_dev->header.features);
+-}
+-
+ /*
+  * These are generic image flags, but since they are used only for
+  * object map, store them in rbd_dev->object_map_flags.
+@@ -5910,14 +5905,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+       return ret;
+ }
+-static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev,
++                                  u64 *stripe_unit, u64 *stripe_count)
+ {
+       struct {
+               __le64 stripe_unit;
+               __le64 stripe_count;
+       } __attribute__ ((packed)) striping_info_buf = { 0 };
+       size_t size = sizeof (striping_info_buf);
+-      void *p;
+       int ret;
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+@@ -5929,27 +5924,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
+       if (ret < size)
+               return -ERANGE;
+-      p = &striping_info_buf;
+-      rbd_dev->header.stripe_unit = ceph_decode_64(&p);
+-      rbd_dev->header.stripe_count = ceph_decode_64(&p);
++      *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit);
++      *stripe_count = le64_to_cpu(striping_info_buf.stripe_count);
++      dout("  stripe_unit = %llu stripe_count = %llu\n", *stripe_unit,
++           *stripe_count);
++
+       return 0;
+ }
+-static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id)
+ {
+-      __le64 data_pool_id;
++      __le64 data_pool_buf;
+       int ret;
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+                                 &rbd_dev->header_oloc, "get_data_pool",
+-                                NULL, 0, &data_pool_id, sizeof(data_pool_id));
++                                NULL, 0, &data_pool_buf,
++                                sizeof(data_pool_buf));
++      dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+       if (ret < 0)
+               return ret;
+-      if (ret < sizeof(data_pool_id))
++      if (ret < sizeof(data_pool_buf))
+               return -EBADMSG;
+-      rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
+-      WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
++      *data_pool_id = le64_to_cpu(data_pool_buf);
++      dout("  data_pool_id = %lld\n", *data_pool_id);
++      WARN_ON(*data_pool_id == CEPH_NOPOOL);
++
+       return 0;
+ }
+@@ -6141,7 +6142,8 @@ static int rbd_spec_fill_names(struct rbd_device *rbd_dev)
+       return ret;
+ }
+-static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev,
++                                 struct ceph_snap_context **psnapc)
+ {
+       size_t size;
+       int ret;
+@@ -6202,9 +6204,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
+       for (i = 0; i < snap_count; i++)
+               snapc->snaps[i] = ceph_decode_64(&p);
+-      ceph_put_snap_context(rbd_dev->header.snapc);
+-      rbd_dev->header.snapc = snapc;
+-
++      *psnapc = snapc;
+       dout("  snap context seq = %llu, snap_count = %u\n",
+               (unsigned long long)seq, (unsigned int)snap_count);
+ out:
+@@ -6253,38 +6253,42 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+       return snap_name;
+ }
+-static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev,
++                                struct rbd_image_header *header,
++                                bool first_time)
+ {
+-      bool first_time = rbd_dev->header.object_prefix == NULL;
+       int ret;
+-      ret = rbd_dev_v2_image_size(rbd_dev);
++      ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
++                                  first_time ? &header->obj_order : NULL,
++                                  &header->image_size);
+       if (ret)
+               return ret;
+       if (first_time) {
+-              ret = rbd_dev_v2_header_onetime(rbd_dev);
++              ret = rbd_dev_v2_header_onetime(rbd_dev, header);
+               if (ret)
+                       return ret;
+       }
+-      ret = rbd_dev_v2_snap_context(rbd_dev);
+-      if (ret && first_time) {
+-              kfree(rbd_dev->header.object_prefix);
+-              rbd_dev->header.object_prefix = NULL;
+-      }
++      ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
++      if (ret)
++              return ret;
+-      return ret;
++      return 0;
+ }
+-static int rbd_dev_header_info(struct rbd_device *rbd_dev)
++static int rbd_dev_header_info(struct rbd_device *rbd_dev,
++                             struct rbd_image_header *header,
++                             bool first_time)
+ {
+       rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
++      rbd_assert(!header->object_prefix && !header->snapc);
+       if (rbd_dev->image_format == 1)
+-              return rbd_dev_v1_header_info(rbd_dev);
++              return rbd_dev_v1_header_info(rbd_dev, header, first_time);
+-      return rbd_dev_v2_header_info(rbd_dev);
++      return rbd_dev_v2_header_info(rbd_dev, header, first_time);
+ }
+ /*
+@@ -6771,60 +6775,49 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
+  */
+ static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
+ {
+-      struct rbd_image_header *header;
+-
+       rbd_dev_parent_put(rbd_dev);
+       rbd_object_map_free(rbd_dev);
+       rbd_dev_mapping_clear(rbd_dev);
+       /* Free dynamic fields from the header, then zero it out */
+-      header = &rbd_dev->header;
+-      ceph_put_snap_context(header->snapc);
+-      kfree(header->snap_sizes);
+-      kfree(header->snap_names);
+-      kfree(header->object_prefix);
+-      memset(header, 0, sizeof (*header));
++      rbd_image_header_cleanup(&rbd_dev->header);
+ }
+-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
++                                   struct rbd_image_header *header)
+ {
+       int ret;
+-      ret = rbd_dev_v2_object_prefix(rbd_dev);
++      ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
+       if (ret)
+-              goto out_err;
++              return ret;
+       /*
+        * Get the and check features for the image.  Currently the
+        * features are assumed to never change.
+        */
+-      ret = rbd_dev_v2_features(rbd_dev);
++      ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
++                                      rbd_is_ro(rbd_dev), &header->features);
+       if (ret)
+-              goto out_err;
++              return ret;
+       /* If the image supports fancy striping, get its parameters */
+-      if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
+-              ret = rbd_dev_v2_striping_info(rbd_dev);
+-              if (ret < 0)
+-                      goto out_err;
++      if (header->features & RBD_FEATURE_STRIPINGV2) {
++              ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
++                                             &header->stripe_count);
++              if (ret)
++                      return ret;
+       }
+-      if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
+-              ret = rbd_dev_v2_data_pool(rbd_dev);
++      if (header->features & RBD_FEATURE_DATA_POOL) {
++              ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
+               if (ret)
+-                      goto out_err;
++                      return ret;
+       }
+-      rbd_init_layout(rbd_dev);
+       return 0;
+-
+-out_err:
+-      rbd_dev->header.features = 0;
+-      kfree(rbd_dev->header.object_prefix);
+-      rbd_dev->header.object_prefix = NULL;
+-      return ret;
+ }
+ /*
+@@ -7019,13 +7012,15 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
+       if (!depth)
+               down_write(&rbd_dev->header_rwsem);
+-      ret = rbd_dev_header_info(rbd_dev);
++      ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
+       if (ret) {
+               if (ret == -ENOENT && !need_watch)
+                       rbd_print_dne(rbd_dev, false);
+               goto err_out_probe;
+       }
++      rbd_init_layout(rbd_dev);
++
+       /*
+        * If this image is the one being mapped, we have pool name and
+        * id, image name and id, and snap name - need to fill snap id.
+@@ -7080,15 +7075,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
+       return ret;
+ }
++static void rbd_dev_update_header(struct rbd_device *rbd_dev,
++                                struct rbd_image_header *header)
++{
++      rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
++      rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
++
++      rbd_dev->header.image_size = header->image_size;
++
++      ceph_put_snap_context(rbd_dev->header.snapc);
++      rbd_dev->header.snapc = header->snapc;
++      header->snapc = NULL;
++
++      if (rbd_dev->image_format == 1) {
++              kfree(rbd_dev->header.snap_names);
++              rbd_dev->header.snap_names = header->snap_names;
++              header->snap_names = NULL;
++
++              kfree(rbd_dev->header.snap_sizes);
++              rbd_dev->header.snap_sizes = header->snap_sizes;
++              header->snap_sizes = NULL;
++      }
++}
++
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
++      struct rbd_image_header header = { 0 };
+       u64 mapping_size;
+       int ret;
+       down_write(&rbd_dev->header_rwsem);
+       mapping_size = rbd_dev->mapping.size;
+-      ret = rbd_dev_header_info(rbd_dev);
++      ret = rbd_dev_header_info(rbd_dev, &header, false);
+       if (ret)
+               goto out;
+@@ -7102,6 +7121,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+                       goto out;
+       }
++      rbd_dev_update_header(rbd_dev, &header);
++
+       rbd_assert(!rbd_is_snap(rbd_dev));
+       rbd_dev->mapping.size = rbd_dev->header.image_size;
+@@ -7110,6 +7131,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+       if (!ret && mapping_size != rbd_dev->mapping.size)
+               rbd_dev_update_size(rbd_dev);
++      rbd_image_header_cleanup(&header);
+       return ret;
+ }
+-- 
+2.40.1
+
diff --git a/queue-5.10/rbd-decouple-parent-info-read-in-from-updating-rbd_d.patch b/queue-5.10/rbd-decouple-parent-info-read-in-from-updating-rbd_d.patch
new file mode 100644 (file)
index 0000000..cc1c635
--- /dev/null
@@ -0,0 +1,273 @@
+From 47b421ecb9b48c1760a59495116752674cdc7ecd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Oct 2023 11:59:34 +0200
+Subject: rbd: decouple parent info read-in from updating rbd_dev
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit c10311776f0a8ddea2276df96e255625b07045a8 upstream.
+
+Unlike header read-in, parent info read-in is already decoupled in
+get_parent_info(), but it's buried in rbd_dev_v2_parent_info() along
+with the processing logic.
+
+Separate the initial read-in and update read-in logic into
+rbd_dev_setup_parent() and rbd_dev_update_parent() respectively and
+have rbd_dev_v2_parent_info() just populate struct parent_image_info
+(i.e. what get_parent_info() did).  Some existing QoI issues, like
+flatten of a standalone clone being disregarded on refresh, remain.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/rbd.c | 142 +++++++++++++++++++++++++-------------------
+ 1 file changed, 80 insertions(+), 62 deletions(-)
+
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index 73f917a429f38..628b986351ee9 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -5667,6 +5667,14 @@ struct parent_image_info {
+       u64             overlap;
+ };
++static void rbd_parent_info_cleanup(struct parent_image_info *pii)
++{
++      kfree(pii->pool_ns);
++      kfree(pii->image_id);
++
++      memset(pii, 0, sizeof(*pii));
++}
++
+ /*
+  * The caller is responsible for @pii.
+  */
+@@ -5736,6 +5744,9 @@ static int __get_parent_info(struct rbd_device *rbd_dev,
+       if (pii->has_overlap)
+               ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
++      dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
++           __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
++           pii->has_overlap, pii->overlap);
+       return 0;
+ e_inval:
+@@ -5774,14 +5785,17 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
+       pii->has_overlap = true;
+       ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
++      dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
++           __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
++           pii->has_overlap, pii->overlap);
+       return 0;
+ e_inval:
+       return -EINVAL;
+ }
+-static int get_parent_info(struct rbd_device *rbd_dev,
+-                         struct parent_image_info *pii)
++static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev,
++                                struct parent_image_info *pii)
+ {
+       struct page *req_page, *reply_page;
+       void *p;
+@@ -5809,7 +5823,7 @@ static int get_parent_info(struct rbd_device *rbd_dev,
+       return ret;
+ }
+-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
++static int rbd_dev_setup_parent(struct rbd_device *rbd_dev)
+ {
+       struct rbd_spec *parent_spec;
+       struct parent_image_info pii = { 0 };
+@@ -5819,37 +5833,12 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+       if (!parent_spec)
+               return -ENOMEM;
+-      ret = get_parent_info(rbd_dev, &pii);
++      ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+       if (ret)
+               goto out_err;
+-      dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+-           __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
+-           pii.has_overlap, pii.overlap);
+-
+-      if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
+-              /*
+-               * Either the parent never existed, or we have
+-               * record of it but the image got flattened so it no
+-               * longer has a parent.  When the parent of a
+-               * layered image disappears we immediately set the
+-               * overlap to 0.  The effect of this is that all new
+-               * requests will be treated as if the image had no
+-               * parent.
+-               *
+-               * If !pii.has_overlap, the parent image spec is not
+-               * applicable.  It's there to avoid duplication in each
+-               * snapshot record.
+-               */
+-              if (rbd_dev->parent_overlap) {
+-                      rbd_dev->parent_overlap = 0;
+-                      rbd_dev_parent_put(rbd_dev);
+-                      pr_info("%s: clone image has been flattened\n",
+-                              rbd_dev->disk->disk_name);
+-              }
+-
++      if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap)
+               goto out;       /* No parent?  No problem. */
+-      }
+       /* The ceph file layout needs to fit pool id in 32 bits */
+@@ -5861,46 +5850,34 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+       }
+       /*
+-       * The parent won't change (except when the clone is
+-       * flattened, already handled that).  So we only need to
+-       * record the parent spec we have not already done so.
++       * The parent won't change except when the clone is flattened,
++       * so we only need to record the parent image spec once.
+        */
+-      if (!rbd_dev->parent_spec) {
+-              parent_spec->pool_id = pii.pool_id;
+-              if (pii.pool_ns && *pii.pool_ns) {
+-                      parent_spec->pool_ns = pii.pool_ns;
+-                      pii.pool_ns = NULL;
+-              }
+-              parent_spec->image_id = pii.image_id;
+-              pii.image_id = NULL;
+-              parent_spec->snap_id = pii.snap_id;
+-
+-              rbd_dev->parent_spec = parent_spec;
+-              parent_spec = NULL;     /* rbd_dev now owns this */
++      parent_spec->pool_id = pii.pool_id;
++      if (pii.pool_ns && *pii.pool_ns) {
++              parent_spec->pool_ns = pii.pool_ns;
++              pii.pool_ns = NULL;
+       }
++      parent_spec->image_id = pii.image_id;
++      pii.image_id = NULL;
++      parent_spec->snap_id = pii.snap_id;
++
++      rbd_assert(!rbd_dev->parent_spec);
++      rbd_dev->parent_spec = parent_spec;
++      parent_spec = NULL;     /* rbd_dev now owns this */
+       /*
+-       * We always update the parent overlap.  If it's zero we issue
+-       * a warning, as we will proceed as if there was no parent.
++       * Record the parent overlap.  If it's zero, issue a warning as
++       * we will proceed as if there is no parent.
+        */
+-      if (!pii.overlap) {
+-              if (parent_spec) {
+-                      /* refresh, careful to warn just once */
+-                      if (rbd_dev->parent_overlap)
+-                              rbd_warn(rbd_dev,
+-                                  "clone now standalone (overlap became 0)");
+-              } else {
+-                      /* initial probe */
+-                      rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
+-              }
+-      }
++      if (!pii.overlap)
++              rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
+       rbd_dev->parent_overlap = pii.overlap;
+ out:
+       ret = 0;
+ out_err:
+-      kfree(pii.pool_ns);
+-      kfree(pii.image_id);
++      rbd_parent_info_cleanup(&pii);
+       rbd_spec_put(parent_spec);
+       return ret;
+ }
+@@ -7049,7 +7026,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
+       }
+       if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
+-              ret = rbd_dev_v2_parent_info(rbd_dev);
++              ret = rbd_dev_setup_parent(rbd_dev);
+               if (ret)
+                       goto err_out_probe;
+       }
+@@ -7098,9 +7075,47 @@ static void rbd_dev_update_header(struct rbd_device *rbd_dev,
+       }
+ }
++static void rbd_dev_update_parent(struct rbd_device *rbd_dev,
++                                struct parent_image_info *pii)
++{
++      if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
++              /*
++               * Either the parent never existed, or we have
++               * record of it but the image got flattened so it no
++               * longer has a parent.  When the parent of a
++               * layered image disappears we immediately set the
++               * overlap to 0.  The effect of this is that all new
++               * requests will be treated as if the image had no
++               * parent.
++               *
++               * If !pii.has_overlap, the parent image spec is not
++               * applicable.  It's there to avoid duplication in each
++               * snapshot record.
++               */
++              if (rbd_dev->parent_overlap) {
++                      rbd_dev->parent_overlap = 0;
++                      rbd_dev_parent_put(rbd_dev);
++                      pr_info("%s: clone has been flattened\n",
++                              rbd_dev->disk->disk_name);
++              }
++      } else {
++              rbd_assert(rbd_dev->parent_spec);
++
++              /*
++               * Update the parent overlap.  If it became zero, issue
++               * a warning as we will proceed as if there is no parent.
++               */
++              if (!pii->overlap && rbd_dev->parent_overlap)
++                      rbd_warn(rbd_dev,
++                               "clone has become standalone (overlap 0)");
++              rbd_dev->parent_overlap = pii->overlap;
++      }
++}
++
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
+       struct rbd_image_header header = { 0 };
++      struct parent_image_info pii = { 0 };
+       u64 mapping_size;
+       int ret;
+@@ -7116,12 +7131,14 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+        * mapped image getting flattened.
+        */
+       if (rbd_dev->parent) {
+-              ret = rbd_dev_v2_parent_info(rbd_dev);
++              ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+               if (ret)
+                       goto out;
+       }
+       rbd_dev_update_header(rbd_dev, &header);
++      if (rbd_dev->parent)
++              rbd_dev_update_parent(rbd_dev, &pii);
+       rbd_assert(!rbd_is_snap(rbd_dev));
+       rbd_dev->mapping.size = rbd_dev->header.image_size;
+@@ -7131,6 +7148,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+       if (!ret && mapping_size != rbd_dev->mapping.size)
+               rbd_dev_update_size(rbd_dev);
++      rbd_parent_info_cleanup(&pii);
+       rbd_image_header_cleanup(&header);
+       return ret;
+ }
+-- 
+2.40.1
+
diff --git a/queue-5.10/rbd-move-rbd_dev_refresh-definition.patch b/queue-5.10/rbd-move-rbd_dev_refresh-definition.patch
new file mode 100644 (file)
index 0000000..2969d50
--- /dev/null
@@ -0,0 +1,117 @@
+From afc66bcf32f4ef52342034ec6cedea384a127e94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Oct 2023 11:59:32 +0200
+Subject: rbd: move rbd_dev_refresh() definition
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 0b035401c57021fc6c300272cbb1c5a889d4fe45 upstream.
+
+Move rbd_dev_refresh() definition further down to avoid having to
+move struct parent_image_info definition in the next commit.  This
+spares some forward declarations too.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+[idryomov@gmail.com: backport to 5.10-6.1: context]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/rbd.c | 68 ++++++++++++++++++++++-----------------------
+ 1 file changed, 33 insertions(+), 35 deletions(-)
+
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index 95cbd5790ed60..82cf9be4badc5 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -633,8 +633,6 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev);
+ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
+-static int rbd_dev_header_info(struct rbd_device *rbd_dev);
+-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
+ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+                                       u64 snap_id);
+ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+@@ -4989,39 +4987,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev)
+       }
+ }
+-static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+-{
+-      u64 mapping_size;
+-      int ret;
+-
+-      down_write(&rbd_dev->header_rwsem);
+-      mapping_size = rbd_dev->mapping.size;
+-
+-      ret = rbd_dev_header_info(rbd_dev);
+-      if (ret)
+-              goto out;
+-
+-      /*
+-       * If there is a parent, see if it has disappeared due to the
+-       * mapped image getting flattened.
+-       */
+-      if (rbd_dev->parent) {
+-              ret = rbd_dev_v2_parent_info(rbd_dev);
+-              if (ret)
+-                      goto out;
+-      }
+-
+-      rbd_assert(!rbd_is_snap(rbd_dev));
+-      rbd_dev->mapping.size = rbd_dev->header.image_size;
+-
+-out:
+-      up_write(&rbd_dev->header_rwsem);
+-      if (!ret && mapping_size != rbd_dev->mapping.size)
+-              rbd_dev_update_size(rbd_dev);
+-
+-      return ret;
+-}
+-
+ static const struct blk_mq_ops rbd_mq_ops = {
+       .queue_rq       = rbd_queue_rq,
+ };
+@@ -7115,6 +7080,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
+       return ret;
+ }
++static int rbd_dev_refresh(struct rbd_device *rbd_dev)
++{
++      u64 mapping_size;
++      int ret;
++
++      down_write(&rbd_dev->header_rwsem);
++      mapping_size = rbd_dev->mapping.size;
++
++      ret = rbd_dev_header_info(rbd_dev);
++      if (ret)
++              goto out;
++
++      /*
++       * If there is a parent, see if it has disappeared due to the
++       * mapped image getting flattened.
++       */
++      if (rbd_dev->parent) {
++              ret = rbd_dev_v2_parent_info(rbd_dev);
++              if (ret)
++                      goto out;
++      }
++
++      rbd_assert(!rbd_is_snap(rbd_dev));
++      rbd_dev->mapping.size = rbd_dev->header.image_size;
++
++out:
++      up_write(&rbd_dev->header_rwsem);
++      if (!ret && mapping_size != rbd_dev->mapping.size)
++              rbd_dev_update_size(rbd_dev);
++
++      return ret;
++}
++
+ static ssize_t do_rbd_add(struct bus_type *bus,
+                         const char *buf,
+                         size_t count)
+-- 
+2.40.1
+
diff --git a/queue-5.10/rbd-take-header_rwsem-in-rbd_dev_refresh-only-when-u.patch b/queue-5.10/rbd-take-header_rwsem-in-rbd_dev_refresh-only-when-u.patch
new file mode 100644 (file)
index 0000000..8625969
--- /dev/null
@@ -0,0 +1,109 @@
+From 72983a5551f4b37f90f0770723d15707c85f0159 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Oct 2023 11:59:35 +0200
+Subject: rbd: take header_rwsem in rbd_dev_refresh() only when updating
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 0b207d02bd9ab8dcc31b262ca9f60dbc1822500d upstream.
+
+rbd_dev_refresh() has been holding header_rwsem across header and
+parent info read-in unnecessarily for ages.  With commit 870611e4877e
+("rbd: get snapshot context after exclusive lock is ensured to be
+held"), the potential for deadlocks became much more real owing to
+a) header_rwsem now nesting inside lock_rwsem and b) rw_semaphores
+not allowing new readers after a writer is registered.
+
+For example, assuming that I/O request 1, I/O request 2 and header
+read-in request all target the same OSD:
+
+1. I/O request 1 comes in and gets submitted
+2. watch error occurs
+3. rbd_watch_errcb() takes lock_rwsem for write, clears owner_cid and
+   releases lock_rwsem
+4. after reestablishing the watch, rbd_reregister_watch() calls
+   rbd_dev_refresh() which takes header_rwsem for write and submits
+   a header read-in request
+5. I/O request 2 comes in: after taking lock_rwsem for read in
+   __rbd_img_handle_request(), it blocks trying to take header_rwsem
+   for read in rbd_img_object_requests()
+6. another watch error occurs
+7. rbd_watch_errcb() blocks trying to take lock_rwsem for write
+8. I/O request 1 completion is received by the messenger but can't be
+   processed because lock_rwsem won't be granted anymore
+9. header read-in request completion can't be received, let alone
+   processed, because the messenger is stranded
+
+Change rbd_dev_refresh() to take header_rwsem only for actually
+updating rbd_dev->header.  Header and parent info read-in don't need
+any locking.
+
+Cc: stable@vger.kernel.org # 0b035401c570: rbd: move rbd_dev_refresh() definition
+Cc: stable@vger.kernel.org # 510a7330c82a: rbd: decouple header read-in from updating rbd_dev->header
+Cc: stable@vger.kernel.org # c10311776f0a: rbd: decouple parent info read-in from updating rbd_dev
+Cc: stable@vger.kernel.org
+Fixes: 870611e4877e ("rbd: get snapshot context after exclusive lock is ensured to be held")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/rbd.c | 22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index 628b986351ee9..b0f7930524ba0 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -7058,7 +7058,14 @@ static void rbd_dev_update_header(struct rbd_device *rbd_dev,
+       rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+       rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
+-      rbd_dev->header.image_size = header->image_size;
++      if (rbd_dev->header.image_size != header->image_size) {
++              rbd_dev->header.image_size = header->image_size;
++
++              if (!rbd_is_snap(rbd_dev)) {
++                      rbd_dev->mapping.size = header->image_size;
++                      rbd_dev_update_size(rbd_dev);
++              }
++      }
+       ceph_put_snap_context(rbd_dev->header.snapc);
+       rbd_dev->header.snapc = header->snapc;
+@@ -7116,11 +7123,9 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
+       struct rbd_image_header header = { 0 };
+       struct parent_image_info pii = { 0 };
+-      u64 mapping_size;
+       int ret;
+-      down_write(&rbd_dev->header_rwsem);
+-      mapping_size = rbd_dev->mapping.size;
++      dout("%s rbd_dev %p\n", __func__, rbd_dev);
+       ret = rbd_dev_header_info(rbd_dev, &header, false);
+       if (ret)
+@@ -7136,18 +7141,13 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+                       goto out;
+       }
++      down_write(&rbd_dev->header_rwsem);
+       rbd_dev_update_header(rbd_dev, &header);
+       if (rbd_dev->parent)
+               rbd_dev_update_parent(rbd_dev, &pii);
+-
+-      rbd_assert(!rbd_is_snap(rbd_dev));
+-      rbd_dev->mapping.size = rbd_dev->header.image_size;
+-
+-out:
+       up_write(&rbd_dev->header_rwsem);
+-      if (!ret && mapping_size != rbd_dev->mapping.size)
+-              rbd_dev_update_size(rbd_dev);
++out:
+       rbd_parent_info_cleanup(&pii);
+       rbd_image_header_cleanup(&header);
+       return ret;
+-- 
+2.40.1
+
index 2acb2328d13a82b1b5fb3fa5878de3d7c9337889..3005b95a89ef577731bb99038de9519fa08d20d0 100644 (file)
@@ -169,3 +169,7 @@ nfs-rename-nfs_client_kset-to-nfs_kset.patch
 nfsv4-fix-a-state-manager-thread-deadlock-regression.patch
 ring-buffer-remove-obsolete-comment-for-free_buffer_.patch
 ring-buffer-fix-bytes-info-in-per_cpu-buffer-stats.patch
+rbd-move-rbd_dev_refresh-definition.patch
+rbd-decouple-header-read-in-from-updating-rbd_dev-he.patch
+rbd-decouple-parent-info-read-in-from-updating-rbd_d.patch
+rbd-take-header_rwsem-in-rbd_dev_refresh-only-when-u.patch