--- /dev/null
+From e0c1fd288440b897d1369992207840ee23301615 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Oct 2023 17:29:51 +0200
+Subject: rbd: decouple header read-in from updating rbd_dev->header
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 510a7330c82a7754d5df0117a8589e8a539067c7 upstream.
+
+Make rbd_dev_header_info() populate a passed struct rbd_image_header
+instead of rbd_dev->header and introduce rbd_dev_update_header() for
+updating mutable fields in rbd_dev->header upon refresh. The initial
+read-in of both mutable and immutable fields in rbd_dev_image_probe()
+passes in rbd_dev->header so no update step is required there.
+
+rbd_init_layout() is now called directly from rbd_dev_image_probe()
+instead of individually in format 1 and format 2 implementations.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+[idryomov@gmail.com: backport to 5.4: _rbd_dev_v2_snap_features()
+ doesn't have read_only param]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/rbd.c | 205 ++++++++++++++++++++++++--------------------
+ 1 file changed, 114 insertions(+), 91 deletions(-)
+
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index e015b8610e274..17d802effdc41 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -626,7 +626,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
+ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
+
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev);
+-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
++static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header);
+ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id);
+ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+@@ -1096,15 +1097,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev)
+ RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
+ }
+
++static void rbd_image_header_cleanup(struct rbd_image_header *header)
++{
++ kfree(header->object_prefix);
++ ceph_put_snap_context(header->snapc);
++ kfree(header->snap_sizes);
++ kfree(header->snap_names);
++
++ memset(header, 0, sizeof(*header));
++}
++
+ /*
+ * Fill an rbd image header with information from the given format 1
+ * on-disk header.
+ */
+-static int rbd_header_from_disk(struct rbd_device *rbd_dev,
+- struct rbd_image_header_ondisk *ondisk)
++static int rbd_header_from_disk(struct rbd_image_header *header,
++ struct rbd_image_header_ondisk *ondisk,
++ bool first_time)
+ {
+- struct rbd_image_header *header = &rbd_dev->header;
+- bool first_time = header->object_prefix == NULL;
+ struct ceph_snap_context *snapc;
+ char *object_prefix = NULL;
+ char *snap_names = NULL;
+@@ -1171,11 +1181,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev,
+ if (first_time) {
+ header->object_prefix = object_prefix;
+ header->obj_order = ondisk->options.order;
+- rbd_init_layout(rbd_dev);
+- } else {
+- ceph_put_snap_context(header->snapc);
+- kfree(header->snap_names);
+- kfree(header->snap_sizes);
+ }
+
+ /* The remaining fields always get updated (when we refresh) */
+@@ -4981,7 +4986,9 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
+ * return, the rbd_dev->header field will contain up-to-date
+ * information about the image.
+ */
+-static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
++static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header,
++ bool first_time)
+ {
+ struct rbd_image_header_ondisk *ondisk = NULL;
+ u32 snap_count = 0;
+@@ -5029,7 +5036,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
+ snap_count = le32_to_cpu(ondisk->snap_count);
+ } while (snap_count != want_count);
+
+- ret = rbd_header_from_disk(rbd_dev, ondisk);
++ ret = rbd_header_from_disk(header, ondisk, first_time);
+ out:
+ kfree(ondisk);
+
+@@ -5642,17 +5649,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+ return 0;
+ }
+
+-static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
+-{
+- return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
+- &rbd_dev->header.obj_order,
+- &rbd_dev->header.image_size);
+-}
+-
+-static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev,
++ char **pobject_prefix)
+ {
+ size_t size;
+ void *reply_buf;
++ char *object_prefix;
+ int ret;
+ void *p;
+
+@@ -5670,16 +5672,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
+ goto out;
+
+ p = reply_buf;
+- rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
+- p + ret, NULL, GFP_NOIO);
++ object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL,
++ GFP_NOIO);
++ if (IS_ERR(object_prefix)) {
++ ret = PTR_ERR(object_prefix);
++ goto out;
++ }
+ ret = 0;
+
+- if (IS_ERR(rbd_dev->header.object_prefix)) {
+- ret = PTR_ERR(rbd_dev->header.object_prefix);
+- rbd_dev->header.object_prefix = NULL;
+- } else {
+- dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
+- }
++ *pobject_prefix = object_prefix;
++ dout(" object_prefix = %s\n", object_prefix);
+ out:
+ kfree(reply_buf);
+
+@@ -5724,12 +5726,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+ return 0;
+ }
+
+-static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
+-{
+- return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
+- &rbd_dev->header.features);
+-}
+-
+ /*
+ * These are generic image flags, but since they are used only for
+ * object map, store them in rbd_dev->object_map_flags.
+@@ -6004,14 +6000,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+ return ret;
+ }
+
+-static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev,
++ u64 *stripe_unit, u64 *stripe_count)
+ {
+ struct {
+ __le64 stripe_unit;
+ __le64 stripe_count;
+ } __attribute__ ((packed)) striping_info_buf = { 0 };
+ size_t size = sizeof (striping_info_buf);
+- void *p;
+ int ret;
+
+ ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+@@ -6023,27 +6019,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
+ if (ret < size)
+ return -ERANGE;
+
+- p = &striping_info_buf;
+- rbd_dev->header.stripe_unit = ceph_decode_64(&p);
+- rbd_dev->header.stripe_count = ceph_decode_64(&p);
++ *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit);
++ *stripe_count = le64_to_cpu(striping_info_buf.stripe_count);
++ dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit,
++ *stripe_count);
++
+ return 0;
+ }
+
+-static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id)
+ {
+- __le64 data_pool_id;
++ __le64 data_pool_buf;
+ int ret;
+
+ ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, "get_data_pool",
+- NULL, 0, &data_pool_id, sizeof(data_pool_id));
++ NULL, 0, &data_pool_buf,
++ sizeof(data_pool_buf));
++ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ return ret;
+- if (ret < sizeof(data_pool_id))
++ if (ret < sizeof(data_pool_buf))
+ return -EBADMSG;
+
+- rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
+- WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
++ *data_pool_id = le64_to_cpu(data_pool_buf);
++ dout(" data_pool_id = %lld\n", *data_pool_id);
++ WARN_ON(*data_pool_id == CEPH_NOPOOL);
++
+ return 0;
+ }
+
+@@ -6235,7 +6237,8 @@ static int rbd_spec_fill_names(struct rbd_device *rbd_dev)
+ return ret;
+ }
+
+-static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev,
++ struct ceph_snap_context **psnapc)
+ {
+ size_t size;
+ int ret;
+@@ -6296,9 +6299,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
+ for (i = 0; i < snap_count; i++)
+ snapc->snaps[i] = ceph_decode_64(&p);
+
+- ceph_put_snap_context(rbd_dev->header.snapc);
+- rbd_dev->header.snapc = snapc;
+-
++ *psnapc = snapc;
+ dout(" snap context seq = %llu, snap_count = %u\n",
+ (unsigned long long)seq, (unsigned int)snap_count);
+ out:
+@@ -6347,38 +6348,42 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ return snap_name;
+ }
+
+-static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header,
++ bool first_time)
+ {
+- bool first_time = rbd_dev->header.object_prefix == NULL;
+ int ret;
+
+- ret = rbd_dev_v2_image_size(rbd_dev);
++ ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
++ first_time ? &header->obj_order : NULL,
++ &header->image_size);
+ if (ret)
+ return ret;
+
+ if (first_time) {
+- ret = rbd_dev_v2_header_onetime(rbd_dev);
++ ret = rbd_dev_v2_header_onetime(rbd_dev, header);
+ if (ret)
+ return ret;
+ }
+
+- ret = rbd_dev_v2_snap_context(rbd_dev);
+- if (ret && first_time) {
+- kfree(rbd_dev->header.object_prefix);
+- rbd_dev->header.object_prefix = NULL;
+- }
++ ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
++ if (ret)
++ return ret;
+
+- return ret;
++ return 0;
+ }
+
+-static int rbd_dev_header_info(struct rbd_device *rbd_dev)
++static int rbd_dev_header_info(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header,
++ bool first_time)
+ {
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
++ rbd_assert(!header->object_prefix && !header->snapc);
+
+ if (rbd_dev->image_format == 1)
+- return rbd_dev_v1_header_info(rbd_dev);
++ return rbd_dev_v1_header_info(rbd_dev, header, first_time);
+
+- return rbd_dev_v2_header_info(rbd_dev);
++ return rbd_dev_v2_header_info(rbd_dev, header, first_time);
+ }
+
+ /*
+@@ -6728,60 +6733,49 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
+ */
+ static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
+ {
+- struct rbd_image_header *header;
+-
+ rbd_dev_parent_put(rbd_dev);
+ rbd_object_map_free(rbd_dev);
+ rbd_dev_mapping_clear(rbd_dev);
+
+ /* Free dynamic fields from the header, then zero it out */
+
+- header = &rbd_dev->header;
+- ceph_put_snap_context(header->snapc);
+- kfree(header->snap_sizes);
+- kfree(header->snap_names);
+- kfree(header->object_prefix);
+- memset(header, 0, sizeof (*header));
++ rbd_image_header_cleanup(&rbd_dev->header);
+ }
+
+-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
++static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header)
+ {
+ int ret;
+
+- ret = rbd_dev_v2_object_prefix(rbd_dev);
++ ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
+ if (ret)
+- goto out_err;
++ return ret;
+
+ /*
+ * Get the and check features for the image. Currently the
+ * features are assumed to never change.
+ */
+- ret = rbd_dev_v2_features(rbd_dev);
++ ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
++ &header->features);
+ if (ret)
+- goto out_err;
++ return ret;
+
+ /* If the image supports fancy striping, get its parameters */
+
+- if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
+- ret = rbd_dev_v2_striping_info(rbd_dev);
+- if (ret < 0)
+- goto out_err;
++ if (header->features & RBD_FEATURE_STRIPINGV2) {
++ ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
++ &header->stripe_count);
++ if (ret)
++ return ret;
+ }
+
+- if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
+- ret = rbd_dev_v2_data_pool(rbd_dev);
++ if (header->features & RBD_FEATURE_DATA_POOL) {
++ ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
+ if (ret)
+- goto out_err;
++ return ret;
+ }
+
+- rbd_init_layout(rbd_dev);
+ return 0;
+-
+-out_err:
+- rbd_dev->header.features = 0;
+- kfree(rbd_dev->header.object_prefix);
+- rbd_dev->header.object_prefix = NULL;
+- return ret;
+ }
+
+ /*
+@@ -6959,10 +6953,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
+ if (!depth)
+ down_write(&rbd_dev->header_rwsem);
+
+- ret = rbd_dev_header_info(rbd_dev);
++ ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
+ if (ret)
+ goto err_out_probe;
+
++ rbd_init_layout(rbd_dev);
++
+ /*
+ * If this image is the one being mapped, we have pool name and
+ * id, image name and id, and snap name - need to fill snap id.
+@@ -7022,15 +7018,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
+ return ret;
+ }
+
++static void rbd_dev_update_header(struct rbd_device *rbd_dev,
++ struct rbd_image_header *header)
++{
++ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
++ rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
++
++ rbd_dev->header.image_size = header->image_size;
++
++ ceph_put_snap_context(rbd_dev->header.snapc);
++ rbd_dev->header.snapc = header->snapc;
++ header->snapc = NULL;
++
++ if (rbd_dev->image_format == 1) {
++ kfree(rbd_dev->header.snap_names);
++ rbd_dev->header.snap_names = header->snap_names;
++ header->snap_names = NULL;
++
++ kfree(rbd_dev->header.snap_sizes);
++ rbd_dev->header.snap_sizes = header->snap_sizes;
++ header->snap_sizes = NULL;
++ }
++}
++
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
++ struct rbd_image_header header = { 0 };
+ u64 mapping_size;
+ int ret;
+
+ down_write(&rbd_dev->header_rwsem);
+ mapping_size = rbd_dev->mapping.size;
+
+- ret = rbd_dev_header_info(rbd_dev);
++ ret = rbd_dev_header_info(rbd_dev, &header, false);
+ if (ret)
+ goto out;
+
+@@ -7044,6 +7064,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ goto out;
+ }
+
++ rbd_dev_update_header(rbd_dev, &header);
++
+ if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
+ rbd_dev->mapping.size = rbd_dev->header.image_size;
+ } else {
+@@ -7056,6 +7078,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ if (!ret && mapping_size != rbd_dev->mapping.size)
+ rbd_dev_update_size(rbd_dev);
+
++ rbd_image_header_cleanup(&header);
+ return ret;
+ }
+
+--
+2.40.1
+
--- /dev/null
+From 70d7c2a0cbec4bbbf34ab652384e8a91cd734d27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Oct 2023 17:29:52 +0200
+Subject: rbd: decouple parent info read-in from updating rbd_dev
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit c10311776f0a8ddea2276df96e255625b07045a8 upstream.
+
+Unlike header read-in, parent info read-in is already decoupled in
+get_parent_info(), but it's buried in rbd_dev_v2_parent_info() along
+with the processing logic.
+
+Separate the initial read-in and update read-in logic into
+rbd_dev_setup_parent() and rbd_dev_update_parent() respectively and
+have rbd_dev_v2_parent_info() just populate struct parent_image_info
+(i.e. what get_parent_info() did). Some existing QoI issues, like
+flatten of a standalone clone being disregarded on refresh, remain.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+[idryomov@gmail.com: backport to 5.4: context]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/rbd.c | 142 +++++++++++++++++++++++++-------------------
+ 1 file changed, 80 insertions(+), 62 deletions(-)
+
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index 17d802effdc41..e86dca49fae71 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -5762,6 +5762,14 @@ struct parent_image_info {
+ u64 overlap;
+ };
+
++static void rbd_parent_info_cleanup(struct parent_image_info *pii)
++{
++ kfree(pii->pool_ns);
++ kfree(pii->image_id);
++
++ memset(pii, 0, sizeof(*pii));
++}
++
+ /*
+ * The caller is responsible for @pii.
+ */
+@@ -5831,6 +5839,9 @@ static int __get_parent_info(struct rbd_device *rbd_dev,
+ if (pii->has_overlap)
+ ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
++ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
++ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
++ pii->has_overlap, pii->overlap);
+ return 0;
+
+ e_inval:
+@@ -5869,14 +5880,17 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
+ pii->has_overlap = true;
+ ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
++ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
++ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
++ pii->has_overlap, pii->overlap);
+ return 0;
+
+ e_inval:
+ return -EINVAL;
+ }
+
+-static int get_parent_info(struct rbd_device *rbd_dev,
+- struct parent_image_info *pii)
++static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev,
++ struct parent_image_info *pii)
+ {
+ struct page *req_page, *reply_page;
+ void *p;
+@@ -5904,7 +5918,7 @@ static int get_parent_info(struct rbd_device *rbd_dev,
+ return ret;
+ }
+
+-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
++static int rbd_dev_setup_parent(struct rbd_device *rbd_dev)
+ {
+ struct rbd_spec *parent_spec;
+ struct parent_image_info pii = { 0 };
+@@ -5914,37 +5928,12 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+ if (!parent_spec)
+ return -ENOMEM;
+
+- ret = get_parent_info(rbd_dev, &pii);
++ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+ if (ret)
+ goto out_err;
+
+- dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+- __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
+- pii.has_overlap, pii.overlap);
+-
+- if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
+- /*
+- * Either the parent never existed, or we have
+- * record of it but the image got flattened so it no
+- * longer has a parent. When the parent of a
+- * layered image disappears we immediately set the
+- * overlap to 0. The effect of this is that all new
+- * requests will be treated as if the image had no
+- * parent.
+- *
+- * If !pii.has_overlap, the parent image spec is not
+- * applicable. It's there to avoid duplication in each
+- * snapshot record.
+- */
+- if (rbd_dev->parent_overlap) {
+- rbd_dev->parent_overlap = 0;
+- rbd_dev_parent_put(rbd_dev);
+- pr_info("%s: clone image has been flattened\n",
+- rbd_dev->disk->disk_name);
+- }
+-
++ if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap)
+ goto out; /* No parent? No problem. */
+- }
+
+ /* The ceph file layout needs to fit pool id in 32 bits */
+
+@@ -5956,46 +5945,34 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+ }
+
+ /*
+- * The parent won't change (except when the clone is
+- * flattened, already handled that). So we only need to
+- * record the parent spec we have not already done so.
++ * The parent won't change except when the clone is flattened,
++ * so we only need to record the parent image spec once.
+ */
+- if (!rbd_dev->parent_spec) {
+- parent_spec->pool_id = pii.pool_id;
+- if (pii.pool_ns && *pii.pool_ns) {
+- parent_spec->pool_ns = pii.pool_ns;
+- pii.pool_ns = NULL;
+- }
+- parent_spec->image_id = pii.image_id;
+- pii.image_id = NULL;
+- parent_spec->snap_id = pii.snap_id;
+-
+- rbd_dev->parent_spec = parent_spec;
+- parent_spec = NULL; /* rbd_dev now owns this */
++ parent_spec->pool_id = pii.pool_id;
++ if (pii.pool_ns && *pii.pool_ns) {
++ parent_spec->pool_ns = pii.pool_ns;
++ pii.pool_ns = NULL;
+ }
++ parent_spec->image_id = pii.image_id;
++ pii.image_id = NULL;
++ parent_spec->snap_id = pii.snap_id;
++
++ rbd_assert(!rbd_dev->parent_spec);
++ rbd_dev->parent_spec = parent_spec;
++ parent_spec = NULL; /* rbd_dev now owns this */
+
+ /*
+- * We always update the parent overlap. If it's zero we issue
+- * a warning, as we will proceed as if there was no parent.
++ * Record the parent overlap. If it's zero, issue a warning as
++ * we will proceed as if there is no parent.
+ */
+- if (!pii.overlap) {
+- if (parent_spec) {
+- /* refresh, careful to warn just once */
+- if (rbd_dev->parent_overlap)
+- rbd_warn(rbd_dev,
+- "clone now standalone (overlap became 0)");
+- } else {
+- /* initial probe */
+- rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
+- }
+- }
++ if (!pii.overlap)
++ rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
+ rbd_dev->parent_overlap = pii.overlap;
+
+ out:
+ ret = 0;
+ out_err:
+- kfree(pii.pool_ns);
+- kfree(pii.image_id);
++ rbd_parent_info_cleanup(&pii);
+ rbd_spec_put(parent_spec);
+ return ret;
+ }
+@@ -6992,7 +6969,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
+ }
+
+ if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
+- ret = rbd_dev_v2_parent_info(rbd_dev);
++ ret = rbd_dev_setup_parent(rbd_dev);
+ if (ret)
+ goto err_out_probe;
+ }
+@@ -7041,9 +7018,47 @@ static void rbd_dev_update_header(struct rbd_device *rbd_dev,
+ }
+ }
+
++static void rbd_dev_update_parent(struct rbd_device *rbd_dev,
++ struct parent_image_info *pii)
++{
++ if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
++ /*
++ * Either the parent never existed, or we have
++ * record of it but the image got flattened so it no
++ * longer has a parent. When the parent of a
++ * layered image disappears we immediately set the
++ * overlap to 0. The effect of this is that all new
++ * requests will be treated as if the image had no
++ * parent.
++ *
++ * If !pii.has_overlap, the parent image spec is not
++ * applicable. It's there to avoid duplication in each
++ * snapshot record.
++ */
++ if (rbd_dev->parent_overlap) {
++ rbd_dev->parent_overlap = 0;
++ rbd_dev_parent_put(rbd_dev);
++ pr_info("%s: clone has been flattened\n",
++ rbd_dev->disk->disk_name);
++ }
++ } else {
++ rbd_assert(rbd_dev->parent_spec);
++
++ /*
++ * Update the parent overlap. If it became zero, issue
++ * a warning as we will proceed as if there is no parent.
++ */
++ if (!pii->overlap && rbd_dev->parent_overlap)
++ rbd_warn(rbd_dev,
++ "clone has become standalone (overlap 0)");
++ rbd_dev->parent_overlap = pii->overlap;
++ }
++}
++
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
+ struct rbd_image_header header = { 0 };
++ struct parent_image_info pii = { 0 };
+ u64 mapping_size;
+ int ret;
+
+@@ -7059,12 +7074,14 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ * mapped image getting flattened.
+ */
+ if (rbd_dev->parent) {
+- ret = rbd_dev_v2_parent_info(rbd_dev);
++ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+ if (ret)
+ goto out;
+ }
+
+ rbd_dev_update_header(rbd_dev, &header);
++ if (rbd_dev->parent)
++ rbd_dev_update_parent(rbd_dev, &pii);
+
+ if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
+ rbd_dev->mapping.size = rbd_dev->header.image_size;
+@@ -7078,6 +7095,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ if (!ret && mapping_size != rbd_dev->mapping.size)
+ rbd_dev_update_size(rbd_dev);
+
++ rbd_parent_info_cleanup(&pii);
+ rbd_image_header_cleanup(&header);
+ return ret;
+ }
+--
+2.40.1
+
--- /dev/null
+From 6ccd4888acd05ada5ccec29549a9239e24a9c60a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Oct 2023 17:29:50 +0200
+Subject: rbd: move rbd_dev_refresh() definition
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 0b035401c57021fc6c300272cbb1c5a889d4fe45 upstream.
+
+Move rbd_dev_refresh() definition further down to avoid having to
+move struct parent_image_info definition in the next commit. This
+spares some forward declarations too.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+[idryomov@gmail.com: backport to 5.4: drop rbd_is_snap() assert,
+ preserve rbd_exists_validate() call]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/rbd.c | 76 ++++++++++++++++++++++-----------------------
+ 1 file changed, 37 insertions(+), 39 deletions(-)
+
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index 9d21f90f93f06..e015b8610e274 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -627,8 +627,6 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
+
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev);
+ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
+-static int rbd_dev_header_info(struct rbd_device *rbd_dev);
+-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
+ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id);
+ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+@@ -5075,43 +5073,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev)
+ }
+ }
+
+-static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+-{
+- u64 mapping_size;
+- int ret;
+-
+- down_write(&rbd_dev->header_rwsem);
+- mapping_size = rbd_dev->mapping.size;
+-
+- ret = rbd_dev_header_info(rbd_dev);
+- if (ret)
+- goto out;
+-
+- /*
+- * If there is a parent, see if it has disappeared due to the
+- * mapped image getting flattened.
+- */
+- if (rbd_dev->parent) {
+- ret = rbd_dev_v2_parent_info(rbd_dev);
+- if (ret)
+- goto out;
+- }
+-
+- if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
+- rbd_dev->mapping.size = rbd_dev->header.image_size;
+- } else {
+- /* validate mapped snapshot's EXISTS flag */
+- rbd_exists_validate(rbd_dev);
+- }
+-
+-out:
+- up_write(&rbd_dev->header_rwsem);
+- if (!ret && mapping_size != rbd_dev->mapping.size)
+- rbd_dev_update_size(rbd_dev);
+-
+- return ret;
+-}
+-
+ static int rbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
+ unsigned int hctx_idx, unsigned int numa_node)
+ {
+@@ -7061,6 +7022,43 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
+ return ret;
+ }
+
++static int rbd_dev_refresh(struct rbd_device *rbd_dev)
++{
++ u64 mapping_size;
++ int ret;
++
++ down_write(&rbd_dev->header_rwsem);
++ mapping_size = rbd_dev->mapping.size;
++
++ ret = rbd_dev_header_info(rbd_dev);
++ if (ret)
++ goto out;
++
++ /*
++ * If there is a parent, see if it has disappeared due to the
++ * mapped image getting flattened.
++ */
++ if (rbd_dev->parent) {
++ ret = rbd_dev_v2_parent_info(rbd_dev);
++ if (ret)
++ goto out;
++ }
++
++ if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
++ rbd_dev->mapping.size = rbd_dev->header.image_size;
++ } else {
++ /* validate mapped snapshot's EXISTS flag */
++ rbd_exists_validate(rbd_dev);
++ }
++
++out:
++ up_write(&rbd_dev->header_rwsem);
++ if (!ret && mapping_size != rbd_dev->mapping.size)
++ rbd_dev_update_size(rbd_dev);
++
++ return ret;
++}
++
+ static ssize_t do_rbd_add(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+--
+2.40.1
+
--- /dev/null
+From a597f637762d6ad7d51410260ff5db317064a551 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Oct 2023 17:29:53 +0200
+Subject: rbd: take header_rwsem in rbd_dev_refresh() only when updating
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 0b207d02bd9ab8dcc31b262ca9f60dbc1822500d upstream.
+
+rbd_dev_refresh() has been holding header_rwsem across header and
+parent info read-in unnecessarily for ages. With commit 870611e4877e
+("rbd: get snapshot context after exclusive lock is ensured to be
+held"), the potential for deadlocks became much more real owing to
+a) header_rwsem now nesting inside lock_rwsem and b) rw_semaphores
+not allowing new readers after a writer is registered.
+
+For example, assuming that I/O request 1, I/O request 2 and header
+read-in request all target the same OSD:
+
+1. I/O request 1 comes in and gets submitted
+2. watch error occurs
+3. rbd_watch_errcb() takes lock_rwsem for write, clears owner_cid and
+ releases lock_rwsem
+4. after reestablishing the watch, rbd_reregister_watch() calls
+ rbd_dev_refresh() which takes header_rwsem for write and submits
+ a header read-in request
+5. I/O request 2 comes in: after taking lock_rwsem for read in
+ __rbd_img_handle_request(), it blocks trying to take header_rwsem
+ for read in rbd_img_object_requests()
+6. another watch error occurs
+7. rbd_watch_errcb() blocks trying to take lock_rwsem for write
+8. I/O request 1 completion is received by the messenger but can't be
+ processed because lock_rwsem won't be granted anymore
+9. header read-in request completion can't be received, let alone
+ processed, because the messenger is stranded
+
+Change rbd_dev_refresh() to take header_rwsem only for actually
+updating rbd_dev->header. Header and parent info read-in don't need
+any locking.
+
+Cc: stable@vger.kernel.org # 0b035401c570: rbd: move rbd_dev_refresh() definition
+Cc: stable@vger.kernel.org # 510a7330c82a: rbd: decouple header read-in from updating rbd_dev->header
+Cc: stable@vger.kernel.org # c10311776f0a: rbd: decouple parent info read-in from updating rbd_dev
+Cc: stable@vger.kernel.org
+Fixes: 870611e4877e ("rbd: get snapshot context after exclusive lock is ensured to be held")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+[idryomov@gmail.com: backport to 5.4: open-code rbd_is_snap(), preserve
+ rbd_exists_validate() call]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/rbd.c | 31 ++++++++++++++++---------------
+ 1 file changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index e86dca49fae71..7117fa4902435 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -7001,7 +7001,19 @@ static void rbd_dev_update_header(struct rbd_device *rbd_dev,
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
+
+- rbd_dev->header.image_size = header->image_size;
++ if (rbd_dev->header.image_size != header->image_size) {
++ rbd_dev->header.image_size = header->image_size;
++
++ if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
++ rbd_dev->mapping.size = header->image_size;
++ rbd_dev_update_size(rbd_dev);
++ }
++ }
++
++ if (rbd_dev->spec->snap_id != CEPH_NOSNAP) {
++ /* validate mapped snapshot's EXISTS flag */
++ rbd_exists_validate(rbd_dev);
++ }
+
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ rbd_dev->header.snapc = header->snapc;
+@@ -7059,11 +7071,9 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
+ struct rbd_image_header header = { 0 };
+ struct parent_image_info pii = { 0 };
+- u64 mapping_size;
+ int ret;
+
+- down_write(&rbd_dev->header_rwsem);
+- mapping_size = rbd_dev->mapping.size;
++ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ ret = rbd_dev_header_info(rbd_dev, &header, false);
+ if (ret)
+@@ -7079,22 +7089,13 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ goto out;
+ }
+
++ down_write(&rbd_dev->header_rwsem);
+ rbd_dev_update_header(rbd_dev, &header);
+ if (rbd_dev->parent)
+ rbd_dev_update_parent(rbd_dev, &pii);
+-
+- if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
+- rbd_dev->mapping.size = rbd_dev->header.image_size;
+- } else {
+- /* validate mapped snapshot's EXISTS flag */
+- rbd_exists_validate(rbd_dev);
+- }
+-
+-out:
+ up_write(&rbd_dev->header_rwsem);
+- if (!ret && mapping_size != rbd_dev->mapping.size)
+- rbd_dev_update_size(rbd_dev);
+
++out:
+ rbd_parent_info_cleanup(&pii);
+ rbd_image_header_cleanup(&header);
+ return ret;
+--
+2.40.1
+
ata-libata-core-do-not-register-pm-operations-for-sas-ports.patch
ata-libata-sata-increase-pmp-srst-timeout-to-10s.patch
fs-binfmt_elf_efpic-fix-personality-for-elf-fdpic.patch
+rbd-move-rbd_dev_refresh-definition.patch
+rbd-decouple-header-read-in-from-updating-rbd_dev-he.patch
+rbd-decouple-parent-info-read-in-from-updating-rbd_d.patch
+rbd-take-header_rwsem-in-rbd_dev_refresh-only-when-u.patch