From: Sasha Levin Date: Thu, 30 Mar 2023 11:12:55 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v4.14.312~66 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bd41d40ee98c9e0c20d3fcc4dde3741e7b61d7c9;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/arm-dts-aspeed-p10bmc-update-battery-node-name.patch b/queue-6.1/arm-dts-aspeed-p10bmc-update-battery-node-name.patch new file mode 100644 index 00000000000..6768a687b7c --- /dev/null +++ b/queue-6.1/arm-dts-aspeed-p10bmc-update-battery-node-name.patch @@ -0,0 +1,53 @@ +From 158a8bc8a5cfcc2c4a8feaea3ee77ddeda799d53 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 Feb 2023 11:03:52 +1030 +Subject: ARM: dts: aspeed: p10bmc: Update battery node name + +From: Eddie James + +[ Upstream commit a8cef541dd5ef9445130660008c029205c4c5aa5 ] + +The ADC sensor for the battery needs to be named "iio-hwmon" for +compatibility with user space applications. + +Signed-off-by: Eddie James +Link: https://lore.kernel.org/r/20230202152759.67069-1-eajames@linux.ibm.com +Fixes: bf1914e2cfed ("ARM: dts: aspeed: p10bmc: Fix ADC iio-hwmon battery node name") +Signed-off-by: Joel Stanley +Link: https://lore.kernel.org/r/20230221003352.1218797-1-joel@jms.id.au +Signed-off-by: Arnd Bergmann +Signed-off-by: Sasha Levin +--- + arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts | 2 +- + arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts +index fcc890e3ad735..f11feb98fde33 100644 +--- a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts ++++ b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts +@@ -244,7 +244,7 @@ + }; + }; + +- iio-hwmon-battery { ++ iio-hwmon { + compatible = "iio-hwmon"; + io-channels = <&adc1 7>; + }; +diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts +index 4879da4cdbd25..77a3a27b04e26 100644 +--- a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts ++++ b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts +@@ -220,7 +220,7 @@ + }; + }; + +- iio-hwmon-battery { ++ iio-hwmon { + compatible = "iio-hwmon"; + io-channels = <&adc1 7>; + }; +-- +2.39.2 + diff --git a/queue-6.1/arm64-efi-set-nx-compat-flag-in-pe-coff-header.patch b/queue-6.1/arm64-efi-set-nx-compat-flag-in-pe-coff-header.patch new file mode 100644 index 00000000000..7562e0d1a4d --- /dev/null +++ b/queue-6.1/arm64-efi-set-nx-compat-flag-in-pe-coff-header.patch @@ -0,0 +1,49 @@ +From 16b47f362260fd260e6be1320d19bf3440fc199a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Mar 2023 13:30:05 +0100 +Subject: arm64: efi: Set NX compat flag in PE/COFF header + +From: Ard Biesheuvel + +[ Upstream commit 3c66bb1918c262dd52fb4221a8d372619c5da70a ] + +The PE/COFF header has a NX compat flag which informs the firmware that +the application does not rely on memory regions being mapped with both +executable and writable permissions at the same time. + +This is typically used by the firmware to decide whether it can set the +NX attribute on all allocations it returns, but going forward, it may be +used to enforce a policy that only permits applications with the NX flag +set to be loaded to begin wiht in some configurations, e.g., when Secure +Boot is in effect. 
+ +Even though the arm64 version of the EFI stub may relocate the kernel +before executing it, it always did so after disabling the MMU, and so we +were always in line with what the NX compat flag conveys, we just never +bothered to set it. + +So let's set the flag now. + +Cc: +Signed-off-by: Ard Biesheuvel +Signed-off-by: Sasha Levin +--- + arch/arm64/kernel/efi-header.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S +index 28d8a5dca5f12..d731b4655df8e 100644 +--- a/arch/arm64/kernel/efi-header.S ++++ b/arch/arm64/kernel/efi-header.S +@@ -66,7 +66,7 @@ + .long .Lefi_header_end - .L_head // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem +- .short 0 // DllCharacteristics ++ .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve +-- +2.39.2 + diff --git a/queue-6.1/blk-mq-fix-bad-unlock-balance-detected-on-q-srcu-in-.patch b/queue-6.1/blk-mq-fix-bad-unlock-balance-detected-on-q-srcu-in-.patch new file mode 100644 index 00000000000..4935d04b936 --- /dev/null +++ b/queue-6.1/blk-mq-fix-bad-unlock-balance-detected-on-q-srcu-in-.patch @@ -0,0 +1,52 @@ +From 55cb484ac38dc54e421beb925ae5f7429300b0a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Mar 2023 09:09:13 +0800 +Subject: blk-mq: fix "bad unlock balance detected" on q->srcu in + __blk_mq_run_dispatch_ops + +From: Chris Leech + +[ Upstream commit 00e885efcfbb8712d3e1bfc1ae30639c15ca1d3b ] + +The 'q' parameter of the macro __blk_mq_run_dispatch_ops may not be one +local variable, such as, it is rq->q, then request queue pointed by +this variable could be changed to another queue in case of +BLK_MQ_F_TAG_QUEUE_SHARED after 'dispatch_ops' returns, then +'bad unlock balance' is triggered. + +Fixes the issue by adding one local variable for doing srcu lock/unlock. 
+ +Fixes: 2a904d00855f ("blk-mq: remove hctx_lock and hctx_unlock") +Cc: Marco Patalano +Signed-off-by: Chris Leech +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20230310010913.1014789-1-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-mq.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/blk-mq.h b/block/blk-mq.h +index ef59fee62780d..a7482d2cc82e7 100644 +--- a/block/blk-mq.h ++++ b/block/blk-mq.h +@@ -378,12 +378,13 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, + #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \ + do { \ + if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) { \ ++ struct blk_mq_tag_set *__tag_set = (q)->tag_set; \ + int srcu_idx; \ + \ + might_sleep_if(check_sleep); \ +- srcu_idx = srcu_read_lock((q)->tag_set->srcu); \ ++ srcu_idx = srcu_read_lock(__tag_set->srcu); \ + (dispatch_ops); \ +- srcu_read_unlock((q)->tag_set->srcu, srcu_idx); \ ++ srcu_read_unlock(__tag_set->srcu, srcu_idx); \ + } else { \ + rcu_read_lock(); \ + (dispatch_ops); \ +-- +2.39.2 + diff --git a/queue-6.1/blk-mq-move-the-srcu_struct-used-for-quiescing-to-th.patch b/queue-6.1/blk-mq-move-the-srcu_struct-used-for-quiescing-to-th.patch new file mode 100644 index 00000000000..f4c828f3c95 --- /dev/null +++ b/queue-6.1/blk-mq-move-the-srcu_struct-used-for-quiescing-to-th.patch @@ -0,0 +1,358 @@ +From f9f9f7c0add7cd173019a5c920121058e5239c0e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Nov 2022 16:00:47 +0100 +Subject: blk-mq: move the srcu_struct used for quiescing to the tagset + +From: Christoph Hellwig + +[ Upstream commit 80bd4a7aab4c9ce59bf5e35fdf52aa23d8a3c9f5 ] + +All I/O submissions have fairly similar latencies, and a tagset-wide +quiesce is a fairly common operation. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Keith Busch +Reviewed-by: Ming Lei +Reviewed-by: Chao Leng +Reviewed-by: Sagi Grimberg +Reviewed-by: Hannes Reinecke +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20221101150050.3510-12-hch@lst.de +[axboe: fix whitespace] +Signed-off-by: Jens Axboe +Stable-dep-of: 00e885efcfbb ("blk-mq: fix "bad unlock balance detected" on q->srcu in __blk_mq_run_dispatch_ops") +Signed-off-by: Sasha Levin +--- + block/blk-core.c | 27 +++++---------------------- + block/blk-mq.c | 33 +++++++++++++++++++++++++-------- + block/blk-mq.h | 14 +++++++------- + block/blk-sysfs.c | 9 ++------- + block/blk.h | 9 +-------- + block/genhd.c | 2 +- + include/linux/blk-mq.h | 4 ++++ + include/linux/blkdev.h | 9 --------- + 8 files changed, 45 insertions(+), 62 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index 24ee7785a5ad5..d5da62bb4bc06 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -65,7 +65,6 @@ DEFINE_IDA(blk_queue_ida); + * For queue allocation + */ + struct kmem_cache *blk_requestq_cachep; +-struct kmem_cache *blk_requestq_srcu_cachep; + + /* + * Controlling structure to kblockd +@@ -373,26 +372,20 @@ static void blk_timeout_work(struct work_struct *work) + { + } + +-struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu) ++struct request_queue *blk_alloc_queue(int node_id) + { + struct request_queue *q; + +- q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu), +- GFP_KERNEL | __GFP_ZERO, node_id); ++ q = kmem_cache_alloc_node(blk_requestq_cachep, GFP_KERNEL | __GFP_ZERO, ++ node_id); + if (!q) + return NULL; + +- if (alloc_srcu) { +- blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q); +- if (init_srcu_struct(q->srcu) != 0) +- goto fail_q; +- } +- + q->last_merge = NULL; + + q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL); + if (q->id < 0) +- goto fail_srcu; ++ goto fail_q; + + q->stats = blk_alloc_queue_stats(); + if (!q->stats) +@@ -434,11 +427,8 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu) + blk_free_queue_stats(q->stats); + fail_id: + ida_free(&blk_queue_ida, q->id); +-fail_srcu: +- if (alloc_srcu) +- cleanup_srcu_struct(q->srcu); + fail_q: +- kmem_cache_free(blk_get_queue_kmem_cache(alloc_srcu), q); ++ kmem_cache_free(blk_requestq_cachep, q); + return NULL; + } + +@@ -1190,9 +1180,6 @@ int __init blk_dev_init(void) + sizeof_field(struct request, cmd_flags)); + BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * + sizeof_field(struct bio, bi_opf)); +- BUILD_BUG_ON(ALIGN(offsetof(struct request_queue, srcu), +- __alignof__(struct request_queue)) != +- sizeof(struct request_queue)); + + /* used for unplugging and affects IO latency/throughput - HIGHPRI */ + kblockd_workqueue = alloc_workqueue("kblockd", +@@ -1203,10 +1190,6 @@ int __init blk_dev_init(void) + blk_requestq_cachep = kmem_cache_create("request_queue", + sizeof(struct request_queue), 0, SLAB_PANIC, NULL); + +- blk_requestq_srcu_cachep = kmem_cache_create("request_queue_srcu", +- sizeof(struct request_queue) + +- sizeof(struct srcu_struct), 0, SLAB_PANIC, NULL); +- + blk_debugfs_root = debugfs_create_dir("block", NULL); + + return 0; +diff --git a/block/blk-mq.c b/block/blk-mq.c +index aa67a52c5a069..f8c97d75b8d1a 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -261,8 +261,8 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait); + */ + void blk_mq_wait_quiesce_done(struct request_queue *q) + { +- if (blk_queue_has_srcu(q)) +- synchronize_srcu(q->srcu); ++ if (q->tag_set->flags & BLK_MQ_F_BLOCKING) ++ 
synchronize_srcu(q->tag_set->srcu); + else + synchronize_rcu(); + } +@@ -4022,7 +4022,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, + struct request_queue *q; + int ret; + +- q = blk_alloc_queue(set->numa_node, set->flags & BLK_MQ_F_BLOCKING); ++ q = blk_alloc_queue(set->numa_node); + if (!q) + return ERR_PTR(-ENOMEM); + q->queuedata = queuedata; +@@ -4194,9 +4194,6 @@ static void blk_mq_update_poll_flag(struct request_queue *q) + int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, + struct request_queue *q) + { +- WARN_ON_ONCE(blk_queue_has_srcu(q) != +- !!(set->flags & BLK_MQ_F_BLOCKING)); +- + /* mark the queue as mq asap */ + q->mq_ops = set->ops; + +@@ -4453,8 +4450,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) + if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) + set->nr_hw_queues = nr_cpu_ids; + +- if (blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues) < 0) +- return -ENOMEM; ++ if (set->flags & BLK_MQ_F_BLOCKING) { ++ set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL); ++ if (!set->srcu) ++ return -ENOMEM; ++ ret = init_srcu_struct(set->srcu); ++ if (ret) ++ goto out_free_srcu; ++ } ++ ++ ret = blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues); ++ if (ret) ++ goto out_cleanup_srcu; + + ret = -ENOMEM; + for (i = 0; i < set->nr_maps; i++) { +@@ -4484,6 +4491,12 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) + } + kfree(set->tags); + set->tags = NULL; ++out_cleanup_srcu: ++ if (set->flags & BLK_MQ_F_BLOCKING) ++ cleanup_srcu_struct(set->srcu); ++out_free_srcu: ++ if (set->flags & BLK_MQ_F_BLOCKING) ++ kfree(set->srcu); + return ret; + } + EXPORT_SYMBOL(blk_mq_alloc_tag_set); +@@ -4523,6 +4536,10 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) + + kfree(set->tags); + set->tags = NULL; ++ if (set->flags & BLK_MQ_F_BLOCKING) { ++ cleanup_srcu_struct(set->srcu); ++ kfree(set->srcu); ++ } + } + EXPORT_SYMBOL(blk_mq_free_tag_set); + +diff --git a/block/blk-mq.h b/block/blk-mq.h +index 0b2870839cdd6..ef59fee62780d 100644 +--- a/block/blk-mq.h ++++ b/block/blk-mq.h +@@ -377,17 +377,17 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, + /* run the code block in @dispatch_ops with rcu/srcu read lock held */ + #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \ + do { \ +- if (!blk_queue_has_srcu(q)) { \ +- rcu_read_lock(); \ +- (dispatch_ops); \ +- rcu_read_unlock(); \ +- } else { \ ++ if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) { \ + int srcu_idx; \ + \ + might_sleep_if(check_sleep); \ +- srcu_idx = srcu_read_lock((q)->srcu); \ ++ srcu_idx = srcu_read_lock((q)->tag_set->srcu); \ + (dispatch_ops); \ +- srcu_read_unlock((q)->srcu, srcu_idx); \ ++ srcu_read_unlock((q)->tag_set->srcu, srcu_idx); \ ++ } else { \ ++ rcu_read_lock(); \ ++ (dispatch_ops); \ ++ rcu_read_unlock(); \ + } \ + } while (0) + +diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c +index e71b3b43927c0..e7871665825a3 100644 +--- a/block/blk-sysfs.c ++++ b/block/blk-sysfs.c +@@ -739,10 +739,8 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, + + static void blk_free_queue_rcu(struct rcu_head *rcu_head) + { +- struct request_queue *q = container_of(rcu_head, struct request_queue, +- rcu_head); +- +- kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q); ++ kmem_cache_free(blk_requestq_cachep, ++ container_of(rcu_head, struct request_queue, rcu_head)); + } + + /** +@@ -779,9 +777,6 @@ static void blk_release_queue(struct kobject *kobj) + if (queue_is_mq(q)) + blk_mq_release(q); + +- if 
(blk_queue_has_srcu(q)) +- cleanup_srcu_struct(q->srcu); +- + ida_free(&blk_queue_ida, q->id); + call_rcu(&q->rcu_head, blk_free_queue_rcu); + } +diff --git a/block/blk.h b/block/blk.h +index a186ea20f39d8..4849a2efa4c50 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -27,7 +27,6 @@ struct blk_flush_queue { + }; + + extern struct kmem_cache *blk_requestq_cachep; +-extern struct kmem_cache *blk_requestq_srcu_cachep; + extern struct kobj_type blk_queue_ktype; + extern struct ida blk_queue_ida; + +@@ -428,13 +427,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio, + struct page *page, unsigned int len, unsigned int offset, + unsigned int max_sectors, bool *same_page); + +-static inline struct kmem_cache *blk_get_queue_kmem_cache(bool srcu) +-{ +- if (srcu) +- return blk_requestq_srcu_cachep; +- return blk_requestq_cachep; +-} +-struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu); ++struct request_queue *blk_alloc_queue(int node_id); + + int disk_scan_partitions(struct gendisk *disk, fmode_t mode); + +diff --git a/block/genhd.c b/block/genhd.c +index 0b6928e948f31..4db1f905514c5 100644 +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -1436,7 +1436,7 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass) + struct request_queue *q; + struct gendisk *disk; + +- q = blk_alloc_queue(node, false); ++ q = blk_alloc_queue(node); + if (!q) + return NULL; + +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index a9764cbf7f8d2..8e942e36f1c48 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + + struct blk_mq_tags; + struct blk_flush_queue; +@@ -507,6 +508,8 @@ enum hctx_type { + * @tag_list_lock: Serializes tag_list accesses. + * @tag_list: List of the request queues that use this tag set. See also + * request_queue.tag_set_list. ++ * @srcu: Use as lock when type of the request queue is blocking ++ * (BLK_MQ_F_BLOCKING). + */ + struct blk_mq_tag_set { + struct blk_mq_queue_map map[HCTX_MAX_TYPES]; +@@ -527,6 +530,7 @@ struct blk_mq_tag_set { + + struct mutex tag_list_lock; + struct list_head tag_list; ++ struct srcu_struct *srcu; + }; + + /** +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 891f8cbcd0436..36c286d22fb23 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -22,7 +22,6 @@ + #include + #include + #include +-#include + #include + #include + +@@ -544,18 +543,11 @@ struct request_queue { + struct mutex debugfs_mutex; + + bool mq_sysfs_init_done; +- +- /** +- * @srcu: Sleepable RCU. Use as lock when type of the request queue +- * is blocking (BLK_MQ_F_BLOCKING). 
Must be the last member +- */ +- struct srcu_struct srcu[]; + }; + + /* Keep blk_queue_flag_name[] in sync with the definitions below */ + #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ + #define QUEUE_FLAG_DYING 1 /* queue being torn down */ +-#define QUEUE_FLAG_HAS_SRCU 2 /* SRCU is allocated */ + #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ + #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ + #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ +@@ -591,7 +583,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); + + #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) + #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) +-#define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags) + #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) + #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) + #define blk_queue_noxmerges(q) \ +-- +2.39.2 + diff --git a/queue-6.1/btrfs-rename-btrfs_fs_no_overcommit-to-btrfs_fs_acti.patch b/queue-6.1/btrfs-rename-btrfs_fs_no_overcommit-to-btrfs_fs_acti.patch new file mode 100644 index 00000000000..719b9598d51 --- /dev/null +++ b/queue-6.1/btrfs-rename-btrfs_fs_no_overcommit-to-btrfs_fs_acti.patch @@ -0,0 +1,75 @@ +From 3c2f1e320bc487469d4a469e50d0732ff734800e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 1 Mar 2023 16:14:42 -0500 +Subject: btrfs: rename BTRFS_FS_NO_OVERCOMMIT to BTRFS_FS_ACTIVE_ZONE_TRACKING + +From: Josef Bacik + +[ Upstream commit bf1f1fec2724a33b67ec12032402ea75f2a83622 ] + +This flag only gets set when we're doing active zone tracking, and we're +going to need to use this flag for things related to this behavior. +Rename the flag to represent what it actually means for the file system +so it can be used in other ways and still make sense. + +Reviewed-by: Naohiro Aota +Reviewed-by: Johannes Thumshirn +Reviewed-by: Anand Jain +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.h | 7 ++----- + fs/btrfs/space-info.c | 2 +- + fs/btrfs/zoned.c | 3 +-- + 3 files changed, 4 insertions(+), 8 deletions(-) + +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index a3febabacec04..3bcef0c4d6fc4 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -590,11 +590,8 @@ enum { + /* Indicate we have to finish a zone to do next allocation. */ + BTRFS_FS_NEED_ZONE_FINISH, + +- /* +- * Indicate metadata over-commit is disabled. This is set when active +- * zone tracking is needed. +- */ +- BTRFS_FS_NO_OVERCOMMIT, ++ /* This is set when active zone tracking is needed. 
*/ ++ BTRFS_FS_ACTIVE_ZONE_TRACKING, + + #if BITS_PER_LONG == 32 + /* Indicate if we have error/warn message printed on 32bit systems */ +diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c +index 65c010159fb5f..c7642c00a65d0 100644 +--- a/fs/btrfs/space-info.c ++++ b/fs/btrfs/space-info.c +@@ -404,7 +404,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, + return 0; + + used = btrfs_space_info_used(space_info, true); +- if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) && ++ if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) && + (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) + avail = 0; + else +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c +index 1b72004136ef8..0d88cc46ac5db 100644 +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -538,8 +538,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) + } + atomic_set(&zone_info->active_zones_left, + max_active_zones - nactive); +- /* Overcommit does not work well with active zone tacking. */ +- set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags); ++ set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); + } + + /* Validate superblock log */ +-- +2.39.2 + diff --git a/queue-6.1/btrfs-zoned-count-fresh-bg-region-as-zone-unusable.patch b/queue-6.1/btrfs-zoned-count-fresh-bg-region-as-zone-unusable.patch new file mode 100644 index 00000000000..edaadfacb86 --- /dev/null +++ b/queue-6.1/btrfs-zoned-count-fresh-bg-region-as-zone-unusable.patch @@ -0,0 +1,134 @@ +From f9b02dd808f9c00eff8e1f2ce669b17bde9e444f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Mar 2023 16:06:13 +0900 +Subject: btrfs: zoned: count fresh BG region as zone unusable + +From: Naohiro Aota + +[ Upstream commit fa2068d7e922b434eba5bfb0131e6d39febfdb48 ] + +The naming of space_info->active_total_bytes is misleading. It counts +not only active block groups but also full ones which are previously +active but now inactive. That confusion results in a bug not counting +the full BGs into active_total_bytes on mount time. + +For a background, there are three kinds of block groups in terms of +activation. + + 1. Block groups never activated + 2. Block groups currently active + 3. Block groups previously active and currently inactive (due to fully + written or zone finish) + +What we really wanted to exclude from "total_bytes" is the total size of +BGs #1. They seem empty and allocatable but since they are not activated, +we cannot rely on them to do the space reservation. + +And, since BGs #1 never get activated, they should have no "used", +"reserved" and "pinned" bytes. + +OTOH, BGs #3 can be counted in the "total", since they are already full +we cannot allocate from them anyway. For them, "total_bytes == used + +reserved + pinned + zone_unusable" should hold. + +Tracking #2 and #3 as "active_total_bytes" (current implementation) is +confusing. And, tracking #1 and subtract that properly from "total_bytes" +every time you need space reservation is cumbersome. + +Instead, we can count the whole region of a newly allocated block group as +zone_unusable. Then, once that block group is activated, release +[0 .. zone_capacity] from the zone_unusable counters. With this, we can +eliminate the confusing ->active_total_bytes and the code will be common +among regular and the zoned mode. Also, no additional counter is needed +with this approach. 
+ +Fixes: 6a921de58992 ("btrfs: zoned: introduce space_info->active_total_bytes") +CC: stable@vger.kernel.org # 6.1+ +Signed-off-by: Naohiro Aota +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/free-space-cache.c | 8 +++++++- + fs/btrfs/zoned.c | 24 +++++++++++++++++++----- + 2 files changed, 26 insertions(+), 6 deletions(-) + +diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c +index f4023651dd68b..6a8f2bd350f4b 100644 +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -2684,8 +2684,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, + bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); + + spin_lock(&ctl->tree_lock); ++ /* Count initial region as zone_unusable until it gets activated. */ + if (!used) + to_free = size; ++ else if (initial && ++ test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) && ++ (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))) ++ to_free = 0; + else if (initial) + to_free = block_group->zone_capacity; + else if (offset >= block_group->alloc_offset) +@@ -2713,7 +2718,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, + reclaimable_unusable = block_group->zone_unusable - + (block_group->length - block_group->zone_capacity); + /* All the region is now unusable. Mark it as unused and reclaim */ +- if (block_group->zone_unusable == block_group->length) { ++ if (block_group->zone_unusable == block_group->length && ++ block_group->alloc_offset) { + btrfs_mark_bg_unused(block_group); + } else if (bg_reclaim_threshold && + reclaimable_unusable >= +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c +index 0d88cc46ac5db..e97c5a1ac95d6 100644 +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -1575,9 +1575,19 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) + return; + + WARN_ON(cache->bytes_super != 0); +- unusable = (cache->alloc_offset - cache->used) + +- (cache->length - cache->zone_capacity); +- free = cache->zone_capacity - cache->alloc_offset; ++ ++ /* Check for block groups never get activated */ ++ if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) && ++ cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) && ++ !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) && ++ cache->alloc_offset == 0) { ++ unusable = cache->length; ++ free = 0; ++ } else { ++ unusable = (cache->alloc_offset - cache->used) + ++ (cache->length - cache->zone_capacity); ++ free = cache->zone_capacity - cache->alloc_offset; ++ } + + /* We only need ->free_space in ALLOC_SEQ block groups */ + cache->cached = BTRFS_CACHE_FINISHED; +@@ -1914,7 +1924,11 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) + + /* Successfully activated all the zones */ + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); +- space_info->active_total_bytes += block_group->length; ++ WARN_ON(block_group->alloc_offset != 0); ++ if (block_group->zone_unusable == block_group->length) { ++ block_group->zone_unusable = block_group->length - block_group->zone_capacity; ++ space_info->bytes_zone_unusable -= block_group->zone_capacity; ++ } + spin_unlock(&block_group->lock); + btrfs_try_granting_tickets(fs_info, space_info); + spin_unlock(&space_info->lock); +@@ -2277,7 +2291,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) + u64 avail; + + spin_lock(&block_group->lock); +- if (block_group->reserved || ++ if 
(block_group->reserved || block_group->alloc_offset == 0 || + (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) { + spin_unlock(&block_group->lock); + continue; +-- +2.39.2 + diff --git a/queue-6.1/cifs-avoid-race-conditions-with-parallel-reconnects.patch b/queue-6.1/cifs-avoid-race-conditions-with-parallel-reconnects.patch new file mode 100644 index 00000000000..af1a5e4bc55 --- /dev/null +++ b/queue-6.1/cifs-avoid-race-conditions-with-parallel-reconnects.patch @@ -0,0 +1,333 @@ +From 0ab7a952cc892cdfb993bbd2897b5cc1f1e98858 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Mar 2023 06:08:19 +0000 +Subject: cifs: avoid race conditions with parallel reconnects + +From: Shyam Prasad N + +[ Upstream commit bc962159e8e326af634a506508034a375bf2b858 ] + +When multiple processes/channels do reconnects in parallel +we used to return success immediately +negotiate/session-setup/tree-connect, causing race conditions +between processes that enter the function in parallel. +This caused several errors related to session not found to +show up during parallel reconnects. + +Signed-off-by: Shyam Prasad N +Reviewed-by: Paulo Alcantara (SUSE) +Cc: stable@vger.kernel.org +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/cifs/connect.c | 48 ++++++++++++++++++++++++++++++----------- + fs/cifs/smb2pdu.c | 44 +++++++++++++++++++++---------------- + fs/cifs/smb2transport.c | 17 ++++++++++++--- + 3 files changed, 76 insertions(+), 33 deletions(-) + +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c +index 43637c1283748..077c88c49dfdf 100644 +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -261,31 +261,42 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, + cifs_chan_update_iface(ses, server); + + spin_lock(&ses->chan_lock); +- if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) +- goto next_session; ++ if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) { ++ spin_unlock(&ses->chan_lock); ++ continue; ++ } + + if (mark_smb_session) + CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses); + else + cifs_chan_set_need_reconnect(ses, server); + ++ cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n", ++ __func__, ses->chans_need_reconnect); ++ + /* If all channels need reconnect, then tcon needs reconnect */ +- if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) +- goto next_session; ++ if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { ++ spin_unlock(&ses->chan_lock); ++ continue; ++ } ++ spin_unlock(&ses->chan_lock); + ++ spin_lock(&ses->ses_lock); + ses->ses_status = SES_NEED_RECON; ++ spin_unlock(&ses->ses_lock); + + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + tcon->need_reconnect = true; ++ spin_lock(&tcon->tc_lock); + tcon->status = TID_NEED_RECON; ++ spin_unlock(&tcon->tc_lock); + } + if (ses->tcon_ipc) { + ses->tcon_ipc->need_reconnect = true; ++ spin_lock(&ses->tcon_ipc->tc_lock); + ses->tcon_ipc->status = TID_NEED_RECON; ++ spin_unlock(&ses->tcon_ipc->tc_lock); + } +- +-next_session: +- spin_unlock(&ses->chan_lock); + } + spin_unlock(&cifs_tcp_ses_lock); + } +@@ -4050,11 +4061,19 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, + + /* only send once per connect */ + spin_lock(&server->srv_lock); +- if (!server->ops->need_neg(server) || ++ if (server->tcpStatus != CifsGood && ++ server->tcpStatus != CifsNew && + server->tcpStatus != CifsNeedNegotiate) { ++ spin_unlock(&server->srv_lock); ++ return -EHOSTDOWN; ++ } ++ ++ if (!server->ops->need_neg(server) && ++ server->tcpStatus == 
CifsGood) { + spin_unlock(&server->srv_lock); + return 0; + } ++ + server->tcpStatus = CifsInNegotiate; + spin_unlock(&server->srv_lock); + +@@ -4088,23 +4107,28 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, + bool is_binding = false; + + spin_lock(&ses->ses_lock); ++ cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n", ++ __func__, ses->chans_need_reconnect); ++ + if (ses->ses_status != SES_GOOD && + ses->ses_status != SES_NEW && + ses->ses_status != SES_NEED_RECON) { + spin_unlock(&ses->ses_lock); +- return 0; ++ return -EHOSTDOWN; + } + + /* only send once per connect */ + spin_lock(&ses->chan_lock); +- if (CIFS_ALL_CHANS_GOOD(ses) || +- cifs_chan_in_reconnect(ses, server)) { ++ if (CIFS_ALL_CHANS_GOOD(ses)) { ++ if (ses->ses_status == SES_NEED_RECON) ++ ses->ses_status = SES_GOOD; + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); + return 0; + } +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); ++ + cifs_chan_set_in_reconnect(ses, server); ++ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + spin_unlock(&ses->chan_lock); + + if (!is_binding) +diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c +index 83d04cd2f9df8..f0b1ae0835d71 100644 +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -199,6 +199,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + } + spin_unlock(&server->srv_lock); + ++again: + rc = cifs_wait_for_server_reconnect(server, tcon->retry); + if (rc) + return rc; +@@ -217,6 +218,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + + nls_codepage = load_nls_default(); + ++ mutex_lock(&ses->session_mutex); + /* + * Recheck after acquire mutex. If another thread is negotiating + * and the server never sends an answer the socket will be closed +@@ -225,6 +227,11 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsNeedReconnect) { + spin_unlock(&server->srv_lock); ++ mutex_unlock(&ses->session_mutex); ++ ++ if (tcon->retry) ++ goto again; ++ + rc = -EHOSTDOWN; + goto out; + } +@@ -234,19 +241,22 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + * need to prevent multiple threads trying to simultaneously + * reconnect the same SMB session + */ ++ spin_lock(&ses->ses_lock); + spin_lock(&ses->chan_lock); +- if (!cifs_chan_needs_reconnect(ses, server)) { ++ if (!cifs_chan_needs_reconnect(ses, server) && ++ ses->ses_status == SES_GOOD) { + spin_unlock(&ses->chan_lock); +- ++ spin_unlock(&ses->ses_lock); + /* this means that we only need to tree connect */ + if (tcon->need_reconnect) + goto skip_sess_setup; + ++ mutex_unlock(&ses->session_mutex); + goto out; + } + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + +- mutex_lock(&ses->session_mutex); + rc = cifs_negotiate_protocol(0, ses, server); + if (!rc) { + rc = cifs_setup_session(0, ses, server, nls_codepage); +@@ -262,10 +272,8 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + mutex_unlock(&ses->session_mutex); + goto out; + } +- mutex_unlock(&ses->session_mutex); + + skip_sess_setup: +- mutex_lock(&ses->session_mutex); + if (!tcon->need_reconnect) { + mutex_unlock(&ses->session_mutex); + goto out; +@@ -280,7 +288,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); + if (rc) { + /* If sess reconnected but tcon didn't, something strange ... 
*/ +- pr_warn_once("reconnect tcon failed rc = %d\n", rc); ++ cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); + goto out; + } + +@@ -1252,9 +1260,9 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) + if (rc) + return rc; + +- spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); +- spin_unlock(&ses->chan_lock); ++ spin_lock(&ses->ses_lock); ++ is_binding = (ses->ses_status == SES_GOOD); ++ spin_unlock(&ses->ses_lock); + + if (is_binding) { + req->hdr.SessionId = cpu_to_le64(ses->Suid); +@@ -1412,9 +1420,9 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) + goto out_put_spnego_key; + } + +- spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); +- spin_unlock(&ses->chan_lock); ++ spin_lock(&ses->ses_lock); ++ is_binding = (ses->ses_status == SES_GOOD); ++ spin_unlock(&ses->ses_lock); + + /* keep session key if binding */ + if (!is_binding) { +@@ -1538,9 +1546,9 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) + + cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n"); + +- spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); +- spin_unlock(&ses->chan_lock); ++ spin_lock(&ses->ses_lock); ++ is_binding = (ses->ses_status == SES_GOOD); ++ spin_unlock(&ses->ses_lock); + + /* keep existing ses id and flags if binding */ + if (!is_binding) { +@@ -1606,9 +1614,9 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) + + rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base; + +- spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); +- spin_unlock(&ses->chan_lock); ++ spin_lock(&ses->ses_lock); ++ is_binding = (ses->ses_status == SES_GOOD); ++ spin_unlock(&ses->ses_lock); + + /* keep existing ses id and flags if binding */ + if (!is_binding) { +diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c +index d827b7547ffad..790acf65a0926 100644 +--- a/fs/cifs/smb2transport.c ++++ b/fs/cifs/smb2transport.c +@@ -81,6 +81,7 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) + struct cifs_ses *ses = NULL; + int i; + int rc = 0; ++ bool is_binding = false; + + spin_lock(&cifs_tcp_ses_lock); + +@@ -97,9 +98,12 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) + goto out; + + found: ++ spin_lock(&ses->ses_lock); + spin_lock(&ses->chan_lock); +- if (cifs_chan_needs_reconnect(ses, server) && +- !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { ++ ++ is_binding = (cifs_chan_needs_reconnect(ses, server) && ++ ses->ses_status == SES_GOOD); ++ if (is_binding) { + /* + * If we are in the process of binding a new channel + * to an existing session, use the master connection +@@ -107,6 +111,7 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) + */ + memcpy(key, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + goto out; + } + +@@ -119,10 +124,12 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) + if (chan->server == server) { + memcpy(key, chan->signkey, SMB3_SIGN_KEY_SIZE); + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + goto out; + } + } + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + + cifs_dbg(VFS, + "%s: Could not find channel signing key for session 0x%llx\n", +@@ -392,11 +399,15 @@ generate_smb3signingkey(struct cifs_ses *ses, + bool is_binding = false; + int chan_index = 0; + ++ spin_lock(&ses->ses_lock); + 
spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); ++ is_binding = (cifs_chan_needs_reconnect(ses, server) && ++ ses->ses_status == SES_GOOD); ++ + chan_index = cifs_ses_get_chan_index(ses, server); + /* TODO: introduce ref counting for channels when the can be freed */ + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + + /* + * All channels use the same encryption/decryption keys but +-- +2.39.2 + diff --git a/queue-6.1/cifs-prevent-data-race-in-cifs_reconnect_tcon.patch b/queue-6.1/cifs-prevent-data-race-in-cifs_reconnect_tcon.patch new file mode 100644 index 00000000000..1185e8560aa --- /dev/null +++ b/queue-6.1/cifs-prevent-data-race-in-cifs_reconnect_tcon.patch @@ -0,0 +1,255 @@ +From 3de8e3b54835786fe1ddf4048df5ff2822ba7bd9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Feb 2023 19:01:55 -0300 +Subject: cifs: prevent data race in cifs_reconnect_tcon() + +From: Paulo Alcantara + +[ Upstream commit 1bcd548d935a33c6fc58331405eb1b82fd6150de ] + +Make sure to get an up-to-date TCP_Server_Info::nr_targets value prior +to waiting the server to be reconnected in cifs_reconnect_tcon(). It +is set in cifs_tcp_ses_needs_reconnect() and protected by +TCP_Server_Info::srv_lock. + +Create a new cifs_wait_for_server_reconnect() helper that can be used +by both SMB2+ and CIFS reconnect code. + +Signed-off-by: Paulo Alcantara (SUSE) +Signed-off-by: Steve French +Stable-dep-of: bc962159e8e3 ("cifs: avoid race conditions with parallel reconnects") +Signed-off-by: Sasha Levin +--- + fs/cifs/cifsproto.h | 1 + + fs/cifs/cifssmb.c | 43 ++---------------------- + fs/cifs/misc.c | 44 ++++++++++++++++++++++++ + fs/cifs/smb2pdu.c | 82 ++++++++++++--------------------------------- + 4 files changed, 69 insertions(+), 101 deletions(-) + +diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h +index bc4475f6c0827..98513f5af3f96 100644 +--- a/fs/cifs/cifsproto.h ++++ b/fs/cifs/cifsproto.h +@@ -691,5 +691,6 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) + + struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon); + void cifs_put_tcon_super(struct super_block *sb); ++int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry); + + #endif /* _CIFSPROTO_H */ +diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c +index 6c6a7fc47f3e3..4bc6ba87baf4c 100644 +--- a/fs/cifs/cifssmb.c ++++ b/fs/cifs/cifssmb.c +@@ -70,7 +70,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) + struct cifs_ses *ses; + struct TCP_Server_Info *server; + struct nls_table *nls_codepage; +- int retries; + + /* + * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for +@@ -98,45 +97,9 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) + } + spin_unlock(&tcon->tc_lock); + +- retries = server->nr_targets; +- +- /* +- * Give demultiplex thread up to 10 seconds to each target available for +- * reconnect -- should be greater than cifs socket timeout which is 7 +- * seconds. +- */ +- while (server->tcpStatus == CifsNeedReconnect) { +- rc = wait_event_interruptible_timeout(server->response_q, +- (server->tcpStatus != CifsNeedReconnect), +- 10 * HZ); +- if (rc < 0) { +- cifs_dbg(FYI, "%s: aborting reconnect due to a received signal by the process\n", +- __func__); +- return -ERESTARTSYS; +- } +- +- /* are we still trying to reconnect? 
*/ +- spin_lock(&server->srv_lock); +- if (server->tcpStatus != CifsNeedReconnect) { +- spin_unlock(&server->srv_lock); +- break; +- } +- spin_unlock(&server->srv_lock); +- +- if (retries && --retries) +- continue; +- +- /* +- * on "soft" mounts we wait once. Hard mounts keep +- * retrying until process is killed or server comes +- * back on-line +- */ +- if (!tcon->retry) { +- cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n"); +- return -EHOSTDOWN; +- } +- retries = server->nr_targets; +- } ++ rc = cifs_wait_for_server_reconnect(server, tcon->retry); ++ if (rc) ++ return rc; + + spin_lock(&ses->chan_lock); + if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) { +diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c +index 4e54736a06996..832856aef4b7a 100644 +--- a/fs/cifs/misc.c ++++ b/fs/cifs/misc.c +@@ -1382,3 +1382,47 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, + return 0; + } + #endif ++ ++int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry) ++{ ++ int timeout = 10; ++ int rc; ++ ++ spin_lock(&server->srv_lock); ++ if (server->tcpStatus != CifsNeedReconnect) { ++ spin_unlock(&server->srv_lock); ++ return 0; ++ } ++ timeout *= server->nr_targets; ++ spin_unlock(&server->srv_lock); ++ ++ /* ++ * Give demultiplex thread up to 10 seconds to each target available for ++ * reconnect -- should be greater than cifs socket timeout which is 7 ++ * seconds. ++ * ++ * On "soft" mounts we wait once. Hard mounts keep retrying until ++ * process is killed or server comes back on-line. ++ */ ++ do { ++ rc = wait_event_interruptible_timeout(server->response_q, ++ (server->tcpStatus != CifsNeedReconnect), ++ timeout * HZ); ++ if (rc < 0) { ++ cifs_dbg(FYI, "%s: aborting reconnect due to received signal\n", ++ __func__); ++ return -ERESTARTSYS; ++ } ++ ++ /* are we still trying to reconnect? */ ++ spin_lock(&server->srv_lock); ++ if (server->tcpStatus != CifsNeedReconnect) { ++ spin_unlock(&server->srv_lock); ++ return 0; ++ } ++ spin_unlock(&server->srv_lock); ++ } while (retry); ++ ++ cifs_dbg(FYI, "%s: gave up waiting on reconnect\n", __func__); ++ return -EHOSTDOWN; ++} +diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c +index 6e6e44d8b4c79..83d04cd2f9df8 100644 +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -139,66 +139,6 @@ smb2_hdr_assemble(struct smb2_hdr *shdr, __le16 smb2_cmd, + return; + } + +-static int wait_for_server_reconnect(struct TCP_Server_Info *server, +- __le16 smb2_command, bool retry) +-{ +- int timeout = 10; +- int rc; +- +- spin_lock(&server->srv_lock); +- if (server->tcpStatus != CifsNeedReconnect) { +- spin_unlock(&server->srv_lock); +- return 0; +- } +- timeout *= server->nr_targets; +- spin_unlock(&server->srv_lock); +- +- /* +- * Return to caller for TREE_DISCONNECT and LOGOFF and CLOSE +- * here since they are implicitly done when session drops. +- */ +- switch (smb2_command) { +- /* +- * BB Should we keep oplock break and add flush to exceptions? +- */ +- case SMB2_TREE_DISCONNECT: +- case SMB2_CANCEL: +- case SMB2_CLOSE: +- case SMB2_OPLOCK_BREAK: +- return -EAGAIN; +- } +- +- /* +- * Give demultiplex thread up to 10 seconds to each target available for +- * reconnect -- should be greater than cifs socket timeout which is 7 +- * seconds. +- * +- * On "soft" mounts we wait once. Hard mounts keep retrying until +- * process is killed or server comes back on-line. 
+- */ +- do { +- rc = wait_event_interruptible_timeout(server->response_q, +- (server->tcpStatus != CifsNeedReconnect), +- timeout * HZ); +- if (rc < 0) { +- cifs_dbg(FYI, "%s: aborting reconnect due to received signal\n", +- __func__); +- return -ERESTARTSYS; +- } +- +- /* are we still trying to reconnect? */ +- spin_lock(&server->srv_lock); +- if (server->tcpStatus != CifsNeedReconnect) { +- spin_unlock(&server->srv_lock); +- return 0; +- } +- spin_unlock(&server->srv_lock); +- } while (retry); +- +- cifs_dbg(FYI, "%s: gave up waiting on reconnect\n", __func__); +- return -EHOSTDOWN; +-} +- + static int + smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + struct TCP_Server_Info *server) +@@ -239,7 +179,27 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + (!tcon->ses->server) || !server) + return -EIO; + +- rc = wait_for_server_reconnect(server, smb2_command, tcon->retry); ++ spin_lock(&server->srv_lock); ++ if (server->tcpStatus == CifsNeedReconnect) { ++ /* ++ * Return to caller for TREE_DISCONNECT and LOGOFF and CLOSE ++ * here since they are implicitly done when session drops. ++ */ ++ switch (smb2_command) { ++ /* ++ * BB Should we keep oplock break and add flush to exceptions? ++ */ ++ case SMB2_TREE_DISCONNECT: ++ case SMB2_CANCEL: ++ case SMB2_CLOSE: ++ case SMB2_OPLOCK_BREAK: ++ spin_unlock(&server->srv_lock); ++ return -EAGAIN; ++ } ++ } ++ spin_unlock(&server->srv_lock); ++ ++ rc = cifs_wait_for_server_reconnect(server, tcon->retry); + if (rc) + return rc; + +-- +2.39.2 + diff --git a/queue-6.1/cifs-update-ip_addr-for-ses-only-for-primary-chan-se.patch b/queue-6.1/cifs-update-ip_addr-for-ses-only-for-primary-chan-se.patch new file mode 100644 index 00000000000..71baa5fa1dd --- /dev/null +++ b/queue-6.1/cifs-update-ip_addr-for-ses-only-for-primary-chan-se.patch @@ -0,0 +1,64 @@ +From 90cd3a627fdcdebe9437c5a51183285fcae5bc96 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Feb 2023 17:41:17 +0000 +Subject: cifs: update ip_addr for ses only for primary chan setup + +From: Shyam Prasad N + +[ Upstream commit e77978de4765229e09c8fabcf4f8419ff367317f ] + +We update ses->ip_addr whenever we do a session setup. +But this should happen only for primary channel in mchan +scenario. + +Signed-off-by: Shyam Prasad N +Reviewed-by: Paulo Alcantara (SUSE) +Signed-off-by: Steve French +Stable-dep-of: bc962159e8e3 ("cifs: avoid race conditions with parallel reconnects") +Signed-off-by: Sasha Levin +--- + fs/cifs/connect.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c +index 7aecb1646b6fc..43637c1283748 100644 +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -4082,16 +4082,12 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, + struct nls_table *nls_info) + { + int rc = -ENOSYS; +- struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; +- struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; ++ struct TCP_Server_Info *pserver = CIFS_SERVER_IS_CHAN(server) ? 
server->primary_server : server; ++ struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&pserver->dstaddr; ++ struct sockaddr_in *addr = (struct sockaddr_in *)&pserver->dstaddr; + bool is_binding = false; + + spin_lock(&ses->ses_lock); +- if (server->dstaddr.ss_family == AF_INET6) +- scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI6", &addr6->sin6_addr); +- else +- scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI4", &addr->sin_addr); +- + if (ses->ses_status != SES_GOOD && + ses->ses_status != SES_NEW && + ses->ses_status != SES_NEED_RECON) { +@@ -4115,6 +4111,14 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, + ses->ses_status = SES_IN_SETUP; + spin_unlock(&ses->ses_lock); + ++ /* update ses ip_addr only for primary chan */ ++ if (server == pserver) { ++ if (server->dstaddr.ss_family == AF_INET6) ++ scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI6", &addr6->sin6_addr); ++ else ++ scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI4", &addr->sin_addr); ++ } ++ + if (!is_binding) { + ses->capabilities = server->capabilities; + if (!linuxExtEnabled) +-- +2.39.2 + diff --git a/queue-6.1/drm-msm-disp-dpu-fix-sc7280_pp-base-offset.patch b/queue-6.1/drm-msm-disp-dpu-fix-sc7280_pp-base-offset.patch new file mode 100644 index 00000000000..85a865ea11f --- /dev/null +++ b/queue-6.1/drm-msm-disp-dpu-fix-sc7280_pp-base-offset.patch @@ -0,0 +1,46 @@ +From 0dd04eeb2a1b3d70349342acfcccb3b4ec6b899c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Feb 2023 13:36:40 -0800 +Subject: drm/msm/disp/dpu: fix sc7280_pp base offset + +From: Kuogee Hsieh + +[ Upstream commit ce68153edb5b36ddf87a19ed5a85131498690bbf ] + +At sc7280, pingpong block is used to management the dither effects +to reduce distortion at panel. Currently pingpong-0 base offset is +wrongly set at 0x59000. This mistake will not cause system to crash. +However it will make dither not work. This patch correct sc7280 ping +pong-0 block base offset. 
+ +Changes in v2: +-- add more details info n regrading of pingpong block at commit text + +Fixes: 591e34a091d1 ("drm/msm/disp/dpu1: add support for display for SC7280 target") +Signed-off-by: Kuogee Hsieh +Reviewed-by: Abhinav Kumar +Reviewed-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/524332/ +Link: https://lore.kernel.org/r/1677533800-3125-1-git-send-email-quic_khsieh@quicinc.com +Signed-off-by: Abhinav Kumar +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +index b1131860ada17..32a3c42ec45b1 100644 +--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c ++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +@@ -1181,7 +1181,7 @@ static const struct dpu_pingpong_cfg sm8150_pp[] = { + }; + + static const struct dpu_pingpong_cfg sc7280_pp[] = { +- PP_BLK("pingpong_0", PINGPONG_0, 0x59000, 0, sc7280_pp_sblk, -1, -1), ++ PP_BLK("pingpong_0", PINGPONG_0, 0x69000, 0, sc7280_pp_sblk, -1, -1), + PP_BLK("pingpong_1", PINGPONG_1, 0x6a000, 0, sc7280_pp_sblk, -1, -1), + PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, 0, sc7280_pp_sblk, -1, -1), + PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, 0, sc7280_pp_sblk, -1, -1), +-- +2.39.2 + diff --git a/queue-6.1/drm-msm-dpu-correct-sm8250-and-sm8350-scaler.patch b/queue-6.1/drm-msm-dpu-correct-sm8250-and-sm8350-scaler.patch new file mode 100644 index 00000000000..dc9b3d00e64 --- /dev/null +++ b/queue-6.1/drm-msm-dpu-correct-sm8250-and-sm8350-scaler.patch @@ -0,0 +1,71 @@ +From 1be611e5125270a82315622a4066ec1ebb496e6c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 12 Feb 2023 01:12:18 +0200 +Subject: drm/msm/dpu: correct sm8250 and sm8350 scaler + +From: Dmitry Baryshkov + +[ Upstream commit 03c0c3cb22a4ff29afba1b43f0330289ea80433f ] + +QSEED4 is a newer variant of QSEED3LITE, which should be used on +sm8250 and sm8350. Fix the DPU caps structure and used feature masks. 
+ +Fixes: d21fc5dfc3df ("drm/msm/dpu1: add support for qseed3lite used on sm8250") +Fixes: 0e91bcbb0016 ("drm/msm/dpu: Add SM8350 to hw catalog") +Signed-off-by: Dmitry Baryshkov +Reviewed-by: Abhinav Kumar +Patchwork: https://patchwork.freedesktop.org/patch/522229/ +Link: https://lore.kernel.org/r/20230211231259.1308718-10-dmitry.baryshkov@linaro.org +Signed-off-by: Abhinav Kumar +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +index bbd884c8e0cb1..b1131860ada17 100644 +--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c ++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +@@ -356,7 +356,7 @@ static const struct dpu_caps sc8180x_dpu_caps = { + static const struct dpu_caps sm8250_dpu_caps = { + .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, + .max_mixer_blendstages = 0xb, +- .qseed_type = DPU_SSPP_SCALER_QSEED3LITE, ++ .qseed_type = DPU_SSPP_SCALER_QSEED4, + .smart_dma_rev = DPU_SSPP_SMART_DMA_V2, /* TODO: v2.5 */ + .ubwc_version = DPU_HW_UBWC_VER_40, + .has_src_split = true, +@@ -855,22 +855,22 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { + }; + + static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 = +- _VIG_SBLK("0", 5, DPU_SSPP_SCALER_QSEED3LITE); ++ _VIG_SBLK("0", 5, DPU_SSPP_SCALER_QSEED4); + static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 = +- _VIG_SBLK("1", 6, DPU_SSPP_SCALER_QSEED3LITE); ++ _VIG_SBLK("1", 6, DPU_SSPP_SCALER_QSEED4); + static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 = +- _VIG_SBLK("2", 7, DPU_SSPP_SCALER_QSEED3LITE); ++ _VIG_SBLK("2", 7, DPU_SSPP_SCALER_QSEED4); + static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 = +- _VIG_SBLK("3", 8, DPU_SSPP_SCALER_QSEED3LITE); ++ _VIG_SBLK("3", 8, DPU_SSPP_SCALER_QSEED4); + + static const struct dpu_sspp_cfg sm8250_sspp[] = { +- SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SM8250_MASK, ++ SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SC7180_MASK, + sm8250_vig_sblk_0, 0, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0), +- SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, VIG_SM8250_MASK, ++ SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, VIG_SC7180_MASK, + sm8250_vig_sblk_1, 4, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG1), +- SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, VIG_SM8250_MASK, ++ SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, VIG_SC7180_MASK, + sm8250_vig_sblk_2, 8, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG2), +- SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, VIG_SM8250_MASK, ++ SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, VIG_SC7180_MASK, + sm8250_vig_sblk_3, 12, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG3), + SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000, DMA_SDM845_MASK, + sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0), +-- +2.39.2 + diff --git a/queue-6.1/drm-msm-dpu-refactor-sc7280_pp-location.patch b/queue-6.1/drm-msm-dpu-refactor-sc7280_pp-location.patch new file mode 100644 index 00000000000..a68a464ec46 --- /dev/null +++ b/queue-6.1/drm-msm-dpu-refactor-sc7280_pp-location.patch @@ -0,0 +1,59 @@ +From 134315db11b46b7cc9edd050b7b29518262e7533 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Oct 2022 14:08:05 +0200 +Subject: drm/msm/dpu: Refactor sc7280_pp location + +From: Robert Foss + +[ Upstream commit 1a5b5372e3b0a4cc65a0cbb724b1b0859f4ac63c ] + +The sc7280_pp declaration is not located by the other _pp +declarations, but rather hidden around the _merge_3d +declarations. Let's fix this to avoid confusion. 
+ +Signed-off-by: Robert Foss +Reviewed-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/509153/ +Link: https://lore.kernel.org/r/20221028120812.339100-3-robert.foss@linaro.org +Signed-off-by: Dmitry Baryshkov +Stable-dep-of: 03c0c3cb22a4 ("drm/msm/dpu: correct sm8250 and sm8350 scaler") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +index 41c93a18d5cb3..bbd884c8e0cb1 100644 +--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c ++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +@@ -1180,6 +1180,13 @@ static const struct dpu_pingpong_cfg sm8150_pp[] = { + -1), + }; + ++static const struct dpu_pingpong_cfg sc7280_pp[] = { ++ PP_BLK("pingpong_0", PINGPONG_0, 0x59000, 0, sc7280_pp_sblk, -1, -1), ++ PP_BLK("pingpong_1", PINGPONG_1, 0x6a000, 0, sc7280_pp_sblk, -1, -1), ++ PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, 0, sc7280_pp_sblk, -1, -1), ++ PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, 0, sc7280_pp_sblk, -1, -1), ++}; ++ + static struct dpu_pingpong_cfg qcm2290_pp[] = { + PP_BLK("pingpong_0", PINGPONG_0, 0x70000, 0, sdm845_pp_sblk, + DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), +@@ -1203,13 +1210,6 @@ static const struct dpu_merge_3d_cfg sm8150_merge_3d[] = { + MERGE_3D_BLK("merge_3d_2", MERGE_3D_2, 0x83200), + }; + +-static const struct dpu_pingpong_cfg sc7280_pp[] = { +- PP_BLK("pingpong_0", PINGPONG_0, 0x59000, 0, sc7280_pp_sblk, -1, -1), +- PP_BLK("pingpong_1", PINGPONG_1, 0x6a000, 0, sc7280_pp_sblk, -1, -1), +- PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, 0, sc7280_pp_sblk, -1, -1), +- PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, 0, sc7280_pp_sblk, -1, -1), +-}; +- + /************************************************************* + * DSC sub blocks config + *************************************************************/ +-- +2.39.2 + diff --git a/queue-6.1/fsverity-don-t-drop-pagecache-at-end-of-fs_ioc_enabl.patch b/queue-6.1/fsverity-don-t-drop-pagecache-at-end-of-fs_ioc_enabl.patch new file mode 100644 index 00000000000..76b03c16535 --- /dev/null +++ b/queue-6.1/fsverity-don-t-drop-pagecache-at-end-of-fs_ioc_enabl.patch @@ -0,0 +1,73 @@ +From 7da9b6aea5d624d8b1133bfbd214e1647b3ebc74 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Mar 2023 16:31:32 -0700 +Subject: fsverity: don't drop pagecache at end of FS_IOC_ENABLE_VERITY + +From: Eric Biggers + +[ Upstream commit a075bacde257f755bea0e53400c9f1cdd1b8e8e6 ] + +The full pagecache drop at the end of FS_IOC_ENABLE_VERITY is causing +performance problems and is hindering adoption of fsverity. It was +intended to solve a race condition where unverified pages might be left +in the pagecache. But actually it doesn't solve it fully. + +Since the incomplete solution for this race condition has too much +performance impact for it to be worth it, let's remove it for now. 
+ +Fixes: 3fda4c617e84 ("fs-verity: implement FS_IOC_ENABLE_VERITY ioctl") +Cc: stable@vger.kernel.org +Reviewed-by: Victor Hsieh +Link: https://lore.kernel.org/r/20230314235332.50270-1-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Sasha Levin +--- + fs/verity/enable.c | 24 +++++++++++++----------- + 1 file changed, 13 insertions(+), 11 deletions(-) + +diff --git a/fs/verity/enable.c b/fs/verity/enable.c +index df6b499bf6a14..400c264bf8930 100644 +--- a/fs/verity/enable.c ++++ b/fs/verity/enable.c +@@ -390,25 +390,27 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg) + goto out_drop_write; + + err = enable_verity(filp, &arg); +- if (err) +- goto out_allow_write_access; + + /* +- * Some pages of the file may have been evicted from pagecache after +- * being used in the Merkle tree construction, then read into pagecache +- * again by another process reading from the file concurrently. Since +- * these pages didn't undergo verification against the file digest which +- * fs-verity now claims to be enforcing, we have to wipe the pagecache +- * to ensure that all future reads are verified. ++ * We no longer drop the inode's pagecache after enabling verity. This ++ * used to be done to try to avoid a race condition where pages could be ++ * evicted after being used in the Merkle tree construction, then ++ * re-instantiated by a concurrent read. Such pages are unverified, and ++ * the backing storage could have filled them with different content, so ++ * they shouldn't be used to fulfill reads once verity is enabled. ++ * ++ * But, dropping the pagecache has a big performance impact, and it ++ * doesn't fully solve the race condition anyway. So for those reasons, ++ * and also because this race condition isn't very important relatively ++ * speaking (especially for small-ish files, where the chance of a page ++ * being used, evicted, *and* re-instantiated all while enabling verity ++ * is quite small), we no longer drop the inode's pagecache. + */ +- filemap_write_and_wait(inode->i_mapping); +- invalidate_inode_pages2(inode->i_mapping); + + /* + * allow_write_access() is needed to pair with deny_write_access(). + * Regardless, the filesystem won't allow writing to verity files. + */ +-out_allow_write_access: + allow_write_access(filp); + out_drop_write: + mnt_drop_write_file(filp); +-- +2.39.2 + diff --git a/queue-6.1/kcsan-avoid-passing-g-for-test.patch b/queue-6.1/kcsan-avoid-passing-g-for-test.patch new file mode 100644 index 00000000000..73b7fb7fc6a --- /dev/null +++ b/queue-6.1/kcsan-avoid-passing-g-for-test.patch @@ -0,0 +1,50 @@ +From 6df0780c17f9b688e3a9a0921c2b2f62ca3d9820 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Mar 2023 23:47:05 +0100 +Subject: kcsan: avoid passing -g for test + +From: Marco Elver + +[ Upstream commit 5eb39cde1e2487ba5ec1802dc5e58a77e700d99e ] + +Nathan reported that when building with GNU as and a version of clang that +defaults to DWARF5, the assembler will complain with: + + Error: non-constant .uleb128 is not supported + +This is because `-g` defaults to the compiler debug info default. If the +assembler does not support some of the directives used, the above errors +occur. To fix, remove the explicit passing of `-g`. + +All the test wants is that stack traces print valid function names, and +debug info is not required for that. (I currently cannot recall why I +added the explicit `-g`.) 
+ +Link: https://lkml.kernel.org/r/20230316224705.709984-2-elver@google.com +Fixes: 1fe84fd4a402 ("kcsan: Add test suite") +Signed-off-by: Marco Elver +Reported-by: Nathan Chancellor +Cc: Alexander Potapenko +Cc: Dmitry Vyukov +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + kernel/kcsan/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile +index 8cf70f068d92d..a45f3dfc8d141 100644 +--- a/kernel/kcsan/Makefile ++++ b/kernel/kcsan/Makefile +@@ -16,6 +16,6 @@ obj-y := core.o debugfs.o report.o + KCSAN_INSTRUMENT_BARRIERS_selftest.o := y + obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o + +-CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer ++CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -fno-omit-frame-pointer + CFLAGS_kcsan_test.o += $(DISABLE_STRUCTLEAK_PLUGIN) + obj-$(CONFIG_KCSAN_KUNIT_TEST) += kcsan_test.o +-- +2.39.2 + diff --git a/queue-6.1/kernel-kcsan-kcsan_test-build-without-structleak-plu.patch b/queue-6.1/kernel-kcsan-kcsan_test-build-without-structleak-plu.patch new file mode 100644 index 00000000000..6011d39aabd --- /dev/null +++ b/queue-6.1/kernel-kcsan-kcsan_test-build-without-structleak-plu.patch @@ -0,0 +1,44 @@ +From 0750f19e829df59a44aa66d8476fa2b5608f0352 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Nov 2022 11:43:58 +0100 +Subject: kernel: kcsan: kcsan_test: build without structleak plugin + +From: Anders Roxell + +[ Upstream commit 6fcd4267a840d0536b8e5334ad5f31e4105fce85 ] + +Building kcsan_test with structleak plugin enabled makes the stack frame +size to grow. + +kernel/kcsan/kcsan_test.c:704:1: error: the frame size of 3296 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] + +Turn off the structleak plugin checks for kcsan_test. + +Link: https://lkml.kernel.org/r/20221128104358.2660634-1-anders.roxell@linaro.org +Signed-off-by: Anders Roxell +Suggested-by: Arnd Bergmann +Acked-by: Marco Elver +Cc: Arnd Bergmann +Cc: David Gow +Cc: Jason A. Donenfeld +Cc: Kees Cook +Signed-off-by: Andrew Morton +Stable-dep-of: 5eb39cde1e24 ("kcsan: avoid passing -g for test") +Signed-off-by: Sasha Levin +--- + kernel/kcsan/Makefile | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile +index 4f35d1bced6a2..8cf70f068d92d 100644 +--- a/kernel/kcsan/Makefile ++++ b/kernel/kcsan/Makefile +@@ -17,4 +17,5 @@ KCSAN_INSTRUMENT_BARRIERS_selftest.o := y + obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o + + CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer ++CFLAGS_kcsan_test.o += $(DISABLE_STRUCTLEAK_PLUGIN) + obj-$(CONFIG_KCSAN_KUNIT_TEST) += kcsan_test.o +-- +2.39.2 + diff --git a/queue-6.1/net-ethernet-ti-am65-cpsw-cpts-fix-cpts-release-acti.patch b/queue-6.1/net-ethernet-ti-am65-cpsw-cpts-fix-cpts-release-acti.patch new file mode 100644 index 00000000000..5c1b68bc8b2 --- /dev/null +++ b/queue-6.1/net-ethernet-ti-am65-cpsw-cpts-fix-cpts-release-acti.patch @@ -0,0 +1,134 @@ +From 51156bb1ed96e686878d2a39f3ddf65548db677d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Jan 2023 12:37:31 +0530 +Subject: net: ethernet: ti: am65-cpsw/cpts: Fix CPTS release action + +From: Siddharth Vadapalli + +[ Upstream commit 4ad8766cd3982744e53f107f378d2c65b76ff9a8 ] + +The am65_cpts_release() function is registered as a devm_action in the +am65_cpts_create() function in am65-cpts driver. 
When the am65-cpsw driver +invokes am65_cpts_create(), am65_cpts_release() is added in the set of devm +actions associated with the am65-cpsw driver's device. + +In the event of probe failure or probe deferral, the platform_drv_probe() +function invokes dev_pm_domain_detach() which powers off the CPSW and the +CPSW's CPTS hardware, both of which share the same power domain. Since the +am65_cpts_disable() function invoked by the am65_cpts_release() function +attempts to reset the CPTS hardware by writing to its registers, the CPTS +hardware is assumed to be powered on at this point. However, the hardware +is powered off before the devm actions are executed. + +Fix this by getting rid of the devm action for am65_cpts_release() and +invoking it directly on the cleanup and exit paths. + +Fixes: f6bd59526ca5 ("net: ethernet: ti: introduce am654 common platform time sync driver") +Signed-off-by: Siddharth Vadapalli +Reviewed-by: Leon Romanovsky +Reviewed-by: Tony Nguyen +Reviewed-by: Roger Quadros +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 ++ + drivers/net/ethernet/ti/am65-cpts.c | 15 +++++---------- + drivers/net/ethernet/ti/am65-cpts.h | 5 +++++ + 3 files changed, 12 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c +index 00911e9360525..8ff1c84a23ce7 100644 +--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c ++++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c +@@ -2817,6 +2817,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) + + err_free_phylink: + am65_cpsw_nuss_phylink_cleanup(common); ++ am65_cpts_release(common->cpts); + err_of_clear: + of_platform_device_destroy(common->mdio_dev, NULL); + err_pm_clear: +@@ -2845,6 +2846,7 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev) + */ + am65_cpsw_nuss_cleanup_ndev(common); + am65_cpsw_nuss_phylink_cleanup(common); ++ am65_cpts_release(common->cpts); + + of_platform_device_destroy(common->mdio_dev, NULL); + +diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c +index e2f0fb286143b..9948ac14e68db 100644 +--- a/drivers/net/ethernet/ti/am65-cpts.c ++++ b/drivers/net/ethernet/ti/am65-cpts.c +@@ -918,14 +918,13 @@ static int am65_cpts_of_parse(struct am65_cpts *cpts, struct device_node *node) + return cpts_of_mux_clk_setup(cpts, node); + } + +-static void am65_cpts_release(void *data) ++void am65_cpts_release(struct am65_cpts *cpts) + { +- struct am65_cpts *cpts = data; +- + ptp_clock_unregister(cpts->ptp_clock); + am65_cpts_disable(cpts); + clk_disable_unprepare(cpts->refclk); + } ++EXPORT_SYMBOL_GPL(am65_cpts_release); + + struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, + struct device_node *node) +@@ -1003,18 +1002,12 @@ struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, + } + cpts->phc_index = ptp_clock_index(cpts->ptp_clock); + +- ret = devm_add_action_or_reset(dev, am65_cpts_release, cpts); +- if (ret) { +- dev_err(dev, "failed to add ptpclk reset action %d", ret); +- return ERR_PTR(ret); +- } +- + ret = devm_request_threaded_irq(dev, cpts->irq, NULL, + am65_cpts_interrupt, + IRQF_ONESHOT, dev_name(dev), cpts); + if (ret < 0) { + dev_err(cpts->dev, "error attaching irq %d\n", ret); +- return ERR_PTR(ret); ++ goto reset_ptpclk; + } + + dev_info(dev, "CPTS ver 0x%08x, freq:%u, add_val:%u\n", +@@ -1023,6 +1016,8 @@ struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, + + 
return cpts; + ++reset_ptpclk: ++ am65_cpts_release(cpts); + refclk_disable: + clk_disable_unprepare(cpts->refclk); + return ERR_PTR(ret); +diff --git a/drivers/net/ethernet/ti/am65-cpts.h b/drivers/net/ethernet/ti/am65-cpts.h +index cf9fbc28fd032..c0ae0117e5737 100644 +--- a/drivers/net/ethernet/ti/am65-cpts.h ++++ b/drivers/net/ethernet/ti/am65-cpts.h +@@ -18,6 +18,7 @@ struct am65_cpts_estf_cfg { + }; + + #if IS_ENABLED(CONFIG_TI_K3_AM65_CPTS) ++void am65_cpts_release(struct am65_cpts *cpts); + struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, + struct device_node *node); + int am65_cpts_phc_index(struct am65_cpts *cpts); +@@ -29,6 +30,10 @@ int am65_cpts_estf_enable(struct am65_cpts *cpts, int idx, + struct am65_cpts_estf_cfg *cfg); + void am65_cpts_estf_disable(struct am65_cpts *cpts, int idx); + #else ++static inline void am65_cpts_release(struct am65_cpts *cpts) ++{ ++} ++ + static inline struct am65_cpts *am65_cpts_create(struct device *dev, + void __iomem *regs, + struct device_node *node) +-- +2.39.2 + diff --git a/queue-6.1/net-mscc-ocelot-fix-stats-region-batching.patch b/queue-6.1/net-mscc-ocelot-fix-stats-region-batching.patch new file mode 100644 index 00000000000..8f1b2c569b0 --- /dev/null +++ b/queue-6.1/net-mscc-ocelot-fix-stats-region-batching.patch @@ -0,0 +1,86 @@ +From 1bfdf14cb0563cc7e444a87f59d86d4977edc782 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 Mar 2023 03:03:23 +0200 +Subject: net: mscc: ocelot: fix stats region batching + +From: Vladimir Oltean + +[ Upstream commit 6acc72a43eac78a309160d0a7512bbc59bcdd757 ] + +The blamed commit changed struct ocelot_stat_layout :: "u32 offset" to +"u32 reg". + +However, "u32 reg" is not quite a register address, but an enum +ocelot_reg, which in itself encodes an enum ocelot_target target in the +upper bits, and an index into the ocelot->map[target][] array in the +lower bits. + +So, whereas the previous code comparison between stats_layout[i].offset +and last + 1 was correct (because those "offsets" at the time were +32-bit relative addresses), the new code, comparing layout[i].reg to +last + 4 is not correct, because the "reg" here is an enum/index, not an +actual register address. + +What we want to compare are indeed register addresses, but to do that, +we need to actually go through the same motions as +__ocelot_bulk_read_ix() itself. + +With this bug, all statistics counters are deemed by +ocelot_prepare_stats_regions() as constituting their own region. +(Truncated) log on VSC9959 (Felix) below (prints added by me): + +Before: + +region of 1 contiguous counters starting with SYS:STAT:CNT[0x000] +region of 1 contiguous counters starting with SYS:STAT:CNT[0x001] +region of 1 contiguous counters starting with SYS:STAT:CNT[0x002] +... +region of 1 contiguous counters starting with SYS:STAT:CNT[0x041] +region of 1 contiguous counters starting with SYS:STAT:CNT[0x042] +region of 1 contiguous counters starting with SYS:STAT:CNT[0x080] +region of 1 contiguous counters starting with SYS:STAT:CNT[0x081] +... +region of 1 contiguous counters starting with SYS:STAT:CNT[0x0ac] +region of 1 contiguous counters starting with SYS:STAT:CNT[0x100] +region of 1 contiguous counters starting with SYS:STAT:CNT[0x101] +... 
+region of 1 contiguous counters starting with SYS:STAT:CNT[0x111] + +After: + +region of 67 contiguous counters starting with SYS:STAT:CNT[0x000] +region of 45 contiguous counters starting with SYS:STAT:CNT[0x080] +region of 18 contiguous counters starting with SYS:STAT:CNT[0x100] + +Since commit d87b1c08f38a ("net: mscc: ocelot: use bulk reads for +stats") intended bulking as a performance improvement, and since now, +with trivial-sized regions, performance is even worse than without +bulking at all, this could easily qualify as a performance regression. + +Fixes: d4c367650704 ("net: mscc: ocelot: keep ocelot_stat_layout by reg address, not offset") +Signed-off-by: Vladimir Oltean +Acked-by: Colin Foster +Tested-by: Colin Foster +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_stats.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c +index dbd20b125ceaf..0066219bb0e89 100644 +--- a/drivers/net/ethernet/mscc/ocelot_stats.c ++++ b/drivers/net/ethernet/mscc/ocelot_stats.c +@@ -392,7 +392,8 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) + if (!ocelot->stats_layout[i].reg) + continue; + +- if (region && ocelot->stats_layout[i].reg == last + 4) { ++ if (region && ocelot->map[SYS][ocelot->stats_layout[i].reg & REG_MASK] == ++ ocelot->map[SYS][last & REG_MASK] + 4) { + region->count++; + } else { + region = devm_kzalloc(ocelot->dev, sizeof(*region), +-- +2.39.2 + diff --git a/queue-6.1/riscv-ftrace-fixup-panic-by-disabling-preemption.patch b/queue-6.1/riscv-ftrace-fixup-panic-by-disabling-preemption.patch new file mode 100644 index 00000000000..15e842a170f --- /dev/null +++ b/queue-6.1/riscv-ftrace-fixup-panic-by-disabling-preemption.patch @@ -0,0 +1,57 @@ +From 46b4b428b523d1e1276c23969bbdde4ad654e1e5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Jan 2023 04:05:57 -0500 +Subject: riscv: ftrace: Fixup panic by disabling preemption + +From: Andy Chiu + +[ Upstream commit 8547649981e6631328cd64f583667501ae385531 ] + +In RISCV, we must use an AUIPC + JALR pair to encode an immediate, +forming a jump that jumps to an address over 4K. This may cause errors +if we want to enable kernel preemption and remove dependency from +patching code with stop_machine(). For example, if a task was switched +out on auipc. And, if we changed the ftrace function before it was +switched back, then it would jump to an address that has updated 11:0 +bits mixing with previous XLEN:12 part. + +p: patched area performed by dynamic ftrace +ftrace_prologue: +p| REG_S ra, -SZREG(sp) +p| auipc ra, 0x? ------------> preempted + ... + change ftrace function + ... 
+p| jalr -?(ra) <------------- switched back +p| REG_L ra, -SZREG(sp) +func: + xxx + ret + +Fixes: afc76b8b8011 ("riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT") +Signed-off-by: Andy Chiu +Signed-off-by: Guo Ren +Link: https://lore.kernel.org/r/20230112090603.1295340-2-guoren@kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig +index ae11d5647f9d4..06b9b2f60b9fb 100644 +--- a/arch/riscv/Kconfig ++++ b/arch/riscv/Kconfig +@@ -278,7 +278,7 @@ config ARCH_RV64I + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER +- select HAVE_FUNCTION_TRACER if !XIP_KERNEL ++ select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION + select SWIOTLB if MMU + + endchoice +-- +2.39.2 + diff --git a/queue-6.1/series b/queue-6.1/series new file mode 100644 index 00000000000..441c62da5b1 --- /dev/null +++ b/queue-6.1/series @@ -0,0 +1,29 @@ +thunderbolt-limit-usb3-bandwidth-of-certain-intel-us.patch +cifs-update-ip_addr-for-ses-only-for-primary-chan-se.patch +cifs-prevent-data-race-in-cifs_reconnect_tcon.patch +cifs-avoid-race-conditions-with-parallel-reconnects.patch +zonefs-reorganize-code.patch +zonefs-simplify-io-error-handling.patch +zonefs-reduce-struct-zonefs_inode_info-size.patch +zonefs-separate-zone-information-from-inode-informat.patch +zonefs-fix-error-message-in-zonefs_file_dio_append.patch +fsverity-don-t-drop-pagecache-at-end-of-fs_ioc_enabl.patch +kernel-kcsan-kcsan_test-build-without-structleak-plu.patch +kcsan-avoid-passing-g-for-test.patch +btrfs-rename-btrfs_fs_no_overcommit-to-btrfs_fs_acti.patch +btrfs-zoned-count-fresh-bg-region-as-zone-unusable.patch +net-ethernet-ti-am65-cpsw-cpts-fix-cpts-release-acti.patch +riscv-ftrace-fixup-panic-by-disabling-preemption.patch +arm-dts-aspeed-p10bmc-update-battery-node-name.patch +drm-msm-dpu-refactor-sc7280_pp-location.patch +drm-msm-dpu-correct-sm8250-and-sm8350-scaler.patch +drm-msm-disp-dpu-fix-sc7280_pp-base-offset.patch +blk-mq-move-the-srcu_struct-used-for-quiescing-to-th.patch +blk-mq-fix-bad-unlock-balance-detected-on-q-srcu-in-.patch +tty-serial-fsl_lpuart-switch-to-new-dmaengine_termin.patch +tty-serial-fsl_lpuart-fix-race-on-rx-dma-shutdown.patch +tracing-add-.percent-suffix-option-to-histogram-valu.patch +tracing-add-.graph-suffix-option-to-histogram-value.patch +tracing-do-not-let-histogram-values-have-some-modifi.patch +net-mscc-ocelot-fix-stats-region-batching.patch +arm64-efi-set-nx-compat-flag-in-pe-coff-header.patch diff --git a/queue-6.1/thunderbolt-limit-usb3-bandwidth-of-certain-intel-us.patch b/queue-6.1/thunderbolt-limit-usb3-bandwidth-of-certain-intel-us.patch new file mode 100644 index 00000000000..1ff900e1455 --- /dev/null +++ b/queue-6.1/thunderbolt-limit-usb3-bandwidth-of-certain-intel-us.patch @@ -0,0 +1,138 @@ +From b0882211c51cbf4707c3bd1ea81d8feb116b01e1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 31 Jan 2023 13:04:52 +0200 +Subject: thunderbolt: Limit USB3 bandwidth of certain Intel USB4 host routers + +From: Gil Fine + +[ Upstream commit f0a57dd33b3eadf540912cd130db727ea824d174 ] + +Current Intel USB4 host routers have hardware limitation that the USB3 +bandwidth cannot go higher than 16376 Mb/s. Work this around by adding a +new quirk that limits the bandwidth for the affected host routers. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Gil Fine +Signed-off-by: Mika Westerberg +Signed-off-by: Sasha Levin +--- + drivers/thunderbolt/quirks.c | 31 +++++++++++++++++++++++++++++++ + drivers/thunderbolt/tb.h | 3 +++ + drivers/thunderbolt/usb4.c | 17 +++++++++++++++-- + 3 files changed, 49 insertions(+), 2 deletions(-) + +diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c +index ae28a03fa890b..1157b8869bcca 100644 +--- a/drivers/thunderbolt/quirks.c ++++ b/drivers/thunderbolt/quirks.c +@@ -26,6 +26,19 @@ static void quirk_clx_disable(struct tb_switch *sw) + tb_sw_dbg(sw, "disabling CL states\n"); + } + ++static void quirk_usb3_maximum_bandwidth(struct tb_switch *sw) ++{ ++ struct tb_port *port; ++ ++ tb_switch_for_each_port(sw, port) { ++ if (!tb_port_is_usb3_down(port)) ++ continue; ++ port->max_bw = 16376; ++ tb_port_dbg(port, "USB3 maximum bandwidth limited to %u Mb/s\n", ++ port->max_bw); ++ } ++} ++ + struct tb_quirk { + u16 hw_vendor_id; + u16 hw_device_id; +@@ -43,6 +56,24 @@ static const struct tb_quirk tb_quirks[] = { + * DP buffers. + */ + { 0x8087, 0x0b26, 0x0000, 0x0000, quirk_dp_credit_allocation }, ++ /* ++ * Limit the maximum USB3 bandwidth for the following Intel USB4 ++ * host routers due to a hardware issue. ++ */ ++ { 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI0, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI1, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI0, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI1, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_MTL_M_NHI0, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI0, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI1, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, + /* + * CLx is not supported on AMD USB4 Yellow Carp and Pink Sardine platforms. + */ +diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h +index e11d973a8f9b6..f034723b1b40e 100644 +--- a/drivers/thunderbolt/tb.h ++++ b/drivers/thunderbolt/tb.h +@@ -252,6 +252,8 @@ struct tb_switch { + * @ctl_credits: Buffers reserved for control path + * @dma_credits: Number of credits allocated for DMA tunneling for all + * DMA paths through this port. ++ * @max_bw: Maximum possible bandwidth through this adapter if set to ++ * non-zero. + * + * In USB4 terminology this structure represents an adapter (protocol or + * lane adapter). 
+@@ -277,6 +279,7 @@ struct tb_port { + unsigned int total_credits; + unsigned int ctl_credits; + unsigned int dma_credits; ++ unsigned int max_bw; + }; + + /** +diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c +index cf8d4f769579e..3c821f5e44814 100644 +--- a/drivers/thunderbolt/usb4.c ++++ b/drivers/thunderbolt/usb4.c +@@ -1865,6 +1865,15 @@ int usb4_port_retimer_nvm_read(struct tb_port *port, u8 index, + usb4_port_retimer_nvm_read_block, &info); + } + ++static inline unsigned int ++usb4_usb3_port_max_bandwidth(const struct tb_port *port, unsigned int bw) ++{ ++ /* Take the possible bandwidth limitation into account */ ++ if (port->max_bw) ++ return min(bw, port->max_bw); ++ return bw; ++} ++ + /** + * usb4_usb3_port_max_link_rate() - Maximum support USB3 link rate + * @port: USB3 adapter port +@@ -1886,7 +1895,9 @@ int usb4_usb3_port_max_link_rate(struct tb_port *port) + return ret; + + lr = (val & ADP_USB3_CS_4_MSLR_MASK) >> ADP_USB3_CS_4_MSLR_SHIFT; +- return lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000; ++ ret = lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000; ++ ++ return usb4_usb3_port_max_bandwidth(port, ret); + } + + /** +@@ -1913,7 +1924,9 @@ int usb4_usb3_port_actual_link_rate(struct tb_port *port) + return 0; + + lr = val & ADP_USB3_CS_4_ALR_MASK; +- return lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000; ++ ret = lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000; ++ ++ return usb4_usb3_port_max_bandwidth(port, ret); + } + + static int usb4_usb3_port_cm_request(struct tb_port *port, bool request) +-- +2.39.2 + diff --git a/queue-6.1/tracing-add-.graph-suffix-option-to-histogram-value.patch b/queue-6.1/tracing-add-.graph-suffix-option-to-histogram-value.patch new file mode 100644 index 00000000000..8e2781ef242 --- /dev/null +++ b/queue-6.1/tracing-add-.graph-suffix-option-to-histogram-value.patch @@ -0,0 +1,240 @@ +From 30335d6f17cf3c839475858ac8a47b51ad446d01 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 00:31:55 +0900 +Subject: tracing: Add .graph suffix option to histogram value + +From: Masami Hiramatsu (Google) + +[ Upstream commit a2c54256dec7510477e2b4f4db187e638f7cac37 ] + +Add the .graph suffix which shows the bar graph of the histogram value. + +For example, the below example shows that the bar graph +of the histogram of the runtime for each tasks. 
+ +------ + # cd /sys/kernel/debug/tracing/ + # echo hist:keys=pid:vals=runtime.graph:sort=pid > \ + events/sched/sched_stat_runtime/trigger + # sleep 10 + # cat events/sched/sched_stat_runtime/hist + # event histogram + # + # trigger info: hist:keys=pid:vals=hitcount,runtime.graph:sort=pid:size=2048 [active] + # + + { pid: 14 } hitcount: 2 runtime: + { pid: 16 } hitcount: 8 runtime: + { pid: 26 } hitcount: 1 runtime: + { pid: 57 } hitcount: 3 runtime: + { pid: 61 } hitcount: 20 runtime: ### + { pid: 66 } hitcount: 2 runtime: + { pid: 70 } hitcount: 3 runtime: + { pid: 72 } hitcount: 2 runtime: + { pid: 145 } hitcount: 14 runtime: #################### + { pid: 152 } hitcount: 5 runtime: ####### + { pid: 153 } hitcount: 2 runtime: #### + + Totals: + Hits: 62 + Entries: 11 + Dropped: 0 +------- + +Link: https://lore.kernel.org/linux-trace-kernel/166610813953.56030.10944148382315789485.stgit@devnote2 + +Signed-off-by: Masami Hiramatsu (Google) +Reviewed-by: Tom Zanussi +Tested-by: Tom Zanussi +Stable-dep-of: e0213434fe3e ("tracing: Do not let histogram values have some modifiers") +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 3 +- + kernel/trace/trace_events_hist.c | 77 +++++++++++++++++++++++++------- + 2 files changed, 63 insertions(+), 17 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index f714ed1f1c673..78d69b9488e45 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -5728,7 +5728,8 @@ static const char readme_msg[] = + "\t .log2 display log2 value rather than raw number\n" + "\t .buckets=size display values in groups of size rather than raw number\n" + "\t .usecs display a common_timestamp in microseconds\n" +- "\t .percent display a number of percentage value\n\n" ++ "\t .percent display a number of percentage value\n" ++ "\t .graph display a bar-graph of a value\n\n" + "\t The 'pause' parameter can be used to pause an existing hist\n" + "\t trigger or to start a hist trigger but not log any events\n" + "\t until told to do so. 'continue' can be used to start or\n" +diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c +index 1c207fbf5634f..8e0acf8009bde 100644 +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -507,6 +507,7 @@ enum hist_field_flags { + HIST_FIELD_FL_BUCKET = 1 << 17, + HIST_FIELD_FL_CONST = 1 << 18, + HIST_FIELD_FL_PERCENT = 1 << 19, ++ HIST_FIELD_FL_GRAPH = 1 << 20, + }; + + struct var_defs { +@@ -1711,6 +1712,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field) + flags_str = "usecs"; + else if (hist_field->flags & HIST_FIELD_FL_PERCENT) + flags_str = "percent"; ++ else if (hist_field->flags & HIST_FIELD_FL_GRAPH) ++ flags_str = "graph"; + + return flags_str; + } +@@ -2327,6 +2330,10 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, + if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY)) + goto error; + *flags |= HIST_FIELD_FL_PERCENT; ++ } else if (strncmp(modifier, "graph", 5) == 0) { ++ if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY)) ++ goto error; ++ *flags |= HIST_FIELD_FL_GRAPH; + } else { + error: + hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); +@@ -5322,20 +5329,52 @@ static inline unsigned int __get_percentage(u64 val, u64 total) + return val ? 
UINT_MAX : 0; + } + ++#define BAR_CHAR '#' ++ ++static inline const char *__fill_bar_str(char *buf, int size, u64 val, u64 max) ++{ ++ unsigned int len = __get_percentage(val, max); ++ int i; ++ ++ if (len == UINT_MAX) { ++ snprintf(buf, size, "[ERROR]"); ++ return buf; ++ } ++ ++ len = len * size / 10000; ++ for (i = 0; i < len && i < size; i++) ++ buf[i] = BAR_CHAR; ++ while (i < size) ++ buf[i++] = ' '; ++ buf[size] = '\0'; ++ ++ return buf; ++} ++ ++struct hist_val_stat { ++ u64 max; ++ u64 total; ++}; ++ + static void hist_trigger_print_val(struct seq_file *m, unsigned int idx, + const char *field_name, unsigned long flags, +- u64 *totals, struct tracing_map_elt *elt) ++ struct hist_val_stat *stats, ++ struct tracing_map_elt *elt) + { + u64 val = tracing_map_read_sum(elt, idx); + unsigned int pc; ++ char bar[21]; + + if (flags & HIST_FIELD_FL_PERCENT) { +- pc = __get_percentage(val, totals[idx]); ++ pc = __get_percentage(val, stats[idx].total); + if (pc == UINT_MAX) + seq_printf(m, " %s (%%):[ERROR]", field_name); + else + seq_printf(m, " %s (%%): %3u.%02u", field_name, + pc / 100, pc % 100); ++ } else if (flags & HIST_FIELD_FL_GRAPH) { ++ seq_printf(m, " %s: %20s", field_name, ++ __fill_bar_str(bar, 20, val, stats[idx].max)); + } else if (flags & HIST_FIELD_FL_HEX) { + seq_printf(m, " %s: %10llx", field_name, val); + } else { +@@ -5345,7 +5384,7 @@ static void hist_trigger_print_val(struct seq_file *m, unsigned int idx, + + static void hist_trigger_entry_print(struct seq_file *m, + struct hist_trigger_data *hist_data, +- u64 *totals, ++ struct hist_val_stat *stats, + void *key, + struct tracing_map_elt *elt) + { +@@ -5356,7 +5395,7 @@ static void hist_trigger_entry_print(struct seq_file *m, + hist_trigger_print_key(m, hist_data, key, elt); + + /* At first, show the raw hitcount always */ +- hist_trigger_print_val(m, i, "hitcount", 0, totals, elt); ++ hist_trigger_print_val(m, i, "hitcount", 0, stats, elt); + + for (i = 1; i < hist_data->n_vals; i++) { + field_name = hist_field_name(hist_data->fields[i], 0); +@@ -5366,7 +5405,7 @@ static void hist_trigger_entry_print(struct seq_file *m, + continue; + + seq_puts(m, " "); +- hist_trigger_print_val(m, i, field_name, flags, totals, elt); ++ hist_trigger_print_val(m, i, field_name, flags, stats, elt); + } + + print_actions(m, hist_data, elt); +@@ -5380,7 +5419,8 @@ static int print_entries(struct seq_file *m, + struct tracing_map_sort_entry **sort_entries = NULL; + struct tracing_map *map = hist_data->map; + int i, j, n_entries; +- u64 *totals = NULL; ++ struct hist_val_stat *stats = NULL; ++ u64 val; + + n_entries = tracing_map_sort_entries(map, hist_data->sort_keys, + hist_data->n_sort_keys, +@@ -5388,28 +5428,33 @@ static int print_entries(struct seq_file *m, + if (n_entries < 0) + return n_entries; + ++ /* Calculate the max and the total for each field if needed. 
*/ + for (j = 0; j < hist_data->n_vals; j++) { +- if (!(hist_data->fields[j]->flags & HIST_FIELD_FL_PERCENT)) ++ if (!(hist_data->fields[j]->flags & ++ (HIST_FIELD_FL_PERCENT | HIST_FIELD_FL_GRAPH))) + continue; +- if (!totals) { +- totals = kcalloc(hist_data->n_vals, sizeof(u64), +- GFP_KERNEL); +- if (!totals) { ++ if (!stats) { ++ stats = kcalloc(hist_data->n_vals, sizeof(*stats), ++ GFP_KERNEL); ++ if (!stats) { + n_entries = -ENOMEM; + goto out; + } + } +- for (i = 0; i < n_entries; i++) +- totals[j] += tracing_map_read_sum( +- sort_entries[i]->elt, j); ++ for (i = 0; i < n_entries; i++) { ++ val = tracing_map_read_sum(sort_entries[i]->elt, j); ++ stats[j].total += val; ++ if (stats[j].max < val) ++ stats[j].max = val; ++ } + } + + for (i = 0; i < n_entries; i++) +- hist_trigger_entry_print(m, hist_data, totals, ++ hist_trigger_entry_print(m, hist_data, stats, + sort_entries[i]->key, + sort_entries[i]->elt); + +- kfree(totals); ++ kfree(stats); + out: + tracing_map_destroy_sort_entries(sort_entries, n_entries); + +-- +2.39.2 + diff --git a/queue-6.1/tracing-add-.percent-suffix-option-to-histogram-valu.patch b/queue-6.1/tracing-add-.percent-suffix-option-to-histogram-valu.patch new file mode 100644 index 00000000000..a8ec432fc22 --- /dev/null +++ b/queue-6.1/tracing-add-.percent-suffix-option-to-histogram-valu.patch @@ -0,0 +1,226 @@ +From aa5e63131c95836508399c95d7db46f5581e9aa4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 00:31:55 +0900 +Subject: tracing: Add .percent suffix option to histogram values + +From: Masami Hiramatsu (Google) + +[ Upstream commit abaa5258ce5e5887a9de049f50a85dc023391a1c ] + +Add .percent suffix option to show the histogram values in percentage. +This feature is useful when we need yo undersntand the overall trend +for the histograms of large values. +E.g. this shows the runtime percentage for each tasks. 
+ +------ + # cd /sys/kernel/debug/tracing/ + # echo hist:keys=pid:vals=hitcount,runtime.percent:sort=pid > \ + events/sched/sched_stat_runtime/trigger + # sleep 10 + # cat events/sched/sched_stat_runtime/hist + # event histogram + # + # trigger info: hist:keys=pid:vals=hitcount,runtime.percent:sort=pid:size=2048 [active] + # + + { pid: 8 } hitcount: 7 runtime (%): 4.14 + { pid: 14 } hitcount: 5 runtime (%): 3.69 + { pid: 16 } hitcount: 11 runtime (%): 3.41 + { pid: 61 } hitcount: 41 runtime (%): 19.75 + { pid: 65 } hitcount: 4 runtime (%): 1.48 + { pid: 70 } hitcount: 6 runtime (%): 3.60 + { pid: 72 } hitcount: 2 runtime (%): 1.10 + { pid: 144 } hitcount: 10 runtime (%): 32.01 + { pid: 151 } hitcount: 8 runtime (%): 22.66 + { pid: 152 } hitcount: 2 runtime (%): 8.10 + + Totals: + Hits: 96 + Entries: 10 + Dropped: 0 +----- + +Link: https://lore.kernel.org/linux-trace-kernel/166610813077.56030.4238090506973562347.stgit@devnote2 + +Signed-off-by: Masami Hiramatsu (Google) +Reviewed-by: Tom Zanussi +Tested-by: Tom Zanussi +Stable-dep-of: e0213434fe3e ("tracing: Do not let histogram values have some modifiers") +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 3 +- + kernel/trace/trace_events_hist.c | 90 +++++++++++++++++++++++++++----- + 2 files changed, 78 insertions(+), 15 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 888980257340f..f714ed1f1c673 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -5727,7 +5727,8 @@ static const char readme_msg[] = + "\t .syscall display a syscall id as a syscall name\n" + "\t .log2 display log2 value rather than raw number\n" + "\t .buckets=size display values in groups of size rather than raw number\n" +- "\t .usecs display a common_timestamp in microseconds\n\n" ++ "\t .usecs display a common_timestamp in microseconds\n" ++ "\t .percent display a number of percentage value\n\n" + "\t The 'pause' parameter can be used to pause an existing hist\n" + "\t trigger or to start a hist trigger but not log any events\n" + "\t until told to do so. 
'continue' can be used to start or\n" +diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c +index e3df03cdecbcb..1c207fbf5634f 100644 +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -506,6 +506,7 @@ enum hist_field_flags { + HIST_FIELD_FL_ALIAS = 1 << 16, + HIST_FIELD_FL_BUCKET = 1 << 17, + HIST_FIELD_FL_CONST = 1 << 18, ++ HIST_FIELD_FL_PERCENT = 1 << 19, + }; + + struct var_defs { +@@ -1708,6 +1709,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field) + flags_str = "buckets"; + else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS) + flags_str = "usecs"; ++ else if (hist_field->flags & HIST_FIELD_FL_PERCENT) ++ flags_str = "percent"; + + return flags_str; + } +@@ -2320,6 +2323,10 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, + if (ret || !(*buckets)) + goto error; + *flags |= HIST_FIELD_FL_BUCKET; ++ } else if (strncmp(modifier, "percent", 7) == 0) { ++ if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY)) ++ goto error; ++ *flags |= HIST_FIELD_FL_PERCENT; + } else { + error: + hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); +@@ -5297,33 +5304,69 @@ static void hist_trigger_print_key(struct seq_file *m, + seq_puts(m, "}"); + } + ++/* Get the 100 times of the percentage of @val in @total */ ++static inline unsigned int __get_percentage(u64 val, u64 total) ++{ ++ if (!total) ++ goto div0; ++ ++ if (val < (U64_MAX / 10000)) ++ return (unsigned int)div64_ul(val * 10000, total); ++ ++ total = div64_u64(total, 10000); ++ if (!total) ++ goto div0; ++ ++ return (unsigned int)div64_ul(val, total); ++div0: ++ return val ? UINT_MAX : 0; ++} ++ ++static void hist_trigger_print_val(struct seq_file *m, unsigned int idx, ++ const char *field_name, unsigned long flags, ++ u64 *totals, struct tracing_map_elt *elt) ++{ ++ u64 val = tracing_map_read_sum(elt, idx); ++ unsigned int pc; ++ ++ if (flags & HIST_FIELD_FL_PERCENT) { ++ pc = __get_percentage(val, totals[idx]); ++ if (pc == UINT_MAX) ++ seq_printf(m, " %s (%%):[ERROR]", field_name); ++ else ++ seq_printf(m, " %s (%%): %3u.%02u", field_name, ++ pc / 100, pc % 100); ++ } else if (flags & HIST_FIELD_FL_HEX) { ++ seq_printf(m, " %s: %10llx", field_name, val); ++ } else { ++ seq_printf(m, " %s: %10llu", field_name, val); ++ } ++} ++ + static void hist_trigger_entry_print(struct seq_file *m, + struct hist_trigger_data *hist_data, ++ u64 *totals, + void *key, + struct tracing_map_elt *elt) + { + const char *field_name; +- unsigned int i; ++ unsigned int i = HITCOUNT_IDX; ++ unsigned long flags; + + hist_trigger_print_key(m, hist_data, key, elt); + +- seq_printf(m, " hitcount: %10llu", +- tracing_map_read_sum(elt, HITCOUNT_IDX)); ++ /* At first, show the raw hitcount always */ ++ hist_trigger_print_val(m, i, "hitcount", 0, totals, elt); + + for (i = 1; i < hist_data->n_vals; i++) { + field_name = hist_field_name(hist_data->fields[i], 0); ++ flags = hist_data->fields[i]->flags; + +- if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR || +- hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR) ++ if (flags & HIST_FIELD_FL_VAR || flags & HIST_FIELD_FL_EXPR) + continue; + +- if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { +- seq_printf(m, " %s: %10llx", field_name, +- tracing_map_read_sum(elt, i)); +- } else { +- seq_printf(m, " %s: %10llu", field_name, +- tracing_map_read_sum(elt, i)); +- } ++ seq_puts(m, " "); ++ hist_trigger_print_val(m, i, field_name, flags, totals, elt); + } + + print_actions(m, hist_data, elt); 
+@@ -5336,7 +5379,8 @@ static int print_entries(struct seq_file *m, + { + struct tracing_map_sort_entry **sort_entries = NULL; + struct tracing_map *map = hist_data->map; +- int i, n_entries; ++ int i, j, n_entries; ++ u64 *totals = NULL; + + n_entries = tracing_map_sort_entries(map, hist_data->sort_keys, + hist_data->n_sort_keys, +@@ -5344,11 +5388,29 @@ static int print_entries(struct seq_file *m, + if (n_entries < 0) + return n_entries; + ++ for (j = 0; j < hist_data->n_vals; j++) { ++ if (!(hist_data->fields[j]->flags & HIST_FIELD_FL_PERCENT)) ++ continue; ++ if (!totals) { ++ totals = kcalloc(hist_data->n_vals, sizeof(u64), ++ GFP_KERNEL); ++ if (!totals) { ++ n_entries = -ENOMEM; ++ goto out; ++ } ++ } ++ for (i = 0; i < n_entries; i++) ++ totals[j] += tracing_map_read_sum( ++ sort_entries[i]->elt, j); ++ } ++ + for (i = 0; i < n_entries; i++) +- hist_trigger_entry_print(m, hist_data, ++ hist_trigger_entry_print(m, hist_data, totals, + sort_entries[i]->key, + sort_entries[i]->elt); + ++ kfree(totals); ++out: + tracing_map_destroy_sort_entries(sort_entries, n_entries); + + return n_entries; +-- +2.39.2 + diff --git a/queue-6.1/tracing-do-not-let-histogram-values-have-some-modifi.patch b/queue-6.1/tracing-do-not-let-histogram-values-have-some-modifi.patch new file mode 100644 index 00000000000..35287e79174 --- /dev/null +++ b/queue-6.1/tracing-do-not-let-histogram-values-have-some-modifi.patch @@ -0,0 +1,109 @@ +From 5e1ccb9a8074e6d6d95bf68dcbebc263b326c574 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 1 Mar 2023 20:00:52 -0500 +Subject: tracing: Do not let histogram values have some modifiers + +From: Steven Rostedt (Google) + +[ Upstream commit e0213434fe3e4a0d118923dc98d31e7ff1cd9e45 ] + +Histogram values can not be strings, stacktraces, graphs, symbols, +syscalls, or grouped in buckets or log. Give an error if a value is set to +do so. + +Note, the histogram code was not prepared to handle these modifiers for +histograms and caused a bug. 
+ +Mark Rutland reported: + + # echo 'p:copy_to_user __arch_copy_to_user n=$arg2' >> /sys/kernel/tracing/kprobe_events + # echo 'hist:keys=n:vals=hitcount.buckets=8:sort=hitcount' > /sys/kernel/tracing/events/kprobes/copy_to_user/trigger + # cat /sys/kernel/tracing/events/kprobes/copy_to_user/hist +[ 143.694628] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 +[ 143.695190] Mem abort info: +[ 143.695362] ESR = 0x0000000096000004 +[ 143.695604] EC = 0x25: DABT (current EL), IL = 32 bits +[ 143.695889] SET = 0, FnV = 0 +[ 143.696077] EA = 0, S1PTW = 0 +[ 143.696302] FSC = 0x04: level 0 translation fault +[ 143.702381] Data abort info: +[ 143.702614] ISV = 0, ISS = 0x00000004 +[ 143.702832] CM = 0, WnR = 0 +[ 143.703087] user pgtable: 4k pages, 48-bit VAs, pgdp=00000000448f9000 +[ 143.703407] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +[ 143.704137] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP +[ 143.704714] Modules linked in: +[ 143.705273] CPU: 0 PID: 133 Comm: cat Not tainted 6.2.0-00003-g6fc512c10a7c #3 +[ 143.706138] Hardware name: linux,dummy-virt (DT) +[ 143.706723] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 143.707120] pc : hist_field_name.part.0+0x14/0x140 +[ 143.707504] lr : hist_field_name.part.0+0x104/0x140 +[ 143.707774] sp : ffff800008333a30 +[ 143.707952] x29: ffff800008333a30 x28: 0000000000000001 x27: 0000000000400cc0 +[ 143.708429] x26: ffffd7a653b20260 x25: 0000000000000000 x24: ffff10d303ee5800 +[ 143.708776] x23: ffffd7a6539b27b0 x22: ffff10d303fb8c00 x21: 0000000000000001 +[ 143.709127] x20: ffff10d303ec2000 x19: 0000000000000000 x18: 0000000000000000 +[ 143.709478] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 +[ 143.709824] x14: 0000000000000000 x13: 203a6f666e692072 x12: 6567676972742023 +[ 143.710179] x11: 0a230a6d6172676f x10: 000000000000002c x9 : ffffd7a6521e018c +[ 143.710584] x8 : 000000000000002c x7 : 7f7f7f7f7f7f7f7f x6 : 000000000000002c +[ 143.710915] x5 : ffff10d303b0103e x4 : ffffd7a653b20261 x3 : 000000000000003d +[ 143.711239] x2 : 0000000000020001 x1 : 0000000000000001 x0 : 0000000000000000 +[ 143.711746] Call trace: +[ 143.712115] hist_field_name.part.0+0x14/0x140 +[ 143.712642] hist_field_name.part.0+0x104/0x140 +[ 143.712925] hist_field_print+0x28/0x140 +[ 143.713125] event_hist_trigger_print+0x174/0x4d0 +[ 143.713348] hist_show+0xf8/0x980 +[ 143.713521] seq_read_iter+0x1bc/0x4b0 +[ 143.713711] seq_read+0x8c/0xc4 +[ 143.713876] vfs_read+0xc8/0x2a4 +[ 143.714043] ksys_read+0x70/0xfc +[ 143.714218] __arm64_sys_read+0x24/0x30 +[ 143.714400] invoke_syscall+0x50/0x120 +[ 143.714587] el0_svc_common.constprop.0+0x4c/0x100 +[ 143.714807] do_el0_svc+0x44/0xd0 +[ 143.714970] el0_svc+0x2c/0x84 +[ 143.715134] el0t_64_sync_handler+0xbc/0x140 +[ 143.715334] el0t_64_sync+0x190/0x194 +[ 143.715742] Code: a9bd7bfd 910003fd a90153f3 aa0003f3 (f9400000) +[ 143.716510] ---[ end trace 0000000000000000 ]--- +Segmentation fault + +Link: https://lkml.kernel.org/r/20230302020810.559462599@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Andrew Morton +Fixes: c6afad49d127f ("tracing: Add hist trigger 'sym' and 'sym-offset' modifiers") +Reported-by: Mark Rutland +Tested-by: Mark Rutland +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_events_hist.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c +index 
8e0acf8009bde..2b2120ed2460f 100644 +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -4193,6 +4193,15 @@ static int __create_val_field(struct hist_trigger_data *hist_data, + goto out; + } + ++ /* Some types cannot be a value */ ++ if (hist_field->flags & (HIST_FIELD_FL_GRAPH | HIST_FIELD_FL_PERCENT | ++ HIST_FIELD_FL_BUCKET | HIST_FIELD_FL_LOG2 | ++ HIST_FIELD_FL_SYM | HIST_FIELD_FL_SYM_OFFSET | ++ HIST_FIELD_FL_SYSCALL | HIST_FIELD_FL_STACKTRACE)) { ++ hist_err(file->tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(field_str)); ++ ret = -EINVAL; ++ } ++ + hist_data->fields[val_idx] = hist_field; + + ++hist_data->n_vals; +-- +2.39.2 + diff --git a/queue-6.1/tty-serial-fsl_lpuart-fix-race-on-rx-dma-shutdown.patch b/queue-6.1/tty-serial-fsl_lpuart-fix-race-on-rx-dma-shutdown.patch new file mode 100644 index 00000000000..dd193890d77 --- /dev/null +++ b/queue-6.1/tty-serial-fsl_lpuart-fix-race-on-rx-dma-shutdown.patch @@ -0,0 +1,109 @@ +From 8c4c957d731435fb3b6ada7fbd0956769ebdbee3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Mar 2023 14:43:02 +0100 +Subject: tty: serial: fsl_lpuart: fix race on RX DMA shutdown + +From: Alexander Sverdlin + +[ Upstream commit 1be6f2b15f902c02e055ae0b419ca789200473c9 ] + +From time to time DMA completion can come in the middle of DMA shutdown: + +: : +lpuart32_shutdown() + lpuart_dma_shutdown() + del_timer_sync() + lpuart_dma_rx_complete() + lpuart_copy_rx_to_tty() + mod_timer() + lpuart_dma_rx_free() + +When the timer fires a bit later, sport->dma_rx_desc is NULL: + +Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 +pc : lpuart_copy_rx_to_tty+0xcc/0x5bc +lr : lpuart_timer_func+0x1c/0x2c +Call trace: + lpuart_copy_rx_to_tty + lpuart_timer_func + call_timer_fn + __run_timers.part.0 + run_timer_softirq + __do_softirq + __irq_exit_rcu + irq_exit + handle_domain_irq + gic_handle_irq + call_on_irq_stack + do_interrupt_handler + ... + +To fix this fold del_timer_sync() into lpuart_dma_rx_free() after +dmaengine_terminate_sync() to make sure timer will not be re-started in +lpuart_copy_rx_to_tty() <= lpuart_dma_rx_complete(). 
+ +Fixes: 4a8588a1cf86 ("serial: fsl_lpuart: delete timer on shutdown") +Cc: stable +Signed-off-by: Alexander Sverdlin +Link: https://lore.kernel.org/r/20230309134302.74940-2-alexander.sverdlin@siemens.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/fsl_lpuart.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c +index 86e96696ab26d..cd98c04de0330 100644 +--- a/drivers/tty/serial/fsl_lpuart.c ++++ b/drivers/tty/serial/fsl_lpuart.c +@@ -1334,6 +1334,7 @@ static void lpuart_dma_rx_free(struct uart_port *port) + struct dma_chan *chan = sport->dma_rx_chan; + + dmaengine_terminate_sync(chan); ++ del_timer_sync(&sport->lpuart_timer); + dma_unmap_sg(chan->device->dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE); + kfree(sport->rx_ring.buf); + sport->rx_ring.tail = 0; +@@ -1757,7 +1758,6 @@ static int lpuart32_startup(struct uart_port *port) + static void lpuart_dma_shutdown(struct lpuart_port *sport) + { + if (sport->lpuart_dma_rx_use) { +- del_timer_sync(&sport->lpuart_timer); + lpuart_dma_rx_free(&sport->port); + sport->lpuart_dma_rx_use = false; + } +@@ -1917,10 +1917,8 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, + * Since timer function acqures sport->port.lock, need to stop before + * acquring same lock because otherwise del_timer_sync() can deadlock. + */ +- if (old && sport->lpuart_dma_rx_use) { +- del_timer_sync(&sport->lpuart_timer); ++ if (old && sport->lpuart_dma_rx_use) + lpuart_dma_rx_free(&sport->port); +- } + + spin_lock_irqsave(&sport->port.lock, flags); + +@@ -2154,10 +2152,8 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, + * Since timer function acqures sport->port.lock, need to stop before + * acquring same lock because otherwise del_timer_sync() can deadlock. + */ +- if (old && sport->lpuart_dma_rx_use) { +- del_timer_sync(&sport->lpuart_timer); ++ if (old && sport->lpuart_dma_rx_use) + lpuart_dma_rx_free(&sport->port); +- } + + spin_lock_irqsave(&sport->port.lock, flags); + +@@ -2850,7 +2846,6 @@ static int __maybe_unused lpuart_suspend(struct device *dev) + * Rx DMA path before suspend and start Rx DMA path on resume. + */ + if (irq_wake) { +- del_timer_sync(&sport->lpuart_timer); + lpuart_dma_rx_free(&sport->port); + } + +-- +2.39.2 + diff --git a/queue-6.1/tty-serial-fsl_lpuart-switch-to-new-dmaengine_termin.patch b/queue-6.1/tty-serial-fsl_lpuart-switch-to-new-dmaengine_termin.patch new file mode 100644 index 00000000000..cb7f37e0485 --- /dev/null +++ b/queue-6.1/tty-serial-fsl_lpuart-switch-to-new-dmaengine_termin.patch @@ -0,0 +1,64 @@ +From 9da1b65e8b605ca30bf0106f430c5bcab727147f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Nov 2022 10:36:19 +0800 +Subject: tty: serial: fsl_lpuart: switch to new dmaengine_terminate_* API + +From: Sherry Sun + +[ Upstream commit 8682ab0eea89c300ebb120c02ead3999ca5560a8 ] + +Convert dmaengine_terminate_all() calls to synchronous and asynchronous +versions where appropriate. 
+ +Signed-off-by: Sherry Sun +Link: https://lore.kernel.org/r/20221123023619.30173-1-sherry.sun@nxp.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 1be6f2b15f90 ("tty: serial: fsl_lpuart: fix race on RX DMA shutdown") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/fsl_lpuart.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c +index c51883f34ac2b..86e96696ab26d 100644 +--- a/drivers/tty/serial/fsl_lpuart.c ++++ b/drivers/tty/serial/fsl_lpuart.c +@@ -582,7 +582,7 @@ static void lpuart_flush_buffer(struct uart_port *port) + sport->dma_tx_nents, DMA_TO_DEVICE); + sport->dma_tx_in_progress = false; + } +- dmaengine_terminate_all(chan); ++ dmaengine_terminate_async(chan); + } + + if (lpuart_is_32(sport)) { +@@ -1333,7 +1333,7 @@ static void lpuart_dma_rx_free(struct uart_port *port) + struct lpuart_port, port); + struct dma_chan *chan = sport->dma_rx_chan; + +- dmaengine_terminate_all(chan); ++ dmaengine_terminate_sync(chan); + dma_unmap_sg(chan->device->dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE); + kfree(sport->rx_ring.buf); + sport->rx_ring.tail = 0; +@@ -1766,7 +1766,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) + if (wait_event_interruptible_timeout(sport->dma_wait, + !sport->dma_tx_in_progress, msecs_to_jiffies(300)) <= 0) { + sport->dma_tx_in_progress = false; +- dmaengine_terminate_all(sport->dma_tx_chan); ++ dmaengine_terminate_sync(sport->dma_tx_chan); + } + sport->lpuart_dma_tx_use = false; + } +@@ -2867,7 +2867,7 @@ static int __maybe_unused lpuart_suspend(struct device *dev) + + if (sport->lpuart_dma_tx_use) { + sport->dma_tx_in_progress = false; +- dmaengine_terminate_all(sport->dma_tx_chan); ++ dmaengine_terminate_sync(sport->dma_tx_chan); + } + + if (sport->port.suspended && !irq_wake) +-- +2.39.2 + diff --git a/queue-6.1/zonefs-fix-error-message-in-zonefs_file_dio_append.patch b/queue-6.1/zonefs-fix-error-message-in-zonefs_file_dio_append.patch new file mode 100644 index 00000000000..88f40cbd4c5 --- /dev/null +++ b/queue-6.1/zonefs-fix-error-message-in-zonefs_file_dio_append.patch @@ -0,0 +1,41 @@ +From afcf97061101c21b9516f39b0495f576a309f29d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Mar 2023 22:49:15 +0900 +Subject: zonefs: Fix error message in zonefs_file_dio_append() + +From: Damien Le Moal + +[ Upstream commit 88b170088ad2c3e27086fe35769aa49f8a512564 ] + +Since the expected write location in a sequential file is always at the +end of the file (append write), when an invalid write append location is +detected in zonefs_file_dio_append(), print the invalid written location +instead of the expected write location. 
+ +Fixes: a608da3bd730 ("zonefs: Detect append writes at invalid locations") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Reviewed-by: Christoph Hellwig +Reviewed-by: Johannes Thumshirn +Reviewed-by: Himanshu Madhani +Signed-off-by: Sasha Levin +--- + fs/zonefs/file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c +index 738b0e28d74b5..c71cc0fcb3ec8 100644 +--- a/fs/zonefs/file.c ++++ b/fs/zonefs/file.c +@@ -426,7 +426,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) + if (bio->bi_iter.bi_sector != wpsector) { + zonefs_warn(inode->i_sb, + "Corrupted write pointer %llu for zone at %llu\n", +- wpsector, z->z_sector); ++ bio->bi_iter.bi_sector, z->z_sector); + ret = -EIO; + } + } +-- +2.39.2 + diff --git a/queue-6.1/zonefs-reduce-struct-zonefs_inode_info-size.patch b/queue-6.1/zonefs-reduce-struct-zonefs_inode_info-size.patch new file mode 100644 index 00000000000..24190903977 --- /dev/null +++ b/queue-6.1/zonefs-reduce-struct-zonefs_inode_info-size.patch @@ -0,0 +1,283 @@ +From d8736462ac18e027b5e6882dcb149196d9a265cf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Nov 2022 19:43:30 +0900 +Subject: zonefs: Reduce struct zonefs_inode_info size + +From: Damien Le Moal + +[ Upstream commit 34422914dc00b291d1c47dbdabe93b154c2f2b25 ] + +Instead of using the i_ztype field in struct zonefs_inode_info to +indicate the zone type of an inode, introduce the new inode flag +ZONEFS_ZONE_CNV to be set in the i_flags field of struct +zonefs_inode_info to identify conventional zones. If this flag is not +set, the zone of an inode is considered to be a sequential zone. + +The helpers zonefs_zone_is_cnv(), zonefs_zone_is_seq(), +zonefs_inode_is_cnv() and zonefs_inode_is_seq() are introduced to +simplify testing the zone type of a struct zonefs_inode_info and of a +struct inode. + +Signed-off-by: Damien Le Moal +Reviewed-by: Johannes Thumshirn +Stable-dep-of: 88b170088ad2 ("zonefs: Fix error message in zonefs_file_dio_append()") +Signed-off-by: Sasha Levin +--- + fs/zonefs/file.c | 35 ++++++++++++++--------------------- + fs/zonefs/super.c | 12 +++++++----- + fs/zonefs/zonefs.h | 24 +++++++++++++++++++++--- + 3 files changed, 42 insertions(+), 29 deletions(-) + +diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c +index ece0f3959b6d1..64873d31d75dd 100644 +--- a/fs/zonefs/file.c ++++ b/fs/zonefs/file.c +@@ -77,8 +77,7 @@ static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, + * checked when writes are issued, so warn if we see a page writeback + * operation. 
+ */ +- if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && +- !(flags & IOMAP_DIRECT))) ++ if (WARN_ON_ONCE(zonefs_zone_is_seq(zi) && !(flags & IOMAP_DIRECT))) + return -EIO; + + /* +@@ -128,7 +127,7 @@ static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + +- if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) ++ if (WARN_ON_ONCE(zonefs_zone_is_seq(zi))) + return -EIO; + if (WARN_ON_ONCE(offset >= i_size_read(inode))) + return -EIO; +@@ -158,9 +157,8 @@ static int zonefs_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) + { + struct inode *inode = file_inode(swap_file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); + +- if (zi->i_ztype != ZONEFS_ZTYPE_CNV) { ++ if (zonefs_inode_is_seq(inode)) { + zonefs_err(inode->i_sb, + "swap file: not a conventional zone file\n"); + return -EINVAL; +@@ -196,7 +194,7 @@ int zonefs_file_truncate(struct inode *inode, loff_t isize) + * only down to a 0 size, which is equivalent to a zone reset, and to + * the maximum file size, which is equivalent to a zone finish. + */ +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) ++ if (!zonefs_zone_is_seq(zi)) + return -EPERM; + + if (!isize) +@@ -266,7 +264,7 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, + * Since only direct writes are allowed in sequential files, page cache + * flush is needed only for conventional zone files. + */ +- if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV) ++ if (zonefs_inode_is_cnv(inode)) + ret = file_write_and_wait_range(file, start, end); + if (!ret) + ret = blkdev_issue_flush(inode->i_sb->s_bdev); +@@ -280,7 +278,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, + static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) + { + struct inode *inode = file_inode(vmf->vma->vm_file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); + vm_fault_t ret; + + if (unlikely(IS_IMMUTABLE(inode))) +@@ -290,7 +287,7 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) + * Sanity check: only conventional zone files can have shared + * writeable mappings. + */ +- if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) ++ if (zonefs_inode_is_seq(inode)) + return VM_FAULT_NOPAGE; + + sb_start_pagefault(inode->i_sb); +@@ -319,7 +316,7 @@ static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) + * mappings are possible since there are no guarantees for write + * ordering between msync() and page cache writeback. + */ +- if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ && ++ if (zonefs_inode_is_seq(file_inode(file)) && + (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + return -EINVAL; + +@@ -352,7 +349,7 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, + return error; + } + +- if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) { ++ if (size && zonefs_zone_is_seq(zi)) { + /* + * Note that we may be seeing completions out of order, + * but that is not a problem since a write completed +@@ -491,7 +488,7 @@ static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from) + return -EINVAL; + + if (iocb->ki_flags & IOCB_APPEND) { +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) ++ if (zonefs_zone_is_cnv(zi)) + return -EINVAL; + mutex_lock(&zi->i_truncate_mutex); + iocb->ki_pos = zi->i_wpoffset; +@@ -531,8 +528,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) + * as this can cause write reordering (e.g. 
the first aio gets EAGAIN + * on the inode lock but the second goes through but is now unaligned). + */ +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync && +- (iocb->ki_flags & IOCB_NOWAIT)) ++ if (zonefs_zone_is_seq(zi) && !sync && (iocb->ki_flags & IOCB_NOWAIT)) + return -EOPNOTSUPP; + + if (iocb->ki_flags & IOCB_NOWAIT) { +@@ -554,7 +550,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) + } + + /* Enforce sequential writes (append only) in sequential zones */ +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) { ++ if (zonefs_zone_is_seq(zi)) { + mutex_lock(&zi->i_truncate_mutex); + if (iocb->ki_pos != zi->i_wpoffset) { + mutex_unlock(&zi->i_truncate_mutex); +@@ -570,7 +566,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) + else + ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, + &zonefs_write_dio_ops, 0, NULL, 0); +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && ++ if (zonefs_zone_is_seq(zi) && + (ret > 0 || ret == -EIOCBQUEUED)) { + if (ret > 0) + count = ret; +@@ -596,14 +592,13 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, + struct iov_iter *from) + { + struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); + ssize_t ret; + + /* + * Direct IO writes are mandatory for sequential zone files so that the + * write IO issuing order is preserved. + */ +- if (zi->i_ztype != ZONEFS_ZTYPE_CNV) ++ if (zonefs_inode_is_seq(inode)) + return -EIO; + + if (iocb->ki_flags & IOCB_NOWAIT) { +@@ -731,9 +726,7 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) + static inline bool zonefs_seq_file_need_wro(struct inode *inode, + struct file *file) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) ++ if (zonefs_inode_is_cnv(inode)) + return false; + + if (!(file->f_mode & FMODE_WRITE)) +diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c +index 6307cc95be061..a4af29dc32e7d 100644 +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -37,7 +37,7 @@ void zonefs_account_active(struct inode *inode) + + lockdep_assert_held(&zi->i_truncate_mutex); + +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) ++ if (zonefs_zone_is_cnv(zi)) + return; + + /* +@@ -177,14 +177,14 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, + zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", + inode->i_ino); + zi->i_flags |= ZONEFS_ZONE_READONLY; +- if (zi->i_ztype == ZONEFS_ZTYPE_CNV) ++ if (zonefs_zone_is_cnv(zi)) + return zi->i_max_size; + return zi->i_wpoffset; + case BLK_ZONE_COND_FULL: + /* The write pointer of full zones is invalid. */ + return zi->i_max_size; + default: +- if (zi->i_ztype == ZONEFS_ZTYPE_CNV) ++ if (zonefs_zone_is_cnv(zi)) + return zi->i_max_size; + return (zone->wp - zone->start) << SECTOR_SHIFT; + } +@@ -260,7 +260,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * In all cases, warn about inode size inconsistency and handle the + * IO error according to the zone condition and to the mount options. 
+ */ +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size) ++ if (zonefs_zone_is_seq(zi) && isize != data_size) + zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", + inode->i_ino, isize, data_size); + +@@ -584,7 +584,9 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; + inode->i_mode = S_IFREG | sbi->s_perm; + +- zi->i_ztype = type; ++ if (type == ZONEFS_ZTYPE_CNV) ++ zi->i_flags |= ZONEFS_ZONE_CNV; ++ + zi->i_zsector = zone->start; + zi->i_zone_size = zone->len << SECTOR_SHIFT; + if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && +diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h +index 439096445ee53..1a225f74015a0 100644 +--- a/fs/zonefs/zonefs.h ++++ b/fs/zonefs/zonefs.h +@@ -44,6 +44,7 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) + #define ZONEFS_ZONE_ACTIVE (1U << 2) + #define ZONEFS_ZONE_OFFLINE (1U << 3) + #define ZONEFS_ZONE_READONLY (1U << 4) ++#define ZONEFS_ZONE_CNV (1U << 31) + + /* + * In-memory inode data. +@@ -51,9 +52,6 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) + struct zonefs_inode_info { + struct inode i_vnode; + +- /* File zone type */ +- enum zonefs_ztype i_ztype; +- + /* File zone start sector (512B unit) */ + sector_t i_zsector; + +@@ -91,6 +89,26 @@ static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode) + return container_of(inode, struct zonefs_inode_info, i_vnode); + } + ++static inline bool zonefs_zone_is_cnv(struct zonefs_inode_info *zi) ++{ ++ return zi->i_flags & ZONEFS_ZONE_CNV; ++} ++ ++static inline bool zonefs_zone_is_seq(struct zonefs_inode_info *zi) ++{ ++ return !zonefs_zone_is_cnv(zi); ++} ++ ++static inline bool zonefs_inode_is_cnv(struct inode *inode) ++{ ++ return zonefs_zone_is_cnv(ZONEFS_I(inode)); ++} ++ ++static inline bool zonefs_inode_is_seq(struct inode *inode) ++{ ++ return zonefs_zone_is_seq(ZONEFS_I(inode)); ++} ++ + /* + * On-disk super block (block 0). + */ +-- +2.39.2 + diff --git a/queue-6.1/zonefs-reorganize-code.patch b/queue-6.1/zonefs-reorganize-code.patch new file mode 100644 index 00000000000..9b2687bd8f6 --- /dev/null +++ b/queue-6.1/zonefs-reorganize-code.patch @@ -0,0 +1,1990 @@ +From 7984f3ba9ba89242cbae90529b4c1680787a207c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Nov 2022 09:39:33 +0900 +Subject: zonefs: Reorganize code + +From: Damien Le Moal + +[ Upstream commit 4008e2a0b01aba982356fd15b128a47bf11bd9c7 ] + +Move all code related to zone file operations from super.c to the new +file.c file. Inode and zone management code remains in super.c. 
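+
+In short, after this patch super.c keeps the zone and inode management
+helpers and exports them through zonefs.h, while file.c holds the file and
+address space operations. As a condensed view of the new zonefs.h section
+(the full hunk is at the end of this patch):
+
+	/* In super.c */
+	void zonefs_account_active(struct inode *inode);
+	int zonefs_zone_mgmt(struct inode *inode, enum req_op op);
+	void zonefs_i_size_write(struct inode *inode, loff_t isize);
+	void zonefs_update_stats(struct inode *inode, loff_t new_isize);
+	void __zonefs_io_error(struct inode *inode, bool write);
+
+	/* In file.c */
+	extern const struct address_space_operations zonefs_file_aops;
+	extern const struct file_operations zonefs_file_operations;
+	int zonefs_file_truncate(struct inode *inode, loff_t isize);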
+ +Signed-off-by: Damien Le Moal +Reviewed-by: Johannes Thumshirn +Stable-dep-of: 88b170088ad2 ("zonefs: Fix error message in zonefs_file_dio_append()") +Signed-off-by: Sasha Levin +--- + fs/zonefs/Makefile | 2 +- + fs/zonefs/file.c | 874 ++++++++++++++++++++++++++++++++++++++++ + fs/zonefs/super.c | 973 +++------------------------------------------ + fs/zonefs/zonefs.h | 22 + + 4 files changed, 955 insertions(+), 916 deletions(-) + create mode 100644 fs/zonefs/file.c + +diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile +index 9fe54f5319f22..645f7229de4a0 100644 +--- a/fs/zonefs/Makefile ++++ b/fs/zonefs/Makefile +@@ -3,4 +3,4 @@ ccflags-y += -I$(src) + + obj-$(CONFIG_ZONEFS_FS) += zonefs.o + +-zonefs-y := super.o sysfs.o ++zonefs-y := super.o file.o sysfs.o +diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c +new file mode 100644 +index 0000000000000..ece0f3959b6d1 +--- /dev/null ++++ b/fs/zonefs/file.c +@@ -0,0 +1,874 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Simple file system for zoned block devices exposing zones as files. ++ * ++ * Copyright (C) 2022 Western Digital Corporation or its affiliates. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "zonefs.h" ++ ++#include "trace.h" ++ ++static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, ++ loff_t length, unsigned int flags, ++ struct iomap *iomap, struct iomap *srcmap) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct super_block *sb = inode->i_sb; ++ loff_t isize; ++ ++ /* ++ * All blocks are always mapped below EOF. If reading past EOF, ++ * act as if there is a hole up to the file maximum size. ++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ iomap->bdev = inode->i_sb->s_bdev; ++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); ++ isize = i_size_read(inode); ++ if (iomap->offset >= isize) { ++ iomap->type = IOMAP_HOLE; ++ iomap->addr = IOMAP_NULL_ADDR; ++ iomap->length = length; ++ } else { ++ iomap->type = IOMAP_MAPPED; ++ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; ++ iomap->length = isize - iomap->offset; ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ trace_zonefs_iomap_begin(inode, iomap); ++ ++ return 0; ++} ++ ++static const struct iomap_ops zonefs_read_iomap_ops = { ++ .iomap_begin = zonefs_read_iomap_begin, ++}; ++ ++static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, ++ loff_t length, unsigned int flags, ++ struct iomap *iomap, struct iomap *srcmap) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct super_block *sb = inode->i_sb; ++ loff_t isize; ++ ++ /* All write I/Os should always be within the file maximum size */ ++ if (WARN_ON_ONCE(offset + length > zi->i_max_size)) ++ return -EIO; ++ ++ /* ++ * Sequential zones can only accept direct writes. This is already ++ * checked when writes are issued, so warn if we see a page writeback ++ * operation. ++ */ ++ if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && ++ !(flags & IOMAP_DIRECT))) ++ return -EIO; ++ ++ /* ++ * For conventional zones, all blocks are always mapped. For sequential ++ * zones, all blocks after always mapped below the inode size (zone ++ * write pointer) and unwriten beyond. 
++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ iomap->bdev = inode->i_sb->s_bdev; ++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); ++ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; ++ isize = i_size_read(inode); ++ if (iomap->offset >= isize) { ++ iomap->type = IOMAP_UNWRITTEN; ++ iomap->length = zi->i_max_size - iomap->offset; ++ } else { ++ iomap->type = IOMAP_MAPPED; ++ iomap->length = isize - iomap->offset; ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ trace_zonefs_iomap_begin(inode, iomap); ++ ++ return 0; ++} ++ ++static const struct iomap_ops zonefs_write_iomap_ops = { ++ .iomap_begin = zonefs_write_iomap_begin, ++}; ++ ++static int zonefs_read_folio(struct file *unused, struct folio *folio) ++{ ++ return iomap_read_folio(folio, &zonefs_read_iomap_ops); ++} ++ ++static void zonefs_readahead(struct readahead_control *rac) ++{ ++ iomap_readahead(rac, &zonefs_read_iomap_ops); ++} ++ ++/* ++ * Map blocks for page writeback. This is used only on conventional zone files, ++ * which implies that the page range can only be within the fixed inode size. ++ */ ++static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, ++ struct inode *inode, loff_t offset) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ ++ if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) ++ return -EIO; ++ if (WARN_ON_ONCE(offset >= i_size_read(inode))) ++ return -EIO; ++ ++ /* If the mapping is already OK, nothing needs to be done */ ++ if (offset >= wpc->iomap.offset && ++ offset < wpc->iomap.offset + wpc->iomap.length) ++ return 0; ++ ++ return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset, ++ IOMAP_WRITE, &wpc->iomap, NULL); ++} ++ ++static const struct iomap_writeback_ops zonefs_writeback_ops = { ++ .map_blocks = zonefs_write_map_blocks, ++}; ++ ++static int zonefs_writepages(struct address_space *mapping, ++ struct writeback_control *wbc) ++{ ++ struct iomap_writepage_ctx wpc = { }; ++ ++ return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); ++} ++ ++static int zonefs_swap_activate(struct swap_info_struct *sis, ++ struct file *swap_file, sector_t *span) ++{ ++ struct inode *inode = file_inode(swap_file); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ ++ if (zi->i_ztype != ZONEFS_ZTYPE_CNV) { ++ zonefs_err(inode->i_sb, ++ "swap file: not a conventional zone file\n"); ++ return -EINVAL; ++ } ++ ++ return iomap_swapfile_activate(sis, swap_file, span, ++ &zonefs_read_iomap_ops); ++} ++ ++const struct address_space_operations zonefs_file_aops = { ++ .read_folio = zonefs_read_folio, ++ .readahead = zonefs_readahead, ++ .writepages = zonefs_writepages, ++ .dirty_folio = filemap_dirty_folio, ++ .release_folio = iomap_release_folio, ++ .invalidate_folio = iomap_invalidate_folio, ++ .migrate_folio = filemap_migrate_folio, ++ .is_partially_uptodate = iomap_is_partially_uptodate, ++ .error_remove_page = generic_error_remove_page, ++ .direct_IO = noop_direct_IO, ++ .swap_activate = zonefs_swap_activate, ++}; ++ ++int zonefs_file_truncate(struct inode *inode, loff_t isize) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ loff_t old_isize; ++ enum req_op op; ++ int ret = 0; ++ ++ /* ++ * Only sequential zone files can be truncated and truncation is allowed ++ * only down to a 0 size, which is equivalent to a zone reset, and to ++ * the maximum file size, which is equivalent to a zone finish. 
++ */ ++ if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) ++ return -EPERM; ++ ++ if (!isize) ++ op = REQ_OP_ZONE_RESET; ++ else if (isize == zi->i_max_size) ++ op = REQ_OP_ZONE_FINISH; ++ else ++ return -EPERM; ++ ++ inode_dio_wait(inode); ++ ++ /* Serialize against page faults */ ++ filemap_invalidate_lock(inode->i_mapping); ++ ++ /* Serialize against zonefs_iomap_begin() */ ++ mutex_lock(&zi->i_truncate_mutex); ++ ++ old_isize = i_size_read(inode); ++ if (isize == old_isize) ++ goto unlock; ++ ++ ret = zonefs_zone_mgmt(inode, op); ++ if (ret) ++ goto unlock; ++ ++ /* ++ * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set, ++ * take care of open zones. ++ */ ++ if (zi->i_flags & ZONEFS_ZONE_OPEN) { ++ /* ++ * Truncating a zone to EMPTY or FULL is the equivalent of ++ * closing the zone. For a truncation to 0, we need to ++ * re-open the zone to ensure new writes can be processed. ++ * For a truncation to the maximum file size, the zone is ++ * closed and writes cannot be accepted anymore, so clear ++ * the open flag. ++ */ ++ if (!isize) ++ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); ++ else ++ zi->i_flags &= ~ZONEFS_ZONE_OPEN; ++ } ++ ++ zonefs_update_stats(inode, isize); ++ truncate_setsize(inode, isize); ++ zi->i_wpoffset = isize; ++ zonefs_account_active(inode); ++ ++unlock: ++ mutex_unlock(&zi->i_truncate_mutex); ++ filemap_invalidate_unlock(inode->i_mapping); ++ ++ return ret; ++} ++ ++static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ struct inode *inode = file_inode(file); ++ int ret = 0; ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ ++ /* ++ * Since only direct writes are allowed in sequential files, page cache ++ * flush is needed only for conventional zone files. ++ */ ++ if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV) ++ ret = file_write_and_wait_range(file, start, end); ++ if (!ret) ++ ret = blkdev_issue_flush(inode->i_sb->s_bdev); ++ ++ if (ret) ++ zonefs_io_error(inode, true); ++ ++ return ret; ++} ++ ++static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) ++{ ++ struct inode *inode = file_inode(vmf->vma->vm_file); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ vm_fault_t ret; ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return VM_FAULT_SIGBUS; ++ ++ /* ++ * Sanity check: only conventional zone files can have shared ++ * writeable mappings. ++ */ ++ if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) ++ return VM_FAULT_NOPAGE; ++ ++ sb_start_pagefault(inode->i_sb); ++ file_update_time(vmf->vma->vm_file); ++ ++ /* Serialize against truncates */ ++ filemap_invalidate_lock_shared(inode->i_mapping); ++ ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops); ++ filemap_invalidate_unlock_shared(inode->i_mapping); ++ ++ sb_end_pagefault(inode->i_sb); ++ return ret; ++} ++ ++static const struct vm_operations_struct zonefs_file_vm_ops = { ++ .fault = filemap_fault, ++ .map_pages = filemap_map_pages, ++ .page_mkwrite = zonefs_filemap_page_mkwrite, ++}; ++ ++static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ /* ++ * Conventional zones accept random writes, so their files can support ++ * shared writable mappings. For sequential zone files, only read ++ * mappings are possible since there are no guarantees for write ++ * ordering between msync() and page cache writeback. 
++ */ ++ if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ && ++ (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) ++ return -EINVAL; ++ ++ file_accessed(file); ++ vma->vm_ops = &zonefs_file_vm_ops; ++ ++ return 0; ++} ++ ++static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) ++{ ++ loff_t isize = i_size_read(file_inode(file)); ++ ++ /* ++ * Seeks are limited to below the zone size for conventional zones ++ * and below the zone write pointer for sequential zones. In both ++ * cases, this limit is the inode size. ++ */ ++ return generic_file_llseek_size(file, offset, whence, isize, isize); ++} ++ ++static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, ++ int error, unsigned int flags) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ ++ if (error) { ++ zonefs_io_error(inode, true); ++ return error; ++ } ++ ++ if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) { ++ /* ++ * Note that we may be seeing completions out of order, ++ * but that is not a problem since a write completed ++ * successfully necessarily means that all preceding writes ++ * were also successful. So we can safely increase the inode ++ * size to the write end location. ++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ if (i_size_read(inode) < iocb->ki_pos + size) { ++ zonefs_update_stats(inode, iocb->ki_pos + size); ++ zonefs_i_size_write(inode, iocb->ki_pos + size); ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ } ++ ++ return 0; ++} ++ ++static const struct iomap_dio_ops zonefs_write_dio_ops = { ++ .end_io = zonefs_file_write_dio_end_io, ++}; ++ ++static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct block_device *bdev = inode->i_sb->s_bdev; ++ unsigned int max = bdev_max_zone_append_sectors(bdev); ++ struct bio *bio; ++ ssize_t size; ++ int nr_pages; ++ ssize_t ret; ++ ++ max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); ++ iov_iter_truncate(from, max); ++ ++ nr_pages = iov_iter_npages(from, BIO_MAX_VECS); ++ if (!nr_pages) ++ return 0; ++ ++ bio = bio_alloc(bdev, nr_pages, ++ REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS); ++ bio->bi_iter.bi_sector = zi->i_zsector; ++ bio->bi_ioprio = iocb->ki_ioprio; ++ if (iocb_is_dsync(iocb)) ++ bio->bi_opf |= REQ_FUA; ++ ++ ret = bio_iov_iter_get_pages(bio, from); ++ if (unlikely(ret)) ++ goto out_release; ++ ++ size = bio->bi_iter.bi_size; ++ task_io_account_write(size); ++ ++ if (iocb->ki_flags & IOCB_HIPRI) ++ bio_set_polled(bio, iocb); ++ ++ ret = submit_bio_wait(bio); ++ ++ /* ++ * If the file zone was written underneath the file system, the zone ++ * write pointer may not be where we expect it to be, but the zone ++ * append write can still succeed. So check manually that we wrote where ++ * we intended to, that is, at zi->i_wpoffset. 
++ */ ++ if (!ret) { ++ sector_t wpsector = ++ zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); ++ ++ if (bio->bi_iter.bi_sector != wpsector) { ++ zonefs_warn(inode->i_sb, ++ "Corrupted write pointer %llu for zone at %llu\n", ++ wpsector, zi->i_zsector); ++ ret = -EIO; ++ } ++ } ++ ++ zonefs_file_write_dio_end_io(iocb, size, ret, 0); ++ trace_zonefs_file_dio_append(inode, size, ret); ++ ++out_release: ++ bio_release_pages(bio, false); ++ bio_put(bio); ++ ++ if (ret >= 0) { ++ iocb->ki_pos += size; ++ return size; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Do not exceed the LFS limits nor the file zone size. If pos is under the ++ * limit it becomes a short access. If it exceeds the limit, return -EFBIG. ++ */ ++static loff_t zonefs_write_check_limits(struct file *file, loff_t pos, ++ loff_t count) ++{ ++ struct inode *inode = file_inode(file); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ loff_t limit = rlimit(RLIMIT_FSIZE); ++ loff_t max_size = zi->i_max_size; ++ ++ if (limit != RLIM_INFINITY) { ++ if (pos >= limit) { ++ send_sig(SIGXFSZ, current, 0); ++ return -EFBIG; ++ } ++ count = min(count, limit - pos); ++ } ++ ++ if (!(file->f_flags & O_LARGEFILE)) ++ max_size = min_t(loff_t, MAX_NON_LFS, max_size); ++ ++ if (unlikely(pos >= max_size)) ++ return -EFBIG; ++ ++ return min(count, max_size - pos); ++} ++ ++static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from) ++{ ++ struct file *file = iocb->ki_filp; ++ struct inode *inode = file_inode(file); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ loff_t count; ++ ++ if (IS_SWAPFILE(inode)) ++ return -ETXTBSY; ++ ++ if (!iov_iter_count(from)) ++ return 0; ++ ++ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) ++ return -EINVAL; ++ ++ if (iocb->ki_flags & IOCB_APPEND) { ++ if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) ++ return -EINVAL; ++ mutex_lock(&zi->i_truncate_mutex); ++ iocb->ki_pos = zi->i_wpoffset; ++ mutex_unlock(&zi->i_truncate_mutex); ++ } ++ ++ count = zonefs_write_check_limits(file, iocb->ki_pos, ++ iov_iter_count(from)); ++ if (count < 0) ++ return count; ++ ++ iov_iter_truncate(from, count); ++ return iov_iter_count(from); ++} ++ ++/* ++ * Handle direct writes. For sequential zone files, this is the only possible ++ * write path. For these files, check that the user is issuing writes ++ * sequentially from the end of the file. This code assumes that the block layer ++ * delivers write requests to the device in sequential order. This is always the ++ * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE ++ * elevator feature is being used (e.g. mq-deadline). The block layer always ++ * automatically select such an elevator for zoned block devices during the ++ * device initialization. ++ */ ++static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct super_block *sb = inode->i_sb; ++ bool sync = is_sync_kiocb(iocb); ++ bool append = false; ++ ssize_t ret, count; ++ ++ /* ++ * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT ++ * as this can cause write reordering (e.g. the first aio gets EAGAIN ++ * on the inode lock but the second goes through but is now unaligned). 
++ */ ++ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync && ++ (iocb->ki_flags & IOCB_NOWAIT)) ++ return -EOPNOTSUPP; ++ ++ if (iocb->ki_flags & IOCB_NOWAIT) { ++ if (!inode_trylock(inode)) ++ return -EAGAIN; ++ } else { ++ inode_lock(inode); ++ } ++ ++ count = zonefs_write_checks(iocb, from); ++ if (count <= 0) { ++ ret = count; ++ goto inode_unlock; ++ } ++ ++ if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { ++ ret = -EINVAL; ++ goto inode_unlock; ++ } ++ ++ /* Enforce sequential writes (append only) in sequential zones */ ++ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) { ++ mutex_lock(&zi->i_truncate_mutex); ++ if (iocb->ki_pos != zi->i_wpoffset) { ++ mutex_unlock(&zi->i_truncate_mutex); ++ ret = -EINVAL; ++ goto inode_unlock; ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ append = sync; ++ } ++ ++ if (append) ++ ret = zonefs_file_dio_append(iocb, from); ++ else ++ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, ++ &zonefs_write_dio_ops, 0, NULL, 0); ++ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && ++ (ret > 0 || ret == -EIOCBQUEUED)) { ++ if (ret > 0) ++ count = ret; ++ ++ /* ++ * Update the zone write pointer offset assuming the write ++ * operation succeeded. If it did not, the error recovery path ++ * will correct it. Also do active seq file accounting. ++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ zi->i_wpoffset += count; ++ zonefs_account_active(inode); ++ mutex_unlock(&zi->i_truncate_mutex); ++ } ++ ++inode_unlock: ++ inode_unlock(inode); ++ ++ return ret; ++} ++ ++static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, ++ struct iov_iter *from) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ ssize_t ret; ++ ++ /* ++ * Direct IO writes are mandatory for sequential zone files so that the ++ * write IO issuing order is preserved. 
++ */ ++ if (zi->i_ztype != ZONEFS_ZTYPE_CNV) ++ return -EIO; ++ ++ if (iocb->ki_flags & IOCB_NOWAIT) { ++ if (!inode_trylock(inode)) ++ return -EAGAIN; ++ } else { ++ inode_lock(inode); ++ } ++ ++ ret = zonefs_write_checks(iocb, from); ++ if (ret <= 0) ++ goto inode_unlock; ++ ++ ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops); ++ if (ret > 0) ++ iocb->ki_pos += ret; ++ else if (ret == -EIO) ++ zonefs_io_error(inode, true); ++ ++inode_unlock: ++ inode_unlock(inode); ++ if (ret > 0) ++ ret = generic_write_sync(iocb, ret); ++ ++ return ret; ++} ++ ++static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ ++ if (sb_rdonly(inode->i_sb)) ++ return -EROFS; ++ ++ /* Write operations beyond the zone size are not allowed */ ++ if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size) ++ return -EFBIG; ++ ++ if (iocb->ki_flags & IOCB_DIRECT) { ++ ssize_t ret = zonefs_file_dio_write(iocb, from); ++ ++ if (ret != -ENOTBLK) ++ return ret; ++ } ++ ++ return zonefs_file_buffered_write(iocb, from); ++} ++ ++static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size, ++ int error, unsigned int flags) ++{ ++ if (error) { ++ zonefs_io_error(file_inode(iocb->ki_filp), false); ++ return error; ++ } ++ ++ return 0; ++} ++ ++static const struct iomap_dio_ops zonefs_read_dio_ops = { ++ .end_io = zonefs_file_read_dio_end_io, ++}; ++ ++static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct super_block *sb = inode->i_sb; ++ loff_t isize; ++ ssize_t ret; ++ ++ /* Offline zones cannot be read */ ++ if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777))) ++ return -EPERM; ++ ++ if (iocb->ki_pos >= zi->i_max_size) ++ return 0; ++ ++ if (iocb->ki_flags & IOCB_NOWAIT) { ++ if (!inode_trylock_shared(inode)) ++ return -EAGAIN; ++ } else { ++ inode_lock_shared(inode); ++ } ++ ++ /* Limit read operations to written data */ ++ mutex_lock(&zi->i_truncate_mutex); ++ isize = i_size_read(inode); ++ if (iocb->ki_pos >= isize) { ++ mutex_unlock(&zi->i_truncate_mutex); ++ ret = 0; ++ goto inode_unlock; ++ } ++ iov_iter_truncate(to, isize - iocb->ki_pos); ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ if (iocb->ki_flags & IOCB_DIRECT) { ++ size_t count = iov_iter_count(to); ++ ++ if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { ++ ret = -EINVAL; ++ goto inode_unlock; ++ } ++ file_accessed(iocb->ki_filp); ++ ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops, ++ &zonefs_read_dio_ops, 0, NULL, 0); ++ } else { ++ ret = generic_file_read_iter(iocb, to); ++ if (ret == -EIO) ++ zonefs_io_error(inode, false); ++ } ++ ++inode_unlock: ++ inode_unlock_shared(inode); ++ ++ return ret; ++} ++ ++/* ++ * Write open accounting is done only for sequential files. 
++ */ ++static inline bool zonefs_seq_file_need_wro(struct inode *inode, ++ struct file *file) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ ++ if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) ++ return false; ++ ++ if (!(file->f_mode & FMODE_WRITE)) ++ return false; ++ ++ return true; ++} ++ ++static int zonefs_seq_file_write_open(struct inode *inode) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ int ret = 0; ++ ++ mutex_lock(&zi->i_truncate_mutex); ++ ++ if (!zi->i_wr_refcnt) { ++ struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); ++ unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files); ++ ++ if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { ++ ++ if (sbi->s_max_wro_seq_files ++ && wro > sbi->s_max_wro_seq_files) { ++ atomic_dec(&sbi->s_wro_seq_files); ++ ret = -EBUSY; ++ goto unlock; ++ } ++ ++ if (i_size_read(inode) < zi->i_max_size) { ++ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); ++ if (ret) { ++ atomic_dec(&sbi->s_wro_seq_files); ++ goto unlock; ++ } ++ zi->i_flags |= ZONEFS_ZONE_OPEN; ++ zonefs_account_active(inode); ++ } ++ } ++ } ++ ++ zi->i_wr_refcnt++; ++ ++unlock: ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ return ret; ++} ++ ++static int zonefs_file_open(struct inode *inode, struct file *file) ++{ ++ int ret; ++ ++ ret = generic_file_open(inode, file); ++ if (ret) ++ return ret; ++ ++ if (zonefs_seq_file_need_wro(inode, file)) ++ return zonefs_seq_file_write_open(inode); ++ ++ return 0; ++} ++ ++static void zonefs_seq_file_write_close(struct inode *inode) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct super_block *sb = inode->i_sb; ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); ++ int ret = 0; ++ ++ mutex_lock(&zi->i_truncate_mutex); ++ ++ zi->i_wr_refcnt--; ++ if (zi->i_wr_refcnt) ++ goto unlock; ++ ++ /* ++ * The file zone may not be open anymore (e.g. the file was truncated to ++ * its maximum size or it was fully written). For this case, we only ++ * need to decrement the write open count. ++ */ ++ if (zi->i_flags & ZONEFS_ZONE_OPEN) { ++ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); ++ if (ret) { ++ __zonefs_io_error(inode, false); ++ /* ++ * Leaving zones explicitly open may lead to a state ++ * where most zones cannot be written (zone resources ++ * exhausted). So take preventive action by remounting ++ * read-only. ++ */ ++ if (zi->i_flags & ZONEFS_ZONE_OPEN && ++ !(sb->s_flags & SB_RDONLY)) { ++ zonefs_warn(sb, ++ "closing zone at %llu failed %d\n", ++ zi->i_zsector, ret); ++ zonefs_warn(sb, ++ "remounting filesystem read-only\n"); ++ sb->s_flags |= SB_RDONLY; ++ } ++ goto unlock; ++ } ++ ++ zi->i_flags &= ~ZONEFS_ZONE_OPEN; ++ zonefs_account_active(inode); ++ } ++ ++ atomic_dec(&sbi->s_wro_seq_files); ++ ++unlock: ++ mutex_unlock(&zi->i_truncate_mutex); ++} ++ ++static int zonefs_file_release(struct inode *inode, struct file *file) ++{ ++ /* ++ * If we explicitly open a zone we must close it again as well, but the ++ * zone management operation can fail (either due to an IO error or as ++ * the zone has gone offline or read-only). Make sure we don't fail the ++ * close(2) for user-space. 
++ */ ++ if (zonefs_seq_file_need_wro(inode, file)) ++ zonefs_seq_file_write_close(inode); ++ ++ return 0; ++} ++ ++const struct file_operations zonefs_file_operations = { ++ .open = zonefs_file_open, ++ .release = zonefs_file_release, ++ .fsync = zonefs_file_fsync, ++ .mmap = zonefs_file_mmap, ++ .llseek = zonefs_file_llseek, ++ .read_iter = zonefs_file_read_iter, ++ .write_iter = zonefs_file_write_iter, ++ .splice_read = generic_file_splice_read, ++ .splice_write = iter_file_splice_write, ++ .iopoll = iocb_bio_iopoll, ++}; +diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c +index a9c5c3f720adf..e808276b88018 100644 +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -30,7 +30,7 @@ + /* + * Manage the active zone count. Called with zi->i_truncate_mutex held. + */ +-static void zonefs_account_active(struct inode *inode) ++void zonefs_account_active(struct inode *inode) + { + struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); + struct zonefs_inode_info *zi = ZONEFS_I(inode); +@@ -68,7 +68,7 @@ static void zonefs_account_active(struct inode *inode) + } + } + +-static inline int zonefs_zone_mgmt(struct inode *inode, enum req_op op) ++int zonefs_zone_mgmt(struct inode *inode, enum req_op op) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + int ret; +@@ -99,7 +99,7 @@ static inline int zonefs_zone_mgmt(struct inode *inode, enum req_op op) + return 0; + } + +-static inline void zonefs_i_size_write(struct inode *inode, loff_t isize) ++void zonefs_i_size_write(struct inode *inode, loff_t isize) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + +@@ -117,167 +117,7 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize) + } + } + +-static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, +- loff_t length, unsigned int flags, +- struct iomap *iomap, struct iomap *srcmap) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- loff_t isize; +- +- /* +- * All blocks are always mapped below EOF. If reading past EOF, +- * act as if there is a hole up to the file maximum size. +- */ +- mutex_lock(&zi->i_truncate_mutex); +- iomap->bdev = inode->i_sb->s_bdev; +- iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); +- isize = i_size_read(inode); +- if (iomap->offset >= isize) { +- iomap->type = IOMAP_HOLE; +- iomap->addr = IOMAP_NULL_ADDR; +- iomap->length = length; +- } else { +- iomap->type = IOMAP_MAPPED; +- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; +- iomap->length = isize - iomap->offset; +- } +- mutex_unlock(&zi->i_truncate_mutex); +- +- trace_zonefs_iomap_begin(inode, iomap); +- +- return 0; +-} +- +-static const struct iomap_ops zonefs_read_iomap_ops = { +- .iomap_begin = zonefs_read_iomap_begin, +-}; +- +-static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, +- loff_t length, unsigned int flags, +- struct iomap *iomap, struct iomap *srcmap) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- loff_t isize; +- +- /* All write I/Os should always be within the file maximum size */ +- if (WARN_ON_ONCE(offset + length > zi->i_max_size)) +- return -EIO; +- +- /* +- * Sequential zones can only accept direct writes. This is already +- * checked when writes are issued, so warn if we see a page writeback +- * operation. +- */ +- if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && +- !(flags & IOMAP_DIRECT))) +- return -EIO; +- +- /* +- * For conventional zones, all blocks are always mapped. 
For sequential +- * zones, all blocks after always mapped below the inode size (zone +- * write pointer) and unwriten beyond. +- */ +- mutex_lock(&zi->i_truncate_mutex); +- iomap->bdev = inode->i_sb->s_bdev; +- iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); +- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; +- isize = i_size_read(inode); +- if (iomap->offset >= isize) { +- iomap->type = IOMAP_UNWRITTEN; +- iomap->length = zi->i_max_size - iomap->offset; +- } else { +- iomap->type = IOMAP_MAPPED; +- iomap->length = isize - iomap->offset; +- } +- mutex_unlock(&zi->i_truncate_mutex); +- +- trace_zonefs_iomap_begin(inode, iomap); +- +- return 0; +-} +- +-static const struct iomap_ops zonefs_write_iomap_ops = { +- .iomap_begin = zonefs_write_iomap_begin, +-}; +- +-static int zonefs_read_folio(struct file *unused, struct folio *folio) +-{ +- return iomap_read_folio(folio, &zonefs_read_iomap_ops); +-} +- +-static void zonefs_readahead(struct readahead_control *rac) +-{ +- iomap_readahead(rac, &zonefs_read_iomap_ops); +-} +- +-/* +- * Map blocks for page writeback. This is used only on conventional zone files, +- * which implies that the page range can only be within the fixed inode size. +- */ +-static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, +- struct inode *inode, loff_t offset) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) +- return -EIO; +- if (WARN_ON_ONCE(offset >= i_size_read(inode))) +- return -EIO; +- +- /* If the mapping is already OK, nothing needs to be done */ +- if (offset >= wpc->iomap.offset && +- offset < wpc->iomap.offset + wpc->iomap.length) +- return 0; +- +- return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset, +- IOMAP_WRITE, &wpc->iomap, NULL); +-} +- +-static const struct iomap_writeback_ops zonefs_writeback_ops = { +- .map_blocks = zonefs_write_map_blocks, +-}; +- +-static int zonefs_writepages(struct address_space *mapping, +- struct writeback_control *wbc) +-{ +- struct iomap_writepage_ctx wpc = { }; +- +- return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); +-} +- +-static int zonefs_swap_activate(struct swap_info_struct *sis, +- struct file *swap_file, sector_t *span) +-{ +- struct inode *inode = file_inode(swap_file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- if (zi->i_ztype != ZONEFS_ZTYPE_CNV) { +- zonefs_err(inode->i_sb, +- "swap file: not a conventional zone file\n"); +- return -EINVAL; +- } +- +- return iomap_swapfile_activate(sis, swap_file, span, +- &zonefs_read_iomap_ops); +-} +- +-static const struct address_space_operations zonefs_file_aops = { +- .read_folio = zonefs_read_folio, +- .readahead = zonefs_readahead, +- .writepages = zonefs_writepages, +- .dirty_folio = filemap_dirty_folio, +- .release_folio = iomap_release_folio, +- .invalidate_folio = iomap_invalidate_folio, +- .migrate_folio = filemap_migrate_folio, +- .is_partially_uptodate = iomap_is_partially_uptodate, +- .error_remove_page = generic_error_remove_page, +- .direct_IO = noop_direct_IO, +- .swap_activate = zonefs_swap_activate, +-}; +- +-static void zonefs_update_stats(struct inode *inode, loff_t new_isize) ++void zonefs_update_stats(struct inode *inode, loff_t new_isize) + { + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +@@ -487,7 +327,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * eventually correct the file size and zonefs inode write pointer offset + * (which can 
be out of sync with the drive due to partial write failures). + */ +-static void __zonefs_io_error(struct inode *inode, bool write) ++void __zonefs_io_error(struct inode *inode, bool write) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; +@@ -526,749 +366,6 @@ static void __zonefs_io_error(struct inode *inode, bool write) + memalloc_noio_restore(noio_flag); + } + +-static void zonefs_io_error(struct inode *inode, bool write) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- mutex_lock(&zi->i_truncate_mutex); +- __zonefs_io_error(inode, write); +- mutex_unlock(&zi->i_truncate_mutex); +-} +- +-static int zonefs_file_truncate(struct inode *inode, loff_t isize) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- loff_t old_isize; +- enum req_op op; +- int ret = 0; +- +- /* +- * Only sequential zone files can be truncated and truncation is allowed +- * only down to a 0 size, which is equivalent to a zone reset, and to +- * the maximum file size, which is equivalent to a zone finish. +- */ +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) +- return -EPERM; +- +- if (!isize) +- op = REQ_OP_ZONE_RESET; +- else if (isize == zi->i_max_size) +- op = REQ_OP_ZONE_FINISH; +- else +- return -EPERM; +- +- inode_dio_wait(inode); +- +- /* Serialize against page faults */ +- filemap_invalidate_lock(inode->i_mapping); +- +- /* Serialize against zonefs_iomap_begin() */ +- mutex_lock(&zi->i_truncate_mutex); +- +- old_isize = i_size_read(inode); +- if (isize == old_isize) +- goto unlock; +- +- ret = zonefs_zone_mgmt(inode, op); +- if (ret) +- goto unlock; +- +- /* +- * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set, +- * take care of open zones. +- */ +- if (zi->i_flags & ZONEFS_ZONE_OPEN) { +- /* +- * Truncating a zone to EMPTY or FULL is the equivalent of +- * closing the zone. For a truncation to 0, we need to +- * re-open the zone to ensure new writes can be processed. +- * For a truncation to the maximum file size, the zone is +- * closed and writes cannot be accepted anymore, so clear +- * the open flag. +- */ +- if (!isize) +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); +- else +- zi->i_flags &= ~ZONEFS_ZONE_OPEN; +- } +- +- zonefs_update_stats(inode, isize); +- truncate_setsize(inode, isize); +- zi->i_wpoffset = isize; +- zonefs_account_active(inode); +- +-unlock: +- mutex_unlock(&zi->i_truncate_mutex); +- filemap_invalidate_unlock(inode->i_mapping); +- +- return ret; +-} +- +-static int zonefs_inode_setattr(struct user_namespace *mnt_userns, +- struct dentry *dentry, struct iattr *iattr) +-{ +- struct inode *inode = d_inode(dentry); +- int ret; +- +- if (unlikely(IS_IMMUTABLE(inode))) +- return -EPERM; +- +- ret = setattr_prepare(&init_user_ns, dentry, iattr); +- if (ret) +- return ret; +- +- /* +- * Since files and directories cannot be created nor deleted, do not +- * allow setting any write attributes on the sub-directories grouping +- * files by zone type. 
+- */ +- if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && +- (iattr->ia_mode & 0222)) +- return -EPERM; +- +- if (((iattr->ia_valid & ATTR_UID) && +- !uid_eq(iattr->ia_uid, inode->i_uid)) || +- ((iattr->ia_valid & ATTR_GID) && +- !gid_eq(iattr->ia_gid, inode->i_gid))) { +- ret = dquot_transfer(mnt_userns, inode, iattr); +- if (ret) +- return ret; +- } +- +- if (iattr->ia_valid & ATTR_SIZE) { +- ret = zonefs_file_truncate(inode, iattr->ia_size); +- if (ret) +- return ret; +- } +- +- setattr_copy(&init_user_ns, inode, iattr); +- +- return 0; +-} +- +-static const struct inode_operations zonefs_file_inode_operations = { +- .setattr = zonefs_inode_setattr, +-}; +- +-static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, +- int datasync) +-{ +- struct inode *inode = file_inode(file); +- int ret = 0; +- +- if (unlikely(IS_IMMUTABLE(inode))) +- return -EPERM; +- +- /* +- * Since only direct writes are allowed in sequential files, page cache +- * flush is needed only for conventional zone files. +- */ +- if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV) +- ret = file_write_and_wait_range(file, start, end); +- if (!ret) +- ret = blkdev_issue_flush(inode->i_sb->s_bdev); +- +- if (ret) +- zonefs_io_error(inode, true); +- +- return ret; +-} +- +-static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) +-{ +- struct inode *inode = file_inode(vmf->vma->vm_file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- vm_fault_t ret; +- +- if (unlikely(IS_IMMUTABLE(inode))) +- return VM_FAULT_SIGBUS; +- +- /* +- * Sanity check: only conventional zone files can have shared +- * writeable mappings. +- */ +- if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) +- return VM_FAULT_NOPAGE; +- +- sb_start_pagefault(inode->i_sb); +- file_update_time(vmf->vma->vm_file); +- +- /* Serialize against truncates */ +- filemap_invalidate_lock_shared(inode->i_mapping); +- ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops); +- filemap_invalidate_unlock_shared(inode->i_mapping); +- +- sb_end_pagefault(inode->i_sb); +- return ret; +-} +- +-static const struct vm_operations_struct zonefs_file_vm_ops = { +- .fault = filemap_fault, +- .map_pages = filemap_map_pages, +- .page_mkwrite = zonefs_filemap_page_mkwrite, +-}; +- +-static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) +-{ +- /* +- * Conventional zones accept random writes, so their files can support +- * shared writable mappings. For sequential zone files, only read +- * mappings are possible since there are no guarantees for write +- * ordering between msync() and page cache writeback. +- */ +- if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ && +- (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) +- return -EINVAL; +- +- file_accessed(file); +- vma->vm_ops = &zonefs_file_vm_ops; +- +- return 0; +-} +- +-static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) +-{ +- loff_t isize = i_size_read(file_inode(file)); +- +- /* +- * Seeks are limited to below the zone size for conventional zones +- * and below the zone write pointer for sequential zones. In both +- * cases, this limit is the inode size. 
+- */ +- return generic_file_llseek_size(file, offset, whence, isize, isize); +-} +- +-static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, +- int error, unsigned int flags) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- if (error) { +- zonefs_io_error(inode, true); +- return error; +- } +- +- if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) { +- /* +- * Note that we may be seeing completions out of order, +- * but that is not a problem since a write completed +- * successfully necessarily means that all preceding writes +- * were also successful. So we can safely increase the inode +- * size to the write end location. +- */ +- mutex_lock(&zi->i_truncate_mutex); +- if (i_size_read(inode) < iocb->ki_pos + size) { +- zonefs_update_stats(inode, iocb->ki_pos + size); +- zonefs_i_size_write(inode, iocb->ki_pos + size); +- } +- mutex_unlock(&zi->i_truncate_mutex); +- } +- +- return 0; +-} +- +-static const struct iomap_dio_ops zonefs_write_dio_ops = { +- .end_io = zonefs_file_write_dio_end_io, +-}; +- +-static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct block_device *bdev = inode->i_sb->s_bdev; +- unsigned int max = bdev_max_zone_append_sectors(bdev); +- struct bio *bio; +- ssize_t size; +- int nr_pages; +- ssize_t ret; +- +- max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); +- iov_iter_truncate(from, max); +- +- nr_pages = iov_iter_npages(from, BIO_MAX_VECS); +- if (!nr_pages) +- return 0; +- +- bio = bio_alloc(bdev, nr_pages, +- REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS); +- bio->bi_iter.bi_sector = zi->i_zsector; +- bio->bi_ioprio = iocb->ki_ioprio; +- if (iocb_is_dsync(iocb)) +- bio->bi_opf |= REQ_FUA; +- +- ret = bio_iov_iter_get_pages(bio, from); +- if (unlikely(ret)) +- goto out_release; +- +- size = bio->bi_iter.bi_size; +- task_io_account_write(size); +- +- if (iocb->ki_flags & IOCB_HIPRI) +- bio_set_polled(bio, iocb); +- +- ret = submit_bio_wait(bio); +- +- /* +- * If the file zone was written underneath the file system, the zone +- * write pointer may not be where we expect it to be, but the zone +- * append write can still succeed. So check manually that we wrote where +- * we intended to, that is, at zi->i_wpoffset. +- */ +- if (!ret) { +- sector_t wpsector = +- zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); +- +- if (bio->bi_iter.bi_sector != wpsector) { +- zonefs_warn(inode->i_sb, +- "Corrupted write pointer %llu for zone at %llu\n", +- wpsector, zi->i_zsector); +- ret = -EIO; +- } +- } +- +- zonefs_file_write_dio_end_io(iocb, size, ret, 0); +- trace_zonefs_file_dio_append(inode, size, ret); +- +-out_release: +- bio_release_pages(bio, false); +- bio_put(bio); +- +- if (ret >= 0) { +- iocb->ki_pos += size; +- return size; +- } +- +- return ret; +-} +- +-/* +- * Do not exceed the LFS limits nor the file zone size. If pos is under the +- * limit it becomes a short access. If it exceeds the limit, return -EFBIG. 
+- */ +-static loff_t zonefs_write_check_limits(struct file *file, loff_t pos, +- loff_t count) +-{ +- struct inode *inode = file_inode(file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- loff_t limit = rlimit(RLIMIT_FSIZE); +- loff_t max_size = zi->i_max_size; +- +- if (limit != RLIM_INFINITY) { +- if (pos >= limit) { +- send_sig(SIGXFSZ, current, 0); +- return -EFBIG; +- } +- count = min(count, limit - pos); +- } +- +- if (!(file->f_flags & O_LARGEFILE)) +- max_size = min_t(loff_t, MAX_NON_LFS, max_size); +- +- if (unlikely(pos >= max_size)) +- return -EFBIG; +- +- return min(count, max_size - pos); +-} +- +-static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from) +-{ +- struct file *file = iocb->ki_filp; +- struct inode *inode = file_inode(file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- loff_t count; +- +- if (IS_SWAPFILE(inode)) +- return -ETXTBSY; +- +- if (!iov_iter_count(from)) +- return 0; +- +- if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) +- return -EINVAL; +- +- if (iocb->ki_flags & IOCB_APPEND) { +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) +- return -EINVAL; +- mutex_lock(&zi->i_truncate_mutex); +- iocb->ki_pos = zi->i_wpoffset; +- mutex_unlock(&zi->i_truncate_mutex); +- } +- +- count = zonefs_write_check_limits(file, iocb->ki_pos, +- iov_iter_count(from)); +- if (count < 0) +- return count; +- +- iov_iter_truncate(from, count); +- return iov_iter_count(from); +-} +- +-/* +- * Handle direct writes. For sequential zone files, this is the only possible +- * write path. For these files, check that the user is issuing writes +- * sequentially from the end of the file. This code assumes that the block layer +- * delivers write requests to the device in sequential order. This is always the +- * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE +- * elevator feature is being used (e.g. mq-deadline). The block layer always +- * automatically select such an elevator for zoned block devices during the +- * device initialization. +- */ +-static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- bool sync = is_sync_kiocb(iocb); +- bool append = false; +- ssize_t ret, count; +- +- /* +- * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT +- * as this can cause write reordering (e.g. the first aio gets EAGAIN +- * on the inode lock but the second goes through but is now unaligned). 
+- */ +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync && +- (iocb->ki_flags & IOCB_NOWAIT)) +- return -EOPNOTSUPP; +- +- if (iocb->ki_flags & IOCB_NOWAIT) { +- if (!inode_trylock(inode)) +- return -EAGAIN; +- } else { +- inode_lock(inode); +- } +- +- count = zonefs_write_checks(iocb, from); +- if (count <= 0) { +- ret = count; +- goto inode_unlock; +- } +- +- if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { +- ret = -EINVAL; +- goto inode_unlock; +- } +- +- /* Enforce sequential writes (append only) in sequential zones */ +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) { +- mutex_lock(&zi->i_truncate_mutex); +- if (iocb->ki_pos != zi->i_wpoffset) { +- mutex_unlock(&zi->i_truncate_mutex); +- ret = -EINVAL; +- goto inode_unlock; +- } +- mutex_unlock(&zi->i_truncate_mutex); +- append = sync; +- } +- +- if (append) +- ret = zonefs_file_dio_append(iocb, from); +- else +- ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, +- &zonefs_write_dio_ops, 0, NULL, 0); +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && +- (ret > 0 || ret == -EIOCBQUEUED)) { +- if (ret > 0) +- count = ret; +- +- /* +- * Update the zone write pointer offset assuming the write +- * operation succeeded. If it did not, the error recovery path +- * will correct it. Also do active seq file accounting. +- */ +- mutex_lock(&zi->i_truncate_mutex); +- zi->i_wpoffset += count; +- zonefs_account_active(inode); +- mutex_unlock(&zi->i_truncate_mutex); +- } +- +-inode_unlock: +- inode_unlock(inode); +- +- return ret; +-} +- +-static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, +- struct iov_iter *from) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- ssize_t ret; +- +- /* +- * Direct IO writes are mandatory for sequential zone files so that the +- * write IO issuing order is preserved. 
+- */ +- if (zi->i_ztype != ZONEFS_ZTYPE_CNV) +- return -EIO; +- +- if (iocb->ki_flags & IOCB_NOWAIT) { +- if (!inode_trylock(inode)) +- return -EAGAIN; +- } else { +- inode_lock(inode); +- } +- +- ret = zonefs_write_checks(iocb, from); +- if (ret <= 0) +- goto inode_unlock; +- +- ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops); +- if (ret > 0) +- iocb->ki_pos += ret; +- else if (ret == -EIO) +- zonefs_io_error(inode, true); +- +-inode_unlock: +- inode_unlock(inode); +- if (ret > 0) +- ret = generic_write_sync(iocb, ret); +- +- return ret; +-} +- +-static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- +- if (unlikely(IS_IMMUTABLE(inode))) +- return -EPERM; +- +- if (sb_rdonly(inode->i_sb)) +- return -EROFS; +- +- /* Write operations beyond the zone size are not allowed */ +- if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size) +- return -EFBIG; +- +- if (iocb->ki_flags & IOCB_DIRECT) { +- ssize_t ret = zonefs_file_dio_write(iocb, from); +- if (ret != -ENOTBLK) +- return ret; +- } +- +- return zonefs_file_buffered_write(iocb, from); +-} +- +-static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size, +- int error, unsigned int flags) +-{ +- if (error) { +- zonefs_io_error(file_inode(iocb->ki_filp), false); +- return error; +- } +- +- return 0; +-} +- +-static const struct iomap_dio_ops zonefs_read_dio_ops = { +- .end_io = zonefs_file_read_dio_end_io, +-}; +- +-static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- loff_t isize; +- ssize_t ret; +- +- /* Offline zones cannot be read */ +- if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777))) +- return -EPERM; +- +- if (iocb->ki_pos >= zi->i_max_size) +- return 0; +- +- if (iocb->ki_flags & IOCB_NOWAIT) { +- if (!inode_trylock_shared(inode)) +- return -EAGAIN; +- } else { +- inode_lock_shared(inode); +- } +- +- /* Limit read operations to written data */ +- mutex_lock(&zi->i_truncate_mutex); +- isize = i_size_read(inode); +- if (iocb->ki_pos >= isize) { +- mutex_unlock(&zi->i_truncate_mutex); +- ret = 0; +- goto inode_unlock; +- } +- iov_iter_truncate(to, isize - iocb->ki_pos); +- mutex_unlock(&zi->i_truncate_mutex); +- +- if (iocb->ki_flags & IOCB_DIRECT) { +- size_t count = iov_iter_count(to); +- +- if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { +- ret = -EINVAL; +- goto inode_unlock; +- } +- file_accessed(iocb->ki_filp); +- ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops, +- &zonefs_read_dio_ops, 0, NULL, 0); +- } else { +- ret = generic_file_read_iter(iocb, to); +- if (ret == -EIO) +- zonefs_io_error(inode, false); +- } +- +-inode_unlock: +- inode_unlock_shared(inode); +- +- return ret; +-} +- +-/* +- * Write open accounting is done only for sequential files. 
+- */ +-static inline bool zonefs_seq_file_need_wro(struct inode *inode, +- struct file *file) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) +- return false; +- +- if (!(file->f_mode & FMODE_WRITE)) +- return false; +- +- return true; +-} +- +-static int zonefs_seq_file_write_open(struct inode *inode) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- int ret = 0; +- +- mutex_lock(&zi->i_truncate_mutex); +- +- if (!zi->i_wr_refcnt) { +- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); +- unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files); +- +- if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { +- +- if (sbi->s_max_wro_seq_files +- && wro > sbi->s_max_wro_seq_files) { +- atomic_dec(&sbi->s_wro_seq_files); +- ret = -EBUSY; +- goto unlock; +- } +- +- if (i_size_read(inode) < zi->i_max_size) { +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); +- if (ret) { +- atomic_dec(&sbi->s_wro_seq_files); +- goto unlock; +- } +- zi->i_flags |= ZONEFS_ZONE_OPEN; +- zonefs_account_active(inode); +- } +- } +- } +- +- zi->i_wr_refcnt++; +- +-unlock: +- mutex_unlock(&zi->i_truncate_mutex); +- +- return ret; +-} +- +-static int zonefs_file_open(struct inode *inode, struct file *file) +-{ +- int ret; +- +- ret = generic_file_open(inode, file); +- if (ret) +- return ret; +- +- if (zonefs_seq_file_need_wro(inode, file)) +- return zonefs_seq_file_write_open(inode); +- +- return 0; +-} +- +-static void zonefs_seq_file_write_close(struct inode *inode) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +- int ret = 0; +- +- mutex_lock(&zi->i_truncate_mutex); +- +- zi->i_wr_refcnt--; +- if (zi->i_wr_refcnt) +- goto unlock; +- +- /* +- * The file zone may not be open anymore (e.g. the file was truncated to +- * its maximum size or it was fully written). For this case, we only +- * need to decrement the write open count. +- */ +- if (zi->i_flags & ZONEFS_ZONE_OPEN) { +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); +- if (ret) { +- __zonefs_io_error(inode, false); +- /* +- * Leaving zones explicitly open may lead to a state +- * where most zones cannot be written (zone resources +- * exhausted). So take preventive action by remounting +- * read-only. +- */ +- if (zi->i_flags & ZONEFS_ZONE_OPEN && +- !(sb->s_flags & SB_RDONLY)) { +- zonefs_warn(sb, +- "closing zone at %llu failed %d\n", +- zi->i_zsector, ret); +- zonefs_warn(sb, +- "remounting filesystem read-only\n"); +- sb->s_flags |= SB_RDONLY; +- } +- goto unlock; +- } +- +- zi->i_flags &= ~ZONEFS_ZONE_OPEN; +- zonefs_account_active(inode); +- } +- +- atomic_dec(&sbi->s_wro_seq_files); +- +-unlock: +- mutex_unlock(&zi->i_truncate_mutex); +-} +- +-static int zonefs_file_release(struct inode *inode, struct file *file) +-{ +- /* +- * If we explicitly open a zone we must close it again as well, but the +- * zone management operation can fail (either due to an IO error or as +- * the zone has gone offline or read-only). Make sure we don't fail the +- * close(2) for user-space. 
+- */ +- if (zonefs_seq_file_need_wro(inode, file)) +- zonefs_seq_file_write_close(inode); +- +- return 0; +-} +- +-static const struct file_operations zonefs_file_operations = { +- .open = zonefs_file_open, +- .release = zonefs_file_release, +- .fsync = zonefs_file_fsync, +- .mmap = zonefs_file_mmap, +- .llseek = zonefs_file_llseek, +- .read_iter = zonefs_file_read_iter, +- .write_iter = zonefs_file_write_iter, +- .splice_read = generic_file_splice_read, +- .splice_write = iter_file_splice_write, +- .iopoll = iocb_bio_iopoll, +-}; +- + static struct kmem_cache *zonefs_inode_cachep; + + static struct inode *zonefs_alloc_inode(struct super_block *sb) +@@ -1408,13 +505,47 @@ static int zonefs_remount(struct super_block *sb, int *flags, char *data) + return zonefs_parse_options(sb, data); + } + +-static const struct super_operations zonefs_sops = { +- .alloc_inode = zonefs_alloc_inode, +- .free_inode = zonefs_free_inode, +- .statfs = zonefs_statfs, +- .remount_fs = zonefs_remount, +- .show_options = zonefs_show_options, +-}; ++static int zonefs_inode_setattr(struct user_namespace *mnt_userns, ++ struct dentry *dentry, struct iattr *iattr) ++{ ++ struct inode *inode = d_inode(dentry); ++ int ret; ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ ++ ret = setattr_prepare(&init_user_ns, dentry, iattr); ++ if (ret) ++ return ret; ++ ++ /* ++ * Since files and directories cannot be created nor deleted, do not ++ * allow setting any write attributes on the sub-directories grouping ++ * files by zone type. ++ */ ++ if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && ++ (iattr->ia_mode & 0222)) ++ return -EPERM; ++ ++ if (((iattr->ia_valid & ATTR_UID) && ++ !uid_eq(iattr->ia_uid, inode->i_uid)) || ++ ((iattr->ia_valid & ATTR_GID) && ++ !gid_eq(iattr->ia_gid, inode->i_gid))) { ++ ret = dquot_transfer(mnt_userns, inode, iattr); ++ if (ret) ++ return ret; ++ } ++ ++ if (iattr->ia_valid & ATTR_SIZE) { ++ ret = zonefs_file_truncate(inode, iattr->ia_size); ++ if (ret) ++ return ret; ++ } ++ ++ setattr_copy(&init_user_ns, inode, iattr); ++ ++ return 0; ++} + + static const struct inode_operations zonefs_dir_inode_operations = { + .lookup = simple_lookup, +@@ -1434,6 +565,10 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, + inc_nlink(parent); + } + ++static const struct inode_operations zonefs_file_inode_operations = { ++ .setattr = zonefs_inode_setattr, ++}; ++ + static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + enum zonefs_ztype type) + { +@@ -1785,6 +920,14 @@ static int zonefs_read_super(struct super_block *sb) + return ret; + } + ++static const struct super_operations zonefs_sops = { ++ .alloc_inode = zonefs_alloc_inode, ++ .free_inode = zonefs_free_inode, ++ .statfs = zonefs_statfs, ++ .remount_fs = zonefs_remount, ++ .show_options = zonefs_show_options, ++}; ++ + /* + * Check that the device is zoned. If it is, get the list of zones and create + * sub-directories and files according to the device zone configuration and +diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h +index 1dbe78119ff16..839ebe9afb6c1 100644 +--- a/fs/zonefs/zonefs.h ++++ b/fs/zonefs/zonefs.h +@@ -209,6 +209,28 @@ static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb) + #define zonefs_warn(sb, format, args...) 
\ + pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args) + ++/* In super.c */ ++void zonefs_account_active(struct inode *inode); ++int zonefs_zone_mgmt(struct inode *inode, enum req_op op); ++void zonefs_i_size_write(struct inode *inode, loff_t isize); ++void zonefs_update_stats(struct inode *inode, loff_t new_isize); ++void __zonefs_io_error(struct inode *inode, bool write); ++ ++static inline void zonefs_io_error(struct inode *inode, bool write) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ ++ mutex_lock(&zi->i_truncate_mutex); ++ __zonefs_io_error(inode, write); ++ mutex_unlock(&zi->i_truncate_mutex); ++} ++ ++/* In file.c */ ++extern const struct address_space_operations zonefs_file_aops; ++extern const struct file_operations zonefs_file_operations; ++int zonefs_file_truncate(struct inode *inode, loff_t isize); ++ ++/* In sysfs.c */ + int zonefs_sysfs_register(struct super_block *sb); + void zonefs_sysfs_unregister(struct super_block *sb); + int zonefs_sysfs_init(void); +-- +2.39.2 + diff --git a/queue-6.1/zonefs-separate-zone-information-from-inode-informat.patch b/queue-6.1/zonefs-separate-zone-information-from-inode-informat.patch new file mode 100644 index 00000000000..64a88d8b794 --- /dev/null +++ b/queue-6.1/zonefs-separate-zone-information-from-inode-informat.patch @@ -0,0 +1,1485 @@ +From 7fa0c6f6351e25a9e83feab49308a1b92daf841c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Nov 2022 18:15:40 +0900 +Subject: zonefs: Separate zone information from inode information + +From: Damien Le Moal + +[ Upstream commit aa7f243f32e1d18036ee00d71d3ccfad70ae2121 ] + +In preparation for adding dynamic inode allocation, separate an inode +zone information from the zonefs inode structure. The new data structure +zonefs_zone is introduced to store in memory information about a zone +that must be kept throughout the lifetime of the device mount. + +Linking between a zone file inode and its zone information is done by +setting the inode i_private field to point to a struct zonefs_zone. +Using the i_private pointer avoids the need for adding a pointer in +struct zonefs_inode_info. Beside the vfs inode, this structure is +reduced to a mutex and a write open counter. + +One struct zonefs_zone is created per file inode on mount. These +structures are organized in an array using the new struct +zonefs_zone_group data structure to represent zone groups. The +zonefs_zone arrays are indexed per file number (the index of a struct +zonefs_zone in its array directly gives the file number/name for that +zone file inode). 
+ +Signed-off-by: Damien Le Moal +Reviewed-by: Johannes Thumshirn +Stable-dep-of: 88b170088ad2 ("zonefs: Fix error message in zonefs_file_dio_append()") +Signed-off-by: Sasha Levin +--- + fs/zonefs/file.c | 99 ++++---- + fs/zonefs/super.c | 571 +++++++++++++++++++++++++++------------------ + fs/zonefs/trace.h | 20 +- + fs/zonefs/zonefs.h | 63 +++-- + 4 files changed, 449 insertions(+), 304 deletions(-) + +diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c +index 64873d31d75dd..738b0e28d74b5 100644 +--- a/fs/zonefs/file.c ++++ b/fs/zonefs/file.c +@@ -29,6 +29,7 @@ static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, + struct iomap *iomap, struct iomap *srcmap) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + +@@ -46,7 +47,7 @@ static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, + iomap->length = length; + } else { + iomap->type = IOMAP_MAPPED; +- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; ++ iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset; + iomap->length = isize - iomap->offset; + } + mutex_unlock(&zi->i_truncate_mutex); +@@ -65,11 +66,12 @@ static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, + struct iomap *iomap, struct iomap *srcmap) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + + /* All write I/Os should always be within the file maximum size */ +- if (WARN_ON_ONCE(offset + length > zi->i_max_size)) ++ if (WARN_ON_ONCE(offset + length > z->z_capacity)) + return -EIO; + + /* +@@ -77,7 +79,7 @@ static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, + * checked when writes are issued, so warn if we see a page writeback + * operation. 
+ */ +- if (WARN_ON_ONCE(zonefs_zone_is_seq(zi) && !(flags & IOMAP_DIRECT))) ++ if (WARN_ON_ONCE(zonefs_zone_is_seq(z) && !(flags & IOMAP_DIRECT))) + return -EIO; + + /* +@@ -88,11 +90,11 @@ static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, + mutex_lock(&zi->i_truncate_mutex); + iomap->bdev = inode->i_sb->s_bdev; + iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); +- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; ++ iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset; + isize = i_size_read(inode); + if (iomap->offset >= isize) { + iomap->type = IOMAP_UNWRITTEN; +- iomap->length = zi->i_max_size - iomap->offset; ++ iomap->length = z->z_capacity - iomap->offset; + } else { + iomap->type = IOMAP_MAPPED; + iomap->length = isize - iomap->offset; +@@ -125,9 +127,9 @@ static void zonefs_readahead(struct readahead_control *rac) + static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, + struct inode *inode, loff_t offset) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + +- if (WARN_ON_ONCE(zonefs_zone_is_seq(zi))) ++ if (WARN_ON_ONCE(zonefs_zone_is_seq(z))) + return -EIO; + if (WARN_ON_ONCE(offset >= i_size_read(inode))) + return -EIO; +@@ -137,7 +139,8 @@ static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, + offset < wpc->iomap.offset + wpc->iomap.length) + return 0; + +- return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset, ++ return zonefs_write_iomap_begin(inode, offset, ++ z->z_capacity - offset, + IOMAP_WRITE, &wpc->iomap, NULL); + } + +@@ -185,6 +188,7 @@ const struct address_space_operations zonefs_file_aops = { + int zonefs_file_truncate(struct inode *inode, loff_t isize) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + loff_t old_isize; + enum req_op op; + int ret = 0; +@@ -194,12 +198,12 @@ int zonefs_file_truncate(struct inode *inode, loff_t isize) + * only down to a 0 size, which is equivalent to a zone reset, and to + * the maximum file size, which is equivalent to a zone finish. + */ +- if (!zonefs_zone_is_seq(zi)) ++ if (!zonefs_zone_is_seq(z)) + return -EPERM; + + if (!isize) + op = REQ_OP_ZONE_RESET; +- else if (isize == zi->i_max_size) ++ else if (isize == z->z_capacity) + op = REQ_OP_ZONE_FINISH; + else + return -EPERM; +@@ -216,7 +220,7 @@ int zonefs_file_truncate(struct inode *inode, loff_t isize) + if (isize == old_isize) + goto unlock; + +- ret = zonefs_zone_mgmt(inode, op); ++ ret = zonefs_inode_zone_mgmt(inode, op); + if (ret) + goto unlock; + +@@ -224,7 +228,7 @@ int zonefs_file_truncate(struct inode *inode, loff_t isize) + * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set, + * take care of open zones. + */ +- if (zi->i_flags & ZONEFS_ZONE_OPEN) { ++ if (z->z_flags & ZONEFS_ZONE_OPEN) { + /* + * Truncating a zone to EMPTY or FULL is the equivalent of + * closing the zone. For a truncation to 0, we need to +@@ -234,15 +238,15 @@ int zonefs_file_truncate(struct inode *inode, loff_t isize) + * the open flag. 
+ */ + if (!isize) +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); ++ ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_OPEN); + else +- zi->i_flags &= ~ZONEFS_ZONE_OPEN; ++ z->z_flags &= ~ZONEFS_ZONE_OPEN; + } + + zonefs_update_stats(inode, isize); + truncate_setsize(inode, isize); +- zi->i_wpoffset = isize; +- zonefs_account_active(inode); ++ z->z_wpoffset = isize; ++ zonefs_inode_account_active(inode); + + unlock: + mutex_unlock(&zi->i_truncate_mutex); +@@ -349,7 +353,7 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, + return error; + } + +- if (size && zonefs_zone_is_seq(zi)) { ++ if (size && zonefs_inode_is_seq(inode)) { + /* + * Note that we may be seeing completions out of order, + * but that is not a problem since a write completed +@@ -375,7 +379,7 @@ static const struct iomap_dio_ops zonefs_write_dio_ops = { + static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) + { + struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct block_device *bdev = inode->i_sb->s_bdev; + unsigned int max = bdev_max_zone_append_sectors(bdev); + struct bio *bio; +@@ -392,7 +396,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) + + bio = bio_alloc(bdev, nr_pages, + REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS); +- bio->bi_iter.bi_sector = zi->i_zsector; ++ bio->bi_iter.bi_sector = z->z_sector; + bio->bi_ioprio = iocb->ki_ioprio; + if (iocb_is_dsync(iocb)) + bio->bi_opf |= REQ_FUA; +@@ -417,12 +421,12 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) + */ + if (!ret) { + sector_t wpsector = +- zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); ++ z->z_sector + (z->z_wpoffset >> SECTOR_SHIFT); + + if (bio->bi_iter.bi_sector != wpsector) { + zonefs_warn(inode->i_sb, + "Corrupted write pointer %llu for zone at %llu\n", +- wpsector, zi->i_zsector); ++ wpsector, z->z_sector); + ret = -EIO; + } + } +@@ -450,9 +454,9 @@ static loff_t zonefs_write_check_limits(struct file *file, loff_t pos, + loff_t count) + { + struct inode *inode = file_inode(file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + loff_t limit = rlimit(RLIMIT_FSIZE); +- loff_t max_size = zi->i_max_size; ++ loff_t max_size = z->z_capacity; + + if (limit != RLIM_INFINITY) { + if (pos >= limit) { +@@ -476,6 +480,7 @@ static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from) + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + loff_t count; + + if (IS_SWAPFILE(inode)) +@@ -488,10 +493,10 @@ static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from) + return -EINVAL; + + if (iocb->ki_flags & IOCB_APPEND) { +- if (zonefs_zone_is_cnv(zi)) ++ if (zonefs_zone_is_cnv(z)) + return -EINVAL; + mutex_lock(&zi->i_truncate_mutex); +- iocb->ki_pos = zi->i_wpoffset; ++ iocb->ki_pos = z->z_wpoffset; + mutex_unlock(&zi->i_truncate_mutex); + } + +@@ -518,6 +523,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) + { + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + bool sync = is_sync_kiocb(iocb); + bool append = false; +@@ -528,7 
+534,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) + * as this can cause write reordering (e.g. the first aio gets EAGAIN + * on the inode lock but the second goes through but is now unaligned). + */ +- if (zonefs_zone_is_seq(zi) && !sync && (iocb->ki_flags & IOCB_NOWAIT)) ++ if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT)) + return -EOPNOTSUPP; + + if (iocb->ki_flags & IOCB_NOWAIT) { +@@ -550,9 +556,9 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) + } + + /* Enforce sequential writes (append only) in sequential zones */ +- if (zonefs_zone_is_seq(zi)) { ++ if (zonefs_zone_is_seq(z)) { + mutex_lock(&zi->i_truncate_mutex); +- if (iocb->ki_pos != zi->i_wpoffset) { ++ if (iocb->ki_pos != z->z_wpoffset) { + mutex_unlock(&zi->i_truncate_mutex); + ret = -EINVAL; + goto inode_unlock; +@@ -566,7 +572,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) + else + ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, + &zonefs_write_dio_ops, 0, NULL, 0); +- if (zonefs_zone_is_seq(zi) && ++ if (zonefs_zone_is_seq(z) && + (ret > 0 || ret == -EIOCBQUEUED)) { + if (ret > 0) + count = ret; +@@ -577,8 +583,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) + * will correct it. Also do active seq file accounting. + */ + mutex_lock(&zi->i_truncate_mutex); +- zi->i_wpoffset += count; +- zonefs_account_active(inode); ++ z->z_wpoffset += count; ++ zonefs_inode_account_active(inode); + mutex_unlock(&zi->i_truncate_mutex); + } + +@@ -629,6 +635,7 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, + static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) + { + struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; +@@ -636,8 +643,8 @@ static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) + if (sb_rdonly(inode->i_sb)) + return -EROFS; + +- /* Write operations beyond the zone size are not allowed */ +- if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size) ++ /* Write operations beyond the zone capacity are not allowed */ ++ if (iocb->ki_pos >= z->z_capacity) + return -EFBIG; + + if (iocb->ki_flags & IOCB_DIRECT) { +@@ -669,6 +676,7 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) + { + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + ssize_t ret; +@@ -677,7 +685,7 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) + if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777))) + return -EPERM; + +- if (iocb->ki_pos >= zi->i_max_size) ++ if (iocb->ki_pos >= z->z_capacity) + return 0; + + if (iocb->ki_flags & IOCB_NOWAIT) { +@@ -738,6 +746,7 @@ static inline bool zonefs_seq_file_need_wro(struct inode *inode, + static int zonefs_seq_file_write_open(struct inode *inode) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + int ret = 0; + + mutex_lock(&zi->i_truncate_mutex); +@@ -755,14 +764,15 @@ static int zonefs_seq_file_write_open(struct inode *inode) + goto unlock; + } + +- if (i_size_read(inode) < zi->i_max_size) { +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); ++ if (i_size_read(inode) < z->z_capacity) { ++ ret = 
zonefs_inode_zone_mgmt(inode, ++ REQ_OP_ZONE_OPEN); + if (ret) { + atomic_dec(&sbi->s_wro_seq_files); + goto unlock; + } +- zi->i_flags |= ZONEFS_ZONE_OPEN; +- zonefs_account_active(inode); ++ z->z_flags |= ZONEFS_ZONE_OPEN; ++ zonefs_inode_account_active(inode); + } + } + } +@@ -792,6 +802,7 @@ static int zonefs_file_open(struct inode *inode, struct file *file) + static void zonefs_seq_file_write_close(struct inode *inode) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + int ret = 0; +@@ -807,8 +818,8 @@ static void zonefs_seq_file_write_close(struct inode *inode) + * its maximum size or it was fully written). For this case, we only + * need to decrement the write open count. + */ +- if (zi->i_flags & ZONEFS_ZONE_OPEN) { +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); ++ if (z->z_flags & ZONEFS_ZONE_OPEN) { ++ ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); + if (ret) { + __zonefs_io_error(inode, false); + /* +@@ -817,11 +828,11 @@ static void zonefs_seq_file_write_close(struct inode *inode) + * exhausted). So take preventive action by remounting + * read-only. + */ +- if (zi->i_flags & ZONEFS_ZONE_OPEN && ++ if (z->z_flags & ZONEFS_ZONE_OPEN && + !(sb->s_flags & SB_RDONLY)) { + zonefs_warn(sb, + "closing zone at %llu failed %d\n", +- zi->i_zsector, ret); ++ z->z_sector, ret); + zonefs_warn(sb, + "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; +@@ -829,8 +840,8 @@ static void zonefs_seq_file_write_close(struct inode *inode) + goto unlock; + } + +- zi->i_flags &= ~ZONEFS_ZONE_OPEN; +- zonefs_account_active(inode); ++ z->z_flags &= ~ZONEFS_ZONE_OPEN; ++ zonefs_inode_account_active(inode); + } + + atomic_dec(&sbi->s_wro_seq_files); +diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c +index a4af29dc32e7d..270ded209dde5 100644 +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -28,33 +28,47 @@ + #include "trace.h" + + /* +- * Manage the active zone count. Called with zi->i_truncate_mutex held. ++ * Get the name of a zone group directory. + */ +-void zonefs_account_active(struct inode *inode) ++static const char *zonefs_zgroup_name(enum zonefs_ztype ztype) + { +- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ switch (ztype) { ++ case ZONEFS_ZTYPE_CNV: ++ return "cnv"; ++ case ZONEFS_ZTYPE_SEQ: ++ return "seq"; ++ default: ++ WARN_ON_ONCE(1); ++ return "???"; ++ } ++} + +- lockdep_assert_held(&zi->i_truncate_mutex); ++/* ++ * Manage the active zone count. ++ */ ++static void zonefs_account_active(struct super_block *sb, ++ struct zonefs_zone *z) ++{ ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + +- if (zonefs_zone_is_cnv(zi)) ++ if (zonefs_zone_is_cnv(z)) + return; + + /* + * For zones that transitioned to the offline or readonly condition, + * we only need to clear the active state. + */ +- if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) ++ if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) + goto out; + + /* + * If the zone is active, that is, if it is explicitly open or + * partially written, check if it was already accounted as active. 
+ */ +- if ((zi->i_flags & ZONEFS_ZONE_OPEN) || +- (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size)) { +- if (!(zi->i_flags & ZONEFS_ZONE_ACTIVE)) { +- zi->i_flags |= ZONEFS_ZONE_ACTIVE; ++ if ((z->z_flags & ZONEFS_ZONE_OPEN) || ++ (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) { ++ if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) { ++ z->z_flags |= ZONEFS_ZONE_ACTIVE; + atomic_inc(&sbi->s_active_seq_files); + } + return; +@@ -62,18 +76,29 @@ void zonefs_account_active(struct inode *inode) + + out: + /* The zone is not active. If it was, update the active count */ +- if (zi->i_flags & ZONEFS_ZONE_ACTIVE) { +- zi->i_flags &= ~ZONEFS_ZONE_ACTIVE; ++ if (z->z_flags & ZONEFS_ZONE_ACTIVE) { ++ z->z_flags &= ~ZONEFS_ZONE_ACTIVE; + atomic_dec(&sbi->s_active_seq_files); + } + } + +-int zonefs_zone_mgmt(struct inode *inode, enum req_op op) ++/* ++ * Manage the active zone count. Called with zi->i_truncate_mutex held. ++ */ ++void zonefs_inode_account_active(struct inode *inode) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- int ret; ++ lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); + +- lockdep_assert_held(&zi->i_truncate_mutex); ++ return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode)); ++} ++ ++/* ++ * Execute a zone management operation. ++ */ ++static int zonefs_zone_mgmt(struct super_block *sb, ++ struct zonefs_zone *z, enum req_op op) ++{ ++ int ret; + + /* + * With ZNS drives, closing an explicitly open zone that has not been +@@ -83,37 +108,45 @@ int zonefs_zone_mgmt(struct inode *inode, enum req_op op) + * are exceeded, make sure that the zone does not remain active by + * resetting it. + */ +- if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset) ++ if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset) + op = REQ_OP_ZONE_RESET; + +- trace_zonefs_zone_mgmt(inode, op); +- ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, +- zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS); ++ trace_zonefs_zone_mgmt(sb, z, op); ++ ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector, ++ z->z_size >> SECTOR_SHIFT, GFP_NOFS); + if (ret) { +- zonefs_err(inode->i_sb, ++ zonefs_err(sb, + "Zone management operation %s at %llu failed %d\n", +- blk_op_str(op), zi->i_zsector, ret); ++ blk_op_str(op), z->z_sector, ret); + return ret; + } + + return 0; + } + ++int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op) ++{ ++ lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); ++ ++ return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op); ++} ++ + void zonefs_i_size_write(struct inode *inode, loff_t isize) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + + i_size_write(inode, isize); ++ + /* + * A full zone is no longer open/active and does not need + * explicit closing. + */ +- if (isize >= zi->i_max_size) { ++ if (isize >= z->z_capacity) { + struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); + +- if (zi->i_flags & ZONEFS_ZONE_ACTIVE) ++ if (z->z_flags & ZONEFS_ZONE_ACTIVE) + atomic_dec(&sbi->s_active_seq_files); +- zi->i_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); ++ z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); + } + } + +@@ -150,20 +183,18 @@ void zonefs_update_stats(struct inode *inode, loff_t new_isize) + } + + /* +- * Check a zone condition and adjust its file inode access permissions for +- * offline and readonly zones. Return the inode size corresponding to the +- * amount of readable data in the zone. ++ * Check a zone condition. 
Return the amount of written (and still readable) ++ * data in the zone. + */ +-static loff_t zonefs_check_zone_condition(struct inode *inode, ++static loff_t zonefs_check_zone_condition(struct super_block *sb, ++ struct zonefs_zone *z, + struct blk_zone *zone) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- + switch (zone->cond) { + case BLK_ZONE_COND_OFFLINE: +- zonefs_warn(inode->i_sb, "inode %lu: offline zone\n", +- inode->i_ino); +- zi->i_flags |= ZONEFS_ZONE_OFFLINE; ++ zonefs_warn(sb, "Zone %llu: offline zone\n", ++ z->z_sector); ++ z->z_flags |= ZONEFS_ZONE_OFFLINE; + return 0; + case BLK_ZONE_COND_READONLY: + /* +@@ -174,18 +205,18 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, + * the inode size as it was when last updated so that the user + * can recover data. + */ +- zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", +- inode->i_ino); +- zi->i_flags |= ZONEFS_ZONE_READONLY; +- if (zonefs_zone_is_cnv(zi)) +- return zi->i_max_size; +- return zi->i_wpoffset; ++ zonefs_warn(sb, "Zone %llu: read-only zone\n", ++ z->z_sector); ++ z->z_flags |= ZONEFS_ZONE_READONLY; ++ if (zonefs_zone_is_cnv(z)) ++ return z->z_capacity; ++ return z->z_wpoffset; + case BLK_ZONE_COND_FULL: + /* The write pointer of full zones is invalid. */ +- return zi->i_max_size; ++ return z->z_capacity; + default: +- if (zonefs_zone_is_cnv(zi)) +- return zi->i_max_size; ++ if (zonefs_zone_is_cnv(z)) ++ return z->z_capacity; + return (zone->wp - zone->start) << SECTOR_SHIFT; + } + } +@@ -196,22 +227,22 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, + */ + static void zonefs_inode_update_mode(struct inode *inode) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + +- if (zi->i_flags & ZONEFS_ZONE_OFFLINE) { ++ if (z->z_flags & ZONEFS_ZONE_OFFLINE) { + /* Offline zones cannot be read nor written */ + inode->i_flags |= S_IMMUTABLE; + inode->i_mode &= ~0777; +- } else if (zi->i_flags & ZONEFS_ZONE_READONLY) { ++ } else if (z->z_flags & ZONEFS_ZONE_READONLY) { + /* Readonly zones cannot be written */ + inode->i_flags |= S_IMMUTABLE; +- if (zi->i_flags & ZONEFS_ZONE_INIT_MODE) ++ if (z->z_flags & ZONEFS_ZONE_INIT_MODE) + inode->i_mode &= ~0777; + else + inode->i_mode &= ~0222; + } + +- zi->i_flags &= ~ZONEFS_ZONE_INIT_MODE; ++ z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; + } + + struct zonefs_ioerr_data { +@@ -224,7 +255,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + { + struct zonefs_ioerr_data *err = data; + struct inode *inode = err->inode; +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + loff_t isize, data_size; +@@ -235,9 +266,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * as there is no inconsistency between the inode size and the amount of + * data writen in the zone (data_size). 
+ */ +- data_size = zonefs_check_zone_condition(inode, zone); ++ data_size = zonefs_check_zone_condition(sb, z, zone); + isize = i_size_read(inode); +- if (!(zi->i_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && ++ if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && + !err->write && isize == data_size) + return 0; + +@@ -260,8 +291,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * In all cases, warn about inode size inconsistency and handle the + * IO error according to the zone condition and to the mount options. + */ +- if (zonefs_zone_is_seq(zi) && isize != data_size) +- zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", ++ if (zonefs_zone_is_seq(z) && isize != data_size) ++ zonefs_warn(sb, ++ "inode %lu: invalid size %lld (should be %lld)\n", + inode->i_ino, isize, data_size); + + /* +@@ -270,20 +302,20 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * zone condition to read-only and offline respectively, as if the + * condition was signaled by the hardware. + */ +- if ((zi->i_flags & ZONEFS_ZONE_OFFLINE) || ++ if ((z->z_flags & ZONEFS_ZONE_OFFLINE) || + (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) { + zonefs_warn(sb, "inode %lu: read/write access disabled\n", + inode->i_ino); +- if (!(zi->i_flags & ZONEFS_ZONE_OFFLINE)) +- zi->i_flags |= ZONEFS_ZONE_OFFLINE; ++ if (!(z->z_flags & ZONEFS_ZONE_OFFLINE)) ++ z->z_flags |= ZONEFS_ZONE_OFFLINE; + zonefs_inode_update_mode(inode); + data_size = 0; +- } else if ((zi->i_flags & ZONEFS_ZONE_READONLY) || ++ } else if ((z->z_flags & ZONEFS_ZONE_READONLY) || + (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) { + zonefs_warn(sb, "inode %lu: write access disabled\n", + inode->i_ino); +- if (!(zi->i_flags & ZONEFS_ZONE_READONLY)) +- zi->i_flags |= ZONEFS_ZONE_READONLY; ++ if (!(z->z_flags & ZONEFS_ZONE_READONLY)) ++ z->z_flags |= ZONEFS_ZONE_READONLY; + zonefs_inode_update_mode(inode); + data_size = isize; + } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && +@@ -299,8 +331,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * close of the zone when the inode file is closed. + */ + if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) && +- (zi->i_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) +- zi->i_flags &= ~ZONEFS_ZONE_OPEN; ++ (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) ++ z->z_flags &= ~ZONEFS_ZONE_OPEN; + + /* + * If error=remount-ro was specified, any error result in remounting +@@ -317,8 +349,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + */ + zonefs_update_stats(inode, data_size); + zonefs_i_size_write(inode, data_size); +- zi->i_wpoffset = data_size; +- zonefs_account_active(inode); ++ z->z_wpoffset = data_size; ++ zonefs_inode_account_active(inode); + + return 0; + } +@@ -332,7 +364,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + */ + void __zonefs_io_error(struct inode *inode, bool write) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + unsigned int noio_flag; +@@ -348,8 +380,8 @@ void __zonefs_io_error(struct inode *inode, bool write) + * files with aggregated conventional zones, for which the inode zone + * size is always larger than the device zone size. 
+ */ +- if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev)) +- nr_zones = zi->i_zone_size >> ++ if (z->z_size > bdev_zone_sectors(sb->s_bdev)) ++ nr_zones = z->z_size >> + (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + + /* +@@ -361,7 +393,7 @@ void __zonefs_io_error(struct inode *inode, bool write) + * the GFP_NOIO context avoids both problems. + */ + noio_flag = memalloc_noio_save(); +- ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones, ++ ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, + zonefs_io_error_cb, &err); + if (ret != nr_zones) + zonefs_err(sb, "Get inode %lu zone information failed %d\n", +@@ -381,9 +413,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) + + inode_init_once(&zi->i_vnode); + mutex_init(&zi->i_truncate_mutex); +- zi->i_wpoffset = 0; + zi->i_wr_refcnt = 0; +- zi->i_flags = 0; + + return &zi->i_vnode; + } +@@ -416,8 +446,8 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) + buf->f_bavail = buf->f_bfree; + + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { +- if (sbi->s_nr_files[t]) +- buf->f_files += sbi->s_nr_files[t] + 1; ++ if (sbi->s_zgroup[t].g_nr_zones) ++ buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1; + } + buf->f_ffree = 0; + +@@ -557,11 +587,11 @@ static const struct inode_operations zonefs_dir_inode_operations = { + }; + + static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, +- enum zonefs_ztype type) ++ enum zonefs_ztype ztype) + { + struct super_block *sb = parent->i_sb; + +- inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1; ++ inode->i_ino = bdev_nr_zones(sb->s_bdev) + ztype + 1; + inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555); + inode->i_op = &zonefs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; +@@ -573,79 +603,34 @@ static const struct inode_operations zonefs_file_inode_operations = { + .setattr = zonefs_inode_setattr, + }; + +-static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, +- enum zonefs_ztype type) ++static void zonefs_init_file_inode(struct inode *inode, ++ struct zonefs_zone *z) + { + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- int ret = 0; +- +- inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; +- inode->i_mode = S_IFREG | sbi->s_perm; + +- if (type == ZONEFS_ZTYPE_CNV) +- zi->i_flags |= ZONEFS_ZONE_CNV; +- +- zi->i_zsector = zone->start; +- zi->i_zone_size = zone->len << SECTOR_SHIFT; +- if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && +- !(sbi->s_features & ZONEFS_F_AGGRCNV)) { +- zonefs_err(sb, +- "zone size %llu doesn't match device's zone sectors %llu\n", +- zi->i_zone_size, +- bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); +- return -EINVAL; +- } +- +- zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, +- zone->capacity << SECTOR_SHIFT); +- zi->i_wpoffset = zonefs_check_zone_condition(inode, zone); ++ inode->i_private = z; + ++ inode->i_ino = z->z_sector >> sbi->s_zone_sectors_shift; ++ inode->i_mode = S_IFREG | sbi->s_perm; + inode->i_uid = sbi->s_uid; + inode->i_gid = sbi->s_gid; +- inode->i_size = zi->i_wpoffset; +- inode->i_blocks = zi->i_max_size >> SECTOR_SHIFT; ++ inode->i_size = z->z_wpoffset; ++ inode->i_blocks = z->z_capacity >> SECTOR_SHIFT; + + inode->i_op = &zonefs_file_inode_operations; + inode->i_fop = &zonefs_file_operations; + inode->i_mapping->a_ops = &zonefs_file_aops; + + /* Update the inode access rights depending on the zone condition */ +- 
zi->i_flags |= ZONEFS_ZONE_INIT_MODE; ++ z->z_flags |= ZONEFS_ZONE_INIT_MODE; + zonefs_inode_update_mode(inode); +- +- sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); +- sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; +- sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; +- +- mutex_lock(&zi->i_truncate_mutex); +- +- /* +- * For sequential zones, make sure that any open zone is closed first +- * to ensure that the initial number of open zones is 0, in sync with +- * the open zone accounting done when the mount option +- * ZONEFS_MNTOPT_EXPLICIT_OPEN is used. +- */ +- if (type == ZONEFS_ZTYPE_SEQ && +- (zone->cond == BLK_ZONE_COND_IMP_OPEN || +- zone->cond == BLK_ZONE_COND_EXP_OPEN)) { +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); +- if (ret) +- goto unlock; +- } +- +- zonefs_account_active(inode); +- +-unlock: +- mutex_unlock(&zi->i_truncate_mutex); +- +- return ret; + } + + static struct dentry *zonefs_create_inode(struct dentry *parent, +- const char *name, struct blk_zone *zone, +- enum zonefs_ztype type) ++ const char *name, ++ struct zonefs_zone *z, ++ enum zonefs_ztype ztype) + { + struct inode *dir = d_inode(parent); + struct dentry *dentry; +@@ -661,15 +646,10 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, + goto dput; + + inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; +- if (zone) { +- ret = zonefs_init_file_inode(inode, zone, type); +- if (ret) { +- iput(inode); +- goto dput; +- } +- } else { +- zonefs_init_dir_inode(dir, inode, type); +- } ++ if (z) ++ zonefs_init_file_inode(inode, z); ++ else ++ zonefs_init_dir_inode(dir, inode, ztype); + + d_add(dentry, inode); + dir->i_size++; +@@ -685,100 +665,51 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, + struct zonefs_zone_data { + struct super_block *sb; + unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; ++ sector_t cnv_zone_start; + struct blk_zone *zones; + }; + + /* +- * Create a zone group and populate it with zone files. ++ * Create the inodes for a zone group. + */ +-static int zonefs_create_zgroup(struct zonefs_zone_data *zd, +- enum zonefs_ztype type) ++static int zonefs_create_zgroup_inodes(struct super_block *sb, ++ enum zonefs_ztype ztype) + { +- struct super_block *sb = zd->sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +- struct blk_zone *zone, *next, *end; +- const char *zgroup_name; +- char *file_name; ++ struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; + struct dentry *dir, *dent; +- unsigned int n = 0; +- int ret; ++ char *file_name; ++ int i, ret = 0; ++ ++ if (!zgroup) ++ return -ENOMEM; + + /* If the group is empty, there is nothing to do */ +- if (!zd->nr_zones[type]) ++ if (!zgroup->g_nr_zones) + return 0; + + file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); + if (!file_name) + return -ENOMEM; + +- if (type == ZONEFS_ZTYPE_CNV) +- zgroup_name = "cnv"; +- else +- zgroup_name = "seq"; +- +- dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); ++ dir = zonefs_create_inode(sb->s_root, zonefs_zgroup_name(ztype), ++ NULL, ztype); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + goto free; + } + +- /* +- * The first zone contains the super block: skip it. +- */ +- end = zd->zones + bdev_nr_zones(sb->s_bdev); +- for (zone = &zd->zones[1]; zone < end; zone = next) { +- +- next = zone + 1; +- if (zonefs_zone_type(zone) != type) +- continue; +- +- /* +- * For conventional zones, contiguous zones can be aggregated +- * together to form larger files. 
Note that this overwrites the +- * length of the first zone of the set of contiguous zones +- * aggregated together. If one offline or read-only zone is +- * found, assume that all zones aggregated have the same +- * condition. +- */ +- if (type == ZONEFS_ZTYPE_CNV && +- (sbi->s_features & ZONEFS_F_AGGRCNV)) { +- for (; next < end; next++) { +- if (zonefs_zone_type(next) != type) +- break; +- zone->len += next->len; +- zone->capacity += next->capacity; +- if (next->cond == BLK_ZONE_COND_READONLY && +- zone->cond != BLK_ZONE_COND_OFFLINE) +- zone->cond = BLK_ZONE_COND_READONLY; +- else if (next->cond == BLK_ZONE_COND_OFFLINE) +- zone->cond = BLK_ZONE_COND_OFFLINE; +- } +- if (zone->capacity != zone->len) { +- zonefs_err(sb, "Invalid conventional zone capacity\n"); +- ret = -EINVAL; +- goto free; +- } +- } +- +- /* +- * Use the file number within its group as file name. +- */ +- snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); +- dent = zonefs_create_inode(dir, file_name, zone, type); ++ for (i = 0; i < zgroup->g_nr_zones; i++) { ++ /* Use the zone number within its group as the file name */ ++ snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", i); ++ dent = zonefs_create_inode(dir, file_name, ++ &zgroup->g_zones[i], ztype); + if (IS_ERR(dent)) { + ret = PTR_ERR(dent); +- goto free; ++ break; + } +- +- n++; + } + +- zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", +- zgroup_name, n, n > 1 ? "s" : ""); +- +- sbi->s_nr_files[type] = n; +- ret = 0; +- + free: + kfree(file_name); + +@@ -789,21 +720,38 @@ static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, + void *data) + { + struct zonefs_zone_data *zd = data; ++ struct super_block *sb = zd->sb; ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); ++ ++ /* ++ * We do not care about the first zone: it contains the super block ++ * and not exposed as a file. ++ */ ++ if (!idx) ++ return 0; + + /* +- * Count the number of usable zones: the first zone at index 0 contains +- * the super block and is ignored. ++ * Count the number of zones that will be exposed as files. ++ * For sequential zones, we always have as many files as zones. ++ * FOr conventional zones, the number of files depends on if we have ++ * conventional zones aggregation enabled. + */ + switch (zone->type) { + case BLK_ZONE_TYPE_CONVENTIONAL: +- zone->wp = zone->start + zone->len; +- if (idx) +- zd->nr_zones[ZONEFS_ZTYPE_CNV]++; ++ if (sbi->s_features & ZONEFS_F_AGGRCNV) { ++ /* One file per set of contiguous conventional zones */ ++ if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) || ++ zone->start != zd->cnv_zone_start) ++ sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; ++ zd->cnv_zone_start = zone->start + zone->len; ++ } else { ++ /* One file per zone */ ++ sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; ++ } + break; + case BLK_ZONE_TYPE_SEQWRITE_REQ: + case BLK_ZONE_TYPE_SEQWRITE_PREF: +- if (idx) +- zd->nr_zones[ZONEFS_ZTYPE_SEQ]++; ++ sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++; + break; + default: + zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", +@@ -843,11 +791,173 @@ static int zonefs_get_zone_info(struct zonefs_zone_data *zd) + return 0; + } + +-static inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd) ++static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd) + { + kvfree(zd->zones); + } + ++/* ++ * Create a zone group and populate it with zone files. 
++ */ ++static int zonefs_init_zgroup(struct super_block *sb, ++ struct zonefs_zone_data *zd, ++ enum zonefs_ztype ztype) ++{ ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); ++ struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; ++ struct blk_zone *zone, *next, *end; ++ struct zonefs_zone *z; ++ unsigned int n = 0; ++ int ret; ++ ++ /* Allocate the zone group. If it is empty, we have nothing to do. */ ++ if (!zgroup->g_nr_zones) ++ return 0; ++ ++ zgroup->g_zones = kvcalloc(zgroup->g_nr_zones, ++ sizeof(struct zonefs_zone), GFP_KERNEL); ++ if (!zgroup->g_zones) ++ return -ENOMEM; ++ ++ /* ++ * Initialize the zone groups using the device zone information. ++ * We always skip the first zone as it contains the super block ++ * and is not use to back a file. ++ */ ++ end = zd->zones + bdev_nr_zones(sb->s_bdev); ++ for (zone = &zd->zones[1]; zone < end; zone = next) { ++ ++ next = zone + 1; ++ if (zonefs_zone_type(zone) != ztype) ++ continue; ++ ++ if (WARN_ON_ONCE(n >= zgroup->g_nr_zones)) ++ return -EINVAL; ++ ++ /* ++ * For conventional zones, contiguous zones can be aggregated ++ * together to form larger files. Note that this overwrites the ++ * length of the first zone of the set of contiguous zones ++ * aggregated together. If one offline or read-only zone is ++ * found, assume that all zones aggregated have the same ++ * condition. ++ */ ++ if (ztype == ZONEFS_ZTYPE_CNV && ++ (sbi->s_features & ZONEFS_F_AGGRCNV)) { ++ for (; next < end; next++) { ++ if (zonefs_zone_type(next) != ztype) ++ break; ++ zone->len += next->len; ++ zone->capacity += next->capacity; ++ if (next->cond == BLK_ZONE_COND_READONLY && ++ zone->cond != BLK_ZONE_COND_OFFLINE) ++ zone->cond = BLK_ZONE_COND_READONLY; ++ else if (next->cond == BLK_ZONE_COND_OFFLINE) ++ zone->cond = BLK_ZONE_COND_OFFLINE; ++ } ++ } ++ ++ z = &zgroup->g_zones[n]; ++ if (ztype == ZONEFS_ZTYPE_CNV) ++ z->z_flags |= ZONEFS_ZONE_CNV; ++ z->z_sector = zone->start; ++ z->z_size = zone->len << SECTOR_SHIFT; ++ if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && ++ !(sbi->s_features & ZONEFS_F_AGGRCNV)) { ++ zonefs_err(sb, ++ "Invalid zone size %llu (device zone sectors %llu)\n", ++ z->z_size, ++ bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); ++ return -EINVAL; ++ } ++ ++ z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE, ++ zone->capacity << SECTOR_SHIFT); ++ z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone); ++ ++ sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes); ++ sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits; ++ sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits; ++ ++ /* ++ * For sequential zones, make sure that any open zone is closed ++ * first to ensure that the initial number of open zones is 0, ++ * in sync with the open zone accounting done when the mount ++ * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used. ++ */ ++ if (ztype == ZONEFS_ZTYPE_SEQ && ++ (zone->cond == BLK_ZONE_COND_IMP_OPEN || ++ zone->cond == BLK_ZONE_COND_EXP_OPEN)) { ++ ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE); ++ if (ret) ++ return ret; ++ } ++ ++ zonefs_account_active(sb, z); ++ ++ n++; ++ } ++ ++ if (WARN_ON_ONCE(n != zgroup->g_nr_zones)) ++ return -EINVAL; ++ ++ zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", ++ zonefs_zgroup_name(ztype), ++ zgroup->g_nr_zones, ++ zgroup->g_nr_zones > 1 ? 
"s" : ""); ++ ++ return 0; ++} ++ ++static void zonefs_free_zgroups(struct super_block *sb) ++{ ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); ++ enum zonefs_ztype ztype; ++ ++ if (!sbi) ++ return; ++ ++ for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { ++ kvfree(sbi->s_zgroup[ztype].g_zones); ++ sbi->s_zgroup[ztype].g_zones = NULL; ++ } ++} ++ ++/* ++ * Create a zone group and populate it with zone files. ++ */ ++static int zonefs_init_zgroups(struct super_block *sb) ++{ ++ struct zonefs_zone_data zd; ++ enum zonefs_ztype ztype; ++ int ret; ++ ++ /* First get the device zone information */ ++ memset(&zd, 0, sizeof(struct zonefs_zone_data)); ++ zd.sb = sb; ++ ret = zonefs_get_zone_info(&zd); ++ if (ret) ++ goto cleanup; ++ ++ /* Allocate and initialize the zone groups */ ++ for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { ++ ret = zonefs_init_zgroup(sb, &zd, ztype); ++ if (ret) { ++ zonefs_info(sb, ++ "Zone group \"%s\" initialization failed\n", ++ zonefs_zgroup_name(ztype)); ++ break; ++ } ++ } ++ ++cleanup: ++ zonefs_free_zone_info(&zd); ++ if (ret) ++ zonefs_free_zgroups(sb); ++ ++ return ret; ++} ++ + /* + * Read super block information from the device. + */ +@@ -945,7 +1055,6 @@ static const struct super_operations zonefs_sops = { + */ + static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + { +- struct zonefs_zone_data zd; + struct zonefs_sb_info *sbi; + struct inode *inode; + enum zonefs_ztype t; +@@ -998,16 +1107,6 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + if (ret) + return ret; + +- memset(&zd, 0, sizeof(struct zonefs_zone_data)); +- zd.sb = sb; +- ret = zonefs_get_zone_info(&zd); +- if (ret) +- goto cleanup; +- +- ret = zonefs_sysfs_register(sb); +- if (ret) +- goto cleanup; +- + zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev)); + + if (!sbi->s_max_wro_seq_files && +@@ -1018,6 +1117,11 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; + } + ++ /* Initialize the zone groups */ ++ ret = zonefs_init_zgroups(sb); ++ if (ret) ++ goto cleanup; ++ + /* Create root directory inode */ + ret = -ENOMEM; + inode = new_inode(sb); +@@ -1037,13 +1141,19 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + + /* Create and populate files in zone groups directories */ + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { +- ret = zonefs_create_zgroup(&zd, t); ++ ret = zonefs_create_zgroup_inodes(sb, t); + if (ret) +- break; ++ goto cleanup; + } + ++ ret = zonefs_sysfs_register(sb); ++ if (ret) ++ goto cleanup; ++ ++ return 0; ++ + cleanup: +- zonefs_cleanup_zone_info(&zd); ++ zonefs_free_zgroups(sb); + + return ret; + } +@@ -1062,6 +1172,7 @@ static void zonefs_kill_super(struct super_block *sb) + d_genocide(sb->s_root); + + zonefs_sysfs_unregister(sb); ++ zonefs_free_zgroups(sb); + kill_block_super(sb); + kfree(sbi); + } +diff --git a/fs/zonefs/trace.h b/fs/zonefs/trace.h +index 42edcfd393ed2..9969db3a9c7dc 100644 +--- a/fs/zonefs/trace.h ++++ b/fs/zonefs/trace.h +@@ -20,8 +20,9 @@ + #define show_dev(dev) MAJOR(dev), MINOR(dev) + + TRACE_EVENT(zonefs_zone_mgmt, +- TP_PROTO(struct inode *inode, enum req_op op), +- TP_ARGS(inode, op), ++ TP_PROTO(struct super_block *sb, struct zonefs_zone *z, ++ enum req_op op), ++ TP_ARGS(sb, z, op), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) +@@ -30,12 +31,12 @@ TRACE_EVENT(zonefs_zone_mgmt, + __field(sector_t, nr_sectors) + ), + TP_fast_assign( +- 
__entry->dev = inode->i_sb->s_dev; +- __entry->ino = inode->i_ino; ++ __entry->dev = sb->s_dev; ++ __entry->ino = ++ z->z_sector >> ZONEFS_SB(sb)->s_zone_sectors_shift; + __entry->op = op; +- __entry->sector = ZONEFS_I(inode)->i_zsector; +- __entry->nr_sectors = +- ZONEFS_I(inode)->i_zone_size >> SECTOR_SHIFT; ++ __entry->sector = z->z_sector; ++ __entry->nr_sectors = z->z_size >> SECTOR_SHIFT; + ), + TP_printk("bdev=(%d,%d), ino=%lu op=%s, sector=%llu, nr_sectors=%llu", + show_dev(__entry->dev), (unsigned long)__entry->ino, +@@ -58,9 +59,10 @@ TRACE_EVENT(zonefs_file_dio_append, + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; +- __entry->sector = ZONEFS_I(inode)->i_zsector; ++ __entry->sector = zonefs_inode_zone(inode)->z_sector; + __entry->size = size; +- __entry->wpoffset = ZONEFS_I(inode)->i_wpoffset; ++ __entry->wpoffset = ++ zonefs_inode_zone(inode)->z_wpoffset; + __entry->ret = ret; + ), + TP_printk("bdev=(%d, %d), ino=%lu, sector=%llu, size=%zu, wpoffset=%llu, ret=%zu", +diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h +index 1a225f74015a0..2d626e18b1411 100644 +--- a/fs/zonefs/zonefs.h ++++ b/fs/zonefs/zonefs.h +@@ -47,22 +47,39 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) + #define ZONEFS_ZONE_CNV (1U << 31) + + /* +- * In-memory inode data. ++ * In-memory per-file inode zone data. + */ +-struct zonefs_inode_info { +- struct inode i_vnode; ++struct zonefs_zone { ++ /* Zone state flags */ ++ unsigned int z_flags; + +- /* File zone start sector (512B unit) */ +- sector_t i_zsector; ++ /* Zone start sector (512B unit) */ ++ sector_t z_sector; + +- /* File zone write pointer position (sequential zones only) */ +- loff_t i_wpoffset; ++ /* Zone size (bytes) */ ++ loff_t z_size; + +- /* File maximum size */ +- loff_t i_max_size; ++ /* Zone capacity (file maximum size, bytes) */ ++ loff_t z_capacity; + +- /* File zone size */ +- loff_t i_zone_size; ++ /* Write pointer offset in the zone (sequential zones only, bytes) */ ++ loff_t z_wpoffset; ++}; ++ ++/* ++ * In memory zone group information: all zones of a group are exposed ++ * as files, one file per zone. ++ */ ++struct zonefs_zone_group { ++ unsigned int g_nr_zones; ++ struct zonefs_zone *g_zones; ++}; ++ ++/* ++ * In-memory inode data. 
++ */ ++struct zonefs_inode_info { ++ struct inode i_vnode; + + /* + * To serialise fully against both syscall and mmap based IO and +@@ -81,7 +98,6 @@ struct zonefs_inode_info { + + /* guarded by i_truncate_mutex */ + unsigned int i_wr_refcnt; +- unsigned int i_flags; + }; + + static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode) +@@ -89,24 +105,29 @@ static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode) + return container_of(inode, struct zonefs_inode_info, i_vnode); + } + +-static inline bool zonefs_zone_is_cnv(struct zonefs_inode_info *zi) ++static inline bool zonefs_zone_is_cnv(struct zonefs_zone *z) ++{ ++ return z->z_flags & ZONEFS_ZONE_CNV; ++} ++ ++static inline bool zonefs_zone_is_seq(struct zonefs_zone *z) + { +- return zi->i_flags & ZONEFS_ZONE_CNV; ++ return !zonefs_zone_is_cnv(z); + } + +-static inline bool zonefs_zone_is_seq(struct zonefs_inode_info *zi) ++static inline struct zonefs_zone *zonefs_inode_zone(struct inode *inode) + { +- return !zonefs_zone_is_cnv(zi); ++ return inode->i_private; + } + + static inline bool zonefs_inode_is_cnv(struct inode *inode) + { +- return zonefs_zone_is_cnv(ZONEFS_I(inode)); ++ return zonefs_zone_is_cnv(zonefs_inode_zone(inode)); + } + + static inline bool zonefs_inode_is_seq(struct inode *inode) + { +- return zonefs_zone_is_seq(ZONEFS_I(inode)); ++ return zonefs_zone_is_seq(zonefs_inode_zone(inode)); + } + + /* +@@ -200,7 +221,7 @@ struct zonefs_sb_info { + uuid_t s_uuid; + unsigned int s_zone_sectors_shift; + +- unsigned int s_nr_files[ZONEFS_ZTYPE_MAX]; ++ struct zonefs_zone_group s_zgroup[ZONEFS_ZTYPE_MAX]; + + loff_t s_blocks; + loff_t s_used_blocks; +@@ -229,8 +250,8 @@ static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb) + pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args) + + /* In super.c */ +-void zonefs_account_active(struct inode *inode); +-int zonefs_zone_mgmt(struct inode *inode, enum req_op op); ++void zonefs_inode_account_active(struct inode *inode); ++int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op); + void zonefs_i_size_write(struct inode *inode, loff_t isize); + void zonefs_update_stats(struct inode *inode, loff_t new_isize); + void __zonefs_io_error(struct inode *inode, bool write); +-- +2.39.2 + diff --git a/queue-6.1/zonefs-simplify-io-error-handling.patch b/queue-6.1/zonefs-simplify-io-error-handling.patch new file mode 100644 index 00000000000..0bc06889a2a --- /dev/null +++ b/queue-6.1/zonefs-simplify-io-error-handling.patch @@ -0,0 +1,244 @@ +From 236111cac1592239f3295e8e5de2e95dd808a786 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Nov 2022 11:06:20 +0900 +Subject: zonefs: Simplify IO error handling + +From: Damien Le Moal + +[ Upstream commit 46a9c526eef7fb68a00321e2a9591ce5276ae92b ] + +Simplify zonefs_check_zone_condition() by moving the code that changes +an inode access rights to the new function zonefs_inode_update_mode(). +Furthermore, since on mount an inode wpoffset is always zero when +zonefs_check_zone_condition() is called during an inode initialization, +the "mount" boolean argument is not necessary for the readonly zone +case. This argument is thus removed. + +zonefs_io_error_cb() is also modified to use the inode offline and +zone state flags instead of checking the device zone condition. The +multiple calls to zonefs_check_zone_condition() are reduced to the first +call on entry, which allows removing the "warn" argument. 
+zonefs_inode_update_mode() is also used to update an inode access rights +as zonefs_io_error_cb() modifies the inode flags depending on the volume +error handling mode (defined with a mount option). Since an inode mode +change differs for read-only zones between mount time and IO error time, +the flag ZONEFS_ZONE_INIT_MODE is used to differentiate both cases. + +Signed-off-by: Damien Le Moal +Reviewed-by: Johannes Thumshirn +Stable-dep-of: 88b170088ad2 ("zonefs: Fix error message in zonefs_file_dio_append()") +Signed-off-by: Sasha Levin +--- + fs/zonefs/super.c | 110 ++++++++++++++++++++++++--------------------- + fs/zonefs/zonefs.h | 9 ++-- + 2 files changed, 64 insertions(+), 55 deletions(-) + +diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c +index e808276b88018..6307cc95be061 100644 +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -155,48 +155,31 @@ void zonefs_update_stats(struct inode *inode, loff_t new_isize) + * amount of readable data in the zone. + */ + static loff_t zonefs_check_zone_condition(struct inode *inode, +- struct blk_zone *zone, bool warn, +- bool mount) ++ struct blk_zone *zone) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + switch (zone->cond) { + case BLK_ZONE_COND_OFFLINE: +- /* +- * Dead zone: make the inode immutable, disable all accesses +- * and set the file size to 0 (zone wp set to zone start). +- */ +- if (warn) +- zonefs_warn(inode->i_sb, "inode %lu: offline zone\n", +- inode->i_ino); +- inode->i_flags |= S_IMMUTABLE; +- inode->i_mode &= ~0777; +- zone->wp = zone->start; ++ zonefs_warn(inode->i_sb, "inode %lu: offline zone\n", ++ inode->i_ino); + zi->i_flags |= ZONEFS_ZONE_OFFLINE; + return 0; + case BLK_ZONE_COND_READONLY: + /* +- * The write pointer of read-only zones is invalid. If such a +- * zone is found during mount, the file size cannot be retrieved +- * so we treat the zone as offline (mount == true case). +- * Otherwise, keep the file size as it was when last updated +- * so that the user can recover data. In both cases, writes are +- * always disabled for the zone. ++ * The write pointer of read-only zones is invalid, so we cannot ++ * determine the zone wpoffset (inode size). We thus keep the ++ * zone wpoffset as is, which leads to an empty file ++ * (wpoffset == 0) on mount. For a runtime error, this keeps ++ * the inode size as it was when last updated so that the user ++ * can recover data. + */ +- if (warn) +- zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", +- inode->i_ino); +- inode->i_flags |= S_IMMUTABLE; +- if (mount) { +- zone->cond = BLK_ZONE_COND_OFFLINE; +- inode->i_mode &= ~0777; +- zone->wp = zone->start; +- zi->i_flags |= ZONEFS_ZONE_OFFLINE; +- return 0; +- } ++ zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", ++ inode->i_ino); + zi->i_flags |= ZONEFS_ZONE_READONLY; +- inode->i_mode &= ~0222; +- return i_size_read(inode); ++ if (zi->i_ztype == ZONEFS_ZTYPE_CNV) ++ return zi->i_max_size; ++ return zi->i_wpoffset; + case BLK_ZONE_COND_FULL: + /* The write pointer of full zones is invalid. */ + return zi->i_max_size; +@@ -207,6 +190,30 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, + } + } + ++/* ++ * Check a zone condition and adjust its inode access permissions for ++ * offline and readonly zones. 
++ */ ++static void zonefs_inode_update_mode(struct inode *inode) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ ++ if (zi->i_flags & ZONEFS_ZONE_OFFLINE) { ++ /* Offline zones cannot be read nor written */ ++ inode->i_flags |= S_IMMUTABLE; ++ inode->i_mode &= ~0777; ++ } else if (zi->i_flags & ZONEFS_ZONE_READONLY) { ++ /* Readonly zones cannot be written */ ++ inode->i_flags |= S_IMMUTABLE; ++ if (zi->i_flags & ZONEFS_ZONE_INIT_MODE) ++ inode->i_mode &= ~0777; ++ else ++ inode->i_mode &= ~0222; ++ } ++ ++ zi->i_flags &= ~ZONEFS_ZONE_INIT_MODE; ++} ++ + struct zonefs_ioerr_data { + struct inode *inode; + bool write; +@@ -228,10 +235,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * as there is no inconsistency between the inode size and the amount of + * data writen in the zone (data_size). + */ +- data_size = zonefs_check_zone_condition(inode, zone, true, false); ++ data_size = zonefs_check_zone_condition(inode, zone); + isize = i_size_read(inode); +- if (zone->cond != BLK_ZONE_COND_OFFLINE && +- zone->cond != BLK_ZONE_COND_READONLY && ++ if (!(zi->i_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && + !err->write && isize == data_size) + return 0; + +@@ -264,24 +270,22 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * zone condition to read-only and offline respectively, as if the + * condition was signaled by the hardware. + */ +- if (zone->cond == BLK_ZONE_COND_OFFLINE || +- sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) { ++ if ((zi->i_flags & ZONEFS_ZONE_OFFLINE) || ++ (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) { + zonefs_warn(sb, "inode %lu: read/write access disabled\n", + inode->i_ino); +- if (zone->cond != BLK_ZONE_COND_OFFLINE) { +- zone->cond = BLK_ZONE_COND_OFFLINE; +- data_size = zonefs_check_zone_condition(inode, zone, +- false, false); +- } +- } else if (zone->cond == BLK_ZONE_COND_READONLY || +- sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) { ++ if (!(zi->i_flags & ZONEFS_ZONE_OFFLINE)) ++ zi->i_flags |= ZONEFS_ZONE_OFFLINE; ++ zonefs_inode_update_mode(inode); ++ data_size = 0; ++ } else if ((zi->i_flags & ZONEFS_ZONE_READONLY) || ++ (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) { + zonefs_warn(sb, "inode %lu: write access disabled\n", + inode->i_ino); +- if (zone->cond != BLK_ZONE_COND_READONLY) { +- zone->cond = BLK_ZONE_COND_READONLY; +- data_size = zonefs_check_zone_condition(inode, zone, +- false, false); +- } ++ if (!(zi->i_flags & ZONEFS_ZONE_READONLY)) ++ zi->i_flags |= ZONEFS_ZONE_READONLY; ++ zonefs_inode_update_mode(inode); ++ data_size = isize; + } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && + data_size > isize) { + /* Do not expose garbage data */ +@@ -295,8 +299,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * close of the zone when the inode file is closed. 
+ */ + if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) && +- (zone->cond == BLK_ZONE_COND_OFFLINE || +- zone->cond == BLK_ZONE_COND_READONLY)) ++ (zi->i_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) + zi->i_flags &= ~ZONEFS_ZONE_OPEN; + + /* +@@ -378,6 +381,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) + + inode_init_once(&zi->i_vnode); + mutex_init(&zi->i_truncate_mutex); ++ zi->i_wpoffset = 0; + zi->i_wr_refcnt = 0; + zi->i_flags = 0; + +@@ -594,7 +598,7 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + + zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, + zone->capacity << SECTOR_SHIFT); +- zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true); ++ zi->i_wpoffset = zonefs_check_zone_condition(inode, zone); + + inode->i_uid = sbi->s_uid; + inode->i_gid = sbi->s_gid; +@@ -605,6 +609,10 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + inode->i_fop = &zonefs_file_operations; + inode->i_mapping->a_ops = &zonefs_file_aops; + ++ /* Update the inode access rights depending on the zone condition */ ++ zi->i_flags |= ZONEFS_ZONE_INIT_MODE; ++ zonefs_inode_update_mode(inode); ++ + sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); + sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; + sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; +diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h +index 839ebe9afb6c1..439096445ee53 100644 +--- a/fs/zonefs/zonefs.h ++++ b/fs/zonefs/zonefs.h +@@ -39,10 +39,11 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) + return ZONEFS_ZTYPE_SEQ; + } + +-#define ZONEFS_ZONE_OPEN (1U << 0) +-#define ZONEFS_ZONE_ACTIVE (1U << 1) +-#define ZONEFS_ZONE_OFFLINE (1U << 2) +-#define ZONEFS_ZONE_READONLY (1U << 3) ++#define ZONEFS_ZONE_INIT_MODE (1U << 0) ++#define ZONEFS_ZONE_OPEN (1U << 1) ++#define ZONEFS_ZONE_ACTIVE (1U << 2) ++#define ZONEFS_ZONE_OFFLINE (1U << 3) ++#define ZONEFS_ZONE_READONLY (1U << 4) + + /* + * In-memory inode data. +-- +2.39.2 +
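For reference, the behavioural core of the "zonefs: Simplify IO error handling" patch above is that inode access permissions are now derived purely from the ZONEFS_ZONE_* flags instead of being re-derived from the device zone condition at each call site. The stand-alone C sketch below models that flag logic in user space under simplified assumptions: struct model_inode, model_update_mode() and the plain immutable field are illustrative stand-ins rather than kernel types; only the flag bits and the 0777/0222 permission masks are taken from the patch itself.

/*
 * Simplified user-space model of the flag-based permission update
 * introduced by zonefs_inode_update_mode() in the patch above.
 * The struct is a stand-in, not the kernel's struct zonefs_inode_info,
 * and S_IMMUTABLE handling is reduced to a plain integer field.
 */
#include <stdio.h>

#define ZONEFS_ZONE_INIT_MODE  (1U << 0)
#define ZONEFS_ZONE_OFFLINE    (1U << 3)
#define ZONEFS_ZONE_READONLY   (1U << 4)

struct model_inode {
	unsigned int zone_flags;   /* ZONEFS_ZONE_* bits */
	unsigned int mode;         /* 0777-style permission bits */
	int immutable;             /* stand-in for S_IMMUTABLE */
};

/* Mirrors the branches of zonefs_inode_update_mode() in the patch. */
static void model_update_mode(struct model_inode *mi)
{
	if (mi->zone_flags & ZONEFS_ZONE_OFFLINE) {
		/* Offline zones can be neither read nor written. */
		mi->immutable = 1;
		mi->mode &= ~0777;
	} else if (mi->zone_flags & ZONEFS_ZONE_READONLY) {
		/* Read-only zones cannot be written. */
		mi->immutable = 1;
		if (mi->zone_flags & ZONEFS_ZONE_INIT_MODE)
			/* Read-only at mount time: hide the file entirely. */
			mi->mode &= ~0777;
		else
			/* Runtime error: keep the file readable. */
			mi->mode &= ~0222;
	}

	/* INIT_MODE only applies to the first update after inode init. */
	mi->zone_flags &= ~ZONEFS_ZONE_INIT_MODE;
}

int main(void)
{
	struct model_inode mnt = { ZONEFS_ZONE_READONLY | ZONEFS_ZONE_INIT_MODE, 0640, 0 };
	struct model_inode err = { ZONEFS_ZONE_READONLY, 0640, 0 };

	model_update_mode(&mnt);
	model_update_mode(&err);

	printf("mount: %04o, runtime: %04o\n", mnt.mode, err.mode);
	return 0;
}

Built with any C compiler, the sketch prints "mount: 0000, runtime: 0440" for an initial mode of 0640, reflecting the distinction the commit message draws between a read-only zone found at mount time and one reported by a runtime I/O error.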