Fixes for 6.6

author Sasha Levin <sashal@kernel.org>

Thu, 9 Jan 2025 13:52:24 +0000 (08:52 -0500)

committer Sasha Levin <sashal@kernel.org>

Thu, 9 Jan 2025 13:52:24 +0000 (08:52 -0500)
author Sasha Levin <sashal@kernel.org>
Thu, 9 Jan 2025 13:52:24 +0000 (08:52 -0500)
committer Sasha Levin <sashal@kernel.org>
Thu, 9 Jan 2025 13:52:24 +0000 (08:52 -0500)
diff --git a/queue-6.6/dm-array-fix-cursor-index-when-skipping-across-block.patch b/queue-6.6/dm-array-fix-cursor-index-when-skipping-across-block.patch

new file mode 100644 (file)

index 0000000..841c51f
--- /dev/null
+++ b/queue-6.6/dm-array-fix-cursor-index-when-skipping-across-block.patch
@@ -0,0 +1,74 @@
+From f3e7f5424c2e7bcf2514a3f2d832920df1cc49cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Dec 2024 19:41:53 +0800
+Subject: dm array: fix cursor index when skipping across block boundaries
+
+From: Ming-Hung Tsai <mtsai@redhat.com>
+
+[ Upstream commit 0bb1968da2737ba68fd63857d1af2b301a18d3bf ]
+
+dm_array_cursor_skip() seeks to the target position by loading array
+blocks iteratively until the specified number of entries to skip is
+reached. When seeking across block boundaries, it uses
+dm_array_cursor_next() to step into the next block.
+dm_array_cursor_skip() must first move the cursor index to the end
+of the current block; otherwise, the cursor position could incorrectly
+remain in the same block, causing the actual number of skipped entries
+to be much smaller than expected.
+
+This bug affects cache resizing in v2 metadata and could lead to data
+loss if the fast device is shrunk during the first-time resume. For
+example:
+
+1. create cache metadata consisting of 32768 blocks, with a dirty block
+   assigned to the second bitmap block. cache_restore v1.0 is required.
+
+cat <<EOF >> cmeta.xml
+<superblock uuid="" block_size="64" nr_cache_blocks="32768" \
+policy="smq" hint_width="4">
+  <mappings>
+    <mapping cache_block="32767" origin_block="0" dirty="true"/>
+  </mappings>
+</superblock>
+EOF
+dmsetup create cmeta --table "0 8192 linear /dev/sdc 0"
+cache_restore -i cmeta.xml -o /dev/mapper/cmeta --metadata-version=2
+
+2. bring up the cache while attempting to discard all the blocks belonging
+   to the second bitmap block (block# 32576 to 32767). The last command
+   is expected to fail, but it actually succeeds.
+
+dmsetup create cdata --table "0 2084864 linear /dev/sdc 8192"
+dmsetup create corig --table "0 65536 linear /dev/sdc 2105344"
+dmsetup create cache --table "0 65536 cache /dev/mapper/cmeta \
+/dev/mapper/cdata /dev/mapper/corig 64 2 metadata2 writeback smq \
+2 migration_threshold 0"
+
+In addition to the reproducer described above, this fix can be
+verified using the "array_cursor/skip" tests in dm-unit:
+  dm-unit run /pdata/array_cursor/skip/ --kernel-dir <KERNEL_DIR>
+
+Signed-off-by: Ming-Hung Tsai <mtsai@redhat.com>
+Fixes: 9b696229aa7d ("dm persistent data: add cursor skip functions to the cursor APIs")
+Reviewed-by: Joe Thornber <thornber@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/persistent-data/dm-array.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
+index 788a31cae187..b1fdfe53e937 100644
+--- a/drivers/md/persistent-data/dm-array.c
++++ b/drivers/md/persistent-data/dm-array.c
+@@ -1003,6 +1003,7 @@ int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count)
+               }
+ 
+               count -= remaining;
++              c->index += (remaining - 1);
+               r = dm_array_cursor_next(c);
+ 
+       } while (!r);
+-- 
+2.39.5
+
diff --git a/queue-6.6/dm-array-fix-releasing-a-faulty-array-block-twice-in.patch b/queue-6.6/dm-array-fix-releasing-a-faulty-array-block-twice-in.patch

new file mode 100644 (file)

index 0000000..d9e7827
--- /dev/null
+++ b/queue-6.6/dm-array-fix-releasing-a-faulty-array-block-twice-in.patch
@@ -0,0 +1,110 @@
+From 23bba3a96728fa0709670a8f556a83c6cf3d35ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Dec 2024 19:41:51 +0800
+Subject: dm array: fix releasing a faulty array block twice in
+ dm_array_cursor_end
+
+From: Ming-Hung Tsai <mtsai@redhat.com>
+
+[ Upstream commit f2893c0804d86230ffb8f1c8703fdbb18648abc8 ]
+
+When dm_bm_read_lock() fails due to locking or checksum errors, it
+releases the faulty block implicitly while leaving an invalid output
+pointer behind. The caller of dm_bm_read_lock() should not operate on
+this invalid dm_block pointer, or it will lead to undefined behavior.
+For example, the dm_array_cursor incorrectly caches the invalid pointer
+on reading a faulty array block, causing a double release in
+dm_array_cursor_end(), then hitting the BUG_ON in dm-bufio cache_put().
+
+Reproduce steps:
+
+1. initialize a cache device
+
+dmsetup create cmeta --table "0 8192 linear /dev/sdc 0"
+dmsetup create cdata --table "0 65536 linear /dev/sdc 8192"
+dmsetup create corig --table "0 524288 linear /dev/sdc 262144"
+dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1
+dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \
+/dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0"
+
+2. wipe the second array block offline
+
+dmsetup remove cache cmeta cdata corig
+mapping_root=$(dd if=/dev/sdc bs=1c count=8 skip=192 \
+2>/dev/null | hexdump -e '1/8 "%u\n"')
+ablock=$(dd if=/dev/sdc bs=1c count=8 skip=$((4096*mapping_root+2056)) \
+2>/dev/null | hexdump -e '1/8 "%u\n"')
+dd if=/dev/zero of=/dev/sdc bs=4k count=1 seek=$ablock
+
+3. try to reopen the cache device
+
+dmsetup create cmeta --table "0 8192 linear /dev/sdc 0"
+dmsetup create cdata --table "0 65536 linear /dev/sdc 8192"
+dmsetup create corig --table "0 524288 linear /dev/sdc 262144"
+dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \
+/dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0"
+
+Kernel logs:
+
+(snip)
+device-mapper: array: array_block_check failed: blocknr 0 != wanted 10
+device-mapper: block manager: array validator check failed for block 10
+device-mapper: array: get_ablock failed
+device-mapper: cache metadata: dm_array_cursor_next for mapping failed
+------------[ cut here ]------------
+kernel BUG at drivers/md/dm-bufio.c:638!
+
+Fix by setting the cached block pointer to NULL on errors.
+
+In addition to the reproducer described above, this fix can be
+verified using the "array_cursor/damaged" test in dm-unit:
+  dm-unit run /pdata/array_cursor/damaged --kernel-dir <KERNEL_DIR>
+
+Signed-off-by: Ming-Hung Tsai <mtsai@redhat.com>
+Fixes: fdd1315aa5f0 ("dm array: introduce cursor api")
+Reviewed-by: Joe Thornber <thornber@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/persistent-data/dm-array.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
+index 798c9c53a343..de303ba33857 100644
+--- a/drivers/md/persistent-data/dm-array.c
++++ b/drivers/md/persistent-data/dm-array.c
+@@ -917,23 +917,27 @@ static int load_ablock(struct dm_array_cursor *c)
+       if (c->block)
+               unlock_ablock(c->info, c->block);
+ 
+-      c->block = NULL;
+-      c->ab = NULL;
+       c->index = 0;
+ 
+       r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le);
+       if (r) {
+               DMERR("dm_btree_cursor_get_value failed");
+-              dm_btree_cursor_end(&c->cursor);
++              goto out;
+ 
+       } else {
+               r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab);
+               if (r) {
+                       DMERR("get_ablock failed");
+-                      dm_btree_cursor_end(&c->cursor);
++                      goto out;
+               }
+       }
+ 
++      return 0;
++
++out:
++      dm_btree_cursor_end(&c->cursor);
++      c->block = NULL;
++      c->ab = NULL;
+       return r;
+ }
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.6/dm-array-fix-unreleased-btree-blocks-on-closing-a-fa.patch b/queue-6.6/dm-array-fix-unreleased-btree-blocks-on-closing-a-fa.patch

new file mode 100644 (file)

index 0000000..2fbc6b2
--- /dev/null
+++ b/queue-6.6/dm-array-fix-unreleased-btree-blocks-on-closing-a-fa.patch
@@ -0,0 +1,49 @@
+From 70723a33c2b21a306a370ba6f122d2e085560ec3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Dec 2024 19:41:52 +0800
+Subject: dm array: fix unreleased btree blocks on closing a faulty array
+ cursor
+
+From: Ming-Hung Tsai <mtsai@redhat.com>
+
+[ Upstream commit 626f128ee9c4133b1cfce4be2b34a1508949370e ]
+
+The cached block pointer in dm_array_cursor might be NULL if it reaches
+an unreadable array block, or the array is empty. Therefore,
+dm_array_cursor_end() should call dm_btree_cursor_end() unconditionally,
+to prevent leaving unreleased btree blocks.
+
+This fix can be verified using the "array_cursor/iterate/empty" test
+in dm-unit:
+  dm-unit run /pdata/array_cursor/iterate/empty --kernel-dir <KERNEL_DIR>
+
+Signed-off-by: Ming-Hung Tsai <mtsai@redhat.com>
+Fixes: fdd1315aa5f0 ("dm array: introduce cursor api")
+Reviewed-by: Joe Thornber <thornber@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/persistent-data/dm-array.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
+index de303ba33857..788a31cae187 100644
+--- a/drivers/md/persistent-data/dm-array.c
++++ b/drivers/md/persistent-data/dm-array.c
+@@ -960,10 +960,10 @@ EXPORT_SYMBOL_GPL(dm_array_cursor_begin);
+ 
+ void dm_array_cursor_end(struct dm_array_cursor *c)
+ {
+-      if (c->block) {
++      if (c->block)
+               unlock_ablock(c->info, c->block);
+-              dm_btree_cursor_end(&c->cursor);
+-      }
++
++      dm_btree_cursor_end(&c->cursor);
+ }
+ EXPORT_SYMBOL_GPL(dm_array_cursor_end);
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.6/exfat-fix-the-infinite-loop-in-__exfat_free_cluster.patch b/queue-6.6/exfat-fix-the-infinite-loop-in-__exfat_free_cluster.patch

new file mode 100644 (file)

index 0000000..4985ec3
--- /dev/null
+++ b/queue-6.6/exfat-fix-the-infinite-loop-in-__exfat_free_cluster.patch
@@ -0,0 +1,53 @@
+From f39f87c00803740c0a351c034cebff14d6a2e2ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Dec 2024 13:39:42 +0800
+Subject: exfat: fix the infinite loop in __exfat_free_cluster()
+
+From: Yuezhang Mo <Yuezhang.Mo@sony.com>
+
+[ Upstream commit a5324b3a488d883aa2d42f72260054e87d0940a0 ]
+
+In __exfat_free_cluster(), the cluster chain is traversed until the
+EOF cluster. If the cluster chain includes a loop due to file system
+corruption, the EOF cluster cannot be traversed, resulting in an
+infinite loop.
+
+This commit uses the total number of clusters to prevent this infinite
+loop.
+
+Reported-by: syzbot+1de5a37cb85a2d536330@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=1de5a37cb85a2d536330
+Tested-by: syzbot+1de5a37cb85a2d536330@syzkaller.appspotmail.com
+Fixes: 31023864e67a ("exfat: add fat entry operations")
+Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
+Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/exfat/fatent.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
+index 56b870d9cc0d..428d862a1d2b 100644
+--- a/fs/exfat/fatent.c
++++ b/fs/exfat/fatent.c
+@@ -216,6 +216,16 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain
+ 
+                       if (err)
+                               goto dec_used_clus;
++
++                      if (num_clusters >= sbi->num_clusters - EXFAT_FIRST_CLUSTER) {
++                              /*
++                               * The cluster chain includes a loop, scan the
++                               * bitmap to get the number of used clusters.
++                               */
++                              exfat_count_used_clusters(sb, &sbi->used_clusters);
++
++                              return 0;
++                      }
+               } while (clu != EXFAT_EOF_CLUSTER);
+       }
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.6/exfat-fix-the-infinite-loop-in-exfat_readdir.patch b/queue-6.6/exfat-fix-the-infinite-loop-in-exfat_readdir.patch

new file mode 100644 (file)

index 0000000..7b74be1
--- /dev/null
+++ b/queue-6.6/exfat-fix-the-infinite-loop-in-exfat_readdir.patch
@@ -0,0 +1,57 @@
+From ed7dab1b962fcb63c6f274b070cdd01df4904ba6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Dec 2024 13:08:37 +0800
+Subject: exfat: fix the infinite loop in exfat_readdir()
+
+From: Yuezhang Mo <Yuezhang.Mo@sony.com>
+
+[ Upstream commit fee873761bd978d077d8c55334b4966ac4cb7b59 ]
+
+If the file system is corrupted so that a cluster is linked to
+itself in the cluster chain, and there is an unused directory
+entry in the cluster, 'dentry' will not be incremented, so the
+condition 'dentry < max_dentries' is unable to prevent an infinite
+loop.
+
+This infinite loop causes s_lock not to be released, and other
+tasks will hang, such as exfat_sync_fs().
+
+This commit stops traversing the cluster chain when there is an unused
+directory entry in the cluster to avoid this infinite loop.
+
+Reported-by: syzbot+205c2644abdff9d3f9fc@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=205c2644abdff9d3f9fc
+Tested-by: syzbot+205c2644abdff9d3f9fc@syzkaller.appspotmail.com
+Fixes: ca06197382bd ("exfat: add directory operations")
+Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
+Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/exfat/dir.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
+index 7a715016b96f..f4f81e349cef 100644
+--- a/fs/exfat/dir.c
++++ b/fs/exfat/dir.c
+@@ -125,7 +125,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
+                       type = exfat_get_entry_type(ep);
+                       if (type == TYPE_UNUSED) {
+                               brelse(bh);
+-                              break;
++                              goto out;
+                       }
+ 
+                       if (type != TYPE_FILE && type != TYPE_DIR) {
+@@ -189,6 +189,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
+               }
+       }
+ 
++out:
+       dir_entry->namebuf.lfn[0] = '\0';
+       *cpos = EXFAT_DEN_TO_B(dentry);
+       return 0;
+-- 
+2.39.5
+
diff --git a/queue-6.6/jbd2-flush-filesystem-device-before-updating-tail-se.patch b/queue-6.6/jbd2-flush-filesystem-device-before-updating-tail-se.patch

new file mode 100644 (file)

index 0000000..4432e55
--- /dev/null
+++ b/queue-6.6/jbd2-flush-filesystem-device-before-updating-tail-se.patch
@@ -0,0 +1,45 @@
+From a77eb8f13ae176989506eb4ffca758e2ecb72e5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 09:44:07 +0800
+Subject: jbd2: flush filesystem device before updating tail sequence
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+[ Upstream commit a0851ea9cd555c333795b85ddd908898b937c4e1 ]
+
+When committing a transaction in jbd2_journal_commit_transaction(), the
+disk caches for the filesystem device should be flushed before updating
+the journal tail sequence. However, this step is missed if the journal
+is not located on the filesystem device. As a result, the filesystem may
+become inconsistent following a power failure or system crash. Fix it by
+ensuring that the filesystem device is flushed appropriately.
+
+Fixes: 3339578f0578 ("jbd2: cleanup journal tail after transaction commit")
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Link: https://lore.kernel.org/r/20241203014407.805916-3-yi.zhang@huaweicloud.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jbd2/commit.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
+index 0cd7439470fc..84663ff7dc50 100644
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -777,9 +777,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
+       /*
+        * If the journal is not located on the file system device,
+        * then we must flush the file system device before we issue
+-       * the commit record
++       * the commit record and update the journal tail sequence.
+        */
+-      if (commit_transaction->t_need_data_flush &&
++      if ((commit_transaction->t_need_data_flush || update_tail) &&
+           (journal->j_fs_dev != journal->j_dev) &&
+           (journal->j_flags & JBD2_BARRIER))
+               blkdev_issue_flush(journal->j_fs_dev);
+-- 
+2.39.5
+
diff --git a/queue-6.6/jbd2-increase-io-priority-for-writing-revoke-records.patch b/queue-6.6/jbd2-increase-io-priority-for-writing-revoke-records.patch

new file mode 100644 (file)

index 0000000..6ce6cfc
--- /dev/null
+++ b/queue-6.6/jbd2-increase-io-priority-for-writing-revoke-records.patch
@@ -0,0 +1,41 @@
+From b53f7db824b9494a0cd3645cae0b05f7d531078c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 09:44:06 +0800
+Subject: jbd2: increase IO priority for writing revoke records
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+[ Upstream commit ac1e21bd8c883aeac2f1835fc93b39c1e6838b35 ]
+
+Commit '6a3afb6ac6df ("jbd2: increase the journal IO's priority")'
+increases the priority of journal I/O by marking I/O with the
+JBD2_JOURNAL_REQ_FLAGS. However, that commit missed the revoke buffers,
+so address that kind of I/O as well.
+
+Fixes: 6a3afb6ac6df ("jbd2: increase the journal IO's priority")
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Link: https://lore.kernel.org/r/20241203014407.805916-2-yi.zhang@huaweicloud.com
+Reviewed-by: Kemeng Shi <shikemeng@huaweicloud.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jbd2/revoke.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
+index 4556e4689024..ce63d5fde9c3 100644
+--- a/fs/jbd2/revoke.c
++++ b/fs/jbd2/revoke.c
+@@ -654,7 +654,7 @@ static void flush_descriptor(journal_t *journal,
+       set_buffer_jwrite(descriptor);
+       BUFFER_TRACE(descriptor, "write");
+       set_buffer_dirty(descriptor);
+-      write_dirty_buffer(descriptor, REQ_SYNC);
++      write_dirty_buffer(descriptor, JBD2_JOURNAL_REQ_FLAGS);
+ }
+ #endif
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.6/ovl-do-not-encode-lower-fh-with-upper-sb_writers-hel.patch b/queue-6.6/ovl-do-not-encode-lower-fh-with-upper-sb_writers-hel.patch

new file mode 100644 (file)

index 0000000..5756ef8
--- /dev/null
+++ b/queue-6.6/ovl-do-not-encode-lower-fh-with-upper-sb_writers-hel.patch
@@ -0,0 +1,427 @@
+From 50272acfad90e9fe923034f640686ce2daae2ab1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Aug 2023 16:47:59 +0300
+Subject: ovl: do not encode lower fh with upper sb_writers held
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 5b02bfc1e7e3811c5bf7f0fa626a0694d0dbbd77 ]
+
+When lower fs is a nested overlayfs, calling encode_fh() on a lower
+directory dentry may trigger copy up and take sb_writers on the upper fs
+of the lower nested overlayfs.
+
+The lower nested overlayfs may have the same upper fs as this overlayfs,
+so nested sb_writers lock is illegal.
+
+Move all the callers that encode lower fh to before ovl_want_write().
+
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Stable-dep-of: c45beebfde34 ("ovl: support encoding fid from inode with no alias")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/copy_up.c   | 53 +++++++++++++++++++++++++---------------
+ fs/overlayfs/namei.c     | 37 +++++++++++++++++++++-------
+ fs/overlayfs/overlayfs.h | 26 ++++++++++++++------
+ fs/overlayfs/super.c     | 20 ++++++++++-----
+ fs/overlayfs/util.c      | 10 ++++++++
+ 5 files changed, 104 insertions(+), 42 deletions(-)
+
+diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
+index ada3fcc9c6d5..5c9af24bae4a 100644
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -426,29 +426,29 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+       return ERR_PTR(err);
+ }
+ 
+-int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
+-                 struct dentry *upper)
++struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin)
+ {
+-      const struct ovl_fh *fh = NULL;
+-      int err;
+-
+       /*
+        * When lower layer doesn't support export operations store a 'null' fh,
+        * so we can use the overlay.origin xattr to distignuish between a copy
+        * up and a pure upper inode.
+        */
+-      if (ovl_can_decode_fh(lower->d_sb)) {
+-              fh = ovl_encode_real_fh(ofs, lower, false);
+-              if (IS_ERR(fh))
+-                      return PTR_ERR(fh);
+-      }
++      if (!ovl_can_decode_fh(origin->d_sb))
++              return NULL;
++
++      return ovl_encode_real_fh(ofs, origin, false);
++}
++
++int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
++                    struct dentry *upper)
++{
++      int err;
+ 
+       /*
+        * Do not fail when upper doesn't support xattrs.
+        */
+       err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf,
+                                fh ? fh->fb.len : 0, 0);
+-      kfree(fh);
+ 
+       /* Ignore -EPERM from setting "user.*" on symlink/special */
+       return err == -EPERM ? 0 : err;
+@@ -476,7 +476,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
+  *
+  * Caller must hold i_mutex on indexdir.
+  */
+-static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
++static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh,
+                           struct dentry *upper)
+ {
+       struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+@@ -502,7 +502,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
+       if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
+               return -EIO;
+ 
+-      err = ovl_get_index_name(ofs, origin, &name);
++      err = ovl_get_index_name_fh(fh, &name);
+       if (err)
+               return err;
+ 
+@@ -541,6 +541,7 @@ struct ovl_copy_up_ctx {
+       struct dentry *destdir;
+       struct qstr destname;
+       struct dentry *workdir;
++      const struct ovl_fh *origin_fh;
+       bool origin;
+       bool indexed;
+       bool metacopy;
+@@ -637,7 +638,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
+        * hard link.
+        */
+       if (c->origin) {
+-              err = ovl_set_origin(ofs, c->lowerpath.dentry, temp);
++              err = ovl_set_origin_fh(ofs, c->origin_fh, temp);
+               if (err)
+                       return err;
+       }
+@@ -749,7 +750,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
+               goto cleanup;
+ 
+       if (S_ISDIR(c->stat.mode) && c->indexed) {
+-              err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
++              err = ovl_create_index(c->dentry, c->origin_fh, temp);
+               if (err)
+                       goto cleanup;
+       }
+@@ -861,6 +862,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+ {
+       int err;
+       struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
++      struct dentry *origin = c->lowerpath.dentry;
++      struct ovl_fh *fh = NULL;
+       bool to_index = false;
+ 
+       /*
+@@ -877,17 +880,25 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+                       to_index = true;
+       }
+ 
+-      if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
++      if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) {
++              fh = ovl_get_origin_fh(ofs, origin);
++              if (IS_ERR(fh))
++                      return PTR_ERR(fh);
++
++              /* origin_fh may be NULL */
++              c->origin_fh = fh;
+               c->origin = true;
++      }
+ 
+       if (to_index) {
+               c->destdir = ovl_indexdir(c->dentry->d_sb);
+-              err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
++              err = ovl_get_index_name(ofs, origin, &c->destname);
+               if (err)
+-                      return err;
++                      goto out_free_fh;
+       } else if (WARN_ON(!c->parent)) {
+               /* Disconnected dentry must be copied up to index dir */
+-              return -EIO;
++              err = -EIO;
++              goto out_free_fh;
+       } else {
+               /*
+                * Mark parent "impure" because it may now contain non-pure
+@@ -895,7 +906,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+                */
+               err = ovl_set_impure(c->parent, c->destdir);
+               if (err)
+-                      return err;
++                      goto out_free_fh;
+       }
+ 
+       /* Should we copyup with O_TMPFILE or with workdir? */
+@@ -927,6 +938,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+ out:
+       if (to_index)
+               kfree(c->destname.name);
++out_free_fh:
++      kfree(fh);
+       return err;
+ }
+ 
+diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
+index 80391c687c2a..f10ac4ae35f0 100644
+--- a/fs/overlayfs/namei.c
++++ b/fs/overlayfs/namei.c
+@@ -507,6 +507,19 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
+       return err;
+ }
+ 
++int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
++                    enum ovl_xattr ox, const struct ovl_fh *fh,
++                    bool is_upper, bool set)
++{
++      int err;
++
++      err = ovl_verify_fh(ofs, dentry, ox, fh);
++      if (set && err == -ENODATA)
++              err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
++
++      return err;
++}
++
+ /*
+  * Verify that @real dentry matches the file handle stored in xattr @name.
+  *
+@@ -515,9 +528,9 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
+  *
+  * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
+  */
+-int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+-                    enum ovl_xattr ox, struct dentry *real, bool is_upper,
+-                    bool set)
++int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
++                          enum ovl_xattr ox, struct dentry *real,
++                          bool is_upper, bool set)
+ {
+       struct inode *inode;
+       struct ovl_fh *fh;
+@@ -530,9 +543,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+               goto fail;
+       }
+ 
+-      err = ovl_verify_fh(ofs, dentry, ox, fh);
+-      if (set && err == -ENODATA)
+-              err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
++      err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set);
+       if (err)
+               goto fail;
+ 
+@@ -548,6 +559,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+       goto out;
+ }
+ 
++
+ /* Get upper dentry from index */
+ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
+                              bool connected)
+@@ -684,7 +696,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
+       goto out;
+ }
+ 
+-static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
++int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name)
+ {
+       char *n, *s;
+ 
+@@ -873,20 +885,27 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
+ static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
+                         struct dentry *lower, struct dentry *upper)
+ {
++      const struct ovl_fh *fh;
+       int err;
+ 
+       if (ovl_check_origin_xattr(ofs, upper))
+               return 0;
+ 
++      fh = ovl_get_origin_fh(ofs, lower);
++      if (IS_ERR(fh))
++              return PTR_ERR(fh);
++
+       err = ovl_want_write(dentry);
+       if (err)
+-              return err;
++              goto out;
+ 
+-      err = ovl_set_origin(ofs, lower, upper);
++      err = ovl_set_origin_fh(ofs, fh, upper);
+       if (!err)
+               err = ovl_set_impure(dentry->d_parent, upper->d_parent);
+ 
+       ovl_drop_write(dentry);
++out:
++      kfree(fh);
+       return err;
+ }
+ 
+diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
+index 09ca82ed0f8c..61e03d664d7d 100644
+--- a/fs/overlayfs/overlayfs.h
++++ b/fs/overlayfs/overlayfs.h
+@@ -632,11 +632,15 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
+                       struct dentry *upperdentry, struct ovl_path **stackp);
+ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+-                    enum ovl_xattr ox, struct dentry *real, bool is_upper,
+-                    bool set);
++                    enum ovl_xattr ox, const struct ovl_fh *fh,
++                    bool is_upper, bool set);
++int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
++                          enum ovl_xattr ox, struct dentry *real,
++                          bool is_upper, bool set);
+ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
+                              bool connected);
+ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
++int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name);
+ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+                      struct qstr *name);
+ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
+@@ -648,17 +652,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+                         unsigned int flags);
+ bool ovl_lower_positive(struct dentry *dentry);
+ 
++static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper,
++                                     const struct ovl_fh *fh, bool set)
++{
++      return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set);
++}
++
+ static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper,
+                                   struct dentry *origin, bool set)
+ {
+-      return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin,
+-                               false, set);
++      return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin,
++                                     false, set);
+ }
+ 
+ static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index,
+                                  struct dentry *upper, bool set)
+ {
+-      return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set);
++      return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper,
++                                     true, set);
+ }
+ 
+ /* readdir.c */
+@@ -823,8 +834,9 @@ int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentr
+ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
+ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+                                 bool is_upper);
+-int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
+-                 struct dentry *upper);
++struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin);
++int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
++                    struct dentry *upper);
+ 
+ /* export.c */
+ extern const struct export_operations ovl_export_operations;
+diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
+index 2c056d737c27..e2574034c3fa 100644
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -879,15 +879,20 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
+ {
+       struct vfsmount *mnt = ovl_upper_mnt(ofs);
+       struct dentry *indexdir;
++      struct dentry *origin = ovl_lowerstack(oe)->dentry;
++      const struct ovl_fh *fh;
+       int err;
+ 
++      fh = ovl_get_origin_fh(ofs, origin);
++      if (IS_ERR(fh))
++              return PTR_ERR(fh);
++
+       err = mnt_want_write(mnt);
+       if (err)
+-              return err;
++              goto out_free_fh;
+ 
+       /* Verify lower root is upper root origin */
+-      err = ovl_verify_origin(ofs, upperpath->dentry,
+-                              ovl_lowerstack(oe)->dentry, true);
++      err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true);
+       if (err) {
+               pr_err("failed to verify upper root origin\n");
+               goto out;
+@@ -919,9 +924,10 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
+                * directory entries.
+                */
+               if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
+-                      err = ovl_verify_set_fh(ofs, ofs->indexdir,
+-                                              OVL_XATTR_ORIGIN,
+-                                              upperpath->dentry, true, false);
++                      err = ovl_verify_origin_xattr(ofs, ofs->indexdir,
++                                                    OVL_XATTR_ORIGIN,
++                                                    upperpath->dentry, true,
++                                                    false);
+                       if (err)
+                               pr_err("failed to verify index dir 'origin' xattr\n");
+               }
+@@ -939,6 +945,8 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
+ 
+ out:
+       mnt_drop_write(mnt);
++out_free_fh:
++      kfree(fh);
+       return err;
+ }
+ 
+diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
+index 0bf3ffcd072f..4e6b747e0f2e 100644
+--- a/fs/overlayfs/util.c
++++ b/fs/overlayfs/util.c
+@@ -976,12 +976,18 @@ static void ovl_cleanup_index(struct dentry *dentry)
+       struct dentry *index = NULL;
+       struct inode *inode;
+       struct qstr name = { };
++      bool got_write = false;
+       int err;
+ 
+       err = ovl_get_index_name(ofs, lowerdentry, &name);
+       if (err)
+               goto fail;
+ 
++      err = ovl_want_write(dentry);
++      if (err)
++              goto fail;
++
++      got_write = true;
+       inode = d_inode(upperdentry);
+       if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
+               pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+@@ -1019,6 +1025,8 @@ static void ovl_cleanup_index(struct dentry *dentry)
+               goto fail;
+ 
+ out:
++      if (got_write)
++              ovl_drop_write(dentry);
+       kfree(name.name);
+       dput(index);
+       return;
+@@ -1089,6 +1097,8 @@ void ovl_nlink_end(struct dentry *dentry)
+ {
+       struct inode *inode = d_inode(dentry);
+ 
++      ovl_drop_write(dentry);
++
+       if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
+               const struct cred *old_cred;
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.6/ovl-pass-realinode-to-ovl_encode_real_fh-instead-of-.patch b/queue-6.6/ovl-pass-realinode-to-ovl_encode_real_fh-instead-of-.patch

new file mode 100644 (file)

index 0000000..c77caba
--- /dev/null
+++ b/queue-6.6/ovl-pass-realinode-to-ovl_encode_real_fh-instead-of-.patch
@@ -0,0 +1,132 @@
+From ad02d9bcf14aba6ce39a85158546330e214ac5cc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jan 2025 17:24:03 +0100
+Subject: ovl: pass realinode to ovl_encode_real_fh() instead of realdentry
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 07aeefae7ff44d80524375253980b1bdee2396b0 ]
+
+We want to be able to encode an fid from an inode with no alias.
+
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Link: https://lore.kernel.org/r/20250105162404.357058-2-amir73il@gmail.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: c45beebfde34 ("ovl: support encoding fid from inode with no alias")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/copy_up.c   | 11 ++++++-----
+ fs/overlayfs/export.c    |  5 +++--
+ fs/overlayfs/namei.c     |  4 ++--
+ fs/overlayfs/overlayfs.h |  2 +-
+ 4 files changed, 12 insertions(+), 10 deletions(-)
+
+diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
+index 5c9af24bae4a..f14c412c5609 100644
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -371,13 +371,13 @@ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry,
+       return err;
+ }
+ 
+-struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
++struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode,
+                                 bool is_upper)
+ {
+       struct ovl_fh *fh;
+       int fh_type, dwords;
+       int buflen = MAX_HANDLE_SZ;
+-      uuid_t *uuid = &real->d_sb->s_uuid;
++      uuid_t *uuid = &realinode->i_sb->s_uuid;
+       int err;
+ 
+       /* Make sure the real fid stays 32bit aligned */
+@@ -394,7 +394,8 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+        * the price or reconnecting the dentry.
+        */
+       dwords = buflen >> 2;
+-      fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0);
++      fh_type = exportfs_encode_inode_fh(realinode, (void *)fh->fb.fid,
++                                         &dwords, NULL, 0);
+       buflen = (dwords << 2);
+ 
+       err = -EIO;
+@@ -436,7 +437,7 @@ struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin)
+       if (!ovl_can_decode_fh(origin->d_sb))
+               return NULL;
+ 
+-      return ovl_encode_real_fh(ofs, origin, false);
++      return ovl_encode_real_fh(ofs, d_inode(origin), false);
+ }
+ 
+ int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
+@@ -461,7 +462,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
+       const struct ovl_fh *fh;
+       int err;
+ 
+-      fh = ovl_encode_real_fh(ofs, upper, true);
++      fh = ovl_encode_real_fh(ofs, d_inode(upper), true);
+       if (IS_ERR(fh))
+               return PTR_ERR(fh);
+ 
+diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
+index 611ff567a1aa..c56e4e0b8054 100644
+--- a/fs/overlayfs/export.c
++++ b/fs/overlayfs/export.c
+@@ -228,6 +228,7 @@ static int ovl_check_encode_origin(struct dentry *dentry)
+ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
+                            u32 *fid, int buflen)
+ {
++      struct inode *inode = d_inode(dentry);
+       struct ovl_fh *fh = NULL;
+       int err, enc_lower;
+       int len;
+@@ -241,8 +242,8 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
+               goto fail;
+ 
+       /* Encode an upper or lower file handle */
+-      fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) :
+-                              ovl_dentry_upper(dentry), !enc_lower);
++      fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_inode_lower(inode) :
++                              ovl_inode_upper(inode), !enc_lower);
+       if (IS_ERR(fh))
+               return PTR_ERR(fh);
+ 
+diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
+index f10ac4ae35f0..2d2ef671b36b 100644
+--- a/fs/overlayfs/namei.c
++++ b/fs/overlayfs/namei.c
+@@ -536,7 +536,7 @@ int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
+       struct ovl_fh *fh;
+       int err;
+ 
+-      fh = ovl_encode_real_fh(ofs, real, is_upper);
++      fh = ovl_encode_real_fh(ofs, d_inode(real), is_upper);
+       err = PTR_ERR(fh);
+       if (IS_ERR(fh)) {
+               fh = NULL;
+@@ -732,7 +732,7 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+       struct ovl_fh *fh;
+       int err;
+ 
+-      fh = ovl_encode_real_fh(ofs, origin, false);
++      fh = ovl_encode_real_fh(ofs, d_inode(origin), false);
+       if (IS_ERR(fh))
+               return PTR_ERR(fh);
+ 
+diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
+index 61e03d664d7d..ca63a26a6170 100644
+--- a/fs/overlayfs/overlayfs.h
++++ b/fs/overlayfs/overlayfs.h
+@@ -832,7 +832,7 @@ int ovl_copy_up_with_data(struct dentry *dentry);
+ int ovl_maybe_copy_up(struct dentry *dentry, int flags);
+ int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new);
+ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
+-struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
++struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode,
+                                 bool is_upper);
+ struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin);
+ int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
+-- 
+2.39.5
+
diff --git a/queue-6.6/ovl-support-encoding-fid-from-inode-with-no-alias.patch b/queue-6.6/ovl-support-encoding-fid-from-inode-with-no-alias.patch

new file mode 100644 (file)

index 0000000..11f077b
--- /dev/null
+++ b/queue-6.6/ovl-support-encoding-fid-from-inode-with-no-alias.patch
@@ -0,0 +1,165 @@
+From c2c403002c16898732545f6b2be22783e037f6c4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jan 2025 17:24:04 +0100
+Subject: ovl: support encoding fid from inode with no alias
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit c45beebfde34aa71afbc48b2c54cdda623515037 ]
+
+Dmitry Safonov reported that a WARN_ON() assertion can be triggered by
+userspace when calling inotify_show_fdinfo() for an overlayfs watched
+inode, whose dentry aliases were discarded with drop_caches.
+
+The WARN_ON() assertion in inotify_show_fdinfo() was removed, because
+it is possible for encoding file handle to fail for other reasons, but
+the impact of failing to encode an overlayfs file handle goes beyond
+this assertion.
+
+As shown in the LTP test case mentioned in the link below, failure to
+encode an overlayfs file handle from a non-aliased inode also leads to
+failure to report an fid with FAN_DELETE_SELF fanotify events.
+
+As Dmitry notes in his analysis of the problem, ovl_encode_fh() fails
+if it cannot find an alias for the inode, but this failure can be fixed.
+ovl_encode_fh() seldom uses the alias and in the case of non-decodable
+file handles, as is often the case with fanotify fid info,
+ovl_encode_fh() never needs to use the alias to encode a file handle.
+
+Defer finding an alias until it is actually needed so ovl_encode_fh()
+will not fail in the common case of FAN_DELETE_SELF fanotify events.
+
+Fixes: 16aac5ad1fa9 ("ovl: support encoding non-decodable file handles")
+Reported-by: Dmitry Safonov <dima@arista.com>
+Closes: https://lore.kernel.org/linux-fsdevel/CAOQ4uxiie81voLZZi2zXS1BziXZCM24nXqPAxbu8kxXCUWdwOg@mail.gmail.com/
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Link: https://lore.kernel.org/r/20250105162404.357058-3-amir73il@gmail.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/export.c | 46 +++++++++++++++++++++++--------------------
+ 1 file changed, 25 insertions(+), 21 deletions(-)
+
+diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
+index c56e4e0b8054..3a17e4366f28 100644
+--- a/fs/overlayfs/export.c
++++ b/fs/overlayfs/export.c
+@@ -181,35 +181,37 @@ static int ovl_connect_layer(struct dentry *dentry)
+  *
+  * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
+  */
+-static int ovl_check_encode_origin(struct dentry *dentry)
++static int ovl_check_encode_origin(struct inode *inode)
+ {
+-      struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
++      struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+       bool decodable = ofs->config.nfs_export;
++      struct dentry *dentry;
++      int err;
+ 
+       /* No upper layer? */
+       if (!ovl_upper_mnt(ofs))
+               return 1;
+ 
+       /* Lower file handle for non-upper non-decodable */
+-      if (!ovl_dentry_upper(dentry) && !decodable)
++      if (!ovl_inode_upper(inode) && !decodable)
+               return 1;
+ 
+       /* Upper file handle for pure upper */
+-      if (!ovl_dentry_lower(dentry))
++      if (!ovl_inode_lower(inode))
+               return 0;
+ 
+       /*
+        * Root is never indexed, so if there's an upper layer, encode upper for
+        * root.
+        */
+-      if (dentry == dentry->d_sb->s_root)
++      if (inode == d_inode(inode->i_sb->s_root))
+               return 0;
+ 
+       /*
+        * Upper decodable file handle for non-indexed upper.
+        */
+-      if (ovl_dentry_upper(dentry) && decodable &&
+-          !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
++      if (ovl_inode_upper(inode) && decodable &&
++          !ovl_test_flag(OVL_INDEX, inode))
+               return 0;
+ 
+       /*
+@@ -218,17 +220,25 @@ static int ovl_check_encode_origin(struct dentry *dentry)
+        * ovl_connect_layer() will try to make origin's layer "connected" by
+        * copying up a "connectable" ancestor.
+        */
+-      if (d_is_dir(dentry) && decodable)
+-              return ovl_connect_layer(dentry);
++      if (!decodable || !S_ISDIR(inode->i_mode))
++              return 1;
++
++      dentry = d_find_any_alias(inode);
++      if (!dentry)
++              return -ENOENT;
++
++      err = ovl_connect_layer(dentry);
++      dput(dentry);
++      if (err < 0)
++              return err;
+ 
+       /* Lower file handle for indexed and non-upper dir/non-dir */
+       return 1;
+ }
+ 
+-static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
++static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct inode *inode,
+                            u32 *fid, int buflen)
+ {
+-      struct inode *inode = d_inode(dentry);
+       struct ovl_fh *fh = NULL;
+       int err, enc_lower;
+       int len;
+@@ -237,7 +247,7 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
+        * Check if we should encode a lower or upper file handle and maybe
+        * copy up an ancestor to make lower file handle connectable.
+        */
+-      err = enc_lower = ovl_check_encode_origin(dentry);
++      err = enc_lower = ovl_check_encode_origin(inode);
+       if (enc_lower < 0)
+               goto fail;
+ 
+@@ -257,8 +267,8 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
+       return err;
+ 
+ fail:
+-      pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n",
+-                          dentry, err);
++      pr_warn_ratelimited("failed to encode file handle (ino=%lu, err=%i)\n",
++                          inode->i_ino, err);
+       goto out;
+ }
+ 
+@@ -266,19 +276,13 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
+                        struct inode *parent)
+ {
+       struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+-      struct dentry *dentry;
+       int bytes, buflen = *max_len << 2;
+ 
+       /* TODO: encode connectable file handles */
+       if (parent)
+               return FILEID_INVALID;
+ 
+-      dentry = d_find_any_alias(inode);
+-      if (!dentry)
+-              return FILEID_INVALID;
+-
+-      bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen);
+-      dput(dentry);
++      bytes = ovl_dentry_to_fid(ofs, inode, fid, buflen);
+       if (bytes <= 0)
+               return FILEID_INVALID;
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.6/series b/queue-6.6/series

index 543b9c6632ace5e3cac095791eecb719c1e95d61..bb22e19541143c835038ba93284e12415c95de6a 100644 (file)
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -1,2 +1,12 @@
  memblock-make-memblock_set_node-also-warn-about-use-.patch
  memblock-use-numa_valid_node-helper-to-check-for-invalid-node-id.patch
+jbd2-increase-io-priority-for-writing-revoke-records.patch
+jbd2-flush-filesystem-device-before-updating-tail-se.patch
+dm-array-fix-releasing-a-faulty-array-block-twice-in.patch
+dm-array-fix-unreleased-btree-blocks-on-closing-a-fa.patch
+dm-array-fix-cursor-index-when-skipping-across-block.patch
+exfat-fix-the-infinite-loop-in-exfat_readdir.patch
+exfat-fix-the-infinite-loop-in-__exfat_free_cluster.patch
+ovl-do-not-encode-lower-fh-with-upper-sb_writers-hel.patch
+ovl-pass-realinode-to-ovl_encode_real_fh-instead-of-.patch
+ovl-support-encoding-fid-from-inode-with-no-alias.patch
author	Sasha Levin <sashal@kernel.org>
	Thu, 9 Jan 2025 13:52:24 +0000 (08:52 -0500)
committer	Sasha Levin <sashal@kernel.org>
	Thu, 9 Jan 2025 13:52:24 +0000 (08:52 -0500)
queue-6.6/dm-array-fix-cursor-index-when-skipping-across-block.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/dm-array-fix-releasing-a-faulty-array-block-twice-in.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/dm-array-fix-unreleased-btree-blocks-on-closing-a-fa.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/exfat-fix-the-infinite-loop-in-__exfat_free_cluster.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/exfat-fix-the-infinite-loop-in-exfat_readdir.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/jbd2-flush-filesystem-device-before-updating-tail-se.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/jbd2-increase-io-priority-for-writing-revoke-records.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/ovl-do-not-encode-lower-fh-with-upper-sb_writers-hel.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/ovl-pass-realinode-to-ovl_encode_real_fh-instead-of-.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/ovl-support-encoding-fid-from-inode-with-no-alias.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/series		patch \| blob \| blame \| history