From 564db2e197e1861fee968f43e38c1f7da901a97e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 27 Jan 2024 13:18:51 -0800 Subject: [PATCH] 5.15-stable patches added patches: btrfs-fix-infinite-directory-reads.patch btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch --- .../btrfs-fix-infinite-directory-reads.patch | 313 ++++++++++++++++++ ...a-directory-and-adding-entries-to-it.patch | 148 +++++++++ ...last-index-during-a-rewinddir-3-call.patch | 106 ++++++ ...-current-last-index-when-opening-dir.patch | 91 +++++ queue-5.15/series | 4 + 5 files changed, 662 insertions(+) create mode 100644 queue-5.15/btrfs-fix-infinite-directory-reads.patch create mode 100644 queue-5.15/btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch create mode 100644 queue-5.15/btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch create mode 100644 queue-5.15/btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch diff --git a/queue-5.15/btrfs-fix-infinite-directory-reads.patch b/queue-5.15/btrfs-fix-infinite-directory-reads.patch new file mode 100644 index 00000000000..f68843c3dba --- /dev/null +++ b/queue-5.15/btrfs-fix-infinite-directory-reads.patch @@ -0,0 +1,313 @@ +From stable+bounces-15798-greg=kroah.com@vger.kernel.org Thu Jan 25 04:00:31 2024 +From: fdmanana@kernel.org +Date: Thu, 25 Jan 2024 11:59:35 +0000 +Subject: btrfs: fix infinite directory reads +To: linux-btrfs@vger.kernel.org +Cc: erosca@de.adit-jv.com, Maksim.Paimushkin@se.bosch.com, Matthias.Thomae@de.bosch.com, Sebastian.Unger@bosch.com, Dirk.Behme@de.bosch.com, Eugeniu.Rosca@bosch.com, wqu@suse.com, dsterba@suse.com, stable@vger.kernel.org, Filipe Manana , Rob Landley +Message-ID: + +From: Filipe Manana + +commit 9b378f6ad48cfa195ed868db9123c09ee7ec5ea2 upstream. 
+ +The readdir implementation currently processes always up to the last index +it finds. This however can result in an infinite loop if the directory has +a large number of entries such that they won't all fit in the given buffer +passed to the readdir callback, that is, dir_emit() returns a non-zero +value. Because in that case readdir() will be called again and if in the +meanwhile new directory entries were added and we still can't put all the +remaining entries in the buffer, we keep repeating this over and over. + +The following C program and test script reproduce the problem: + + $ cat /mnt/readdir_prog.c + #include <sys/types.h> + #include <dirent.h> + #include <stdio.h> + + int main(int argc, char *argv[]) + { + DIR *dir = opendir("."); + struct dirent *dd; + + while ((dd = readdir(dir))) { + printf("%s\n", dd->d_name); + rename(dd->d_name, "TEMPFILE"); + rename("TEMPFILE", dd->d_name); + } + closedir(dir); + } + + $ gcc -o /mnt/readdir_prog /mnt/readdir_prog.c + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdi + MNT=/mnt/sdi + + mkfs.btrfs -f $DEV &> /dev/null + #mkfs.xfs -f $DEV &> /dev/null + #mkfs.ext4 -F $DEV &> /dev/null + + mount $DEV $MNT + + mkdir $MNT/testdir + for ((i = 1; i <= 2000; i++)); do + echo -n > $MNT/testdir/file_$i + done + + cd $MNT/testdir + /mnt/readdir_prog + + cd /mnt + + umount $MNT + +This behaviour is surprising to applications and it's unlike ext4, xfs, +tmpfs, vfat and other filesystems, which always finish. In this case where +new entries were added due to renames, some file names may be reported +more than once, but this varies according to each filesystem - for example +ext4 never reported the same file more than once while xfs reports the +first 13 file names twice. + +So change our readdir implementation to track the last index number when +opendir() is called and then make readdir() never process beyond that +index number. This gives the same behaviour as ext4.
+ +Reported-by: Rob Landley +Link: https://lore.kernel.org/linux-btrfs/2c8c55ec-04c6-e0dc-9c5c-8c7924778c35@landley.net/ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=217681 +CC: stable@vger.kernel.org # 5.15 +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +[ Resolve a conflict due to member changes in 96d89923fa94 ] +Signed-off-by: Qu Wenruo +Reviewed-by: Eugeniu Rosca +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 1 + fs/btrfs/delayed-inode.c | 5 + + fs/btrfs/delayed-inode.h | 1 + fs/btrfs/inode.c | 131 ++++++++++++++++++++++++++++------------------- + 4 files changed, 84 insertions(+), 54 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -1361,6 +1361,7 @@ struct btrfs_drop_extents_args { + + struct btrfs_file_private { + void *filldir_buf; ++ u64 last_index; + }; + + +--- a/fs/btrfs/delayed-inode.c ++++ b/fs/btrfs/delayed-inode.c +@@ -1513,6 +1513,7 @@ int btrfs_inode_delayed_dir_index_count( + } + + bool btrfs_readdir_get_delayed_items(struct inode *inode, ++ u64 last_index, + struct list_head *ins_list, + struct list_head *del_list) + { +@@ -1532,14 +1533,14 @@ bool btrfs_readdir_get_delayed_items(str + + mutex_lock(&delayed_node->mutex); + item = __btrfs_first_delayed_insertion_item(delayed_node); +- while (item) { ++ while (item && item->key.offset <= last_index) { + refcount_inc(&item->refs); + list_add_tail(&item->readdir_list, ins_list); + item = __btrfs_next_delayed_item(item); + } + + item = __btrfs_first_delayed_deletion_item(delayed_node); +- while (item) { ++ while (item && item->key.offset <= last_index) { + refcount_inc(&item->refs); + list_add_tail(&item->readdir_list, del_list); + item = __btrfs_next_delayed_item(item); +--- a/fs/btrfs/delayed-inode.h ++++ b/fs/btrfs/delayed-inode.h +@@ -123,6 +123,7 @@ void btrfs_destroy_delayed_inodes(struct + + /* Used for readdir() */ + bool btrfs_readdir_get_delayed_items(struct inode *inode, ++ u64 last_index, + struct list_head *ins_list, + struct list_head 
*del_list); + void btrfs_readdir_put_delayed_items(struct inode *inode, +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6122,6 +6122,74 @@ static struct dentry *btrfs_lookup(struc + } + + /* ++ * Find the highest existing sequence number in a directory and then set the ++ * in-memory index_cnt variable to the first free sequence number. ++ */ ++static int btrfs_set_inode_index_count(struct btrfs_inode *inode) ++{ ++ struct btrfs_root *root = inode->root; ++ struct btrfs_key key, found_key; ++ struct btrfs_path *path; ++ struct extent_buffer *leaf; ++ int ret; ++ ++ key.objectid = btrfs_ino(inode); ++ key.type = BTRFS_DIR_INDEX_KEY; ++ key.offset = (u64)-1; ++ ++ path = btrfs_alloc_path(); ++ if (!path) ++ return -ENOMEM; ++ ++ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ++ if (ret < 0) ++ goto out; ++ /* FIXME: we should be able to handle this */ ++ if (ret == 0) ++ goto out; ++ ret = 0; ++ ++ if (path->slots[0] == 0) { ++ inode->index_cnt = BTRFS_DIR_START_INDEX; ++ goto out; ++ } ++ ++ path->slots[0]--; ++ ++ leaf = path->nodes[0]; ++ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); ++ ++ if (found_key.objectid != btrfs_ino(inode) || ++ found_key.type != BTRFS_DIR_INDEX_KEY) { ++ inode->index_cnt = BTRFS_DIR_START_INDEX; ++ goto out; ++ } ++ ++ inode->index_cnt = found_key.offset + 1; ++out: ++ btrfs_free_path(path); ++ return ret; ++} ++ ++static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) ++{ ++ if (dir->index_cnt == (u64)-1) { ++ int ret; ++ ++ ret = btrfs_inode_delayed_dir_index_count(dir); ++ if (ret) { ++ ret = btrfs_set_inode_index_count(dir); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ *index = dir->index_cnt; ++ ++ return 0; ++} ++ ++/* + * All this infrastructure exists because dir_emit can fault, and we are holding + * the tree lock when doing readdir. For now just allocate a buffer and copy + * our information into that, and then dir_emit from the buffer. 
This is +@@ -6133,10 +6201,17 @@ static struct dentry *btrfs_lookup(struc + static int btrfs_opendir(struct inode *inode, struct file *file) + { + struct btrfs_file_private *private; ++ u64 last_index; ++ int ret; ++ ++ ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index); ++ if (ret) ++ return ret; + + private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL); + if (!private) + return -ENOMEM; ++ private->last_index = last_index; + private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!private->filldir_buf) { + kfree(private); +@@ -6205,7 +6280,8 @@ static int btrfs_real_readdir(struct fil + + INIT_LIST_HEAD(&ins_list); + INIT_LIST_HEAD(&del_list); +- put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); ++ put = btrfs_readdir_get_delayed_items(inode, private->last_index, ++ &ins_list, &del_list); + + again: + key.type = BTRFS_DIR_INDEX_KEY; +@@ -6238,6 +6314,8 @@ again: + break; + if (found_key.offset < ctx->pos) + goto next; ++ if (found_key.offset > private->last_index) ++ break; + if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) + goto next; + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); +@@ -6372,57 +6450,6 @@ static int btrfs_update_time(struct inod + } + + /* +- * find the highest existing sequence number in a directory +- * and then set the in-memory index_cnt variable to reflect +- * free sequence numbers +- */ +-static int btrfs_set_inode_index_count(struct btrfs_inode *inode) +-{ +- struct btrfs_root *root = inode->root; +- struct btrfs_key key, found_key; +- struct btrfs_path *path; +- struct extent_buffer *leaf; +- int ret; +- +- key.objectid = btrfs_ino(inode); +- key.type = BTRFS_DIR_INDEX_KEY; +- key.offset = (u64)-1; +- +- path = btrfs_alloc_path(); +- if (!path) +- return -ENOMEM; +- +- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +- if (ret < 0) +- goto out; +- /* FIXME: we should be able to handle this */ +- if (ret == 0) +- goto out; +- ret = 0; +- +- if (path->slots[0] == 
0) { +- inode->index_cnt = BTRFS_DIR_START_INDEX; +- goto out; +- } +- +- path->slots[0]--; +- +- leaf = path->nodes[0]; +- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); +- +- if (found_key.objectid != btrfs_ino(inode) || +- found_key.type != BTRFS_DIR_INDEX_KEY) { +- inode->index_cnt = BTRFS_DIR_START_INDEX; +- goto out; +- } +- +- inode->index_cnt = found_key.offset + 1; +-out: +- btrfs_free_path(path); +- return ret; +-} +- +-/* + * helper to find a free sequence number in a given directory. This current + * code is very simple, later versions will do smarter things in the btree + */ diff --git a/queue-5.15/btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch b/queue-5.15/btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch new file mode 100644 index 00000000000..ee8405981ac --- /dev/null +++ b/queue-5.15/btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch @@ -0,0 +1,148 @@ +From stable+bounces-15801-greg=kroah.com@vger.kernel.org Thu Jan 25 04:00:50 2024 +From: fdmanana@kernel.org +Date: Thu, 25 Jan 2024 11:59:38 +0000 +Subject: btrfs: fix race between reading a directory and adding entries to it +To: linux-btrfs@vger.kernel.org +Cc: erosca@de.adit-jv.com, Maksim.Paimushkin@se.bosch.com, Matthias.Thomae@de.bosch.com, Sebastian.Unger@bosch.com, Dirk.Behme@de.bosch.com, Eugeniu.Rosca@bosch.com, wqu@suse.com, dsterba@suse.com, stable@vger.kernel.org, Filipe Manana , ken , syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com +Message-ID: <1fd8f27289a8608c77f66c065d6fda87a7d89628.1706183427.git.fdmanana@suse.com> + +From: Filipe Manana + +commit 8e7f82deb0c0386a03b62e30082574347f8b57d5 upstream. 
+ +When opening a directory (opendir(3)) or rewinding it (rewinddir(3)), we +are not holding the directory's inode locked, and this can result in later +attempting to add two entries to the directory with the same index number, +resulting in a transaction abort, with -EEXIST (-17), when inserting the +second delayed dir index. This results in a trace like the following: + + Sep 11 22:34:59 myhostname kernel: BTRFS error (device dm-3): err add delayed dir index item(name: cockroach-stderr.log) into the insertion tree of the delayed node(root id: 5, inode id: 4539217, errno: -17) + Sep 11 22:34:59 myhostname kernel: ------------[ cut here ]------------ + Sep 11 22:34:59 myhostname kernel: kernel BUG at fs/btrfs/delayed-inode.c:1504! + Sep 11 22:34:59 myhostname kernel: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI + Sep 11 22:34:59 myhostname kernel: CPU: 0 PID: 7159 Comm: cockroach Not tainted 6.4.15-200.fc38.x86_64 #1 + Sep 11 22:34:59 myhostname kernel: Hardware name: ASUS ESC500 G3/P9D WS, BIOS 2402 06/27/2018 + Sep 11 22:34:59 myhostname kernel: RIP: 0010:btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: Code: eb dd 48 (...) 
+ Sep 11 22:34:59 myhostname kernel: RSP: 0000:ffffa9980e0fbb28 EFLAGS: 00010282 + Sep 11 22:34:59 myhostname kernel: RAX: 0000000000000000 RBX: ffff8b10b8f4a3c0 RCX: 0000000000000000 + Sep 11 22:34:59 myhostname kernel: RDX: 0000000000000000 RSI: ffff8b177ec21540 RDI: ffff8b177ec21540 + Sep 11 22:34:59 myhostname kernel: RBP: ffff8b110cf80888 R08: 0000000000000000 R09: ffffa9980e0fb938 + Sep 11 22:34:59 myhostname kernel: R10: 0000000000000003 R11: ffffffff86146508 R12: 0000000000000014 + Sep 11 22:34:59 myhostname kernel: R13: ffff8b1131ae5b40 R14: ffff8b10b8f4a418 R15: 00000000ffffffef + Sep 11 22:34:59 myhostname kernel: FS: 00007fb14a7fe6c0(0000) GS:ffff8b177ec00000(0000) knlGS:0000000000000000 + Sep 11 22:34:59 myhostname kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + Sep 11 22:34:59 myhostname kernel: CR2: 000000c00143d000 CR3: 00000001b3b4e002 CR4: 00000000001706f0 + Sep 11 22:34:59 myhostname kernel: Call Trace: + Sep 11 22:34:59 myhostname kernel: + Sep 11 22:34:59 myhostname kernel: ? die+0x36/0x90 + Sep 11 22:34:59 myhostname kernel: ? do_trap+0xda/0x100 + Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: ? do_error_trap+0x6a/0x90 + Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: ? exc_invalid_op+0x50/0x70 + Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: ? asm_exc_invalid_op+0x1a/0x20 + Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: btrfs_insert_dir_item+0x200/0x280 + Sep 11 22:34:59 myhostname kernel: btrfs_add_link+0xab/0x4f0 + Sep 11 22:34:59 myhostname kernel: ? 
ktime_get_real_ts64+0x47/0xe0 + Sep 11 22:34:59 myhostname kernel: btrfs_create_new_inode+0x7cd/0xa80 + Sep 11 22:34:59 myhostname kernel: btrfs_symlink+0x190/0x4d0 + Sep 11 22:34:59 myhostname kernel: ? schedule+0x5e/0xd0 + Sep 11 22:34:59 myhostname kernel: ? __d_lookup+0x7e/0xc0 + Sep 11 22:34:59 myhostname kernel: vfs_symlink+0x148/0x1e0 + Sep 11 22:34:59 myhostname kernel: do_symlinkat+0x130/0x140 + Sep 11 22:34:59 myhostname kernel: __x64_sys_symlinkat+0x3d/0x50 + Sep 11 22:34:59 myhostname kernel: do_syscall_64+0x5d/0x90 + Sep 11 22:34:59 myhostname kernel: ? syscall_exit_to_user_mode+0x2b/0x40 + Sep 11 22:34:59 myhostname kernel: ? do_syscall_64+0x6c/0x90 + Sep 11 22:34:59 myhostname kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc + +The race leading to the problem happens like this: + +1) Directory inode X is loaded into memory, its ->index_cnt field is + initialized to (u64)-1 (at btrfs_alloc_inode()); + +2) Task A is adding a new file to directory X, holding its vfs inode lock, + and calls btrfs_set_inode_index() to get an index number for the entry. + + Because the inode's index_cnt field is set to (u64)-1 it calls + btrfs_inode_delayed_dir_index_count() which fails because no dir index + entries were added yet to the delayed inode and then it calls + btrfs_set_inode_index_count(). This function finds the last dir index + key and then sets index_cnt to that index value + 1. It found that the + last index key has an offset of 100. However before it assigns a value + of 101 to index_cnt... + +3) Task B calls opendir(3), ending up at btrfs_opendir(), where the VFS + lock for inode X is not taken, so it calls btrfs_get_dir_last_index() + and sees index_cnt still with a value of (u64)-1. Because of that it + calls btrfs_inode_delayed_dir_index_count() which fails since no dir + index entries were added to the delayed inode yet, and then it also + calls btrfs_set_inode_index_count().
This also finds that the last + index key has an offset of 100, and before it assigns the value 101 + to the index_cnt field of inode X... + +4) Task A assigns a value of 101 to index_cnt. And then the code flow + goes to btrfs_set_inode_index() where it increments index_cnt from + 101 to 102. Task A then creates a delayed dir index entry with a + sequence number of 101 and adds it to the delayed inode; + +5) Task B assigns 101 to the index_cnt field of inode X; + +6) At some later point when someone tries to add a new entry to the + directory, btrfs_set_inode_index() will return 101 again and shortly + after an attempt to add another delayed dir index key with index + number 101 will fail with -EEXIST resulting in a transaction abort. + +Fix this by locking the inode at btrfs_get_dir_last_index(), which is +only used when opening a directory or attempting to lseek on it. + +Reported-by: ken +Link: https://lore.kernel.org/linux-btrfs/CAE6xmH+Lp=Q=E61bU+v9eWX8gYfLvu6jLYxjxjFpo3zHVPR0EQ@mail.gmail.com/ +Reported-by: syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com +Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/ +Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") +CC: stable@vger.kernel.org # 6.5+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Reviewed-by: Eugeniu Rosca +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6173,21 +6173,24 @@ out: + + static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) + { +- if (dir->index_cnt == (u64)-1) { +- int ret; ++ int ret = 0; + ++ btrfs_inode_lock(&dir->vfs_inode, 0); ++ if (dir->index_cnt == (u64)-1) { + ret = btrfs_inode_delayed_dir_index_count(dir); + if (ret) { + ret = btrfs_set_inode_index_count(dir); + if (ret) +- return ret; ++ goto out; + } + } + + /* index_cnt is the index
number of next new entry, so decrement it. */ + *index = dir->index_cnt - 1; ++out: ++ btrfs_inode_unlock(&dir->vfs_inode, 0); + +- return 0; ++ return ret; + } + + /* diff --git a/queue-5.15/btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch b/queue-5.15/btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch new file mode 100644 index 00000000000..f39541395e9 --- /dev/null +++ b/queue-5.15/btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch @@ -0,0 +1,106 @@ +From stable+bounces-15800-greg=kroah.com@vger.kernel.org Thu Jan 25 04:01:48 2024 +From: fdmanana@kernel.org +Date: Thu, 25 Jan 2024 11:59:37 +0000 +Subject: btrfs: refresh dir last index during a rewinddir(3) call +To: linux-btrfs@vger.kernel.org +Cc: erosca@de.adit-jv.com, Maksim.Paimushkin@se.bosch.com, Matthias.Thomae@de.bosch.com, Sebastian.Unger@bosch.com, Dirk.Behme@de.bosch.com, Eugeniu.Rosca@bosch.com, wqu@suse.com, dsterba@suse.com, stable@vger.kernel.org, Filipe Manana , Ian Johnson +Message-ID: + +From: Filipe Manana + +commit e60aa5da14d01fed8411202dbe4adf6c44bd2a57 upstream. + +When opening a directory we find what's the index of its last entry and +then store it in the directory's file handle private data (struct +btrfs_file_private::last_index), so that in the case new directory entries +are added to a directory after an opendir(3) call we don't end up in an +infinite loop (see commit 9b378f6ad48c ("btrfs: fix infinite directory +reads")) when calling readdir(3). + +However once rewinddir(3) is called, POSIX states [1] that any new +directory entries added after the previous opendir(3) call, must be +returned by subsequent calls to readdir(3): + + "The rewinddir() function shall reset the position of the directory + stream to which dirp refers to the beginning of the directory. + It shall also cause the directory stream to refer to the current + state of the corresponding directory, as a call to opendir() would + have done." 
+ +We currently don't refresh the last_index field of the struct +btrfs_file_private associated to the directory, so after a rewinddir(3) +we are not returning any new entries added after the opendir(3) call. + +Fix this by finding the current last index of the directory when llseek +is called against the directory. + +This can be reproduced by the following C program provided by Ian Johnson: + + #include <dirent.h> + #include <stdio.h> + + int main(void) { + DIR *dir = opendir("test"); + + FILE *file; + file = fopen("test/1", "w"); + fwrite("1", 1, 1, file); + fclose(file); + + file = fopen("test/2", "w"); + fwrite("2", 1, 1, file); + fclose(file); + + rewinddir(dir); + + struct dirent *entry; + while ((entry = readdir(dir))) { + printf("%s\n", entry->d_name); + } + closedir(dir); + return 0; + } + +Reported-by: Ian Johnson +Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ +Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") +CC: stable@vger.kernel.org # 6.5+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Reviewed-by: Eugeniu Rosca +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6222,6 +6222,19 @@ static int btrfs_opendir(struct inode *i + return 0; + } + ++static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence) ++{ ++ struct btrfs_file_private *private = file->private_data; ++ int ret; ++ ++ ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)), ++ &private->last_index); ++ if (ret) ++ return ret; ++ ++ return generic_file_llseek(file, offset, whence); ++} ++ + struct dir_entry { + u64 ino; + u64 offset; +@@ -11087,7 +11100,7 @@ static const struct inode_operations btr + }; + + static const struct file_operations btrfs_dir_file_operations = { +- .llseek = generic_file_llseek, ++ .llseek = btrfs_dir_llseek, + .read = generic_read_dir, + .iterate_shared = btrfs_real_readdir, +
.open = btrfs_opendir, diff --git a/queue-5.15/btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch b/queue-5.15/btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch new file mode 100644 index 00000000000..d9d98877d0a --- /dev/null +++ b/queue-5.15/btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch @@ -0,0 +1,91 @@ +From stable+bounces-15799-greg=kroah.com@vger.kernel.org Thu Jan 25 04:02:25 2024 +From: fdmanana@kernel.org +Date: Thu, 25 Jan 2024 11:59:36 +0000 +Subject: btrfs: set last dir index to the current last index when opening dir +To: linux-btrfs@vger.kernel.org +Cc: erosca@de.adit-jv.com, Maksim.Paimushkin@se.bosch.com, Matthias.Thomae@de.bosch.com, Sebastian.Unger@bosch.com, Dirk.Behme@de.bosch.com, Eugeniu.Rosca@bosch.com, wqu@suse.com, dsterba@suse.com, stable@vger.kernel.org, Filipe Manana +Message-ID: + +From: Filipe Manana + +commit 357950361cbc6d54fb68ed878265c647384684ae upstream. + +When opening a directory for reading it, we set the last index where we +stop iteration to the value in struct btrfs_inode::index_cnt. That value +does not match the index of the most recently added directory entry but +it's instead the index number that will be assigned the next directory +entry. + +This means that if after the call to opendir(3) new directory entries are +added, a readdir(3) call will return the first new directory entry. This +is fine because POSIX says the following [1]: + + "If a file is removed from or added to the directory after the most + recent call to opendir() or rewinddir(), whether a subsequent call to + readdir() returns an entry for that file is unspecified." + +For example for the test script from commit 9b378f6ad48c ("btrfs: fix +infinite directory reads"), where we have 2000 files in a directory, ext4 +doesn't return any new directory entry after opendir(3), while xfs returns +the first 13 new directory entries added after the opendir(3) call. 
+ +If we move to a shorter example with an empty directory when opendir(3) is +called, and 2 files added to the directory after the opendir(3) call, then +readdir(3) on btrfs will return the first file, ext4 and xfs return the 2 +files (but in a different order). A test program for this, reported by +Ian Johnson, is the following: + + #include <dirent.h> + #include <stdio.h> + + int main(void) { + DIR *dir = opendir("test"); + + FILE *file; + file = fopen("test/1", "w"); + fwrite("1", 1, 1, file); + fclose(file); + + file = fopen("test/2", "w"); + fwrite("2", 1, 1, file); + fclose(file); + + struct dirent *entry; + while ((entry = readdir(dir))) { + printf("%s\n", entry->d_name); + } + closedir(dir); + return 0; + } + +To make this less odd, change the behaviour to never return new entries +that were added after the opendir(3) call. This is done by setting the +last_index field of the struct btrfs_file_private attached to the +directory's file handle with a value matching btrfs_inode::index_cnt +minus 1, since that value always matches the index of the next new +directory entry and not the index of the most recently added entry. + +[1] https://pubs.opengroup.org/onlinepubs/007904875/functions/readdir_r.html + +Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ +CC: stable@vger.kernel.org # 6.5+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Reviewed-by: Eugeniu Rosca +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6184,7 +6184,8 @@ static int btrfs_get_dir_last_index(stru + } + } + +- *index = dir->index_cnt; ++ /* index_cnt is the index number of next new entry, so decrement it.
*/ ++ *index = dir->index_cnt - 1; + + return 0; + } diff --git a/queue-5.15/series b/queue-5.15/series index ceaab04a18a..95c81cc2fa0 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -72,3 +72,7 @@ net-mvpp2-clear-bm-pool-before-initialization.patch selftests-netdevsim-fix-the-udp_tunnel_nic-test.patch fjes-fix-memleaks-in-fjes_hw_setup.patch net-fec-fix-the-unhandled-context-fault-from-smmu.patch +btrfs-fix-infinite-directory-reads.patch +btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch +btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch +btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch -- 2.47.3