From: Greg Kroah-Hartman Date: Sat, 27 Jan 2024 21:10:41 +0000 (-0800) Subject: 6.1-stable patches X-Git-Tag: v6.1.76~60 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c438e7f1703f0fb8a77b14459e6bd11847b7439c;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: btrfs-fix-infinite-directory-reads.patch btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch --- diff --git a/queue-6.1/btrfs-fix-infinite-directory-reads.patch b/queue-6.1/btrfs-fix-infinite-directory-reads.patch new file mode 100644 index 00000000000..7f38ebd59fd --- /dev/null +++ b/queue-6.1/btrfs-fix-infinite-directory-reads.patch @@ -0,0 +1,310 @@ +From stable+bounces-16087-greg=kroah.com@vger.kernel.org Sat Jan 27 10:16:03 2024 +From: fdmanana@kernel.org +Date: Sat, 27 Jan 2024 18:15:39 +0000 +Subject: btrfs: fix infinite directory reads +To: linux-btrfs@vger.kernel.org +Cc: stable@vger.kernel.org, Filipe Manana , Rob Landley , David Sterba +Message-ID: + +From: Filipe Manana + +commit 9b378f6ad48cfa195ed868db9123c09ee7ec5ea2 upstream. + +The readdir implementation currently processes always up to the last index +it finds. This however can result in an infinite loop if the directory has +a large number of entries such that they won't all fit in the given buffer +passed to the readdir callback, that is, dir_emit() returns a non-zero +value. Because in that case readdir() will be called again and if in the +meanwhile new directory entries were added and we still can't put all the +remaining entries in the buffer, we keep repeating this over and over. 
+ +The following C program and test script reproduce the problem: + + $ cat /mnt/readdir_prog.c + #include + #include + #include + + int main(int argc, char *argv[]) + { + DIR *dir = opendir("."); + struct dirent *dd; + + while ((dd = readdir(dir))) { + printf("%s\n", dd->d_name); + rename(dd->d_name, "TEMPFILE"); + rename("TEMPFILE", dd->d_name); + } + closedir(dir); + } + + $ gcc -o /mnt/readdir_prog /mnt/readdir_prog.c + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdi + MNT=/mnt/sdi + + mkfs.btrfs -f $DEV &> /dev/null + #mkfs.xfs -f $DEV &> /dev/null + #mkfs.ext4 -F $DEV &> /dev/null + + mount $DEV $MNT + + mkdir $MNT/testdir + for ((i = 1; i <= 2000; i++)); do + echo -n > $MNT/testdir/file_$i + done + + cd $MNT/testdir + /mnt/readdir_prog + + cd /mnt + + umount $MNT + +This behaviour is surprising to applications and it's unlike ext4, xfs, +tmpfs, vfat and other filesystems, which always finish. In this case where +new entries were added due to renames, some file names may be reported +more than once, but this varies according to each filesystem - for example +ext4 never reported the same file more than once while xfs reports the +first 13 file names twice. + +So change our readdir implementation to track the last index number when +opendir() is called and then make readdir() never process beyond that +index number. This gives the same behaviour as ext4. 
+ +Reported-by: Rob Landley +Link: https://lore.kernel.org/linux-btrfs/2c8c55ec-04c6-e0dc-9c5c-8c7924778c35@landley.net/ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=217681 +CC: stable@vger.kernel.org # 5.15 +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 1 + fs/btrfs/delayed-inode.c | 5 + + fs/btrfs/delayed-inode.h | 1 + fs/btrfs/inode.c | 131 ++++++++++++++++++++++++++++------------------- + 4 files changed, 84 insertions(+), 54 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -1552,6 +1552,7 @@ struct btrfs_drop_extents_args { + + struct btrfs_file_private { + void *filldir_buf; ++ u64 last_index; + }; + + +--- a/fs/btrfs/delayed-inode.c ++++ b/fs/btrfs/delayed-inode.c +@@ -1665,6 +1665,7 @@ int btrfs_inode_delayed_dir_index_count( + } + + bool btrfs_readdir_get_delayed_items(struct inode *inode, ++ u64 last_index, + struct list_head *ins_list, + struct list_head *del_list) + { +@@ -1684,14 +1685,14 @@ bool btrfs_readdir_get_delayed_items(str + + mutex_lock(&delayed_node->mutex); + item = __btrfs_first_delayed_insertion_item(delayed_node); +- while (item) { ++ while (item && item->index <= last_index) { + refcount_inc(&item->refs); + list_add_tail(&item->readdir_list, ins_list); + item = __btrfs_next_delayed_item(item); + } + + item = __btrfs_first_delayed_deletion_item(delayed_node); +- while (item) { ++ while (item && item->index <= last_index) { + refcount_inc(&item->refs); + list_add_tail(&item->readdir_list, del_list); + item = __btrfs_next_delayed_item(item); +--- a/fs/btrfs/delayed-inode.h ++++ b/fs/btrfs/delayed-inode.h +@@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct + + /* Used for readdir() */ + bool btrfs_readdir_get_delayed_items(struct inode *inode, ++ u64 last_index, + struct list_head *ins_list, + struct list_head *del_list); + void btrfs_readdir_put_delayed_items(struct inode *inode, +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ 
-5949,6 +5949,74 @@ static struct dentry *btrfs_lookup(struc + } + + /* ++ * Find the highest existing sequence number in a directory and then set the ++ * in-memory index_cnt variable to the first free sequence number. ++ */ ++static int btrfs_set_inode_index_count(struct btrfs_inode *inode) ++{ ++ struct btrfs_root *root = inode->root; ++ struct btrfs_key key, found_key; ++ struct btrfs_path *path; ++ struct extent_buffer *leaf; ++ int ret; ++ ++ key.objectid = btrfs_ino(inode); ++ key.type = BTRFS_DIR_INDEX_KEY; ++ key.offset = (u64)-1; ++ ++ path = btrfs_alloc_path(); ++ if (!path) ++ return -ENOMEM; ++ ++ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ++ if (ret < 0) ++ goto out; ++ /* FIXME: we should be able to handle this */ ++ if (ret == 0) ++ goto out; ++ ret = 0; ++ ++ if (path->slots[0] == 0) { ++ inode->index_cnt = BTRFS_DIR_START_INDEX; ++ goto out; ++ } ++ ++ path->slots[0]--; ++ ++ leaf = path->nodes[0]; ++ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); ++ ++ if (found_key.objectid != btrfs_ino(inode) || ++ found_key.type != BTRFS_DIR_INDEX_KEY) { ++ inode->index_cnt = BTRFS_DIR_START_INDEX; ++ goto out; ++ } ++ ++ inode->index_cnt = found_key.offset + 1; ++out: ++ btrfs_free_path(path); ++ return ret; ++} ++ ++static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) ++{ ++ if (dir->index_cnt == (u64)-1) { ++ int ret; ++ ++ ret = btrfs_inode_delayed_dir_index_count(dir); ++ if (ret) { ++ ret = btrfs_set_inode_index_count(dir); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ *index = dir->index_cnt; ++ ++ return 0; ++} ++ ++/* + * All this infrastructure exists because dir_emit can fault, and we are holding + * the tree lock when doing readdir. For now just allocate a buffer and copy + * our information into that, and then dir_emit from the buffer. 
This is +@@ -5960,10 +6028,17 @@ static struct dentry *btrfs_lookup(struc + static int btrfs_opendir(struct inode *inode, struct file *file) + { + struct btrfs_file_private *private; ++ u64 last_index; ++ int ret; ++ ++ ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index); ++ if (ret) ++ return ret; + + private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL); + if (!private) + return -ENOMEM; ++ private->last_index = last_index; + private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!private->filldir_buf) { + kfree(private); +@@ -6030,7 +6105,8 @@ static int btrfs_real_readdir(struct fil + + INIT_LIST_HEAD(&ins_list); + INIT_LIST_HEAD(&del_list); +- put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); ++ put = btrfs_readdir_get_delayed_items(inode, private->last_index, ++ &ins_list, &del_list); + + again: + key.type = BTRFS_DIR_INDEX_KEY; +@@ -6047,6 +6123,8 @@ again: + break; + if (found_key.offset < ctx->pos) + continue; ++ if (found_key.offset > private->last_index) ++ break; + if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) + continue; + di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); +@@ -6183,57 +6261,6 @@ static int btrfs_update_time(struct inod + } + + /* +- * find the highest existing sequence number in a directory +- * and then set the in-memory index_cnt variable to reflect +- * free sequence numbers +- */ +-static int btrfs_set_inode_index_count(struct btrfs_inode *inode) +-{ +- struct btrfs_root *root = inode->root; +- struct btrfs_key key, found_key; +- struct btrfs_path *path; +- struct extent_buffer *leaf; +- int ret; +- +- key.objectid = btrfs_ino(inode); +- key.type = BTRFS_DIR_INDEX_KEY; +- key.offset = (u64)-1; +- +- path = btrfs_alloc_path(); +- if (!path) +- return -ENOMEM; +- +- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +- if (ret < 0) +- goto out; +- /* FIXME: we should be able to handle this */ +- if (ret == 0) +- goto out; +- ret = 0; +- +- if 
(path->slots[0] == 0) { +- inode->index_cnt = BTRFS_DIR_START_INDEX; +- goto out; +- } +- +- path->slots[0]--; +- +- leaf = path->nodes[0]; +- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); +- +- if (found_key.objectid != btrfs_ino(inode) || +- found_key.type != BTRFS_DIR_INDEX_KEY) { +- inode->index_cnt = BTRFS_DIR_START_INDEX; +- goto out; +- } +- +- inode->index_cnt = found_key.offset + 1; +-out: +- btrfs_free_path(path); +- return ret; +-} +- +-/* + * helper to find a free sequence number in a given directory. This current + * code is very simple, later versions will do smarter things in the btree + */ diff --git a/queue-6.1/btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch b/queue-6.1/btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch new file mode 100644 index 00000000000..afd7d7bb0f4 --- /dev/null +++ b/queue-6.1/btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch @@ -0,0 +1,147 @@ +From stable+bounces-16090-greg=kroah.com@vger.kernel.org Sat Jan 27 10:16:21 2024 +From: fdmanana@kernel.org +Date: Sat, 27 Jan 2024 18:15:42 +0000 +Subject: btrfs: fix race between reading a directory and adding entries to it +To: linux-btrfs@vger.kernel.org +Cc: stable@vger.kernel.org, Filipe Manana , ken , syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com, David Sterba +Message-ID: <20651ef2729621cac727cec97a1f27a3143def0b.1706379057.git.fdmanana@suse.com> + +From: Filipe Manana + +commit 8e7f82deb0c0386a03b62e30082574347f8b57d5 upstream. + +When opening a directory (opendir(3)) or rewinding it (rewinddir(3)), we +are not holding the directory's inode locked, and this can result in later +attempting to add two entries to the directory with the same index number, +resulting in a transaction abort, with -EEXIST (-17), when inserting the +second delayed dir index. 
This results in a trace like the following: + + Sep 11 22:34:59 myhostname kernel: BTRFS error (device dm-3): err add delayed dir index item(name: cockroach-stderr.log) into the insertion tree of the delayed node(root id: 5, inode id: 4539217, errno: -17) + Sep 11 22:34:59 myhostname kernel: ------------[ cut here ]------------ + Sep 11 22:34:59 myhostname kernel: kernel BUG at fs/btrfs/delayed-inode.c:1504! + Sep 11 22:34:59 myhostname kernel: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI + Sep 11 22:34:59 myhostname kernel: CPU: 0 PID: 7159 Comm: cockroach Not tainted 6.4.15-200.fc38.x86_64 #1 + Sep 11 22:34:59 myhostname kernel: Hardware name: ASUS ESC500 G3/P9D WS, BIOS 2402 06/27/2018 + Sep 11 22:34:59 myhostname kernel: RIP: 0010:btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: Code: eb dd 48 (...) + Sep 11 22:34:59 myhostname kernel: RSP: 0000:ffffa9980e0fbb28 EFLAGS: 00010282 + Sep 11 22:34:59 myhostname kernel: RAX: 0000000000000000 RBX: ffff8b10b8f4a3c0 RCX: 0000000000000000 + Sep 11 22:34:59 myhostname kernel: RDX: 0000000000000000 RSI: ffff8b177ec21540 RDI: ffff8b177ec21540 + Sep 11 22:34:59 myhostname kernel: RBP: ffff8b110cf80888 R08: 0000000000000000 R09: ffffa9980e0fb938 + Sep 11 22:34:59 myhostname kernel: R10: 0000000000000003 R11: ffffffff86146508 R12: 0000000000000014 + Sep 11 22:34:59 myhostname kernel: R13: ffff8b1131ae5b40 R14: ffff8b10b8f4a418 R15: 00000000ffffffef + Sep 11 22:34:59 myhostname kernel: FS: 00007fb14a7fe6c0(0000) GS:ffff8b177ec00000(0000) knlGS:0000000000000000 + Sep 11 22:34:59 myhostname kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + Sep 11 22:34:59 myhostname kernel: CR2: 000000c00143d000 CR3: 00000001b3b4e002 CR4: 00000000001706f0 + Sep 11 22:34:59 myhostname kernel: Call Trace: + Sep 11 22:34:59 myhostname kernel: + Sep 11 22:34:59 myhostname kernel: ? die+0x36/0x90 + Sep 11 22:34:59 myhostname kernel: ? do_trap+0xda/0x100 + Sep 11 22:34:59 myhostname kernel: ? 
btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: ? do_error_trap+0x6a/0x90 + Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: ? exc_invalid_op+0x50/0x70 + Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: ? asm_exc_invalid_op+0x1a/0x20 + Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 + Sep 11 22:34:59 myhostname kernel: btrfs_insert_dir_item+0x200/0x280 + Sep 11 22:34:59 myhostname kernel: btrfs_add_link+0xab/0x4f0 + Sep 11 22:34:59 myhostname kernel: ? ktime_get_real_ts64+0x47/0xe0 + Sep 11 22:34:59 myhostname kernel: btrfs_create_new_inode+0x7cd/0xa80 + Sep 11 22:34:59 myhostname kernel: btrfs_symlink+0x190/0x4d0 + Sep 11 22:34:59 myhostname kernel: ? schedule+0x5e/0xd0 + Sep 11 22:34:59 myhostname kernel: ? __d_lookup+0x7e/0xc0 + Sep 11 22:34:59 myhostname kernel: vfs_symlink+0x148/0x1e0 + Sep 11 22:34:59 myhostname kernel: do_symlinkat+0x130/0x140 + Sep 11 22:34:59 myhostname kernel: __x64_sys_symlinkat+0x3d/0x50 + Sep 11 22:34:59 myhostname kernel: do_syscall_64+0x5d/0x90 + Sep 11 22:34:59 myhostname kernel: ? syscall_exit_to_user_mode+0x2b/0x40 + Sep 11 22:34:59 myhostname kernel: ? do_syscall_64+0x6c/0x90 + Sep 11 22:34:59 myhostname kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc + +The race leading to the problem happens like this: + +1) Directory inode X is loaded into memory, its ->index_cnt field is + initialized to (u64)-1 (at btrfs_alloc_inode()); + +2) Task A is adding a new file to directory X, holding its vfs inode lock, + and calls btrfs_set_inode_index() to get an index number for the entry. 
+ + Because the inode's index_cnt field is set to (u64)-1 it calls + btrfs_inode_delayed_dir_index_count() which fails because no dir index + entries were added yet to the delayed inode and then it calls + btrfs_set_inode_index_count(). This function finds the last dir index + key and then sets index_cnt to that index value + 1. It found that the + last index key has an offset of 100. However before it assigns a value + of 101 to index_cnt... + +3) Task B calls opendir(3), ending up at btrfs_opendir(), where the VFS + lock for inode X is not taken, so it calls btrfs_get_dir_last_index() + and sees index_cnt still with a value of (u64)-1. Because of that it + calls btrfs_inode_delayed_dir_index_count() which fails since no dir + index entries were added to the delayed inode yet, and then it also + calls btrfs_set_inode_index_count(). This also finds that the last + index key has an offset of 100, and before it assigns the value 101 + to the index_cnt field of inode X... + +4) Task A assigns a value of 101 to index_cnt. And then the code flow + goes to btrfs_set_inode_index() where it increments index_cnt from + 101 to 102. Task A then creates a delayed dir index entry with a + sequence number of 101 and adds it to the delayed inode; + +5) Task B assigns 101 to the index_cnt field of inode X; + +6) At some later point when someone tries to add a new entry to the + directory, btrfs_set_inode_index() will return 101 again and shortly + after an attempt to add another delayed dir index key with index + number 101 will fail with -EEXIST resulting in a transaction abort. + +Fix this by locking the inode at btrfs_get_dir_last_index(), which is +only used when opening a directory or attempting to lseek on it. 
+ +Reported-by: ken +Link: https://lore.kernel.org/linux-btrfs/CAE6xmH+Lp=Q=E61bU+v9eWX8gYfLvu6jLYxjxjFpo3zHVPR0EQ@mail.gmail.com/ +Reported-by: syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com +Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/ +Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") +CC: stable@vger.kernel.org # 6.5+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6000,21 +6000,24 @@ out: + + static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) + { +- if (dir->index_cnt == (u64)-1) { +- int ret; ++ int ret = 0; + ++ btrfs_inode_lock(&dir->vfs_inode, 0); ++ if (dir->index_cnt == (u64)-1) { + ret = btrfs_inode_delayed_dir_index_count(dir); + if (ret) { + ret = btrfs_set_inode_index_count(dir); + if (ret) +- return ret; ++ goto out; + } + } + + /* index_cnt is the index number of next new entry, so decrement it. 
*/ + *index = dir->index_cnt - 1; ++out: ++ btrfs_inode_unlock(&dir->vfs_inode, 0); + +- return 0; ++ return ret; + } + + /* diff --git a/queue-6.1/btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch b/queue-6.1/btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch new file mode 100644 index 00000000000..1d6d36faa36 --- /dev/null +++ b/queue-6.1/btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch @@ -0,0 +1,105 @@ +From stable+bounces-16089-greg=kroah.com@vger.kernel.org Sat Jan 27 10:16:16 2024 +From: fdmanana@kernel.org +Date: Sat, 27 Jan 2024 18:15:41 +0000 +Subject: btrfs: refresh dir last index during a rewinddir(3) call +To: linux-btrfs@vger.kernel.org +Cc: stable@vger.kernel.org, Filipe Manana , Ian Johnson , David Sterba +Message-ID: <0fbb11500bb68ba52ba169fd20176c4b2a148cc3.1706379057.git.fdmanana@suse.com> + +From: Filipe Manana + +commit e60aa5da14d01fed8411202dbe4adf6c44bd2a57 upstream. + +When opening a directory we find what's the index of its last entry and +then store it in the directory's file handle private data (struct +btrfs_file_private::last_index), so that in the case new directory entries +are added to a directory after an opendir(3) call we don't end up in an +infinite loop (see commit 9b378f6ad48c ("btrfs: fix infinite directory +reads")) when calling readdir(3). + +However once rewinddir(3) is called, POSIX states [1] that any new +directory entries added after the previous opendir(3) call, must be +returned by subsequent calls to readdir(3): + + "The rewinddir() function shall reset the position of the directory + stream to which dirp refers to the beginning of the directory. + It shall also cause the directory stream to refer to the current + state of the corresponding directory, as a call to opendir() would + have done." 
+ +We currently don't refresh the last_index field of the struct +btrfs_file_private associated to the directory, so after a rewinddir(3) +we are not returning any new entries added after the opendir(3) call. + +Fix this by finding the current last index of the directory when llseek +is called against the directory. + +This can be reproduced by the following C program provided by Ian Johnson: + + #include + #include + + int main(void) { + DIR *dir = opendir("test"); + + FILE *file; + file = fopen("test/1", "w"); + fwrite("1", 1, 1, file); + fclose(file); + + file = fopen("test/2", "w"); + fwrite("2", 1, 1, file); + fclose(file); + + rewinddir(dir); + + struct dirent *entry; + while ((entry = readdir(dir))) { + printf("%s\n", entry->d_name); + } + closedir(dir); + return 0; + } + +Reported-by: Ian Johnson +Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ +Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") +CC: stable@vger.kernel.org # 6.5+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6049,6 +6049,19 @@ static int btrfs_opendir(struct inode *i + return 0; + } + ++static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence) ++{ ++ struct btrfs_file_private *private = file->private_data; ++ int ret; ++ ++ ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)), ++ &private->last_index); ++ if (ret) ++ return ret; ++ ++ return generic_file_llseek(file, offset, whence); ++} ++ + struct dir_entry { + u64 ino; + u64 offset; +@@ -11429,7 +11442,7 @@ static const struct inode_operations btr + }; + + static const struct file_operations btrfs_dir_file_operations = { +- .llseek = generic_file_llseek, ++ .llseek = btrfs_dir_llseek, + .read = generic_read_dir, + .iterate_shared = btrfs_real_readdir, + .open = btrfs_opendir, diff 
--git a/queue-6.1/btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch b/queue-6.1/btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch new file mode 100644 index 00000000000..dc724584925 --- /dev/null +++ b/queue-6.1/btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch @@ -0,0 +1,90 @@ +From stable+bounces-16088-greg=kroah.com@vger.kernel.org Sat Jan 27 10:16:10 2024 +From: fdmanana@kernel.org +Date: Sat, 27 Jan 2024 18:15:40 +0000 +Subject: btrfs: set last dir index to the current last index when opening dir +To: linux-btrfs@vger.kernel.org +Cc: stable@vger.kernel.org, Filipe Manana , David Sterba +Message-ID: <756f4eeb62e16d3eefd3c7d40a5b9b372dca45a2.1706379057.git.fdmanana@suse.com> + +From: Filipe Manana + +commit 357950361cbc6d54fb68ed878265c647384684ae upstream. + +When opening a directory for reading it, we set the last index where we +stop iteration to the value in struct btrfs_inode::index_cnt. That value +does not match the index of the most recently added directory entry but +it's instead the index number that will be assigned the next directory +entry. + +This means that if after the call to opendir(3) new directory entries are +added, a readdir(3) call will return the first new directory entry. This +is fine because POSIX says the following [1]: + + "If a file is removed from or added to the directory after the most + recent call to opendir() or rewinddir(), whether a subsequent call to + readdir() returns an entry for that file is unspecified." + +For example for the test script from commit 9b378f6ad48c ("btrfs: fix +infinite directory reads"), where we have 2000 files in a directory, ext4 +doesn't return any new directory entry after opendir(3), while xfs returns +the first 13 new directory entries added after the opendir(3) call. 
+ +If we move to a shorter example with an empty directory when opendir(3) is +called, and 2 files added to the directory after the opendir(3) call, then +readdir(3) on btrfs will return the first file, ext4 and xfs return the 2 +files (but in a different order). A test program for this, reported by +Ian Johnson, is the following: + + #include + #include + + int main(void) { + DIR *dir = opendir("test"); + + FILE *file; + file = fopen("test/1", "w"); + fwrite("1", 1, 1, file); + fclose(file); + + file = fopen("test/2", "w"); + fwrite("2", 1, 1, file); + fclose(file); + + struct dirent *entry; + while ((entry = readdir(dir))) { + printf("%s\n", entry->d_name); + } + closedir(dir); + return 0; + } + +To make this less odd, change the behaviour to never return new entries +that were added after the opendir(3) call. This is done by setting the +last_index field of the struct btrfs_file_private attached to the +directory's file handle with a value matching btrfs_inode::index_cnt +minus 1, since that value always matches the index of the next new +directory entry and not the index of the most recently added entry. + +[1] https://pubs.opengroup.org/onlinepubs/007904875/functions/readdir_r.html + +Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ +CC: stable@vger.kernel.org # 6.5+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6011,7 +6011,8 @@ static int btrfs_get_dir_last_index(stru + } + } + +- *index = dir->index_cnt; ++ /* index_cnt is the index number of next new entry, so decrement it. 
*/ ++ *index = dir->index_cnt - 1; + + return 0; + } diff --git a/queue-6.1/series b/queue-6.1/series index 0f5ad4495e9..c8c5770a08c 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -102,6 +102,10 @@ net-mlx5e-fix-a-double-free-in-arfs_create_groups.patch net-mlx5e-fix-a-potential-double-free-in-fs_any_crea.patch rcu-defer-rcu-kthreads-wakeup-when-cpu-is-dying.patch netfilter-nft_limit-reject-configurations-that-cause.patch +btrfs-fix-infinite-directory-reads.patch +btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch +btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch +btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch netfilter-nf_tables-restrict-anonymous-set-and-map-n.patch netfilter-nf_tables-validate-nfproto_-family.patch net-stmmac-wait-a-bit-for-the-reset-to-take-effect.patch