--- /dev/null
+From stable+bounces-16087-greg=kroah.com@vger.kernel.org Sat Jan 27 10:16:03 2024
+From: fdmanana@kernel.org
+Date: Sat, 27 Jan 2024 18:15:39 +0000
+Subject: btrfs: fix infinite directory reads
+To: linux-btrfs@vger.kernel.org
+Cc: stable@vger.kernel.org, Filipe Manana <fdmanana@suse.com>, Rob Landley <rob@landley.net>, David Sterba <dsterba@suse.com>
+Message-ID: <a2a6f79eb2696f06904d39503e1d4d8d41d24b03.1706379057.git.fdmanana@suse.com>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 9b378f6ad48cfa195ed868db9123c09ee7ec5ea2 upstream.
+
+The readdir implementation currently always processes up to the last index
+it finds. This however can result in an infinite loop if the directory has
+a large number of entries such that they won't all fit in the given buffer
+passed to the readdir callback, that is, dir_emit() returns a non-zero
+value. Because in that case readdir() will be called again and if in the
+meanwhile new directory entries were added and we still can't put all the
+remaining entries in the buffer, we keep repeating this over and over.
+
+The following C program and test script reproduce the problem:
+
+ $ cat /mnt/readdir_prog.c
+ #include <sys/types.h>
+ #include <dirent.h>
+ #include <stdio.h>
+
+ int main(int argc, char *argv[])
+ {
+ DIR *dir = opendir(".");
+ struct dirent *dd;
+
+ while ((dd = readdir(dir))) {
+ printf("%s\n", dd->d_name);
+ rename(dd->d_name, "TEMPFILE");
+ rename("TEMPFILE", dd->d_name);
+ }
+ closedir(dir);
+ }
+
+ $ gcc -o /mnt/readdir_prog /mnt/readdir_prog.c
+
+ $ cat test.sh
+ #!/bin/bash
+
+ DEV=/dev/sdi
+ MNT=/mnt/sdi
+
+ mkfs.btrfs -f $DEV &> /dev/null
+ #mkfs.xfs -f $DEV &> /dev/null
+ #mkfs.ext4 -F $DEV &> /dev/null
+
+ mount $DEV $MNT
+
+ mkdir $MNT/testdir
+ for ((i = 1; i <= 2000; i++)); do
+ echo -n > $MNT/testdir/file_$i
+ done
+
+ cd $MNT/testdir
+ /mnt/readdir_prog
+
+ cd /mnt
+
+ umount $MNT
+
+This behaviour is surprising to applications and it's unlike ext4, xfs,
+tmpfs, vfat and other filesystems, which always finish. In this case where
+new entries were added due to renames, some file names may be reported
+more than once, but this varies according to each filesystem - for example
+ext4 never reported the same file more than once while xfs reports the
+first 13 file names twice.
+
+So change our readdir implementation to track the last index number when
+opendir() is called and then make readdir() never process beyond that
+index number. This gives the same behaviour as ext4.
+
+Reported-by: Rob Landley <rob@landley.net>
+Link: https://lore.kernel.org/linux-btrfs/2c8c55ec-04c6-e0dc-9c5c-8c7924778c35@landley.net/
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217681
+CC: stable@vger.kernel.org # 5.15
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h | 1
+ fs/btrfs/delayed-inode.c | 5 +
+ fs/btrfs/delayed-inode.h | 1
+ fs/btrfs/inode.c | 131 ++++++++++++++++++++++++++++-------------------
+ 4 files changed, 84 insertions(+), 54 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1552,6 +1552,7 @@ struct btrfs_drop_extents_args {
+
+ struct btrfs_file_private {
+ void *filldir_buf;
++ u64 last_index;
+ };
+
+
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1665,6 +1665,7 @@ int btrfs_inode_delayed_dir_index_count(
+ }
+
+ bool btrfs_readdir_get_delayed_items(struct inode *inode,
++ u64 last_index,
+ struct list_head *ins_list,
+ struct list_head *del_list)
+ {
+@@ -1684,14 +1685,14 @@ bool btrfs_readdir_get_delayed_items(str
+
+ mutex_lock(&delayed_node->mutex);
+ item = __btrfs_first_delayed_insertion_item(delayed_node);
+- while (item) {
++ while (item && item->index <= last_index) {
+ refcount_inc(&item->refs);
+ list_add_tail(&item->readdir_list, ins_list);
+ item = __btrfs_next_delayed_item(item);
+ }
+
+ item = __btrfs_first_delayed_deletion_item(delayed_node);
+- while (item) {
++ while (item && item->index <= last_index) {
+ refcount_inc(&item->refs);
+ list_add_tail(&item->readdir_list, del_list);
+ item = __btrfs_next_delayed_item(item);
+--- a/fs/btrfs/delayed-inode.h
++++ b/fs/btrfs/delayed-inode.h
+@@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct
+
+ /* Used for readdir() */
+ bool btrfs_readdir_get_delayed_items(struct inode *inode,
++ u64 last_index,
+ struct list_head *ins_list,
+ struct list_head *del_list);
+ void btrfs_readdir_put_delayed_items(struct inode *inode,
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -5949,6 +5949,74 @@ static struct dentry *btrfs_lookup(struc
+ }
+
+ /*
++ * Find the highest existing sequence number in a directory and then set the
++ * in-memory index_cnt variable to the first free sequence number.
++ */
++static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
++{
++ struct btrfs_root *root = inode->root;
++ struct btrfs_key key, found_key;
++ struct btrfs_path *path;
++ struct extent_buffer *leaf;
++ int ret;
++
++ key.objectid = btrfs_ino(inode);
++ key.type = BTRFS_DIR_INDEX_KEY;
++ key.offset = (u64)-1;
++
++ path = btrfs_alloc_path();
++ if (!path)
++ return -ENOMEM;
++
++ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
++ if (ret < 0)
++ goto out;
++ /* FIXME: we should be able to handle this */
++ if (ret == 0)
++ goto out;
++ ret = 0;
++
++ if (path->slots[0] == 0) {
++ inode->index_cnt = BTRFS_DIR_START_INDEX;
++ goto out;
++ }
++
++ path->slots[0]--;
++
++ leaf = path->nodes[0];
++ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
++
++ if (found_key.objectid != btrfs_ino(inode) ||
++ found_key.type != BTRFS_DIR_INDEX_KEY) {
++ inode->index_cnt = BTRFS_DIR_START_INDEX;
++ goto out;
++ }
++
++ inode->index_cnt = found_key.offset + 1;
++out:
++ btrfs_free_path(path);
++ return ret;
++}
++
++static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
++{
++ if (dir->index_cnt == (u64)-1) {
++ int ret;
++
++ ret = btrfs_inode_delayed_dir_index_count(dir);
++ if (ret) {
++ ret = btrfs_set_inode_index_count(dir);
++ if (ret)
++ return ret;
++ }
++ }
++
++ *index = dir->index_cnt;
++
++ return 0;
++}
++
++/*
+ * All this infrastructure exists because dir_emit can fault, and we are holding
+ * the tree lock when doing readdir. For now just allocate a buffer and copy
+ * our information into that, and then dir_emit from the buffer. This is
+@@ -5960,10 +6028,17 @@ static struct dentry *btrfs_lookup(struc
+ static int btrfs_opendir(struct inode *inode, struct file *file)
+ {
+ struct btrfs_file_private *private;
++ u64 last_index;
++ int ret;
++
++ ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
++ if (ret)
++ return ret;
+
+ private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
+ if (!private)
+ return -ENOMEM;
++ private->last_index = last_index;
+ private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!private->filldir_buf) {
+ kfree(private);
+@@ -6030,7 +6105,8 @@ static int btrfs_real_readdir(struct fil
+
+ INIT_LIST_HEAD(&ins_list);
+ INIT_LIST_HEAD(&del_list);
+- put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
++ put = btrfs_readdir_get_delayed_items(inode, private->last_index,
++ &ins_list, &del_list);
+
+ again:
+ key.type = BTRFS_DIR_INDEX_KEY;
+@@ -6047,6 +6123,8 @@ again:
+ break;
+ if (found_key.offset < ctx->pos)
+ continue;
++ if (found_key.offset > private->last_index)
++ break;
+ if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
+ continue;
+ di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+@@ -6183,57 +6261,6 @@ static int btrfs_update_time(struct inod
+ }
+
+ /*
+- * find the highest existing sequence number in a directory
+- * and then set the in-memory index_cnt variable to reflect
+- * free sequence numbers
+- */
+-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+-{
+- struct btrfs_root *root = inode->root;
+- struct btrfs_key key, found_key;
+- struct btrfs_path *path;
+- struct extent_buffer *leaf;
+- int ret;
+-
+- key.objectid = btrfs_ino(inode);
+- key.type = BTRFS_DIR_INDEX_KEY;
+- key.offset = (u64)-1;
+-
+- path = btrfs_alloc_path();
+- if (!path)
+- return -ENOMEM;
+-
+- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+- if (ret < 0)
+- goto out;
+- /* FIXME: we should be able to handle this */
+- if (ret == 0)
+- goto out;
+- ret = 0;
+-
+- if (path->slots[0] == 0) {
+- inode->index_cnt = BTRFS_DIR_START_INDEX;
+- goto out;
+- }
+-
+- path->slots[0]--;
+-
+- leaf = path->nodes[0];
+- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+-
+- if (found_key.objectid != btrfs_ino(inode) ||
+- found_key.type != BTRFS_DIR_INDEX_KEY) {
+- inode->index_cnt = BTRFS_DIR_START_INDEX;
+- goto out;
+- }
+-
+- inode->index_cnt = found_key.offset + 1;
+-out:
+- btrfs_free_path(path);
+- return ret;
+-}
+-
+-/*
+ * helper to find a free sequence number in a given directory. This current
+ * code is very simple, later versions will do smarter things in the btree
+ */
--- /dev/null
+From stable+bounces-16090-greg=kroah.com@vger.kernel.org Sat Jan 27 10:16:21 2024
+From: fdmanana@kernel.org
+Date: Sat, 27 Jan 2024 18:15:42 +0000
+Subject: btrfs: fix race between reading a directory and adding entries to it
+To: linux-btrfs@vger.kernel.org
+Cc: stable@vger.kernel.org, Filipe Manana <fdmanana@suse.com>, ken <ken@bllue.org>, syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com, David Sterba <dsterba@suse.com>
+Message-ID: <20651ef2729621cac727cec97a1f27a3143def0b.1706379057.git.fdmanana@suse.com>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 8e7f82deb0c0386a03b62e30082574347f8b57d5 upstream.
+
+When opening a directory (opendir(3)) or rewinding it (rewinddir(3)), we
+are not holding the directory's inode locked, and this can result in later
+attempting to add two entries to the directory with the same index number,
+resulting in a transaction abort, with -EEXIST (-17), when inserting the
+second delayed dir index. This results in a trace like the following:
+
+ Sep 11 22:34:59 myhostname kernel: BTRFS error (device dm-3): err add delayed dir index item(name: cockroach-stderr.log) into the insertion tree of the delayed node(root id: 5, inode id: 4539217, errno: -17)
+ Sep 11 22:34:59 myhostname kernel: ------------[ cut here ]------------
+ Sep 11 22:34:59 myhostname kernel: kernel BUG at fs/btrfs/delayed-inode.c:1504!
+ Sep 11 22:34:59 myhostname kernel: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+ Sep 11 22:34:59 myhostname kernel: CPU: 0 PID: 7159 Comm: cockroach Not tainted 6.4.15-200.fc38.x86_64 #1
+ Sep 11 22:34:59 myhostname kernel: Hardware name: ASUS ESC500 G3/P9D WS, BIOS 2402 06/27/2018
+ Sep 11 22:34:59 myhostname kernel: RIP: 0010:btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: Code: eb dd 48 (...)
+ Sep 11 22:34:59 myhostname kernel: RSP: 0000:ffffa9980e0fbb28 EFLAGS: 00010282
+ Sep 11 22:34:59 myhostname kernel: RAX: 0000000000000000 RBX: ffff8b10b8f4a3c0 RCX: 0000000000000000
+ Sep 11 22:34:59 myhostname kernel: RDX: 0000000000000000 RSI: ffff8b177ec21540 RDI: ffff8b177ec21540
+ Sep 11 22:34:59 myhostname kernel: RBP: ffff8b110cf80888 R08: 0000000000000000 R09: ffffa9980e0fb938
+ Sep 11 22:34:59 myhostname kernel: R10: 0000000000000003 R11: ffffffff86146508 R12: 0000000000000014
+ Sep 11 22:34:59 myhostname kernel: R13: ffff8b1131ae5b40 R14: ffff8b10b8f4a418 R15: 00000000ffffffef
+ Sep 11 22:34:59 myhostname kernel: FS: 00007fb14a7fe6c0(0000) GS:ffff8b177ec00000(0000) knlGS:0000000000000000
+ Sep 11 22:34:59 myhostname kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ Sep 11 22:34:59 myhostname kernel: CR2: 000000c00143d000 CR3: 00000001b3b4e002 CR4: 00000000001706f0
+ Sep 11 22:34:59 myhostname kernel: Call Trace:
+ Sep 11 22:34:59 myhostname kernel: <TASK>
+ Sep 11 22:34:59 myhostname kernel: ? die+0x36/0x90
+ Sep 11 22:34:59 myhostname kernel: ? do_trap+0xda/0x100
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: ? do_error_trap+0x6a/0x90
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: ? exc_invalid_op+0x50/0x70
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: ? asm_exc_invalid_op+0x1a/0x20
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260
+ Sep 11 22:34:59 myhostname kernel: btrfs_insert_dir_item+0x200/0x280
+ Sep 11 22:34:59 myhostname kernel: btrfs_add_link+0xab/0x4f0
+ Sep 11 22:34:59 myhostname kernel: ? ktime_get_real_ts64+0x47/0xe0
+ Sep 11 22:34:59 myhostname kernel: btrfs_create_new_inode+0x7cd/0xa80
+ Sep 11 22:34:59 myhostname kernel: btrfs_symlink+0x190/0x4d0
+ Sep 11 22:34:59 myhostname kernel: ? schedule+0x5e/0xd0
+ Sep 11 22:34:59 myhostname kernel: ? __d_lookup+0x7e/0xc0
+ Sep 11 22:34:59 myhostname kernel: vfs_symlink+0x148/0x1e0
+ Sep 11 22:34:59 myhostname kernel: do_symlinkat+0x130/0x140
+ Sep 11 22:34:59 myhostname kernel: __x64_sys_symlinkat+0x3d/0x50
+ Sep 11 22:34:59 myhostname kernel: do_syscall_64+0x5d/0x90
+ Sep 11 22:34:59 myhostname kernel: ? syscall_exit_to_user_mode+0x2b/0x40
+ Sep 11 22:34:59 myhostname kernel: ? do_syscall_64+0x6c/0x90
+ Sep 11 22:34:59 myhostname kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc
+
+The race leading to the problem happens like this:
+
+1) Directory inode X is loaded into memory, its ->index_cnt field is
+ initialized to (u64)-1 (at btrfs_alloc_inode());
+
+2) Task A is adding a new file to directory X, holding its vfs inode lock,
+ and calls btrfs_set_inode_index() to get an index number for the entry.
+
+ Because the inode's index_cnt field is set to (u64)-1 it calls
+ btrfs_inode_delayed_dir_index_count() which fails because no dir index
+ entries were added yet to the delayed inode and then it calls
+ btrfs_set_inode_index_count(). This function finds the last dir index
+ key and then sets index_cnt to that index value + 1. It found that the
+ last index key has an offset of 100. However before it assigns a value
+ of 101 to index_cnt...
+
+3) Task B calls opendir(3), ending up at btrfs_opendir(), where the VFS
+ lock for inode X is not taken, so it calls btrfs_get_dir_last_index()
+ and sees index_cnt still with a value of (u64)-1. Because of that it
+ calls btrfs_inode_delayed_dir_index_count() which fails since no dir
+ index entries were added to the delayed inode yet, and then it also
+ calls btrfs_set_inode_index_count(). This also finds that the last
+ index key has an offset of 100, and before it assigns the value 101
+ to the index_cnt field of inode X...
+
+4) Task A assigns a value of 101 to index_cnt. And then the code flow
+ goes to btrfs_set_inode_index() where it increments index_cnt from
+ 101 to 102. Task A then creates a delayed dir index entry with a
+ sequence number of 101 and adds it to the delayed inode;
+
+5) Task B assigns 101 to the index_cnt field of inode X;
+
+6) At some later point when someone tries to add a new entry to the
+ directory, btrfs_set_inode_index() will return 101 again and shortly
+ after an attempt to add another delayed dir index key with index
+ number 101 will fail with -EEXIST resulting in a transaction abort.
+
+Fix this by locking the inode at btrfs_get_dir_last_index(), which is only
+used when opening a directory or attempting to lseek on it.
+
+Reported-by: ken <ken@bllue.org>
+Link: https://lore.kernel.org/linux-btrfs/CAE6xmH+Lp=Q=E61bU+v9eWX8gYfLvu6jLYxjxjFpo3zHVPR0EQ@mail.gmail.com/
+Reported-by: syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/
+Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads")
+CC: stable@vger.kernel.org # 6.5+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -6000,21 +6000,24 @@ out:
+
+ static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+ {
+- if (dir->index_cnt == (u64)-1) {
+- int ret;
++ int ret = 0;
+
++ btrfs_inode_lock(&dir->vfs_inode, 0);
++ if (dir->index_cnt == (u64)-1) {
+ ret = btrfs_inode_delayed_dir_index_count(dir);
+ if (ret) {
+ ret = btrfs_set_inode_index_count(dir);
+ if (ret)
+- return ret;
++ goto out;
+ }
+ }
+
+ /* index_cnt is the index number of next new entry, so decrement it. */
+ *index = dir->index_cnt - 1;
++out:
++ btrfs_inode_unlock(&dir->vfs_inode, 0);
+
+- return 0;
++ return ret;
+ }
+
+ /*
--- /dev/null
+From stable+bounces-16089-greg=kroah.com@vger.kernel.org Sat Jan 27 10:16:16 2024
+From: fdmanana@kernel.org
+Date: Sat, 27 Jan 2024 18:15:41 +0000
+Subject: btrfs: refresh dir last index during a rewinddir(3) call
+To: linux-btrfs@vger.kernel.org
+Cc: stable@vger.kernel.org, Filipe Manana <fdmanana@suse.com>, Ian Johnson <ian@ianjohnson.dev>, David Sterba <dsterba@suse.com>
+Message-ID: <0fbb11500bb68ba52ba169fd20176c4b2a148cc3.1706379057.git.fdmanana@suse.com>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit e60aa5da14d01fed8411202dbe4adf6c44bd2a57 upstream.
+
+When opening a directory we find what's the index of its last entry and
+then store it in the directory's file handle private data (struct
+btrfs_file_private::last_index), so that in the case new directory entries
+are added to a directory after an opendir(3) call we don't end up in an
+infinite loop (see commit 9b378f6ad48c ("btrfs: fix infinite directory
+reads")) when calling readdir(3).
+
+However once rewinddir(3) is called, POSIX states [1] that any new
+directory entries added after the previous opendir(3) call, must be
+returned by subsequent calls to readdir(3):
+
+ "The rewinddir() function shall reset the position of the directory
+ stream to which dirp refers to the beginning of the directory.
+ It shall also cause the directory stream to refer to the current
+ state of the corresponding directory, as a call to opendir() would
+ have done."
+
+We currently don't refresh the last_index field of the struct
+btrfs_file_private associated to the directory, so after a rewinddir(3)
+we are not returning any new entries added after the opendir(3) call.
+
+Fix this by finding the current last index of the directory when llseek
+is called against the directory.
+
+This can be reproduced by the following C program provided by Ian Johnson:
+
+ #include <dirent.h>
+ #include <stdio.h>
+
+ int main(void) {
+ DIR *dir = opendir("test");
+
+ FILE *file;
+ file = fopen("test/1", "w");
+ fwrite("1", 1, 1, file);
+ fclose(file);
+
+ file = fopen("test/2", "w");
+ fwrite("2", 1, 1, file);
+ fclose(file);
+
+ rewinddir(dir);
+
+ struct dirent *entry;
+ while ((entry = readdir(dir))) {
+ printf("%s\n", entry->d_name);
+ }
+ closedir(dir);
+ return 0;
+ }
+
+[1] https://pubs.opengroup.org/onlinepubs/9699919799/functions/rewinddir.html
+
+Reported-by: Ian Johnson <ian@ianjohnson.dev>
+Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/
+Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads")
+CC: stable@vger.kernel.org # 6.5+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -6049,6 +6049,19 @@ static int btrfs_opendir(struct inode *i
+ return 0;
+ }
+
++static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence)
++{
++ struct btrfs_file_private *private = file->private_data;
++ int ret;
++
++ ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)),
++ &private->last_index);
++ if (ret)
++ return ret;
++
++ return generic_file_llseek(file, offset, whence);
++}
++
+ struct dir_entry {
+ u64 ino;
+ u64 offset;
+@@ -11429,7 +11442,7 @@ static const struct inode_operations btr
+ };
+
+ static const struct file_operations btrfs_dir_file_operations = {
+- .llseek = generic_file_llseek,
++ .llseek = btrfs_dir_llseek,
+ .read = generic_read_dir,
+ .iterate_shared = btrfs_real_readdir,
+ .open = btrfs_opendir,
--- /dev/null
+From stable+bounces-16088-greg=kroah.com@vger.kernel.org Sat Jan 27 10:16:10 2024
+From: fdmanana@kernel.org
+Date: Sat, 27 Jan 2024 18:15:40 +0000
+Subject: btrfs: set last dir index to the current last index when opening dir
+To: linux-btrfs@vger.kernel.org
+Cc: stable@vger.kernel.org, Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>
+Message-ID: <756f4eeb62e16d3eefd3c7d40a5b9b372dca45a2.1706379057.git.fdmanana@suse.com>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 357950361cbc6d54fb68ed878265c647384684ae upstream.
+
+When opening a directory for reading it, we set the last index where we
+stop iteration to the value in struct btrfs_inode::index_cnt. That value
+does not match the index of the most recently added directory entry but
+it's instead the index number that will be assigned to the next directory
+entry.
+
+This means that if after the call to opendir(3) new directory entries are
+added, a readdir(3) call will return the first new directory entry. This
+is fine because POSIX says the following [1]:
+
+ "If a file is removed from or added to the directory after the most
+ recent call to opendir() or rewinddir(), whether a subsequent call to
+ readdir() returns an entry for that file is unspecified."
+
+For example for the test script from commit 9b378f6ad48c ("btrfs: fix
+infinite directory reads"), where we have 2000 files in a directory, ext4
+doesn't return any new directory entry after opendir(3), while xfs returns
+the first 13 new directory entries added after the opendir(3) call.
+
+If we move to a shorter example with an empty directory when opendir(3) is
+called, and 2 files added to the directory after the opendir(3) call, then
+readdir(3) on btrfs will return the first file, ext4 and xfs return the 2
+files (but in a different order). A test program for this, reported by
+Ian Johnson, is the following:
+
+ #include <dirent.h>
+ #include <stdio.h>
+
+ int main(void) {
+ DIR *dir = opendir("test");
+
+ FILE *file;
+ file = fopen("test/1", "w");
+ fwrite("1", 1, 1, file);
+ fclose(file);
+
+ file = fopen("test/2", "w");
+ fwrite("2", 1, 1, file);
+ fclose(file);
+
+ struct dirent *entry;
+ while ((entry = readdir(dir))) {
+ printf("%s\n", entry->d_name);
+ }
+ closedir(dir);
+ return 0;
+ }
+
+To make this less odd, change the behaviour to never return new entries
+that were added after the opendir(3) call. This is done by setting the
+last_index field of the struct btrfs_file_private attached to the
+directory's file handle with a value matching btrfs_inode::index_cnt
+minus 1, since that value always matches the index of the next new
+directory entry and not the index of the most recently added entry.
+
+[1] https://pubs.opengroup.org/onlinepubs/007904875/functions/readdir_r.html
+
+Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/
+CC: stable@vger.kernel.org # 6.5+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -6011,7 +6011,8 @@ static int btrfs_get_dir_last_index(stru
+ }
+ }
+
+- *index = dir->index_cnt;
++ /* index_cnt is the index number of next new entry, so decrement it. */
++ *index = dir->index_cnt - 1;
+
+ return 0;
+ }
net-mlx5e-fix-a-potential-double-free-in-fs_any_crea.patch
rcu-defer-rcu-kthreads-wakeup-when-cpu-is-dying.patch
netfilter-nft_limit-reject-configurations-that-cause.patch
+btrfs-fix-infinite-directory-reads.patch
+btrfs-set-last-dir-index-to-the-current-last-index-when-opening-dir.patch
+btrfs-refresh-dir-last-index-during-a-rewinddir-3-call.patch
+btrfs-fix-race-between-reading-a-directory-and-adding-entries-to-it.patch
netfilter-nf_tables-restrict-anonymous-set-and-map-n.patch
netfilter-nf_tables-validate-nfproto_-family.patch
net-stmmac-wait-a-bit-for-the-reset-to-take-effect.patch