From: Greg Kroah-Hartman
Date: Mon, 12 Aug 2024 14:23:25 +0000 (+0200)
Subject: 6.6-stable patches
X-Git-Tag: v6.1.105~44
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5ae7d5e7b4eed7e45772d567c1c70472ce42abfb;p=thirdparty%2Fkernel%2Fstable-queue.git

6.6-stable patches

added patches:
      btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch
      ipv6-fix-source-address-selection-with-route-leak.patch
      mm-huge_memory-don-t-force-huge-page-alignment-on-32-bit.patch
      mm-huge_memory-use-config_64bit-to-relax-huge-page-alignment-on-32-bit-machines.patch
      netfilter-nf_tables-prefer-nft_chain_validate.patch
---

diff --git a/queue-6.6/btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch b/queue-6.6/btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch
new file mode 100644
index 00000000000..79c5d162360
--- /dev/null
+++ b/queue-6.6/btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch
@@ -0,0 +1,252 @@
+From 939b656bc8ab203fdbde26ccac22bcb7f0985be5 Mon Sep 17 00:00:00 2001
+From: Filipe Manana
+Date: Fri, 26 Jul 2024 11:12:52 +0100
+Subject: btrfs: fix corruption after buffer fault in during direct IO append write
+
+From: Filipe Manana
+
+commit 939b656bc8ab203fdbde26ccac22bcb7f0985be5 upstream.
+
+During an append (O_APPEND write flag) direct IO write if the input buffer
+was not previously faulted in, we can corrupt the file in a way that the
+final size is unexpected and it includes an unexpected hole.
+
+The problem happens like this:
+
+1) We have an empty file, with size 0, for example;
+
+2) We do an O_APPEND direct IO with a length of 4096 bytes and the input
+   buffer is not currently faulted in;
+
+3) We enter btrfs_direct_write(), lock the inode and call
+   generic_write_checks(), which calls generic_write_checks_count(), and
+   that function sets the iocb position to 0 with the following code:
+
+   if (iocb->ki_flags & IOCB_APPEND)
+           iocb->ki_pos = i_size_read(inode);
+
+4) We call btrfs_dio_write() and enter into iomap, which will end up
+   calling btrfs_dio_iomap_begin() and that calls
+   btrfs_get_blocks_direct_write(), where we update the i_size of the
+   inode to 4096 bytes;
+
+5) After btrfs_dio_iomap_begin() returns, iomap will attempt to access
+   the page of the write input buffer (at iomap_dio_bio_iter(), with a
+   call to bio_iov_iter_get_pages()) and fail with -EFAULT, which gets
+   returned to btrfs at btrfs_direct_write() via btrfs_dio_write();
+
+6) At btrfs_direct_write() we get the -EFAULT error, unlock the inode,
+   fault in the write buffer and then goto to the label 'relock';
+
+7) We lock again the inode, do all the necessary checks again and call
+   again generic_write_checks(), which calls generic_write_checks_count()
+   again, and there we set the iocb's position to 4K, which is the current
+   i_size of the inode, with the following code pointed above:
+
+   if (iocb->ki_flags & IOCB_APPEND)
+           iocb->ki_pos = i_size_read(inode);
+
+8) Then we go again to btrfs_dio_write() and enter iomap and the write
+   succeeds, but it wrote to the file range [4K, 8K), leaving a hole in
+   the [0, 4K) range and an i_size of 8K, which goes against the
+   expectations of having the data written to the range [0, 4K) and get an
+   i_size of 4K.
+ +Fix this by not unlocking the inode before faulting in the input buffer, +in case we get -EFAULT or an incomplete write, and not jumping to the +'relock' label after faulting in the buffer - instead jump to a location +immediately before calling iomap, skipping all the write checks and +relocking. This solves this problem and it's fine even in case the input +buffer is memory mapped to the same file range, since only holding the +range locked in the inode's io tree can cause a deadlock, it's safe to +keep the inode lock (VFS lock), as was fixed and described in commit +51bd9563b678 ("btrfs: fix deadlock due to page faults during direct IO +reads and writes"). + +A sample reproducer provided by a reporter is the following: + + $ cat test.c + #ifndef _GNU_SOURCE + #define _GNU_SOURCE + #endif + + #include + #include + #include + #include + #include + + int main(int argc, char *argv[]) + { + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + int fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | + O_APPEND, 0644); + if (fd < 0) { + perror("creating test file"); + return 1; + } + + char *buf = mmap(NULL, 4096, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ssize_t ret = write(fd, buf, 4096); + if (ret < 0) { + perror("pwritev2"); + return 1; + } + + struct stat stbuf; + ret = fstat(fd, &stbuf); + if (ret < 0) { + perror("stat"); + return 1; + } + + printf("size: %llu\n", (unsigned long long)stbuf.st_size); + return stbuf.st_size == 4096 ? 0 : 1; + } + +A test case for fstests will be sent soon. + +Reported-by: Hanna Czenczek +Link: https://lore.kernel.org/linux-btrfs/0b841d46-12fe-4e64-9abb-871d8d0de271@redhat.com/ +Fixes: 8184620ae212 ("btrfs: fix lost file sync on direct IO write with nowait and dsync iocb") +CC: stable@vger.kernel.org # 6.1+ +Tested-by: Hanna Czenczek +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 1 + + fs/btrfs/file.c | 55 ++++++++++++++++++++++++++++++++++++++++++------------- + 2 files changed, 43 insertions(+), 13 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -445,6 +445,7 @@ struct btrfs_file_private { + void *filldir_buf; + u64 last_index; + struct extent_state *llseek_cached_state; ++ bool fsync_skip_inode_lock; + }; + + static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -1535,21 +1535,37 @@ relock: + * So here we disable page faults in the iov_iter and then retry if we + * got -EFAULT, faulting in the pages before the retry. + */ ++again: + from->nofault = true; + dio = btrfs_dio_write(iocb, from, written); + from->nofault = false; + +- /* +- * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync +- * iocb, and that needs to lock the inode. So unlock it before calling +- * iomap_dio_complete() to avoid a deadlock. 
+- */ +- btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); +- +- if (IS_ERR_OR_NULL(dio)) ++ if (IS_ERR_OR_NULL(dio)) { + err = PTR_ERR_OR_ZERO(dio); +- else ++ } else { ++ struct btrfs_file_private stack_private = { 0 }; ++ struct btrfs_file_private *private; ++ const bool have_private = (file->private_data != NULL); ++ ++ if (!have_private) ++ file->private_data = &stack_private; ++ ++ /* ++ * If we have a synchoronous write, we must make sure the fsync ++ * triggered by the iomap_dio_complete() call below doesn't ++ * deadlock on the inode lock - we are already holding it and we ++ * can't call it after unlocking because we may need to complete ++ * partial writes due to the input buffer (or parts of it) not ++ * being already faulted in. ++ */ ++ private = file->private_data; ++ private->fsync_skip_inode_lock = true; + err = iomap_dio_complete(dio); ++ private->fsync_skip_inode_lock = false; ++ ++ if (!have_private) ++ file->private_data = NULL; ++ } + + /* No increment (+=) because iomap returns a cumulative value. */ + if (err > 0) +@@ -1576,10 +1592,12 @@ relock: + } else { + fault_in_iov_iter_readable(from, left); + prev_left = left; +- goto relock; ++ goto again; + } + } + ++ btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); ++ + /* + * If 'err' is -ENOTBLK or we have not written all data, then it means + * we must fallback to buffered IO. +@@ -1778,6 +1796,7 @@ static inline bool skip_inode_logging(co + */ + int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) + { ++ struct btrfs_file_private *private = file->private_data; + struct dentry *dentry = file_dentry(file); + struct inode *inode = d_inode(dentry); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); +@@ -1787,6 +1806,7 @@ int btrfs_sync_file(struct file *file, l + int ret = 0, err; + u64 len; + bool full_sync; ++ const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); + + trace_btrfs_sync_file(file, datasync); + +@@ -1814,7 +1834,10 @@ int btrfs_sync_file(struct file *file, l + if (ret) + goto out; + +- btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); ++ if (skip_ilock) ++ down_write(&BTRFS_I(inode)->i_mmap_lock); ++ else ++ btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + + atomic_inc(&root->log_batch); + +@@ -1838,7 +1861,10 @@ int btrfs_sync_file(struct file *file, l + */ + ret = start_ordered_ops(inode, start, end); + if (ret) { +- btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); ++ if (skip_ilock) ++ up_write(&BTRFS_I(inode)->i_mmap_lock); ++ else ++ btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + goto out; + } + +@@ -1941,7 +1967,10 @@ int btrfs_sync_file(struct file *file, l + * file again, but that will end up using the synchronization + * inside btrfs_sync_log to keep things safe. 
+ */ +- btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); ++ if (skip_ilock) ++ up_write(&BTRFS_I(inode)->i_mmap_lock); ++ else ++ btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + + if (ret == BTRFS_NO_LOG_SYNC) { + ret = btrfs_end_transaction(trans); diff --git a/queue-6.6/ipv6-fix-source-address-selection-with-route-leak.patch b/queue-6.6/ipv6-fix-source-address-selection-with-route-leak.patch new file mode 100644 index 00000000000..5d113ba3fa9 --- /dev/null +++ b/queue-6.6/ipv6-fix-source-address-selection-with-route-leak.patch @@ -0,0 +1,85 @@ +From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 +From: Nicolas Dichtel +Date: Wed, 10 Jul 2024 10:14:28 +0200 +Subject: ipv6: fix source address selection with route leak + +From: Nicolas Dichtel + +commit 252442f2ae317d109ef0b4b39ce0608c09563042 upstream. + +By default, an address assigned to the output interface is selected when +the source address is not specified. This is problematic when a route, +configured in a vrf, uses an interface from another vrf (aka route leak). +The original vrf does not own the selected source address. + +Let's add a check against the output interface and call the appropriate +function to select the source address. + +CC: stable@vger.kernel.org +Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") +Signed-off-by: Nicolas Dichtel +Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_route.h | 20 ++++++++++++++------ + net/ipv6/ip6_output.c | 1 + + net/ipv6/route.c | 2 +- + 3 files changed, 16 insertions(+), 7 deletions(-) + +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -128,18 +128,26 @@ void rt6_age_exceptions(struct fib6_info + + static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, + const struct in6_addr *daddr, +- unsigned int prefs, ++ unsigned int prefs, int l3mdev_index, + struct in6_addr *saddr) + { ++ struct net_device *l3mdev; ++ struct net_device *dev; ++ bool same_vrf; + int err = 0; + +- if (f6i && f6i->fib6_prefsrc.plen) { ++ rcu_read_lock(); ++ ++ l3mdev = dev_get_by_index_rcu(net, l3mdev_index); ++ if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) ++ dev = f6i ? fib6_info_nh_dev(f6i) : NULL; ++ same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; ++ if (f6i && f6i->fib6_prefsrc.plen && same_vrf) + *saddr = f6i->fib6_prefsrc.addr; +- } else { +- struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; ++ else ++ err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); + +- err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); +- } ++ rcu_read_unlock(); + + return err; + } +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1122,6 +1122,7 @@ static int ip6_dst_lookup_tail(struct ne + from = rt ? rcu_dereference(rt->from) : NULL; + err = ip6_route_get_saddr(net, from, &fl6->daddr, + sk ? 
inet6_sk(sk)->srcprefs : 0, ++ fl6->flowi6_l3mdev, + &fl6->saddr); + rcu_read_unlock(); + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -5678,7 +5678,7 @@ static int rt6_fill_node(struct net *net + goto nla_put_failure; + } else if (dest) { + struct in6_addr saddr_buf; +- if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && ++ if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && + nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) + goto nla_put_failure; + } diff --git a/queue-6.6/mm-huge_memory-don-t-force-huge-page-alignment-on-32-bit.patch b/queue-6.6/mm-huge_memory-don-t-force-huge-page-alignment-on-32-bit.patch new file mode 100644 index 00000000000..df594ecafd5 --- /dev/null +++ b/queue-6.6/mm-huge_memory-don-t-force-huge-page-alignment-on-32-bit.patch @@ -0,0 +1,56 @@ +From 4ef9ad19e17676b9ef071309bc62020e2373705d Mon Sep 17 00:00:00 2001 +From: Yang Shi +Date: Thu, 18 Jan 2024 10:05:05 -0800 +Subject: mm: huge_memory: don't force huge page alignment on 32 bit + +From: Yang Shi + +commit 4ef9ad19e17676b9ef071309bc62020e2373705d upstream. + +commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP +boundaries") caused two issues [1] [2] reported on 32 bit system or compat +userspace. + +It doesn't make too much sense to force huge page alignment on 32 bit +system due to the constrained virtual address space. + +[1] https://lore.kernel.org/linux-mm/d0a136a0-4a31-46bc-adf4-2db109a61672@kernel.org/ +[2] https://lore.kernel.org/linux-mm/CAJuCfpHXLdQy1a2B6xN2d7quTYwg2OoZseYPZTRpU0eHHKD-sQ@mail.gmail.com/ + +Link: https://lkml.kernel.org/r/20240118180505.2914778-1-shy828301@gmail.com +Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") +Signed-off-by: Yang Shi +Reported-by: Jiri Slaby +Reported-by: Suren Baghdasaryan +Tested-by: Jiri Slaby +Tested-by: Suren Baghdasaryan +Reviewed-by: Matthew Wilcox (Oracle) +Cc: Rik van Riel +Cc: Christopher Lameter +Signed-off-by: Andrew Morton +Cc: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -601,6 +602,9 @@ static unsigned long __thp_get_unmapped_ + loff_t off_align = round_up(off, size); + unsigned long len_pad, ret; + ++ if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) ++ return 0; ++ + if (off_end <= off_align || (off_end - off_align) < size) + return 0; + diff --git a/queue-6.6/mm-huge_memory-use-config_64bit-to-relax-huge-page-alignment-on-32-bit-machines.patch b/queue-6.6/mm-huge_memory-use-config_64bit-to-relax-huge-page-alignment-on-32-bit-machines.patch new file mode 100644 index 00000000000..7bb3dcbd6b8 --- /dev/null +++ b/queue-6.6/mm-huge_memory-use-config_64bit-to-relax-huge-page-alignment-on-32-bit-machines.patch @@ -0,0 +1,49 @@ +From d9592025000b3cf26c742f3505da7b83aedc26d5 Mon Sep 17 00:00:00 2001 +From: Yang Shi +Date: Fri, 12 Jul 2024 08:58:55 -0700 +Subject: mm: huge_memory: use !CONFIG_64BIT to relax huge page alignment on 32 bit machines + +From: Yang Shi + +commit d9592025000b3cf26c742f3505da7b83aedc26d5 upstream. + +Yves-Alexis Perez reported commit 4ef9ad19e176 ("mm: huge_memory: don't +force huge page alignment on 32 bit") didn't work for x86_32 [1]. It is +because x86_32 uses CONFIG_X86_32 instead of CONFIG_32BIT. + +!CONFIG_64BIT should cover all 32 bit machines. 
+ +[1] https://lore.kernel.org/linux-mm/CAHbLzkr1LwH3pcTgM+aGQ31ip2bKqiqEQ8=FQB+t2c3dhNKNHA@mail.gmail.com/ + +Link: https://lkml.kernel.org/r/20240712155855.1130330-1-yang@os.amperecomputing.com +Fixes: 4ef9ad19e176 ("mm: huge_memory: don't force huge page alignment on 32 bit") +Signed-off-by: Yang Shi +Reported-by: Yves-Alexis Perez +Tested-by: Yves-Alexis Perez +Acked-by: David Hildenbrand +Cc: Ben Hutchings +Cc: Christoph Lameter +Cc: Jiri Slaby +Cc: Matthew Wilcox (Oracle) +Cc: Rik van Riel +Cc: Salvatore Bonaccorso +Cc: Suren Baghdasaryan +Cc: [6.8+] +Signed-off-by: Andrew Morton +Cc: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -602,7 +602,7 @@ static unsigned long __thp_get_unmapped_ + loff_t off_align = round_up(off, size); + unsigned long len_pad, ret; + +- if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) ++ if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) + return 0; + + if (off_end <= off_align || (off_end - off_align) < size) diff --git a/queue-6.6/netfilter-nf_tables-prefer-nft_chain_validate.patch b/queue-6.6/netfilter-nf_tables-prefer-nft_chain_validate.patch new file mode 100644 index 00000000000..bd69f59f864 --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-prefer-nft_chain_validate.patch @@ -0,0 +1,228 @@ +From cff3bd012a9512ac5ed858d38e6ed65f6391008c Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Thu, 11 Jul 2024 11:06:39 +0200 +Subject: netfilter: nf_tables: prefer nft_chain_validate + +From: Florian Westphal + +commit cff3bd012a9512ac5ed858d38e6ed65f6391008c upstream. + +nft_chain_validate already performs loop detection because a cycle will +result in a call stack overflow (ctx->level >= NFT_JUMP_STACK_SIZE). + +It also follows maps via ->validate callback in nft_lookup, so there +appears no reason to iterate the maps again. + +nf_tables_check_loops() and all its helper functions can be removed. +This improves ruleset load time significantly, from 23s down to 12s. + +This also fixes a crash bug. Old loop detection code can result in +unbounded recursion: + +BUG: TASK stack guard page was hit at .... +Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN +CPU: 4 PID: 1539 Comm: nft Not tainted 6.10.0-rc5+ #1 +[..] + +with a suitable ruleset during validation of register stores. + +I can't see any actual reason to attempt to check for this from +nft_validate_register_store(), at this point the transaction is still in +progress, so we don't have a full picture of the rule graph. + +For nf-next it might make sense to either remove it or make this depend +on table->validate_state in case we could catch an error earlier +(for improved error reporting to userspace). 
+ +Fixes: 20a69341f2d0 ("netfilter: nf_tables: add netlink set API") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 154 +++--------------------------------------- + 1 file changed, 13 insertions(+), 141 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3743,6 +3743,15 @@ static void nf_tables_rule_release(const + nf_tables_rule_destroy(ctx, rule); + } + ++/** nft_chain_validate - loop detection and hook validation ++ * ++ * @ctx: context containing call depth and base chain ++ * @chain: chain to validate ++ * ++ * Walk through the rules of the given chain and chase all jumps/gotos ++ * and set lookups until either the jump limit is hit or all reachable ++ * chains have been validated. ++ */ + int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) + { + struct nft_expr *expr, *last; +@@ -3764,6 +3773,9 @@ int nft_chain_validate(const struct nft_ + if (!expr->ops->validate) + continue; + ++ /* This may call nft_chain_validate() recursively, ++ * callers that do so must increment ctx->level. ++ */ + err = expr->ops->validate(ctx, expr, &data); + if (err < 0) + return err; +@@ -10621,146 +10633,6 @@ int nft_chain_validate_hooks(const struc + } + EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); + +-/* +- * Loop detection - walk through the ruleset beginning at the destination chain +- * of a new jump until either the source chain is reached (loop) or all +- * reachable chains have been traversed. +- * +- * The loop check is performed whenever a new jump verdict is added to an +- * expression or verdict map or a verdict map is bound to a new chain. +- */ +- +-static int nf_tables_check_loops(const struct nft_ctx *ctx, +- const struct nft_chain *chain); +- +-static int nft_check_loops(const struct nft_ctx *ctx, +- const struct nft_set_ext *ext) +-{ +- const struct nft_data *data; +- int ret; +- +- data = nft_set_ext_data(ext); +- switch (data->verdict.code) { +- case NFT_JUMP: +- case NFT_GOTO: +- ret = nf_tables_check_loops(ctx, data->verdict.chain); +- break; +- default: +- ret = 0; +- break; +- } +- +- return ret; +-} +- +-static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, +- struct nft_set *set, +- const struct nft_set_iter *iter, +- struct nft_set_elem *elem) +-{ +- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); +- +- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && +- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) +- return 0; +- +- return nft_check_loops(ctx, ext); +-} +- +-static int nft_set_catchall_loops(const struct nft_ctx *ctx, +- struct nft_set *set) +-{ +- u8 genmask = nft_genmask_next(ctx->net); +- struct nft_set_elem_catchall *catchall; +- struct nft_set_ext *ext; +- int ret = 0; +- +- list_for_each_entry_rcu(catchall, &set->catchall_list, list) { +- ext = nft_set_elem_ext(set, catchall->elem); +- if (!nft_set_elem_active(ext, genmask)) +- continue; +- +- ret = nft_check_loops(ctx, ext); +- if (ret < 0) +- return ret; +- } +- +- return ret; +-} +- +-static int nf_tables_check_loops(const struct nft_ctx *ctx, +- const struct nft_chain *chain) +-{ +- const struct nft_rule *rule; +- const struct nft_expr *expr, *last; +- struct nft_set *set; +- struct nft_set_binding *binding; +- struct nft_set_iter iter; +- +- if (ctx->chain == chain) +- return -ELOOP; +- +- if (fatal_signal_pending(current)) +- return -EINTR; +- +- list_for_each_entry(rule, &chain->rules, list) { +- 
nft_rule_for_each_expr(expr, last, rule) { +- struct nft_immediate_expr *priv; +- const struct nft_data *data; +- int err; +- +- if (strcmp(expr->ops->type->name, "immediate")) +- continue; +- +- priv = nft_expr_priv(expr); +- if (priv->dreg != NFT_REG_VERDICT) +- continue; +- +- data = &priv->data; +- switch (data->verdict.code) { +- case NFT_JUMP: +- case NFT_GOTO: +- err = nf_tables_check_loops(ctx, +- data->verdict.chain); +- if (err < 0) +- return err; +- break; +- default: +- break; +- } +- } +- } +- +- list_for_each_entry(set, &ctx->table->sets, list) { +- if (!nft_is_active_next(ctx->net, set)) +- continue; +- if (!(set->flags & NFT_SET_MAP) || +- set->dtype != NFT_DATA_VERDICT) +- continue; +- +- list_for_each_entry(binding, &set->bindings, list) { +- if (!(binding->flags & NFT_SET_MAP) || +- binding->chain != chain) +- continue; +- +- iter.genmask = nft_genmask_next(ctx->net); +- iter.skip = 0; +- iter.count = 0; +- iter.err = 0; +- iter.fn = nf_tables_loop_check_setelem; +- +- set->ops->walk(ctx, set, &iter); +- if (!iter.err) +- iter.err = nft_set_catchall_loops(ctx, set); +- +- if (iter.err < 0) +- return iter.err; +- } +- } +- +- return 0; +-} +- + /** + * nft_parse_u32_check - fetch u32 attribute and check for maximum value + * +@@ -10873,7 +10745,7 @@ static int nft_validate_register_store(c + if (data != NULL && + (data->verdict.code == NFT_GOTO || + data->verdict.code == NFT_JUMP)) { +- err = nf_tables_check_loops(ctx, data->verdict.chain); ++ err = nft_chain_validate(ctx, data->verdict.chain); + if (err < 0) + return err; + } diff --git a/queue-6.6/series b/queue-6.6/series index d56ec640b69..fc2cb4932c2 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -170,3 +170,8 @@ selftests-mptcp-fix-error-path.patch mptcp-pm-deny-endp-with-signal-subflow-port.patch block-use-the-right-type-for-stub-rq_integrity_vec.patch revert-drm-amd-display-add-null-check-for-afb-before-dereferencing-in-amdgpu_dm_plane_handle_cursor_update.patch +mm-huge_memory-don-t-force-huge-page-alignment-on-32-bit.patch +mm-huge_memory-use-config_64bit-to-relax-huge-page-alignment-on-32-bit-machines.patch +btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch +netfilter-nf_tables-prefer-nft_chain_validate.patch +ipv6-fix-source-address-selection-with-route-leak.patch