From 761a3193dfa33b8fbc37084459a0c877dd35a67f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 14 Mar 2025 21:12:07 -0400 Subject: [PATCH] Fixes for 5.10 Signed-off-by: Sasha Levin --- ...-don-t-release-fb_mmio-resource-in-v.patch | 85 ++++ ...iounmap-the-correct-memory-when-remo.patch | 53 +++ ...-fix-memory-leak-in-arfs-after-reset.patch | 68 ++++ ...integer-overflow-in-do_ip_vs_get_ctl.patch | 68 ++++ ...e-errors-in-mlx5_chains_create_table.patch | 46 +++ ...t-bridge-link-show-failure-for-non-e.patch | 53 +++ ...t-creation-of-classes-with-tc_h_root.patch | 50 +++ ...-conntrack-convert-to-refcount_t-api.patch | 364 ++++++++++++++++++ ...ncount-fully-initialize-struct-nf_co.patch | 129 +++++++ ...-fix-use-after-free-when-attaching-z.patch | 47 +++ ...-use-__refcount_inc-for-per-cpu-nft_.patch | 63 +++ ...thdr-fix-offset-with-ipv4_find_optio.patch | 78 ++++ ...-rcu-read-lock-in-__netpoll_send_skb.patch | 76 ++++ ...-fix-incorrect-regmap-max_registers-.patch | 40 ++ queue-5.10/series | 14 + 15 files changed, 1234 insertions(+) create mode 100644 queue-5.10/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch create mode 100644 queue-5.10/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch create mode 100644 queue-5.10/ice-fix-memory-leak-in-arfs-after-reset.patch create mode 100644 queue-5.10/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch create mode 100644 queue-5.10/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch create mode 100644 queue-5.10/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch create mode 100644 queue-5.10/net_sched-prevent-creation-of-classes-with-tc_h_root.patch create mode 100644 queue-5.10/netfilter-conntrack-convert-to-refcount_t-api.patch create mode 100644 queue-5.10/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch create mode 100644 queue-5.10/netfilter-nft_ct-fix-use-after-free-when-attaching-z.patch create mode 100644 
queue-5.10/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch create mode 100644 queue-5.10/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch create mode 100644 queue-5.10/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch create mode 100644 queue-5.10/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch diff --git a/queue-5.10/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch b/queue-5.10/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch new file mode 100644 index 0000000000..dc8f8e4167 --- /dev/null +++ b/queue-5.10/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch @@ -0,0 +1,85 @@ +From fc5111849b4e08b0535239e7f35e2bd6887daba8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 20:52:08 -0700 +Subject: Drivers: hv: vmbus: Don't release fb_mmio resource in + vmbus_free_mmio() + +From: Michael Kelley + +[ Upstream commit 73fe9073c0cc28056cb9de0c8a516dac070f1d1f ] + +The VMBus driver manages the MMIO space it owns via the hyperv_mmio +resource tree. Because the synthetic video framebuffer portion of the +MMIO space is initially setup by the Hyper-V host for each guest, the +VMBus driver does an early reserve of that portion of MMIO space in the +hyperv_mmio resource tree. It saves a pointer to that resource in +fb_mmio. When a VMBus driver requests MMIO space and passes "true" +for the "fb_overlap_ok" argument, the reserved framebuffer space is +used if possible. In that case it's not necessary to do another request +against the "shadow" hyperv_mmio resource tree because that resource +was already requested in the early reserve steps. + +However, the vmbus_free_mmio() function currently does no special +handling for the fb_mmio resource. When a framebuffer device is +removed, or the driver is unbound, the current code for +vmbus_free_mmio() releases the reserved resource, leaving fb_mmio +pointing to memory that has been freed. 
If the same or another +driver is subsequently bound to the device, vmbus_allocate_mmio() +checks against fb_mmio, and potentially gets garbage. Furthermore +a second unbind operation produces this "nonexistent resource" error +because of the unbalanced behavior between vmbus_allocate_mmio() and +vmbus_free_mmio(): + +[ 55.499643] resource: Trying to free nonexistent + resource <0x00000000f0000000-0x00000000f07fffff> + +Fix this by adding logic to vmbus_free_mmio() to recognize when +MMIO space in the fb_mmio reserved area would be released, and don't +release it. This filtering ensures the fb_mmio resource always exists, +and makes vmbus_free_mmio() more parallel with vmbus_allocate_mmio(). + +Fixes: be000f93e5d7 ("drivers:hv: Track allocations of children of hv_vmbus in private resource tree") +Signed-off-by: Michael Kelley +Tested-by: Saurabh Sengar +Reviewed-by: Saurabh Sengar +Link: https://lore.kernel.org/r/20250310035208.275764-1-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20250310035208.275764-1-mhklinux@outlook.com> +Signed-off-by: Sasha Levin +--- + drivers/hv/vmbus_drv.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 39339b152b8ba..e8bea7c791691 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -2331,12 +2331,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size) + struct resource *iter; + + mutex_lock(&hyperv_mmio_lock); ++ ++ /* ++ * If all bytes of the MMIO range to be released are within the ++ * special case fb_mmio shadow region, skip releasing the shadow ++ * region since no corresponding __request_region() was done ++ * in vmbus_allocate_mmio(). 
++ */ ++ if (fb_mmio && start >= fb_mmio->start && ++ (start + size - 1 <= fb_mmio->end)) ++ goto skip_shadow_release; ++ + for (iter = hyperv_mmio; iter; iter = iter->sibling) { + if ((iter->start >= start + size) || (iter->end <= start)) + continue; + + __release_region(iter, start, size); + } ++ ++skip_shadow_release: + release_mem_region(start, size); + mutex_unlock(&hyperv_mmio_lock); + +-- +2.39.5 + diff --git a/queue-5.10/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch b/queue-5.10/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch new file mode 100644 index 0000000000..e88b8bbfc3 --- /dev/null +++ b/queue-5.10/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch @@ -0,0 +1,53 @@ +From 4f5aa5c82480efb4a7e6f813b2ba41b59862c018 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Feb 2025 15:52:52 -0800 +Subject: fbdev: hyperv_fb: iounmap() the correct memory when removing a device + +From: Michael Kelley + +[ Upstream commit 7241c886a71797cc51efc6fadec7076fcf6435c2 ] + +When a Hyper-V framebuffer device is removed, or the driver is unbound +from a device, any allocated and/or mapped memory must be released. In +particular, MMIO address space that was mapped to the framebuffer must +be unmapped. Current code unmaps the wrong address, resulting in an +error like: + +[ 4093.980597] iounmap: bad address 00000000c936c05c + +followed by a stack dump. + +Commit d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for +Hyper-V frame buffer driver") changed the kind of address stored in +info->screen_base, and the iounmap() call in hvfb_putmem() was not +updated accordingly. + +Fix this by updating hvfb_putmem() to unmap the correct address. 
+ +Fixes: d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") +Signed-off-by: Michael Kelley +Reviewed-by: Saurabh Sengar +Link: https://lore.kernel.org/r/20250209235252.2987-1-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20250209235252.2987-1-mhklinux@outlook.com> +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/hyperv_fb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c +index f0a66a344d870..d523cd4de9d47 100644 +--- a/drivers/video/fbdev/hyperv_fb.c ++++ b/drivers/video/fbdev/hyperv_fb.c +@@ -1129,7 +1129,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) + + if (par->need_docopy) { + vfree(par->dio_vp); +- iounmap(info->screen_base); ++ iounmap(par->mmio_vp); + vmbus_free_mmio(par->mem->start, screen_fb_size); + } else { + hvfb_release_phymem(hdev, info->fix.smem_start, +-- +2.39.5 + diff --git a/queue-5.10/ice-fix-memory-leak-in-arfs-after-reset.patch b/queue-5.10/ice-fix-memory-leak-in-arfs-after-reset.patch new file mode 100644 index 0000000000..ae4844d135 --- /dev/null +++ b/queue-5.10/ice-fix-memory-leak-in-arfs-after-reset.patch @@ -0,0 +1,68 @@ +From 771a6b182a974a399bb7d8a4d03fa07c16a568d9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Jan 2025 09:15:39 +0100 +Subject: ice: fix memory leak in aRFS after reset + +From: Grzegorz Nitka + +[ Upstream commit 23d97f18901ef5e4e264e3b1777fe65c760186b5 ] + +Fix aRFS (accelerated Receive Flow Steering) structures memory leak by +adding a checker to verify if aRFS memory is already allocated while +configuring VSI. aRFS objects are allocated in two cases: +- as part of VSI initialization (at probe), and +- as part of reset handling + +However, VSI reconfiguration executed during reset involves memory +allocation one more time, without prior releasing already allocated +resources. 
This led to the memory leak with the following signature: + +[root@os-delivery ~]# cat /sys/kernel/debug/kmemleak +unreferenced object 0xff3c1ca7252e6000 (size 8192): + comm "kworker/0:0", pid 8, jiffies 4296833052 + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace (crc 0): + [] __kmalloc_cache_noprof+0x275/0x340 + [] ice_init_arfs+0x3a/0xe0 [ice] + [] ice_vsi_cfg_def+0x607/0x850 [ice] + [] ice_vsi_setup+0x5b/0x130 [ice] + [] ice_init+0x1c1/0x460 [ice] + [] ice_probe+0x2af/0x520 [ice] + [] local_pci_probe+0x43/0xa0 + [] work_for_cpu_fn+0x13/0x20 + [] process_one_work+0x179/0x390 + [] worker_thread+0x239/0x340 + [] kthread+0xcc/0x100 + [] ret_from_fork+0x2d/0x50 + [] ret_from_fork_asm+0x1a/0x30 + ... + +Fixes: 28bf26724fdb ("ice: Implement aRFS") +Reviewed-by: Michal Swiatkowski +Signed-off-by: Grzegorz Nitka +Reviewed-by: Simon Horman +Tested-by: Rinitha S (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c +index 6560acd76c949..632f16ffee401 100644 +--- a/drivers/net/ethernet/intel/ice/ice_arfs.c ++++ b/drivers/net/ethernet/intel/ice/ice_arfs.c +@@ -510,7 +510,7 @@ void ice_init_arfs(struct ice_vsi *vsi) + struct hlist_head *arfs_fltr_list; + unsigned int i; + +- if (!vsi || vsi->type != ICE_VSI_PF) ++ if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi)) + return; + + arfs_fltr_list = kzalloc(sizeof(*arfs_fltr_list) * ICE_MAX_ARFS_LIST, +-- +2.39.5 + diff --git a/queue-5.10/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch b/queue-5.10/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch new file mode 100644 index 0000000000..e99ae8f5bb --- /dev/null +++ 
b/queue-5.10/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch @@ -0,0 +1,68 @@ +From 8b191a94e375be6f48e81679aa0a8140941cd5c4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Mar 2025 10:45:53 +0300 +Subject: ipvs: prevent integer overflow in do_ip_vs_get_ctl() + +From: Dan Carpenter + +[ Upstream commit 80b78c39eb86e6b55f56363b709eb817527da5aa ] + +The get->num_services variable is an unsigned int which is controlled by +the user. The struct_size() function ensures that the size calculation +does not overflow an unsigned long, however, we are saving the result to +an int so the calculation can overflow. + +Both "len" and "get->num_services" come from the user. This check is +just a sanity check to help the user and ensure they are using the API +correctly. An integer overflow here is not a big deal. This has no +security impact. + +Save the result from struct_size() type size_t to fix this integer +overflow bug. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Dan Carpenter +Acked-by: Julian Anastasov +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c +index d0b64c36471d5..fb9f1badeddbf 100644 +--- a/net/netfilter/ipvs/ip_vs_ctl.c ++++ b/net/netfilter/ipvs/ip_vs_ctl.c +@@ -2852,12 +2852,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) + case IP_VS_SO_GET_SERVICES: + { + struct ip_vs_get_services *get; +- int size; ++ size_t size; + + get = (struct ip_vs_get_services *)arg; + size = struct_size(get, entrytable, get->num_services); + if (*len != size) { +- pr_err("length: %u != %u\n", *len, size); ++ pr_err("length: %u != %zu\n", *len, size); + ret = -EINVAL; + goto out; + } +@@ -2893,12 +2893,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) + case IP_VS_SO_GET_DESTS: + { + struct 
ip_vs_get_dests *get; +- int size; ++ size_t size; + + get = (struct ip_vs_get_dests *)arg; + size = struct_size(get, entrytable, get->num_dests); + if (*len != size) { +- pr_err("length: %u != %u\n", *len, size); ++ pr_err("length: %u != %zu\n", *len, size); + ret = -EINVAL; + goto out; + } +-- +2.39.5 + diff --git a/queue-5.10/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch b/queue-5.10/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch new file mode 100644 index 0000000000..bd284b08e9 --- /dev/null +++ b/queue-5.10/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch @@ -0,0 +1,46 @@ +From 3601814f22fa7fc94d557fce840daf433d82fc06 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Mar 2025 10:18:20 +0800 +Subject: net/mlx5: handle errors in mlx5_chains_create_table() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Wentao Liang + +[ Upstream commit eab0396353be1c778eba1c0b5180176f04dd21ce ] + +In mlx5_chains_create_table(), the return value of mlx5_get_fdb_sub_ns() +and mlx5_get_flow_namespace() must be checked to prevent NULL pointer +dereferences. If either function fails, the function should log error +message with mlx5_core_warn() and return error pointer. 
+ +Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities") +Signed-off-by: Wentao Liang +Reviewed-by: Tariq Toukan +Link: https://patch.msgid.link/20250307021820.2646-1-vulab@iscas.ac.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +index 77c6287c90d55..0e255ff95a50d 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +@@ -267,6 +267,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, + ns = mlx5_get_flow_namespace(chains->dev, chains->ns); + } + ++ if (!ns) { ++ mlx5_core_warn(chains->dev, "Failed to get flow namespace\n"); ++ return ERR_PTR(-EOPNOTSUPP); ++ } ++ + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = chains->group_num; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); +-- +2.39.5 + diff --git a/queue-5.10/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch b/queue-5.10/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch new file mode 100644 index 0000000000..4900455cb4 --- /dev/null +++ b/queue-5.10/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch @@ -0,0 +1,53 @@ +From ea58be2c8b80bc09db10226c312cf213c2663d4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Mar 2025 00:01:44 +0200 +Subject: net/mlx5e: Prevent bridge link show failure for non-eswitch-allowed + devices + +From: Carolina Jubran + +[ Upstream commit e92df790d07a8eea873efcb84776e7b71f81c7d5 ] + +mlx5_eswitch_get_vepa returns -EPERM if the device lacks +eswitch_manager capability, blocking mlx5e_bridge_getlink from +retrieving VEPA mode. 
Since mlx5e_bridge_getlink implements +ndo_bridge_getlink, returning -EPERM causes bridge link show to fail +instead of skipping devices without this capability. + +To avoid this, return -EOPNOTSUPP from mlx5e_bridge_getlink when +mlx5_eswitch_get_vepa fails, ensuring the command continues processing +other devices while ignoring those without the necessary capability. + +Fixes: 4b89251de024 ("net/mlx5: Support ndo bridge_setlink and getlink") +Signed-off-by: Carolina Jubran +Reviewed-by: Jianbo Liu +Signed-off-by: Tariq Toukan +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/1741644104-97767-7-git-send-email-tariqt@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index aeb8bb3c549a1..c3ff1fc577a7c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -4604,11 +4604,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mode, setting; +- int err; + +- err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); +- if (err) +- return err; ++ if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) ++ return -EOPNOTSUPP; + mode = setting ? 
BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, + mode, +-- +2.39.5 + diff --git a/queue-5.10/net_sched-prevent-creation-of-classes-with-tc_h_root.patch b/queue-5.10/net_sched-prevent-creation-of-classes-with-tc_h_root.patch new file mode 100644 index 0000000000..7ae608ccbe --- /dev/null +++ b/queue-5.10/net_sched-prevent-creation-of-classes-with-tc_h_root.patch @@ -0,0 +1,50 @@ +From 24fb97782a2b94756f2b05f16cd1b19241f0dd28 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 15:23:54 -0800 +Subject: net_sched: Prevent creation of classes with TC_H_ROOT + +From: Cong Wang + +[ Upstream commit 0c3057a5a04d07120b3d0ec9c79568fceb9c921e ] + +The function qdisc_tree_reduce_backlog() uses TC_H_ROOT as a termination +condition when traversing up the qdisc tree to update parent backlog +counters. However, if a class is created with classid TC_H_ROOT, the +traversal terminates prematurely at this class instead of reaching the +actual root qdisc, causing parent statistics to be incorrectly maintained. +In case of DRR, this could lead to a crash as reported by Mingi Cho. + +Prevent the creation of any Qdisc class with classid TC_H_ROOT +(0xFFFFFFFF) across all qdisc types, as suggested by Jamal. 
+ +Reported-by: Mingi Cho +Signed-off-by: Cong Wang +Reviewed-by: Simon Horman +Fixes: 066a3b5b2346 ("[NET_SCHED] sch_api: fix qdisc_tree_decrease_qlen() loop") +Link: https://patch.msgid.link/20250306232355.93864-2-xiyou.wangcong@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_api.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c +index 238ae7b0ca5ba..b8fb94bfa9606 100644 +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -2169,6 +2169,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + return -EOPNOTSUPP; + } + ++ /* Prevent creation of traffic classes with classid TC_H_ROOT */ ++ if (clid == TC_H_ROOT) { ++ NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT"); ++ return -EINVAL; ++ } ++ + new_cl = cl; + err = -EOPNOTSUPP; + if (cops->change) +-- +2.39.5 + diff --git a/queue-5.10/netfilter-conntrack-convert-to-refcount_t-api.patch b/queue-5.10/netfilter-conntrack-convert-to-refcount_t-api.patch new file mode 100644 index 0000000000..39d0d5a941 --- /dev/null +++ b/queue-5.10/netfilter-conntrack-convert-to-refcount_t-api.patch @@ -0,0 +1,364 @@ +From a3a9f0b5316ac0c22373797fd37cd22d7f0c2dae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jan 2022 05:03:22 +0100 +Subject: netfilter: conntrack: convert to refcount_t api + +From: Florian Westphal + +[ Upstream commit 719774377622bc4025d2a74f551b5dc2158c6c30 ] + +Convert nf_conn reference counting from atomic_t to refcount_t based api. +refcount_t api provides more runtime sanity checks and will warn on +certain constructs, e.g. refcount_inc() on a zero reference count, which +usually indicates use-after-free. + +For this reason template allocation is changed to init the refcount to +1, the subsequent add operations are removed. + +Likewise, init_conntrack() is changed to set the initial refcount to 1 +instead of refcount_inc().
+ +This is safe because the new entry is not (yet) visible to other cpus. + +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: 5cfe5612ca95 ("netfilter: nft_ct: Use __refcount_inc() for per-CPU nft_ct_pcpu_template.") +Signed-off-by: Sasha Levin +--- + include/linux/netfilter/nf_conntrack_common.h | 8 +++--- + net/netfilter/nf_conntrack_core.c | 26 +++++++++---------- + net/netfilter/nf_conntrack_expect.c | 4 +-- + net/netfilter/nf_conntrack_netlink.c | 6 ++--- + net/netfilter/nf_conntrack_standalone.c | 4 +-- + net/netfilter/nf_flow_table_core.c | 2 +- + net/netfilter/nf_synproxy_core.c | 1 - + net/netfilter/nft_ct.c | 4 +-- + net/netfilter/xt_CT.c | 3 +-- + net/openvswitch/conntrack.c | 1 - + net/sched/act_ct.c | 1 - + 11 files changed, 27 insertions(+), 33 deletions(-) + +diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h +index 0c7d8d1e945dd..ce8a4eb69b5c1 100644 +--- a/include/linux/netfilter/nf_conntrack_common.h ++++ b/include/linux/netfilter/nf_conntrack_common.h +@@ -2,7 +2,7 @@ + #ifndef _NF_CONNTRACK_COMMON_H + #define _NF_CONNTRACK_COMMON_H + +-#include ++#include + #include + + struct ip_conntrack_stat { +@@ -24,19 +24,19 @@ struct ip_conntrack_stat { + #define NFCT_PTRMASK ~(NFCT_INFOMASK) + + struct nf_conntrack { +- atomic_t use; ++ refcount_t use; + }; + + void nf_conntrack_destroy(struct nf_conntrack *nfct); + static inline void nf_conntrack_put(struct nf_conntrack *nfct) + { +- if (nfct && atomic_dec_and_test(&nfct->use)) ++ if (nfct && refcount_dec_and_test(&nfct->use)) + nf_conntrack_destroy(nfct); + } + static inline void nf_conntrack_get(struct nf_conntrack *nfct) + { + if (nfct) +- atomic_inc(&nfct->use); ++ refcount_inc(&nfct->use); + } + + #endif /* _NF_CONNTRACK_COMMON_H */ +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index 99d5d8cd3895f..b8032cc378b8e 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ 
b/net/netfilter/nf_conntrack_core.c +@@ -564,7 +564,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, + tmpl->status = IPS_TEMPLATE; + write_pnet(&tmpl->ct_net, net); + nf_ct_zone_add(tmpl, zone); +- atomic_set(&tmpl->ct_general.use, 0); ++ refcount_set(&tmpl->ct_general.use, 1); + + return tmpl; + } +@@ -597,7 +597,7 @@ destroy_conntrack(struct nf_conntrack *nfct) + struct nf_conn *ct = (struct nf_conn *)nfct; + + pr_debug("destroy_conntrack(%p)\n", ct); +- WARN_ON(atomic_read(&nfct->use) != 0); ++ WARN_ON(refcount_read(&nfct->use) != 0); + + if (unlikely(nf_ct_is_template(ct))) { + nf_ct_tmpl_free(ct); +@@ -716,7 +716,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) + /* caller must hold rcu readlock and none of the nf_conntrack_locks */ + static void nf_ct_gc_expired(struct nf_conn *ct) + { +- if (!atomic_inc_not_zero(&ct->ct_general.use)) ++ if (!refcount_inc_not_zero(&ct->ct_general.use)) + return; + + if (nf_ct_should_gc(ct)) +@@ -784,7 +784,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, + * in, try to obtain a reference and re-check tuple + */ + ct = nf_ct_tuplehash_to_ctrack(h); +- if (likely(atomic_inc_not_zero(&ct->ct_general.use))) { ++ if (likely(refcount_inc_not_zero(&ct->ct_general.use))) { + if (likely(nf_ct_key_equal(h, tuple, zone, net))) + goto found; + +@@ -853,7 +853,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) + + smp_wmb(); + /* The caller holds a reference to this object */ +- atomic_set(&ct->ct_general.use, 2); ++ refcount_set(&ct->ct_general.use, 2); + __nf_conntrack_hash_insert(ct, hash, reply_hash); + nf_conntrack_double_unlock(hash, reply_hash); + NF_CT_STAT_INC(net, insert); +@@ -902,7 +902,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct) + { + struct nf_conn_tstamp *tstamp; + +- atomic_inc(&ct->ct_general.use); ++ refcount_inc(&ct->ct_general.use); + ct->status |= IPS_CONFIRMED; + + /* set conntrack timestamp, if enabled. 
*/ +@@ -1281,7 +1281,7 @@ static unsigned int early_drop_list(struct net *net, + nf_ct_is_dying(tmp)) + continue; + +- if (!atomic_inc_not_zero(&tmp->ct_general.use)) ++ if (!refcount_inc_not_zero(&tmp->ct_general.use)) + continue; + + /* kill only if still in same netns -- might have moved due to +@@ -1397,7 +1397,7 @@ static void gc_worker(struct work_struct *work) + continue; + + /* need to take reference to avoid possible races */ +- if (!atomic_inc_not_zero(&tmp->ct_general.use)) ++ if (!refcount_inc_not_zero(&tmp->ct_general.use)) + continue; + + if (gc_worker_skip_ct(tmp)) { +@@ -1498,7 +1498,7 @@ __nf_conntrack_alloc(struct net *net, + /* Because we use RCU lookups, we set ct_general.use to zero before + * this is inserted in any list. + */ +- atomic_set(&ct->ct_general.use, 0); ++ refcount_set(&ct->ct_general.use, 0); + return ct; + out: + atomic_dec(&net->ct.count); +@@ -1522,7 +1522,7 @@ void nf_conntrack_free(struct nf_conn *ct) + /* A freed object has refcnt == 0, that's + * the golden rule for SLAB_TYPESAFE_BY_RCU + */ +- WARN_ON(atomic_read(&ct->ct_general.use) != 0); ++ WARN_ON(refcount_read(&ct->ct_general.use) != 0); + + nf_ct_ext_destroy(ct); + kmem_cache_free(nf_conntrack_cachep, ct); +@@ -1610,8 +1610,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, + if (!exp) + __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); + +- /* Now it is inserted into the unconfirmed list, bump refcount */ +- nf_conntrack_get(&ct->ct_general); ++ /* Now it is inserted into the unconfirmed list, set refcount to 1. 
*/ ++ refcount_set(&ct->ct_general.use, 1); + nf_ct_add_to_unconfirmed_list(ct); + + local_bh_enable(); +@@ -2214,7 +2214,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), + + return NULL; + found: +- atomic_inc(&ct->ct_general.use); ++ refcount_inc(&ct->ct_general.use); + spin_unlock(lockp); + local_bh_enable(); + return ct; +diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c +index 42557d2b6a908..516a9f05a87a7 100644 +--- a/net/netfilter/nf_conntrack_expect.c ++++ b/net/netfilter/nf_conntrack_expect.c +@@ -187,12 +187,12 @@ nf_ct_find_expectation(struct net *net, + * about to invoke ->destroy(), or nf_ct_delete() via timeout + * or early_drop(). + * +- * The atomic_inc_not_zero() check tells: If that fails, we ++ * The refcount_inc_not_zero() check tells: If that fails, we + * know that the ct is being destroyed. If it succeeds, we + * can be sure the ct cannot disappear underneath. + */ + if (unlikely(nf_ct_is_dying(exp->master) || +- !atomic_inc_not_zero(&exp->master->ct_general.use))) ++ !refcount_inc_not_zero(&exp->master->ct_general.use))) + return NULL; + + if (exp->flags & NF_CT_EXPECT_PERMANENT) { +diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c +index 8c9edad0826ef..705d77dc74b93 100644 +--- a/net/netfilter/nf_conntrack_netlink.c ++++ b/net/netfilter/nf_conntrack_netlink.c +@@ -506,7 +506,7 @@ static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) + + static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) + { +- if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)))) ++ if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use)))) + goto nla_put_failure; + return 0; + +@@ -1150,7 +1150,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) + ct = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(ct)) { + if (i < ARRAY_SIZE(nf_ct_evict) && +- 
atomic_inc_not_zero(&ct->ct_general.use)) ++ refcount_inc_not_zero(&ct->ct_general.use)) + nf_ct_evict[i++] = ct; + continue; + } +@@ -1701,7 +1701,7 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + ct, dying ? true : false, 0); + if (res < 0) { +- if (!atomic_inc_not_zero(&ct->ct_general.use)) ++ if (!refcount_inc_not_zero(&ct->ct_general.use)) + continue; + cb->args[0] = cpu; + cb->args[1] = (unsigned long)ct; +diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c +index b613de96ad855..073d10e212021 100644 +--- a/net/netfilter/nf_conntrack_standalone.c ++++ b/net/netfilter/nf_conntrack_standalone.c +@@ -300,7 +300,7 @@ static int ct_seq_show(struct seq_file *s, void *v) + int ret = 0; + + WARN_ON(!ct); +- if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) ++ if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use))) + return 0; + + if (nf_ct_should_gc(ct)) { +@@ -367,7 +367,7 @@ static int ct_seq_show(struct seq_file *s, void *v) + ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR); + ct_show_delta_time(s, ct); + +- seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); ++ seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use)); + + if (seq_has_overflowed(s)) + goto release; +diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c +index d091d51b5e19f..e05e09c07b971 100644 +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -48,7 +48,7 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct) + struct flow_offload *flow; + + if (unlikely(nf_ct_is_dying(ct) || +- !atomic_inc_not_zero(&ct->ct_general.use))) ++ !refcount_inc_not_zero(&ct->ct_general.use))) + return NULL; + + flow = kzalloc(sizeof(*flow), GFP_ATOMIC); +diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c +index 3d6d49420db8b..2dfc5dae06563 100644 +--- 
a/net/netfilter/nf_synproxy_core.c ++++ b/net/netfilter/nf_synproxy_core.c +@@ -349,7 +349,6 @@ static int __net_init synproxy_net_init(struct net *net) + goto err2; + + __set_bit(IPS_CONFIRMED_BIT, &ct->status); +- nf_conntrack_get(&ct->ct_general); + snet->tmpl = ct; + + snet->stats = alloc_percpu(struct synproxy_stats); +diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c +index 2a8dfa68f6e20..78631804e5c53 100644 +--- a/net/netfilter/nft_ct.c ++++ b/net/netfilter/nft_ct.c +@@ -259,7 +259,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, + + ct = this_cpu_read(nft_ct_pcpu_template); + +- if (likely(atomic_read(&ct->ct_general.use) == 1)) { ++ if (likely(refcount_read(&ct->ct_general.use) == 1)) { + nf_ct_zone_add(ct, &zone); + } else { + /* previous skb got queued to userspace */ +@@ -270,7 +270,6 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, + } + } + +- atomic_inc(&ct->ct_general.use); + nf_ct_set(skb, ct, IP_CT_NEW); + } + #endif +@@ -375,7 +374,6 @@ static bool nft_ct_tmpl_alloc_pcpu(void) + return false; + } + +- atomic_set(&tmp->ct_general.use, 1); + per_cpu(nft_ct_pcpu_template, cpu) = tmp; + } + +diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c +index d4deee39158ba..ffff1e1f79b91 100644 +--- a/net/netfilter/xt_CT.c ++++ b/net/netfilter/xt_CT.c +@@ -24,7 +24,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) + return XT_CONTINUE; + + if (ct) { +- atomic_inc(&ct->ct_general.use); ++ refcount_inc(&ct->ct_general.use); + nf_ct_set(skb, ct, IP_CT_NEW); + } else { + nf_ct_set(skb, ct, IP_CT_UNTRACKED); +@@ -202,7 +202,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, + goto err4; + } + __set_bit(IPS_CONFIRMED_BIT, &ct->status); +- nf_conntrack_get(&ct->ct_general); + out: + info->ct = ct; + return 0; +diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c +index 30f5e414018b1..9e8b3b930f926 100644 +--- a/net/openvswitch/conntrack.c ++++ 
b/net/openvswitch/conntrack.c +@@ -1713,7 +1713,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, + goto err_free_ct; + + __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); +- nf_conntrack_get(&ct_info.ct->ct_general); + return 0; + err_free_ct: + __ovs_ct_free_action(&ct_info); +diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c +index 4ea7a81707f3f..d9748c917a503 100644 +--- a/net/sched/act_ct.c ++++ b/net/sched/act_ct.c +@@ -1235,7 +1235,6 @@ static int tcf_ct_fill_params(struct net *net, + return -ENOMEM; + } + __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); +- nf_conntrack_get(&tmpl->ct_general); + p->tmpl = tmpl; + + return 0; +-- +2.39.5 + diff --git a/queue-5.10/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch b/queue-5.10/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch new file mode 100644 index 0000000000..96f1d982a3 --- /dev/null +++ b/queue-5.10/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch @@ -0,0 +1,129 @@ +From 404514e55bd830f82a94f27f3b9501f5db52f27b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 17:07:38 +0900 +Subject: netfilter: nf_conncount: Fully initialize struct nf_conncount_tuple + in insert_tree() + +From: Kohei Enju + +[ Upstream commit d653bfeb07ebb3499c403404c21ac58a16531607 ] + +Since commit b36e4523d4d5 ("netfilter: nf_conncount: fix garbage +collection confirm race"), `cpu` and `jiffies32` were introduced to +the struct nf_conncount_tuple. + +The commit made nf_conncount_add() initialize `conn->cpu` and +`conn->jiffies32` when allocating the struct. +In contrast, count_tree() was not changed to initialize them. + +By commit 34848d5c896e ("netfilter: nf_conncount: Split insert and +traversal"), count_tree() was split and the relevant allocation +code now resides in insert_tree(). +Initialize `conn->cpu` and `conn->jiffies32` in insert_tree(). 
+ +BUG: KMSAN: uninit-value in find_or_evict net/netfilter/nf_conncount.c:117 [inline] +BUG: KMSAN: uninit-value in __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 + find_or_evict net/netfilter/nf_conncount.c:117 [inline] + __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 + count_tree net/netfilter/nf_conncount.c:438 [inline] + nf_conncount_count+0x82f/0x1e80 net/netfilter/nf_conncount.c:521 + connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 + __nft_match_eval net/netfilter/nft_compat.c:403 [inline] + nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 + expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] + nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 + nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 + nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] + nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 + nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 + NF_HOOK_LIST include/linux/netfilter.h:350 [inline] + ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 + ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 + __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] + __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 + __netif_receive_skb_list net/core/dev.c:6035 [inline] + netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 + netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 + xdp_recv_frames net/bpf/test_run.c:280 [inline] + xdp_test_run_batch net/bpf/test_run.c:361 [inline] + bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 + bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 + bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 + __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 + __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] + __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] + __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 + ia32_sys_call+0x394d/0x4180 
arch/x86/include/generated/asm/syscalls_32.h:358 + do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] + __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 + do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 + do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 + entry_SYSENTER_compat_after_hwframe+0x84/0x8e + +Uninit was created at: + slab_post_alloc_hook mm/slub.c:4121 [inline] + slab_alloc_node mm/slub.c:4164 [inline] + kmem_cache_alloc_noprof+0x915/0xe10 mm/slub.c:4171 + insert_tree net/netfilter/nf_conncount.c:372 [inline] + count_tree net/netfilter/nf_conncount.c:450 [inline] + nf_conncount_count+0x1415/0x1e80 net/netfilter/nf_conncount.c:521 + connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 + __nft_match_eval net/netfilter/nft_compat.c:403 [inline] + nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 + expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] + nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 + nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 + nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] + nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 + nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 + NF_HOOK_LIST include/linux/netfilter.h:350 [inline] + ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 + ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 + __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] + __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 + __netif_receive_skb_list net/core/dev.c:6035 [inline] + netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 + netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 + xdp_recv_frames net/bpf/test_run.c:280 [inline] + xdp_test_run_batch net/bpf/test_run.c:361 [inline] + bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 + bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 + bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 + __sys_bpf+0x6aa/0xd90 
kernel/bpf/syscall.c:5813 + __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] + __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] + __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 + ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 + do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] + __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 + do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 + do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 + entry_SYSENTER_compat_after_hwframe+0x84/0x8e + +Reported-by: syzbot+83fed965338b573115f7@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=83fed965338b573115f7 +Fixes: b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race") +Signed-off-by: Kohei Enju +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_conncount.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c +index 0ce12a33ffda4..a66a27fe7f458 100644 +--- a/net/netfilter/nf_conncount.c ++++ b/net/netfilter/nf_conncount.c +@@ -366,6 +366,8 @@ insert_tree(struct net *net, + + conn->tuple = *tuple; + conn->zone = *zone; ++ conn->cpu = raw_smp_processor_id(); ++ conn->jiffies32 = (u32)jiffies; + memcpy(rbconn->key, key, sizeof(u32) * data->keylen); + + nf_conncount_list_init(&rbconn->list); +-- +2.39.5 + diff --git a/queue-5.10/netfilter-nft_ct-fix-use-after-free-when-attaching-z.patch b/queue-5.10/netfilter-nft_ct-fix-use-after-free-when-attaching-z.patch new file mode 100644 index 0000000000..18dc2ad512 --- /dev/null +++ b/queue-5.10/netfilter-nft_ct-fix-use-after-free-when-attaching-z.patch @@ -0,0 +1,47 @@ +From 62285f2fd46111ead8fd82627f5d34ca7a9cd994 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 23 Jan 2022 15:24:00 +0100 +Subject: netfilter: nft_ct: fix use after free when attaching zone template + +From: Florian Westphal + +[ 
Upstream commit 34243b9ec856309339172b1507379074156947e8 ] + +The conversion erroneously removed the refcount increment. +In case we can use the percpu template, we need to increment +the refcount, else it will be released when the skb gets freed. + +In case the slowpath is taken, the new template already has a +refcount of 1. + +Fixes: 719774377622 ("netfilter: conntrack: convert to refcount_t api") +Reported-by: kernel test robot +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: 5cfe5612ca95 ("netfilter: nft_ct: Use __refcount_inc() for per-CPU nft_ct_pcpu_template.") +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_ct.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c +index 78631804e5c53..4b75c7113de4d 100644 +--- a/net/netfilter/nft_ct.c ++++ b/net/netfilter/nft_ct.c +@@ -260,9 +260,12 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, + ct = this_cpu_read(nft_ct_pcpu_template); + + if (likely(refcount_read(&ct->ct_general.use) == 1)) { ++ refcount_inc(&ct->ct_general.use); + nf_ct_zone_add(ct, &zone); + } else { +- /* previous skb got queued to userspace */ ++ /* previous skb got queued to userspace, allocate temporary ++ * one until percpu template can be reused. ++ */ + ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC); + if (!ct) { + regs->verdict.code = NF_DROP; +-- +2.39.5 + diff --git a/queue-5.10/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch b/queue-5.10/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch new file mode 100644 index 0000000000..19d5501b15 --- /dev/null +++ b/queue-5.10/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch @@ -0,0 +1,63 @@ +From f148b30847f9410fcba977effeba5fb8de865580 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 17:02:42 +0100 +Subject: netfilter: nft_ct: Use __refcount_inc() for per-CPU + nft_ct_pcpu_template. 
+ +From: Sebastian Andrzej Siewior + +[ Upstream commit 5cfe5612ca9590db69b9be29dc83041dbf001108 ] + +nft_ct_pcpu_template is a per-CPU variable and relies on disabled BH for its +locking. The refcounter is read and if its value is set to one then the +refcounter is incremented and variable is used - otherwise it is already +in use and left untouched. + +Without per-CPU locking in local_bh_disable() on PREEMPT_RT the +read-then-increment operation is not atomic and therefore racy. + +This can be avoided by using unconditionally __refcount_inc() which will +increment counter and return the old value as an atomic operation. +In case the returned counter is not one, the variable is in use and we +need to decrement counter. Otherwise we can use it. + +Use __refcount_inc() instead of read and a conditional increment. + +Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support") +Signed-off-by: Sebastian Andrzej Siewior +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_ct.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c +index 4b75c7113de4d..f95f1dbc48dea 100644 +--- a/net/netfilter/nft_ct.c ++++ b/net/netfilter/nft_ct.c +@@ -239,6 +239,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, + enum ip_conntrack_info ctinfo; + u16 value = nft_reg_load16(&regs->data[priv->sreg]); + struct nf_conn *ct; ++ int oldcnt; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) /* already tracked */ +@@ -259,10 +260,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, + + ct = this_cpu_read(nft_ct_pcpu_template); + +- if (likely(refcount_read(&ct->ct_general.use) == 1)) { +- refcount_inc(&ct->ct_general.use); ++ __refcount_inc(&ct->ct_general.use, &oldcnt); ++ if (likely(oldcnt == 1)) { + nf_ct_zone_add(ct, &zone); + } else { ++ refcount_dec(&ct->ct_general.use); + /* previous skb got queued to userspace, allocate
temporary + * one until percpu template can be reused. + */ +-- +2.39.5 + diff --git a/queue-5.10/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch b/queue-5.10/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch new file mode 100644 index 0000000000..cf43918ecd --- /dev/null +++ b/queue-5.10/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch @@ -0,0 +1,78 @@ +From 0b82bdc1a8b7b76d089b6bc186efe93ff0300936 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 2 Mar 2025 00:14:36 +0300 +Subject: netfilter: nft_exthdr: fix offset with ipv4_find_option() + +From: Alexey Kashavkin + +[ Upstream commit 6edd78af9506bb182518da7f6feebd75655d9a0e ] + +There is an incorrect calculation in the offset variable which causes +the nft_skb_copy_to_reg() function to always return -EFAULT. Adding the +start variable is redundant. In the __ip_options_compile() function the +correct offset is specified when finding the function. There is no need +to add the size of the iphdr structure to the offset. 
+ +Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options") +Signed-off-by: Alexey Kashavkin +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_exthdr.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c +index c9f89f035ccff..5bde436b87548 100644 +--- a/net/netfilter/nft_exthdr.c ++++ b/net/netfilter/nft_exthdr.c +@@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options *opt = (struct ip_options *)optbuf; + struct iphdr *iph, _iph; +- unsigned int start; + bool found = false; + __be32 info; + int optlen; +@@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (!iph) + return -EBADMSG; +- start = sizeof(struct iphdr); + + optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); + if (optlen <= 0) +@@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + /* Copy the options since __ip_options_compile() modifies + * the options. + */ +- if (skb_copy_bits(skb, start, opt->__data, optlen)) ++ if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen)) + return -EBADMSG; + opt->optlen = optlen; + +@@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + found = target == IPOPT_SSRR ? 
opt->is_strictroute : + !opt->is_strictroute; + if (found) +- *offset = opt->srr + start; ++ *offset = opt->srr; + break; + case IPOPT_RR: + if (!opt->rr) + break; +- *offset = opt->rr + start; ++ *offset = opt->rr; + found = true; + break; + case IPOPT_RA: + if (!opt->router_alert) + break; +- *offset = opt->router_alert + start; ++ *offset = opt->router_alert; + found = true; + break; + default: +-- +2.39.5 + diff --git a/queue-5.10/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch b/queue-5.10/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch new file mode 100644 index 0000000000..e1f4b7649a --- /dev/null +++ b/queue-5.10/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch @@ -0,0 +1,76 @@ +From 17905495a30e118672cc3150b6af87af00a90abc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 05:16:18 -0800 +Subject: netpoll: hold rcu read lock in __netpoll_send_skb() + +From: Breno Leitao + +[ Upstream commit 505ead7ab77f289f12d8a68ac83da068e4d4408b ] + +The function __netpoll_send_skb() is being invoked without holding the +RCU read lock. This oversight triggers a warning message when +CONFIG_PROVE_RCU_LIST is enabled: + + net/core/netpoll.c:330 suspicious rcu_dereference_check() usage! + + netpoll_send_skb + netpoll_send_udp + write_ext_msg + console_flush_all + console_unlock + vprintk_emit + +To prevent npinfo from disappearing unexpectedly, ensure that +__netpoll_send_skb() is protected with the RCU read lock. 
+ +Fixes: 2899656b494dcd1 ("netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()") +Signed-off-by: Breno Leitao +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250306-netpoll_rcu_v2-v2-1-bc4f5c51742a@debian.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/netpoll.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/net/core/netpoll.c b/net/core/netpoll.c +index 4475b2174bcc4..66a6f62412393 100644 +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -326,6 +326,7 @@ static int netpoll_owner_active(struct net_device *dev) + static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) + { + netdev_tx_t status = NETDEV_TX_BUSY; ++ netdev_tx_t ret = NET_XMIT_DROP; + struct net_device *dev; + unsigned long tries; + /* It is up to the caller to keep npinfo alive. */ +@@ -334,11 +335,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) + lockdep_assert_irqs_disabled(); + + dev = np->dev; ++ rcu_read_lock(); + npinfo = rcu_dereference_bh(dev->npinfo); + + if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { + dev_kfree_skb_irq(skb); +- return NET_XMIT_DROP; ++ goto out; + } + + /* don't get messages out of order, and no recursion */ +@@ -377,7 +379,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) + skb_queue_tail(&npinfo->txq, skb); + schedule_delayed_work(&npinfo->tx_work,0); + } +- return NETDEV_TX_OK; ++ ret = NETDEV_TX_OK; ++out: ++ rcu_read_unlock(); ++ return ret; + } + + netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +-- +2.39.5 + diff --git a/queue-5.10/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch b/queue-5.10/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch new file mode 100644 index 0000000000..4ee05cb744 --- /dev/null +++ b/queue-5.10/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch @@ -0,0 +1,40 @@ +From 
b6aedb37a22e93af49d8179681dbad412e787d07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Feb 2025 21:02:41 +0100 +Subject: pinctrl: bcm281xx: Fix incorrect regmap max_registers value + +From: Artur Weber + +[ Upstream commit 68283c1cb573143c0b7515e93206f3503616bc10 ] + +The max_registers value does not take into consideration the stride; +currently, it's set to the number of the last pin, but this does not +accurately represent the final register. + +Fix this by multiplying the current value by 4. + +Fixes: 54b1aa5a5b16 ("ARM: pinctrl: Add Broadcom Capri pinctrl driver") +Signed-off-by: Artur Weber +Link: https://lore.kernel.org/20250207-bcm21664-pinctrl-v1-2-e7cfac9b2d3b@gmail.com +Signed-off-by: Linus Walleij +Signed-off-by: Sasha Levin +--- + drivers/pinctrl/bcm/pinctrl-bcm281xx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +index 9ab1f427286a7..fbfddcc39d5cc 100644 +--- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c ++++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +@@ -981,7 +981,7 @@ static const struct regmap_config bcm281xx_pinctrl_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, +- .max_register = BCM281XX_PIN_VC_CAM3_SDA, ++ .max_register = BCM281XX_PIN_VC_CAM3_SDA * 4, + }; + + static int bcm281xx_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) +-- +2.39.5 + diff --git a/queue-5.10/series b/queue-5.10/series index 37797885ed..3c9e74b445 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -4,3 +4,17 @@ sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch ipv6-fix-signed-integer-overflow-in-__ip6_append_data.patch kvm-x86-reject-hyper-v-s-send_ipi-hypercalls-if-local-apic-isn-t-in-kernel.patch x86-kexec-fix-memory-leak-of-elf-header-buffer.patch +fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch +pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch 
+netfilter-conntrack-convert-to-refcount_t-api.patch +netfilter-nft_ct-fix-use-after-free-when-attaching-z.patch +netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch +ice-fix-memory-leak-in-arfs-after-reset.patch +netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch +drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch +net-mlx5-handle-errors-in-mlx5_chains_create_table.patch +netfilter-nf_conncount-fully-initialize-struct-nf_co.patch +ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch +net_sched-prevent-creation-of-classes-with-tc_h_root.patch +netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch +net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch -- 2.47.3