From: Sasha Levin Date: Sat, 15 Mar 2025 01:12:06 +0000 (-0400) Subject: Fixes for 5.15 X-Git-Tag: v6.6.84~51 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=df5dc106a67d048b635aa09c56c385ec677585a8;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.15 Signed-off-by: Sasha Levin --- diff --git a/queue-5.15/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch b/queue-5.15/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch new file mode 100644 index 0000000000..2fbce43a8c --- /dev/null +++ b/queue-5.15/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch @@ -0,0 +1,85 @@ +From 44355f5e12b0aa0ff65c019066df0e97054874e9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 20:52:08 -0700 +Subject: Drivers: hv: vmbus: Don't release fb_mmio resource in + vmbus_free_mmio() + +From: Michael Kelley + +[ Upstream commit 73fe9073c0cc28056cb9de0c8a516dac070f1d1f ] + +The VMBus driver manages the MMIO space it owns via the hyperv_mmio +resource tree. Because the synthetic video framebuffer portion of the +MMIO space is initially setup by the Hyper-V host for each guest, the +VMBus driver does an early reserve of that portion of MMIO space in the +hyperv_mmio resource tree. It saves a pointer to that resource in +fb_mmio. When a VMBus driver requests MMIO space and passes "true" +for the "fb_overlap_ok" argument, the reserved framebuffer space is +used if possible. In that case it's not necessary to do another request +against the "shadow" hyperv_mmio resource tree because that resource +was already requested in the early reserve steps. + +However, the vmbus_free_mmio() function currently does no special +handling for the fb_mmio resource. When a framebuffer device is +removed, or the driver is unbound, the current code for +vmbus_free_mmio() releases the reserved resource, leaving fb_mmio +pointing to memory that has been freed. If the same or another +driver is subsequently bound to the device, vmbus_allocate_mmio() +checks against fb_mmio, and potentially gets garbage. Furthermore +a second unbind operation produces this "nonexistent resource" error +because of the unbalanced behavior between vmbus_allocate_mmio() and +vmbus_free_mmio(): + +[ 55.499643] resource: Trying to free nonexistent + resource <0x00000000f0000000-0x00000000f07fffff> + +Fix this by adding logic to vmbus_free_mmio() to recognize when +MMIO space in the fb_mmio reserved area would be released, and don't +release it. This filtering ensures the fb_mmio resource always exists, +and makes vmbus_free_mmio() more parallel with vmbus_allocate_mmio(). 
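+
+As a sketch of the driver-side pattern this makes symmetric (for
+illustration only, not part of the change below), a framebuffer
+driver allocates and releases MMIO space like so:
+
+	/* bind: may be satisfied from the pre-reserved fb_mmio space */
+	err = vmbus_allocate_mmio(&res, hdev, 0, -1, size, 0x100000,
+				  true /* fb_overlap_ok */);
+	...
+	/* unbind: must not release the fb_mmio shadow reservation */
+	vmbus_free_mmio(res->start, size);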
+ +Fixes: be000f93e5d7 ("drivers:hv: Track allocations of children of hv_vmbus in private resource tree") +Signed-off-by: Michael Kelley +Tested-by: Saurabh Sengar +Reviewed-by: Saurabh Sengar +Link: https://lore.kernel.org/r/20250310035208.275764-1-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20250310035208.275764-1-mhklinux@outlook.com> +Signed-off-by: Sasha Levin +--- + drivers/hv/vmbus_drv.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 02aeb192e3671..cb3a5b13c3ec2 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -2419,12 +2419,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size) + struct resource *iter; + + mutex_lock(&hyperv_mmio_lock); ++ ++ /* ++ * If all bytes of the MMIO range to be released are within the ++ * special case fb_mmio shadow region, skip releasing the shadow ++ * region since no corresponding __request_region() was done ++ * in vmbus_allocate_mmio(). ++ */ ++ if (fb_mmio && start >= fb_mmio->start && ++ (start + size - 1 <= fb_mmio->end)) ++ goto skip_shadow_release; ++ + for (iter = hyperv_mmio; iter; iter = iter->sibling) { + if ((iter->start >= start + size) || (iter->end <= start)) + continue; + + __release_region(iter, start, size); + } ++ ++skip_shadow_release: + release_mem_region(start, size); + mutex_unlock(&hyperv_mmio_lock); + +-- +2.39.5 + diff --git a/queue-5.15/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch b/queue-5.15/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch new file mode 100644 index 0000000000..905a5707f9 --- /dev/null +++ b/queue-5.15/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch @@ -0,0 +1,53 @@ +From a18560659dbdef16f28c1a32838383e32b206e95 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Feb 2025 15:52:52 -0800 +Subject: fbdev: hyperv_fb: iounmap() the correct memory when removing a device + +From: Michael Kelley + +[ Upstream commit 7241c886a71797cc51efc6fadec7076fcf6435c2 ] + +When a Hyper-V framebuffer device is removed, or the driver is unbound +from a device, any allocated and/or mapped memory must be released. In +particular, MMIO address space that was mapped to the framebuffer must +be unmapped. Current code unmaps the wrong address, resulting in an +error like: + +[ 4093.980597] iounmap: bad address 00000000c936c05c + +followed by a stack dump. + +Commit d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for +Hyper-V frame buffer driver") changed the kind of address stored in +info->screen_base, and the iounmap() call in hvfb_putmem() was not +updated accordingly. + +Fix this by updating hvfb_putmem() to unmap the correct address. 
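+
+The invariant being restored is that iounmap() must be handed the
+cookie that ioremap() returned. Roughly (illustrative, not the full
+driver flow):
+
+	par->mmio_vp = ioremap(par->mem->start, screen_fb_size);
+	info->screen_base = par->dio_vp;  /* deferred-I/O vmalloc shadow */
+	...
+	vfree(par->dio_vp);
+	iounmap(par->mmio_vp);		/* correct */
+	/* iounmap(info->screen_base) trips "iounmap: bad address" */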
+ +Fixes: d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") +Signed-off-by: Michael Kelley +Reviewed-by: Saurabh Sengar +Link: https://lore.kernel.org/r/20250209235252.2987-1-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20250209235252.2987-1-mhklinux@outlook.com> +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/hyperv_fb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c +index 6a881cfd7f5c0..5fd1b33d11238 100644 +--- a/drivers/video/fbdev/hyperv_fb.c ++++ b/drivers/video/fbdev/hyperv_fb.c +@@ -1130,7 +1130,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) + + if (par->need_docopy) { + vfree(par->dio_vp); +- iounmap(info->screen_base); ++ iounmap(par->mmio_vp); + vmbus_free_mmio(par->mem->start, screen_fb_size); + } else { + hvfb_release_phymem(hdev, info->fix.smem_start, +-- +2.39.5 + diff --git a/queue-5.15/gre-fix-ipv6-link-local-address-generation.patch b/queue-5.15/gre-fix-ipv6-link-local-address-generation.patch new file mode 100644 index 0000000000..a258e6372c --- /dev/null +++ b/queue-5.15/gre-fix-ipv6-link-local-address-generation.patch @@ -0,0 +1,110 @@ +From a4462a1288f255eec9ebddd0558142b25073be29 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Mar 2025 20:28:53 +0100 +Subject: gre: Fix IPv6 link-local address generation. + +From: Guillaume Nault + +[ Upstream commit 183185a18ff96751db52a46ccf93fff3a1f42815 ] + +Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE +devices in most cases and fall back to using add_v4_addrs() only in +case the GRE configuration is incompatible with addrconf_addr_gen(). + +GRE used to use addrconf_addr_gen() until commit e5dd729460ca +("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL +address") restricted this use to gretap and ip6gretap devices, and +created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. + +The original problem came when commit 9af28511be10 ("addrconf: refuse +isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its +addr parameter was 0. The commit says that this would create an invalid +address, however, I couldn't find any RFC saying that the generated +interface identifier would be wrong. Anyway, since gre over IPv4 +devices pass their local tunnel address to __ipv6_isatap_ifid(), that +commit broke their IPv6 link-local address generation when the local +address was unspecified. + +Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT +interfaces when computing v6LL address") tried to fix that case by +defining add_v4_addrs() and calling it to generate the IPv6 link-local +address instead of using addrconf_addr_gen() (apart for gretap and +ip6gretap devices, which would still use the regular +addrconf_addr_gen(), since they have a MAC address). + +That broke several use cases because add_v4_addrs() isn't properly +integrated into the rest of IPv6 Neighbor Discovery code. Several of +these shortcomings have been fixed over time, but add_v4_addrs() +remains broken on several aspects. In particular, it doesn't send any +Router Sollicitations, so the SLAAC process doesn't start until the +interface receives a Router Advertisement. Also, add_v4_addrs() mostly +ignores the address generation mode of the interface +(/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the +IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. 
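+
+For reference, the interface identifier that __ipv6_isatap_ifid()
+derives from an IPv4 address has roughly this shape (simplified
+paraphrase, not verbatim kernel code; is_globally_unique() stands in
+for the actual chain of ipv4_is_*() checks):
+
+	if (addr == 0)
+		return -1;	/* the refusal added by 9af28511be10 */
+	eui[0] = is_globally_unique(addr) ? 0x02 : 0x00; /* u/l bit */
+	eui[1] = 0;
+	eui[2] = 0x5e;
+	eui[3] = 0xfe;
+	memcpy(eui + 4, &addr, 4);	/* IID = xx:00:5e:fe:<ipv4> */
+
+so a GRE device bound to the unspecified local address gets no
+EUI64-based link-local address at all.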
+ +Fix the situation by using add_v4_addrs() only in the specific scenario +where the normal method would fail. That is, for interfaces that have +all of the following characteristics: + + * run over IPv4, + * transport IP packets directly, not Ethernet (that is, not gretap + interfaces), + * tunnel endpoint is INADDR_ANY (that is, 0), + * device address generation mode is EUI64. + +In all other cases, revert back to the regular addrconf_addr_gen(). + +Also, remove the special case for ip6gre interfaces in add_v4_addrs(), +since ip6gre devices now always use addrconf_addr_gen() instead. + +Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") +Signed-off-by: Guillaume Nault +Reviewed-by: Ido Schimmel +Link: https://patch.msgid.link/559c32ce5c9976b269e6337ac9abb6a96abe5096.1741375285.git.gnault@redhat.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/ipv6/addrconf.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index 932a10f64adcb..f5cca40b71610 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3145,16 +3145,13 @@ static void add_v4_addrs(struct inet6_dev *idev) + struct in6_addr addr; + struct net_device *dev; + struct net *net = dev_net(idev->dev); +- int scope, plen, offset = 0; ++ int scope, plen; + u32 pflags = 0; + + ASSERT_RTNL(); + + memset(&addr, 0, sizeof(struct in6_addr)); +- /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ +- if (idev->dev->addr_len == sizeof(struct in6_addr)) +- offset = sizeof(struct in6_addr) - 4; +- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); ++ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); + + if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { + scope = IPV6_ADDR_COMPATv4; +@@ -3462,7 +3459,13 @@ static void addrconf_gre_config(struct net_device *dev) + return; + } + +- if (dev->type == ARPHRD_ETHER) { ++ /* Generate the IPv6 link-local address using addrconf_addr_gen(), ++ * unless we have an IPv4 GRE device not bound to an IP address and ++ * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this ++ * case). Such devices fall back to add_v4_addrs() instead. ++ */ ++ if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && ++ idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { + addrconf_addr_gen(idev, true); + return; + } +-- +2.39.5 + diff --git a/queue-5.15/ice-fix-memory-leak-in-arfs-after-reset.patch b/queue-5.15/ice-fix-memory-leak-in-arfs-after-reset.patch new file mode 100644 index 0000000000..f7dff24965 --- /dev/null +++ b/queue-5.15/ice-fix-memory-leak-in-arfs-after-reset.patch @@ -0,0 +1,68 @@ +From db2bc9c5d629dc6836507d754ac1a76d616e1e9b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Jan 2025 09:15:39 +0100 +Subject: ice: fix memory leak in aRFS after reset + +From: Grzegorz Nitka + +[ Upstream commit 23d97f18901ef5e4e264e3b1777fe65c760186b5 ] + +Fix aRFS (accelerated Receive Flow Steering) structures memory leak by +adding a checker to verify if aRFS memory is already allocated while +configuring VSI. aRFS objects are allocated in two cases: +- as part of VSI initialization (at probe), and +- as part of reset handling + +However, VSI reconfiguration executed during reset involves memory +allocation one more time, without prior releasing already allocated +resources. 
This led to the memory leak with the following signature: + +[root@os-delivery ~]# cat /sys/kernel/debug/kmemleak +unreferenced object 0xff3c1ca7252e6000 (size 8192): + comm "kworker/0:0", pid 8, jiffies 4296833052 + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace (crc 0): + [] __kmalloc_cache_noprof+0x275/0x340 + [] ice_init_arfs+0x3a/0xe0 [ice] + [] ice_vsi_cfg_def+0x607/0x850 [ice] + [] ice_vsi_setup+0x5b/0x130 [ice] + [] ice_init+0x1c1/0x460 [ice] + [] ice_probe+0x2af/0x520 [ice] + [] local_pci_probe+0x43/0xa0 + [] work_for_cpu_fn+0x13/0x20 + [] process_one_work+0x179/0x390 + [] worker_thread+0x239/0x340 + [] kthread+0xcc/0x100 + [] ret_from_fork+0x2d/0x50 + [] ret_from_fork_asm+0x1a/0x30 + ... + +Fixes: 28bf26724fdb ("ice: Implement aRFS") +Reviewed-by: Michal Swiatkowski +Signed-off-by: Grzegorz Nitka +Reviewed-by: Simon Horman +Tested-by: Rinitha S (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c +index 88d98c9e5f914..9cebae92364eb 100644 +--- a/drivers/net/ethernet/intel/ice/ice_arfs.c ++++ b/drivers/net/ethernet/intel/ice/ice_arfs.c +@@ -510,7 +510,7 @@ void ice_init_arfs(struct ice_vsi *vsi) + struct hlist_head *arfs_fltr_list; + unsigned int i; + +- if (!vsi || vsi->type != ICE_VSI_PF) ++ if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi)) + return; + + arfs_fltr_list = kzalloc(sizeof(*arfs_fltr_list) * ICE_MAX_ARFS_LIST, +-- +2.39.5 + diff --git a/queue-5.15/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch b/queue-5.15/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch new file mode 100644 index 0000000000..7eb63fe5c6 --- /dev/null +++ b/queue-5.15/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch @@ -0,0 +1,68 @@ +From 91149fe02dab7fb06b5436d2c8593e7c4bb7161c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Mar 2025 10:45:53 +0300 +Subject: ipvs: prevent integer overflow in do_ip_vs_get_ctl() + +From: Dan Carpenter + +[ Upstream commit 80b78c39eb86e6b55f56363b709eb817527da5aa ] + +The get->num_services variable is an unsigned int which is controlled by +the user. The struct_size() function ensures that the size calculation +does not overflow an unsigned long, however, we are saving the result to +an int so the calculation can overflow. + +Both "len" and "get->num_services" come from the user. This check is +just a sanity check to help the user and ensure they are using the API +correctly. An integer overflow here is not a big deal. This has no +security impact. + +Save the result from struct_size() type size_t to fix this integer +overflow bug. 
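+
+A minimal illustration of the truncation (sketch, with the relevant
+declarations condensed):
+
+	struct ip_vs_get_services *get = arg;
+	int size;	/* BAD: signed 32-bit */
+
+	/* struct_size() saturates to SIZE_MAX rather than wrapping; on
+	 * a 64-bit kernel, assigning that to an int truncates it (to
+	 * -1), so the "*len != size" sanity check compares garbage. */
+	size = struct_size(get, entrytable, get->num_services);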
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Dan Carpenter +Acked-by: Julian Anastasov +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c +index d0b64c36471d5..fb9f1badeddbf 100644 +--- a/net/netfilter/ipvs/ip_vs_ctl.c ++++ b/net/netfilter/ipvs/ip_vs_ctl.c +@@ -2852,12 +2852,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) + case IP_VS_SO_GET_SERVICES: + { + struct ip_vs_get_services *get; +- int size; ++ size_t size; + + get = (struct ip_vs_get_services *)arg; + size = struct_size(get, entrytable, get->num_services); + if (*len != size) { +- pr_err("length: %u != %u\n", *len, size); ++ pr_err("length: %u != %zu\n", *len, size); + ret = -EINVAL; + goto out; + } +@@ -2893,12 +2893,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) + case IP_VS_SO_GET_DESTS: + { + struct ip_vs_get_dests *get; +- int size; ++ size_t size; + + get = (struct ip_vs_get_dests *)arg; + size = struct_size(get, entrytable, get->num_dests); + if (*len != size) { +- pr_err("length: %u != %u\n", *len, size); ++ pr_err("length: %u != %zu\n", *len, size); + ret = -EINVAL; + goto out; + } +-- +2.39.5 + diff --git a/queue-5.15/net-dsa-mv88e6xxx-verify-after-atu-load-ops.patch b/queue-5.15/net-dsa-mv88e6xxx-verify-after-atu-load-ops.patch new file mode 100644 index 0000000000..f77f61e17c --- /dev/null +++ b/queue-5.15/net-dsa-mv88e6xxx-verify-after-atu-load-ops.patch @@ -0,0 +1,137 @@ +From 0b3e36c5225aea9bd9373fdb03cfba23101523a8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 12:23:05 -0500 +Subject: net: dsa: mv88e6xxx: Verify after ATU Load ops + +From: Joseph Huang + +[ Upstream commit dc5340c3133a3ebe54853fd299116149e528cfaa ] + +ATU Load operations could fail silently if there's not enough space +on the device to hold the new entry. When this happens, the symptom +depends on the unknown flood settings. If unknown multicast flood is +disabled, the multicast packets are dropped when the ATU table is +full. If unknown multicast flood is enabled, the multicast packets +will be flooded to all ports. Either way, IGMP snooping is broken +when the ATU Load operation fails silently. + +Do a Read-After-Write verification after each fdb/mdb add operation +to make sure that the operation was really successful, and return +-ENOSPC otherwise. 
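+
+Condensed, the resulting shape of the fdb/mdb add paths is:
+
+	err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, state);
+	if (err)
+		goto out;
+	/* the Load op can complete without error while the entry is
+	 * silently dropped; read it back to make sure it landed */
+	if (!mv88e6xxx_port_db_find(chip, addr, vid))
+		err = -ENOSPC;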
+ +Fixes: defb05b9b9b4 ("net: dsa: mv88e6xxx: Add support for fdb_add, fdb_del, and fdb_getnext") +Signed-off-by: Joseph Huang +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/20250306172306.3859214-1-Joseph.Huang@garmin.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mv88e6xxx/chip.c | 59 ++++++++++++++++++++++++++------ + 1 file changed, 48 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index 2a55ecceab8c6..07a3f12e02dd1 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -1775,13 +1775,11 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, + return err; + } + +-static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, +- const unsigned char *addr, u16 vid, +- u8 state) ++static int mv88e6xxx_port_db_get(struct mv88e6xxx_chip *chip, ++ const unsigned char *addr, u16 vid, ++ u16 *fid, struct mv88e6xxx_atu_entry *entry) + { +- struct mv88e6xxx_atu_entry entry; + struct mv88e6xxx_vtu_entry vlan; +- u16 fid; + int err; + + /* Ports have two private address databases: one for when the port is +@@ -1792,7 +1790,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + * VLAN ID into the port's database used for VLAN-unaware bridging. + */ + if (vid == 0) { +- fid = MV88E6XXX_FID_BRIDGED; ++ *fid = MV88E6XXX_FID_BRIDGED; + } else { + err = mv88e6xxx_vtu_get(chip, vid, &vlan); + if (err) +@@ -1802,14 +1800,39 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + if (!vlan.valid) + return -EOPNOTSUPP; + +- fid = vlan.fid; ++ *fid = vlan.fid; + } + +- entry.state = 0; +- ether_addr_copy(entry.mac, addr); +- eth_addr_dec(entry.mac); ++ entry->state = 0; ++ ether_addr_copy(entry->mac, addr); ++ eth_addr_dec(entry->mac); ++ ++ return mv88e6xxx_g1_atu_getnext(chip, *fid, entry); ++} ++ ++static bool mv88e6xxx_port_db_find(struct mv88e6xxx_chip *chip, ++ const unsigned char *addr, u16 vid) ++{ ++ struct mv88e6xxx_atu_entry entry; ++ u16 fid; ++ int err; + +- err = mv88e6xxx_g1_atu_getnext(chip, fid, &entry); ++ err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); ++ if (err) ++ return false; ++ ++ return entry.state && ether_addr_equal(entry.mac, addr); ++} ++ ++static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, ++ const unsigned char *addr, u16 vid, ++ u8 state) ++{ ++ struct mv88e6xxx_atu_entry entry; ++ u16 fid; ++ int err; ++ ++ err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); + if (err) + return err; + +@@ -2324,6 +2347,13 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, + MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC); ++ if (err) ++ goto out; ++ ++ if (!mv88e6xxx_port_db_find(chip, addr, vid)) ++ err = -ENOSPC; ++ ++out: + mv88e6xxx_reg_unlock(chip); + + return err; +@@ -5878,6 +5908,13 @@ static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, + MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC); ++ if (err) ++ goto out; ++ ++ if (!mv88e6xxx_port_db_find(chip, mdb->addr, mdb->vid)) ++ err = -ENOSPC; ++ ++out: + mv88e6xxx_reg_unlock(chip); + + return err; +-- +2.39.5 + diff --git a/queue-5.15/net-mlx5-bridge-fix-the-crash-caused-by-lag-state-ch.patch 
b/queue-5.15/net-mlx5-bridge-fix-the-crash-caused-by-lag-state-ch.patch new file mode 100644 index 0000000000..f50269badf --- /dev/null +++ b/queue-5.15/net-mlx5-bridge-fix-the-crash-caused-by-lag-state-ch.patch @@ -0,0 +1,129 @@ +From e124b737f3368ffb96d4e3cb457b533774572cdc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Mar 2025 00:01:43 +0200 +Subject: net/mlx5: Bridge, fix the crash caused by LAG state check + +From: Jianbo Liu + +[ Upstream commit 4b8eeed4fb105770ce6dc84a2c6ef953c7b71cbb ] + +When removing LAG device from bridge, NETDEV_CHANGEUPPER event is +triggered. Driver finds the lower devices (PFs) to flush all the +offloaded entries. And mlx5_lag_is_shared_fdb is checked, it returns +false if one of PF is unloaded. In such case, +mlx5_esw_bridge_lag_rep_get() and its caller return NULL, instead of +the alive PF, and the flush is skipped. + +Besides, the bridge fdb entry's lastuse is updated in mlx5 bridge +event handler. But this SWITCHDEV_FDB_ADD_TO_BRIDGE event can be +ignored in this case because the upper interface for bond is deleted, +and the entry will never be aged because lastuse is never updated. + +To make things worse, as the entry is alive, mlx5 bridge workqueue +keeps sending that event, which is then handled by kernel bridge +notifier. It causes the following crash when accessing the passed bond +netdev which is already destroyed. + +To fix this issue, remove such checks. LAG state is already checked in +commit 15f8f168952f ("net/mlx5: Bridge, verify LAG state when adding +bond to bridge"), driver still need to skip offload if LAG becomes +invalid state after initialization. + + Oops: stack segment: 0000 [#1] SMP + CPU: 3 UID: 0 PID: 23695 Comm: kworker/u40:3 Tainted: G OE 6.11.0_mlnx #1 + Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + Workqueue: mlx5_bridge_wq mlx5_esw_bridge_update_work [mlx5_core] + RIP: 0010:br_switchdev_event+0x2c/0x110 [bridge] + Code: 44 00 00 48 8b 02 48 f7 00 00 02 00 00 74 69 41 54 55 53 48 83 ec 08 48 8b a8 08 01 00 00 48 85 ed 74 4a 48 83 fe 02 48 89 d3 <4c> 8b 65 00 74 23 76 49 48 83 fe 05 74 7e 48 83 fe 06 75 2f 0f b7 + RSP: 0018:ffffc900092cfda0 EFLAGS: 00010297 + RAX: ffff888123bfe000 RBX: ffffc900092cfe08 RCX: 00000000ffffffff + RDX: ffffc900092cfe08 RSI: 0000000000000001 RDI: ffffffffa0c585f0 + RBP: 6669746f6e690a30 R08: 0000000000000000 R09: ffff888123ae92c8 + R10: 0000000000000000 R11: fefefefefefefeff R12: ffff888123ae9c60 + R13: 0000000000000001 R14: ffffc900092cfe08 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff88852c980000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f15914c8734 CR3: 0000000002830005 CR4: 0000000000770ef0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + + ? __die_body+0x1a/0x60 + ? die+0x38/0x60 + ? do_trap+0x10b/0x120 + ? do_error_trap+0x64/0xa0 + ? exc_stack_segment+0x33/0x50 + ? asm_exc_stack_segment+0x22/0x30 + ? br_switchdev_event+0x2c/0x110 [bridge] + ? sched_balance_newidle.isra.149+0x248/0x390 + notifier_call_chain+0x4b/0xa0 + atomic_notifier_call_chain+0x16/0x20 + mlx5_esw_bridge_update+0xec/0x170 [mlx5_core] + mlx5_esw_bridge_update_work+0x19/0x40 [mlx5_core] + process_scheduled_works+0x81/0x390 + worker_thread+0x106/0x250 + ? bh_worker+0x110/0x110 + kthread+0xb7/0xe0 + ? 
kthread_park+0x80/0x80 + ret_from_fork+0x2d/0x50 + ? kthread_park+0x80/0x80 + ret_from_fork_asm+0x11/0x20 + + +Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG") +Signed-off-by: Jianbo Liu +Reviewed-by: Vlad Buslov +Signed-off-by: Tariq Toukan +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/1741644104-97767-6-git-send-email-tariqt@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +index 291bd59639044..28c3667e323f5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +@@ -48,15 +48,10 @@ mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower, iter) { +- struct mlx5_core_dev *mdev; +- struct mlx5e_priv *priv; +- + if (!mlx5e_eswitch_rep(lower)) + continue; + +- priv = netdev_priv(lower); +- mdev = priv->mdev; +- if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) ++ if (mlx5_esw_bridge_dev_same_esw(lower, esw)) + return lower; + } + +@@ -121,7 +116,7 @@ static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device * + priv = netdev_priv(rep); + mdev = priv->mdev; + if (netif_is_lag_master(dev)) +- return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); ++ return mlx5_lag_is_master(mdev); + return true; + } + +@@ -430,6 +425,9 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, + if (!rep) + return NOTIFY_DONE; + ++ if (netif_is_lag_master(dev) && !mlx5_lag_is_shared_fdb(esw->dev)) ++ return NOTIFY_DONE; ++ + switch (event) { + case SWITCHDEV_FDB_ADD_TO_BRIDGE: + fdb_info = container_of(info, +-- +2.39.5 + diff --git a/queue-5.15/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch b/queue-5.15/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch new file mode 100644 index 0000000000..8391929dbb --- /dev/null +++ b/queue-5.15/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch @@ -0,0 +1,46 @@ +From 359c5ff75a9c330050f391aa7c0b5f178d7b7601 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Mar 2025 10:18:20 +0800 +Subject: net/mlx5: handle errors in mlx5_chains_create_table() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Wentao Liang + +[ Upstream commit eab0396353be1c778eba1c0b5180176f04dd21ce ] + +In mlx5_chains_create_table(), the return value of mlx5_get_fdb_sub_ns() +and mlx5_get_flow_namespace() must be checked to prevent NULL pointer +dereferences. If either function fails, the function should log error +message with mlx5_core_warn() and return error pointer. 
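+
+Callers already expect ERR_PTR() returns from this function, so the
+added checks slot into the existing convention, roughly:
+
+	ft = mlx5_chains_create_table(chains, chain, prio, level);
+	if (IS_ERR(ft))
+		return PTR_ERR(ft);	/* now -EOPNOTSUPP when the flow
+					 * namespace is unavailable,
+					 * instead of a NULL deref */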
+ +Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities") +Signed-off-by: Wentao Liang +Reviewed-by: Tariq Toukan +Link: https://patch.msgid.link/20250307021820.2646-1-vulab@iscas.ac.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +index df58cba37930a..64c1071bece8d 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +@@ -196,6 +196,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, + ns = mlx5_get_flow_namespace(chains->dev, chains->ns); + } + ++ if (!ns) { ++ mlx5_core_warn(chains->dev, "Failed to get flow namespace\n"); ++ return ERR_PTR(-EOPNOTSUPP); ++ } ++ + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = chains->group_num; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); +-- +2.39.5 + diff --git a/queue-5.15/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch b/queue-5.15/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch new file mode 100644 index 0000000000..bbb1d2fa33 --- /dev/null +++ b/queue-5.15/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch @@ -0,0 +1,53 @@ +From 171985c7f7ede53c157ce29cfefc8bc30892e3df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Mar 2025 00:01:44 +0200 +Subject: net/mlx5e: Prevent bridge link show failure for non-eswitch-allowed + devices + +From: Carolina Jubran + +[ Upstream commit e92df790d07a8eea873efcb84776e7b71f81c7d5 ] + +mlx5_eswitch_get_vepa returns -EPERM if the device lacks +eswitch_manager capability, blocking mlx5e_bridge_getlink from +retrieving VEPA mode. Since mlx5e_bridge_getlink implements +ndo_bridge_getlink, returning -EPERM causes bridge link show to fail +instead of skipping devices without this capability. + +To avoid this, return -EOPNOTSUPP from mlx5e_bridge_getlink when +mlx5_eswitch_get_vepa fails, ensuring the command continues processing +other devices while ignoring those without the necessary capability. + +Fixes: 4b89251de024 ("net/mlx5: Support ndo bridge_setlink and getlink") +Signed-off-by: Carolina Jubran +Reviewed-by: Jianbo Liu +Signed-off-by: Tariq Toukan +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/1741644104-97767-7-git-send-email-tariqt@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index a0870da414538..321441e6ad328 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -4186,11 +4186,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mode, setting; +- int err; + +- err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); +- if (err) +- return err; ++ if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) ++ return -EOPNOTSUPP; + mode = setting ? 
BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, + mode, +-- +2.39.5 + diff --git a/queue-5.15/net-openvswitch-remove-misbehaving-actions-length-ch.patch b/queue-5.15/net-openvswitch-remove-misbehaving-actions-length-ch.patch new file mode 100644 index 0000000000..72a41ea1c1 --- /dev/null +++ b/queue-5.15/net-openvswitch-remove-misbehaving-actions-length-ch.patch @@ -0,0 +1,154 @@ +From 0d26d117999e5e67aecf4b5c32938e91039d4e14 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 8 Mar 2025 01:45:59 +0100 +Subject: net: openvswitch: remove misbehaving actions length check + +From: Ilya Maximets + +[ Upstream commit a1e64addf3ff9257b45b78bc7d743781c3f41340 ] + +The actions length check is unreliable and produces different results +depending on the initial length of the provided netlink attribute and +the composition of the actual actions inside of it. For example, a +user can add 4088 empty clone() actions without triggering -EMSGSIZE, +on attempt to add 4089 such actions the operation will fail with the +-EMSGSIZE verdict. However, if another 16 KB of other actions will +be *appended* to the previous 4089 clone() actions, the check passes +and the flow is successfully installed into the openvswitch datapath. + +The reason for a such a weird behavior is the way memory is allocated. +When ovs_flow_cmd_new() is invoked, it calls ovs_nla_copy_actions(), +that in turn calls nla_alloc_flow_actions() with either the actual +length of the user-provided actions or the MAX_ACTIONS_BUFSIZE. The +function adds the size of the sw_flow_actions structure and then the +actually allocated memory is rounded up to the closest power of two. + +So, if the user-provided actions are larger than MAX_ACTIONS_BUFSIZE, +then MAX_ACTIONS_BUFSIZE + sizeof(*sfa) rounded up is 32K + 24 -> 64K. +Later, while copying individual actions, we look at ksize(), which is +64K, so this way the MAX_ACTIONS_BUFSIZE check is not actually +triggered and the user can easily allocate almost 64 KB of actions. + +However, when the initial size is less than MAX_ACTIONS_BUFSIZE, but +the actions contain ones that require size increase while copying +(such as clone() or sample()), then the limit check will be performed +during the reserve_sfa_size() and the user will not be allowed to +create actions that yield more than 32 KB internally. + +This is one part of the problem. The other part is that it's not +actually possible for the userspace application to know beforehand +if the particular set of actions will be rejected or not. + +Certain actions require more space in the internal representation, +e.g. an empty clone() takes 4 bytes in the action list passed in by +the user, but it takes 12 bytes in the internal representation due +to an extra nested attribute, and some actions require less space in +the internal representations, e.g. set(tunnel(..)) normally takes +64+ bytes in the action list provided by the user, but only needs to +store a single pointer in the internal implementation, since all the +data is stored in the tunnel_info structure instead. + +And the action size limit is applied to the internal representation, +not to the action list passed by the user. So, it's not possible for +the userpsace application to predict if the certain combination of +actions will be rejected or not, because it is not possible for it to +calculate how much space these actions will take in the internal +representation without knowing kernel internals. 
+ +All that is causing random failures in ovs-vswitchd in userspace and +inability to handle certain traffic patterns as a result. For example, +it is reported that adding a bit more than a 1100 VMs in an OpenStack +setup breaks the network due to OVS not being able to handle ARP +traffic anymore in some cases (it tries to install a proper datapath +flow, but the kernel rejects it with -EMSGSIZE, even though the action +list isn't actually that large.) + +Kernel behavior must be consistent and predictable in order for the +userspace application to use it in a reasonable way. ovs-vswitchd has +a mechanism to re-direct parts of the traffic and partially handle it +in userspace if the required action list is oversized, but that doesn't +work properly if we can't actually tell if the action list is oversized +or not. + +Solution for this is to check the size of the user-provided actions +instead of the internal representation. This commit just removes the +check from the internal part because there is already an implicit size +check imposed by the netlink protocol. The attribute can't be larger +than 64 KB. Realistically, we could reduce the limit to 32 KB, but +we'll be risking to break some existing setups that rely on the fact +that it's possible to create nearly 64 KB action lists today. + +Vast majority of flows in real setups are below 100-ish bytes. So +removal of the limit will not change real memory consumption on the +system. The absolutely worst case scenario is if someone adds a flow +with 64 KB of empty clone() actions. That will yield a 192 KB in the +internal representation consuming 256 KB block of memory. However, +that list of actions is not meaningful and also a no-op. Real world +very large action lists (that can occur for a rare cases of BUM +traffic handling) are unlikely to contain a large number of clones and +will likely have a lot of tunnel attributes making the internal +representation comparable in size to the original action list. +So, it should be fine to just remove the limit. + +Commit in the 'Fixes' tag is the first one that introduced the +difference between internal representation and the user-provided action +lists, but there were many more afterwards that lead to the situation +we have today. 
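+
+After this change the effective bound is the one netlink itself
+imposes: a single attribute payload cannot exceed U16_MAX bytes. In
+sketch form (matching the final hunk below):
+
+	/* nla_len() is bounded by the netlink attribute format, so
+	 * user-supplied action lists are already capped at ~64 KB
+	 * before any internal copy is attempted */
+	*sfa = nla_alloc_flow_actions(nla_len(attr));
+	if (IS_ERR(*sfa))
+		return PTR_ERR(*sfa);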
+ +Fixes: 7d5437c709de ("openvswitch: Add tunneling interface.") +Signed-off-by: Ilya Maximets +Reviewed-by: Aaron Conole +Link: https://patch.msgid.link/20250308004609.2881861-1-i.maximets@ovn.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/openvswitch/flow_netlink.c | 15 +-------------- + 1 file changed, 1 insertion(+), 14 deletions(-) + +diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c +index b8607c3fee4be..d9bef3decd70c 100644 +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -2273,14 +2273,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) + OVS_FLOW_ATTR_MASK, true, skb); + } + +-#define MAX_ACTIONS_BUFSIZE (32 * 1024) +- + static struct sw_flow_actions *nla_alloc_flow_actions(int size) + { + struct sw_flow_actions *sfa; + +- WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); +- + sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); + if (!sfa) + return ERR_PTR(-ENOMEM); +@@ -2436,15 +2432,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, + + new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); + +- if (new_acts_size > MAX_ACTIONS_BUFSIZE) { +- if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { +- OVS_NLERR(log, "Flow action size exceeds max %u", +- MAX_ACTIONS_BUFSIZE); +- return ERR_PTR(-EMSGSIZE); +- } +- new_acts_size = MAX_ACTIONS_BUFSIZE; +- } +- + acts = nla_alloc_flow_actions(new_acts_size); + if (IS_ERR(acts)) + return (void *)acts; +@@ -3463,7 +3450,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, + int err; + u32 mpls_label_count = 0; + +- *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); ++ *sfa = nla_alloc_flow_actions(nla_len(attr)); + if (IS_ERR(*sfa)) + return PTR_ERR(*sfa); + +-- +2.39.5 + diff --git a/queue-5.15/net_sched-prevent-creation-of-classes-with-tc_h_root.patch b/queue-5.15/net_sched-prevent-creation-of-classes-with-tc_h_root.patch new file mode 100644 index 0000000000..822a2d9dcf --- /dev/null +++ b/queue-5.15/net_sched-prevent-creation-of-classes-with-tc_h_root.patch @@ -0,0 +1,50 @@ +From f10a6eb1edffec06f0b9d557fdec02657d51f840 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 15:23:54 -0800 +Subject: net_sched: Prevent creation of classes with TC_H_ROOT + +From: Cong Wang + +[ Upstream commit 0c3057a5a04d07120b3d0ec9c79568fceb9c921e ] + +The function qdisc_tree_reduce_backlog() uses TC_H_ROOT as a termination +condition when traversing up the qdisc tree to update parent backlog +counters. However, if a class is created with classid TC_H_ROOT, the +traversal terminates prematurely at this class instead of reaching the +actual root qdisc, causing parent statistics to be incorrectly maintained. +In case of DRR, this could lead to a crash as reported by Mingi Cho. + +Prevent the creation of any Qdisc class with classid TC_H_ROOT +(0xFFFFFFFF) across all qdisc types, as suggested by Jamal. 
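+
+The walk this protects looks, heavily simplified (paraphrase, not
+verbatim), like:
+
+	while ((parentid = sch->parent) != TC_H_ROOT) {
+		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
+		/* ... notify the parent class, decrement qlen/backlog */
+	}
+
+so a class whose classid happens to be 0xFFFFFFFF terminates the walk
+at itself and leaves every ancestor's counters stale.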
+ +Reported-by: Mingi Cho +Signed-off-by: Cong Wang +Reviewed-by: Simon Horman +Fixes: 066a3b5b2346 ("[NET_SCHED] sch_api: fix qdisc_tree_decrease_qlen() loop") +Link: https://patch.msgid.link/20250306232355.93864-2-xiyou.wangcong@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_api.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c +index 516874d943cd9..d9ce273ba43d8 100644 +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -2164,6 +2164,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + return -EOPNOTSUPP; + } + ++ /* Prevent creation of traffic classes with classid TC_H_ROOT */ ++ if (clid == TC_H_ROOT) { ++ NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT"); ++ return -EINVAL; ++ } ++ + new_cl = cl; + err = -EOPNOTSUPP; + if (cops->change) +-- +2.39.5 + diff --git a/queue-5.15/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch b/queue-5.15/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch new file mode 100644 index 0000000000..5eddb42b21 --- /dev/null +++ b/queue-5.15/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch @@ -0,0 +1,129 @@ +From 69b5bd5126ddc709fee1551fcecb9d63ab8de5a3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 17:07:38 +0900 +Subject: netfilter: nf_conncount: Fully initialize struct nf_conncount_tuple + in insert_tree() + +From: Kohei Enju + +[ Upstream commit d653bfeb07ebb3499c403404c21ac58a16531607 ] + +Since commit b36e4523d4d5 ("netfilter: nf_conncount: fix garbage +collection confirm race"), `cpu` and `jiffies32` were introduced to +the struct nf_conncount_tuple. + +The commit made nf_conncount_add() initialize `conn->cpu` and +`conn->jiffies32` when allocating the struct. +In contrast, count_tree() was not changed to initialize them. + +By commit 34848d5c896e ("netfilter: nf_conncount: Split insert and +traversal"), count_tree() was split and the relevant allocation +code now resides in insert_tree(). +Initialize `conn->cpu` and `conn->jiffies32` in insert_tree(). 
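+
+The aging logic that consumes these fields is roughly (simplified
+from find_or_evict(), not verbatim):
+
+	u32 age = (u32)jiffies - conn->jiffies32;
+
+	/* with jiffies32 never written, "age" is computed from garbage;
+	 * KMSAN flags exactly this read in the splat below */
+	if (conn->cpu == raw_smp_processor_id() || age >= 2)
+		/* entry may be confirmed or evicted */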
+ +BUG: KMSAN: uninit-value in find_or_evict net/netfilter/nf_conncount.c:117 [inline] +BUG: KMSAN: uninit-value in __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 + find_or_evict net/netfilter/nf_conncount.c:117 [inline] + __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 + count_tree net/netfilter/nf_conncount.c:438 [inline] + nf_conncount_count+0x82f/0x1e80 net/netfilter/nf_conncount.c:521 + connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 + __nft_match_eval net/netfilter/nft_compat.c:403 [inline] + nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 + expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] + nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 + nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 + nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] + nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 + nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 + NF_HOOK_LIST include/linux/netfilter.h:350 [inline] + ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 + ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 + __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] + __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 + __netif_receive_skb_list net/core/dev.c:6035 [inline] + netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 + netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 + xdp_recv_frames net/bpf/test_run.c:280 [inline] + xdp_test_run_batch net/bpf/test_run.c:361 [inline] + bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 + bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 + bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 + __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 + __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] + __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] + __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 + ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 + do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] + __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 + do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 + do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 + entry_SYSENTER_compat_after_hwframe+0x84/0x8e + +Uninit was created at: + slab_post_alloc_hook mm/slub.c:4121 [inline] + slab_alloc_node mm/slub.c:4164 [inline] + kmem_cache_alloc_noprof+0x915/0xe10 mm/slub.c:4171 + insert_tree net/netfilter/nf_conncount.c:372 [inline] + count_tree net/netfilter/nf_conncount.c:450 [inline] + nf_conncount_count+0x1415/0x1e80 net/netfilter/nf_conncount.c:521 + connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 + __nft_match_eval net/netfilter/nft_compat.c:403 [inline] + nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 + expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] + nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 + nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 + nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] + nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 + nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 + NF_HOOK_LIST include/linux/netfilter.h:350 [inline] + ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 + ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 + __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] + __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 + __netif_receive_skb_list net/core/dev.c:6035 [inline] + 
netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 + netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 + xdp_recv_frames net/bpf/test_run.c:280 [inline] + xdp_test_run_batch net/bpf/test_run.c:361 [inline] + bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 + bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 + bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 + __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 + __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] + __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] + __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 + ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 + do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] + __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 + do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 + do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 + entry_SYSENTER_compat_after_hwframe+0x84/0x8e + +Reported-by: syzbot+83fed965338b573115f7@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=83fed965338b573115f7 +Fixes: b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race") +Signed-off-by: Kohei Enju +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_conncount.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c +index 0ce12a33ffda4..a66a27fe7f458 100644 +--- a/net/netfilter/nf_conncount.c ++++ b/net/netfilter/nf_conncount.c +@@ -366,6 +366,8 @@ insert_tree(struct net *net, + + conn->tuple = *tuple; + conn->zone = *zone; ++ conn->cpu = raw_smp_processor_id(); ++ conn->jiffies32 = (u32)jiffies; + memcpy(rbconn->key, key, sizeof(u32) * data->keylen); + + nf_conncount_list_init(&rbconn->list); +-- +2.39.5 + diff --git a/queue-5.15/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch b/queue-5.15/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch new file mode 100644 index 0000000000..57454aada0 --- /dev/null +++ b/queue-5.15/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch @@ -0,0 +1,63 @@ +From b888a61e171fb3eb5ca9144196ea2d0485e6d374 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 17:02:42 +0100 +Subject: netfilter: nft_ct: Use __refcount_inc() for per-CPU + nft_ct_pcpu_template. + +From: Sebastian Andrzej Siewior + +[ Upstream commit 5cfe5612ca9590db69b9be29dc83041dbf001108 ] + +nft_ct_pcpu_template is a per-CPU variable and relies on disabled BH for its +locking. The refcounter is read and if its value is set to one then the +refcounter is incremented and variable is used - otherwise it is already +in use and left untouched. + +Without per-CPU locking in local_bh_disable() on PREEMPT_RT the +read-then-increment operation is not atomic and therefore racy. + +This can be avoided by using unconditionally __refcount_inc() which will +increment counter and return the old value as an atomic operation. +In case the returned counter is not one, the variable is in use and we +need to decrement counter. Otherwise we can use it. + +Use __refcount_inc() instead of read and a conditional increment. 
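+
+In pattern form, the change is (sketch, mirroring the hunk below):
+
+	/* before: read ... then inc, not atomic as a whole */
+	if (refcount_read(&ct->ct_general.use) == 1)
+		refcount_inc(&ct->ct_general.use);
+
+	/* after: one atomic increment, then inspect the old value */
+	__refcount_inc(&ct->ct_general.use, &oldcnt);
+	if (oldcnt != 1)
+		refcount_dec(&ct->ct_general.use);	/* busy: undo */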
+ +Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support") +Signed-off-by: Sebastian Andrzej Siewior +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_ct.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c +index 69214993b5a2c..83bb3f110ea84 100644 +--- a/net/netfilter/nft_ct.c ++++ b/net/netfilter/nft_ct.c +@@ -239,6 +239,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, + enum ip_conntrack_info ctinfo; + u16 value = nft_reg_load16(®s->data[priv->sreg]); + struct nf_conn *ct; ++ int oldcnt; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) /* already tracked */ +@@ -259,10 +260,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, + + ct = this_cpu_read(nft_ct_pcpu_template); + +- if (likely(refcount_read(&ct->ct_general.use) == 1)) { +- refcount_inc(&ct->ct_general.use); ++ __refcount_inc(&ct->ct_general.use, &oldcnt); ++ if (likely(oldcnt == 1)) { + nf_ct_zone_add(ct, &zone); + } else { ++ refcount_dec(&ct->ct_general.use); + /* previous skb got queued to userspace, allocate temporary + * one until percpu template can be reused. + */ +-- +2.39.5 + diff --git a/queue-5.15/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch b/queue-5.15/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch new file mode 100644 index 0000000000..c6a301c91d --- /dev/null +++ b/queue-5.15/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch @@ -0,0 +1,78 @@ +From c330f901d73423beeb3478b0f3cec6769ae9f322 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 2 Mar 2025 00:14:36 +0300 +Subject: netfilter: nft_exthdr: fix offset with ipv4_find_option() + +From: Alexey Kashavkin + +[ Upstream commit 6edd78af9506bb182518da7f6feebd75655d9a0e ] + +There is an incorrect calculation in the offset variable which causes +the nft_skb_copy_to_reg() function to always return -EFAULT. Adding the +start variable is redundant. In the __ip_options_compile() function the +correct offset is specified when finding the function. There is no need +to add the size of the iphdr structure to the offset. + +Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options") +Signed-off-by: Alexey Kashavkin +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_exthdr.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c +index d1dcf5b2e92e1..7c2931e024bb0 100644 +--- a/net/netfilter/nft_exthdr.c ++++ b/net/netfilter/nft_exthdr.c +@@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options *opt = (struct ip_options *)optbuf; + struct iphdr *iph, _iph; +- unsigned int start; + bool found = false; + __be32 info; + int optlen; +@@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (!iph) + return -EBADMSG; +- start = sizeof(struct iphdr); + + optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); + if (optlen <= 0) +@@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + /* Copy the options since __ip_options_compile() modifies + * the options. 
+ */ +- if (skb_copy_bits(skb, start, opt->__data, optlen)) ++ if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen)) + return -EBADMSG; + opt->optlen = optlen; + +@@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + found = target == IPOPT_SSRR ? opt->is_strictroute : + !opt->is_strictroute; + if (found) +- *offset = opt->srr + start; ++ *offset = opt->srr; + break; + case IPOPT_RR: + if (!opt->rr) + break; +- *offset = opt->rr + start; ++ *offset = opt->rr; + found = true; + break; + case IPOPT_RA: + if (!opt->router_alert) + break; +- *offset = opt->router_alert + start; ++ *offset = opt->router_alert; + found = true; + break; + default: +-- +2.39.5 + diff --git a/queue-5.15/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch b/queue-5.15/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch new file mode 100644 index 0000000000..0fab18c433 --- /dev/null +++ b/queue-5.15/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch @@ -0,0 +1,76 @@ +From cde03f01a0dcce3b2955962270eabf0f999c18d5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 05:16:18 -0800 +Subject: netpoll: hold rcu read lock in __netpoll_send_skb() + +From: Breno Leitao + +[ Upstream commit 505ead7ab77f289f12d8a68ac83da068e4d4408b ] + +The function __netpoll_send_skb() is being invoked without holding the +RCU read lock. This oversight triggers a warning message when +CONFIG_PROVE_RCU_LIST is enabled: + + net/core/netpoll.c:330 suspicious rcu_dereference_check() usage! + + netpoll_send_skb + netpoll_send_udp + write_ext_msg + console_flush_all + console_unlock + vprintk_emit + +To prevent npinfo from disappearing unexpectedly, ensure that +__netpoll_send_skb() is protected with the RCU read lock. + +Fixes: 2899656b494dcd1 ("netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()") +Signed-off-by: Breno Leitao +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250306-netpoll_rcu_v2-v2-1-bc4f5c51742a@debian.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/netpoll.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/net/core/netpoll.c b/net/core/netpoll.c +index 597e83e2bce86..87f5a837410c1 100644 +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -326,6 +326,7 @@ static int netpoll_owner_active(struct net_device *dev) + static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) + { + netdev_tx_t status = NETDEV_TX_BUSY; ++ netdev_tx_t ret = NET_XMIT_DROP; + struct net_device *dev; + unsigned long tries; + /* It is up to the caller to keep npinfo alive. 
*/ +@@ -334,11 +335,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) + lockdep_assert_irqs_disabled(); + + dev = np->dev; ++ rcu_read_lock(); + npinfo = rcu_dereference_bh(dev->npinfo); + + if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { + dev_kfree_skb_irq(skb); +- return NET_XMIT_DROP; ++ goto out; + } + + /* don't get messages out of order, and no recursion */ +@@ -377,7 +379,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) + skb_queue_tail(&npinfo->txq, skb); + schedule_delayed_work(&npinfo->tx_work,0); + } +- return NETDEV_TX_OK; ++ ret = NETDEV_TX_OK; ++out: ++ rcu_read_unlock(); ++ return ret; + } + + netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +-- +2.39.5 + diff --git a/queue-5.15/openvswitch-use-kmalloc_size_roundup-to-match-ksize-.patch b/queue-5.15/openvswitch-use-kmalloc_size_roundup-to-match-ksize-.patch new file mode 100644 index 0000000000..a3ad6b77fa --- /dev/null +++ b/queue-5.15/openvswitch-use-kmalloc_size_roundup-to-match-ksize-.patch @@ -0,0 +1,40 @@ +From 8a5a0647d1394590e4ea6edb584d4e5ef6006adb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 02:06:33 -0700 +Subject: openvswitch: Use kmalloc_size_roundup() to match ksize() usage + +From: Kees Cook + +[ Upstream commit ab3f7828c9793a5dfa99a54dc19ae3491c38bfa3 ] + +Round up allocations with kmalloc_size_roundup() so that openvswitch's +use of ksize() is always accurate and no special handling of the memory +is needed by KASAN, UBSAN_BOUNDS, nor FORTIFY_SOURCE. + +Cc: Pravin B Shelar +Cc: dev@openvswitch.org +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20221018090628.never.537-kees@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: a1e64addf3ff ("net: openvswitch: remove misbehaving actions length check") +Signed-off-by: Sasha Levin +--- + net/openvswitch/flow_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c +index 1cf431d04a468..b8607c3fee4be 100644 +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -2281,7 +2281,7 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size) + + WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); + +- sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); ++ sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); + if (!sfa) + return ERR_PTR(-ENOMEM); + +-- +2.39.5 + diff --git a/queue-5.15/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch b/queue-5.15/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch new file mode 100644 index 0000000000..c910dffd3a --- /dev/null +++ b/queue-5.15/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch @@ -0,0 +1,40 @@ +From bac23c678f2709f34661ebdb6f862c9771ebade4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Feb 2025 21:02:41 +0100 +Subject: pinctrl: bcm281xx: Fix incorrect regmap max_registers value + +From: Artur Weber + +[ Upstream commit 68283c1cb573143c0b7515e93206f3503616bc10 ] + +The max_registers value does not take into consideration the stride; +currently, it's set to the number of the last pin, but this does not +accurately represent the final register. + +Fix this by multiplying the current value by 4. 
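+
+Spelled out: with .reg_stride = 4, pin number N sits at byte offset
+N * 4, so the last addressable register is
+
+	.max_register = BCM281XX_PIN_VC_CAM3_SDA * 4
+
+while the bare pin number confined the regmap to the first quarter of
+the register space.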
+ +Fixes: 54b1aa5a5b16 ("ARM: pinctrl: Add Broadcom Capri pinctrl driver") +Signed-off-by: Artur Weber +Link: https://lore.kernel.org/20250207-bcm21664-pinctrl-v1-2-e7cfac9b2d3b@gmail.com +Signed-off-by: Linus Walleij +Signed-off-by: Sasha Levin +--- + drivers/pinctrl/bcm/pinctrl-bcm281xx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +index 9ab1f427286a7..fbfddcc39d5cc 100644 +--- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c ++++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +@@ -981,7 +981,7 @@ static const struct regmap_config bcm281xx_pinctrl_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, +- .max_register = BCM281XX_PIN_VC_CAM3_SDA, ++ .max_register = BCM281XX_PIN_VC_CAM3_SDA * 4, + }; + + static int bcm281xx_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) +-- +2.39.5 + diff --git a/queue-5.15/series b/queue-5.15/series index c63f6b5382..4a744a5783 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -2,3 +2,22 @@ vlan-fix-memory-leak-in-vlan_newlink.patch clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch ipv6-fix-signed-integer-overflow-in-__ip6_append_data.patch +fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch +pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch +netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch +ice-fix-memory-leak-in-arfs-after-reset.patch +net-dsa-mv88e6xxx-verify-after-atu-load-ops.patch +netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch +drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch +net-mlx5-handle-errors-in-mlx5_chains_create_table.patch +netfilter-nf_conncount-fully-initialize-struct-nf_co.patch +ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch +net_sched-prevent-creation-of-classes-with-tc_h_root.patch +netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch +gre-fix-ipv6-link-local-address-generation.patch +slab-clean-up-function-prototypes.patch +slab-introduce-kmalloc_size_roundup.patch +openvswitch-use-kmalloc_size_roundup-to-match-ksize-.patch +net-openvswitch-remove-misbehaving-actions-length-ch.patch +net-mlx5-bridge-fix-the-crash-caused-by-lag-state-ch.patch +net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch diff --git a/queue-5.15/slab-clean-up-function-prototypes.patch b/queue-5.15/slab-clean-up-function-prototypes.patch new file mode 100644 index 0000000000..19704348ae --- /dev/null +++ b/queue-5.15/slab-clean-up-function-prototypes.patch @@ -0,0 +1,210 @@ +From 5b5a709f40d1b03895cc46589c3a3e5bd42c3979 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Nov 2021 13:36:23 -0700 +Subject: slab: clean up function prototypes + +From: Kees Cook + +[ Upstream commit 72d67229f522e3331d1eabd9f58d36ae080eb228 ] + +Based on feedback from Joe Perches and Linus Torvalds, regularize the +slab function prototypes before making attribute changes. + +Link: https://lkml.kernel.org/r/20210930222704.2631604-4-keescook@chromium.org +Signed-off-by: Kees Cook +Cc: Christoph Lameter +Cc: Pekka Enberg +Cc: David Rientjes +Cc: Joonsoo Kim +Cc: Vlastimil Babka +Cc: Alexandre Bounine +Cc: Andy Whitcroft +Cc: Daniel Micay +Cc: Dennis Zhou +Cc: Dwaipayan Ray +Cc: Gustavo A. R. 
Silva +Cc: Ira Weiny +Cc: Jing Xiangfeng +Cc: Joe Perches +Cc: John Hubbard +Cc: kernel test robot +Cc: Lukas Bulwahn +Cc: Matt Porter +Cc: Miguel Ojeda +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Randy Dunlap +Cc: Souptick Joarder +Cc: Tejun Heo +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: a1e64addf3ff ("net: openvswitch: remove misbehaving actions length check") +Signed-off-by: Sasha Levin +--- + include/linux/slab.h | 68 ++++++++++++++++++++++---------------------- + 1 file changed, 34 insertions(+), 34 deletions(-) + +diff --git a/include/linux/slab.h b/include/linux/slab.h +index 083f3ce550bca..d9f14125d7a2b 100644 +--- a/include/linux/slab.h ++++ b/include/linux/slab.h +@@ -152,8 +152,8 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, + slab_flags_t flags, + unsigned int useroffset, unsigned int usersize, + void (*ctor)(void *)); +-void kmem_cache_destroy(struct kmem_cache *); +-int kmem_cache_shrink(struct kmem_cache *); ++void kmem_cache_destroy(struct kmem_cache *s); ++int kmem_cache_shrink(struct kmem_cache *s); + + /* + * Please use this macro to create slab caches. Simply specify the +@@ -181,11 +181,11 @@ int kmem_cache_shrink(struct kmem_cache *); + /* + * Common kmalloc functions provided by all allocators + */ +-void * __must_check krealloc(const void *, size_t, gfp_t); +-void kfree(const void *); +-void kfree_sensitive(const void *); +-size_t __ksize(const void *); +-size_t ksize(const void *); ++void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags); ++void kfree(const void *objp); ++void kfree_sensitive(const void *objp); ++size_t __ksize(const void *objp); ++size_t ksize(const void *objp); + #ifdef CONFIG_PRINTK + bool kmem_valid_obj(void *object); + void kmem_dump_obj(void *object); +@@ -426,8 +426,8 @@ static __always_inline unsigned int __kmalloc_index(size_t size, + #endif /* !CONFIG_SLOB */ + + void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; +-void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc; +-void kmem_cache_free(struct kmem_cache *, void *); ++void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc; ++void kmem_cache_free(struct kmem_cache *s, void *objp); + + /* + * Bulk allocation and freeing operations. These are accelerated in an +@@ -436,8 +436,8 @@ void kmem_cache_free(struct kmem_cache *, void *); + * + * Note that interrupts must be enabled when calling these functions. 
+ */ +-void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); +-int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); ++void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); ++int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p); + + /* + * Caller must not use kfree_bulk() on memory not originally allocated +@@ -450,7 +450,8 @@ static __always_inline void kfree_bulk(size_t size, void **p) + + #ifdef CONFIG_NUMA + void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc; +-void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc; ++void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment ++ __malloc; + #else + static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) + { +@@ -464,25 +465,24 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f + #endif + + #ifdef CONFIG_TRACING +-extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment __malloc; ++extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) ++ __assume_slab_alignment __malloc; + + #ifdef CONFIG_NUMA +-extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, +- gfp_t gfpflags, +- int node, size_t size) __assume_slab_alignment __malloc; ++extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, ++ int node, size_t size) __assume_slab_alignment __malloc; + #else +-static __always_inline void * +-kmem_cache_alloc_node_trace(struct kmem_cache *s, +- gfp_t gfpflags, +- int node, size_t size) ++static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, ++ gfp_t gfpflags, int node, ++ size_t size) + { + return kmem_cache_alloc_trace(s, gfpflags, size); + } + #endif /* CONFIG_NUMA */ + + #else /* CONFIG_TRACING */ +-static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, +- gfp_t flags, size_t size) ++static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, ++ size_t size) + { + void *ret = kmem_cache_alloc(s, flags); + +@@ -490,10 +490,8 @@ static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, + return ret; + } + +-static __always_inline void * +-kmem_cache_alloc_node_trace(struct kmem_cache *s, +- gfp_t gfpflags, +- int node, size_t size) ++static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, ++ int node, size_t size) + { + void *ret = kmem_cache_alloc_node(s, gfpflags, node); + +@@ -502,13 +500,14 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, + } + #endif /* CONFIG_TRACING */ + +-extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc; ++extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment ++ __malloc; + + #ifdef CONFIG_TRACING +-extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc; ++extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) ++ __assume_page_alignment __malloc; + #else +-static __always_inline void * +-kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) ++static __always_inline void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) + { + return kmalloc_order(size, flags, order); + } +@@ -638,8 +637,8 @@ static inline void 
*kmalloc_array(size_t n, size_t size, gfp_t flags) + * @new_size: new size of a single member of the array + * @flags: the type of memory to allocate (see kmalloc) + */ +-static __must_check inline void * +-krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) ++static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t new_size, ++ gfp_t flags) + { + size_t bytes; + +@@ -668,7 +667,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) + * allocator where we care about the real place the memory allocation + * request comes from. + */ +-extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long); ++extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); + #define kmalloc_track_caller(size, flags) \ + __kmalloc_track_caller(size, flags, _RET_IP_) + +@@ -691,7 +690,8 @@ static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) + + + #ifdef CONFIG_NUMA +-extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long); ++extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, ++ unsigned long caller); + #define kmalloc_node_track_caller(size, flags, node) \ + __kmalloc_node_track_caller(size, flags, node, \ + _RET_IP_) +-- +2.39.5 + diff --git a/queue-5.15/slab-introduce-kmalloc_size_roundup.patch b/queue-5.15/slab-introduce-kmalloc_size_roundup.patch new file mode 100644 index 0000000000..23a0ef1019 --- /dev/null +++ b/queue-5.15/slab-introduce-kmalloc_size_roundup.patch @@ -0,0 +1,213 @@ +From f4869e935bbf71c835abf2dc6b031102a58b0ace Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Sep 2022 13:28:08 -0700 +Subject: slab: Introduce kmalloc_size_roundup() + +From: Kees Cook + +[ Upstream commit 05a940656e1eb2026d9ee31019d5b47e9545124d ] + +In the effort to help the compiler reason about buffer sizes, the +__alloc_size attribute was added to allocators. This improves the scope +of the compiler's ability to apply CONFIG_UBSAN_BOUNDS and (in the near +future) CONFIG_FORTIFY_SOURCE. For most allocations, this works well, +as the vast majority of callers are not expecting to use more memory +than what they asked for. + +There is, however, one common exception to this: anticipatory resizing +of kmalloc allocations. These cases all use ksize() to determine the +actual bucket size of a given allocation (e.g. 128 when 126 was asked +for). This comes in two styles in the kernel: + +1) An allocation has been determined to be too small, and needs to be + resized. Instead of the caller choosing its own next best size, it + wants to minimize the number of calls to krealloc(), so it just uses + ksize() plus some additional bytes, forcing the realloc into the next + bucket size, from which it can learn how large it is now. For example: + + data = krealloc(data, ksize(data) + 1, gfp); + data_len = ksize(data); + +2) The minimum size of an allocation is calculated, but since it may + grow in the future, just use all the space available in the chosen + bucket immediately, to avoid needing to reallocate later. A good + example of this is skbuff's allocators: + + data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); + ... + /* kmalloc(size) might give us more room than requested. + * Put skb_shared_info exactly at the end of allocated zone, + * to allow max possible filling before reallocation. + */ + osize = ksize(data); + size = SKB_WITH_OVERHEAD(osize); + +In both cases, the "how much was actually allocated?" 
question is answered +_after_ the allocation, where the compiler hinting is not in an easy place +to make the association any more. This mismatch between the compiler's +view of the buffer length and the code's intention about how much it is +going to actually use has already caused problems[1]. It is possible to +fix this by reordering the use of the "actual size" information. + +We can serve the needs of users of ksize() and still have accurate buffer +length hinting for the compiler by doing the bucket size calculation +_before_ the allocation. Code can instead ask "how large an allocation +would I get for a given size?". + +Introduce kmalloc_size_roundup(), to serve this function so we can start +replacing the "anticipatory resizing" uses of ksize(). + +[1] https://github.com/ClangBuiltLinux/linux/issues/1599 + https://github.com/KSPP/linux/issues/183 + +[ vbabka@suse.cz: add SLOB version ] + +Cc: Vlastimil Babka +Cc: Christoph Lameter +Cc: Pekka Enberg +Cc: David Rientjes +Cc: Joonsoo Kim +Cc: Andrew Morton +Cc: linux-mm@kvack.org +Signed-off-by: Kees Cook +Signed-off-by: Vlastimil Babka +Stable-dep-of: a1e64addf3ff ("net: openvswitch: remove misbehaving actions length check") +Signed-off-by: Sasha Levin +--- + include/linux/slab.h | 31 +++++++++++++++++++++++++++++++ + mm/slab.c | 9 ++++++--- + mm/slab_common.c | 20 ++++++++++++++++++++ + mm/slob.c | 14 ++++++++++++++ + 4 files changed, 71 insertions(+), 3 deletions(-) + +diff --git a/include/linux/slab.h b/include/linux/slab.h +index d9f14125d7a2b..3482c2ced139e 100644 +--- a/include/linux/slab.h ++++ b/include/linux/slab.h +@@ -185,7 +185,21 @@ void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags); + void kfree(const void *objp); + void kfree_sensitive(const void *objp); + size_t __ksize(const void *objp); ++ ++/** ++ * ksize - Report actual allocation size of associated object ++ * ++ * @objp: Pointer returned from a prior kmalloc()-family allocation. ++ * ++ * This should not be used for writing beyond the originally requested ++ * allocation size. Either use krealloc() or round up the allocation size ++ * with kmalloc_size_roundup() prior to allocation. If this is used to ++ * access beyond the originally requested allocation size, UBSAN_BOUNDS ++ * and/or FORTIFY_SOURCE may trip, since they only know about the ++ * originally allocated size via the __alloc_size attribute. ++ */ + size_t ksize(const void *objp); ++ + #ifdef CONFIG_PRINTK + bool kmem_valid_obj(void *object); + void kmem_dump_obj(void *object); +@@ -733,6 +747,23 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) + } + + unsigned int kmem_cache_size(struct kmem_cache *s); ++ ++/** ++ * kmalloc_size_roundup - Report allocation bucket size for the given size ++ * ++ * @size: Number of bytes to round up from. ++ * ++ * This returns the number of bytes that would be available in a kmalloc() ++ * allocation of @size bytes. For example, a 126 byte request would be ++ * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly ++ * for the general-purpose kmalloc()-based allocations, and is not for the ++ * pre-sized kmem_cache_alloc()-based allocations.) ++ * ++ * Use this to kmalloc() the full bucket size ahead of time instead of using ++ * ksize() to query the size after an allocation. 
++ */ ++size_t kmalloc_size_roundup(size_t size); ++ + void __init kmem_cache_init_late(void); + + #if defined(CONFIG_SMP) && defined(CONFIG_SLAB) +diff --git a/mm/slab.c b/mm/slab.c +index f5b2246f832da..e53e50d6c29bc 100644 +--- a/mm/slab.c ++++ b/mm/slab.c +@@ -4226,11 +4226,14 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + #endif /* CONFIG_HARDENED_USERCOPY */ + + /** +- * __ksize -- Uninstrumented ksize. ++ * __ksize -- Report full size of underlying allocation + * @objp: pointer to the object + * +- * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same +- * safety checks as ksize() with KASAN instrumentation enabled. ++ * This should only be used internally to query the true size of allocations. ++ * It is not meant to be a way to discover the usable size of an allocation ++ * after the fact. Instead, use kmalloc_size_roundup(). Using memory beyond ++ * the originally requested allocation size may trigger KASAN, UBSAN_BOUNDS, ++ * and/or FORTIFY_SOURCE. + * + * Return: size of the actual memory used by @objp in bytes + */ +diff --git a/mm/slab_common.c b/mm/slab_common.c +index f684b06649c3e..06958c613b0ac 100644 +--- a/mm/slab_common.c ++++ b/mm/slab_common.c +@@ -758,6 +758,26 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) + return kmalloc_caches[kmalloc_type(flags)][index]; + } + ++size_t kmalloc_size_roundup(size_t size) ++{ ++ struct kmem_cache *c; ++ ++ /* Short-circuit the 0 size case. */ ++ if (unlikely(size == 0)) ++ return 0; ++ /* Short-circuit saturated "too-large" case. */ ++ if (unlikely(size == SIZE_MAX)) ++ return SIZE_MAX; ++ /* Above the smaller buckets, size is a multiple of page size. */ ++ if (size > KMALLOC_MAX_CACHE_SIZE) ++ return PAGE_SIZE << get_order(size); ++ ++ /* The flags don't matter since size_index is common to all. */ ++ c = kmalloc_slab(size, GFP_KERNEL); ++ return c ? c->object_size : 0; ++} ++EXPORT_SYMBOL(kmalloc_size_roundup); ++ + #ifdef CONFIG_ZONE_DMA + #define KMALLOC_DMA_NAME(sz) .name[KMALLOC_DMA] = "dma-kmalloc-" #sz, + #else +diff --git a/mm/slob.c b/mm/slob.c +index f3fc15df971af..d4c80bf1930d1 100644 +--- a/mm/slob.c ++++ b/mm/slob.c +@@ -567,6 +567,20 @@ void kfree(const void *block) + } + EXPORT_SYMBOL(kfree); + ++size_t kmalloc_size_roundup(size_t size) ++{ ++ /* Short-circuit the 0 size case. */ ++ if (unlikely(size == 0)) ++ return 0; ++ /* Short-circuit saturated "too-large" case. */ ++ if (unlikely(size == SIZE_MAX)) ++ return SIZE_MAX; ++ ++ return ALIGN(size, ARCH_KMALLOC_MINALIGN); ++} ++ ++EXPORT_SYMBOL(kmalloc_size_roundup); ++ + /* can't use ksize for kmem_cache_alloc memory, only kmalloc */ + size_t __ksize(const void *block) + { +-- +2.39.5 +