From: Greg Kroah-Hartman Date: Mon, 24 Jan 2022 12:46:18 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v4.4.300~61 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=31e51e186e92fc9005abf94ce6318caa2f846399;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: af_unix-annote-lockless-accesses-to-unix_tot_inflight-gc_in_progress.patch arm-dts-gpio-ranges-property-is-now-required.patch arm64-bpf-remove-128mb-limit-for-bpf-jit-programs.patch bpftool-remove-inclusion-of-utilities.mak-from-makefiles.patch clk-emit-a-stern-warning-with-writable-debugfs-enabled.patch clk-si5341-fix-clock-hw-provider-cleanup.patch f2fs-compress-fix-potential-deadlock-of-compress-file.patch f2fs-fix-to-reserve-space-for-io-align-feature.patch ipv4-avoid-quadratic-behavior-in-netns-dismantle.patch ipv4-update-fib_info_cnt-under-spinlock-protection.patch net-axienet-increase-reset-timeout.patch net-axienet-wait-for-phyrstcmplt-after-core-reset.patch net-fsl-xgmac_mdio-add-workaround-for-erratum-a-009885.patch net-fsl-xgmac_mdio-fix-incorrect-iounmap-when-removing-module.patch net-smc-fix-hung_task-when-removing-smc-r-devices.patch parisc-pdc_stable-fix-memory-leak-in-pdcs_register_pathentries.patch perf-evsel-override-attr-sample_period-for-non-libpfm4-events.patch xdp-check-prog-type-before-updating-bpf-link.patch --- diff --git a/queue-5.10/af_unix-annote-lockless-accesses-to-unix_tot_inflight-gc_in_progress.patch b/queue-5.10/af_unix-annote-lockless-accesses-to-unix_tot_inflight-gc_in_progress.patch new file mode 100644 index 00000000000..56bcb84d87d --- /dev/null +++ b/queue-5.10/af_unix-annote-lockless-accesses-to-unix_tot_inflight-gc_in_progress.patch @@ -0,0 +1,128 @@ +From 9d6d7f1cb67cdee15f1a0e85aacfb924e0e02435 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 14 Jan 2022 08:43:28 -0800 +Subject: af_unix: annote lockless accesses to unix_tot_inflight & gc_in_progress + +From: Eric Dumazet + +commit 9d6d7f1cb67cdee15f1a0e85aacfb924e0e02435 
upstream. + +wait_for_unix_gc() reads unix_tot_inflight & gc_in_progress +without synchronization. + +Adds READ_ONCE()/WRITE_ONCE() and their associated comments +to better document the intent. + +BUG: KCSAN: data-race in unix_inflight / wait_for_unix_gc + +write to 0xffffffff86e2b7c0 of 4 bytes by task 9380 on cpu 0: + unix_inflight+0x1e8/0x260 net/unix/scm.c:63 + unix_attach_fds+0x10c/0x1e0 net/unix/scm.c:121 + unix_scm_to_skb net/unix/af_unix.c:1674 [inline] + unix_dgram_sendmsg+0x679/0x16b0 net/unix/af_unix.c:1817 + unix_seqpacket_sendmsg+0xcc/0x110 net/unix/af_unix.c:2258 + sock_sendmsg_nosec net/socket.c:704 [inline] + sock_sendmsg net/socket.c:724 [inline] + ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 + ___sys_sendmsg net/socket.c:2463 [inline] + __sys_sendmmsg+0x267/0x4c0 net/socket.c:2549 + __do_sys_sendmmsg net/socket.c:2578 [inline] + __se_sys_sendmmsg net/socket.c:2575 [inline] + __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2575 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +read to 0xffffffff86e2b7c0 of 4 bytes by task 9375 on cpu 1: + wait_for_unix_gc+0x24/0x160 net/unix/garbage.c:196 + unix_dgram_sendmsg+0x8e/0x16b0 net/unix/af_unix.c:1772 + unix_seqpacket_sendmsg+0xcc/0x110 net/unix/af_unix.c:2258 + sock_sendmsg_nosec net/socket.c:704 [inline] + sock_sendmsg net/socket.c:724 [inline] + ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 + ___sys_sendmsg net/socket.c:2463 [inline] + __sys_sendmmsg+0x267/0x4c0 net/socket.c:2549 + __do_sys_sendmmsg net/socket.c:2578 [inline] + __se_sys_sendmmsg net/socket.c:2575 [inline] + __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2575 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +value changed: 0x00000002 -> 0x00000004 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 9375 Comm: syz-executor.1 Not tainted 
5.16.0-rc7-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + +Fixes: 9915672d4127 ("af_unix: limit unix_tot_inflight") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Link: https://lore.kernel.org/r/20220114164328.2038499-1-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/garbage.c | 14 +++++++++++--- + net/unix/scm.c | 6 ++++-- + 2 files changed, 15 insertions(+), 5 deletions(-) + +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -192,8 +192,11 @@ void wait_for_unix_gc(void) + { + /* If number of inflight sockets is insane, + * force a garbage collect right now. ++ * Paired with the WRITE_ONCE() in unix_inflight(), ++ * unix_notinflight() and gc_in_progress(). + */ +- if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) ++ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && ++ !READ_ONCE(gc_in_progress)) + unix_gc(); + wait_event(unix_gc_wait, gc_in_progress == false); + } +@@ -213,7 +216,9 @@ void unix_gc(void) + if (gc_in_progress) + goto out; + +- gc_in_progress = true; ++ /* Paired with READ_ONCE() in wait_for_unix_gc(). */ ++ WRITE_ONCE(gc_in_progress, true); ++ + /* First, select candidates for garbage collection. Only + * in-flight sockets are considered, and from those only ones + * which don't have any external reference. +@@ -299,7 +304,10 @@ void unix_gc(void) + + /* All candidates should have been detached by now. */ + BUG_ON(!list_empty(&gc_candidates)); +- gc_in_progress = false; ++ ++ /* Paired with READ_ONCE() in wait_for_unix_gc(). 
*/ ++ WRITE_ONCE(gc_in_progress, false); ++ + wake_up(&unix_gc_wait); + + out: +--- a/net/unix/scm.c ++++ b/net/unix/scm.c +@@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *u + } else { + BUG_ON(list_empty(&u->link)); + } +- unix_tot_inflight++; ++ /* Paired with READ_ONCE() in wait_for_unix_gc() */ ++ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); + } + user->unix_inflight++; + spin_unlock(&unix_gc_lock); +@@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct + + if (atomic_long_dec_and_test(&u->inflight)) + list_del_init(&u->link); +- unix_tot_inflight--; ++ /* Paired with READ_ONCE() in wait_for_unix_gc() */ ++ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); + } + user->unix_inflight--; + spin_unlock(&unix_gc_lock); diff --git a/queue-5.10/arm-dts-gpio-ranges-property-is-now-required.patch b/queue-5.10/arm-dts-gpio-ranges-property-is-now-required.patch new file mode 100644 index 00000000000..4804cddfba6 --- /dev/null +++ b/queue-5.10/arm-dts-gpio-ranges-property-is-now-required.patch @@ -0,0 +1,50 @@ +From e8f24c58d1b69ecf410a673c22f546dc732bb879 Mon Sep 17 00:00:00 2001 +From: Phil Elwell +Date: Mon, 6 Dec 2021 09:22:37 +0000 +Subject: ARM: dts: gpio-ranges property is now required + +From: Phil Elwell + +commit e8f24c58d1b69ecf410a673c22f546dc732bb879 upstream. + +Since [1], added in 5.7, the absence of a gpio-ranges property has +prevented GPIOs from being restored to inputs when released. +Add those properties for BCM283x and BCM2711 devices. 
+ +[1] commit 2ab73c6d8323 ("gpio: Support GPIO controllers without + pin-ranges") + +Fixes: 2ab73c6d8323 ("gpio: Support GPIO controllers without pin-ranges") +Signed-off-by: Phil Elwell +Acked-by: Florian Fainelli +Reviewed-by: Linus Walleij +Link: https://lore.kernel.org/r/20211206092237.4105895-3-phil@raspberrypi.com +Signed-off-by: Linus Walleij +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/boot/dts/bcm2711.dtsi | 2 ++ + arch/arm/boot/dts/bcm283x.dtsi | 2 ++ + 2 files changed, 4 insertions(+) + +--- a/arch/arm/boot/dts/bcm2711.dtsi ++++ b/arch/arm/boot/dts/bcm2711.dtsi +@@ -557,6 +557,8 @@ + + gpio-ranges = <&gpio 0 0 58>; + ++ gpio-ranges = <&gpio 0 0 58>; ++ + gpclk0_gpio49: gpclk0_gpio49 { + pin-gpclk { + pins = "gpio49"; +--- a/arch/arm/boot/dts/bcm283x.dtsi ++++ b/arch/arm/boot/dts/bcm283x.dtsi +@@ -128,6 +128,8 @@ + + gpio-ranges = <&gpio 0 0 54>; + ++ gpio-ranges = <&gpio 0 0 54>; ++ + /* Defines common pin muxing groups + * + * While each pin can have its mux selected diff --git a/queue-5.10/arm64-bpf-remove-128mb-limit-for-bpf-jit-programs.patch b/queue-5.10/arm64-bpf-remove-128mb-limit-for-bpf-jit-programs.patch new file mode 100644 index 00000000000..061f4abfd2a --- /dev/null +++ b/queue-5.10/arm64-bpf-remove-128mb-limit-for-bpf-jit-programs.patch @@ -0,0 +1,115 @@ +From b89ddf4cca43f1269093942cf5c4e457fd45c335 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Fri, 5 Nov 2021 16:50:45 +0000 +Subject: arm64/bpf: Remove 128MB limit for BPF JIT programs + +From: Russell King + +commit b89ddf4cca43f1269093942cf5c4e457fd45c335 upstream. + +Commit 91fc957c9b1d ("arm64/bpf: don't allocate BPF JIT programs in module +memory") restricts BPF JIT program allocation to a 128MB region to ensure +BPF programs are still in branching range of each other. 
However this +restriction should not apply to the aarch64 JIT, since BPF_JMP | BPF_CALL +are implemented as a 64-bit move into a register and then a BLR instruction - +which has the effect of being able to call anything without proximity +limitation. + +The practical reason to relax this restriction on JIT memory is that 128MB of +JIT memory can be quickly exhausted, especially where PAGE_SIZE is 64KB - one +page is needed per program. In cases where seccomp filters are applied to +multiple VMs on VM launch - such filters are classic BPF but converted to +BPF - this can severely limit the number of VMs that can be launched. In a +world where we support BPF JIT always on, turning off the JIT isn't always an +option either. + +Fixes: 91fc957c9b1d ("arm64/bpf: don't allocate BPF JIT programs in module memory") +Suggested-by: Ard Biesheuvel +Signed-off-by: Russell King +Signed-off-by: Daniel Borkmann +Tested-by: Alan Maguire +Link: https://lore.kernel.org/bpf/1636131046-5982-2-git-send-email-alan.maguire@oracle.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/extable.h | 9 --------- + arch/arm64/include/asm/memory.h | 5 +---- + arch/arm64/kernel/traps.c | 2 +- + arch/arm64/mm/ptdump.c | 2 -- + arch/arm64/net/bpf_jit_comp.c | 7 ++----- + 5 files changed, 4 insertions(+), 21 deletions(-) + +--- a/arch/arm64/include/asm/extable.h ++++ b/arch/arm64/include/asm/extable.h +@@ -22,15 +22,6 @@ struct exception_table_entry + + #define ARCH_HAS_RELATIVE_EXTABLE + +-static inline bool in_bpf_jit(struct pt_regs *regs) +-{ +- if (!IS_ENABLED(CONFIG_BPF_JIT)) +- return false; +- +- return regs->pc >= BPF_JIT_REGION_START && +- regs->pc < BPF_JIT_REGION_END; +-} +- + #ifdef CONFIG_BPF_JIT + int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs); +--- a/arch/arm64/include/asm/memory.h ++++ b/arch/arm64/include/asm/memory.h +@@ -44,11 +44,8 @@ + #define _PAGE_OFFSET(va) (-(UL(1) << (va))) + #define PAGE_OFFSET 
(_PAGE_OFFSET(VA_BITS)) + #define KIMAGE_VADDR (MODULES_END) +-#define BPF_JIT_REGION_START (KASAN_SHADOW_END) +-#define BPF_JIT_REGION_SIZE (SZ_128M) +-#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) + #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) +-#define MODULES_VADDR (BPF_JIT_REGION_END) ++#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) + #define MODULES_VSIZE (SZ_128M) + #define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M) + #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) +--- a/arch/arm64/kernel/traps.c ++++ b/arch/arm64/kernel/traps.c +@@ -923,7 +923,7 @@ static struct break_hook bug_break_hook + static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr) + { + pr_err("%s generated an invalid instruction at %pS!\n", +- in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching", ++ "Kernel text patching", + (void *)instruction_pointer(regs)); + + /* We cannot handle this */ +--- a/arch/arm64/mm/ptdump.c ++++ b/arch/arm64/mm/ptdump.c +@@ -41,8 +41,6 @@ static struct addr_marker address_marker + { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, + { KASAN_SHADOW_END, "Kasan shadow end" }, + #endif +- { BPF_JIT_REGION_START, "BPF start" }, +- { BPF_JIT_REGION_END, "BPF end" }, + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, + { VMALLOC_START, "vmalloc() area" }, +--- a/arch/arm64/net/bpf_jit_comp.c ++++ b/arch/arm64/net/bpf_jit_comp.c +@@ -1136,15 +1136,12 @@ out: + + u64 bpf_jit_alloc_exec_limit(void) + { +- return BPF_JIT_REGION_SIZE; ++ return VMALLOC_END - VMALLOC_START; + } + + void *bpf_jit_alloc_exec(unsigned long size) + { +- return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, +- BPF_JIT_REGION_END, GFP_KERNEL, +- PAGE_KERNEL, 0, NUMA_NO_NODE, +- __builtin_return_address(0)); ++ return vmalloc(size); + } + + void bpf_jit_free_exec(void *addr) diff --git a/queue-5.10/bpftool-remove-inclusion-of-utilities.mak-from-makefiles.patch 
b/queue-5.10/bpftool-remove-inclusion-of-utilities.mak-from-makefiles.patch new file mode 100644 index 00000000000..b4b5744c020 --- /dev/null +++ b/queue-5.10/bpftool-remove-inclusion-of-utilities.mak-from-makefiles.patch @@ -0,0 +1,41 @@ +From 48f5aef4c458c19ab337eed8c95a6486cc014aa3 Mon Sep 17 00:00:00 2001 +From: Quentin Monnet +Date: Wed, 10 Nov 2021 11:46:28 +0000 +Subject: bpftool: Remove inclusion of utilities.mak from Makefiles + +From: Quentin Monnet + +commit 48f5aef4c458c19ab337eed8c95a6486cc014aa3 upstream. + +Bpftool's Makefile, and the Makefile for its documentation, both include +scripts/utilities.mak, but they use none of the items defined in this +file. Remove the includes. + +Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") +Signed-off-by: Quentin Monnet +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211110114632.24537-3-quentin@isovalent.com +Signed-off-by: Greg Kroah-Hartman +--- + tools/bpf/bpftool/Documentation/Makefile | 1 - + tools/bpf/bpftool/Makefile | 1 - + 2 files changed, 2 deletions(-) + +--- a/tools/bpf/bpftool/Documentation/Makefile ++++ b/tools/bpf/bpftool/Documentation/Makefile +@@ -1,6 +1,5 @@ + # SPDX-License-Identifier: GPL-2.0-only + include ../../../scripts/Makefile.include +-include ../../../scripts/utilities.mak + + INSTALL ?= install + RM ?= rm -f +--- a/tools/bpf/bpftool/Makefile ++++ b/tools/bpf/bpftool/Makefile +@@ -1,6 +1,5 @@ + # SPDX-License-Identifier: GPL-2.0-only + include ../../scripts/Makefile.include +-include ../../scripts/utilities.mak + + ifeq ($(srctree),) + srctree := $(patsubst %/,%,$(dir $(CURDIR))) diff --git a/queue-5.10/clk-emit-a-stern-warning-with-writable-debugfs-enabled.patch b/queue-5.10/clk-emit-a-stern-warning-with-writable-debugfs-enabled.patch new file mode 100644 index 00000000000..6f648f5a851 --- /dev/null +++ b/queue-5.10/clk-emit-a-stern-warning-with-writable-debugfs-enabled.patch @@ -0,0 +1,51 @@ +From 489a71964f9d74e697a12cd0ace20ed829eb1f93 Mon Sep 17 00:00:00 2001 
+From: Stephen Boyd +Date: Thu, 9 Dec 2021 17:34:05 -0800 +Subject: clk: Emit a stern warning with writable debugfs enabled + +From: Stephen Boyd + +commit 489a71964f9d74e697a12cd0ace20ed829eb1f93 upstream. + +We don't want vendors to be enabling this part of the clk code and +shipping it to customers. Exposing the ability to change clk frequencies +and parents via debugfs is potentially damaging to the system if folks +don't know what they're doing. Emit a strong warning so that the message +is clear: don't enable this outside of development systems. + +Fixes: 37215da5553e ("clk: Add support for setting clk_rate via debugfs") +Cc: Geert Uytterhoeven +Link: https://lore.kernel.org/r/20211210014237.2130300-1-sboyd@kernel.org +Signed-off-by: Stephen Boyd +Signed-off-by: Greg Kroah-Hartman +--- + drivers/clk/clk.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +--- a/drivers/clk/clk.c ++++ b/drivers/clk/clk.c +@@ -3314,6 +3314,24 @@ static int __init clk_debug_init(void) + { + struct clk_core *core; + ++#ifdef CLOCK_ALLOW_WRITE_DEBUGFS ++ pr_warn("\n"); ++ pr_warn("********************************************************************\n"); ++ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); ++ pr_warn("** **\n"); ++ pr_warn("** WRITEABLE clk DebugFS SUPPORT HAS BEEN ENABLED IN THIS KERNEL **\n"); ++ pr_warn("** **\n"); ++ pr_warn("** This means that this kernel is built to expose clk operations **\n"); ++ pr_warn("** such as parent or rate setting, enabling, disabling, etc. **\n"); ++ pr_warn("** to userspace, which may compromise security on your system. **\n"); ++ pr_warn("** **\n"); ++ pr_warn("** If you see this message and you are not debugging the **\n"); ++ pr_warn("** kernel, report this immediately to your vendor! 
**\n"); ++ pr_warn("** **\n"); ++ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); ++ pr_warn("********************************************************************\n"); ++#endif ++ + rootdir = debugfs_create_dir("clk", NULL); + + debugfs_create_file("clk_summary", 0444, rootdir, &all_lists, diff --git a/queue-5.10/clk-si5341-fix-clock-hw-provider-cleanup.patch b/queue-5.10/clk-si5341-fix-clock-hw-provider-cleanup.patch new file mode 100644 index 00000000000..fe735fb16a7 --- /dev/null +++ b/queue-5.10/clk-si5341-fix-clock-hw-provider-cleanup.patch @@ -0,0 +1,34 @@ +From 49a8f2bc8d88702783c7e163ec84374e9a022f71 Mon Sep 17 00:00:00 2001 +From: Robert Hancock +Date: Wed, 12 Jan 2022 14:38:16 -0600 +Subject: clk: si5341: Fix clock HW provider cleanup + +From: Robert Hancock + +commit 49a8f2bc8d88702783c7e163ec84374e9a022f71 upstream. + +The call to of_clk_add_hw_provider was not undone on remove or on probe +failure, which could cause an oops on a subsequent attempt to retrieve +clocks for the removed device. Switch to the devm version of the +function to avoid this issue. 
+ +Fixes: 3044a860fd09 ("clk: Add Si5341/Si5340 driver") +Signed-off-by: Robert Hancock +Link: https://lore.kernel.org/r/20220112203816.1784610-1-robert.hancock@calian.com +Signed-off-by: Stephen Boyd +Signed-off-by: Greg Kroah-Hartman +--- + drivers/clk/clk-si5341.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/clk/clk-si5341.c ++++ b/drivers/clk/clk-si5341.c +@@ -1576,7 +1576,7 @@ static int si5341_probe(struct i2c_clien + clk_prepare(data->clk[i].hw.clk); + } + +- err = of_clk_add_hw_provider(client->dev.of_node, of_clk_si5341_get, ++ err = devm_of_clk_add_hw_provider(&client->dev, of_clk_si5341_get, + data); + if (err) { + dev_err(&client->dev, "unable to add clk provider\n"); diff --git a/queue-5.10/f2fs-compress-fix-potential-deadlock-of-compress-file.patch b/queue-5.10/f2fs-compress-fix-potential-deadlock-of-compress-file.patch new file mode 100644 index 00000000000..51a24799486 --- /dev/null +++ b/queue-5.10/f2fs-compress-fix-potential-deadlock-of-compress-file.patch @@ -0,0 +1,141 @@ +From 7377e853967ba45bf409e3b5536624d2cbc99f21 Mon Sep 17 00:00:00 2001 +From: Hyeong-Jun Kim +Date: Fri, 10 Dec 2021 13:30:12 +0900 +Subject: f2fs: compress: fix potential deadlock of compress file + +From: Hyeong-Jun Kim + +commit 7377e853967ba45bf409e3b5536624d2cbc99f21 upstream. 
+ +There is a potential deadlock between writeback process and a process +performing write_begin() or write_cache_pages() while trying to write +same compress file, but not compressible, as below: + +[Process A] - doing checkpoint +[Process B] [Process C] +f2fs_write_cache_pages() +- lock_page() [all pages in cluster, 0-31] +- f2fs_write_multi_pages() + - f2fs_write_raw_pages() + - f2fs_write_single_data_page() + - f2fs_do_write_data_page() + - return -EAGAIN [f2fs_trylock_op() failed] + - unlock_page(page) [e.g., page 0] + - generic_perform_write() + - f2fs_write_begin() + - f2fs_prepare_compress_overwrite() + - prepare_compress_overwrite() + - lock_page() [e.g., page 0] + - lock_page() [e.g., page 1] + - lock_page(page) [e.g., page 0] + +Since there is no compress process, it is no longer necessary to hold +locks on every page in cluster within f2fs_write_raw_pages(). + +This patch changes f2fs_write_raw_pages() to release all locks first +and then perform write same as the non-compress file in +f2fs_write_cache_pages(). 
+ +Fixes: 4c8ff7095bef ("f2fs: support data compression") +Signed-off-by: Hyeong-Jun Kim +Signed-off-by: Sungjong Seo +Signed-off-by: Youngjin Gil +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/compress.c | 50 ++++++++++++++++++++++---------------------------- + 1 file changed, 22 insertions(+), 28 deletions(-) + +--- a/fs/f2fs/compress.c ++++ b/fs/f2fs/compress.c +@@ -1362,25 +1362,38 @@ static int f2fs_write_raw_pages(struct c + enum iostat_type io_type) + { + struct address_space *mapping = cc->inode->i_mapping; +- int _submitted, compr_blocks, ret; +- int i = -1, err = 0; ++ int _submitted, compr_blocks, ret, i; + + compr_blocks = f2fs_compressed_blocks(cc); +- if (compr_blocks < 0) { +- err = compr_blocks; +- goto out_err; ++ ++ for (i = 0; i < cc->cluster_size; i++) { ++ if (!cc->rpages[i]) ++ continue; ++ ++ redirty_page_for_writepage(wbc, cc->rpages[i]); ++ unlock_page(cc->rpages[i]); + } + ++ if (compr_blocks < 0) ++ return compr_blocks; ++ + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + retry_write: ++ lock_page(cc->rpages[i]); ++ + if (cc->rpages[i]->mapping != mapping) { ++continue_unlock: + unlock_page(cc->rpages[i]); + continue; + } + +- BUG_ON(!PageLocked(cc->rpages[i])); ++ if (!PageDirty(cc->rpages[i])) ++ goto continue_unlock; ++ ++ if (!clear_page_dirty_for_io(cc->rpages[i])) ++ goto continue_unlock; + + ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + NULL, NULL, wbc, io_type, +@@ -1395,26 +1408,15 @@ retry_write: + * avoid deadlock caused by cluster update race + * from foreground operation. 
+ */ +- if (IS_NOQUOTA(cc->inode)) { +- err = 0; +- goto out_err; +- } ++ if (IS_NOQUOTA(cc->inode)) ++ return 0; + ret = 0; + cond_resched(); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); +- lock_page(cc->rpages[i]); +- +- if (!PageDirty(cc->rpages[i])) { +- unlock_page(cc->rpages[i]); +- continue; +- } +- +- clear_page_dirty_for_io(cc->rpages[i]); + goto retry_write; + } +- err = ret; +- goto out_err; ++ return ret; + } + + *submitted += _submitted; +@@ -1423,14 +1425,6 @@ retry_write: + f2fs_balance_fs(F2FS_M_SB(mapping), true); + + return 0; +-out_err: +- for (++i; i < cc->cluster_size; i++) { +- if (!cc->rpages[i]) +- continue; +- redirty_page_for_writepage(wbc, cc->rpages[i]); +- unlock_page(cc->rpages[i]); +- } +- return err; + } + + int f2fs_write_multi_pages(struct compress_ctx *cc, diff --git a/queue-5.10/f2fs-fix-to-reserve-space-for-io-align-feature.patch b/queue-5.10/f2fs-fix-to-reserve-space-for-io-align-feature.patch new file mode 100644 index 00000000000..d2ca1693137 --- /dev/null +++ b/queue-5.10/f2fs-fix-to-reserve-space-for-io-align-feature.patch @@ -0,0 +1,155 @@ +From 300a842937fbcfb5a189cea9ba15374fdb0b5c6b Mon Sep 17 00:00:00 2001 +From: Chao Yu +Date: Sat, 11 Dec 2021 21:27:36 +0800 +Subject: f2fs: fix to reserve space for IO align feature + +From: Chao Yu + +commit 300a842937fbcfb5a189cea9ba15374fdb0b5c6b upstream. 
+ +https://bugzilla.kernel.org/show_bug.cgi?id=204137 + +With below script, we will hit panic during new segment allocation: + +DISK=bingo.img +MOUNT_DIR=/mnt/f2fs + +dd if=/dev/zero of=$DISK bs=1M count=105 +mkfs.f2fs -a 1 -o 19 -t 1 -z 1 -f -q $DISK + +mount -t f2fs $DISK $MOUNT_DIR -o "noinline_dentry,flush_merge,noextent_cache,mode=lfs,io_bits=7,fsync_mode=strict" + +for (( i = 0; i < 4096; i++ )); do + name=`head /dev/urandom | tr -dc A-Za-z0-9 | head -c 10` + mkdir $MOUNT_DIR/$name +done + +umount $MOUNT_DIR +rm $DISK + +--- + fs/f2fs/f2fs.h | 11 +++++++++++ + fs/f2fs/segment.h | 3 ++- + fs/f2fs/super.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ + fs/f2fs/sysfs.c | 4 +++- + 4 files changed, 60 insertions(+), 2 deletions(-) + +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -955,6 +955,7 @@ struct f2fs_sm_info { + unsigned int segment_count; /* total # of segments */ + unsigned int main_segments; /* # of segments in main area */ + unsigned int reserved_segments; /* # of reserved segments */ ++ unsigned int additional_reserved_segments;/* reserved segs for IO align feature */ + unsigned int ovp_segments; /* # of overprovision segments */ + + /* a threshold to reclaim prefree segments */ +@@ -1984,6 +1985,11 @@ static inline int inc_valid_block_count( + + if (!__allow_reserved_blocks(sbi, inode, true)) + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; ++ ++ if (F2FS_IO_ALIGNED(sbi)) ++ avail_user_block_count -= sbi->blocks_per_seg * ++ SM_I(sbi)->additional_reserved_segments; ++ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (avail_user_block_count > sbi->unusable_block_count) + avail_user_block_count -= sbi->unusable_block_count; +@@ -2229,6 +2235,11 @@ static inline int inc_valid_node_count(s + + if (!__allow_reserved_blocks(sbi, inode, false)) + valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; ++ ++ if (F2FS_IO_ALIGNED(sbi)) ++ valid_block_count += sbi->blocks_per_seg * ++ 
SM_I(sbi)->additional_reserved_segments; ++ + user_block_count = sbi->user_block_count; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + user_block_count -= sbi->unusable_block_count; +--- a/fs/f2fs/segment.h ++++ b/fs/f2fs/segment.h +@@ -539,7 +539,8 @@ static inline unsigned int free_segments + + static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi) + { +- return SM_I(sbi)->reserved_segments; ++ return SM_I(sbi)->reserved_segments + ++ SM_I(sbi)->additional_reserved_segments; + } + + static inline unsigned int free_sections(struct f2fs_sb_info *sbi) +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -289,6 +289,46 @@ static inline void limit_reserve_root(st + F2FS_OPTION(sbi).s_resgid)); + } + ++static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi) ++{ ++ unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec; ++ unsigned int avg_vblocks; ++ unsigned int wanted_reserved_segments; ++ block_t avail_user_block_count; ++ ++ if (!F2FS_IO_ALIGNED(sbi)) ++ return 0; ++ ++ /* average valid block count in section in worst case */ ++ avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi); ++ ++ /* ++ * we need enough free space when migrating one section in worst case ++ */ ++ wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) * ++ reserved_segments(sbi); ++ wanted_reserved_segments -= reserved_segments(sbi); ++ ++ avail_user_block_count = sbi->user_block_count - ++ sbi->current_reserved_blocks - ++ F2FS_OPTION(sbi).root_reserved_blocks; ++ ++ if (wanted_reserved_segments * sbi->blocks_per_seg > ++ avail_user_block_count) { ++ f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u", ++ wanted_reserved_segments, ++ avail_user_block_count >> sbi->log_blocks_per_seg); ++ return -ENOSPC; ++ } ++ ++ SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments; ++ ++ f2fs_info(sbi, "IO align feature needs additional reserved segment: %u", ++ wanted_reserved_segments); ++ ++ return 0; 
++} ++ + static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi) + { + if (!F2FS_OPTION(sbi).unusable_cap_perc) +@@ -3736,6 +3776,10 @@ try_onemore: + goto free_nm; + } + ++ err = adjust_reserved_segment(sbi); ++ if (err) ++ goto free_nm; ++ + /* For write statistics */ + if (sb->s_bdev->bd_part) + sbi->sectors_written_start = +--- a/fs/f2fs/sysfs.c ++++ b/fs/f2fs/sysfs.c +@@ -330,7 +330,9 @@ out: + if (a->struct_type == RESERVED_BLOCKS) { + spin_lock(&sbi->stat_lock); + if (t > (unsigned long)(sbi->user_block_count - +- F2FS_OPTION(sbi).root_reserved_blocks)) { ++ F2FS_OPTION(sbi).root_reserved_blocks - ++ sbi->blocks_per_seg * ++ SM_I(sbi)->additional_reserved_segments)) { + spin_unlock(&sbi->stat_lock); + return -EINVAL; + } diff --git a/queue-5.10/ipv4-avoid-quadratic-behavior-in-netns-dismantle.patch b/queue-5.10/ipv4-avoid-quadratic-behavior-in-netns-dismantle.patch new file mode 100644 index 00000000000..37a315f1869 --- /dev/null +++ b/queue-5.10/ipv4-avoid-quadratic-behavior-in-netns-dismantle.patch @@ -0,0 +1,139 @@ +From d07418afea8f1d9896aaf9dc5ae47ac4f45b220c Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 19 Jan 2022 02:04:12 -0800 +Subject: ipv4: avoid quadratic behavior in netns dismantle + +From: Eric Dumazet + +commit d07418afea8f1d9896aaf9dc5ae47ac4f45b220c upstream. + +net/ipv4/fib_semantics.c uses an hash table of 256 slots, +keyed by device ifindexes: fib_info_devhash[DEVINDEX_HASHSIZE] + +Problem is that with network namespaces, devices tend +to use the same ifindex. + +lo device for instance has a fixed ifindex of one, +for all network namespaces. + +This means that hosts with thousands of netns spend +a lot of time looking at some hash buckets with thousands +of elements, notably at netns dismantle. + +Simply add a per netns perturbation (net_hash_mix()) +to spread elements more uniformely. + +Also change fib_devindex_hashfn() to use more entropy. 
+ +Fixes: aa79e66eee5d ("net: Make ifindex generation per-net namespace") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 36 +++++++++++++++++------------------- + 1 file changed, 17 insertions(+), 19 deletions(-) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -321,11 +322,15 @@ static inline int nh_comp(struct fib_inf + + static inline unsigned int fib_devindex_hashfn(unsigned int val) + { +- unsigned int mask = DEVINDEX_HASHSIZE - 1; ++ return hash_32(val, DEVINDEX_HASHBITS); ++} ++ ++static struct hlist_head * ++fib_info_devhash_bucket(const struct net_device *dev) ++{ ++ u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; + +- return (val ^ +- (val >> DEVINDEX_HASHBITS) ^ +- (val >> (DEVINDEX_HASHBITS * 2))) & mask; ++ return &fib_info_devhash[fib_devindex_hashfn(val)]; + } + + static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, +@@ -435,12 +440,11 @@ int ip_fib_check_default(__be32 gw, stru + { + struct hlist_head *head; + struct fib_nh *nh; +- unsigned int hash; + + spin_lock(&fib_info_lock); + +- hash = fib_devindex_hashfn(dev->ifindex); +- head = &fib_info_devhash[hash]; ++ head = fib_info_devhash_bucket(dev); ++ + hlist_for_each_entry(nh, head, nh_hash) { + if (nh->fib_nh_dev == dev && + nh->fib_nh_gw4 == gw && +@@ -1608,12 +1612,10 @@ link_it: + } else { + change_nexthops(fi) { + struct hlist_head *head; +- unsigned int hash; + + if (!nexthop_nh->fib_nh_dev) + continue; +- hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); +- head = &fib_info_devhash[hash]; ++ head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); + hlist_add_head(&nexthop_nh->nh_hash, head); + } endfor_nexthops(fi) + } +@@ -1963,8 +1965,7 @@ void fib_nhc_update_mtu(struct fib_nh_co + + void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) 
+ { +- unsigned int hash = fib_devindex_hashfn(dev->ifindex); +- struct hlist_head *head = &fib_info_devhash[hash]; ++ struct hlist_head *head = fib_info_devhash_bucket(dev); + struct fib_nh *nh; + + hlist_for_each_entry(nh, head, nh_hash) { +@@ -1983,12 +1984,11 @@ void fib_sync_mtu(struct net_device *dev + */ + int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) + { +- int ret = 0; +- int scope = RT_SCOPE_NOWHERE; ++ struct hlist_head *head = fib_info_devhash_bucket(dev); + struct fib_info *prev_fi = NULL; +- unsigned int hash = fib_devindex_hashfn(dev->ifindex); +- struct hlist_head *head = &fib_info_devhash[hash]; ++ int scope = RT_SCOPE_NOWHERE; + struct fib_nh *nh; ++ int ret = 0; + + if (force) + scope = -1; +@@ -2133,7 +2133,6 @@ out: + int fib_sync_up(struct net_device *dev, unsigned char nh_flags) + { + struct fib_info *prev_fi; +- unsigned int hash; + struct hlist_head *head; + struct fib_nh *nh; + int ret; +@@ -2149,8 +2148,7 @@ int fib_sync_up(struct net_device *dev, + } + + prev_fi = NULL; +- hash = fib_devindex_hashfn(dev->ifindex); +- head = &fib_info_devhash[hash]; ++ head = fib_info_devhash_bucket(dev); + ret = 0; + + hlist_for_each_entry(nh, head, nh_hash) { diff --git a/queue-5.10/ipv4-update-fib_info_cnt-under-spinlock-protection.patch b/queue-5.10/ipv4-update-fib_info_cnt-under-spinlock-protection.patch new file mode 100644 index 00000000000..e12dfedd344 --- /dev/null +++ b/queue-5.10/ipv4-update-fib_info_cnt-under-spinlock-protection.patch @@ -0,0 +1,140 @@ +From 0a6e6b3c7db6c34e3d149f09cd714972f8753e3f Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Sun, 16 Jan 2022 01:02:20 -0800 +Subject: ipv4: update fib_info_cnt under spinlock protection + +From: Eric Dumazet + +commit 0a6e6b3c7db6c34e3d149f09cd714972f8753e3f upstream. + +In the past, free_fib_info() was supposed to be called +under RTNL protection. + +This eventually was no longer the case. 
+ +Instead of enforcing RTNL it seems we simply can +move fib_info_cnt changes to occur when fib_info_lock +is held. + +v2: David Laight suggested to update fib_info_cnt +only when an entry is added/deleted to/from the hash table, +as fib_info_cnt is used to make sure hash table size +is optimal. + +BUG: KCSAN: data-race in fib_create_info / free_fib_info + +write to 0xffffffff86e243a0 of 4 bytes by task 26429 on cpu 0: + fib_create_info+0xe78/0x3440 net/ipv4/fib_semantics.c:1428 + fib_table_insert+0x148/0x10c0 net/ipv4/fib_trie.c:1224 + fib_magic+0x195/0x1e0 net/ipv4/fib_frontend.c:1087 + fib_add_ifaddr+0xd0/0x2e0 net/ipv4/fib_frontend.c:1109 + fib_netdev_event+0x178/0x510 net/ipv4/fib_frontend.c:1466 + notifier_call_chain kernel/notifier.c:83 [inline] + raw_notifier_call_chain+0x53/0xb0 kernel/notifier.c:391 + __dev_notify_flags+0x1d3/0x3b0 + dev_change_flags+0xa2/0xc0 net/core/dev.c:8872 + do_setlink+0x810/0x2410 net/core/rtnetlink.c:2719 + rtnl_group_changelink net/core/rtnetlink.c:3242 [inline] + __rtnl_newlink net/core/rtnetlink.c:3396 [inline] + rtnl_newlink+0xb10/0x13b0 net/core/rtnetlink.c:3506 + rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571 + netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2496 + rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589 + netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] + netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345 + netlink_sendmsg+0x726/0x840 net/netlink/af_netlink.c:1921 + sock_sendmsg_nosec net/socket.c:704 [inline] + sock_sendmsg net/socket.c:724 [inline] + ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 + ___sys_sendmsg net/socket.c:2463 [inline] + __sys_sendmsg+0x195/0x230 net/socket.c:2492 + __do_sys_sendmsg net/socket.c:2501 [inline] + __se_sys_sendmsg net/socket.c:2499 [inline] + __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +read 
to 0xffffffff86e243a0 of 4 bytes by task 31505 on cpu 1: + free_fib_info+0x35/0x80 net/ipv4/fib_semantics.c:252 + fib_info_put include/net/ip_fib.h:575 [inline] + nsim_fib4_rt_destroy drivers/net/netdevsim/fib.c:294 [inline] + nsim_fib4_rt_replace drivers/net/netdevsim/fib.c:403 [inline] + nsim_fib4_rt_insert drivers/net/netdevsim/fib.c:431 [inline] + nsim_fib4_event drivers/net/netdevsim/fib.c:461 [inline] + nsim_fib_event drivers/net/netdevsim/fib.c:881 [inline] + nsim_fib_event_work+0x15ca/0x2cf0 drivers/net/netdevsim/fib.c:1477 + process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 + process_scheduled_works kernel/workqueue.c:2361 [inline] + worker_thread+0x7df/0xa70 kernel/workqueue.c:2447 + kthread+0x2c7/0x2e0 kernel/kthread.c:327 + ret_from_fork+0x1f/0x30 + +value changed: 0x00000d2d -> 0x00000d2e + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 31505 Comm: kworker/1:21 Not tainted 5.16.0-rc6-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: events nsim_fib_event_work + +Fixes: 48bb9eb47b27 ("netdevsim: fib: Add dummy implementation for FIB offload") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: David Laight +Cc: Ido Schimmel +Cc: Jiri Pirko +Reviewed-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -251,7 +251,6 @@ void free_fib_info(struct fib_info *fi) + pr_warn("Freeing alive fib_info %p\n", fi); + return; + } +- fib_info_cnt--; + + call_rcu(&fi->rcu, free_fib_info_rcu); + } +@@ -262,6 +261,10 @@ void fib_release_info(struct fib_info *f + spin_lock_bh(&fib_info_lock); + if (fi && --fi->fib_treeref == 0) { + hlist_del(&fi->fib_hash); ++ ++ /* Paired with READ_ONCE() in fib_create_info(). 
*/ ++ WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1); ++ + if (fi->fib_prefsrc) + hlist_del(&fi->fib_lhash); + if (fi->nh) { +@@ -1431,7 +1434,9 @@ struct fib_info *fib_create_info(struct + #endif + + err = -ENOBUFS; +- if (fib_info_cnt >= fib_info_hash_size) { ++ ++ /* Paired with WRITE_ONCE() in fib_release_info() */ ++ if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) { + unsigned int new_size = fib_info_hash_size << 1; + struct hlist_head *new_info_hash; + struct hlist_head *new_laddrhash; +@@ -1463,7 +1468,6 @@ struct fib_info *fib_create_info(struct + return ERR_PTR(err); + } + +- fib_info_cnt++; + fi->fib_net = net; + fi->fib_protocol = cfg->fc_protocol; + fi->fib_scope = cfg->fc_scope; +@@ -1590,6 +1594,7 @@ link_it: + fi->fib_treeref++; + refcount_set(&fi->fib_clntref, 1); + spin_lock_bh(&fib_info_lock); ++ fib_info_cnt++; + hlist_add_head(&fi->fib_hash, + &fib_info_hash[fib_info_hashfn(fi)]); + if (fi->fib_prefsrc) { diff --git a/queue-5.10/net-axienet-increase-reset-timeout.patch b/queue-5.10/net-axienet-increase-reset-timeout.patch new file mode 100644 index 00000000000..0d657ff9b47 --- /dev/null +++ b/queue-5.10/net-axienet-increase-reset-timeout.patch @@ -0,0 +1,58 @@ +From 2e5644b1bab2ccea9cfc7a9520af95b94eb0dbf1 Mon Sep 17 00:00:00 2001 +From: Robert Hancock +Date: Tue, 18 Jan 2022 15:41:24 -0600 +Subject: net: axienet: increase reset timeout + +From: Robert Hancock + +commit 2e5644b1bab2ccea9cfc7a9520af95b94eb0dbf1 upstream. + +The previous timeout of 1ms was too short to handle some cases where the +core is reset just after the input clocks were started, which will +be introduced in an upcoming patch. Increase the timeout to 50ms. Also +simplify the reset timeout checking to use read_poll_timeout. + +Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") +Signed-off-by: Robert Hancock +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c ++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +@@ -496,7 +496,8 @@ static void axienet_setoptions(struct ne + + static int __axienet_device_reset(struct axienet_local *lp) + { +- u32 timeout; ++ u32 value; ++ int ret; + + /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset + * process of Axi DMA takes a while to complete as all pending +@@ -506,15 +507,13 @@ static int __axienet_device_reset(struct + * they both reset the entire DMA core, so only one needs to be used. + */ + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK); +- timeout = DELAY_OF_ONE_MILLISEC; +- while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) & +- XAXIDMA_CR_RESET_MASK) { +- udelay(1); +- if (--timeout == 0) { +- netdev_err(lp->ndev, "%s: DMA reset timeout!\n", +- __func__); +- return -ETIMEDOUT; +- } ++ ret = read_poll_timeout(axienet_dma_in32, value, ++ !(value & XAXIDMA_CR_RESET_MASK), ++ DELAY_OF_ONE_MILLISEC, 50000, false, lp, ++ XAXIDMA_TX_CR_OFFSET); ++ if (ret) { ++ dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__); ++ return ret; + } + + return 0; diff --git a/queue-5.10/net-axienet-wait-for-phyrstcmplt-after-core-reset.patch b/queue-5.10/net-axienet-wait-for-phyrstcmplt-after-core-reset.patch new file mode 100644 index 00000000000..b8642f666fc --- /dev/null +++ b/queue-5.10/net-axienet-wait-for-phyrstcmplt-after-core-reset.patch @@ -0,0 +1,51 @@ +From b400c2f4f4c53c86594dd57098970d97d488bfde Mon Sep 17 00:00:00 2001 +From: Robert Hancock +Date: Tue, 18 Jan 2022 15:41:25 -0600 +Subject: net: axienet: Wait for PhyRstCmplt after core reset + +From: Robert Hancock + +commit b400c2f4f4c53c86594dd57098970d97d488bfde upstream. 
+ +When resetting the device, wait for the PhyRstCmplt bit to be set +in the interrupt status register before continuing initialization, to +ensure that the core is actually ready. When using an external PHY, this +also ensures we do not start trying to access the PHY while it is still +in reset. The PHY reset is initiated by the core reset which is +triggered just above, but remains asserted for 5ms after the core is +reset according to the documentation. + +The MgtRdy bit could also be waited for, but unfortunately when using +7-series devices, the bit does not appear to work as documented (it +seems to behave as some sort of link state indication and not just an +indication the transceiver is ready) so it can't really be relied on for +this purpose. + +Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") +Signed-off-by: Robert Hancock +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c ++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +@@ -516,6 +516,16 @@ static int __axienet_device_reset(struct + return ret; + } + ++ /* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */ ++ ret = read_poll_timeout(axienet_ior, value, ++ value & XAE_INT_PHYRSTCMPLT_MASK, ++ DELAY_OF_ONE_MILLISEC, 50000, false, lp, ++ XAE_IS_OFFSET); ++ if (ret) { ++ dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__); ++ return ret; ++ } ++ + return 0; + } + diff --git a/queue-5.10/net-fsl-xgmac_mdio-add-workaround-for-erratum-a-009885.patch b/queue-5.10/net-fsl-xgmac_mdio-add-workaround-for-erratum-a-009885.patch new file mode 100644 index 00000000000..7a22b5bfd50 --- /dev/null +++ b/queue-5.10/net-fsl-xgmac_mdio-add-workaround-for-erratum-a-009885.patch @@ -0,0 +1,98 @@ +From 
6198c722019774d38018457a8bfb9ba3ed8c931e Mon Sep 17 00:00:00 2001 +From: Tobias Waldekranz +Date: Tue, 18 Jan 2022 22:50:50 +0100 +Subject: net/fsl: xgmac_mdio: Add workaround for erratum A-009885 + +From: Tobias Waldekranz + +commit 6198c722019774d38018457a8bfb9ba3ed8c931e upstream. + +Once an MDIO read transaction is initiated, we must read back the data +register within 16 MDC cycles after the transaction completes. Outside +of this window, reads may return corrupt data. + +Therefore, disable local interrupts in the critical section, to +maximize the probability that we can satisfy this requirement. + +Fixes: d55ad2967d89 ("powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA FMan") +Signed-off-by: Tobias Waldekranz +Reviewed-by: Andrew Lunn +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/xgmac_mdio.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/freescale/xgmac_mdio.c ++++ b/drivers/net/ethernet/freescale/xgmac_mdio.c +@@ -49,6 +49,7 @@ struct tgec_mdio_controller { + struct mdio_fsl_priv { + struct tgec_mdio_controller __iomem *mdio_base; + bool is_little_endian; ++ bool has_a009885; + bool has_a011043; + }; + +@@ -184,10 +185,10 @@ static int xgmac_mdio_read(struct mii_bu + { + struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; + struct tgec_mdio_controller __iomem *regs = priv->mdio_base; ++ unsigned long flags; + uint16_t dev_addr; + uint32_t mdio_stat; + uint32_t mdio_ctl; +- uint16_t value; + int ret; + bool endian = priv->is_little_endian; + +@@ -219,12 +220,18 @@ static int xgmac_mdio_read(struct mii_bu + return ret; + } + ++ if (priv->has_a009885) ++ /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we ++ * must read back the data register within 16 MDC cycles. 
++ */ ++ local_irq_save(flags); ++ + /* Initiate the read */ + xgmac_write32(mdio_ctl | MDIO_CTL_READ, &regs->mdio_ctl, endian); + + ret = xgmac_wait_until_done(&bus->dev, regs, endian); + if (ret) +- return ret; ++ goto irq_restore; + + /* Return all Fs if nothing was there */ + if ((xgmac_read32(&regs->mdio_stat, endian) & MDIO_STAT_RD_ER) && +@@ -232,13 +239,17 @@ static int xgmac_mdio_read(struct mii_bu + dev_dbg(&bus->dev, + "Error while reading PHY%d reg at %d.%hhu\n", + phy_id, dev_addr, regnum); +- return 0xffff; ++ ret = 0xffff; ++ } else { ++ ret = xgmac_read32(&regs->mdio_data, endian) & 0xffff; ++ dev_dbg(&bus->dev, "read %04x\n", ret); + } + +- value = xgmac_read32(&regs->mdio_data, endian) & 0xffff; +- dev_dbg(&bus->dev, "read %04x\n", value); ++irq_restore: ++ if (priv->has_a009885) ++ local_irq_restore(flags); + +- return value; ++ return ret; + } + + static int xgmac_mdio_probe(struct platform_device *pdev) +@@ -282,6 +293,8 @@ static int xgmac_mdio_probe(struct platf + priv->is_little_endian = device_property_read_bool(&pdev->dev, + "little-endian"); + ++ priv->has_a009885 = device_property_read_bool(&pdev->dev, ++ "fsl,erratum-a009885"); + priv->has_a011043 = device_property_read_bool(&pdev->dev, + "fsl,erratum-a011043"); + diff --git a/queue-5.10/net-fsl-xgmac_mdio-fix-incorrect-iounmap-when-removing-module.patch b/queue-5.10/net-fsl-xgmac_mdio-fix-incorrect-iounmap-when-removing-module.patch new file mode 100644 index 00000000000..c219264e2a9 --- /dev/null +++ b/queue-5.10/net-fsl-xgmac_mdio-fix-incorrect-iounmap-when-removing-module.patch @@ -0,0 +1,36 @@ +From 3f7c239c7844d2044ed399399d97a5f1c6008e1b Mon Sep 17 00:00:00 2001 +From: Tobias Waldekranz +Date: Tue, 18 Jan 2022 22:50:53 +0100 +Subject: net/fsl: xgmac_mdio: Fix incorrect iounmap when removing module + +From: Tobias Waldekranz + +commit 3f7c239c7844d2044ed399399d97a5f1c6008e1b upstream. 
+ +As reported by sparse: In the remove path, the driver would attempt to +unmap its own priv pointer - instead of the io memory that it mapped +in probe. + +Fixes: 9f35a7342cff ("net/fsl: introduce Freescale 10G MDIO driver") +Signed-off-by: Tobias Waldekranz +Reviewed-by: Andrew Lunn +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/xgmac_mdio.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/freescale/xgmac_mdio.c ++++ b/drivers/net/ethernet/freescale/xgmac_mdio.c +@@ -320,9 +320,10 @@ err_ioremap: + static int xgmac_mdio_remove(struct platform_device *pdev) + { + struct mii_bus *bus = platform_get_drvdata(pdev); ++ struct mdio_fsl_priv *priv = bus->priv; + + mdiobus_unregister(bus); +- iounmap(bus->priv); ++ iounmap(priv->mdio_base); + mdiobus_free(bus); + + return 0; diff --git a/queue-5.10/net-smc-fix-hung_task-when-removing-smc-r-devices.patch b/queue-5.10/net-smc-fix-hung_task-when-removing-smc-r-devices.patch new file mode 100644 index 00000000000..1d48996e268 --- /dev/null +++ b/queue-5.10/net-smc-fix-hung_task-when-removing-smc-r-devices.patch @@ -0,0 +1,78 @@ +From 56d99e81ecbc997a5f984684d0eeb583992b2072 Mon Sep 17 00:00:00 2001 +From: Wen Gu +Date: Sun, 16 Jan 2022 15:43:42 +0800 +Subject: net/smc: Fix hung_task when removing SMC-R devices + +From: Wen Gu + +commit 56d99e81ecbc997a5f984684d0eeb583992b2072 upstream. + +A hung_task is observed when removing SMC-R devices. Suppose that +a link group has two active links(lnk_A, lnk_B) associated with two +different SMC-R devices(dev_A, dev_B). When dev_A is removed, the +link group will be removed from smc_lgr_list and added into +lgr_linkdown_list. lnk_A will be cleared and smcibdev(A)->lnk_cnt +will reach to zero. However, when dev_B is removed then, the link +group can't be found in smc_lgr_list and lnk_B won't be cleared, +making smcibdev->lnk_cnt never reaches zero, which causes a hung_task. 
+ +This patch fixes this issue by restoring the implementation of +smc_smcr_terminate_all() to what it was before commit 349d43127dac +("net/smc: fix kernel panic caused by race of smc_sock"). The original +implementation also satisfies the intention that make sure QP destroy +earlier than CQ destroy because we will always wait for smcibdev->lnk_cnt +reaches zero, which guarantees QP has been destroyed. + +Fixes: 349d43127dac ("net/smc: fix kernel panic caused by race of smc_sock") +Signed-off-by: Wen Gu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/smc_core.c | 17 +---------------- + 1 file changed, 1 insertion(+), 16 deletions(-) + +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -1002,16 +1002,11 @@ void smc_smcd_terminate_all(struct smcd_ + /* Called when an SMCR device is removed or the smc module is unloaded. + * If smcibdev is given, all SMCR link groups using this device are terminated. + * If smcibdev is NULL, all SMCR link groups are terminated. +- * +- * We must wait here for QPs been destroyed before we destroy the CQs, +- * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus +- * smc_sock cannot be released. 
+ */ + void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) + { + struct smc_link_group *lgr, *lg; + LIST_HEAD(lgr_free_list); +- LIST_HEAD(lgr_linkdown_list); + int i; + + spin_lock_bh(&smc_lgr_list.lock); +@@ -1023,7 +1018,7 @@ void smc_smcr_terminate_all(struct smc_i + list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].smcibdev == smcibdev) +- list_move_tail(&lgr->list, &lgr_linkdown_list); ++ smcr_link_down_cond_sched(&lgr->lnk[i]); + } + } + } +@@ -1035,16 +1030,6 @@ void smc_smcr_terminate_all(struct smc_i + __smc_lgr_terminate(lgr, false); + } + +- list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) { +- for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { +- if (lgr->lnk[i].smcibdev == smcibdev) { +- mutex_lock(&lgr->llc_conf_mutex); +- smcr_link_down_cond(&lgr->lnk[i]); +- mutex_unlock(&lgr->llc_conf_mutex); +- } +- } +- } +- + if (smcibdev) { + if (atomic_read(&smcibdev->lnk_cnt)) + wait_event(smcibdev->lnks_deleted, diff --git a/queue-5.10/parisc-pdc_stable-fix-memory-leak-in-pdcs_register_pathentries.patch b/queue-5.10/parisc-pdc_stable-fix-memory-leak-in-pdcs_register_pathentries.patch new file mode 100644 index 00000000000..6b7b6274ab0 --- /dev/null +++ b/queue-5.10/parisc-pdc_stable-fix-memory-leak-in-pdcs_register_pathentries.patch @@ -0,0 +1,42 @@ +From d24846a4246b6e61ecbd036880a4adf61681d241 Mon Sep 17 00:00:00 2001 +From: Miaoqian Lin +Date: Thu, 20 Jan 2022 12:18:12 +0000 +Subject: parisc: pdc_stable: Fix memory leak in pdcs_register_pathentries +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Miaoqian Lin + +commit d24846a4246b6e61ecbd036880a4adf61681d241 upstream. + +kobject_init_and_add() takes reference even when it fails. +According to the doc of kobject_init_and_add(): + + If this function returns an error, kobject_put() must be called to + properly clean up the memory associated with the object. 
+ +Fix memory leak by calling kobject_put(). + +Fixes: 73f368cf679b ("Kobject: change drivers/parisc/pdc_stable.c to use kobject_init_and_add") +Signed-off-by: Miaoqian Lin +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/parisc/pdc_stable.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/parisc/pdc_stable.c ++++ b/drivers/parisc/pdc_stable.c +@@ -979,8 +979,10 @@ pdcs_register_pathentries(void) + entry->kobj.kset = paths_kset; + err = kobject_init_and_add(&entry->kobj, &ktype_pdcspath, NULL, + "%s", entry->name); +- if (err) ++ if (err) { ++ kobject_put(&entry->kobj); + return err; ++ } + + /* kobject is now registered */ + write_lock(&entry->rw_lock); diff --git a/queue-5.10/perf-evsel-override-attr-sample_period-for-non-libpfm4-events.patch b/queue-5.10/perf-evsel-override-attr-sample_period-for-non-libpfm4-events.patch new file mode 100644 index 00000000000..2c018750276 --- /dev/null +++ b/queue-5.10/perf-evsel-override-attr-sample_period-for-non-libpfm4-events.patch @@ -0,0 +1,89 @@ +From 3606c0e1a1050d397ad759a62607e419fd8b0ccb Mon Sep 17 00:00:00 2001 +From: German Gomez +Date: Tue, 18 Jan 2022 14:40:54 +0000 +Subject: perf evsel: Override attr->sample_period for non-libpfm4 events +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: German Gomez + +commit 3606c0e1a1050d397ad759a62607e419fd8b0ccb upstream. + +A previous patch preventing "attr->sample_period" values from being +overridden in pfm events changed a related behaviour in arm-spe. + +Before said patch: + + perf record -c 10000 -e arm_spe_0// -- sleep 1 + +Would yield an SPE event with period=10000. After the patch, the period +in "-c 10000" was being ignored because the arm-spe code initializes +sample_period to a non-zero value. + +This patch restores the previous behaviour for non-libpfm4 events. 
+ +Fixes: ae5dcc8abe31 (“perf record: Prevent override of attr->sample_period for libpfm4 events”) +Reported-by: Chase Conklin +Signed-off-by: German Gomez +Cc: Alexander Shishkin +Cc: Ian Rogers +Cc: Jiri Olsa +Cc: John Fastabend +Cc: KP Singh +Cc: Mark Rutland +Cc: Martin KaFai Lau +Cc: Namhyung Kim +Cc: Song Liu +Cc: Stephane Eranian +Cc: Yonghong Song +Cc: bpf@vger.kernel.org +Cc: netdev@vger.kernel.org +Link: http://lore.kernel.org/lkml/20220118144054.2541-1-german.gomez@arm.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman +--- + tools/perf/util/evsel.c | 25 +++++++++++++++++-------- + 1 file changed, 17 insertions(+), 8 deletions(-) + +--- a/tools/perf/util/evsel.c ++++ b/tools/perf/util/evsel.c +@@ -1014,6 +1014,17 @@ struct evsel_config_term *__evsel__get_c + return found_term; + } + ++static void evsel__set_default_freq_period(struct record_opts *opts, ++ struct perf_event_attr *attr) ++{ ++ if (opts->freq) { ++ attr->freq = 1; ++ attr->sample_freq = opts->freq; ++ } else { ++ attr->sample_period = opts->default_interval; ++ } ++} ++ + /* + * The enable_on_exec/disabled value strategy: + * +@@ -1080,14 +1091,12 @@ void evsel__config(struct evsel *evsel, + * We default some events to have a default interval. But keep + * it a weak assumption overridable by the user. + */ +- if (!attr->sample_period) { +- if (opts->freq) { +- attr->freq = 1; +- attr->sample_freq = opts->freq; +- } else { +- attr->sample_period = opts->default_interval; +- } +- } ++ if ((evsel->is_libpfm_event && !attr->sample_period) || ++ (!evsel->is_libpfm_event && (!attr->sample_period || ++ opts->user_freq != UINT_MAX || ++ opts->user_interval != ULLONG_MAX))) ++ evsel__set_default_freq_period(opts, attr); ++ + /* + * If attr->freq was set (here or earlier), ask for period + * to be sampled. 
diff --git a/queue-5.10/series b/queue-5.10/series index 3f15eebb00d..e4627c7eac6 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -503,3 +503,21 @@ revert-net-mlx5-add-retry-mechanism-to-the-command-entry-index-allocation.patch powerpc-cell-fix-clang-wimplicit-fallthrough-warning.patch powerpc-fsl-dts-enable-wa-for-erratum-a-009885-on-fman3l-mdio-buses.patch block-fix-fsync-always-failed-if-once-failed.patch +arm64-bpf-remove-128mb-limit-for-bpf-jit-programs.patch +bpftool-remove-inclusion-of-utilities.mak-from-makefiles.patch +xdp-check-prog-type-before-updating-bpf-link.patch +perf-evsel-override-attr-sample_period-for-non-libpfm4-events.patch +ipv4-update-fib_info_cnt-under-spinlock-protection.patch +ipv4-avoid-quadratic-behavior-in-netns-dismantle.patch +net-fsl-xgmac_mdio-add-workaround-for-erratum-a-009885.patch +net-fsl-xgmac_mdio-fix-incorrect-iounmap-when-removing-module.patch +parisc-pdc_stable-fix-memory-leak-in-pdcs_register_pathentries.patch +f2fs-compress-fix-potential-deadlock-of-compress-file.patch +f2fs-fix-to-reserve-space-for-io-align-feature.patch +af_unix-annote-lockless-accesses-to-unix_tot_inflight-gc_in_progress.patch +clk-emit-a-stern-warning-with-writable-debugfs-enabled.patch +clk-si5341-fix-clock-hw-provider-cleanup.patch +arm-dts-gpio-ranges-property-is-now-required.patch +net-smc-fix-hung_task-when-removing-smc-r-devices.patch +net-axienet-increase-reset-timeout.patch +net-axienet-wait-for-phyrstcmplt-after-core-reset.patch diff --git a/queue-5.10/xdp-check-prog-type-before-updating-bpf-link.patch b/queue-5.10/xdp-check-prog-type-before-updating-bpf-link.patch new file mode 100644 index 00000000000..de0a946d819 --- /dev/null +++ b/queue-5.10/xdp-check-prog-type-before-updating-bpf-link.patch @@ -0,0 +1,46 @@ +From 382778edc8262b7535f00523e9eb22edba1b9816 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Fri, 7 Jan 2022 23:11:13 +0100 +Subject: xdp: check prog type before updating BPF link 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Toke Høiland-Jørgensen + +commit 382778edc8262b7535f00523e9eb22edba1b9816 upstream. + +The bpf_xdp_link_update() function didn't check the program type before +updating the program, which made it possible to install any program type as +an XDP program, which is obviously not good. Syzbot managed to trigger this +by swapping in an LWT program on the XDP hook which would crash in a helper +call. + +Fix this by adding a check and bailing out if the types don't match. + +Fixes: 026a4c28e1db ("bpf, xdp: Implement LINK_UPDATE for BPF XDP link") +Reported-by: syzbot+983941aa85af6ded1fd9@syzkaller.appspotmail.com +Acked-by: Andrii Nakryiko +Signed-off-by: Toke Høiland-Jørgensen +Link: https://lore.kernel.org/r/20220107221115.326171-1-toke@redhat.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -9339,6 +9339,12 @@ static int bpf_xdp_link_update(struct bp + goto out_unlock; + } + old_prog = link->prog; ++ if (old_prog->type != new_prog->type || ++ old_prog->expected_attach_type != new_prog->expected_attach_type) { ++ err = -EINVAL; ++ goto out_unlock; ++ } ++ + if (old_prog == new_prog) { + /* no-op, don't disturb drivers */ + bpf_prog_put(new_prog);