From a0bd18985c85089c6e1219583c99a47febf8245f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 23 Jul 2023 21:25:09 -0400 Subject: [PATCH] Fixes for 5.4 Signed-off-by: Sasha Levin --- .../arm64-mm-fix-va-range-sanity-check.patch | 106 +++++++++ ...address-kcsan-report-on-bpf_lru_list.patch | 177 ++++++++++++++ ...heck-debug_objects_enabled-before-re.patch | 74 ++++++ ...devlink_port_type_warn-source-device.patch | 77 +++++++ ...ix-missing-irq-check-in-au1200fb_drv.patch | 40 ++++ ...warn-about-invalid-left-right-margin.patch | 43 ++++ ...vf-fix-use-after-free-in-free_netdev.patch | 215 ++++++++++++++++++ ...ix-igb_down-hung-on-surprise-removal.patch | 89 ++++++++ ...on-t-drop-packet-from-non-root-netns.patch | 50 ++++ ...uption-for-raid456-when-reshape-rest.patch | 60 +++++ ...event-soft-lockup-while-flush-writes.patch | 79 +++++++ ...mum-limit-of-allocated-index-in-nbd_.patch | 41 ++++ ...cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch | 78 +++++++ ...ipv6-check-return-value-of-pskb_trim.patch | 39 ++++ ...limit-of-tcp_linger2-with-tcp_fin_ti.patch | 73 ++++++ ...les-can-t-schedule-in-nft_chain_vali.patch | 64 ++++++ ...les-fix-spurious-set-element-inserti.patch | 49 ++++ ...amd_pinconf_set-for-all-config-optio.patch | 108 +++++++++ ...ure-timer-id-search-loop-limit-is-va.patch | 115 ++++++++++ ...-the-lookup-process-failing-to-get-s.patch | 113 +++++++++ ...-balance-task-to-its-current-running.patch | 96 ++++++++ queue-5.4/series | 29 +++ .../spi-bcm63xx-fix-max-prepend-length.patch | 47 ++++ ...data-races-around-fastopenq.max_qlen.patch | 77 +++++++ ...-data-races-around-rskq_defer_accept.patch | 53 +++++ ...nnotate-data-races-around-tp-linger2.patch | 52 +++++ ...e-data-races-around-tp-notsent_lowat.patch | 64 ++++++ ...te-data-races-around-tp-tcp_tx_delay.patch | 46 ++++ ...mvm-avoid-baid-size-integer-overflow.patch | 47 ++++ ...ix-wstringop-overflow-warning-in-ioc.patch | 71 ++++++ 30 files changed, 2272 insertions(+) create mode 100644 queue-5.4/arm64-mm-fix-va-range-sanity-check.patch create mode 100644 queue-5.4/bpf-address-kcsan-report-on-bpf_lru_list.patch create mode 100644 queue-5.4/debugobjects-recheck-debug_objects_enabled-before-re.patch create mode 100644 queue-5.4/devlink-report-devlink_port_type_warn-source-device.patch create mode 100644 queue-5.4/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch create mode 100644 queue-5.4/fbdev-imxfb-warn-about-invalid-left-right-margin.patch create mode 100644 queue-5.4/iavf-fix-use-after-free-in-free_netdev.patch create mode 100644 queue-5.4/igb-fix-igb_down-hung-on-surprise-removal.patch create mode 100644 queue-5.4/llc-don-t-drop-packet-from-non-root-netns.patch create mode 100644 queue-5.4/md-fix-data-corruption-for-raid456-when-reshape-rest.patch create mode 100644 queue-5.4/md-raid10-prevent-soft-lockup-while-flush-writes.patch create mode 100644 queue-5.4/nbd-add-the-maximum-limit-of-allocated-index-in-nbd_.patch create mode 100644 queue-5.4/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch create mode 100644 queue-5.4/net-ipv6-check-return-value-of-pskb_trim.patch create mode 100644 queue-5.4/net-replace-the-limit-of-tcp_linger2-with-tcp_fin_ti.patch create mode 100644 queue-5.4/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch create mode 100644 queue-5.4/netfilter-nf_tables-fix-spurious-set-element-inserti.patch create mode 100644 queue-5.4/pinctrl-amd-use-amd_pinconf_set-for-all-config-optio.patch create mode 100644 queue-5.4/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch create mode 100644 queue-5.4/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch create mode 100644 queue-5.4/sched-fair-don-t-balance-task-to-its-current-running.patch create mode 100644 queue-5.4/spi-bcm63xx-fix-max-prepend-length.patch create mode 100644 queue-5.4/tcp-annotate-data-races-around-fastopenq.max_qlen.patch create mode 100644 queue-5.4/tcp-annotate-data-races-around-rskq_defer_accept.patch create mode 100644 queue-5.4/tcp-annotate-data-races-around-tp-linger2.patch create mode 100644 queue-5.4/tcp-annotate-data-races-around-tp-notsent_lowat.patch create mode 100644 queue-5.4/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch create mode 100644 queue-5.4/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch create mode 100644 queue-5.4/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch diff --git a/queue-5.4/arm64-mm-fix-va-range-sanity-check.patch b/queue-5.4/arm64-mm-fix-va-range-sanity-check.patch new file mode 100644 index 00000000000..f9104cf0bdd --- /dev/null +++ b/queue-5.4/arm64-mm-fix-va-range-sanity-check.patch @@ -0,0 +1,106 @@ +From 8de8bcd7f0e1ec75648551ace9fedfc854315920 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 11:26:28 +0100 +Subject: arm64: mm: fix VA-range sanity check + +From: Mark Rutland + +[ Upstream commit ab9b4008092c86dc12497af155a0901cc1156999 ] + +Both create_mapping_noalloc() and update_mapping_prot() sanity-check +their 'virt' parameter, but the check itself doesn't make much sense. +The condition used today appears to be a historical accident. + +The sanity-check condition: + + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { + [ ... warning here ... ] + return; + } + +... can only be true for the KASAN shadow region or the module region, +and there's no reason to exclude these specifically for creating and +updateing mappings. + +When arm64 support was first upstreamed in commit: + + c1cc1552616d0f35 ("arm64: MMU initialisation") + +... the condition was: + + if (virt < VMALLOC_START) { + [ ... warning here ... ] + return; + } + +At the time, VMALLOC_START was the lowest kernel address, and this was +checking whether 'virt' would be translated via TTBR1. + +Subsequently in commit: + + 14c127c957c1c607 ("arm64: mm: Flip kernel VA space") + +... the condition was changed to: + + if ((virt >= VA_START) && (virt < VMALLOC_START)) { + [ ... warning here ... ] + return; + } + +This appear to have been a thinko. The commit moved the linear map to +the bottom of the kernel address space, with VMALLOC_START being at the +halfway point. The old condition would warn for changes to the linear +map below this, and at the time VA_START was the end of the linear map. + +Subsequently we cleaned up the naming of VA_START in commit: + + 77ad4ce69321abbe ("arm64: memory: rename VA_START to PAGE_END") + +... keeping the erroneous condition as: + + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { + [ ... warning here ... ] + return; + } + +Correct the condition to check against the start of the TTBR1 address +space, which is currently PAGE_OFFSET. This simplifies the logic, and +more clearly matches the "outside kernel range" message in the warning. + +Signed-off-by: Mark Rutland +Cc: Russell King +Cc: Steve Capper +Cc: Will Deacon +Reviewed-by: Russell King (Oracle) +Link: https://lore.kernel.org/r/20230615102628.1052103-1-mark.rutland@arm.com +Signed-off-by: Catalin Marinas +Signed-off-by: Sasha Levin +--- + arch/arm64/mm/mmu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c +index 5cf575f23af28..8e934bb44f12e 100644 +--- a/arch/arm64/mm/mmu.c ++++ b/arch/arm64/mm/mmu.c +@@ -399,7 +399,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) + static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) + { +- if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { ++ if (virt < PAGE_OFFSET) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; +@@ -426,7 +426,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + static void update_mapping_prot(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) + { +- if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { ++ if (virt < PAGE_OFFSET) { + pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; +-- +2.39.2 + diff --git a/queue-5.4/bpf-address-kcsan-report-on-bpf_lru_list.patch b/queue-5.4/bpf-address-kcsan-report-on-bpf_lru_list.patch new file mode 100644 index 00000000000..edfcd986240 --- /dev/null +++ b/queue-5.4/bpf-address-kcsan-report-on-bpf_lru_list.patch @@ -0,0 +1,177 @@ +From 1e5428d5217a30952df66f845de9a533929933fd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 May 2023 21:37:48 -0700 +Subject: bpf: Address KCSAN report on bpf_lru_list + +From: Martin KaFai Lau + +[ Upstream commit ee9fd0ac3017c4313be91a220a9ac4c99dde7ad4 ] + +KCSAN reported a data-race when accessing node->ref. +Although node->ref does not have to be accurate, +take this chance to use a more common READ_ONCE() and WRITE_ONCE() +pattern instead of data_race(). + +There is an existing bpf_lru_node_is_ref() and bpf_lru_node_set_ref(). +This patch also adds bpf_lru_node_clear_ref() to do the +WRITE_ONCE(node->ref, 0) also. + +================================================================== +BUG: KCSAN: data-race in __bpf_lru_list_rotate / __htab_lru_percpu_map_update_elem + +write to 0xffff888137038deb of 1 bytes by task 11240 on cpu 1: +__bpf_lru_node_move kernel/bpf/bpf_lru_list.c:113 [inline] +__bpf_lru_list_rotate_active kernel/bpf/bpf_lru_list.c:149 [inline] +__bpf_lru_list_rotate+0x1bf/0x750 kernel/bpf/bpf_lru_list.c:240 +bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:329 [inline] +bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline] +bpf_lru_pop_free+0x638/0xe20 kernel/bpf/bpf_lru_list.c:499 +prealloc_lru_pop kernel/bpf/hashtab.c:290 [inline] +__htab_lru_percpu_map_update_elem+0xe7/0x820 kernel/bpf/hashtab.c:1316 +bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313 +bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200 +generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687 +bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534 +__sys_bpf+0x338/0x810 +__do_sys_bpf kernel/bpf/syscall.c:5096 [inline] +__se_sys_bpf kernel/bpf/syscall.c:5094 [inline] +__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read to 0xffff888137038deb of 1 bytes by task 11241 on cpu 0: +bpf_lru_node_set_ref kernel/bpf/bpf_lru_list.h:70 [inline] +__htab_lru_percpu_map_update_elem+0x2f1/0x820 kernel/bpf/hashtab.c:1332 +bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313 +bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200 +generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687 +bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534 +__sys_bpf+0x338/0x810 +__do_sys_bpf kernel/bpf/syscall.c:5096 [inline] +__se_sys_bpf kernel/bpf/syscall.c:5094 [inline] +__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +value changed: 0x01 -> 0x00 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 11241 Comm: syz-executor.3 Not tainted 6.3.0-rc7-syzkaller-00136-g6a66fdd29ea1 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023 +================================================================== + +Reported-by: syzbot+ebe648a84e8784763f82@syzkaller.appspotmail.com +Signed-off-by: Martin KaFai Lau +Acked-by: Yonghong Song +Link: https://lore.kernel.org/r/20230511043748.1384166-1-martin.lau@linux.dev +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/bpf_lru_list.c | 21 +++++++++++++-------- + kernel/bpf/bpf_lru_list.h | 7 ++----- + 2 files changed, 15 insertions(+), 13 deletions(-) + +diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c +index d99e89f113c43..3dabdd137d102 100644 +--- a/kernel/bpf/bpf_lru_list.c ++++ b/kernel/bpf/bpf_lru_list.c +@@ -41,7 +41,12 @@ static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l) + /* bpf_lru_node helpers */ + static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node) + { +- return node->ref; ++ return READ_ONCE(node->ref); ++} ++ ++static void bpf_lru_node_clear_ref(struct bpf_lru_node *node) ++{ ++ WRITE_ONCE(node->ref, 0); + } + + static void bpf_lru_list_count_inc(struct bpf_lru_list *l, +@@ -89,7 +94,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l, + + bpf_lru_list_count_inc(l, tgt_type); + node->type = tgt_type; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_move(&node->list, &l->lists[tgt_type]); + } + +@@ -110,7 +115,7 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l, + bpf_lru_list_count_inc(l, tgt_type); + node->type = tgt_type; + } +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + + /* If the moving node is the next_inactive_rotation candidate, + * move the next_inactive_rotation pointer also. +@@ -353,7 +358,7 @@ static void __local_list_add_pending(struct bpf_lru *lru, + *(u32 *)((void *)node + lru->hash_offset) = hash; + node->cpu = cpu; + node->type = BPF_LRU_LOCAL_LIST_T_PENDING; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_add(&node->list, local_pending_list(loc_l)); + } + +@@ -419,7 +424,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru, + if (!list_empty(free_list)) { + node = list_first_entry(free_list, struct bpf_lru_node, list); + *(u32 *)((void *)node + lru->hash_offset) = hash; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); + } + +@@ -522,7 +527,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru, + } + + node->type = BPF_LRU_LOCAL_LIST_T_FREE; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_move(&node->list, local_free_list(loc_l)); + + raw_spin_unlock_irqrestore(&loc_l->lock, flags); +@@ -568,7 +573,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, + + node = (struct bpf_lru_node *)(buf + node_offset); + node->type = BPF_LRU_LIST_T_FREE; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); + buf += elem_size; + } +@@ -594,7 +599,7 @@ static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, + node = (struct bpf_lru_node *)(buf + node_offset); + node->cpu = cpu; + node->type = BPF_LRU_LIST_T_FREE; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); + i++; + buf += elem_size; +diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h +index f02504640e185..41f8fea530c8d 100644 +--- a/kernel/bpf/bpf_lru_list.h ++++ b/kernel/bpf/bpf_lru_list.h +@@ -63,11 +63,8 @@ struct bpf_lru { + + static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node) + { +- /* ref is an approximation on access frequency. It does not +- * have to be very accurate. Hence, no protection is used. +- */ +- if (!node->ref) +- node->ref = 1; ++ if (!READ_ONCE(node->ref)) ++ WRITE_ONCE(node->ref, 1); + } + + int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, +-- +2.39.2 + diff --git a/queue-5.4/debugobjects-recheck-debug_objects_enabled-before-re.patch b/queue-5.4/debugobjects-recheck-debug_objects_enabled-before-re.patch new file mode 100644 index 00000000000..12a9a23dbb2 --- /dev/null +++ b/queue-5.4/debugobjects-recheck-debug_objects_enabled-before-re.patch @@ -0,0 +1,74 @@ +From 3f9b05ff93ea80f06045912ae0c882684155cdb0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Jun 2023 19:19:02 +0900 +Subject: debugobjects: Recheck debug_objects_enabled before reporting + +From: Tetsuo Handa + +[ Upstream commit 8b64d420fe2450f82848178506d3e3a0bd195539 ] + +syzbot is reporting false a positive ODEBUG message immediately after +ODEBUG was disabled due to OOM. + + [ 1062.309646][T22911] ODEBUG: Out of memory. ODEBUG disabled + [ 1062.886755][ T5171] ------------[ cut here ]------------ + [ 1062.892770][ T5171] ODEBUG: assert_init not available (active state 0) object: ffffc900056afb20 object type: timer_list hint: process_timeout+0x0/0x40 + + CPU 0 [ T5171] CPU 1 [T22911] + -------------- -------------- + debug_object_assert_init() { + if (!debug_objects_enabled) + return; + db = get_bucket(addr); + lookup_object_or_alloc() { + debug_objects_enabled = 0; + return NULL; + } + debug_objects_oom() { + pr_warn("Out of memory. ODEBUG disabled\n"); + // all buckets get emptied here, and + } + lookup_object_or_alloc(addr, db, descr, false, true) { + // this bucket is already empty. + return ERR_PTR(-ENOENT); + } + // Emits false positive warning. + debug_print_object(&o, "assert_init"); + } + +Recheck debug_object_enabled in debug_print_object() to avoid that. + +Reported-by: syzbot +Suggested-by: Thomas Gleixner +Signed-off-by: Tetsuo Handa +Signed-off-by: Thomas Gleixner +Link: https://lore.kernel.org/r/492fe2ae-5141-d548-ebd5-62f5fe2e57f7@I-love.SAKURA.ne.jp +Closes: https://syzkaller.appspot.com/bug?extid=7937ba6a50bdd00fffdf +Signed-off-by: Sasha Levin +--- + lib/debugobjects.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/lib/debugobjects.c b/lib/debugobjects.c +index 26fa04335537b..b0e4301d74954 100644 +--- a/lib/debugobjects.c ++++ b/lib/debugobjects.c +@@ -474,6 +474,15 @@ static void debug_print_object(struct debug_obj *obj, char *msg) + struct debug_obj_descr *descr = obj->descr; + static int limit; + ++ /* ++ * Don't report if lookup_object_or_alloc() by the current thread ++ * failed because lookup_object_or_alloc()/debug_objects_oom() by a ++ * concurrent thread turned off debug_objects_enabled and cleared ++ * the hash buckets. ++ */ ++ if (!debug_objects_enabled) ++ return; ++ + if (limit < 5 && descr != descr_test) { + void *hint = descr->debug_hint ? + descr->debug_hint(obj->object) : NULL; +-- +2.39.2 + diff --git a/queue-5.4/devlink-report-devlink_port_type_warn-source-device.patch b/queue-5.4/devlink-report-devlink_port_type_warn-source-device.patch new file mode 100644 index 00000000000..fa06ed629ab --- /dev/null +++ b/queue-5.4/devlink-report-devlink_port_type_warn-source-device.patch @@ -0,0 +1,77 @@ +From 634c51780e8327ad19a3169eb0cae02bd9ae033d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 11:54:47 +0200 +Subject: devlink: report devlink_port_type_warn source device + +From: Petr Oros + +[ Upstream commit a52305a81d6bb74b90b400dfa56455d37872fe4b ] + +devlink_port_type_warn is scheduled for port devlink and warning +when the port type is not set. But from this warning it is not easy +found out which device (driver) has no devlink port set. + +[ 3709.975552] Type was not set for devlink port. +[ 3709.975579] WARNING: CPU: 1 PID: 13092 at net/devlink/leftover.c:6775 devlink_port_type_warn+0x11/0x20 +[ 3709.993967] Modules linked in: openvswitch nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink bluetooth rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs vhost_net vhost vhost_iotlb tap tun bridge stp llc qrtr intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm x86_pkg_temp_thermal mlx5_ib intel_powerclamp coretemp dell_wmi ledtrig_audio sparse_keymap ipmi_ssif kvm_intel ib_uverbs rfkill ib_core video kvm iTCO_wdt acpi_ipmi intel_vsec irqbypass ipmi_si iTCO_vendor_support dcdbas ipmi_devintf mei_me ipmi_msghandler rapl mei intel_cstate isst_if_mmio isst_if_mbox_pci dell_smbios intel_uncore isst_if_common i2c_i801 dell_wmi_descriptor wmi_bmof i2c_smbus intel_pch_thermal pcspkr acpi_power_meter xfs libcrc32c sd_mod sg nvme_tcp mgag200 i2c_algo_bit nvme_fabrics drm_shmem_helper drm_kms_helper nvme syscopyarea ahci sysfillrect sysimgblt nvme_core fb_sys_fops crct10dif_pclmul libahci mlx5_core sfc crc32_pclmul nvme_common drm +[ 3709.994030] crc32c_intel mtd t10_pi mlxfw libata tg3 mdio megaraid_sas psample ghash_clmulni_intel pci_hyperv_intf wmi dm_multipath sunrpc dm_mirror dm_region_hash dm_log dm_mod be2iscsi bnx2i cnic uio cxgb4i cxgb4 tls libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse +[ 3710.108431] CPU: 1 PID: 13092 Comm: kworker/1:1 Kdump: loaded Not tainted 5.14.0-319.el9.x86_64 #1 +[ 3710.108435] Hardware name: Dell Inc. PowerEdge R750/0PJ80M, BIOS 1.8.2 09/14/2022 +[ 3710.108437] Workqueue: events devlink_port_type_warn +[ 3710.108440] RIP: 0010:devlink_port_type_warn+0x11/0x20 +[ 3710.108443] Code: 84 76 fe ff ff 48 c7 03 20 0e 1a ad 31 c0 e9 96 fd ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 48 c7 c7 18 24 4e ad e8 ef 71 62 ff <0f> 0b c3 cc cc cc cc 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f6 87 +[ 3710.108445] RSP: 0018:ff3b6d2e8b3c7e90 EFLAGS: 00010282 +[ 3710.108447] RAX: 0000000000000000 RBX: ff366d6580127080 RCX: 0000000000000027 +[ 3710.108448] RDX: 0000000000000027 RSI: 00000000ffff86de RDI: ff366d753f41f8c8 +[ 3710.108449] RBP: ff366d658ff5a0c0 R08: ff366d753f41f8c0 R09: ff3b6d2e8b3c7e18 +[ 3710.108450] R10: 0000000000000001 R11: 0000000000000023 R12: ff366d753f430600 +[ 3710.108451] R13: ff366d753f436900 R14: 0000000000000000 R15: ff366d753f436905 +[ 3710.108452] FS: 0000000000000000(0000) GS:ff366d753f400000(0000) knlGS:0000000000000000 +[ 3710.108453] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 3710.108454] CR2: 00007f1c57bc74e0 CR3: 000000111d26a001 CR4: 0000000000773ee0 +[ 3710.108456] PKRU: 55555554 +[ 3710.108457] Call Trace: +[ 3710.108458] +[ 3710.108459] process_one_work+0x1e2/0x3b0 +[ 3710.108466] ? rescuer_thread+0x390/0x390 +[ 3710.108468] worker_thread+0x50/0x3a0 +[ 3710.108471] ? rescuer_thread+0x390/0x390 +[ 3710.108473] kthread+0xdd/0x100 +[ 3710.108477] ? kthread_complete_and_exit+0x20/0x20 +[ 3710.108479] ret_from_fork+0x1f/0x30 +[ 3710.108485] +[ 3710.108486] ---[ end trace 1b4b23cd0c65d6a0 ]--- + +After patch: +[ 402.473064] ice 0000:41:00.0: Type was not set for devlink port. +[ 402.473064] ice 0000:41:00.1: Type was not set for devlink port. + +Signed-off-by: Petr Oros +Reviewed-by: Pavan Chebbi +Reviewed-by: Jakub Kicinski +Link: https://lore.kernel.org/r/20230615095447.8259-1-poros@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/devlink.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/core/devlink.c b/net/core/devlink.c +index 2dd354d869cd7..b4dabe5d89f72 100644 +--- a/net/core/devlink.c ++++ b/net/core/devlink.c +@@ -6299,7 +6299,10 @@ EXPORT_SYMBOL_GPL(devlink_free); + + static void devlink_port_type_warn(struct work_struct *work) + { +- WARN(true, "Type was not set for devlink port."); ++ struct devlink_port *port = container_of(to_delayed_work(work), ++ struct devlink_port, ++ type_warn_dw); ++ dev_warn(port->devlink->dev, "Type was not set for devlink port."); + } + + static bool devlink_port_type_should_warn(struct devlink_port *devlink_port) +-- +2.39.2 + diff --git a/queue-5.4/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch b/queue-5.4/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch new file mode 100644 index 00000000000..1c0e655e29d --- /dev/null +++ b/queue-5.4/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch @@ -0,0 +1,40 @@ +From e25dfa5ac6e4c44a8de6036201e1be18c4ecf443 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 15 Jul 2023 16:16:56 +0800 +Subject: fbdev: au1200fb: Fix missing IRQ check in au1200fb_drv_probe + +From: Zhang Shurong + +[ Upstream commit 4e88761f5f8c7869f15a2046b1a1116f4fab4ac8 ] + +This func misses checking for platform_get_irq()'s call and may passes the +negative error codes to request_irq(), which takes unsigned IRQ #, +causing it to fail with -EINVAL, overriding an original error code. + +Fix this by stop calling request_irq() with invalid IRQ #s. + +Fixes: 1630d85a8312 ("au1200fb: fix hardcoded IRQ") +Signed-off-by: Zhang Shurong +Signed-off-by: Helge Deller +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/au1200fb.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c +index 43a4dddaafd52..d0335d4d5ab54 100644 +--- a/drivers/video/fbdev/au1200fb.c ++++ b/drivers/video/fbdev/au1200fb.c +@@ -1732,6 +1732,9 @@ static int au1200fb_drv_probe(struct platform_device *dev) + + /* Now hook interrupt too */ + irq = platform_get_irq(dev, 0); ++ if (irq < 0) ++ return irq; ++ + ret = request_irq(irq, au1200fb_handle_irq, + IRQF_SHARED, "lcd", (void *)dev); + if (ret) { +-- +2.39.2 + diff --git a/queue-5.4/fbdev-imxfb-warn-about-invalid-left-right-margin.patch b/queue-5.4/fbdev-imxfb-warn-about-invalid-left-right-margin.patch new file mode 100644 index 00000000000..585b94175f1 --- /dev/null +++ b/queue-5.4/fbdev-imxfb-warn-about-invalid-left-right-margin.patch @@ -0,0 +1,43 @@ +From a04d9ccef0b4a1f90d0f96da18d27601add6d807 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Jun 2023 15:24:37 +0200 +Subject: fbdev: imxfb: warn about invalid left/right margin + +From: Martin Kaiser + +[ Upstream commit 4e47382fbca916d7db95cbf9e2d7ca2e9d1ca3fe ] + +Warn about invalid var->left_margin or var->right_margin. Their values +are read from the device tree. + +We store var->left_margin-3 and var->right_margin-1 in register +fields. These fields should be >= 0. + +Fixes: 7e8549bcee00 ("imxfb: Fix margin settings") +Signed-off-by: Martin Kaiser +Signed-off-by: Helge Deller +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/imxfb.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c +index ffde3107104bc..dbc8808b093a5 100644 +--- a/drivers/video/fbdev/imxfb.c ++++ b/drivers/video/fbdev/imxfb.c +@@ -601,10 +601,10 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf + if (var->hsync_len < 1 || var->hsync_len > 64) + printk(KERN_ERR "%s: invalid hsync_len %d\n", + info->fix.id, var->hsync_len); +- if (var->left_margin > 255) ++ if (var->left_margin < 3 || var->left_margin > 255) + printk(KERN_ERR "%s: invalid left_margin %d\n", + info->fix.id, var->left_margin); +- if (var->right_margin > 255) ++ if (var->right_margin < 1 || var->right_margin > 255) + printk(KERN_ERR "%s: invalid right_margin %d\n", + info->fix.id, var->right_margin); + if (var->yres < 1 || var->yres > ymax_mask) +-- +2.39.2 + diff --git a/queue-5.4/iavf-fix-use-after-free-in-free_netdev.patch b/queue-5.4/iavf-fix-use-after-free-in-free_netdev.patch new file mode 100644 index 00000000000..f64b6dea5e0 --- /dev/null +++ b/queue-5.4/iavf-fix-use-after-free-in-free_netdev.patch @@ -0,0 +1,215 @@ +From f8349f8f3af5f904d0f0db6d6cd33649b3e73dc6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 19:11:47 +0800 +Subject: iavf: Fix use-after-free in free_netdev + +From: Ding Hui + +[ Upstream commit 5f4fa1672d98fe99d2297b03add35346f1685d6b ] + +We do netif_napi_add() for all allocated q_vectors[], but potentially +do netif_napi_del() for part of them, then kfree q_vectors and leave +invalid pointers at dev->napi_list. + +Reproducer: + + [root@host ~]# cat repro.sh + #!/bin/bash + + pf_dbsf="0000:41:00.0" + vf0_dbsf="0000:41:02.0" + g_pids=() + + function do_set_numvf() + { + echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs + sleep $((RANDOM%3+1)) + echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs + sleep $((RANDOM%3+1)) + } + + function do_set_channel() + { + local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/) + [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; } + ifconfig $nic 192.168.18.5 netmask 255.255.255.0 + ifconfig $nic up + ethtool -L $nic combined 1 + ethtool -L $nic combined 4 + sleep $((RANDOM%3)) + } + + function on_exit() + { + local pid + for pid in "${g_pids[@]}"; do + kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null + done + g_pids=() + } + + trap "on_exit; exit" EXIT + + while :; do do_set_numvf ; done & + g_pids+=($!) + while :; do do_set_channel ; done & + g_pids+=($!) + + wait + +Result: + +[ 4093.900222] ================================================================== +[ 4093.900230] BUG: KASAN: use-after-free in free_netdev+0x308/0x390 +[ 4093.900232] Read of size 8 at addr ffff88b4dc145640 by task repro.sh/6699 +[ 4093.900233] +[ 4093.900236] CPU: 10 PID: 6699 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 +[ 4093.900238] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021 +[ 4093.900239] Call Trace: +[ 4093.900244] dump_stack+0x71/0xab +[ 4093.900249] print_address_description+0x6b/0x290 +[ 4093.900251] ? free_netdev+0x308/0x390 +[ 4093.900252] kasan_report+0x14a/0x2b0 +[ 4093.900254] free_netdev+0x308/0x390 +[ 4093.900261] iavf_remove+0x825/0xd20 [iavf] +[ 4093.900265] pci_device_remove+0xa8/0x1f0 +[ 4093.900268] device_release_driver_internal+0x1c6/0x460 +[ 4093.900271] pci_stop_bus_device+0x101/0x150 +[ 4093.900273] pci_stop_and_remove_bus_device+0xe/0x20 +[ 4093.900275] pci_iov_remove_virtfn+0x187/0x420 +[ 4093.900277] ? pci_iov_add_virtfn+0xe10/0xe10 +[ 4093.900278] ? pci_get_subsys+0x90/0x90 +[ 4093.900280] sriov_disable+0xed/0x3e0 +[ 4093.900282] ? bus_find_device+0x12d/0x1a0 +[ 4093.900290] i40e_free_vfs+0x754/0x1210 [i40e] +[ 4093.900298] ? i40e_reset_all_vfs+0x880/0x880 [i40e] +[ 4093.900299] ? pci_get_device+0x7c/0x90 +[ 4093.900300] ? pci_get_subsys+0x90/0x90 +[ 4093.900306] ? pci_vfs_assigned.part.7+0x144/0x210 +[ 4093.900309] ? __mutex_lock_slowpath+0x10/0x10 +[ 4093.900315] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] +[ 4093.900318] sriov_numvfs_store+0x214/0x290 +[ 4093.900320] ? sriov_totalvfs_show+0x30/0x30 +[ 4093.900321] ? __mutex_lock_slowpath+0x10/0x10 +[ 4093.900323] ? __check_object_size+0x15a/0x350 +[ 4093.900326] kernfs_fop_write+0x280/0x3f0 +[ 4093.900329] vfs_write+0x145/0x440 +[ 4093.900330] ksys_write+0xab/0x160 +[ 4093.900332] ? __ia32_sys_read+0xb0/0xb0 +[ 4093.900334] ? fput_many+0x1a/0x120 +[ 4093.900335] ? filp_close+0xf0/0x130 +[ 4093.900338] do_syscall_64+0xa0/0x370 +[ 4093.900339] ? page_fault+0x8/0x30 +[ 4093.900341] entry_SYSCALL_64_after_hwframe+0x65/0xca +[ 4093.900357] RIP: 0033:0x7f16ad4d22c0 +[ 4093.900359] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24 +[ 4093.900360] RSP: 002b:00007ffd6491b7f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +[ 4093.900362] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f16ad4d22c0 +[ 4093.900363] RDX: 0000000000000002 RSI: 0000000001a41408 RDI: 0000000000000001 +[ 4093.900364] RBP: 0000000001a41408 R08: 00007f16ad7a1780 R09: 00007f16ae1f2700 +[ 4093.900364] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002 +[ 4093.900365] R13: 0000000000000001 R14: 00007f16ad7a0620 R15: 0000000000000001 +[ 4093.900367] +[ 4093.900368] Allocated by task 820: +[ 4093.900371] kasan_kmalloc+0xa6/0xd0 +[ 4093.900373] __kmalloc+0xfb/0x200 +[ 4093.900376] iavf_init_interrupt_scheme+0x63b/0x1320 [iavf] +[ 4093.900380] iavf_watchdog_task+0x3d51/0x52c0 [iavf] +[ 4093.900382] process_one_work+0x56a/0x11f0 +[ 4093.900383] worker_thread+0x8f/0xf40 +[ 4093.900384] kthread+0x2a0/0x390 +[ 4093.900385] ret_from_fork+0x1f/0x40 +[ 4093.900387] 0xffffffffffffffff +[ 4093.900387] +[ 4093.900388] Freed by task 6699: +[ 4093.900390] __kasan_slab_free+0x137/0x190 +[ 4093.900391] kfree+0x8b/0x1b0 +[ 4093.900394] iavf_free_q_vectors+0x11d/0x1a0 [iavf] +[ 4093.900397] iavf_remove+0x35a/0xd20 [iavf] +[ 4093.900399] pci_device_remove+0xa8/0x1f0 +[ 4093.900400] device_release_driver_internal+0x1c6/0x460 +[ 4093.900401] pci_stop_bus_device+0x101/0x150 +[ 4093.900402] pci_stop_and_remove_bus_device+0xe/0x20 +[ 4093.900403] pci_iov_remove_virtfn+0x187/0x420 +[ 4093.900404] sriov_disable+0xed/0x3e0 +[ 4093.900409] i40e_free_vfs+0x754/0x1210 [i40e] +[ 4093.900415] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] +[ 4093.900416] sriov_numvfs_store+0x214/0x290 +[ 4093.900417] kernfs_fop_write+0x280/0x3f0 +[ 4093.900418] vfs_write+0x145/0x440 +[ 4093.900419] ksys_write+0xab/0x160 +[ 4093.900420] do_syscall_64+0xa0/0x370 +[ 4093.900421] entry_SYSCALL_64_after_hwframe+0x65/0xca +[ 4093.900422] 0xffffffffffffffff +[ 4093.900422] +[ 4093.900424] The buggy address belongs to the object at ffff88b4dc144200 + which belongs to the cache kmalloc-8k of size 8192 +[ 4093.900425] The buggy address is located 5184 bytes inside of + 8192-byte region [ffff88b4dc144200, ffff88b4dc146200) +[ 4093.900425] The buggy address belongs to the page: +[ 4093.900427] page:ffffea00d3705000 refcount:1 mapcount:0 mapping:ffff88bf04415c80 index:0x0 compound_mapcount: 0 +[ 4093.900430] flags: 0x10000000008100(slab|head) +[ 4093.900433] raw: 0010000000008100 dead000000000100 dead000000000200 ffff88bf04415c80 +[ 4093.900434] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000 +[ 4093.900434] page dumped because: kasan: bad access detected +[ 4093.900435] +[ 4093.900435] Memory state around the buggy address: +[ 4093.900436] ffff88b4dc145500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900437] ffff88b4dc145580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900438] >ffff88b4dc145600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900438] ^ +[ 4093.900439] ffff88b4dc145680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900440] ffff88b4dc145700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900440] ================================================================== + +Although the patch #2 (of 2) can avoid the issue triggered by this +repro.sh, there still are other potential risks that if num_active_queues +is changed to less than allocated q_vectors[] by unexpected, the +mismatched netif_napi_add/del() can also cause UAF. + +Since we actually call netif_napi_add() for all allocated q_vectors +unconditionally in iavf_alloc_q_vectors(), so we should fix it by +letting netif_napi_del() match to netif_napi_add(). + +Fixes: 5eae00c57f5e ("i40evf: main driver core") +Signed-off-by: Ding Hui +Cc: Donglin Peng +Cc: Huang Cun +Reviewed-by: Simon Horman +Reviewed-by: Madhu Chittim +Reviewed-by: Leon Romanovsky +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index 838cd7881f2f7..9cf556fedc704 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -1389,19 +1389,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter) + static void iavf_free_q_vectors(struct iavf_adapter *adapter) + { + int q_idx, num_q_vectors; +- int napi_vectors; + + if (!adapter->q_vectors) + return; + + num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; +- napi_vectors = adapter->num_active_queues; + + for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { + struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx]; + +- if (q_idx < napi_vectors) +- netif_napi_del(&q_vector->napi); ++ netif_napi_del(&q_vector->napi); + } + kfree(adapter->q_vectors); + adapter->q_vectors = NULL; +-- +2.39.2 + diff --git a/queue-5.4/igb-fix-igb_down-hung-on-surprise-removal.patch b/queue-5.4/igb-fix-igb_down-hung-on-surprise-removal.patch new file mode 100644 index 00000000000..5f2ce5b3b89 --- /dev/null +++ b/queue-5.4/igb-fix-igb_down-hung-on-surprise-removal.patch @@ -0,0 +1,89 @@ +From 0b3d3a2fbdf3027763c70d65dde0a044a8c3a6ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 10:47:32 -0700 +Subject: igb: Fix igb_down hung on surprise removal + +From: Ying Hsu + +[ Upstream commit 004d25060c78fc31f66da0fa439c544dda1ac9d5 ] + +In a setup where a Thunderbolt hub connects to Ethernet and a display +through USB Type-C, users may experience a hung task timeout when they +remove the cable between the PC and the Thunderbolt hub. +This is because the igb_down function is called multiple times when +the Thunderbolt hub is unplugged. For example, the igb_io_error_detected +triggers the first call, and the igb_remove triggers the second call. +The second call to igb_down will block at napi_synchronize. +Here's the call trace: + __schedule+0x3b0/0xddb + ? __mod_timer+0x164/0x5d3 + schedule+0x44/0xa8 + schedule_timeout+0xb2/0x2a4 + ? run_local_timers+0x4e/0x4e + msleep+0x31/0x38 + igb_down+0x12c/0x22a [igb 6615058754948bfde0bf01429257eb59f13030d4] + __igb_close+0x6f/0x9c [igb 6615058754948bfde0bf01429257eb59f13030d4] + igb_close+0x23/0x2b [igb 6615058754948bfde0bf01429257eb59f13030d4] + __dev_close_many+0x95/0xec + dev_close_many+0x6e/0x103 + unregister_netdevice_many+0x105/0x5b1 + unregister_netdevice_queue+0xc2/0x10d + unregister_netdev+0x1c/0x23 + igb_remove+0xa7/0x11c [igb 6615058754948bfde0bf01429257eb59f13030d4] + pci_device_remove+0x3f/0x9c + device_release_driver_internal+0xfe/0x1b4 + pci_stop_bus_device+0x5b/0x7f + pci_stop_bus_device+0x30/0x7f + pci_stop_bus_device+0x30/0x7f + pci_stop_and_remove_bus_device+0x12/0x19 + pciehp_unconfigure_device+0x76/0xe9 + pciehp_disable_slot+0x6e/0x131 + pciehp_handle_presence_or_link_change+0x7a/0x3f7 + pciehp_ist+0xbe/0x194 + irq_thread_fn+0x22/0x4d + ? irq_thread+0x1fd/0x1fd + irq_thread+0x17b/0x1fd + ? irq_forced_thread_fn+0x5f/0x5f + kthread+0x142/0x153 + ? __irq_get_irqchip_state+0x46/0x46 + ? kthread_associate_blkcg+0x71/0x71 + ret_from_fork+0x1f/0x30 + +In this case, igb_io_error_detected detaches the network interface +and requests a PCIE slot reset, however, the PCIE reset callback is +not being invoked and thus the Ethernet connection breaks down. +As the PCIE error in this case is a non-fatal one, requesting a +slot reset can be avoided. +This patch fixes the task hung issue and preserves Ethernet +connection by ignoring non-fatal PCIE errors. + +Signed-off-by: Ying Hsu +Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230620174732.4145155-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c +index 00d66a6e5c6e5..8c6c0d9c7f766 100644 +--- a/drivers/net/ethernet/intel/igb/igb_main.c ++++ b/drivers/net/ethernet/intel/igb/igb_main.c +@@ -9028,6 +9028,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + ++ if (state == pci_channel_io_normal) { ++ dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n"); ++ return PCI_ERS_RESULT_CAN_RECOVER; ++ } ++ + netif_device_detach(netdev); + + if (state == pci_channel_io_perm_failure) +-- +2.39.2 + diff --git a/queue-5.4/llc-don-t-drop-packet-from-non-root-netns.patch b/queue-5.4/llc-don-t-drop-packet-from-non-root-netns.patch new file mode 100644 index 00000000000..12706d00597 --- /dev/null +++ b/queue-5.4/llc-don-t-drop-packet-from-non-root-netns.patch @@ -0,0 +1,50 @@ +From 5f49d88b5518144b7e6b9ddd2a5a4b3ec3dea453 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Jul 2023 10:41:51 -0700 +Subject: llc: Don't drop packet from non-root netns. + +From: Kuniyuki Iwashima + +[ Upstream commit 6631463b6e6673916d2481f692938f393148aa82 ] + +Now these upper layer protocol handlers can be called from llc_rcv() +as sap->rcv_func(), which is registered by llc_sap_open(). + + * function which is passed to register_8022_client() + -> no in-kernel user calls register_8022_client(). + + * snap_rcv() + `- proto->rcvfunc() : registered by register_snap_client() + -> aarp_rcv() and atalk_rcv() drop packets from non-root netns + + * stp_pdu_rcv() + `- garp_protos[]->rcv() : registered by stp_proto_register() + -> garp_pdu_rcv() and br_stp_rcv() are netns-aware + +So, we can safely remove the netns restriction in llc_rcv(). + +Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/llc/llc_input.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c +index 82cb93f66b9bd..f9e801cc50f5e 100644 +--- a/net/llc/llc_input.c ++++ b/net/llc/llc_input.c +@@ -162,9 +162,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, + void (*sta_handler)(struct sk_buff *skb); + void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb); + +- if (!net_eq(dev_net(dev), &init_net)) +- goto drop; +- + /* + * When the interface is in promisc. mode, drop all the crap that it + * receives, do not try to analyse it. +-- +2.39.2 + diff --git a/queue-5.4/md-fix-data-corruption-for-raid456-when-reshape-rest.patch b/queue-5.4/md-fix-data-corruption-for-raid456-when-reshape-rest.patch new file mode 100644 index 00000000000..f08db1820b2 --- /dev/null +++ b/queue-5.4/md-fix-data-corruption-for-raid456-when-reshape-rest.patch @@ -0,0 +1,60 @@ +From 4cc78653a330f245001ff2e0e31088dbba8facdc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 12 May 2023 09:56:07 +0800 +Subject: md: fix data corruption for raid456 when reshape restart while grow + up + +From: Yu Kuai + +[ Upstream commit 873f50ece41aad5c4f788a340960c53774b5526e ] + +Currently, if reshape is interrupted, echo "reshape" to sync_action will +restart reshape from scratch, for example: + +echo frozen > sync_action +echo reshape > sync_action + +This will corrupt data before reshape_position if the array is growing, +fix the problem by continue reshape from reshape_position. + +Reported-by: Peter Neuwirth +Link: https://lore.kernel.org/linux-raid/e2f96772-bfbc-f43b-6da1-f520e5164536@online.de/ +Signed-off-by: Yu Kuai +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20230512015610.821290-3-yukuai1@huaweicloud.com +Signed-off-by: Sasha Levin +--- + drivers/md/md.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 0765712513e7d..a006f3a9554bf 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -4743,11 +4743,21 @@ action_store(struct mddev *mddev, const char *page, size_t len) + return -EINVAL; + err = mddev_lock(mddev); + if (!err) { +- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) ++ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { + err = -EBUSY; +- else { ++ } else if (mddev->reshape_position == MaxSector || ++ mddev->pers->check_reshape == NULL || ++ mddev->pers->check_reshape(mddev)) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + err = mddev->pers->start_reshape(mddev); ++ } else { ++ /* ++ * If reshape is still in progress, and ++ * md_check_recovery() can continue to reshape, ++ * don't restart reshape because data can be ++ * corrupted for raid456. ++ */ ++ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + } + mddev_unlock(mddev); + } +-- +2.39.2 + diff --git a/queue-5.4/md-raid10-prevent-soft-lockup-while-flush-writes.patch b/queue-5.4/md-raid10-prevent-soft-lockup-while-flush-writes.patch new file mode 100644 index 00000000000..70ece3d376a --- /dev/null +++ b/queue-5.4/md-raid10-prevent-soft-lockup-while-flush-writes.patch @@ -0,0 +1,79 @@ +From e162491c96b0f3c2366d9c0cdb870c15fca436db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 May 2023 21:11:00 +0800 +Subject: md/raid10: prevent soft lockup while flush writes + +From: Yu Kuai + +[ Upstream commit 010444623e7f4da6b4a4dd603a7da7469981e293 ] + +Currently, there is no limit for raid1/raid10 plugged bio. While flushing +writes, raid1 has cond_resched() while raid10 doesn't, and too many +writes can cause soft lockup. + +Follow up soft lockup can be triggered easily with writeback test for +raid10 with ramdisks: + +watchdog: BUG: soft lockup - CPU#10 stuck for 27s! [md0_raid10:1293] +Call Trace: + + call_rcu+0x16/0x20 + put_object+0x41/0x80 + __delete_object+0x50/0x90 + delete_object_full+0x2b/0x40 + kmemleak_free+0x46/0xa0 + slab_free_freelist_hook.constprop.0+0xed/0x1a0 + kmem_cache_free+0xfd/0x300 + mempool_free_slab+0x1f/0x30 + mempool_free+0x3a/0x100 + bio_free+0x59/0x80 + bio_put+0xcf/0x2c0 + free_r10bio+0xbf/0xf0 + raid_end_bio_io+0x78/0xb0 + one_write_done+0x8a/0xa0 + raid10_end_write_request+0x1b4/0x430 + bio_endio+0x175/0x320 + brd_submit_bio+0x3b9/0x9b7 [brd] + __submit_bio+0x69/0xe0 + submit_bio_noacct_nocheck+0x1e6/0x5a0 + submit_bio_noacct+0x38c/0x7e0 + flush_pending_writes+0xf0/0x240 + raid10d+0xac/0x1ed0 + +Fix the problem by adding cond_resched() to raid10 like what raid1 did. + +Note that unlimited plugged bio still need to be optimized, for example, +in the case of lots of dirty pages writeback, this will take lots of +memory and io will spend a long time in plug, hence io latency is bad. + +Signed-off-by: Yu Kuai +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20230529131106.2123367-2-yukuai1@huaweicloud.com +Signed-off-by: Sasha Levin +--- + drivers/md/raid10.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c +index db4de8e07cd97..3983d5c8b5cd2 100644 +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -927,6 +927,7 @@ static void flush_pending_writes(struct r10conf *conf) + else + generic_make_request(bio); + bio = next; ++ cond_resched(); + } + blk_finish_plug(&plug); + } else +@@ -1112,6 +1113,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) + else + generic_make_request(bio); + bio = next; ++ cond_resched(); + } + kfree(plug); + } +-- +2.39.2 + diff --git a/queue-5.4/nbd-add-the-maximum-limit-of-allocated-index-in-nbd_.patch b/queue-5.4/nbd-add-the-maximum-limit-of-allocated-index-in-nbd_.patch new file mode 100644 index 00000000000..ed65fa5a442 --- /dev/null +++ b/queue-5.4/nbd-add-the-maximum-limit-of-allocated-index-in-nbd_.patch @@ -0,0 +1,41 @@ +From 456f7e7cba47667316050e9866ad8506c9e47f6f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Jun 2023 20:21:59 +0800 +Subject: nbd: Add the maximum limit of allocated index in nbd_dev_add + +From: Zhong Jinghua + +[ Upstream commit f12bc113ce904777fd6ca003b473b427782b3dde ] + +If the index allocated by idr_alloc greater than MINORMASK >> part_shift, +the device number will overflow, resulting in failure to create a block +device. + +Fix it by imiting the size of the max allocation. + +Signed-off-by: Zhong Jinghua +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20230605122159.2134384-1-zhongjinghua@huaweicloud.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/block/nbd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c +index 218aa7e419700..37994a7a1b6f4 100644 +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -1708,7 +1708,8 @@ static int nbd_dev_add(int index) + if (err == -ENOSPC) + err = -EEXIST; + } else { +- err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL); ++ err = idr_alloc(&nbd_index_idr, nbd, 0, ++ (MINORMASK >> part_shift) + 1, GFP_KERNEL); + if (err >= 0) + index = err; + } +-- +2.39.2 + diff --git a/queue-5.4/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch b/queue-5.4/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch new file mode 100644 index 00000000000..806727c4c02 --- /dev/null +++ b/queue-5.4/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch @@ -0,0 +1,78 @@ +From 08a8346073e660036a1e3e9ae04efddd220a90fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Jul 2023 16:36:57 +0530 +Subject: net: ethernet: ti: cpsw_ale: Fix + cpsw_ale_get_field()/cpsw_ale_set_field() + +From: Tanmay Patil + +[ Upstream commit b685f1a58956fa36cc01123f253351b25bfacfda ] + +CPSW ALE has 75 bit ALE entries which are stored within three 32 bit words. +The cpsw_ale_get_field() and cpsw_ale_set_field() functions assume that the +field will be strictly contained within one word. However, this is not +guaranteed to be the case and it is possible for ALE field entries to span +across up to two words at the most. + +Fix the methods to handle getting/setting fields spanning up to two words. + +Fixes: db82173f23c5 ("netdev: driver: ethernet: add cpsw address lookup engine support") +Signed-off-by: Tanmay Patil +[s-vadapalli@ti.com: rephrased commit message and added Fixes tag] +Signed-off-by: Siddharth Vadapalli +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ti/cpsw_ale.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c +index e7c24396933e9..f17619c545ae5 100644 +--- a/drivers/net/ethernet/ti/cpsw_ale.c ++++ b/drivers/net/ethernet/ti/cpsw_ale.c +@@ -60,23 +60,37 @@ + + static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) + { +- int idx; ++ int idx, idx2; ++ u32 hi_val = 0; + + idx = start / 32; ++ idx2 = (start + bits - 1) / 32; ++ /* Check if bits to be fetched exceed a word */ ++ if (idx != idx2) { ++ idx2 = 2 - idx2; /* flip */ ++ hi_val = ale_entry[idx2] << ((idx2 * 32) - start); ++ } + start -= idx * 32; + idx = 2 - idx; /* flip */ +- return (ale_entry[idx] >> start) & BITMASK(bits); ++ return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits); + } + + static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits, + u32 value) + { +- int idx; ++ int idx, idx2; + + value &= BITMASK(bits); +- idx = start / 32; ++ idx = start / 32; ++ idx2 = (start + bits - 1) / 32; ++ /* Check if bits to be set exceed a word */ ++ if (idx != idx2) { ++ idx2 = 2 - idx2; /* flip */ ++ ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32))); ++ ale_entry[idx2] |= (value >> ((idx2 * 32) - start)); ++ } + start -= idx * 32; +- idx = 2 - idx; /* flip */ ++ idx = 2 - idx; /* flip */ + ale_entry[idx] &= ~(BITMASK(bits) << start); + ale_entry[idx] |= (value << start); + } +-- +2.39.2 + diff --git a/queue-5.4/net-ipv6-check-return-value-of-pskb_trim.patch b/queue-5.4/net-ipv6-check-return-value-of-pskb_trim.patch new file mode 100644 index 00000000000..0244504e97e --- /dev/null +++ b/queue-5.4/net-ipv6-check-return-value-of-pskb_trim.patch @@ -0,0 +1,39 @@ +From 63eb3f5f78a3465e2d34f4145321d87171922518 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 22:45:19 +0800 +Subject: net:ipv6: check return value of pskb_trim() + +From: Yuanjun Gong + +[ Upstream commit 4258faa130be4ea43e5e2d839467da421b8ff274 ] + +goto tx_err if an unexpected result is returned by pskb_tirm() +in ip6erspan_tunnel_xmit(). + +Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") +Signed-off-by: Yuanjun Gong +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6_gre.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c +index 0977137b00dc4..2d34bd98fccea 100644 +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -941,7 +941,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, + goto tx_err; + + if (skb->len > dev->mtu + dev->hard_header_len) { +- pskb_trim(skb, dev->mtu + dev->hard_header_len); ++ if (pskb_trim(skb, dev->mtu + dev->hard_header_len)) ++ goto tx_err; + truncate = true; + } + +-- +2.39.2 + diff --git a/queue-5.4/net-replace-the-limit-of-tcp_linger2-with-tcp_fin_ti.patch b/queue-5.4/net-replace-the-limit-of-tcp_linger2-with-tcp_fin_ti.patch new file mode 100644 index 00000000000..444d7006482 --- /dev/null +++ b/queue-5.4/net-replace-the-limit-of-tcp_linger2-with-tcp_fin_ti.patch @@ -0,0 +1,73 @@ +From 730715f20176f04274e6b8733bb5664b25fef36d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Apr 2020 16:06:16 +0800 +Subject: net: Replace the limit of TCP_LINGER2 with TCP_FIN_TIMEOUT_MAX + +From: Cambda Zhu + +[ Upstream commit f0628c524fd188c3f9418e12478dfdfadacba815 ] + +This patch changes the behavior of TCP_LINGER2 about its limit. The +sysctl_tcp_fin_timeout used to be the limit of TCP_LINGER2 but now it's +only the default value. A new macro named TCP_FIN_TIMEOUT_MAX is added +as the limit of TCP_LINGER2, which is 2 minutes. + +Since TCP_LINGER2 used sysctl_tcp_fin_timeout as the default value +and the limit in the past, the system administrator cannot set the +default value for most of sockets and let some sockets have a greater +timeout. It might be a mistake that let the sysctl to be the limit of +the TCP_LINGER2. Maybe we can add a new sysctl to set the max of +TCP_LINGER2, but FIN-WAIT-2 timeout is usually no need to be too long +and 2 minutes are legal considering TCP specs. + +Changes in v3: +- Remove the new socket option and change the TCP_LINGER2 behavior so + that the timeout can be set to value between sysctl_tcp_fin_timeout + and 2 minutes. + +Changes in v2: +- Add int overflow check for the new socket option. + +Changes in v1: +- Add a new socket option to set timeout greater than + sysctl_tcp_fin_timeout. + +Signed-off-by: Cambda Zhu +Signed-off-by: David S. Miller +Stable-dep-of: 9df5335ca974 ("tcp: annotate data-races around tp->linger2") +Signed-off-by: Sasha Levin +--- + include/net/tcp.h | 1 + + net/ipv4/tcp.c | 4 ++-- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 077feeca6c99e..2f456bed33ec3 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -125,6 +125,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); + * to combine FIN-WAIT-2 timeout with + * TIME-WAIT timer. + */ ++#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */ + + #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ + #if HZ >= 100 +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index e33abcff56080..c6c73b9407098 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3067,8 +3067,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, + case TCP_LINGER2: + if (val < 0) + tp->linger2 = -1; +- else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ) +- tp->linger2 = 0; ++ else if (val > TCP_FIN_TIMEOUT_MAX / HZ) ++ tp->linger2 = TCP_FIN_TIMEOUT_MAX; + else + tp->linger2 = val * HZ; + break; +-- +2.39.2 + diff --git a/queue-5.4/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch b/queue-5.4/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch new file mode 100644 index 00000000000..4c65e9f1972 --- /dev/null +++ b/queue-5.4/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch @@ -0,0 +1,64 @@ +From f58e1406f7f9e2cf7845590dd709521813d3c261 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Jul 2023 01:30:33 +0200 +Subject: netfilter: nf_tables: can't schedule in nft_chain_validate + +From: Florian Westphal + +[ Upstream commit 314c82841602a111c04a7210c21dc77e0d560242 ] + +Can be called via nft set element list iteration, which may acquire +rcu and/or bh read lock (depends on set type). + +BUG: sleeping function called from invalid context at net/netfilter/nf_tables_api.c:3353 +in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1232, name: nft +preempt_count: 0, expected: 0 +RCU nest depth: 1, expected: 0 +2 locks held by nft/1232: + #0: ffff8881180e3ea8 (&nft_net->commit_mutex){+.+.}-{3:3}, at: nf_tables_valid_genid + #1: ffffffff83f5f540 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire +Call Trace: + nft_chain_validate + nft_lookup_validate_setelem + nft_pipapo_walk + nft_lookup_validate + nft_chain_validate + nft_immediate_validate + nft_chain_validate + nf_tables_validate + nf_tables_abort + +No choice but to move it to nf_tables_validate(). + +Fixes: 81ea01066741 ("netfilter: nf_tables: add rescheduling points during loop detection walks") +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index a64aa888751cb..7d22bc8aa2787 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -2736,8 +2736,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) + if (err < 0) + return err; + } +- +- cond_resched(); + } + + return 0; +@@ -2761,6 +2759,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) + err = nft_chain_validate(&ctx, chain); + if (err < 0) + return err; ++ ++ cond_resched(); + } + + return 0; +-- +2.39.2 + diff --git a/queue-5.4/netfilter-nf_tables-fix-spurious-set-element-inserti.patch b/queue-5.4/netfilter-nf_tables-fix-spurious-set-element-inserti.patch new file mode 100644 index 00000000000..43740be294a --- /dev/null +++ b/queue-5.4/netfilter-nf_tables-fix-spurious-set-element-inserti.patch @@ -0,0 +1,49 @@ +From 9fb06bca476f67a541d7701c4c2e976894297311 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jul 2023 00:29:58 +0200 +Subject: netfilter: nf_tables: fix spurious set element insertion failure + +From: Florian Westphal + +[ Upstream commit ddbd8be68941985f166f5107109a90ce13147c44 ] + +On some platforms there is a padding hole in the nft_verdict +structure, between the verdict code and the chain pointer. + +On element insertion, if the new element clashes with an existing one and +NLM_F_EXCL flag isn't set, we want to ignore the -EEXIST error as long as +the data associated with duplicated element is the same as the existing +one. The data equality check uses memcmp. + +For normal data (NFT_DATA_VALUE) this works fine, but for NFT_DATA_VERDICT +padding area leads to spurious failure even if the verdict data is the +same. + +This then makes the insertion fail with 'already exists' error, even +though the new "key : data" matches an existing entry and userspace +told the kernel that it doesn't want to receive an error indication. + +Fixes: c016c7e45ddf ("netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion") +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 123ef398a10dc..a64aa888751cb 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7655,6 +7655,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, + + if (!tb[NFTA_VERDICT_CODE]) + return -EINVAL; ++ ++ /* zero padding hole for memcmp */ ++ memset(data, 0, sizeof(*data)); + data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + + switch (data->verdict.code) { +-- +2.39.2 + diff --git a/queue-5.4/pinctrl-amd-use-amd_pinconf_set-for-all-config-optio.patch b/queue-5.4/pinctrl-amd-use-amd_pinconf_set-for-all-config-optio.patch new file mode 100644 index 00000000000..57476bd164e --- /dev/null +++ b/queue-5.4/pinctrl-amd-use-amd_pinconf_set-for-all-config-optio.patch @@ -0,0 +1,108 @@ +From d60135b3aeb1520fdaae6864d4c4d75d8cc1aa49 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 08:30:03 -0500 +Subject: pinctrl: amd: Use amd_pinconf_set() for all config options + +From: Mario Limonciello + +[ Upstream commit 635a750d958e158e17af0f524bedc484b27fbb93 ] + +On ASUS TUF A16 it is reported that the ITE5570 ACPI device connected to +GPIO 7 is causing an interrupt storm. This issue doesn't happen on +Windows. + +Comparing the GPIO register configuration between Windows and Linux +bit 20 has been configured as a pull up on Windows, but not on Linux. +Checking GPIO declaration from the firmware it is clear it *should* have +been a pull up on Linux as well. + +``` +GpioInt (Level, ActiveLow, Exclusive, PullUp, 0x0000, + "\\_SB.GPIO", 0x00, ResourceConsumer, ,) +{ // Pin list +0x0007 +} +``` + +On Linux amd_gpio_set_config() is currently only used for programming +the debounce. Actually the GPIO core calls it with all the arguments +that are supported by a GPIO, pinctrl-amd just responds `-ENOTSUPP`. + +To solve this issue expand amd_gpio_set_config() to support the other +arguments amd_pinconf_set() supports, namely `PIN_CONFIG_BIAS_PULL_DOWN`, +`PIN_CONFIG_BIAS_PULL_UP`, and `PIN_CONFIG_DRIVE_STRENGTH`. + +Reported-by: Nik P +Reported-by: Nathan Schulte +Reported-by: Friedrich Vock +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217336 +Reported-by: dridri85@gmail.com +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217493 +Link: https://lore.kernel.org/linux-input/20230530154058.17594-1-friedrich.vock@gmx.de/ +Tested-by: Jan Visser +Fixes: 2956b5d94a76 ("pinctrl / gpio: Introduce .set_config() callback for GPIO chips") +Signed-off-by: Mario Limonciello +Reviewed-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20230705133005.577-3-mario.limonciello@amd.com +Signed-off-by: Linus Walleij +Signed-off-by: Sasha Levin +--- + drivers/pinctrl/pinctrl-amd.c | 28 +++++++++++++++------------- + 1 file changed, 15 insertions(+), 13 deletions(-) + +diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c +index ba446271e17b6..2415085eadeda 100644 +--- a/drivers/pinctrl/pinctrl-amd.c ++++ b/drivers/pinctrl/pinctrl-amd.c +@@ -186,18 +186,6 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, + return ret; + } + +-static int amd_gpio_set_config(struct gpio_chip *gc, unsigned offset, +- unsigned long config) +-{ +- u32 debounce; +- +- if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE) +- return -ENOTSUPP; +- +- debounce = pinconf_to_config_argument(config); +- return amd_gpio_set_debounce(gc, offset, debounce); +-} +- + #ifdef CONFIG_DEBUG_FS + static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) + { +@@ -682,7 +670,7 @@ static int amd_pinconf_get(struct pinctrl_dev *pctldev, + } + + static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, +- unsigned long *configs, unsigned num_configs) ++ unsigned long *configs, unsigned int num_configs) + { + int i; + u32 arg; +@@ -772,6 +760,20 @@ static int amd_pinconf_group_set(struct pinctrl_dev *pctldev, + return 0; + } + ++static int amd_gpio_set_config(struct gpio_chip *gc, unsigned int pin, ++ unsigned long config) ++{ ++ struct amd_gpio *gpio_dev = gpiochip_get_data(gc); ++ ++ if (pinconf_to_config_param(config) == PIN_CONFIG_INPUT_DEBOUNCE) { ++ u32 debounce = pinconf_to_config_argument(config); ++ ++ return amd_gpio_set_debounce(gc, pin, debounce); ++ } ++ ++ return amd_pinconf_set(gpio_dev->pctrl, pin, &config, 1); ++} ++ + static const struct pinconf_ops amd_pinconf_ops = { + .pin_config_get = amd_pinconf_get, + .pin_config_set = amd_pinconf_set, +-- +2.39.2 + diff --git a/queue-5.4/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch b/queue-5.4/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch new file mode 100644 index 00000000000..f96f0e79c9d --- /dev/null +++ b/queue-5.4/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch @@ -0,0 +1,115 @@ +From 4a8a548125e5818c03f7f74d4ff93ea8cb7dd43c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Jun 2023 20:58:47 +0200 +Subject: posix-timers: Ensure timer ID search-loop limit is valid + +From: Thomas Gleixner + +[ Upstream commit 8ce8849dd1e78dadcee0ec9acbd259d239b7069f ] + +posix_timer_add() tries to allocate a posix timer ID by starting from the +cached ID which was stored by the last successful allocation. + +This is done in a loop searching the ID space for a free slot one by +one. The loop has to terminate when the search wrapped around to the +starting point. + +But that's racy vs. establishing the starting point. That is read out +lockless, which leads to the following problem: + +CPU0 CPU1 +posix_timer_add() + start = sig->posix_timer_id; + lock(hash_lock); + ... posix_timer_add() + if (++sig->posix_timer_id < 0) + start = sig->posix_timer_id; + sig->posix_timer_id = 0; + +So CPU1 can observe a negative start value, i.e. -1, and the loop break +never happens because the condition can never be true: + + if (sig->posix_timer_id == start) + break; + +While this is unlikely to ever turn into an endless loop as the ID space is +huge (INT_MAX), the racy read of the start value caught the attention of +KCSAN and Dmitry unearthed that incorrectness. + +Rewrite it so that all id operations are under the hash lock. + +Reported-by: syzbot+5c54bd3eb218bb595aa9@syzkaller.appspotmail.com +Reported-by: Dmitry Vyukov +Signed-off-by: Thomas Gleixner +Reviewed-by: Frederic Weisbecker +Link: https://lore.kernel.org/r/87bkhzdn6g.ffs@tglx +Signed-off-by: Sasha Levin +--- + include/linux/sched/signal.h | 2 +- + kernel/time/posix-timers.c | 31 ++++++++++++++++++------------- + 2 files changed, 19 insertions(+), 14 deletions(-) + +diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h +index b3f88470cbb58..2f355c3c0d15f 100644 +--- a/include/linux/sched/signal.h ++++ b/include/linux/sched/signal.h +@@ -123,7 +123,7 @@ struct signal_struct { + #ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ +- int posix_timer_id; ++ unsigned int next_posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ +diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c +index efe3873021a37..f3b8313475acd 100644 +--- a/kernel/time/posix-timers.c ++++ b/kernel/time/posix-timers.c +@@ -138,25 +138,30 @@ static struct k_itimer *posix_timer_by_id(timer_t id) + static int posix_timer_add(struct k_itimer *timer) + { + struct signal_struct *sig = current->signal; +- int first_free_id = sig->posix_timer_id; + struct hlist_head *head; +- int ret = -ENOENT; ++ unsigned int cnt, id; + +- do { ++ /* ++ * FIXME: Replace this by a per signal struct xarray once there is ++ * a plan to handle the resulting CRIU regression gracefully. ++ */ ++ for (cnt = 0; cnt <= INT_MAX; cnt++) { + spin_lock(&hash_lock); +- head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)]; +- if (!__posix_timers_find(head, sig, sig->posix_timer_id)) { ++ id = sig->next_posix_timer_id; ++ ++ /* Write the next ID back. Clamp it to the positive space */ ++ sig->next_posix_timer_id = (id + 1) & INT_MAX; ++ ++ head = &posix_timers_hashtable[hash(sig, id)]; ++ if (!__posix_timers_find(head, sig, id)) { + hlist_add_head_rcu(&timer->t_hash, head); +- ret = sig->posix_timer_id; ++ spin_unlock(&hash_lock); ++ return id; + } +- if (++sig->posix_timer_id < 0) +- sig->posix_timer_id = 0; +- if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT)) +- /* Loop over all possible ids completed */ +- ret = -EAGAIN; + spin_unlock(&hash_lock); +- } while (ret == -ENOENT); +- return ret; ++ } ++ /* POSIX return code when no timer ID could be allocated */ ++ return -EAGAIN; + } + + static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) +-- +2.39.2 + diff --git a/queue-5.4/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch b/queue-5.4/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch new file mode 100644 index 00000000000..02715a1a0c1 --- /dev/null +++ b/queue-5.4/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch @@ -0,0 +1,113 @@ +From a53cef2f4c210eba249992848f53ba4b0cbfe8be Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 14:59:18 -0700 +Subject: Revert "tcp: avoid the lookup process failing to get sk in ehash + table" + +From: Kuniyuki Iwashima + +[ Upstream commit 81b3ade5d2b98ad6e0a473b0e1e420a801275592 ] + +This reverts commit 3f4ca5fafc08881d7a57daa20449d171f2887043. + +Commit 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in +ehash table") reversed the order in how a socket is inserted into ehash +to fix an issue that ehash-lookup could fail when reqsk/full sk/twsk are +swapped. However, it introduced another lookup failure. + +The full socket in ehash is allocated from a slab with SLAB_TYPESAFE_BY_RCU +and does not have SOCK_RCU_FREE, so the socket could be reused even while +it is being referenced on another CPU doing RCU lookup. + +Let's say a socket is reused and inserted into the same hash bucket during +lookup. After the blamed commit, a new socket is inserted at the end of +the list. If that happens, we will skip sockets placed after the previous +position of the reused socket, resulting in ehash lookup failure. + +As described in Documentation/RCU/rculist_nulls.rst, we should insert a +new socket at the head of the list to avoid such an issue. + +This issue, the swap-lookup-failure, and another variant reported in [0] +can all be handled properly by adding a locked ehash lookup suggested by +Eric Dumazet [1]. + +However, this issue could occur for every packet, thus more likely than +the other two races, so let's revert the change for now. + +Link: https://lore.kernel.org/netdev/20230606064306.9192-1-duanmuquan@baidu.com/ [0] +Link: https://lore.kernel.org/netdev/CANn89iK8snOz8TYOhhwfimC7ykYA78GA3Nyv8x06SZYa1nKdyA@mail.gmail.com/ [1] +Fixes: 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in ehash table") +Signed-off-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230717215918.15723-1-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/inet_hashtables.c | 17 ++--------------- + net/ipv4/inet_timewait_sock.c | 8 ++++---- + 2 files changed, 6 insertions(+), 19 deletions(-) + +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index 9d14b3289f003..e4f2790fd6410 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -536,20 +536,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) + spin_lock(lock); + if (osk) { + WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); +- ret = sk_hashed(osk); +- if (ret) { +- /* Before deleting the node, we insert a new one to make +- * sure that the look-up-sk process would not miss either +- * of them and that at least one node would exist in ehash +- * table all the time. Otherwise there's a tiny chance +- * that lookup process could find nothing in ehash table. +- */ +- __sk_nulls_add_node_tail_rcu(sk, list); +- sk_nulls_del_node_init_rcu(osk); +- } +- goto unlock; +- } +- if (found_dup_sk) { ++ ret = sk_nulls_del_node_init_rcu(osk); ++ } else if (found_dup_sk) { + *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); + if (*found_dup_sk) + ret = false; +@@ -558,7 +546,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) + if (ret) + __sk_nulls_add_node_rcu(sk, list); + +-unlock: + spin_unlock(lock); + + return ret; +diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c +index a00102d7c7fd4..c411c87ae865f 100644 +--- a/net/ipv4/inet_timewait_sock.c ++++ b/net/ipv4/inet_timewait_sock.c +@@ -81,10 +81,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw) + } + EXPORT_SYMBOL_GPL(inet_twsk_put); + +-static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw, +- struct hlist_nulls_head *list) ++static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, ++ struct hlist_nulls_head *list) + { +- hlist_nulls_add_tail_rcu(&tw->tw_node, list); ++ hlist_nulls_add_head_rcu(&tw->tw_node, list); + } + + static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, +@@ -120,7 +120,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + + spin_lock(lock); + +- inet_twsk_add_node_tail_rcu(tw, &ehead->chain); ++ inet_twsk_add_node_rcu(tw, &ehead->chain); + + /* Step 3: Remove SK from hash chain */ + if (__sk_nulls_del_node_init_rcu(sk)) +-- +2.39.2 + diff --git a/queue-5.4/sched-fair-don-t-balance-task-to-its-current-running.patch b/queue-5.4/sched-fair-don-t-balance-task-to-its-current-running.patch new file mode 100644 index 00000000000..a7976f25fdc --- /dev/null +++ b/queue-5.4/sched-fair-don-t-balance-task-to-its-current-running.patch @@ -0,0 +1,96 @@ +From 6d50b7798c7af9c0a47321925555356fa38aa6e6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 May 2023 16:25:07 +0800 +Subject: sched/fair: Don't balance task to its current running CPU + +From: Yicong Yang + +[ Upstream commit 0dd37d6dd33a9c23351e6115ae8cdac7863bc7de ] + +We've run into the case that the balancer tries to balance a migration +disabled task and trigger the warning in set_task_cpu() like below: + + ------------[ cut here ]------------ + WARNING: CPU: 7 PID: 0 at kernel/sched/core.c:3115 set_task_cpu+0x188/0x240 + Modules linked in: hclgevf xt_CHECKSUM ipt_REJECT nf_reject_ipv4 <...snip> + CPU: 7 PID: 0 Comm: swapper/7 Kdump: loaded Tainted: G O 6.1.0-rc4+ #1 + Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 CS V5.B221.01 12/09/2021 + pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : set_task_cpu+0x188/0x240 + lr : load_balance+0x5d0/0xc60 + sp : ffff80000803bc70 + x29: ffff80000803bc70 x28: ffff004089e190e8 x27: ffff004089e19040 + x26: ffff007effcabc38 x25: 0000000000000000 x24: 0000000000000001 + x23: ffff80000803be84 x22: 000000000000000c x21: ffffb093e79e2a78 + x20: 000000000000000c x19: ffff004089e19040 x18: 0000000000000000 + x17: 0000000000001fad x16: 0000000000000030 x15: 0000000000000000 + x14: 0000000000000003 x13: 0000000000000000 x12: 0000000000000000 + x11: 0000000000000001 x10: 0000000000000400 x9 : ffffb093e4cee530 + x8 : 00000000fffffffe x7 : 0000000000ce168a x6 : 000000000000013e + x5 : 00000000ffffffe1 x4 : 0000000000000001 x3 : 0000000000000b2a + x2 : 0000000000000b2a x1 : ffffb093e6d6c510 x0 : 0000000000000001 + Call trace: + set_task_cpu+0x188/0x240 + load_balance+0x5d0/0xc60 + rebalance_domains+0x26c/0x380 + _nohz_idle_balance.isra.0+0x1e0/0x370 + run_rebalance_domains+0x6c/0x80 + __do_softirq+0x128/0x3d8 + ____do_softirq+0x18/0x24 + call_on_irq_stack+0x2c/0x38 + do_softirq_own_stack+0x24/0x3c + __irq_exit_rcu+0xcc/0xf4 + irq_exit_rcu+0x18/0x24 + el1_interrupt+0x4c/0xe4 + el1h_64_irq_handler+0x18/0x2c + el1h_64_irq+0x74/0x78 + arch_cpu_idle+0x18/0x4c + default_idle_call+0x58/0x194 + do_idle+0x244/0x2b0 + cpu_startup_entry+0x30/0x3c + secondary_start_kernel+0x14c/0x190 + __secondary_switched+0xb0/0xb4 + ---[ end trace 0000000000000000 ]--- + +Further investigation shows that the warning is superfluous, the migration +disabled task is just going to be migrated to its current running CPU. +This is because that on load balance if the dst_cpu is not allowed by the +task, we'll re-select a new_dst_cpu as a candidate. If no task can be +balanced to dst_cpu we'll try to balance the task to the new_dst_cpu +instead. In this case when the migration disabled task is not on CPU it +only allows to run on its current CPU, load balance will select its +current CPU as new_dst_cpu and later triggers the warning above. + +The new_dst_cpu is chosen from the env->dst_grpmask. Currently it +contains CPUs in sched_group_span() and if we have overlapped groups it's +possible to run into this case. This patch makes env->dst_grpmask of +group_balance_mask() which exclude any CPUs from the busiest group and +solve the issue. For balancing in a domain with no overlapped groups +the behaviour keeps same as before. + +Suggested-by: Vincent Guittot +Signed-off-by: Yicong Yang +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Vincent Guittot +Link: https://lore.kernel.org/r/20230530082507.10444-1-yangyicong@huawei.com +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 9fcba0d2ab19b..2680216234ff2 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -8938,7 +8938,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, + .sd = sd, + .dst_cpu = this_cpu, + .dst_rq = this_rq, +- .dst_grpmask = sched_group_span(sd->groups), ++ .dst_grpmask = group_balance_mask(sd->groups), + .idle = idle, + .loop_break = sched_nr_migrate_break, + .cpus = cpus, +-- +2.39.2 + diff --git a/queue-5.4/series b/queue-5.4/series index 4f6a5497643..b330796c44d 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -282,3 +282,32 @@ can-bcm-fix-uaf-in-bcm_proc_show.patch drm-client-fix-memory-leak-in-drm_client_target_cloned.patch drm-client-fix-memory-leak-in-drm_client_modeset_probe.patch ext4-correct-inline-offset-when-handling-xattrs-in-inode-body.patch +debugobjects-recheck-debug_objects_enabled-before-re.patch +nbd-add-the-maximum-limit-of-allocated-index-in-nbd_.patch +md-fix-data-corruption-for-raid456-when-reshape-rest.patch +md-raid10-prevent-soft-lockup-while-flush-writes.patch +posix-timers-ensure-timer-id-search-loop-limit-is-va.patch +arm64-mm-fix-va-range-sanity-check.patch +sched-fair-don-t-balance-task-to-its-current-running.patch +bpf-address-kcsan-report-on-bpf_lru_list.patch +devlink-report-devlink_port_type_warn-source-device.patch +wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch +wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch +igb-fix-igb_down-hung-on-surprise-removal.patch +spi-bcm63xx-fix-max-prepend-length.patch +fbdev-imxfb-warn-about-invalid-left-right-margin.patch +pinctrl-amd-use-amd_pinconf_set-for-all-config-optio.patch +net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch +iavf-fix-use-after-free-in-free_netdev.patch +net-ipv6-check-return-value-of-pskb_trim.patch +revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch +fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch +llc-don-t-drop-packet-from-non-root-netns.patch +netfilter-nf_tables-fix-spurious-set-element-inserti.patch +netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch +tcp-annotate-data-races-around-tp-tcp_tx_delay.patch +net-replace-the-limit-of-tcp_linger2-with-tcp_fin_ti.patch +tcp-annotate-data-races-around-tp-linger2.patch +tcp-annotate-data-races-around-rskq_defer_accept.patch +tcp-annotate-data-races-around-tp-notsent_lowat.patch +tcp-annotate-data-races-around-fastopenq.max_qlen.patch diff --git a/queue-5.4/spi-bcm63xx-fix-max-prepend-length.patch b/queue-5.4/spi-bcm63xx-fix-max-prepend-length.patch new file mode 100644 index 00000000000..f384b46166b --- /dev/null +++ b/queue-5.4/spi-bcm63xx-fix-max-prepend-length.patch @@ -0,0 +1,47 @@ +From 6545437f3ac3092bddedf85b05718088e4af7b13 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Jun 2023 09:14:52 +0200 +Subject: spi: bcm63xx: fix max prepend length + +From: Jonas Gorski + +[ Upstream commit 5158814cbb37bbb38344b3ecddc24ba2ed0365f2 ] + +The command word is defined as following: + + /* Command */ + #define SPI_CMD_COMMAND_SHIFT 0 + #define SPI_CMD_DEVICE_ID_SHIFT 4 + #define SPI_CMD_PREPEND_BYTE_CNT_SHIFT 8 + #define SPI_CMD_ONE_BYTE_SHIFT 11 + #define SPI_CMD_ONE_WIRE_SHIFT 12 + +If the prepend byte count field starts at bit 8, and the next defined +bit is SPI_CMD_ONE_BYTE at bit 11, it can be at most 3 bits wide, and +thus the max value is 7, not 15. + +Fixes: b17de076062a ("spi/bcm63xx: work around inability to keep CS up") +Signed-off-by: Jonas Gorski +Link: https://lore.kernel.org/r/20230629071453.62024-1-jonas.gorski@gmail.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-bcm63xx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c +index fdd7eaa0b8ede..ff27596168732 100644 +--- a/drivers/spi/spi-bcm63xx.c ++++ b/drivers/spi/spi-bcm63xx.c +@@ -125,7 +125,7 @@ enum bcm63xx_regs_spi { + SPI_MSG_DATA_SIZE, + }; + +-#define BCM63XX_SPI_MAX_PREPEND 15 ++#define BCM63XX_SPI_MAX_PREPEND 7 + + #define BCM63XX_SPI_MAX_CS 8 + #define BCM63XX_SPI_BUS_NUM 0 +-- +2.39.2 + diff --git a/queue-5.4/tcp-annotate-data-races-around-fastopenq.max_qlen.patch b/queue-5.4/tcp-annotate-data-races-around-fastopenq.max_qlen.patch new file mode 100644 index 00000000000..9e8382c208c --- /dev/null +++ b/queue-5.4/tcp-annotate-data-races-around-fastopenq.max_qlen.patch @@ -0,0 +1,77 @@ +From 5064c367a8407c3937794c8a11c980beb080c348 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:57 +0000 +Subject: tcp: annotate data-races around fastopenq.max_qlen + +From: Eric Dumazet + +[ Upstream commit 70f360dd7042cb843635ece9d28335a4addff9eb ] + +This field can be read locklessly. + +Fixes: 1536e2857bd3 ("tcp: Add a TCP_FASTOPEN socket option to get a max backlog on its listner") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-12-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/linux/tcp.h | 2 +- + net/ipv4/tcp.c | 2 +- + net/ipv4/tcp_fastopen.c | 6 ++++-- + 3 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/include/linux/tcp.h b/include/linux/tcp.h +index 89751c89f11f4..68dacc1994376 100644 +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -458,7 +458,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog) + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn); + +- queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); ++ WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn)); + } + + static inline void tcp_move_syn(struct tcp_sock *tp, +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index c980d18d99094..647cb664c2ad0 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3623,7 +3623,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, + break; + + case TCP_FASTOPEN: +- val = icsk->icsk_accept_queue.fastopenq.max_qlen; ++ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen); + break; + + case TCP_FASTOPEN_CONNECT: +diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c +index 21705b2ddaffa..35088cd30840d 100644 +--- a/net/ipv4/tcp_fastopen.c ++++ b/net/ipv4/tcp_fastopen.c +@@ -312,6 +312,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, + static bool tcp_fastopen_queue_check(struct sock *sk) + { + struct fastopen_queue *fastopenq; ++ int max_qlen; + + /* Make sure the listener has enabled fastopen, and we don't + * exceed the max # of pending TFO requests allowed before trying +@@ -324,10 +325,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk) + * temporarily vs a server not supporting Fast Open at all. + */ + fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; +- if (fastopenq->max_qlen == 0) ++ max_qlen = READ_ONCE(fastopenq->max_qlen); ++ if (max_qlen == 0) + return false; + +- if (fastopenq->qlen >= fastopenq->max_qlen) { ++ if (fastopenq->qlen >= max_qlen) { + struct request_sock *req1; + spin_lock(&fastopenq->lock); + req1 = fastopenq->rskq_rst_head; +-- +2.39.2 + diff --git a/queue-5.4/tcp-annotate-data-races-around-rskq_defer_accept.patch b/queue-5.4/tcp-annotate-data-races-around-rskq_defer_accept.patch new file mode 100644 index 00000000000..a9db98d0bbc --- /dev/null +++ b/queue-5.4/tcp-annotate-data-races-around-rskq_defer_accept.patch @@ -0,0 +1,53 @@ +From 2cb6b6537610c3fffde9a74654ad89cc87671b53 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:54 +0000 +Subject: tcp: annotate data-races around rskq_defer_accept + +From: Eric Dumazet + +[ Upstream commit ae488c74422fb1dcd807c0201804b3b5e8a322a3 ] + +do_tcp_getsockopt() reads rskq_defer_accept while another cpu +might change its value. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-9-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index b9475fcaa6c4f..eb70c1b866d0f 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3075,9 +3075,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, + + case TCP_DEFER_ACCEPT: + /* Translate value in seconds to number of retransmits */ +- icsk->icsk_accept_queue.rskq_defer_accept = +- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, +- TCP_RTO_MAX / HZ); ++ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, ++ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, ++ TCP_RTO_MAX / HZ)); + break; + + case TCP_WINDOW_CLAMP: +@@ -3481,8 +3481,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, + val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; + break; + case TCP_DEFER_ACCEPT: +- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, +- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); ++ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept); ++ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ, ++ TCP_RTO_MAX / HZ); + break; + case TCP_WINDOW_CLAMP: + val = tp->window_clamp; +-- +2.39.2 + diff --git a/queue-5.4/tcp-annotate-data-races-around-tp-linger2.patch b/queue-5.4/tcp-annotate-data-races-around-tp-linger2.patch new file mode 100644 index 00000000000..be6c0993833 --- /dev/null +++ b/queue-5.4/tcp-annotate-data-races-around-tp-linger2.patch @@ -0,0 +1,52 @@ +From 5227a8526cd7b418601ee6b5cf385be0030c4937 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:53 +0000 +Subject: tcp: annotate data-races around tp->linger2 + +From: Eric Dumazet + +[ Upstream commit 9df5335ca974e688389c875546e5819778a80d59 ] + +do_tcp_getsockopt() reads tp->linger2 while another cpu +might change its value. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-8-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index c6c73b9407098..b9475fcaa6c4f 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3066,11 +3066,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, + + case TCP_LINGER2: + if (val < 0) +- tp->linger2 = -1; ++ WRITE_ONCE(tp->linger2, -1); + else if (val > TCP_FIN_TIMEOUT_MAX / HZ) +- tp->linger2 = TCP_FIN_TIMEOUT_MAX; ++ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX); + else +- tp->linger2 = val * HZ; ++ WRITE_ONCE(tp->linger2, val * HZ); + break; + + case TCP_DEFER_ACCEPT: +@@ -3476,7 +3476,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, + val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + break; + case TCP_LINGER2: +- val = tp->linger2; ++ val = READ_ONCE(tp->linger2); + if (val >= 0) + val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; + break; +-- +2.39.2 + diff --git a/queue-5.4/tcp-annotate-data-races-around-tp-notsent_lowat.patch b/queue-5.4/tcp-annotate-data-races-around-tp-notsent_lowat.patch new file mode 100644 index 00000000000..5b77ab9c4f4 --- /dev/null +++ b/queue-5.4/tcp-annotate-data-races-around-tp-notsent_lowat.patch @@ -0,0 +1,64 @@ +From 3537464fefdcffe80ddaabe1713aab6c192844dd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:55 +0000 +Subject: tcp: annotate data-races around tp->notsent_lowat + +From: Eric Dumazet + +[ Upstream commit 1aeb87bc1440c5447a7fa2d6e3c2cca52cbd206b ] + +tp->notsent_lowat can be read locklessly from do_tcp_getsockopt() +and tcp_poll(). + +Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-10-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/tcp.h | 6 +++++- + net/ipv4/tcp.c | 4 ++-- + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 2f456bed33ec3..4e909148fce39 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1953,7 +1953,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); + static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); +- return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); ++ u32 val; ++ ++ val = READ_ONCE(tp->notsent_lowat); ++ ++ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); + } + + /* @wake is one when sk_stream_write_space() calls us. +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index eb70c1b866d0f..c980d18d99094 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3165,7 +3165,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, + err = tcp_repair_set_window(tp, optval, optlen); + break; + case TCP_NOTSENT_LOWAT: +- tp->notsent_lowat = val; ++ WRITE_ONCE(tp->notsent_lowat, val); + sk->sk_write_space(sk); + break; + case TCP_INQ: +@@ -3642,7 +3642,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, + val = tcp_time_stamp_raw() + tp->tsoffset; + break; + case TCP_NOTSENT_LOWAT: +- val = tp->notsent_lowat; ++ val = READ_ONCE(tp->notsent_lowat); + break; + case TCP_INQ: + val = tp->recvmsg_inq; +-- +2.39.2 + diff --git a/queue-5.4/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch b/queue-5.4/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch new file mode 100644 index 00000000000..f445a0dc8d6 --- /dev/null +++ b/queue-5.4/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch @@ -0,0 +1,46 @@ +From 26a4b494c6e89c44e7b5897579e5dced37127a5b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:47 +0000 +Subject: tcp: annotate data-races around tp->tcp_tx_delay + +From: Eric Dumazet + +[ Upstream commit 348b81b68b13ebd489a3e6a46aa1c384c731c919 ] + +do_tcp_getsockopt() reads tp->tcp_tx_delay while another cpu +might change its value. + +Fixes: a842fe1425cb ("tcp: add optional per socket transmit delay") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-2-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index fdf2ddc4864df..e33abcff56080 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3177,7 +3177,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, + case TCP_TX_DELAY: + if (val) + tcp_enable_tx_delay(); +- tp->tcp_tx_delay = val; ++ WRITE_ONCE(tp->tcp_tx_delay, val); + break; + default: + err = -ENOPROTOOPT; +@@ -3634,7 +3634,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, + break; + + case TCP_TX_DELAY: +- val = tp->tcp_tx_delay; ++ val = READ_ONCE(tp->tcp_tx_delay); + break; + + case TCP_TIMESTAMP: +-- +2.39.2 + diff --git a/queue-5.4/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch b/queue-5.4/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch new file mode 100644 index 00000000000..4f154c0950b --- /dev/null +++ b/queue-5.4/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch @@ -0,0 +1,47 @@ +From d9c11a2569b0a90e134d2e38e12c135e5abd3856 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 13:04:02 +0300 +Subject: wifi: iwlwifi: mvm: avoid baid size integer overflow + +From: Johannes Berg + +[ Upstream commit 1a528ab1da324d078ec60283c34c17848580df24 ] + +Roee reported various hard-to-debug crashes with pings in +EHT aggregation scenarios. Enabling KASAN showed that we +access the BAID allocation out of bounds, and looking at +the code a bit shows that since the reorder buffer entry +(struct iwl_mvm_reorder_buf_entry) is 128 bytes if debug +such as lockdep is enabled, then staring from an agg size +512 we overflow the size calculation, and allocate a much +smaller structure than we should, causing slab corruption +once we initialize this. + +Fix this by simply using u32 instead of u16. + +Reported-by: Roee Goldfiner +Signed-off-by: Johannes Berg +Signed-off-by: Gregory Greenman +Link: https://lore.kernel.org/r/20230620125813.f428c856030d.I2c2bb808e945adb71bc15f5b2bac2d8957ea90eb@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +index a3255100e3fee..7befb92b5159c 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +@@ -2557,7 +2557,7 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, + } + + if (iwl_mvm_has_new_rx_api(mvm) && start) { +- u16 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]); ++ u32 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]); + + /* sparse doesn't like the __align() so don't check */ + #ifndef __CHECKER__ +-- +2.39.2 + diff --git a/queue-5.4/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch b/queue-5.4/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch new file mode 100644 index 00000000000..c3c97165380 --- /dev/null +++ b/queue-5.4/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch @@ -0,0 +1,71 @@ +From 993fa417304ccbc8f9e32804e8c193cccb79430c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 12:04:07 -0600 +Subject: wifi: wext-core: Fix -Wstringop-overflow warning in + ioctl_standard_iw_point() + +From: Gustavo A. R. Silva + +[ Upstream commit 71e7552c90db2a2767f5c17c7ec72296b0d92061 ] + +-Wstringop-overflow is legitimately warning us about extra_size +pontentially being zero at some point, hence potenially ending +up _allocating_ zero bytes of memory for extra pointer and then +trying to access such object in a call to copy_from_user(). + +Fix this by adding a sanity check to ensure we never end up +trying to allocate zero bytes of data for extra pointer, before +continue executing the rest of the code in the function. + +Address the following -Wstringop-overflow warning seen when built +m68k architecture with allyesconfig configuration: + from net/wireless/wext-core.c:11: +In function '_copy_from_user', + inlined from 'copy_from_user' at include/linux/uaccess.h:183:7, + inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:825:7: +arch/m68k/include/asm/string.h:48:25: warning: '__builtin_memset' writing 1 or more bytes into a region of size 0 overflows the destination [-Wstringop-overflow=] + 48 | #define memset(d, c, n) __builtin_memset(d, c, n) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +include/linux/uaccess.h:153:17: note: in expansion of macro 'memset' + 153 | memset(to + (n - res), 0, res); + | ^~~~~~ +In function 'kmalloc', + inlined from 'kzalloc' at include/linux/slab.h:694:9, + inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:819:10: +include/linux/slab.h:577:16: note: at offset 1 into destination object of size 0 allocated by '__kmalloc' + 577 | return __kmalloc(size, flags); + | ^~~~~~~~~~~~~~~~~~~~~~ + +This help with the ongoing efforts to globally enable +-Wstringop-overflow. + +Link: https://github.com/KSPP/linux/issues/315 +Signed-off-by: Gustavo A. R. Silva +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/ZItSlzvIpjdjNfd8@work +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/wireless/wext-core.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c +index 76a80a41615be..a57f54bc0e1a7 100644 +--- a/net/wireless/wext-core.c ++++ b/net/wireless/wext-core.c +@@ -796,6 +796,12 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, + } + } + ++ /* Sanity-check to ensure we never end up _allocating_ zero ++ * bytes of data for extra. ++ */ ++ if (extra_size <= 0) ++ return -EFAULT; ++ + /* kzalloc() ensures NULL-termination for essid_compat. */ + extra = kzalloc(extra_size, GFP_KERNEL); + if (!extra) +-- +2.39.2 + -- 2.47.3