git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.4
author Sasha Levin <sashal@kernel.org>
Mon, 24 Jul 2023 01:25:09 +0000 (21:25 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 24 Jul 2023 01:25:09 +0000 (21:25 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
30 files changed:
queue-5.4/arm64-mm-fix-va-range-sanity-check.patch [new file with mode: 0644]
queue-5.4/bpf-address-kcsan-report-on-bpf_lru_list.patch [new file with mode: 0644]
queue-5.4/debugobjects-recheck-debug_objects_enabled-before-re.patch [new file with mode: 0644]
queue-5.4/devlink-report-devlink_port_type_warn-source-device.patch [new file with mode: 0644]
queue-5.4/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch [new file with mode: 0644]
queue-5.4/fbdev-imxfb-warn-about-invalid-left-right-margin.patch [new file with mode: 0644]
queue-5.4/iavf-fix-use-after-free-in-free_netdev.patch [new file with mode: 0644]
queue-5.4/igb-fix-igb_down-hung-on-surprise-removal.patch [new file with mode: 0644]
queue-5.4/llc-don-t-drop-packet-from-non-root-netns.patch [new file with mode: 0644]
queue-5.4/md-fix-data-corruption-for-raid456-when-reshape-rest.patch [new file with mode: 0644]
queue-5.4/md-raid10-prevent-soft-lockup-while-flush-writes.patch [new file with mode: 0644]
queue-5.4/nbd-add-the-maximum-limit-of-allocated-index-in-nbd_.patch [new file with mode: 0644]
queue-5.4/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch [new file with mode: 0644]
queue-5.4/net-ipv6-check-return-value-of-pskb_trim.patch [new file with mode: 0644]
queue-5.4/net-replace-the-limit-of-tcp_linger2-with-tcp_fin_ti.patch [new file with mode: 0644]
queue-5.4/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch [new file with mode: 0644]
queue-5.4/netfilter-nf_tables-fix-spurious-set-element-inserti.patch [new file with mode: 0644]
queue-5.4/pinctrl-amd-use-amd_pinconf_set-for-all-config-optio.patch [new file with mode: 0644]
queue-5.4/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch [new file with mode: 0644]
queue-5.4/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch [new file with mode: 0644]
queue-5.4/sched-fair-don-t-balance-task-to-its-current-running.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/spi-bcm63xx-fix-max-prepend-length.patch [new file with mode: 0644]
queue-5.4/tcp-annotate-data-races-around-fastopenq.max_qlen.patch [new file with mode: 0644]
queue-5.4/tcp-annotate-data-races-around-rskq_defer_accept.patch [new file with mode: 0644]
queue-5.4/tcp-annotate-data-races-around-tp-linger2.patch [new file with mode: 0644]
queue-5.4/tcp-annotate-data-races-around-tp-notsent_lowat.patch [new file with mode: 0644]
queue-5.4/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch [new file with mode: 0644]
queue-5.4/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch [new file with mode: 0644]
queue-5.4/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch [new file with mode: 0644]

diff --git a/queue-5.4/arm64-mm-fix-va-range-sanity-check.patch b/queue-5.4/arm64-mm-fix-va-range-sanity-check.patch
new file mode 100644 (file)
index 0000000..f9104cf
--- /dev/null
@@ -0,0 +1,106 @@
+From 8de8bcd7f0e1ec75648551ace9fedfc854315920 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 11:26:28 +0100
+Subject: arm64: mm: fix VA-range sanity check
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit ab9b4008092c86dc12497af155a0901cc1156999 ]
+
+Both create_mapping_noalloc() and update_mapping_prot() sanity-check
+their 'virt' parameter, but the check itself doesn't make much sense.
+The condition used today appears to be a historical accident.
+
+The sanity-check condition:
+
+       if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
+               [ ... warning here ... ]
+               return;
+       }
+
+... can only be true for the KASAN shadow region or the module region,
+and there's no reason to exclude these specifically for creating and
+updating mappings.
+
+When arm64 support was first upstreamed in commit:
+
+  c1cc1552616d0f35 ("arm64: MMU initialisation")
+
+... the condition was:
+
+       if (virt < VMALLOC_START) {
+               [ ... warning here ... ]
+               return;
+       }
+
+At the time, VMALLOC_START was the lowest kernel address, and this was
+checking whether 'virt' would be translated via TTBR1.
+
+Subsequently in commit:
+
+  14c127c957c1c607 ("arm64: mm: Flip kernel VA space")
+
+... the condition was changed to:
+
+       if ((virt >= VA_START) && (virt < VMALLOC_START)) {
+               [ ... warning here ... ]
+               return;
+       }
+
+This appears to have been a thinko. The commit moved the linear map to
+the bottom of the kernel address space, with VMALLOC_START being at the
+halfway point. The old condition would warn for changes to the linear
+map below this, and at the time VA_START was the end of the linear map.
+
+Subsequently we cleaned up the naming of VA_START in commit:
+
+  77ad4ce69321abbe ("arm64: memory: rename VA_START to PAGE_END")
+
+... keeping the erroneous condition as:
+
+       if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
+               [ ... warning here ... ]
+               return;
+       }
+
+Correct the condition to check against the start of the TTBR1 address
+space, which is currently PAGE_OFFSET. This simplifies the logic, and
+more clearly matches the "outside kernel range" message in the warning.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Cc: Russell King <linux@armlinux.org.uk>
+Cc: Steve Capper <steve.capper@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Link: https://lore.kernel.org/r/20230615102628.1052103-1-mark.rutland@arm.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/mm/mmu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
+index 5cf575f23af28..8e934bb44f12e 100644
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -399,7 +399,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift)
+ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
+                                 phys_addr_t size, pgprot_t prot)
+ {
+-      if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
++      if (virt < PAGE_OFFSET) {
+               pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+                       &phys, virt);
+               return;
+@@ -426,7 +426,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
+ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
+                               phys_addr_t size, pgprot_t prot)
+ {
+-      if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
++      if (virt < PAGE_OFFSET) {
+               pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n",
+                       &phys, virt);
+               return;
+-- 
+2.39.2
+
diff --git a/queue-5.4/bpf-address-kcsan-report-on-bpf_lru_list.patch b/queue-5.4/bpf-address-kcsan-report-on-bpf_lru_list.patch
new file mode 100644 (file)
index 0000000..edfcd98
--- /dev/null
@@ -0,0 +1,177 @@
+From 1e5428d5217a30952df66f845de9a533929933fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 May 2023 21:37:48 -0700
+Subject: bpf: Address KCSAN report on bpf_lru_list
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit ee9fd0ac3017c4313be91a220a9ac4c99dde7ad4 ]
+
+KCSAN reported a data-race when accessing node->ref.
+Although node->ref does not have to be accurate,
+take this chance to use a more common READ_ONCE() and WRITE_ONCE()
+pattern instead of data_race().
+
+There is an existing bpf_lru_node_is_ref() and bpf_lru_node_set_ref().
+This patch also adds bpf_lru_node_clear_ref() to do the
+WRITE_ONCE(node->ref, 0) also.
+
+==================================================================
+BUG: KCSAN: data-race in __bpf_lru_list_rotate / __htab_lru_percpu_map_update_elem
+
+write to 0xffff888137038deb of 1 bytes by task 11240 on cpu 1:
+__bpf_lru_node_move kernel/bpf/bpf_lru_list.c:113 [inline]
+__bpf_lru_list_rotate_active kernel/bpf/bpf_lru_list.c:149 [inline]
+__bpf_lru_list_rotate+0x1bf/0x750 kernel/bpf/bpf_lru_list.c:240
+bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:329 [inline]
+bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline]
+bpf_lru_pop_free+0x638/0xe20 kernel/bpf/bpf_lru_list.c:499
+prealloc_lru_pop kernel/bpf/hashtab.c:290 [inline]
+__htab_lru_percpu_map_update_elem+0xe7/0x820 kernel/bpf/hashtab.c:1316
+bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
+bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
+generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
+bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
+__sys_bpf+0x338/0x810
+__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
+__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
+__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read to 0xffff888137038deb of 1 bytes by task 11241 on cpu 0:
+bpf_lru_node_set_ref kernel/bpf/bpf_lru_list.h:70 [inline]
+__htab_lru_percpu_map_update_elem+0x2f1/0x820 kernel/bpf/hashtab.c:1332
+bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
+bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
+generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
+bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
+__sys_bpf+0x338/0x810
+__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
+__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
+__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x01 -> 0x00
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 11241 Comm: syz-executor.3 Not tainted 6.3.0-rc7-syzkaller-00136-g6a66fdd29ea1 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023
+==================================================================
+
+Reported-by: syzbot+ebe648a84e8784763f82@syzkaller.appspotmail.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/r/20230511043748.1384166-1-martin.lau@linux.dev
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/bpf_lru_list.c | 21 +++++++++++++--------
+ kernel/bpf/bpf_lru_list.h |  7 ++-----
+ 2 files changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
+index d99e89f113c43..3dabdd137d102 100644
+--- a/kernel/bpf/bpf_lru_list.c
++++ b/kernel/bpf/bpf_lru_list.c
+@@ -41,7 +41,12 @@ static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
+ /* bpf_lru_node helpers */
+ static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
+ {
+-      return node->ref;
++      return READ_ONCE(node->ref);
++}
++
++static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
++{
++      WRITE_ONCE(node->ref, 0);
+ }
+ static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
+@@ -89,7 +94,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
+       bpf_lru_list_count_inc(l, tgt_type);
+       node->type = tgt_type;
+-      node->ref = 0;
++      bpf_lru_node_clear_ref(node);
+       list_move(&node->list, &l->lists[tgt_type]);
+ }
+@@ -110,7 +115,7 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l,
+               bpf_lru_list_count_inc(l, tgt_type);
+               node->type = tgt_type;
+       }
+-      node->ref = 0;
++      bpf_lru_node_clear_ref(node);
+       /* If the moving node is the next_inactive_rotation candidate,
+        * move the next_inactive_rotation pointer also.
+@@ -353,7 +358,7 @@ static void __local_list_add_pending(struct bpf_lru *lru,
+       *(u32 *)((void *)node + lru->hash_offset) = hash;
+       node->cpu = cpu;
+       node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
+-      node->ref = 0;
++      bpf_lru_node_clear_ref(node);
+       list_add(&node->list, local_pending_list(loc_l));
+ }
+@@ -419,7 +424,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
+       if (!list_empty(free_list)) {
+               node = list_first_entry(free_list, struct bpf_lru_node, list);
+               *(u32 *)((void *)node + lru->hash_offset) = hash;
+-              node->ref = 0;
++              bpf_lru_node_clear_ref(node);
+               __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
+       }
+@@ -522,7 +527,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
+               }
+               node->type = BPF_LRU_LOCAL_LIST_T_FREE;
+-              node->ref = 0;
++              bpf_lru_node_clear_ref(node);
+               list_move(&node->list, local_free_list(loc_l));
+               raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+@@ -568,7 +573,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
+               node = (struct bpf_lru_node *)(buf + node_offset);
+               node->type = BPF_LRU_LIST_T_FREE;
+-              node->ref = 0;
++              bpf_lru_node_clear_ref(node);
+               list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
+               buf += elem_size;
+       }
+@@ -594,7 +599,7 @@ static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
+               node = (struct bpf_lru_node *)(buf + node_offset);
+               node->cpu = cpu;
+               node->type = BPF_LRU_LIST_T_FREE;
+-              node->ref = 0;
++              bpf_lru_node_clear_ref(node);
+               list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
+               i++;
+               buf += elem_size;
+diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
+index f02504640e185..41f8fea530c8d 100644
+--- a/kernel/bpf/bpf_lru_list.h
++++ b/kernel/bpf/bpf_lru_list.h
+@@ -63,11 +63,8 @@ struct bpf_lru {
+ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
+ {
+-      /* ref is an approximation on access frequency.  It does not
+-       * have to be very accurate.  Hence, no protection is used.
+-       */
+-      if (!node->ref)
+-              node->ref = 1;
++      if (!READ_ONCE(node->ref))
++              WRITE_ONCE(node->ref, 1);
+ }
+ int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+-- 
+2.39.2
+
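For reference, a minimal userspace sketch of the READ_ONCE()/WRITE_ONCE() annotation pattern this patch applies. The macros below are simplified volatile-cast stand-ins for the kernel's, and struct lru_node is a hypothetical reduction of struct bpf_lru_node, not the real type:

#include <stdio.h>

/* Simplified stand-ins for the kernel's READ_ONCE()/WRITE_ONCE(): the flag is
 * read and written by concurrent CPUs without a lock, so every access goes
 * through a volatile cast to keep it single-copy-atomic and to document the
 * intentional data race for tools such as KCSAN. */
#define READ_ONCE(x)      (*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v)  (*(volatile __typeof__(x) *)&(x) = (v))

struct lru_node { unsigned char ref; };

static void node_set_ref(struct lru_node *node)
{
    if (!READ_ONCE(node->ref))        /* cheap test before the store, as in the patch */
        WRITE_ONCE(node->ref, 1);
}

static void node_clear_ref(struct lru_node *node)
{
    WRITE_ONCE(node->ref, 0);
}

static int node_is_ref(const struct lru_node *node)
{
    return READ_ONCE(node->ref);
}

int main(void)
{
    struct lru_node n = { 0 };

    node_set_ref(&n);
    printf("ref=%d\n", node_is_ref(&n));
    node_clear_ref(&n);
    printf("ref=%d\n", node_is_ref(&n));
    return 0;
}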
diff --git a/queue-5.4/debugobjects-recheck-debug_objects_enabled-before-re.patch b/queue-5.4/debugobjects-recheck-debug_objects_enabled-before-re.patch
new file mode 100644 (file)
index 0000000..12a9a23
--- /dev/null
@@ -0,0 +1,74 @@
+From 3f9b05ff93ea80f06045912ae0c882684155cdb0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Jun 2023 19:19:02 +0900
+Subject: debugobjects: Recheck debug_objects_enabled before reporting
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 8b64d420fe2450f82848178506d3e3a0bd195539 ]
+
+syzbot is reporting a false positive ODEBUG message immediately after
+ODEBUG was disabled due to OOM.
+
+  [ 1062.309646][T22911] ODEBUG: Out of memory. ODEBUG disabled
+  [ 1062.886755][ T5171] ------------[ cut here ]------------
+  [ 1062.892770][ T5171] ODEBUG: assert_init not available (active state 0) object: ffffc900056afb20 object type: timer_list hint: process_timeout+0x0/0x40
+
+  CPU 0 [ T5171]                CPU 1 [T22911]
+  --------------                --------------
+  debug_object_assert_init() {
+    if (!debug_objects_enabled)
+      return;
+    db = get_bucket(addr);
+                                lookup_object_or_alloc() {
+                                  debug_objects_enabled = 0;
+                                  return NULL;
+                                }
+                                debug_objects_oom() {
+                                  pr_warn("Out of memory. ODEBUG disabled\n");
+                                  // all buckets get emptied here, and
+                                }
+    lookup_object_or_alloc(addr, db, descr, false, true) {
+      // this bucket is already empty.
+      return ERR_PTR(-ENOENT);
+    }
+    // Emits false positive warning.
+    debug_print_object(&o, "assert_init");
+  }
+
+Recheck debug_objects_enabled in debug_print_object() to avoid that.
+
+Reported-by: syzbot <syzbot+7937ba6a50bdd00fffdf@syzkaller.appspotmail.com>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/492fe2ae-5141-d548-ebd5-62f5fe2e57f7@I-love.SAKURA.ne.jp
+Closes: https://syzkaller.appspot.com/bug?extid=7937ba6a50bdd00fffdf
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 26fa04335537b..b0e4301d74954 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -474,6 +474,15 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
+       struct debug_obj_descr *descr = obj->descr;
+       static int limit;
++      /*
++       * Don't report if lookup_object_or_alloc() by the current thread
++       * failed because lookup_object_or_alloc()/debug_objects_oom() by a
++       * concurrent thread turned off debug_objects_enabled and cleared
++       * the hash buckets.
++       */
++      if (!debug_objects_enabled)
++              return;
++
+       if (limit < 5 && descr != descr_test) {
+               void *hint = descr->debug_hint ?
+                       descr->debug_hint(obj->object) : NULL;
+-- 
+2.39.2
+
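A plain-C sketch of the recheck pattern added by this patch, with hypothetical names and a C11 atomic flag standing in for debug_objects_enabled; it only illustrates why the report path must test the flag again after a failed lookup:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* A concurrent out-of-memory path can disable object tracking and empty the
 * hash buckets between the caller's first enabled-check and its failed lookup,
 * so the report path must check the flag again before warning. */
static atomic_bool debug_enabled = true;

static void debug_print_object(const void *addr, const char *msg)
{
    if (!atomic_load(&debug_enabled))   /* tracking just turned off: failure is expected */
        return;
    fprintf(stderr, "ODEBUG: %s, object: %p\n", msg, addr);
}

static void debug_objects_oom(void)
{
    fprintf(stderr, "ODEBUG: Out of memory. ODEBUG disabled\n");
    atomic_store(&debug_enabled, false);  /* the real code also empties all buckets here */
}

int main(void)
{
    int obj;

    debug_objects_oom();                                    /* simulate the racing CPU */
    debug_print_object(&obj, "assert_init not available");  /* now silently skipped */
    return 0;
}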
diff --git a/queue-5.4/devlink-report-devlink_port_type_warn-source-device.patch b/queue-5.4/devlink-report-devlink_port_type_warn-source-device.patch
new file mode 100644 (file)
index 0000000..fa06ed6
--- /dev/null
@@ -0,0 +1,77 @@
+From 634c51780e8327ad19a3169eb0cae02bd9ae033d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 11:54:47 +0200
+Subject: devlink: report devlink_port_type_warn source device
+
+From: Petr Oros <poros@redhat.com>
+
+[ Upstream commit a52305a81d6bb74b90b400dfa56455d37872fe4b ]
+
+devlink_port_type_warn() is scheduled for a devlink port and warns
+when the port type is not set. But from this warning it is not easy
+to find out which device (driver) has not set its devlink port type.
+
+[ 3709.975552] Type was not set for devlink port.
+[ 3709.975579] WARNING: CPU: 1 PID: 13092 at net/devlink/leftover.c:6775 devlink_port_type_warn+0x11/0x20
+[ 3709.993967] Modules linked in: openvswitch nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink bluetooth rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs vhost_net vhost vhost_iotlb tap tun bridge stp llc qrtr intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm x86_pkg_temp_thermal mlx5_ib intel_powerclamp coretemp dell_wmi ledtrig_audio sparse_keymap ipmi_ssif kvm_intel ib_uverbs rfkill ib_core video kvm iTCO_wdt acpi_ipmi intel_vsec irqbypass ipmi_si iTCO_vendor_support dcdbas ipmi_devintf mei_me ipmi_msghandler rapl mei intel_cstate isst_if_mmio isst_if_mbox_pci dell_smbios intel_uncore isst_if_common i2c_i801 dell_wmi_descriptor wmi_bmof i2c_smbus intel_pch_thermal pcspkr acpi_power_meter xfs libcrc32c sd_mod sg nvme_tcp mgag200 i2c_algo_bit nvme_fabrics drm_shmem_helper drm_kms_helper nvme syscopyarea ahci sysfillrect sysimgblt nvme_core fb_sys_fops crct10dif_pclmul libahci mlx5_core sfc crc32_pclmul nvme_common drm
+[ 3709.994030]  crc32c_intel mtd t10_pi mlxfw libata tg3 mdio megaraid_sas psample ghash_clmulni_intel pci_hyperv_intf wmi dm_multipath sunrpc dm_mirror dm_region_hash dm_log dm_mod be2iscsi bnx2i cnic uio cxgb4i cxgb4 tls libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse
+[ 3710.108431] CPU: 1 PID: 13092 Comm: kworker/1:1 Kdump: loaded Not tainted 5.14.0-319.el9.x86_64 #1
+[ 3710.108435] Hardware name: Dell Inc. PowerEdge R750/0PJ80M, BIOS 1.8.2 09/14/2022
+[ 3710.108437] Workqueue: events devlink_port_type_warn
+[ 3710.108440] RIP: 0010:devlink_port_type_warn+0x11/0x20
+[ 3710.108443] Code: 84 76 fe ff ff 48 c7 03 20 0e 1a ad 31 c0 e9 96 fd ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 48 c7 c7 18 24 4e ad e8 ef 71 62 ff <0f> 0b c3 cc cc cc cc 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f6 87
+[ 3710.108445] RSP: 0018:ff3b6d2e8b3c7e90 EFLAGS: 00010282
+[ 3710.108447] RAX: 0000000000000000 RBX: ff366d6580127080 RCX: 0000000000000027
+[ 3710.108448] RDX: 0000000000000027 RSI: 00000000ffff86de RDI: ff366d753f41f8c8
+[ 3710.108449] RBP: ff366d658ff5a0c0 R08: ff366d753f41f8c0 R09: ff3b6d2e8b3c7e18
+[ 3710.108450] R10: 0000000000000001 R11: 0000000000000023 R12: ff366d753f430600
+[ 3710.108451] R13: ff366d753f436900 R14: 0000000000000000 R15: ff366d753f436905
+[ 3710.108452] FS:  0000000000000000(0000) GS:ff366d753f400000(0000) knlGS:0000000000000000
+[ 3710.108453] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 3710.108454] CR2: 00007f1c57bc74e0 CR3: 000000111d26a001 CR4: 0000000000773ee0
+[ 3710.108456] PKRU: 55555554
+[ 3710.108457] Call Trace:
+[ 3710.108458]  <TASK>
+[ 3710.108459]  process_one_work+0x1e2/0x3b0
+[ 3710.108466]  ? rescuer_thread+0x390/0x390
+[ 3710.108468]  worker_thread+0x50/0x3a0
+[ 3710.108471]  ? rescuer_thread+0x390/0x390
+[ 3710.108473]  kthread+0xdd/0x100
+[ 3710.108477]  ? kthread_complete_and_exit+0x20/0x20
+[ 3710.108479]  ret_from_fork+0x1f/0x30
+[ 3710.108485]  </TASK>
+[ 3710.108486] ---[ end trace 1b4b23cd0c65d6a0 ]---
+
+After patch:
+[  402.473064] ice 0000:41:00.0: Type was not set for devlink port.
+[  402.473064] ice 0000:41:00.1: Type was not set for devlink port.
+
+Signed-off-by: Petr Oros <poros@redhat.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Link: https://lore.kernel.org/r/20230615095447.8259-1-poros@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/devlink.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/devlink.c b/net/core/devlink.c
+index 2dd354d869cd7..b4dabe5d89f72 100644
+--- a/net/core/devlink.c
++++ b/net/core/devlink.c
+@@ -6299,7 +6299,10 @@ EXPORT_SYMBOL_GPL(devlink_free);
+ static void devlink_port_type_warn(struct work_struct *work)
+ {
+-      WARN(true, "Type was not set for devlink port.");
++      struct devlink_port *port = container_of(to_delayed_work(work),
++                                               struct devlink_port,
++                                               type_warn_dw);
++      dev_warn(port->devlink->dev, "Type was not set for devlink port.");
+ }
+ static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
+-- 
+2.39.2
+
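A minimal sketch of the container_of() pattern the fix relies on, written in plain C with a hand-rolled container_of and hypothetical struct names: the work callback only receives a pointer to the embedded work member and recovers the owning port so the warning can name the device it belongs to.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct work { void (*fn)(struct work *w); };

struct port {
    const char *dev_name;
    struct work type_warn_work;   /* embedded, like devlink_port::type_warn_dw */
};

static void port_type_warn(struct work *w)
{
    /* Recover the containing struct from the embedded work member. */
    struct port *p = container_of(w, struct port, type_warn_work);

    fprintf(stderr, "%s: Type was not set for devlink port.\n", p->dev_name);
}

int main(void)
{
    struct port p = {
        .dev_name = "0000:41:00.0",
        .type_warn_work = { .fn = port_type_warn },
    };

    p.type_warn_work.fn(&p.type_warn_work);   /* stands in for the workqueue invoking the callback */
    return 0;
}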
diff --git a/queue-5.4/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch b/queue-5.4/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch
new file mode 100644 (file)
index 0000000..1c0e655
--- /dev/null
@@ -0,0 +1,40 @@
+From e25dfa5ac6e4c44a8de6036201e1be18c4ecf443 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 15 Jul 2023 16:16:56 +0800
+Subject: fbdev: au1200fb: Fix missing IRQ check in au1200fb_drv_probe
+
+From: Zhang Shurong <zhang_shurong@foxmail.com>
+
+[ Upstream commit 4e88761f5f8c7869f15a2046b1a1116f4fab4ac8 ]
+
+This function misses checking the result of the platform_get_irq() call and
+may pass a negative error code to request_irq(), which takes an unsigned IRQ
+number, causing it to fail with -EINVAL and overriding the original error code.
+
+Fix this by not calling request_irq() with an invalid IRQ number.
+
+Fixes: 1630d85a8312 ("au1200fb: fix hardcoded IRQ")
+Signed-off-by: Zhang Shurong <zhang_shurong@foxmail.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/au1200fb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
+index 43a4dddaafd52..d0335d4d5ab54 100644
+--- a/drivers/video/fbdev/au1200fb.c
++++ b/drivers/video/fbdev/au1200fb.c
+@@ -1732,6 +1732,9 @@ static int au1200fb_drv_probe(struct platform_device *dev)
+       /* Now hook interrupt too */
+       irq = platform_get_irq(dev, 0);
++      if (irq < 0)
++              return irq;
++
+       ret = request_irq(irq, au1200fb_handle_irq,
+                         IRQF_SHARED, "lcd", (void *)dev);
+       if (ret) {
+-- 
+2.39.2
+
diff --git a/queue-5.4/fbdev-imxfb-warn-about-invalid-left-right-margin.patch b/queue-5.4/fbdev-imxfb-warn-about-invalid-left-right-margin.patch
new file mode 100644 (file)
index 0000000..585b941
--- /dev/null
@@ -0,0 +1,43 @@
+From a04d9ccef0b4a1f90d0f96da18d27601add6d807 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 15:24:37 +0200
+Subject: fbdev: imxfb: warn about invalid left/right margin
+
+From: Martin Kaiser <martin@kaiser.cx>
+
+[ Upstream commit 4e47382fbca916d7db95cbf9e2d7ca2e9d1ca3fe ]
+
+Warn about invalid var->left_margin or var->right_margin. Their values
+are read from the device tree.
+
+We store var->left_margin-3 and var->right_margin-1 in register
+fields. These fields should be >= 0.
+
+Fixes: 7e8549bcee00 ("imxfb: Fix margin settings")
+Signed-off-by: Martin Kaiser <martin@kaiser.cx>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/imxfb.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
+index ffde3107104bc..dbc8808b093a5 100644
+--- a/drivers/video/fbdev/imxfb.c
++++ b/drivers/video/fbdev/imxfb.c
+@@ -601,10 +601,10 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
+       if (var->hsync_len < 1    || var->hsync_len > 64)
+               printk(KERN_ERR "%s: invalid hsync_len %d\n",
+                       info->fix.id, var->hsync_len);
+-      if (var->left_margin > 255)
++      if (var->left_margin < 3  || var->left_margin > 255)
+               printk(KERN_ERR "%s: invalid left_margin %d\n",
+                       info->fix.id, var->left_margin);
+-      if (var->right_margin > 255)
++      if (var->right_margin < 1 || var->right_margin > 255)
+               printk(KERN_ERR "%s: invalid right_margin %d\n",
+                       info->fix.id, var->right_margin);
+       if (var->yres < 1 || var->yres > ymax_mask)
+-- 
+2.39.2
+
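A small worked example of why left_margin values below 3 are flagged: the hardware register field (treated here as a hypothetical 8-bit field) stores left_margin - 3, so a smaller value wraps around when truncated.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    unsigned int left_margin = 2;                    /* from a (bad) device tree */
    uint8_t reg_field = (uint8_t)(left_margin - 3);  /* unsigned subtraction wraps to 255 */

    printf("programmed register field: %u (intended value was %d)\n",
           (unsigned)reg_field, (int)left_margin - 3);
    return 0;
}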
diff --git a/queue-5.4/iavf-fix-use-after-free-in-free_netdev.patch b/queue-5.4/iavf-fix-use-after-free-in-free_netdev.patch
new file mode 100644 (file)
index 0000000..f64b6de
--- /dev/null
@@ -0,0 +1,215 @@
+From f8349f8f3af5f904d0f0db6d6cd33649b3e73dc6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 May 2023 19:11:47 +0800
+Subject: iavf: Fix use-after-free in free_netdev
+
+From: Ding Hui <dinghui@sangfor.com.cn>
+
+[ Upstream commit 5f4fa1672d98fe99d2297b03add35346f1685d6b ]
+
+We do netif_napi_add() for all allocated q_vectors[], but potentially
+do netif_napi_del() for part of them, then kfree q_vectors and leave
+invalid pointers at dev->napi_list.
+
+Reproducer:
+
+  [root@host ~]# cat repro.sh
+  #!/bin/bash
+
+  pf_dbsf="0000:41:00.0"
+  vf0_dbsf="0000:41:02.0"
+  g_pids=()
+
+  function do_set_numvf()
+  {
+      echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+      sleep $((RANDOM%3+1))
+      echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+      sleep $((RANDOM%3+1))
+  }
+
+  function do_set_channel()
+  {
+      local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/)
+      [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; }
+      ifconfig $nic 192.168.18.5 netmask 255.255.255.0
+      ifconfig $nic up
+      ethtool -L $nic combined 1
+      ethtool -L $nic combined 4
+      sleep $((RANDOM%3))
+  }
+
+  function on_exit()
+  {
+      local pid
+      for pid in "${g_pids[@]}"; do
+          kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null
+      done
+      g_pids=()
+  }
+
+  trap "on_exit; exit" EXIT
+
+  while :; do do_set_numvf ; done &
+  g_pids+=($!)
+  while :; do do_set_channel ; done &
+  g_pids+=($!)
+
+  wait
+
+Result:
+
+[ 4093.900222] ==================================================================
+[ 4093.900230] BUG: KASAN: use-after-free in free_netdev+0x308/0x390
+[ 4093.900232] Read of size 8 at addr ffff88b4dc145640 by task repro.sh/6699
+[ 4093.900233]
+[ 4093.900236] CPU: 10 PID: 6699 Comm: repro.sh Kdump: loaded Tainted: G           O     --------- -t - 4.18.0 #1
+[ 4093.900238] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021
+[ 4093.900239] Call Trace:
+[ 4093.900244]  dump_stack+0x71/0xab
+[ 4093.900249]  print_address_description+0x6b/0x290
+[ 4093.900251]  ? free_netdev+0x308/0x390
+[ 4093.900252]  kasan_report+0x14a/0x2b0
+[ 4093.900254]  free_netdev+0x308/0x390
+[ 4093.900261]  iavf_remove+0x825/0xd20 [iavf]
+[ 4093.900265]  pci_device_remove+0xa8/0x1f0
+[ 4093.900268]  device_release_driver_internal+0x1c6/0x460
+[ 4093.900271]  pci_stop_bus_device+0x101/0x150
+[ 4093.900273]  pci_stop_and_remove_bus_device+0xe/0x20
+[ 4093.900275]  pci_iov_remove_virtfn+0x187/0x420
+[ 4093.900277]  ? pci_iov_add_virtfn+0xe10/0xe10
+[ 4093.900278]  ? pci_get_subsys+0x90/0x90
+[ 4093.900280]  sriov_disable+0xed/0x3e0
+[ 4093.900282]  ? bus_find_device+0x12d/0x1a0
+[ 4093.900290]  i40e_free_vfs+0x754/0x1210 [i40e]
+[ 4093.900298]  ? i40e_reset_all_vfs+0x880/0x880 [i40e]
+[ 4093.900299]  ? pci_get_device+0x7c/0x90
+[ 4093.900300]  ? pci_get_subsys+0x90/0x90
+[ 4093.900306]  ? pci_vfs_assigned.part.7+0x144/0x210
+[ 4093.900309]  ? __mutex_lock_slowpath+0x10/0x10
+[ 4093.900315]  i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e]
+[ 4093.900318]  sriov_numvfs_store+0x214/0x290
+[ 4093.900320]  ? sriov_totalvfs_show+0x30/0x30
+[ 4093.900321]  ? __mutex_lock_slowpath+0x10/0x10
+[ 4093.900323]  ? __check_object_size+0x15a/0x350
+[ 4093.900326]  kernfs_fop_write+0x280/0x3f0
+[ 4093.900329]  vfs_write+0x145/0x440
+[ 4093.900330]  ksys_write+0xab/0x160
+[ 4093.900332]  ? __ia32_sys_read+0xb0/0xb0
+[ 4093.900334]  ? fput_many+0x1a/0x120
+[ 4093.900335]  ? filp_close+0xf0/0x130
+[ 4093.900338]  do_syscall_64+0xa0/0x370
+[ 4093.900339]  ? page_fault+0x8/0x30
+[ 4093.900341]  entry_SYSCALL_64_after_hwframe+0x65/0xca
+[ 4093.900357] RIP: 0033:0x7f16ad4d22c0
+[ 4093.900359] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24
+[ 4093.900360] RSP: 002b:00007ffd6491b7f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+[ 4093.900362] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f16ad4d22c0
+[ 4093.900363] RDX: 0000000000000002 RSI: 0000000001a41408 RDI: 0000000000000001
+[ 4093.900364] RBP: 0000000001a41408 R08: 00007f16ad7a1780 R09: 00007f16ae1f2700
+[ 4093.900364] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002
+[ 4093.900365] R13: 0000000000000001 R14: 00007f16ad7a0620 R15: 0000000000000001
+[ 4093.900367]
+[ 4093.900368] Allocated by task 820:
+[ 4093.900371]  kasan_kmalloc+0xa6/0xd0
+[ 4093.900373]  __kmalloc+0xfb/0x200
+[ 4093.900376]  iavf_init_interrupt_scheme+0x63b/0x1320 [iavf]
+[ 4093.900380]  iavf_watchdog_task+0x3d51/0x52c0 [iavf]
+[ 4093.900382]  process_one_work+0x56a/0x11f0
+[ 4093.900383]  worker_thread+0x8f/0xf40
+[ 4093.900384]  kthread+0x2a0/0x390
+[ 4093.900385]  ret_from_fork+0x1f/0x40
+[ 4093.900387]  0xffffffffffffffff
+[ 4093.900387]
+[ 4093.900388] Freed by task 6699:
+[ 4093.900390]  __kasan_slab_free+0x137/0x190
+[ 4093.900391]  kfree+0x8b/0x1b0
+[ 4093.900394]  iavf_free_q_vectors+0x11d/0x1a0 [iavf]
+[ 4093.900397]  iavf_remove+0x35a/0xd20 [iavf]
+[ 4093.900399]  pci_device_remove+0xa8/0x1f0
+[ 4093.900400]  device_release_driver_internal+0x1c6/0x460
+[ 4093.900401]  pci_stop_bus_device+0x101/0x150
+[ 4093.900402]  pci_stop_and_remove_bus_device+0xe/0x20
+[ 4093.900403]  pci_iov_remove_virtfn+0x187/0x420
+[ 4093.900404]  sriov_disable+0xed/0x3e0
+[ 4093.900409]  i40e_free_vfs+0x754/0x1210 [i40e]
+[ 4093.900415]  i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e]
+[ 4093.900416]  sriov_numvfs_store+0x214/0x290
+[ 4093.900417]  kernfs_fop_write+0x280/0x3f0
+[ 4093.900418]  vfs_write+0x145/0x440
+[ 4093.900419]  ksys_write+0xab/0x160
+[ 4093.900420]  do_syscall_64+0xa0/0x370
+[ 4093.900421]  entry_SYSCALL_64_after_hwframe+0x65/0xca
+[ 4093.900422]  0xffffffffffffffff
+[ 4093.900422]
+[ 4093.900424] The buggy address belongs to the object at ffff88b4dc144200
+                which belongs to the cache kmalloc-8k of size 8192
+[ 4093.900425] The buggy address is located 5184 bytes inside of
+                8192-byte region [ffff88b4dc144200, ffff88b4dc146200)
+[ 4093.900425] The buggy address belongs to the page:
+[ 4093.900427] page:ffffea00d3705000 refcount:1 mapcount:0 mapping:ffff88bf04415c80 index:0x0 compound_mapcount: 0
+[ 4093.900430] flags: 0x10000000008100(slab|head)
+[ 4093.900433] raw: 0010000000008100 dead000000000100 dead000000000200 ffff88bf04415c80
+[ 4093.900434] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000
+[ 4093.900434] page dumped because: kasan: bad access detected
+[ 4093.900435]
+[ 4093.900435] Memory state around the buggy address:
+[ 4093.900436]  ffff88b4dc145500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900437]  ffff88b4dc145580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900438] >ffff88b4dc145600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900438]                                            ^
+[ 4093.900439]  ffff88b4dc145680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900440]  ffff88b4dc145700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900440] ==================================================================
+
+Although patch #2 (of 2) can avoid the issue triggered by this repro.sh,
+there are still other potential risks: if num_active_queues is unexpectedly
+changed to less than the number of allocated q_vectors[], the mismatched
+netif_napi_add/del() calls can also cause a UAF.
+
+Since we call netif_napi_add() unconditionally for all allocated q_vectors
+in iavf_alloc_q_vectors(), fix it by making netif_napi_del() match
+netif_napi_add().
+
+Fixes: 5eae00c57f5e ("i40evf: main driver core")
+Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
+Cc: Donglin Peng <pengdonglin@sangfor.com.cn>
+Cc: Huang Cun <huangcun@sangfor.com.cn>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 838cd7881f2f7..9cf556fedc704 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1389,19 +1389,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
+ static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+ {
+       int q_idx, num_q_vectors;
+-      int napi_vectors;
+       if (!adapter->q_vectors)
+               return;
+       num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+-      napi_vectors = adapter->num_active_queues;
+       for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+               struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
+-              if (q_idx < napi_vectors)
+-                      netif_napi_del(&q_vector->napi);
++              netif_napi_del(&q_vector->napi);
+       }
+       kfree(adapter->q_vectors);
+       adapter->q_vectors = NULL;
+-- 
+2.39.2
+
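The bug class is easy to reproduce outside the kernel. The hypothetical sketch below registers every allocated element on a global list (as netif_napi_add() does with dev->napi_list) but unregisters only the "active" subset before freeing the array, leaving dangling list entries behind; correct teardown must mirror setup exactly.

#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; int id; };

static struct node *registry;            /* stand-in for dev->napi_list */

static void reg(struct node *n) { n->next = registry; registry = n; }

static void unreg(struct node *n)
{
    struct node **pp = &registry;

    while (*pp && *pp != n)
        pp = &(*pp)->next;
    if (*pp)
        *pp = n->next;
}

int main(void)
{
    enum { ALLOCATED = 4, ACTIVE = 2 };
    struct node *vec = calloc(ALLOCATED, sizeof(*vec));

    if (!vec)
        return 1;
    for (int i = 0; i < ALLOCATED; i++)  /* setup registers everything */
        reg(&vec[i]);
    for (int i = 0; i < ACTIVE; i++)     /* buggy teardown: partial unregister */
        unreg(&vec[i]);

    free(vec);                           /* registry still points into vec: use-after-free */
    printf("dangling entries left on the list: %d\n", ALLOCATED - ACTIVE);

    /* The fix corresponds to looping over all ALLOCATED entries before free(). */
    return 0;
}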
diff --git a/queue-5.4/igb-fix-igb_down-hung-on-surprise-removal.patch b/queue-5.4/igb-fix-igb_down-hung-on-surprise-removal.patch
new file mode 100644 (file)
index 0000000..5f2ce5b
--- /dev/null
@@ -0,0 +1,89 @@
+From 0b3d3a2fbdf3027763c70d65dde0a044a8c3a6ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 10:47:32 -0700
+Subject: igb: Fix igb_down hung on surprise removal
+
+From: Ying Hsu <yinghsu@chromium.org>
+
+[ Upstream commit 004d25060c78fc31f66da0fa439c544dda1ac9d5 ]
+
+In a setup where a Thunderbolt hub connects to Ethernet and a display
+through USB Type-C, users may experience a hung task timeout when they
+remove the cable between the PC and the Thunderbolt hub.
+This is because the igb_down function is called multiple times when
+the Thunderbolt hub is unplugged. For example, the igb_io_error_detected
+triggers the first call, and the igb_remove triggers the second call.
+The second call to igb_down will block at napi_synchronize.
+Here's the call trace:
+    __schedule+0x3b0/0xddb
+    ? __mod_timer+0x164/0x5d3
+    schedule+0x44/0xa8
+    schedule_timeout+0xb2/0x2a4
+    ? run_local_timers+0x4e/0x4e
+    msleep+0x31/0x38
+    igb_down+0x12c/0x22a [igb 6615058754948bfde0bf01429257eb59f13030d4]
+    __igb_close+0x6f/0x9c [igb 6615058754948bfde0bf01429257eb59f13030d4]
+    igb_close+0x23/0x2b [igb 6615058754948bfde0bf01429257eb59f13030d4]
+    __dev_close_many+0x95/0xec
+    dev_close_many+0x6e/0x103
+    unregister_netdevice_many+0x105/0x5b1
+    unregister_netdevice_queue+0xc2/0x10d
+    unregister_netdev+0x1c/0x23
+    igb_remove+0xa7/0x11c [igb 6615058754948bfde0bf01429257eb59f13030d4]
+    pci_device_remove+0x3f/0x9c
+    device_release_driver_internal+0xfe/0x1b4
+    pci_stop_bus_device+0x5b/0x7f
+    pci_stop_bus_device+0x30/0x7f
+    pci_stop_bus_device+0x30/0x7f
+    pci_stop_and_remove_bus_device+0x12/0x19
+    pciehp_unconfigure_device+0x76/0xe9
+    pciehp_disable_slot+0x6e/0x131
+    pciehp_handle_presence_or_link_change+0x7a/0x3f7
+    pciehp_ist+0xbe/0x194
+    irq_thread_fn+0x22/0x4d
+    ? irq_thread+0x1fd/0x1fd
+    irq_thread+0x17b/0x1fd
+    ? irq_forced_thread_fn+0x5f/0x5f
+    kthread+0x142/0x153
+    ? __irq_get_irqchip_state+0x46/0x46
+    ? kthread_associate_blkcg+0x71/0x71
+    ret_from_fork+0x1f/0x30
+
+In this case, igb_io_error_detected detaches the network interface
+and requests a PCIe slot reset; however, the PCIe reset callback is
+not invoked and thus the Ethernet connection breaks down.
+As the PCIe error in this case is a non-fatal one, requesting a
+slot reset can be avoided.
+This patch fixes the hung task issue and preserves the Ethernet
+connection by ignoring non-fatal PCIe errors.
+
+Signed-off-by: Ying Hsu <yinghsu@chromium.org>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/20230620174732.4145155-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index 00d66a6e5c6e5..8c6c0d9c7f766 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -9028,6 +9028,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct igb_adapter *adapter = netdev_priv(netdev);
++      if (state == pci_channel_io_normal) {
++              dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n");
++              return PCI_ERS_RESULT_CAN_RECOVER;
++      }
++
+       netif_device_detach(netdev);
+       if (state == pci_channel_io_perm_failure)
+-- 
+2.39.2
+
diff --git a/queue-5.4/llc-don-t-drop-packet-from-non-root-netns.patch b/queue-5.4/llc-don-t-drop-packet-from-non-root-netns.patch
new file mode 100644 (file)
index 0000000..12706d0
--- /dev/null
@@ -0,0 +1,50 @@
+From 5f49d88b5518144b7e6b9ddd2a5a4b3ec3dea453 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jul 2023 10:41:51 -0700
+Subject: llc: Don't drop packet from non-root netns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 6631463b6e6673916d2481f692938f393148aa82 ]
+
+Now these upper layer protocol handlers can be called from llc_rcv()
+as sap->rcv_func(), which is registered by llc_sap_open().
+
+  * function which is passed to register_8022_client()
+    -> no in-kernel user calls register_8022_client().
+
+  * snap_rcv()
+    `- proto->rcvfunc() : registered by register_snap_client()
+       -> aarp_rcv() and atalk_rcv() drop packets from non-root netns
+
+  * stp_pdu_rcv()
+    `- garp_protos[]->rcv() : registered by stp_proto_register()
+       -> garp_pdu_rcv() and br_stp_rcv() are netns-aware
+
+So, we can safely remove the netns restriction in llc_rcv().
+
+Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/llc/llc_input.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
+index 82cb93f66b9bd..f9e801cc50f5e 100644
+--- a/net/llc/llc_input.c
++++ b/net/llc/llc_input.c
+@@ -162,9 +162,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
+       void (*sta_handler)(struct sk_buff *skb);
+       void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
+-      if (!net_eq(dev_net(dev), &init_net))
+-              goto drop;
+-
+       /*
+        * When the interface is in promisc. mode, drop all the crap that it
+        * receives, do not try to analyse it.
+-- 
+2.39.2
+
diff --git a/queue-5.4/md-fix-data-corruption-for-raid456-when-reshape-rest.patch b/queue-5.4/md-fix-data-corruption-for-raid456-when-reshape-rest.patch
new file mode 100644 (file)
index 0000000..f08db18
--- /dev/null
@@ -0,0 +1,60 @@
+From 4cc78653a330f245001ff2e0e31088dbba8facdc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 09:56:07 +0800
+Subject: md: fix data corruption for raid456 when reshape restart while grow
+ up
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 873f50ece41aad5c4f788a340960c53774b5526e ]
+
+Currently, if reshape is interrupted, echoing "reshape" to sync_action will
+restart the reshape from scratch, for example:
+
+echo frozen > sync_action
+echo reshape > sync_action
+
+This will corrupt data before reshape_position if the array is growing.
+Fix the problem by continuing the reshape from reshape_position.
+
+Reported-by: Peter Neuwirth <reddunur@online.de>
+Link: https://lore.kernel.org/linux-raid/e2f96772-bfbc-f43b-6da1-f520e5164536@online.de/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Song Liu <song@kernel.org>
+Link: https://lore.kernel.org/r/20230512015610.821290-3-yukuai1@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/md.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index 0765712513e7d..a006f3a9554bf 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -4743,11 +4743,21 @@ action_store(struct mddev *mddev, const char *page, size_t len)
+                       return -EINVAL;
+               err = mddev_lock(mddev);
+               if (!err) {
+-                      if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
++                      if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+                               err =  -EBUSY;
+-                      else {
++                      } else if (mddev->reshape_position == MaxSector ||
++                                 mddev->pers->check_reshape == NULL ||
++                                 mddev->pers->check_reshape(mddev)) {
+                               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                               err = mddev->pers->start_reshape(mddev);
++                      } else {
++                              /*
++                               * If reshape is still in progress, and
++                               * md_check_recovery() can continue to reshape,
++                               * don't restart reshape because data can be
++                               * corrupted for raid456.
++                               */
++                              clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                       }
+                       mddev_unlock(mddev);
+               }
+-- 
+2.39.2
+
diff --git a/queue-5.4/md-raid10-prevent-soft-lockup-while-flush-writes.patch b/queue-5.4/md-raid10-prevent-soft-lockup-while-flush-writes.patch
new file mode 100644 (file)
index 0000000..70ece3d
--- /dev/null
@@ -0,0 +1,79 @@
+From e162491c96b0f3c2366d9c0cdb870c15fca436db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 May 2023 21:11:00 +0800
+Subject: md/raid10: prevent soft lockup while flush writes
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 010444623e7f4da6b4a4dd603a7da7469981e293 ]
+
+Currently, there is no limit on raid1/raid10 plugged bios. While flushing
+writes, raid1 has cond_resched() while raid10 doesn't, and too many
+writes can cause a soft lockup.
+
+The following soft lockup can be triggered easily with a writeback test
+for raid10 on ramdisks:
+
+watchdog: BUG: soft lockup - CPU#10 stuck for 27s! [md0_raid10:1293]
+Call Trace:
+ <TASK>
+ call_rcu+0x16/0x20
+ put_object+0x41/0x80
+ __delete_object+0x50/0x90
+ delete_object_full+0x2b/0x40
+ kmemleak_free+0x46/0xa0
+ slab_free_freelist_hook.constprop.0+0xed/0x1a0
+ kmem_cache_free+0xfd/0x300
+ mempool_free_slab+0x1f/0x30
+ mempool_free+0x3a/0x100
+ bio_free+0x59/0x80
+ bio_put+0xcf/0x2c0
+ free_r10bio+0xbf/0xf0
+ raid_end_bio_io+0x78/0xb0
+ one_write_done+0x8a/0xa0
+ raid10_end_write_request+0x1b4/0x430
+ bio_endio+0x175/0x320
+ brd_submit_bio+0x3b9/0x9b7 [brd]
+ __submit_bio+0x69/0xe0
+ submit_bio_noacct_nocheck+0x1e6/0x5a0
+ submit_bio_noacct+0x38c/0x7e0
+ flush_pending_writes+0xf0/0x240
+ raid10d+0xac/0x1ed0
+
+Fix the problem by adding cond_resched() to raid10 like what raid1 did.
+
+Note that unlimited plugged bios still need to be optimized: for example,
+when lots of dirty pages are written back, this takes lots of memory and
+the io spends a long time in the plug, hence io latency is bad.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Song Liu <song@kernel.org>
+Link: https://lore.kernel.org/r/20230529131106.2123367-2-yukuai1@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/raid10.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index db4de8e07cd97..3983d5c8b5cd2 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -927,6 +927,7 @@ static void flush_pending_writes(struct r10conf *conf)
+                       else
+                               generic_make_request(bio);
+                       bio = next;
++                      cond_resched();
+               }
+               blk_finish_plug(&plug);
+       } else
+@@ -1112,6 +1113,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
+               else
+                       generic_make_request(bio);
+               bio = next;
++              cond_resched();
+       }
+       kfree(plug);
+ }
+-- 
+2.39.2
+
diff --git a/queue-5.4/nbd-add-the-maximum-limit-of-allocated-index-in-nbd_.patch b/queue-5.4/nbd-add-the-maximum-limit-of-allocated-index-in-nbd_.patch
new file mode 100644 (file)
index 0000000..ed65fa5
--- /dev/null
@@ -0,0 +1,41 @@
+From 456f7e7cba47667316050e9866ad8506c9e47f6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Jun 2023 20:21:59 +0800
+Subject: nbd: Add the maximum limit of allocated index in nbd_dev_add
+
+From: Zhong Jinghua <zhongjinghua@huawei.com>
+
+[ Upstream commit f12bc113ce904777fd6ca003b473b427782b3dde ]
+
+If the index allocated by idr_alloc() is greater than MINORMASK >> part_shift,
+the device number will overflow, resulting in a failure to create a block
+device.
+
+Fix it by limiting the maximum allocated index.
+
+Signed-off-by: Zhong Jinghua <zhongjinghua@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20230605122159.2134384-1-zhongjinghua@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/nbd.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
+index 218aa7e419700..37994a7a1b6f4 100644
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -1708,7 +1708,8 @@ static int nbd_dev_add(int index)
+               if (err == -ENOSPC)
+                       err = -EEXIST;
+       } else {
+-              err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
++              err = idr_alloc(&nbd_index_idr, nbd, 0,
++                              (MINORMASK >> part_shift) + 1, GFP_KERNEL);
+               if (err >= 0)
+                       index = err;
+       }
+-- 
+2.39.2
+
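A short arithmetic sketch of the overflow the new limit prevents, assuming the kernel's MINORBITS of 20 and an example part_shift of 5 (the actual part_shift depends on the max_part module parameter):

#include <stdio.h>

#define MINORBITS 20
#define MINORMASK ((1U << MINORBITS) - 1)

int main(void)
{
    unsigned int part_shift = 5;                    /* e.g. max_part=16 reserves 32 minors per device */
    unsigned int max_index = MINORMASK >> part_shift;
    unsigned int bad_index = max_index + 1;

    printf("max usable index: %u\n", max_index);
    printf("first_minor for index %u: %u (exceeds MINORMASK %u)\n",
           bad_index, bad_index << part_shift, MINORMASK);
    return 0;
}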
diff --git a/queue-5.4/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch b/queue-5.4/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch
new file mode 100644 (file)
index 0000000..806727c
--- /dev/null
@@ -0,0 +1,78 @@
+From 08a8346073e660036a1e3e9ae04efddd220a90fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Jul 2023 16:36:57 +0530
+Subject: net: ethernet: ti: cpsw_ale: Fix
+ cpsw_ale_get_field()/cpsw_ale_set_field()
+
+From: Tanmay Patil <t-patil@ti.com>
+
+[ Upstream commit b685f1a58956fa36cc01123f253351b25bfacfda ]
+
+The CPSW ALE has 75-bit ALE entries which are stored within three 32-bit words.
+The cpsw_ale_get_field() and cpsw_ale_set_field() functions assume that the
+field will be strictly contained within one word. However, this is not
+guaranteed to be the case, and it is possible for ALE field entries to span
+at most two words.
+
+Fix the methods to handle getting/setting fields spanning up to two words.
+
+Fixes: db82173f23c5 ("netdev: driver: ethernet: add cpsw address lookup engine support")
+Signed-off-by: Tanmay Patil <t-patil@ti.com>
+[s-vadapalli@ti.com: rephrased commit message and added Fixes tag]
+Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ti/cpsw_ale.c | 24 +++++++++++++++++++-----
+ 1 file changed, 19 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
+index e7c24396933e9..f17619c545ae5 100644
+--- a/drivers/net/ethernet/ti/cpsw_ale.c
++++ b/drivers/net/ethernet/ti/cpsw_ale.c
+@@ -60,23 +60,37 @@
+ static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
+ {
+-      int idx;
++      int idx, idx2;
++      u32 hi_val = 0;
+       idx    = start / 32;
++      idx2 = (start + bits - 1) / 32;
++      /* Check if bits to be fetched exceed a word */
++      if (idx != idx2) {
++              idx2 = 2 - idx2; /* flip */
++              hi_val = ale_entry[idx2] << ((idx2 * 32) - start);
++      }
+       start -= idx * 32;
+       idx    = 2 - idx; /* flip */
+-      return (ale_entry[idx] >> start) & BITMASK(bits);
++      return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits);
+ }
+ static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits,
+                                     u32 value)
+ {
+-      int idx;
++      int idx, idx2;
+       value &= BITMASK(bits);
+-      idx    = start / 32;
++      idx = start / 32;
++      idx2 = (start + bits - 1) / 32;
++      /* Check if bits to be set exceed a word */
++      if (idx != idx2) {
++              idx2 = 2 - idx2; /* flip */
++              ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32)));
++              ale_entry[idx2] |= (value >> ((idx2 * 32) - start));
++      }
+       start -= idx * 32;
+-      idx    = 2 - idx; /* flip */
++      idx = 2 - idx; /* flip */
+       ale_entry[idx] &= ~(BITMASK(bits) << start);
+       ale_entry[idx] |=  (value << start);
+ }
+-- 
+2.39.2
+
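A standalone sketch mirroring the fixed helpers for the word-spanning case. The three entry words are stored most-significant word first, and the hypothetical 4-bit field used here starts at logical bit 30, so it straddles e[2] and e[1]; the round trip below only succeeds with the two-word handling this patch adds.

#include <stdint.h>
#include <stdio.h>

#define BITMASK(bits) ((1u << (bits)) - 1)

static uint32_t get_field(const uint32_t *e, uint32_t start, uint32_t bits)
{
    uint32_t idx = start / 32, idx2 = (start + bits - 1) / 32, hi = 0;

    if (idx != idx2) {                        /* field crosses a word boundary */
        idx2 = 2 - idx2;                      /* flip to storage order */
        hi = e[idx2] << ((idx2 * 32) - start);
    }
    start -= idx * 32;
    idx = 2 - idx;
    return (hi + (e[idx] >> start)) & BITMASK(bits);
}

static void set_field(uint32_t *e, uint32_t start, uint32_t bits, uint32_t val)
{
    uint32_t idx = start / 32, idx2 = (start + bits - 1) / 32;

    val &= BITMASK(bits);
    if (idx != idx2) {                        /* write the high part into the next word */
        idx2 = 2 - idx2;
        e[idx2] &= ~BITMASK(bits + start - idx2 * 32);
        e[idx2] |= val >> ((idx2 * 32) - start);
    }
    start -= idx * 32;
    idx = 2 - idx;
    e[idx] &= ~(BITMASK(bits) << start);
    e[idx] |= val << start;
}

int main(void)
{
    uint32_t entry[3] = { 0 };

    set_field(entry, 30, 4, 0xA);             /* spans the e[2]/e[1] boundary */
    printf("read back: 0x%x\n", (unsigned)get_field(entry, 30, 4)); /* prints 0xa */
    return 0;
}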
diff --git a/queue-5.4/net-ipv6-check-return-value-of-pskb_trim.patch b/queue-5.4/net-ipv6-check-return-value-of-pskb_trim.patch
new file mode 100644 (file)
index 0000000..0244504
--- /dev/null
@@ -0,0 +1,39 @@
+From 63eb3f5f78a3465e2d34f4145321d87171922518 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 22:45:19 +0800
+Subject: net:ipv6: check return value of pskb_trim()
+
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+
+[ Upstream commit 4258faa130be4ea43e5e2d839467da421b8ff274 ]
+
+Go to tx_err if an unexpected result is returned by pskb_trim()
+in ip6erspan_tunnel_xmit().
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6_gre.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
+index 0977137b00dc4..2d34bd98fccea 100644
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -941,7 +941,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+               goto tx_err;
+       if (skb->len > dev->mtu + dev->hard_header_len) {
+-              pskb_trim(skb, dev->mtu + dev->hard_header_len);
++              if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
++                      goto tx_err;
+               truncate = true;
+       }
+-- 
+2.39.2
+
diff --git a/queue-5.4/net-replace-the-limit-of-tcp_linger2-with-tcp_fin_ti.patch b/queue-5.4/net-replace-the-limit-of-tcp_linger2-with-tcp_fin_ti.patch
new file mode 100644 (file)
index 0000000..444d700
--- /dev/null
@@ -0,0 +1,73 @@
+From 730715f20176f04274e6b8733bb5664b25fef36d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Apr 2020 16:06:16 +0800
+Subject: net: Replace the limit of TCP_LINGER2 with TCP_FIN_TIMEOUT_MAX
+
+From: Cambda Zhu <cambda@linux.alibaba.com>
+
+[ Upstream commit f0628c524fd188c3f9418e12478dfdfadacba815 ]
+
+This patch changes the behavior of TCP_LINGER2 about its limit. The
+sysctl_tcp_fin_timeout used to be the limit of TCP_LINGER2 but now it's
+only the default value. A new macro named TCP_FIN_TIMEOUT_MAX is added
+as the limit of TCP_LINGER2, which is 2 minutes.
+
+Since TCP_LINGER2 used sysctl_tcp_fin_timeout as both the default value
+and the limit in the past, the system administrator could not set a
+default value for most sockets while letting some sockets have a greater
+timeout. It was probably a mistake to let the sysctl be the limit of
+TCP_LINGER2. We could add a new sysctl to set the maximum of TCP_LINGER2,
+but the FIN-WAIT-2 timeout usually does not need to be very long, and
+2 minutes are legal considering the TCP specs.
+
+Changes in v3:
+- Remove the new socket option and change the TCP_LINGER2 behavior so
+  that the timeout can be set to value between sysctl_tcp_fin_timeout
+  and 2 minutes.
+
+Changes in v2:
+- Add int overflow check for the new socket option.
+
+Changes in v1:
+- Add a new socket option to set timeout greater than
+  sysctl_tcp_fin_timeout.
+
+Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 9df5335ca974 ("tcp: annotate data-races around tp->linger2")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 1 +
+ net/ipv4/tcp.c    | 4 ++--
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 077feeca6c99e..2f456bed33ec3 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -125,6 +125,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
+                                 * to combine FIN-WAIT-2 timeout with
+                                 * TIME-WAIT timer.
+                                 */
++#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
+ #define TCP_DELACK_MAX        ((unsigned)(HZ/5))      /* maximal time to delay before sending an ACK */
+ #if HZ >= 100
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index e33abcff56080..c6c73b9407098 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3067,8 +3067,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+       case TCP_LINGER2:
+               if (val < 0)
+                       tp->linger2 = -1;
+-              else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
+-                      tp->linger2 = 0;
++              else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
++                      tp->linger2 = TCP_FIN_TIMEOUT_MAX;
+               else
+                       tp->linger2 = val * HZ;
+               break;
+-- 
+2.39.2
+
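For orientation, a minimal user-space sketch (not part of the patch) of setting TCP_LINGER2: after this change a value above the tcp_fin_timeout sysctl but at most two minutes is honoured instead of being replaced by the default, and larger values are clamped to TCP_FIN_TIMEOUT_MAX. The 90-second value below is arbitrary and error handling is minimal.

```
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int secs = 90;	/* requested FIN-WAIT-2 timeout in seconds */

	if (fd < 0)
		return 1;
	if (setsockopt(fd, IPPROTO_TCP, TCP_LINGER2, &secs, sizeof(secs)))
		perror("setsockopt(TCP_LINGER2)");
	close(fd);
	return 0;
}
```
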
diff --git a/queue-5.4/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch b/queue-5.4/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch
new file mode 100644 (file)
index 0000000..4c65e9f
--- /dev/null
@@ -0,0 +1,64 @@
+From f58e1406f7f9e2cf7845590dd709521813d3c261 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jul 2023 01:30:33 +0200
+Subject: netfilter: nf_tables: can't schedule in nft_chain_validate
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 314c82841602a111c04a7210c21dc77e0d560242 ]
+
+nft_chain_validate() can be called via nft set element list iteration,
+which may acquire the rcu and/or bh read lock (depending on the set type).
+
+BUG: sleeping function called from invalid context at net/netfilter/nf_tables_api.c:3353
+in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1232, name: nft
+preempt_count: 0, expected: 0
+RCU nest depth: 1, expected: 0
+2 locks held by nft/1232:
+ #0: ffff8881180e3ea8 (&nft_net->commit_mutex){+.+.}-{3:3}, at: nf_tables_valid_genid
+ #1: ffffffff83f5f540 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire
+Call Trace:
+ nft_chain_validate
+ nft_lookup_validate_setelem
+ nft_pipapo_walk
+ nft_lookup_validate
+ nft_chain_validate
+ nft_immediate_validate
+ nft_chain_validate
+ nf_tables_validate
+ nf_tables_abort
+
+There is no choice but to move the cond_resched() to nf_tables_validate().
+
+Fixes: 81ea01066741 ("netfilter: nf_tables: add rescheduling points during loop detection walks")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index a64aa888751cb..7d22bc8aa2787 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -2736,8 +2736,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
+                       if (err < 0)
+                               return err;
+               }
+-
+-              cond_resched();
+       }
+       return 0;
+@@ -2761,6 +2759,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
+               err = nft_chain_validate(&ctx, chain);
+               if (err < 0)
+                       return err;
++
++              cond_resched();
+       }
+       return 0;
+-- 
+2.39.2
+
diff --git a/queue-5.4/netfilter-nf_tables-fix-spurious-set-element-inserti.patch b/queue-5.4/netfilter-nf_tables-fix-spurious-set-element-inserti.patch
new file mode 100644 (file)
index 0000000..43740be
--- /dev/null
@@ -0,0 +1,49 @@
+From 9fb06bca476f67a541d7701c4c2e976894297311 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jul 2023 00:29:58 +0200
+Subject: netfilter: nf_tables: fix spurious set element insertion failure
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit ddbd8be68941985f166f5107109a90ce13147c44 ]
+
+On some platforms there is a padding hole in the nft_verdict
+structure, between the verdict code and the chain pointer.
+
+On element insertion, if the new element clashes with an existing one and
+NLM_F_EXCL flag isn't set, we want to ignore the -EEXIST error as long as
+the data associated with duplicated element is the same as the existing
+one.  The data equality check uses memcmp.
+
+For normal data (NFT_DATA_VALUE) this works fine, but for NFT_DATA_VERDICT
+the padding area leads to a spurious failure even if the verdict data is
+the same.
+
+This then makes the insertion fail with an 'already exists' error, even
+though the new "key : data" matches an existing entry and userspace
+told the kernel that it doesn't want to receive an error indication.
+
+Fixes: c016c7e45ddf ("netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 123ef398a10dc..a64aa888751cb 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7655,6 +7655,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+       if (!tb[NFTA_VERDICT_CODE])
+               return -EINVAL;
++
++      /* zero padding hole for memcmp */
++      memset(data, 0, sizeof(*data));
+       data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+       switch (data->verdict.code) {
+-- 
+2.39.2
+
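A small stand-alone sketch (assuming an LP64 target; the struct below is only a stand-in for nft_verdict) of why memcmp() over a struct with a padding hole can report a difference even when every named field matches, and why zeroing the whole object first avoids that:

```
#include <stdio.h>
#include <string.h>

struct verdict_like {
	int code;	/* 4 bytes; a 4-byte hole follows on LP64 */
	void *chain;
};

int main(void)
{
	struct verdict_like a, b;

	memset(&a, 0xff, sizeof(a));	/* simulate stale bytes in the hole */
	a.code = 1;
	a.chain = NULL;

	memset(&b, 0, sizeof(b));	/* what the fix does for the nft_data object */
	b.code = 1;
	b.chain = NULL;

	/* Non-zero result: only the padding bytes differ. */
	printf("memcmp() = %d\n", memcmp(&a, &b, sizeof(a)));
	return 0;
}
```
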
diff --git a/queue-5.4/pinctrl-amd-use-amd_pinconf_set-for-all-config-optio.patch b/queue-5.4/pinctrl-amd-use-amd_pinconf_set-for-all-config-optio.patch
new file mode 100644 (file)
index 0000000..57476bd
--- /dev/null
@@ -0,0 +1,108 @@
+From d60135b3aeb1520fdaae6864d4c4d75d8cc1aa49 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 08:30:03 -0500
+Subject: pinctrl: amd: Use amd_pinconf_set() for all config options
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+[ Upstream commit 635a750d958e158e17af0f524bedc484b27fbb93 ]
+
+On ASUS TUF A16 it is reported that the ITE5570 ACPI device connected to
+GPIO 7 is causing an interrupt storm.  This issue doesn't happen on
+Windows.
+
+Comparing the GPIO register configuration between Windows and Linux
+bit 20 has been configured as a pull up on Windows, but not on Linux.
+Checking GPIO declaration from the firmware it is clear it *should* have
+been a pull up on Linux as well.
+
+```
+GpioInt (Level, ActiveLow, Exclusive, PullUp, 0x0000,
+        "\\_SB.GPIO", 0x00, ResourceConsumer, ,)
+{   // Pin list
+0x0007
+}
+```
+
+On Linux amd_gpio_set_config() is currently only used for programming
+the debounce. Actually the GPIO core calls it with all the arguments
+that are supported by a GPIO, pinctrl-amd just responds `-ENOTSUPP`.
+
+To solve this issue expand amd_gpio_set_config() to support the other
+arguments amd_pinconf_set() supports, namely `PIN_CONFIG_BIAS_PULL_DOWN`,
+`PIN_CONFIG_BIAS_PULL_UP`, and `PIN_CONFIG_DRIVE_STRENGTH`.
+
+Reported-by: Nik P <npliashechnikov@gmail.com>
+Reported-by: Nathan Schulte <nmschulte@gmail.com>
+Reported-by: Friedrich Vock <friedrich.vock@gmx.de>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217336
+Reported-by: dridri85@gmail.com
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217493
+Link: https://lore.kernel.org/linux-input/20230530154058.17594-1-friedrich.vock@gmx.de/
+Tested-by: Jan Visser <starquake@linuxeverywhere.org>
+Fixes: 2956b5d94a76 ("pinctrl / gpio: Introduce .set_config() callback for GPIO chips")
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20230705133005.577-3-mario.limonciello@amd.com
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pinctrl/pinctrl-amd.c | 28 +++++++++++++++-------------
+ 1 file changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
+index ba446271e17b6..2415085eadeda 100644
+--- a/drivers/pinctrl/pinctrl-amd.c
++++ b/drivers/pinctrl/pinctrl-amd.c
+@@ -186,18 +186,6 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset,
+       return ret;
+ }
+-static int amd_gpio_set_config(struct gpio_chip *gc, unsigned offset,
+-                             unsigned long config)
+-{
+-      u32 debounce;
+-
+-      if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE)
+-              return -ENOTSUPP;
+-
+-      debounce = pinconf_to_config_argument(config);
+-      return amd_gpio_set_debounce(gc, offset, debounce);
+-}
+-
+ #ifdef CONFIG_DEBUG_FS
+ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc)
+ {
+@@ -682,7 +670,7 @@ static int amd_pinconf_get(struct pinctrl_dev *pctldev,
+ }
+ static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
+-                              unsigned long *configs, unsigned num_configs)
++                         unsigned long *configs, unsigned int num_configs)
+ {
+       int i;
+       u32 arg;
+@@ -772,6 +760,20 @@ static int amd_pinconf_group_set(struct pinctrl_dev *pctldev,
+       return 0;
+ }
++static int amd_gpio_set_config(struct gpio_chip *gc, unsigned int pin,
++                             unsigned long config)
++{
++      struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
++
++      if (pinconf_to_config_param(config) == PIN_CONFIG_INPUT_DEBOUNCE) {
++              u32 debounce = pinconf_to_config_argument(config);
++
++              return amd_gpio_set_debounce(gc, pin, debounce);
++      }
++
++      return amd_pinconf_set(gpio_dev->pctrl, pin, &config, 1);
++}
++
+ static const struct pinconf_ops amd_pinconf_ops = {
+       .pin_config_get         = amd_pinconf_get,
+       .pin_config_set         = amd_pinconf_set,
+-- 
+2.39.2
+
diff --git a/queue-5.4/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch b/queue-5.4/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch
new file mode 100644 (file)
index 0000000..f96f0e7
--- /dev/null
@@ -0,0 +1,115 @@
+From 4a8a548125e5818c03f7f74d4ff93ea8cb7dd43c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Jun 2023 20:58:47 +0200
+Subject: posix-timers: Ensure timer ID search-loop limit is valid
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 8ce8849dd1e78dadcee0ec9acbd259d239b7069f ]
+
+posix_timer_add() tries to allocate a posix timer ID by starting from the
+cached ID which was stored by the last successful allocation.
+
+This is done in a loop searching the ID space for a free slot one by
+one. The loop has to terminate when the search wrapped around to the
+starting point.
+
+But that's racy vs. establishing the starting point, which is read out
+locklessly and leads to the following problem:
+
+CPU0                              CPU1
+posix_timer_add()
+  start = sig->posix_timer_id;
+  lock(hash_lock);
+  ...                             posix_timer_add()
+  if (++sig->posix_timer_id < 0)
+                                    start = sig->posix_timer_id;
+     sig->posix_timer_id = 0;
+
+So CPU1 can observe a negative start value, i.e. -1, and the loop break
+never happens because the condition can never be true:
+
+  if (sig->posix_timer_id == start)
+     break;
+
+While this is unlikely to ever turn into an endless loop as the ID space is
+huge (INT_MAX), the racy read of the start value caught the attention of
+KCSAN and Dmitry unearthed that incorrectness.
+
+Rewrite it so that all id operations are under the hash lock.
+
+Reported-by: syzbot+5c54bd3eb218bb595aa9@syzkaller.appspotmail.com
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
+Link: https://lore.kernel.org/r/87bkhzdn6g.ffs@tglx
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched/signal.h |  2 +-
+ kernel/time/posix-timers.c   | 31 ++++++++++++++++++-------------
+ 2 files changed, 19 insertions(+), 14 deletions(-)
+
+diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
+index b3f88470cbb58..2f355c3c0d15f 100644
+--- a/include/linux/sched/signal.h
++++ b/include/linux/sched/signal.h
+@@ -123,7 +123,7 @@ struct signal_struct {
+ #ifdef CONFIG_POSIX_TIMERS
+       /* POSIX.1b Interval Timers */
+-      int                     posix_timer_id;
++      unsigned int            next_posix_timer_id;
+       struct list_head        posix_timers;
+       /* ITIMER_REAL timer for the process */
+diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
+index efe3873021a37..f3b8313475acd 100644
+--- a/kernel/time/posix-timers.c
++++ b/kernel/time/posix-timers.c
+@@ -138,25 +138,30 @@ static struct k_itimer *posix_timer_by_id(timer_t id)
+ static int posix_timer_add(struct k_itimer *timer)
+ {
+       struct signal_struct *sig = current->signal;
+-      int first_free_id = sig->posix_timer_id;
+       struct hlist_head *head;
+-      int ret = -ENOENT;
++      unsigned int cnt, id;
+-      do {
++      /*
++       * FIXME: Replace this by a per signal struct xarray once there is
++       * a plan to handle the resulting CRIU regression gracefully.
++       */
++      for (cnt = 0; cnt <= INT_MAX; cnt++) {
+               spin_lock(&hash_lock);
+-              head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
+-              if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
++              id = sig->next_posix_timer_id;
++
++              /* Write the next ID back. Clamp it to the positive space */
++              sig->next_posix_timer_id = (id + 1) & INT_MAX;
++
++              head = &posix_timers_hashtable[hash(sig, id)];
++              if (!__posix_timers_find(head, sig, id)) {
+                       hlist_add_head_rcu(&timer->t_hash, head);
+-                      ret = sig->posix_timer_id;
++                      spin_unlock(&hash_lock);
++                      return id;
+               }
+-              if (++sig->posix_timer_id < 0)
+-                      sig->posix_timer_id = 0;
+-              if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
+-                      /* Loop over all possible ids completed */
+-                      ret = -EAGAIN;
+               spin_unlock(&hash_lock);
+-      } while (ret == -ENOENT);
+-      return ret;
++      }
++      /* POSIX return code when no timer ID could be allocated */
++      return -EAGAIN;
+ }
+ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
+-- 
+2.39.2
+
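A tiny sketch of the clamping arithmetic used above: masking the incremented ID with INT_MAX keeps the next candidate in the non-negative range, so the search index can never go negative the way the old signed counter could.

```
#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned int id = INT_MAX;			/* highest possible timer ID */
	unsigned int next = (id + 1) & INT_MAX;		/* wraps to 0, never negative */

	printf("after %u the next candidate is %u\n", id, next);
	return 0;
}
```
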
diff --git a/queue-5.4/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch b/queue-5.4/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch
new file mode 100644 (file)
index 0000000..02715a1
--- /dev/null
@@ -0,0 +1,113 @@
+From a53cef2f4c210eba249992848f53ba4b0cbfe8be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 14:59:18 -0700
+Subject: Revert "tcp: avoid the lookup process failing to get sk in ehash
+ table"
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 81b3ade5d2b98ad6e0a473b0e1e420a801275592 ]
+
+This reverts commit 3f4ca5fafc08881d7a57daa20449d171f2887043.
+
+Commit 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in
+ehash table") reversed the order in how a socket is inserted into ehash
+to fix an issue that ehash-lookup could fail when reqsk/full sk/twsk are
+swapped.  However, it introduced another lookup failure.
+
+The full socket in ehash is allocated from a slab with SLAB_TYPESAFE_BY_RCU
+and does not have SOCK_RCU_FREE, so the socket could be reused even while
+it is being referenced on another CPU doing RCU lookup.
+
+Let's say a socket is reused and inserted into the same hash bucket during
+lookup.  After the blamed commit, a new socket is inserted at the end of
+the list.  If that happens, we will skip sockets placed after the previous
+position of the reused socket, resulting in ehash lookup failure.
+
+As described in Documentation/RCU/rculist_nulls.rst, we should insert a
+new socket at the head of the list to avoid such an issue.
+
+This issue, the swap-lookup-failure, and another variant reported in [0]
+can all be handled properly by adding a locked ehash lookup suggested by
+Eric Dumazet [1].
+
+However, this issue could occur for every packet, making it more likely
+than the other two races, so let's revert the change for now.
+
+Link: https://lore.kernel.org/netdev/20230606064306.9192-1-duanmuquan@baidu.com/ [0]
+Link: https://lore.kernel.org/netdev/CANn89iK8snOz8TYOhhwfimC7ykYA78GA3Nyv8x06SZYa1nKdyA@mail.gmail.com/ [1]
+Fixes: 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in ehash table")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230717215918.15723-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/inet_hashtables.c    | 17 ++---------------
+ net/ipv4/inet_timewait_sock.c |  8 ++++----
+ 2 files changed, 6 insertions(+), 19 deletions(-)
+
+diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
+index 9d14b3289f003..e4f2790fd6410 100644
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -536,20 +536,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
+       spin_lock(lock);
+       if (osk) {
+               WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+-              ret = sk_hashed(osk);
+-              if (ret) {
+-                      /* Before deleting the node, we insert a new one to make
+-                       * sure that the look-up-sk process would not miss either
+-                       * of them and that at least one node would exist in ehash
+-                       * table all the time. Otherwise there's a tiny chance
+-                       * that lookup process could find nothing in ehash table.
+-                       */
+-                      __sk_nulls_add_node_tail_rcu(sk, list);
+-                      sk_nulls_del_node_init_rcu(osk);
+-              }
+-              goto unlock;
+-      }
+-      if (found_dup_sk) {
++              ret = sk_nulls_del_node_init_rcu(osk);
++      } else if (found_dup_sk) {
+               *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
+               if (*found_dup_sk)
+                       ret = false;
+@@ -558,7 +546,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
+       if (ret)
+               __sk_nulls_add_node_rcu(sk, list);
+-unlock:
+       spin_unlock(lock);
+       return ret;
+diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
+index a00102d7c7fd4..c411c87ae865f 100644
+--- a/net/ipv4/inet_timewait_sock.c
++++ b/net/ipv4/inet_timewait_sock.c
+@@ -81,10 +81,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
+ }
+ EXPORT_SYMBOL_GPL(inet_twsk_put);
+-static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
+-                                      struct hlist_nulls_head *list)
++static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
++                                 struct hlist_nulls_head *list)
+ {
+-      hlist_nulls_add_tail_rcu(&tw->tw_node, list);
++      hlist_nulls_add_head_rcu(&tw->tw_node, list);
+ }
+ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
+@@ -120,7 +120,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+       spin_lock(lock);
+-      inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
++      inet_twsk_add_node_rcu(tw, &ehead->chain);
+       /* Step 3: Remove SK from hash chain */
+       if (__sk_nulls_del_node_init_rcu(sk))
+-- 
+2.39.2
+
diff --git a/queue-5.4/sched-fair-don-t-balance-task-to-its-current-running.patch b/queue-5.4/sched-fair-don-t-balance-task-to-its-current-running.patch
new file mode 100644 (file)
index 0000000..a7976f2
--- /dev/null
@@ -0,0 +1,96 @@
+From 6d50b7798c7af9c0a47321925555356fa38aa6e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 May 2023 16:25:07 +0800
+Subject: sched/fair: Don't balance task to its current running CPU
+
+From: Yicong Yang <yangyicong@hisilicon.com>
+
+[ Upstream commit 0dd37d6dd33a9c23351e6115ae8cdac7863bc7de ]
+
+We've run into a case where the balancer tries to balance a migration
+disabled task and triggers the warning in set_task_cpu() like below:
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 7 PID: 0 at kernel/sched/core.c:3115 set_task_cpu+0x188/0x240
+ Modules linked in: hclgevf xt_CHECKSUM ipt_REJECT nf_reject_ipv4 <...snip>
+ CPU: 7 PID: 0 Comm: swapper/7 Kdump: loaded Tainted: G           O       6.1.0-rc4+ #1
+ Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 CS V5.B221.01 12/09/2021
+ pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : set_task_cpu+0x188/0x240
+ lr : load_balance+0x5d0/0xc60
+ sp : ffff80000803bc70
+ x29: ffff80000803bc70 x28: ffff004089e190e8 x27: ffff004089e19040
+ x26: ffff007effcabc38 x25: 0000000000000000 x24: 0000000000000001
+ x23: ffff80000803be84 x22: 000000000000000c x21: ffffb093e79e2a78
+ x20: 000000000000000c x19: ffff004089e19040 x18: 0000000000000000
+ x17: 0000000000001fad x16: 0000000000000030 x15: 0000000000000000
+ x14: 0000000000000003 x13: 0000000000000000 x12: 0000000000000000
+ x11: 0000000000000001 x10: 0000000000000400 x9 : ffffb093e4cee530
+ x8 : 00000000fffffffe x7 : 0000000000ce168a x6 : 000000000000013e
+ x5 : 00000000ffffffe1 x4 : 0000000000000001 x3 : 0000000000000b2a
+ x2 : 0000000000000b2a x1 : ffffb093e6d6c510 x0 : 0000000000000001
+ Call trace:
+  set_task_cpu+0x188/0x240
+  load_balance+0x5d0/0xc60
+  rebalance_domains+0x26c/0x380
+  _nohz_idle_balance.isra.0+0x1e0/0x370
+  run_rebalance_domains+0x6c/0x80
+  __do_softirq+0x128/0x3d8
+  ____do_softirq+0x18/0x24
+  call_on_irq_stack+0x2c/0x38
+  do_softirq_own_stack+0x24/0x3c
+  __irq_exit_rcu+0xcc/0xf4
+  irq_exit_rcu+0x18/0x24
+  el1_interrupt+0x4c/0xe4
+  el1h_64_irq_handler+0x18/0x2c
+  el1h_64_irq+0x74/0x78
+  arch_cpu_idle+0x18/0x4c
+  default_idle_call+0x58/0x194
+  do_idle+0x244/0x2b0
+  cpu_startup_entry+0x30/0x3c
+  secondary_start_kernel+0x14c/0x190
+  __secondary_switched+0xb0/0xb4
+ ---[ end trace 0000000000000000 ]---
+
+Further investigation shows that the warning is superfluous: the migration
+disabled task is just going to be migrated to its current running CPU.
+This is because, on load balance, if the dst_cpu is not allowed by the
+task, we re-select a new_dst_cpu as a candidate. If no task can be
+balanced to dst_cpu we try to balance the task to the new_dst_cpu
+instead. In this case, when the migration disabled task is not on a CPU
+it is only allowed to run on its current CPU, so load balance selects its
+current CPU as new_dst_cpu and later triggers the warning above.
+
+The new_dst_cpu is chosen from env->dst_grpmask. Currently it contains
+the CPUs in sched_group_span(), and if we have overlapped groups it is
+possible to run into this case. This patch sets env->dst_grpmask to
+group_balance_mask(), which excludes any CPUs from the busiest group and
+solves the issue. For balancing in a domain with no overlapped groups
+the behaviour stays the same as before.
+
+Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Link: https://lore.kernel.org/r/20230530082507.10444-1-yangyicong@huawei.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 9fcba0d2ab19b..2680216234ff2 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8938,7 +8938,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
+               .sd             = sd,
+               .dst_cpu        = this_cpu,
+               .dst_rq         = this_rq,
+-              .dst_grpmask    = sched_group_span(sd->groups),
++              .dst_grpmask    = group_balance_mask(sd->groups),
+               .idle           = idle,
+               .loop_break     = sched_nr_migrate_break,
+               .cpus           = cpus,
+-- 
+2.39.2
+
index 4f6a54976431da3091d0d32e54d0d133d9206d4b..b330796c44d256b75c6248581541b3f44827bbf6 100644 (file)
@@ -282,3 +282,32 @@ can-bcm-fix-uaf-in-bcm_proc_show.patch
 drm-client-fix-memory-leak-in-drm_client_target_cloned.patch
 drm-client-fix-memory-leak-in-drm_client_modeset_probe.patch
 ext4-correct-inline-offset-when-handling-xattrs-in-inode-body.patch
+debugobjects-recheck-debug_objects_enabled-before-re.patch
+nbd-add-the-maximum-limit-of-allocated-index-in-nbd_.patch
+md-fix-data-corruption-for-raid456-when-reshape-rest.patch
+md-raid10-prevent-soft-lockup-while-flush-writes.patch
+posix-timers-ensure-timer-id-search-loop-limit-is-va.patch
+arm64-mm-fix-va-range-sanity-check.patch
+sched-fair-don-t-balance-task-to-its-current-running.patch
+bpf-address-kcsan-report-on-bpf_lru_list.patch
+devlink-report-devlink_port_type_warn-source-device.patch
+wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch
+wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch
+igb-fix-igb_down-hung-on-surprise-removal.patch
+spi-bcm63xx-fix-max-prepend-length.patch
+fbdev-imxfb-warn-about-invalid-left-right-margin.patch
+pinctrl-amd-use-amd_pinconf_set-for-all-config-optio.patch
+net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch
+iavf-fix-use-after-free-in-free_netdev.patch
+net-ipv6-check-return-value-of-pskb_trim.patch
+revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch
+fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch
+llc-don-t-drop-packet-from-non-root-netns.patch
+netfilter-nf_tables-fix-spurious-set-element-inserti.patch
+netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch
+tcp-annotate-data-races-around-tp-tcp_tx_delay.patch
+net-replace-the-limit-of-tcp_linger2-with-tcp_fin_ti.patch
+tcp-annotate-data-races-around-tp-linger2.patch
+tcp-annotate-data-races-around-rskq_defer_accept.patch
+tcp-annotate-data-races-around-tp-notsent_lowat.patch
+tcp-annotate-data-races-around-fastopenq.max_qlen.patch
diff --git a/queue-5.4/spi-bcm63xx-fix-max-prepend-length.patch b/queue-5.4/spi-bcm63xx-fix-max-prepend-length.patch
new file mode 100644 (file)
index 0000000..f384b46
--- /dev/null
@@ -0,0 +1,47 @@
+From 6545437f3ac3092bddedf85b05718088e4af7b13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Jun 2023 09:14:52 +0200
+Subject: spi: bcm63xx: fix max prepend length
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit 5158814cbb37bbb38344b3ecddc24ba2ed0365f2 ]
+
+The command word is defined as following:
+
+    /* Command */
+    #define SPI_CMD_COMMAND_SHIFT           0
+    #define SPI_CMD_DEVICE_ID_SHIFT         4
+    #define SPI_CMD_PREPEND_BYTE_CNT_SHIFT  8
+    #define SPI_CMD_ONE_BYTE_SHIFT          11
+    #define SPI_CMD_ONE_WIRE_SHIFT          12
+
+If the prepend byte count field starts at bit 8, and the next defined
+bit is SPI_CMD_ONE_BYTE at bit 11, it can be at most 3 bits wide, and
+thus the max value is 7, not 15.
+
+Fixes: b17de076062a ("spi/bcm63xx: work around inability to keep CS up")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Link: https://lore.kernel.org/r/20230629071453.62024-1-jonas.gorski@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-bcm63xx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
+index fdd7eaa0b8ede..ff27596168732 100644
+--- a/drivers/spi/spi-bcm63xx.c
++++ b/drivers/spi/spi-bcm63xx.c
+@@ -125,7 +125,7 @@ enum bcm63xx_regs_spi {
+       SPI_MSG_DATA_SIZE,
+ };
+-#define BCM63XX_SPI_MAX_PREPEND               15
++#define BCM63XX_SPI_MAX_PREPEND               7
+ #define BCM63XX_SPI_MAX_CS            8
+ #define BCM63XX_SPI_BUS_NUM           0
+-- 
+2.39.2
+
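A quick check of the width argument, using the command-word shifts quoted in the commit message: with the prepend count starting at bit 8 and the next field at bit 11, the field is 3 bits wide and its maximum value is 7.

```
#include <stdio.h>

#define SPI_CMD_PREPEND_BYTE_CNT_SHIFT	8
#define SPI_CMD_ONE_BYTE_SHIFT		11

int main(void)
{
	unsigned int width = SPI_CMD_ONE_BYTE_SHIFT - SPI_CMD_PREPEND_BYTE_CNT_SHIFT;
	unsigned int max = (1u << width) - 1;

	printf("prepend field: %u bits wide, max value %u\n", width, max);
	return 0;
}
```
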
diff --git a/queue-5.4/tcp-annotate-data-races-around-fastopenq.max_qlen.patch b/queue-5.4/tcp-annotate-data-races-around-fastopenq.max_qlen.patch
new file mode 100644 (file)
index 0000000..9e8382c
--- /dev/null
@@ -0,0 +1,77 @@
+From 5064c367a8407c3937794c8a11c980beb080c348 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:57 +0000
+Subject: tcp: annotate data-races around fastopenq.max_qlen
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 70f360dd7042cb843635ece9d28335a4addff9eb ]
+
+This field can be read locklessly.
+
+Fixes: 1536e2857bd3 ("tcp: Add a TCP_FASTOPEN socket option to get a max backlog on its listner")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-12-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/tcp.h     | 2 +-
+ net/ipv4/tcp.c          | 2 +-
+ net/ipv4/tcp_fastopen.c | 6 ++++--
+ 3 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/tcp.h b/include/linux/tcp.h
+index 89751c89f11f4..68dacc1994376 100644
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -458,7 +458,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog)
+       struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+       int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn);
+-      queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn);
++      WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
+ }
+ static inline void tcp_move_syn(struct tcp_sock *tp,
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index c980d18d99094..647cb664c2ad0 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3623,7 +3623,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+               break;
+       case TCP_FASTOPEN:
+-              val = icsk->icsk_accept_queue.fastopenq.max_qlen;
++              val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
+               break;
+       case TCP_FASTOPEN_CONNECT:
+diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
+index 21705b2ddaffa..35088cd30840d 100644
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -312,6 +312,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
+ static bool tcp_fastopen_queue_check(struct sock *sk)
+ {
+       struct fastopen_queue *fastopenq;
++      int max_qlen;
+       /* Make sure the listener has enabled fastopen, and we don't
+        * exceed the max # of pending TFO requests allowed before trying
+@@ -324,10 +325,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
+        * temporarily vs a server not supporting Fast Open at all.
+        */
+       fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+-      if (fastopenq->max_qlen == 0)
++      max_qlen = READ_ONCE(fastopenq->max_qlen);
++      if (max_qlen == 0)
+               return false;
+-      if (fastopenq->qlen >= fastopenq->max_qlen) {
++      if (fastopenq->qlen >= max_qlen) {
+               struct request_sock *req1;
+               spin_lock(&fastopenq->lock);
+               req1 = fastopenq->rskq_rst_head;
+-- 
+2.39.2
+
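This and the following tcp data-race patches all apply the same pattern: a field written under the socket lock but read locklessly elsewhere is wrapped in WRITE_ONCE()/READ_ONCE() so the accesses cannot be torn or fused by the compiler and KCSAN treats the race as intentional. A rough user-space analogue is sketched below; C11 relaxed atomics stand in for the kernel macros, and the function names are illustrative only.

```
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int max_qlen;	/* stand-in for fastopenq.max_qlen */

static void fastopen_queue_tune(unsigned int backlog)
{
	/* writer side: single, untorn store (kernel: WRITE_ONCE()) */
	atomic_store_explicit(&max_qlen, backlog, memory_order_relaxed);
}

static unsigned int fastopen_queue_read(void)
{
	/* lockless reader side (kernel: READ_ONCE()) */
	return atomic_load_explicit(&max_qlen, memory_order_relaxed);
}

int main(void)
{
	fastopen_queue_tune(128);
	printf("max_qlen = %u\n", fastopen_queue_read());
	return 0;
}
```
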
diff --git a/queue-5.4/tcp-annotate-data-races-around-rskq_defer_accept.patch b/queue-5.4/tcp-annotate-data-races-around-rskq_defer_accept.patch
new file mode 100644 (file)
index 0000000..a9db98d
--- /dev/null
@@ -0,0 +1,53 @@
+From 2cb6b6537610c3fffde9a74654ad89cc87671b53 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:54 +0000
+Subject: tcp: annotate data-races around rskq_defer_accept
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ae488c74422fb1dcd807c0201804b3b5e8a322a3 ]
+
+do_tcp_getsockopt() reads rskq_defer_accept while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-9-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index b9475fcaa6c4f..eb70c1b866d0f 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3075,9 +3075,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+       case TCP_DEFER_ACCEPT:
+               /* Translate value in seconds to number of retransmits */
+-              icsk->icsk_accept_queue.rskq_defer_accept =
+-                      secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+-                                      TCP_RTO_MAX / HZ);
++              WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
++                         secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
++                                         TCP_RTO_MAX / HZ));
+               break;
+       case TCP_WINDOW_CLAMP:
+@@ -3481,8 +3481,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+                       val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
+               break;
+       case TCP_DEFER_ACCEPT:
+-              val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+-                                    TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
++              val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
++              val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
++                                    TCP_RTO_MAX / HZ);
+               break;
+       case TCP_WINDOW_CLAMP:
+               val = tp->window_clamp;
+-- 
+2.39.2
+
diff --git a/queue-5.4/tcp-annotate-data-races-around-tp-linger2.patch b/queue-5.4/tcp-annotate-data-races-around-tp-linger2.patch
new file mode 100644 (file)
index 0000000..be6c099
--- /dev/null
@@ -0,0 +1,52 @@
+From 5227a8526cd7b418601ee6b5cf385be0030c4937 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:53 +0000
+Subject: tcp: annotate data-races around tp->linger2
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9df5335ca974e688389c875546e5819778a80d59 ]
+
+do_tcp_getsockopt() reads tp->linger2 while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-8-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index c6c73b9407098..b9475fcaa6c4f 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3066,11 +3066,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+       case TCP_LINGER2:
+               if (val < 0)
+-                      tp->linger2 = -1;
++                      WRITE_ONCE(tp->linger2, -1);
+               else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+-                      tp->linger2 = TCP_FIN_TIMEOUT_MAX;
++                      WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
+               else
+-                      tp->linger2 = val * HZ;
++                      WRITE_ONCE(tp->linger2, val * HZ);
+               break;
+       case TCP_DEFER_ACCEPT:
+@@ -3476,7 +3476,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+               val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+               break;
+       case TCP_LINGER2:
+-              val = tp->linger2;
++              val = READ_ONCE(tp->linger2);
+               if (val >= 0)
+                       val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
+               break;
+-- 
+2.39.2
+
diff --git a/queue-5.4/tcp-annotate-data-races-around-tp-notsent_lowat.patch b/queue-5.4/tcp-annotate-data-races-around-tp-notsent_lowat.patch
new file mode 100644 (file)
index 0000000..5b77ab9
--- /dev/null
@@ -0,0 +1,64 @@
+From 3537464fefdcffe80ddaabe1713aab6c192844dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:55 +0000
+Subject: tcp: annotate data-races around tp->notsent_lowat
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 1aeb87bc1440c5447a7fa2d6e3c2cca52cbd206b ]
+
+tp->notsent_lowat can be read locklessly from do_tcp_getsockopt()
+and tcp_poll().
+
+Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-10-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 6 +++++-
+ net/ipv4/tcp.c    | 4 ++--
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 2f456bed33ec3..4e909148fce39 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1953,7 +1953,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
+ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
+ {
+       struct net *net = sock_net((struct sock *)tp);
+-      return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
++      u32 val;
++
++      val = READ_ONCE(tp->notsent_lowat);
++
++      return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+ }
+ /* @wake is one when sk_stream_write_space() calls us.
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index eb70c1b866d0f..c980d18d99094 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3165,7 +3165,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+               err = tcp_repair_set_window(tp, optval, optlen);
+               break;
+       case TCP_NOTSENT_LOWAT:
+-              tp->notsent_lowat = val;
++              WRITE_ONCE(tp->notsent_lowat, val);
+               sk->sk_write_space(sk);
+               break;
+       case TCP_INQ:
+@@ -3642,7 +3642,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+               val = tcp_time_stamp_raw() + tp->tsoffset;
+               break;
+       case TCP_NOTSENT_LOWAT:
+-              val = tp->notsent_lowat;
++              val = READ_ONCE(tp->notsent_lowat);
+               break;
+       case TCP_INQ:
+               val = tp->recvmsg_inq;
+-- 
+2.39.2
+
diff --git a/queue-5.4/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch b/queue-5.4/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch
new file mode 100644 (file)
index 0000000..f445a0d
--- /dev/null
@@ -0,0 +1,46 @@
+From 26a4b494c6e89c44e7b5897579e5dced37127a5b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:47 +0000
+Subject: tcp: annotate data-races around tp->tcp_tx_delay
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 348b81b68b13ebd489a3e6a46aa1c384c731c919 ]
+
+do_tcp_getsockopt() reads tp->tcp_tx_delay while another cpu
+might change its value.
+
+Fixes: a842fe1425cb ("tcp: add optional per socket transmit delay")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index fdf2ddc4864df..e33abcff56080 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3177,7 +3177,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+       case TCP_TX_DELAY:
+               if (val)
+                       tcp_enable_tx_delay();
+-              tp->tcp_tx_delay = val;
++              WRITE_ONCE(tp->tcp_tx_delay, val);
+               break;
+       default:
+               err = -ENOPROTOOPT;
+@@ -3634,7 +3634,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+               break;
+       case TCP_TX_DELAY:
+-              val = tp->tcp_tx_delay;
++              val = READ_ONCE(tp->tcp_tx_delay);
+               break;
+       case TCP_TIMESTAMP:
+-- 
+2.39.2
+
diff --git a/queue-5.4/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch b/queue-5.4/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch
new file mode 100644 (file)
index 0000000..4f154c0
--- /dev/null
@@ -0,0 +1,47 @@
+From d9c11a2569b0a90e134d2e38e12c135e5abd3856 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 13:04:02 +0300
+Subject: wifi: iwlwifi: mvm: avoid baid size integer overflow
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 1a528ab1da324d078ec60283c34c17848580df24 ]
+
+Roee reported various hard-to-debug crashes with pings in
+EHT aggregation scenarios. Enabling KASAN showed that we
+access the BAID allocation out of bounds, and looking at
+the code a bit shows that since the reorder buffer entry
+(struct iwl_mvm_reorder_buf_entry) is 128 bytes if debug
+such as lockdep is enabled, then starting from an agg size
+512 we overflow the size calculation, and allocate a much
+smaller structure than we should, causing slab corruption
+once we initialize this.
+
+Fix this by simply using u32 instead of u16.
+
+Reported-by: Roee Goldfiner <roee.h.goldfiner@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230620125813.f428c856030d.I2c2bb808e945adb71bc15f5b2bac2d8957ea90eb@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+index a3255100e3fee..7befb92b5159c 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+@@ -2557,7 +2557,7 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
+       }
+       if (iwl_mvm_has_new_rx_api(mvm) && start) {
+-              u16 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]);
++              u32 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]);
+               /* sparse doesn't like the __align() so don't check */
+ #ifndef __CHECKER__
+-- 
+2.39.2
+
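A minimal illustration of the arithmetic, using the 512-entry aggregation size and the 128-byte debug-config entry size quoted above: the product is 65536, which truncates to 0 in a u16 but is preserved in a u32.

```
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int buf_size = 512;			/* aggregation window size */
	unsigned int entry_size = 128;			/* per-entry size with debug enabled */
	uint16_t old_total = buf_size * entry_size;	/* 65536 truncates to 0 */
	uint32_t new_total = buf_size * entry_size;	/* 65536, as intended */

	printf("u16: %u  u32: %u\n", old_total, new_total);
	return 0;
}
```
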
diff --git a/queue-5.4/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch b/queue-5.4/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch
new file mode 100644 (file)
index 0000000..c3c9716
--- /dev/null
@@ -0,0 +1,71 @@
+From 993fa417304ccbc8f9e32804e8c193cccb79430c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 12:04:07 -0600
+Subject: wifi: wext-core: Fix -Wstringop-overflow warning in
+ ioctl_standard_iw_point()
+
+From: Gustavo A. R. Silva <gustavoars@kernel.org>
+
+[ Upstream commit 71e7552c90db2a2767f5c17c7ec72296b0d92061 ]
+
+-Wstringop-overflow is legitimately warning us about extra_size
+potentially being zero at some point, hence potentially ending
+up _allocating_ zero bytes of memory for the extra pointer and then
+trying to access such an object in a call to copy_from_user().
+
+Fix this by adding a sanity check to ensure we never end up
+trying to allocate zero bytes of data for the extra pointer, before
+continuing to execute the rest of the code in the function.
+
+Address the following -Wstringop-overflow warning seen when built
+m68k architecture with allyesconfig configuration:
+                 from net/wireless/wext-core.c:11:
+In function '_copy_from_user',
+    inlined from 'copy_from_user' at include/linux/uaccess.h:183:7,
+    inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:825:7:
+arch/m68k/include/asm/string.h:48:25: warning: '__builtin_memset' writing 1 or more bytes into a region of size 0 overflows the destination [-Wstringop-overflow=]
+   48 | #define memset(d, c, n) __builtin_memset(d, c, n)
+      |                         ^~~~~~~~~~~~~~~~~~~~~~~~~
+include/linux/uaccess.h:153:17: note: in expansion of macro 'memset'
+  153 |                 memset(to + (n - res), 0, res);
+      |                 ^~~~~~
+In function 'kmalloc',
+    inlined from 'kzalloc' at include/linux/slab.h:694:9,
+    inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:819:10:
+include/linux/slab.h:577:16: note: at offset 1 into destination object of size 0 allocated by '__kmalloc'
+  577 |         return __kmalloc(size, flags);
+      |                ^~~~~~~~~~~~~~~~~~~~~~
+
+This helps with the ongoing efforts to globally enable
+-Wstringop-overflow.
+
+Link: https://github.com/KSPP/linux/issues/315
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/ZItSlzvIpjdjNfd8@work
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/wext-core.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
+index 76a80a41615be..a57f54bc0e1a7 100644
+--- a/net/wireless/wext-core.c
++++ b/net/wireless/wext-core.c
+@@ -796,6 +796,12 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
+               }
+       }
++      /* Sanity-check to ensure we never end up _allocating_ zero
++       * bytes of data for extra.
++       */
++      if (extra_size <= 0)
++              return -EFAULT;
++
+       /* kzalloc() ensures NULL-termination for essid_compat. */
+       extra = kzalloc(extra_size, GFP_KERNEL);
+       if (!extra)
+-- 
+2.39.2
+