From: Sasha Levin Date: Fri, 8 Dec 2023 10:04:26 +0000 (-0500) Subject: Fixes for 6.1 X-Git-Tag: v6.6.6~63 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5a7b94d2a7f4e66202df1506bfb29155aabe5ad2;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch b/queue-6.1/drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch new file mode 100644 index 00000000000..bfab80c175d --- /dev/null +++ b/queue-6.1/drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch @@ -0,0 +1,40 @@ +From e8dccefd160b4f00c0ebcd32c12abebb4d5ee140 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 31 Oct 2023 10:32:37 +0800 +Subject: drm/amdgpu: correct chunk_ptr to a pointer to chunk. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: YuanShang + +[ Upstream commit 50d51374b498457c4dea26779d32ccfed12ddaff ] + +The variable "chunk_ptr" should be a pointer pointing +to a struct drm_amdgpu_cs_chunk instead of to a pointer +of that. 
+ +Signed-off-by: YuanShang +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index ced4e7e8f98b5..133e4e03c143c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -201,7 +201,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, + } + + for (i = 0; i < p->nchunks; i++) { +- struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL; ++ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; + struct drm_amdgpu_cs_chunk user_chunk; + uint32_t __user *cdata; + +-- +2.42.0 + diff --git a/queue-6.1/hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch b/queue-6.1/hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch new file mode 100644 index 00000000000..53352e420e5 --- /dev/null +++ b/queue-6.1/hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch @@ -0,0 +1,155 @@ +From afb8eb915ac12693aafc9d0704517463eb694248 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Nov 2023 15:57:13 +0100 +Subject: hrtimers: Push pending hrtimers away from outgoing CPU earlier + +From: Thomas Gleixner + +[ Upstream commit 5c0930ccaad5a74d74e8b18b648c5eb21ed2fe94 ] + +2b8272ff4a70 ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug") +solved the straightforward CPU hotplug deadlock vs. the scheduler +bandwidth timer. Yu discovered a more involved variant where a task which +has a bandwidth timer started on the outgoing CPU holds a lock and then +gets throttled. If the lock is required by one of the CPU hotplug callbacks +the hotplug operation deadlocks because the unthrottling timer event is not +handled on the dying CPU and can only be recovered once the control CPU +reaches the hotplug state which pulls the pending hrtimers from the dead +CPU. 
+ +Solve this by pushing the hrtimers away from the dying CPU in the dying +callbacks. Nothing can queue a hrtimer on the dying CPU at that point because +all other CPUs spin in stop_machine() with interrupts disabled and once the +operation is finished the CPU is marked offline. + +Reported-by: Yu Liao +Signed-off-by: Thomas Gleixner +Tested-by: Liu Tie +Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx +Signed-off-by: Sasha Levin +--- + include/linux/cpuhotplug.h | 1 + + include/linux/hrtimer.h | 4 ++-- + kernel/cpu.c | 8 +++++++- + kernel/time/hrtimer.c | 33 ++++++++++++--------------------- + 4 files changed, 22 insertions(+), 24 deletions(-) + +diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h +index c7e0d80dbf6a5..67575bc8a7e29 100644 +--- a/include/linux/cpuhotplug.h ++++ b/include/linux/cpuhotplug.h +@@ -196,6 +196,7 @@ enum cpuhp_state { + CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, + CPUHP_AP_ARM64_ISNDEP_STARTING, + CPUHP_AP_SMPCFD_DYING, ++ CPUHP_AP_HRTIMERS_DYING, + CPUHP_AP_X86_TBOOT_DYING, + CPUHP_AP_ARM_CACHE_B15_RAC_DYING, + CPUHP_AP_ONLINE, +diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h +index 0ee140176f102..f2044d5a652b5 100644 +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void); + + int hrtimers_prepare_cpu(unsigned int cpu); + #ifdef CONFIG_HOTPLUG_CPU +-int hrtimers_dead_cpu(unsigned int cpu); ++int hrtimers_cpu_dying(unsigned int cpu); + #else +-#define hrtimers_dead_cpu NULL ++#define hrtimers_cpu_dying NULL + #endif + + #endif +diff --git a/kernel/cpu.c b/kernel/cpu.c +index 0e4d362e90825..551468d9c5a85 100644 +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -1733,7 +1733,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { + [CPUHP_HRTIMERS_PREPARE] = { + .name = "hrtimers:prepare", + .startup.single = hrtimers_prepare_cpu, +- .teardown.single = hrtimers_dead_cpu, ++ .teardown.single = NULL, + }, + [CPUHP_SMPCFD_PREPARE] = { + .name = 
"smpcfd:prepare", +@@ -1800,6 +1800,12 @@ static struct cpuhp_step cpuhp_hp_states[] = { + .startup.single = NULL, + .teardown.single = smpcfd_dying_cpu, + }, ++ [CPUHP_AP_HRTIMERS_DYING] = { ++ .name = "hrtimers:dying", ++ .startup.single = NULL, ++ .teardown.single = hrtimers_cpu_dying, ++ }, ++ + /* Entry state on starting. Interrupts enabled from here on. Transient + * state for synchronsization */ + [CPUHP_AP_ONLINE] = { +diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index e4f0e3b0c4f4f..5561dabc9b225 100644 +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -2216,29 +2216,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, + } + } + +-int hrtimers_dead_cpu(unsigned int scpu) ++int hrtimers_cpu_dying(unsigned int dying_cpu) + { + struct hrtimer_cpu_base *old_base, *new_base; +- int i; ++ int i, ncpu = cpumask_first(cpu_active_mask); + +- BUG_ON(cpu_online(scpu)); +- tick_cancel_sched_timer(scpu); ++ tick_cancel_sched_timer(dying_cpu); ++ ++ old_base = this_cpu_ptr(&hrtimer_bases); ++ new_base = &per_cpu(hrtimer_bases, ncpu); + +- /* +- * this BH disable ensures that raise_softirq_irqoff() does +- * not wakeup ksoftirqd (and acquire the pi-lock) while +- * holding the cpu_base lock +- */ +- local_bh_disable(); +- local_irq_disable(); +- old_base = &per_cpu(hrtimer_bases, scpu); +- new_base = this_cpu_ptr(&hrtimer_bases); + /* + * The caller is globally serialized and nobody else + * takes two locks at once, deadlock is not possible. + */ +- raw_spin_lock(&new_base->lock); +- raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); ++ raw_spin_lock(&old_base->lock); ++ raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); + + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { + migrate_hrtimer_list(&old_base->clock_base[i], +@@ -2249,15 +2242,13 @@ int hrtimers_dead_cpu(unsigned int scpu) + * The migration might have changed the first expiring softirq + * timer on this CPU. Update it. 
+ */ +- hrtimer_update_softirq_timer(new_base, false); ++ __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); ++ /* Tell the other CPU to retrigger the next event */ ++ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); + +- raw_spin_unlock(&old_base->lock); + raw_spin_unlock(&new_base->lock); ++ raw_spin_unlock(&old_base->lock); + +- /* Check, if we got expired work to do */ +- __hrtimer_peek_ahead_timers(); +- local_irq_enable(); +- local_bh_enable(); + return 0; + } + +-- +2.42.0 + diff --git a/queue-6.1/i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch b/queue-6.1/i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch new file mode 100644 index 00000000000..4bb85d46275 --- /dev/null +++ b/queue-6.1/i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch @@ -0,0 +1,108 @@ +From 19686fbc1a118738c7acf12e034b15d139c054fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Nov 2023 03:19:27 +0000 +Subject: i2c: designware: Fix corrupted memory seen in the ISR + +From: Jan Bottorff + +[ Upstream commit f726eaa787e9f9bc858c902d18a09af6bcbfcdaf ] + +When running on a many core ARM64 server, errors were +happening in the ISR that looked like corrupted memory. These +corruptions would fix themselves if small delays were inserted +in the ISR. Errors reported by the driver included "i2c_designware +APMC0D0F:00: i2c_dw_xfer_msg: invalid target address" and +"i2c_designware APMC0D0F:00:controller timed out" during +in-band IPMI SSIF stress tests. + +The problem was determined to be memory writes in the driver were not +becoming visible to all cores when execution rapidly shifted between +cores, like when a register write immediately triggers an ISR. +Processors with weak memory ordering, like ARM64, make no +guarantees about the order normal memory writes become globally +visible, unless barrier instructions are used to control ordering. 
+ +To solve this, regmap accessor functions configured by this driver +were changed to use non-relaxed forms of the low-level register +access functions, which include a barrier on platforms that require +it. This assures memory writes before a controller register access are +visible to all cores. The community concluded defaulting to correct +operation outweighed defaulting to the small performance gains from +using relaxed access functions. Being a low speed device added weight to +this choice of default register access behavior. + +Signed-off-by: Jan Bottorff +Acked-by: Jarkko Nikula +Tested-by: Serge Semin +Reviewed-by: Serge Semin +Signed-off-by: Wolfram Sang +Signed-off-by: Sasha Levin +--- + drivers/i2c/busses/i2c-designware-common.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c +index 6fdb25a5f8016..ad98c85ec2e7a 100644 +--- a/drivers/i2c/busses/i2c-designware-common.c ++++ b/drivers/i2c/busses/i2c-designware-common.c +@@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val) + { + struct dw_i2c_dev *dev = context; + +- *val = readl_relaxed(dev->base + reg); ++ *val = readl(dev->base + reg); + + return 0; + } +@@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val) + { + struct dw_i2c_dev *dev = context; + +- writel_relaxed(val, dev->base + reg); ++ writel(val, dev->base + reg); + + return 0; + } +@@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val) + { + struct dw_i2c_dev *dev = context; + +- *val = swab32(readl_relaxed(dev->base + reg)); ++ *val = swab32(readl(dev->base + reg)); + + return 0; + } +@@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val) + { + struct dw_i2c_dev *dev = context; + +- writel_relaxed(swab32(val), dev->base + reg); ++ writel(swab32(val), 
dev->base + reg); + + return 0; + } +@@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val) + { + struct dw_i2c_dev *dev = context; + +- *val = readw_relaxed(dev->base + reg) | +- (readw_relaxed(dev->base + reg + 2) << 16); ++ *val = readw(dev->base + reg) | ++ (readw(dev->base + reg + 2) << 16); + + return 0; + } +@@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val) + { + struct dw_i2c_dev *dev = context; + +- writew_relaxed(val, dev->base + reg); +- writew_relaxed(val >> 16, dev->base + reg + 2); ++ writew(val, dev->base + reg); ++ writew(val >> 16, dev->base + reg + 2); + + return 0; + } +-- +2.42.0 + diff --git a/queue-6.1/kconfig-fix-memory-leak-from-range-properties.patch b/queue-6.1/kconfig-fix-memory-leak-from-range-properties.patch new file mode 100644 index 00000000000..1ce67f6ba7d --- /dev/null +++ b/queue-6.1/kconfig-fix-memory-leak-from-range-properties.patch @@ -0,0 +1,92 @@ +From 28fde5ce84f97b117c284395758aa9096611445e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Nov 2023 13:16:53 +0900 +Subject: kconfig: fix memory leak from range properties + +From: Masahiro Yamada + +[ Upstream commit ae1eff0349f2e908fc083630e8441ea6dc434dc0 ] + +Currently, sym_validate_range() duplicates the range string using +xstrdup(), which is overwritten by a subsequent sym_calc_value() call. +It results in a memory leak. + +Instead, only the pointer should be copied. + +Below is a test case, with a summary from Valgrind. 
+ +[Test Kconfig] + + config FOO + int "foo" + range 10 20 + +[Test .config] + + CONFIG_FOO=0 + +[Before] + + LEAK SUMMARY: + definitely lost: 3 bytes in 1 blocks + indirectly lost: 0 bytes in 0 blocks + possibly lost: 0 bytes in 0 blocks + still reachable: 17,465 bytes in 21 blocks + suppressed: 0 bytes in 0 blocks + +[After] + + LEAK SUMMARY: + definitely lost: 0 bytes in 0 blocks + indirectly lost: 0 bytes in 0 blocks + possibly lost: 0 bytes in 0 blocks + still reachable: 17,462 bytes in 20 blocks + suppressed: 0 bytes in 0 blocks + +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + scripts/kconfig/symbol.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c +index 0572330bf8a78..a76925b46ce63 100644 +--- a/scripts/kconfig/symbol.c ++++ b/scripts/kconfig/symbol.c +@@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base) + static void sym_validate_range(struct symbol *sym) + { + struct property *prop; ++ struct symbol *range_sym; + int base; + long long val, val2; +- char str[64]; + + switch (sym->type) { + case S_INT: +@@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym) + if (!prop) + return; + val = strtoll(sym->curr.val, NULL, base); +- val2 = sym_get_range_val(prop->expr->left.sym, base); ++ range_sym = prop->expr->left.sym; ++ val2 = sym_get_range_val(range_sym, base); + if (val >= val2) { +- val2 = sym_get_range_val(prop->expr->right.sym, base); ++ range_sym = prop->expr->right.sym; ++ val2 = sym_get_range_val(range_sym, base); + if (val <= val2) + return; + } +- if (sym->type == S_INT) +- sprintf(str, "%lld", val2); +- else +- sprintf(str, "0x%llx", val2); +- sym->curr.val = xstrdup(str); ++ sym->curr.val = range_sym->curr.val; + } + + static void sym_set_changed(struct symbol *sym) +-- +2.42.0 + diff --git a/queue-6.1/netfilter-ipset-fix-race-condition-between-swap-dest.patch 
b/queue-6.1/netfilter-ipset-fix-race-condition-between-swap-dest.patch new file mode 100644 index 00000000000..af17fb717d3 --- /dev/null +++ b/queue-6.1/netfilter-ipset-fix-race-condition-between-swap-dest.patch @@ -0,0 +1,105 @@ +From 0eddebfa8f404b97512ae72a1173b04bd02a7f0f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Nov 2023 21:13:23 +0100 +Subject: netfilter: ipset: fix race condition between swap/destroy and kernel + side add/del/test + +From: Jozsef Kadlecsik + +[ Upstream commit 28628fa952fefc7f2072ce6e8016968cc452b1ba ] + +Linkui Xiao reported that there's a race condition when ipset swap and destroy is +called, which can lead to crash in add/del/test element operations. Swap then +destroy are usual operations to replace a set with another one in a production +system. The issue can in some cases be reproduced with the script: + +ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576 +ipset add hash_ip1 172.20.0.0/16 +ipset add hash_ip1 192.168.0.0/16 +iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT +while [ 1 ] +do + # ... Ongoing traffic... + ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576 + ipset add hash_ip2 172.20.0.0/16 + ipset swap hash_ip1 hash_ip2 + ipset destroy hash_ip2 + sleep 0.05 +done + +In the race case the possible order of the operations are + + CPU0 CPU1 + ip_set_test + ipset swap hash_ip1 hash_ip2 + ipset destroy hash_ip2 + hash_net_kadt + +Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which +is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy +removed it, hash_net_kadt crashes. + +The fix is to force ip_set_swap() to wait for all readers to finish accessing the +old set pointers by calling synchronize_rcu(). + +The first version of the patch was written by Linkui Xiao . + +v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden + ip_set_destroy() unnecessarily when all sets are destroyed. 
+v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held + and em_ipset.c wraps the entire ip_set_test() in rcu read lock/unlock pair. + So there's no need to extend the rcu read locked area in ipset itself. + +Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/ +Reported-by: Linkui Xiao +Signed-off-by: Jozsef Kadlecsik +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipset/ip_set_core.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c +index 20eede37d5228..d47dfdcb899b0 100644 +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); + ip_set_dereference((inst)->ip_set_list)[id] + #define ip_set_ref_netlink(inst,id) \ + rcu_dereference_raw((inst)->ip_set_list)[id] ++#define ip_set_dereference_nfnl(p) \ ++ rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) + + /* The set types are implemented in modules and registered set types + * can be found in ip_set_type_list. Adding/deleting types is +@@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set) + static struct ip_set * + ip_set_rcu_get(struct net *net, ip_set_id_t index) + { +- struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); + +- rcu_read_lock(); +- /* ip_set_list itself needs to be protected */ +- set = rcu_dereference(inst->ip_set_list)[index]; +- rcu_read_unlock(); +- +- return set; ++ /* ip_set_list and the set pointer need to be protected */ ++ return ip_set_dereference_nfnl(inst->ip_set_list)[index]; + } + + static inline void +@@ -1399,6 +1396,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, + ip_set(inst, to_id) = from; + write_unlock_bh(&ip_set_ref_lock); + ++ /* Make sure all readers of the old set pointers are completed. 
*/ ++ synchronize_rcu(); ++ + return 0; + } + +-- +2.42.0 + diff --git a/queue-6.1/series b/queue-6.1/series new file mode 100644 index 00000000000..3ef64705e4c --- /dev/null +++ b/queue-6.1/series @@ -0,0 +1,10 @@ +vdpa-mlx5-preserve-cvq-vringh-index.patch +x86-acpi-ignore-invalid-x2apic-entries.patch +hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch +i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch +netfilter-ipset-fix-race-condition-between-swap-dest.patch +zstd-fix-array-index-out-of-bounds-ubsan-warning.patch +tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch +tg3-increment-tx_dropped-in-tg3_tso_bug.patch +kconfig-fix-memory-leak-from-range-properties.patch +drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch diff --git a/queue-6.1/tg3-increment-tx_dropped-in-tg3_tso_bug.patch b/queue-6.1/tg3-increment-tx_dropped-in-tg3_tso_bug.patch new file mode 100644 index 00000000000..2ad1c49fe5e --- /dev/null +++ b/queue-6.1/tg3-increment-tx_dropped-in-tg3_tso_bug.patch @@ -0,0 +1,41 @@ +From 97a4a7f39e52e919ace70ba0eca40ff7b423499b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Nov 2023 10:23:50 -0800 +Subject: tg3: Increment tx_dropped in tg3_tso_bug() + +From: Alex Pakhunov + +[ Upstream commit 17dd5efe5f36a96bd78012594fabe21efb01186b ] + +tg3_tso_bug() drops a packet if it cannot be segmented for any reason. +The number of discarded frames should be incremented accordingly. 
+ +Signed-off-by: Alex Pakhunov +Signed-off-by: Vincent Wong +Reviewed-by: Pavan Chebbi +Link: https://lore.kernel.org/r/20231113182350.37472-2-alexey.pakhunov@spacex.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/tg3.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c +index 830f542a7c6d2..f60a16de565ed 100644 +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -7879,8 +7879,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, + + segs = skb_gso_segment(skb, tp->dev->features & + ~(NETIF_F_TSO | NETIF_F_TSO6)); +- if (IS_ERR(segs) || !segs) ++ if (IS_ERR(segs) || !segs) { ++ tnapi->tx_dropped++; + goto tg3_tso_bug_end; ++ } + + skb_list_walk_safe(segs, seg, next) { + skb_mark_not_on_list(seg); +-- +2.42.0 + diff --git a/queue-6.1/tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch b/queue-6.1/tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch new file mode 100644 index 00000000000..1138e91f5d8 --- /dev/null +++ b/queue-6.1/tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch @@ -0,0 +1,139 @@ +From 480385da3c6d2d6a3155cd383a93fbf2b9f44109 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Nov 2023 10:23:49 -0800 +Subject: tg3: Move the [rt]x_dropped counters to tg3_napi + +From: Alex Pakhunov + +[ Upstream commit 907d1bdb8b2cc0357d03a1c34d2a08d9943760b1 ] + +This change moves [rt]x_dropped counters to tg3_napi so that they can be +updated by a single writer, race-free. 
+ +Signed-off-by: Alex Pakhunov +Signed-off-by: Vincent Wong +Reviewed-by: Michael Chan +Link: https://lore.kernel.org/r/20231113182350.37472-1-alexey.pakhunov@spacex.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/tg3.c | 38 +++++++++++++++++++++++++---- + drivers/net/ethernet/broadcom/tg3.h | 4 +-- + 2 files changed, 35 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c +index 85570e40c8e9b..830f542a7c6d2 100644 +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -6853,7 +6853,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) + desc_idx, *post_ptr); + drop_it_no_recycle: + /* Other statistics kept track of by card. */ +- tp->rx_dropped++; ++ tnapi->rx_dropped++; + goto next_pkt; + } + +@@ -8151,7 +8151,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) + drop: + dev_kfree_skb_any(skb); + drop_nofree: +- tp->tx_dropped++; ++ tnapi->tx_dropped++; + return NETDEV_TX_OK; + } + +@@ -9330,7 +9330,7 @@ static void __tg3_set_rx_mode(struct net_device *); + /* tp->lock is held. 
*/ + static int tg3_halt(struct tg3 *tp, int kind, bool silent) + { +- int err; ++ int err, i; + + tg3_stop_fw(tp); + +@@ -9351,6 +9351,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent) + + /* And make sure the next sample is new data */ + memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats)); ++ ++ for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) { ++ struct tg3_napi *tnapi = &tp->napi[i]; ++ ++ tnapi->rx_dropped = 0; ++ tnapi->tx_dropped = 0; ++ } + } + + return err; +@@ -11900,6 +11907,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) + { + struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev; + struct tg3_hw_stats *hw_stats = tp->hw_stats; ++ unsigned long rx_dropped; ++ unsigned long tx_dropped; ++ int i; + + stats->rx_packets = old_stats->rx_packets + + get_stat64(&hw_stats->rx_ucast_packets) + +@@ -11946,8 +11956,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) + stats->rx_missed_errors = old_stats->rx_missed_errors + + get_stat64(&hw_stats->rx_discards); + +- stats->rx_dropped = tp->rx_dropped; +- stats->tx_dropped = tp->tx_dropped; ++ /* Aggregate per-queue counters. The per-queue counters are updated ++ * by a single writer, race-free. The result computed by this loop ++ * might not be 100% accurate (counters can be updated in the middle of ++ * the loop) but the next tg3_get_nstats() will recompute the current ++ * value so it is acceptable. ++ * ++ * Note that these counters wrap around at 4G on 32bit machines. 
++ */ ++ rx_dropped = (unsigned long)(old_stats->rx_dropped); ++ tx_dropped = (unsigned long)(old_stats->tx_dropped); ++ ++ for (i = 0; i < tp->irq_cnt; i++) { ++ struct tg3_napi *tnapi = &tp->napi[i]; ++ ++ rx_dropped += tnapi->rx_dropped; ++ tx_dropped += tnapi->tx_dropped; ++ } ++ ++ stats->rx_dropped = rx_dropped; ++ stats->tx_dropped = tx_dropped; + } + + static int tg3_get_regs_len(struct net_device *dev) +diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h +index 1000c894064f0..8d753f8c5b065 100644 +--- a/drivers/net/ethernet/broadcom/tg3.h ++++ b/drivers/net/ethernet/broadcom/tg3.h +@@ -3018,6 +3018,7 @@ struct tg3_napi { + u16 *rx_rcb_prod_idx; + struct tg3_rx_prodring_set prodring; + struct tg3_rx_buffer_desc *rx_rcb; ++ unsigned long rx_dropped; + + u32 tx_prod ____cacheline_aligned; + u32 tx_cons; +@@ -3026,6 +3027,7 @@ struct tg3_napi { + u32 prodmbox; + struct tg3_tx_buffer_desc *tx_ring; + struct tg3_tx_ring_info *tx_buffers; ++ unsigned long tx_dropped; + + dma_addr_t status_mapping; + dma_addr_t rx_rcb_mapping; +@@ -3219,8 +3221,6 @@ struct tg3 { + + + /* begin "everything else" cacheline(s) section */ +- unsigned long rx_dropped; +- unsigned long tx_dropped; + struct rtnl_link_stats64 net_stats_prev; + struct tg3_ethtool_stats estats_prev; + +-- +2.42.0 + diff --git a/queue-6.1/vdpa-mlx5-preserve-cvq-vringh-index.patch b/queue-6.1/vdpa-mlx5-preserve-cvq-vringh-index.patch new file mode 100644 index 00000000000..84a56ad6c09 --- /dev/null +++ b/queue-6.1/vdpa-mlx5-preserve-cvq-vringh-index.patch @@ -0,0 +1,66 @@ +From 5fca292e7b098d958239fe716411510baaaffc5f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Nov 2023 05:26:27 -0700 +Subject: vdpa/mlx5: preserve CVQ vringh index +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Steve Sistare + +[ Upstream commit 480b3e73720f6b5d76bef2387b1f9d19ed67573b ] + +mlx5_vdpa does not preserve userland's view of 
vring base for the control +queue in the following sequence: + +ioctl VHOST_SET_VRING_BASE +ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK + mlx5_vdpa_set_status() + setup_cvq_vring() + vringh_init_iotlb() + vringh_init_kern() + vrh->last_avail_idx = 0; +ioctl VHOST_GET_VRING_BASE + +To fix, restore the value of cvq->vring.last_avail_idx after calling +vringh_init_iotlb. + +Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") + +Signed-off-by: Steve Sistare +Acked-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <1699014387-194368-1-git-send-email-steven.sistare@oracle.com> +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c +index bf99654371b35..2b7e796c48897 100644 +--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c ++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c +@@ -2508,13 +2508,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) + struct mlx5_control_vq *cvq = &mvdev->cvq; + int err = 0; + +- if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) ++ if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { ++ u16 idx = cvq->vring.last_avail_idx; ++ + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, + MLX5_CVQ_MAX_ENT, false, + (struct vring_desc *)(uintptr_t)cvq->desc_addr, + (struct vring_avail *)(uintptr_t)cvq->driver_addr, + (struct vring_used *)(uintptr_t)cvq->device_addr); + ++ if (!err) ++ cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; ++ } + return err; + } + +-- +2.42.0 + diff --git a/queue-6.1/x86-acpi-ignore-invalid-x2apic-entries.patch b/queue-6.1/x86-acpi-ignore-invalid-x2apic-entries.patch new file mode 100644 index 00000000000..76ac0c9bccc --- /dev/null +++ b/queue-6.1/x86-acpi-ignore-invalid-x2apic-entries.patch @@ -0,0 +1,130 @@ +From 4c7717939f49024466540344aa3463a934461ec2 Mon Sep 17 
00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Jul 2023 00:28:02 +0800 +Subject: x86/acpi: Ignore invalid x2APIC entries + +From: Zhang Rui + +[ Upstream commit ec9aedb2aa1ab7ac420c00b31f5edc5be15ec167 ] + +Currently, the kernel enumerates the possible CPUs by parsing both ACPI +MADT Local APIC entries and x2APIC entries. So CPUs with "valid" APIC IDs, +even if they have duplicated APIC IDs in Local APIC and x2APIC, are always +enumerated. + +Below is what ACPI MADT Local APIC and x2APIC describes on an +Ivy Bridge-EP system, + +[02Ch 0044 1] Subtable Type : 00 [Processor Local APIC] +[02Fh 0047 1] Local Apic ID : 00 +... +[164h 0356 1] Subtable Type : 00 [Processor Local APIC] +[167h 0359 1] Local Apic ID : 39 +[16Ch 0364 1] Subtable Type : 00 [Processor Local APIC] +[16Fh 0367 1] Local Apic ID : FF +... +[3ECh 1004 1] Subtable Type : 09 [Processor Local x2APIC] +[3F0h 1008 4] Processor x2Apic ID : 00000000 +... +[B5Ch 2908 1] Subtable Type : 09 [Processor Local x2APIC] +[B60h 2912 4] Processor x2Apic ID : 00000077 + +As a result, kernel shows "smpboot: Allowing 168 CPUs, 120 hotplug CPUs". +And this wastes a significant amount of memory for the per-cpu data. +Plus this also breaks https://lore.kernel.org/all/87edm36qqb.ffs@tglx/, +because __max_logical_packages is over-estimated by the APIC IDs in +the x2APIC entries. + +According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure: + + "[Compatibility note] On some legacy OSes, Logical processors with APIC + ID values less than 255 (whether in XAPIC or X2APIC mode) must use the + Processor Local APIC structure to convey their APIC information to OSPM, + and those processors must be declared in the DSDT using the Processor() + keyword. Logical processors with APIC ID values 255 and greater must use + the Processor Local x2APIC structure and be declared using the Device() + keyword." 
+ +Therefore prevent the registration of x2APIC entries with an APIC ID less +than 255 if the local APIC table enumerates valid APIC IDs. + +[ tglx: Simplify the logic ] + +Signed-off-by: Zhang Rui +Signed-off-by: Thomas Gleixner +Tested-by: Peter Zijlstra +Link: https://lore.kernel.org/r/20230702162802.344176-1-rui.zhang@intel.com +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/acpi/boot.c | 34 +++++++++++++++------------------- + 1 file changed, 15 insertions(+), 19 deletions(-) + +diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c +index 2252340b2133e..14af7fbdc6b5e 100644 +--- a/arch/x86/kernel/acpi/boot.c ++++ b/arch/x86/kernel/acpi/boot.c +@@ -62,6 +62,7 @@ int acpi_fix_pin2_polarity __initdata; + + #ifdef CONFIG_X86_LOCAL_APIC + static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; ++static bool has_lapic_cpus __initdata; + static bool acpi_support_online_capable; + #endif + +@@ -235,6 +236,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) + if (!acpi_is_processor_usable(processor->lapic_flags)) + return 0; + ++ /* ++ * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure ++ * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID ++ * in x2APIC must be equal or greater than 0xff. ++ */ ++ if (has_lapic_cpus && apic_id < 0xff) ++ return 0; ++ + /* + * We need to register disabled CPU as well to permit + * counting disabled CPUs. 
This allows us to size +@@ -1114,10 +1123,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) + + static int __init acpi_parse_madt_lapic_entries(void) + { +- int count; +- int x2count = 0; +- int ret; +- struct acpi_subtable_proc madt_proc[2]; ++ int count, x2count = 0; + + if (!boot_cpu_has(X86_FEATURE_APIC)) + return -ENODEV; +@@ -1126,21 +1132,11 @@ static int __init acpi_parse_madt_lapic_entries(void) + acpi_parse_sapic, MAX_LOCAL_APIC); + + if (!count) { +- memset(madt_proc, 0, sizeof(madt_proc)); +- madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; +- madt_proc[0].handler = acpi_parse_lapic; +- madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; +- madt_proc[1].handler = acpi_parse_x2apic; +- ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, +- sizeof(struct acpi_table_madt), +- madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); +- if (ret < 0) { +- pr_err("Error parsing LAPIC/X2APIC entries\n"); +- return ret; +- } +- +- count = madt_proc[0].count; +- x2count = madt_proc[1].count; ++ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, ++ acpi_parse_lapic, MAX_LOCAL_APIC); ++ has_lapic_cpus = count > 0; ++ x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, ++ acpi_parse_x2apic, MAX_LOCAL_APIC); + } + if (!count && !x2count) { + pr_err("No LAPIC entries present\n"); +-- +2.42.0 + diff --git a/queue-6.1/zstd-fix-array-index-out-of-bounds-ubsan-warning.patch b/queue-6.1/zstd-fix-array-index-out-of-bounds-ubsan-warning.patch new file mode 100644 index 00000000000..49f72efa50e --- /dev/null +++ b/queue-6.1/zstd-fix-array-index-out-of-bounds-ubsan-warning.patch @@ -0,0 +1,43 @@ +From 2e29986816a0375b521ac0fd34294d36bca5384b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Oct 2023 12:55:34 -0700 +Subject: zstd: Fix array-index-out-of-bounds UBSAN warning + +From: Nick Terrell + +[ Upstream commit 77618db346455129424fadbbaec596a09feaf3bb ] + +Zstd used an array of length 1 to mean a flexible array for C89 +compatibility. 
Switch to a C99 flexible array to fix the UBSAN warning. + +Tested locally by booting the kernel and writing to and reading from a +BtrFS filesystem with zstd compression enabled. I was unable to reproduce +the issue before the fix, however it is a trivial change. + +Link: https://lkml.kernel.org/r/20231012213428.1390905-1-nickrterrell@gmail.com +Reported-by: syzbot+1f2eb3e8cd123ffce499@syzkaller.appspotmail.com +Reported-by: Eric Biggers +Reported-by: Kees Cook +Signed-off-by: Nick Terrell +Reviewed-by: Kees Cook +Signed-off-by: Sasha Levin +--- + lib/zstd/common/fse_decompress.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c +index 2c8bbe3e4c148..f37b7aec088ec 100644 +--- a/lib/zstd/common/fse_decompress.c ++++ b/lib/zstd/common/fse_decompress.c +@@ -312,7 +312,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size + + typedef struct { + short ncount[FSE_MAX_SYMBOL_VALUE + 1]; +- FSE_DTable dtable[1]; /* Dynamically sized */ ++ FSE_DTable dtable[]; /* Dynamically sized */ + } FSE_DecompressWksp; + + +-- +2.42.0 +