From: Sasha Levin Date: Fri, 8 Dec 2023 10:04:27 +0000 (-0500) Subject: Fixes for 5.15 X-Git-Tag: v6.6.6~62 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e6004880df526545c90a7aa5f60dbd6222c5f9dd;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.15 Signed-off-by: Sasha Levin --- diff --git a/queue-5.15/drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch b/queue-5.15/drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch new file mode 100644 index 00000000000..9105248352f --- /dev/null +++ b/queue-5.15/drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch @@ -0,0 +1,40 @@ +From 99972e3ccfcbc1d4510d399a313ad1ce9167ac94 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 31 Oct 2023 10:32:37 +0800 +Subject: drm/amdgpu: correct chunk_ptr to a pointer to chunk. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: YuanShang + +[ Upstream commit 50d51374b498457c4dea26779d32ccfed12ddaff ] + +The variable "chunk_ptr" should be a pointer pointing +to a struct drm_amdgpu_cs_chunk instead of to a pointer +of that. 
+ +Signed-off-by: YuanShang +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index 2d8f71dde9803..f293d0dfec613 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -142,7 +142,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs + } + + for (i = 0; i < p->nchunks; i++) { +- struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL; ++ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; + struct drm_amdgpu_cs_chunk user_chunk; + uint32_t __user *cdata; + +-- +2.42.0 + diff --git a/queue-5.15/hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch b/queue-5.15/hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch new file mode 100644 index 00000000000..1617fa20106 --- /dev/null +++ b/queue-5.15/hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch @@ -0,0 +1,155 @@ +From 811f4b10e55aacab186235156abf87a9a7397c5f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Nov 2023 15:57:13 +0100 +Subject: hrtimers: Push pending hrtimers away from outgoing CPU earlier + +From: Thomas Gleixner + +[ Upstream commit 5c0930ccaad5a74d74e8b18b648c5eb21ed2fe94 ] + +2b8272ff4a70 ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug") +solved the straightforward CPU hotplug deadlock vs. the scheduler +bandwidth timer. Yu discovered a more involved variant where a task which +has a bandwidth timer started on the outgoing CPU holds a lock and then +gets throttled. If the lock is required by one of the CPU hotplug callbacks, +the hotplug operation deadlocks because the unthrottling timer event is not +handled on the dying CPU and can only be recovered once the control CPU +reaches the hotplug state which pulls the pending hrtimers from the dead +CPU. 
+ +Solve this by pushing the hrtimers away from the dying CPU in the dying +callbacks. Nothing can queue a hrtimer on the dying CPU at that point because +all other CPUs spin in stop_machine() with interrupts disabled and once the +operation is finished the CPU is marked offline. + +Reported-by: Yu Liao +Signed-off-by: Thomas Gleixner +Tested-by: Liu Tie +Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx +Signed-off-by: Sasha Levin +--- + include/linux/cpuhotplug.h | 1 + + include/linux/hrtimer.h | 4 ++-- + kernel/cpu.c | 8 +++++++- + kernel/time/hrtimer.c | 33 ++++++++++++--------------------- + 4 files changed, 22 insertions(+), 24 deletions(-) + +diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h +index c7156bb56e831..c1ecc843b97d2 100644 +--- a/include/linux/cpuhotplug.h ++++ b/include/linux/cpuhotplug.h +@@ -193,6 +193,7 @@ enum cpuhp_state { + CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, + CPUHP_AP_ARM64_ISNDEP_STARTING, + CPUHP_AP_SMPCFD_DYING, ++ CPUHP_AP_HRTIMERS_DYING, + CPUHP_AP_X86_TBOOT_DYING, + CPUHP_AP_ARM_CACHE_B15_RAC_DYING, + CPUHP_AP_ONLINE, +diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h +index 0ee140176f102..f2044d5a652b5 100644 +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void); + + int hrtimers_prepare_cpu(unsigned int cpu); + #ifdef CONFIG_HOTPLUG_CPU +-int hrtimers_dead_cpu(unsigned int cpu); ++int hrtimers_cpu_dying(unsigned int cpu); + #else +-#define hrtimers_dead_cpu NULL ++#define hrtimers_cpu_dying NULL + #endif + + #endif +diff --git a/kernel/cpu.c b/kernel/cpu.c +index 393114c10c285..0e786de993e01 100644 +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -1697,7 +1697,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { + [CPUHP_HRTIMERS_PREPARE] = { + .name = "hrtimers:prepare", + .startup.single = hrtimers_prepare_cpu, +- .teardown.single = hrtimers_dead_cpu, ++ .teardown.single = NULL, + }, + [CPUHP_SMPCFD_PREPARE] = { + .name = 
"smpcfd:prepare", +@@ -1764,6 +1764,12 @@ static struct cpuhp_step cpuhp_hp_states[] = { + .startup.single = NULL, + .teardown.single = smpcfd_dying_cpu, + }, ++ [CPUHP_AP_HRTIMERS_DYING] = { ++ .name = "hrtimers:dying", ++ .startup.single = NULL, ++ .teardown.single = hrtimers_cpu_dying, ++ }, ++ + /* Entry state on starting. Interrupts enabled from here on. Transient + * state for synchronsization */ + [CPUHP_AP_ONLINE] = { +diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index 97409581e9dac..eebd0f31daa8b 100644 +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -2216,29 +2216,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, + } + } + +-int hrtimers_dead_cpu(unsigned int scpu) ++int hrtimers_cpu_dying(unsigned int dying_cpu) + { + struct hrtimer_cpu_base *old_base, *new_base; +- int i; ++ int i, ncpu = cpumask_first(cpu_active_mask); + +- BUG_ON(cpu_online(scpu)); +- tick_cancel_sched_timer(scpu); ++ tick_cancel_sched_timer(dying_cpu); ++ ++ old_base = this_cpu_ptr(&hrtimer_bases); ++ new_base = &per_cpu(hrtimer_bases, ncpu); + +- /* +- * this BH disable ensures that raise_softirq_irqoff() does +- * not wakeup ksoftirqd (and acquire the pi-lock) while +- * holding the cpu_base lock +- */ +- local_bh_disable(); +- local_irq_disable(); +- old_base = &per_cpu(hrtimer_bases, scpu); +- new_base = this_cpu_ptr(&hrtimer_bases); + /* + * The caller is globally serialized and nobody else + * takes two locks at once, deadlock is not possible. + */ +- raw_spin_lock(&new_base->lock); +- raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); ++ raw_spin_lock(&old_base->lock); ++ raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); + + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { + migrate_hrtimer_list(&old_base->clock_base[i], +@@ -2249,15 +2242,13 @@ int hrtimers_dead_cpu(unsigned int scpu) + * The migration might have changed the first expiring softirq + * timer on this CPU. Update it. 
+ */ +- hrtimer_update_softirq_timer(new_base, false); ++ __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); ++ /* Tell the other CPU to retrigger the next event */ ++ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); + +- raw_spin_unlock(&old_base->lock); + raw_spin_unlock(&new_base->lock); ++ raw_spin_unlock(&old_base->lock); + +- /* Check, if we got expired work to do */ +- __hrtimer_peek_ahead_timers(); +- local_irq_enable(); +- local_bh_enable(); + return 0; + } + +-- +2.42.0 + diff --git a/queue-5.15/i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch b/queue-5.15/i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch new file mode 100644 index 00000000000..fc35bb8acb8 --- /dev/null +++ b/queue-5.15/i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch @@ -0,0 +1,108 @@ +From 9a02b84b4d736ff27edf7cfb109f744387f4871b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Nov 2023 03:19:27 +0000 +Subject: i2c: designware: Fix corrupted memory seen in the ISR + +From: Jan Bottorff + +[ Upstream commit f726eaa787e9f9bc858c902d18a09af6bcbfcdaf ] + +When running on a many core ARM64 server, errors were +happening in the ISR that looked like corrupted memory. These +corruptions would fix themselves if small delays were inserted +in the ISR. Errors reported by the driver included "i2c_designware +APMC0D0F:00: i2c_dw_xfer_msg: invalid target address" and +"i2c_designware APMC0D0F:00:controller timed out" during +in-band IPMI SSIF stress tests. + +The problem was determined to be memory writes in the driver were not +becoming visible to all cores when execution rapidly shifted between +cores, like when a register write immediately triggers an ISR. +Processors with weak memory ordering, like ARM64, make no +guarantees about the order normal memory writes become globally +visible, unless barrier instructions are used to control ordering. 
+ +To solve this, regmap accessor functions configured by this driver +were changed to use non-relaxed forms of the low-level register +access functions, which include a barrier on platforms that require +it. This assures memory writes before a controller register access are +visible to all cores. The community concluded defaulting to correct +operation outweighed defaulting to the small performance gains from +using relaxed access functions. Being a low speed device added weight to +this choice of default register access behavior. + +Signed-off-by: Jan Bottorff +Acked-by: Jarkko Nikula +Tested-by: Serge Semin +Reviewed-by: Serge Semin +Signed-off-by: Wolfram Sang +Signed-off-by: Sasha Levin +--- + drivers/i2c/busses/i2c-designware-common.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c +index 4e752321b95e0..cb1d8d192ac0c 100644 +--- a/drivers/i2c/busses/i2c-designware-common.c ++++ b/drivers/i2c/busses/i2c-designware-common.c +@@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val) + { + struct dw_i2c_dev *dev = context; + +- *val = readl_relaxed(dev->base + reg); ++ *val = readl(dev->base + reg); + + return 0; + } +@@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val) + { + struct dw_i2c_dev *dev = context; + +- writel_relaxed(val, dev->base + reg); ++ writel(val, dev->base + reg); + + return 0; + } +@@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val) + { + struct dw_i2c_dev *dev = context; + +- *val = swab32(readl_relaxed(dev->base + reg)); ++ *val = swab32(readl(dev->base + reg)); + + return 0; + } +@@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val) + { + struct dw_i2c_dev *dev = context; + +- writel_relaxed(swab32(val), dev->base + reg); ++ writel(swab32(val), 
dev->base + reg); + + return 0; + } +@@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val) + { + struct dw_i2c_dev *dev = context; + +- *val = readw_relaxed(dev->base + reg) | +- (readw_relaxed(dev->base + reg + 2) << 16); ++ *val = readw(dev->base + reg) | ++ (readw(dev->base + reg + 2) << 16); + + return 0; + } +@@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val) + { + struct dw_i2c_dev *dev = context; + +- writew_relaxed(val, dev->base + reg); +- writew_relaxed(val >> 16, dev->base + reg + 2); ++ writew(val, dev->base + reg); ++ writew(val >> 16, dev->base + reg + 2); + + return 0; + } +-- +2.42.0 + diff --git a/queue-5.15/kconfig-fix-memory-leak-from-range-properties.patch b/queue-5.15/kconfig-fix-memory-leak-from-range-properties.patch new file mode 100644 index 00000000000..001fb1a0c0b --- /dev/null +++ b/queue-5.15/kconfig-fix-memory-leak-from-range-properties.patch @@ -0,0 +1,92 @@ +From e3aa660c16d164ef0185c40ae6ffce3e939eb8ab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Nov 2023 13:16:53 +0900 +Subject: kconfig: fix memory leak from range properties + +From: Masahiro Yamada + +[ Upstream commit ae1eff0349f2e908fc083630e8441ea6dc434dc0 ] + +Currently, sym_validate_range() duplicates the range string using +xstrdup(), which is overwritten by a subsequent sym_calc_value() call. +It results in a memory leak. + +Instead, only the pointer should be copied. + +Below is a test case, with a summary from Valgrind. 
+ +[Test Kconfig] + + config FOO + int "foo" + range 10 20 + +[Test .config] + + CONFIG_FOO=0 + +[Before] + + LEAK SUMMARY: + definitely lost: 3 bytes in 1 blocks + indirectly lost: 0 bytes in 0 blocks + possibly lost: 0 bytes in 0 blocks + still reachable: 17,465 bytes in 21 blocks + suppressed: 0 bytes in 0 blocks + +[After] + + LEAK SUMMARY: + definitely lost: 0 bytes in 0 blocks + indirectly lost: 0 bytes in 0 blocks + possibly lost: 0 bytes in 0 blocks + still reachable: 17,462 bytes in 20 blocks + suppressed: 0 bytes in 0 blocks + +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + scripts/kconfig/symbol.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c +index 5844d636d38f4..7f8013dcef002 100644 +--- a/scripts/kconfig/symbol.c ++++ b/scripts/kconfig/symbol.c +@@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base) + static void sym_validate_range(struct symbol *sym) + { + struct property *prop; ++ struct symbol *range_sym; + int base; + long long val, val2; +- char str[64]; + + switch (sym->type) { + case S_INT: +@@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym) + if (!prop) + return; + val = strtoll(sym->curr.val, NULL, base); +- val2 = sym_get_range_val(prop->expr->left.sym, base); ++ range_sym = prop->expr->left.sym; ++ val2 = sym_get_range_val(range_sym, base); + if (val >= val2) { +- val2 = sym_get_range_val(prop->expr->right.sym, base); ++ range_sym = prop->expr->right.sym; ++ val2 = sym_get_range_val(range_sym, base); + if (val <= val2) + return; + } +- if (sym->type == S_INT) +- sprintf(str, "%lld", val2); +- else +- sprintf(str, "0x%llx", val2); +- sym->curr.val = xstrdup(str); ++ sym->curr.val = range_sym->curr.val; + } + + static void sym_set_changed(struct symbol *sym) +-- +2.42.0 + diff --git a/queue-5.15/netfilter-ipset-fix-race-condition-between-swap-dest.patch 
b/queue-5.15/netfilter-ipset-fix-race-condition-between-swap-dest.patch new file mode 100644 index 00000000000..4f599277dc1 --- /dev/null +++ b/queue-5.15/netfilter-ipset-fix-race-condition-between-swap-dest.patch @@ -0,0 +1,105 @@ +From 2b6cee966dda601de6cf3f912a48cb9e40759d14 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Nov 2023 21:13:23 +0100 +Subject: netfilter: ipset: fix race condition between swap/destroy and kernel + side add/del/test + +From: Jozsef Kadlecsik + +[ Upstream commit 28628fa952fefc7f2072ce6e8016968cc452b1ba ] + +Linkui Xiao reported that there's a race condition when ipset swap and destroy is +called, which can lead to crash in add/del/test element operations. Swap then +destroy are usual operations to replace a set with another one in a production +system. The issue can in some cases be reproduced with the script: + +ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576 +ipset add hash_ip1 172.20.0.0/16 +ipset add hash_ip1 192.168.0.0/16 +iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT +while [ 1 ] +do + # ... Ongoing traffic... + ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576 + ipset add hash_ip2 172.20.0.0/16 + ipset swap hash_ip1 hash_ip2 + ipset destroy hash_ip2 + sleep 0.05 +done + +In the race case the possible order of the operations are + + CPU0 CPU1 + ip_set_test + ipset swap hash_ip1 hash_ip2 + ipset destroy hash_ip2 + hash_net_kadt + +Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which +is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy +removed it, hash_net_kadt crashes. + +The fix is to force ip_set_swap() to wait for all readers to finish accessing the +old set pointers by calling synchronize_rcu(). + +The first version of the patch was written by Linkui Xiao . + +v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden + ip_set_destroy() unnecessarily when all sets are destroyed. 
+v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held + and em_ipset.c wraps the entire ip_set_test() in an rcu read lock/unlock pair. + So there's no need to extend the rcu read locked area in ipset itself. + +Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/ +Reported-by: Linkui Xiao +Signed-off-by: Jozsef Kadlecsik +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipset/ip_set_core.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c +index 33869db42bb6b..978014928d07a 100644 +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); + ip_set_dereference((inst)->ip_set_list)[id] + #define ip_set_ref_netlink(inst,id) \ + rcu_dereference_raw((inst)->ip_set_list)[id] ++#define ip_set_dereference_nfnl(p) \ ++ rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) + + /* The set types are implemented in modules and registered set types + * can be found in ip_set_type_list. Adding/deleting types is +@@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set) + static struct ip_set * + ip_set_rcu_get(struct net *net, ip_set_id_t index) + { +- struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); + +- rcu_read_lock(); +- /* ip_set_list itself needs to be protected */ +- set = rcu_dereference(inst->ip_set_list)[index]; +- rcu_read_unlock(); +- +- return set; ++ /* ip_set_list and the set pointer need to be protected */ ++ return ip_set_dereference_nfnl(inst->ip_set_list)[index]; + } + + static inline void +@@ -1399,6 +1396,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, + ip_set(inst, to_id) = from; + write_unlock_bh(&ip_set_ref_lock); + ++ /* Make sure all readers of the old set pointers are completed. 
*/ ++ synchronize_rcu(); ++ + return 0; + } + +-- +2.42.0 + diff --git a/queue-5.15/series b/queue-5.15/series new file mode 100644 index 00000000000..334e39ec076 --- /dev/null +++ b/queue-5.15/series @@ -0,0 +1,8 @@ +vdpa-mlx5-preserve-cvq-vringh-index.patch +hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch +i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch +netfilter-ipset-fix-race-condition-between-swap-dest.patch +tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch +tg3-increment-tx_dropped-in-tg3_tso_bug.patch +kconfig-fix-memory-leak-from-range-properties.patch +drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch diff --git a/queue-5.15/tg3-increment-tx_dropped-in-tg3_tso_bug.patch b/queue-5.15/tg3-increment-tx_dropped-in-tg3_tso_bug.patch new file mode 100644 index 00000000000..6b0a6dcae9f --- /dev/null +++ b/queue-5.15/tg3-increment-tx_dropped-in-tg3_tso_bug.patch @@ -0,0 +1,41 @@ +From 1ef141cc14b757fe99f49407747b9f01c47f58cf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Nov 2023 10:23:50 -0800 +Subject: tg3: Increment tx_dropped in tg3_tso_bug() + +From: Alex Pakhunov + +[ Upstream commit 17dd5efe5f36a96bd78012594fabe21efb01186b ] + +tg3_tso_bug() drops a packet if it cannot be segmented for any reason. +The number of discarded frames should be incremented accordingly. 
+ +Signed-off-by: Alex Pakhunov +Signed-off-by: Vincent Wong +Reviewed-by: Pavan Chebbi +Link: https://lore.kernel.org/r/20231113182350.37472-2-alexey.pakhunov@spacex.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/tg3.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c +index 946b4decac0ce..fc487a6f050a2 100644 +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -7880,8 +7880,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, + + segs = skb_gso_segment(skb, tp->dev->features & + ~(NETIF_F_TSO | NETIF_F_TSO6)); +- if (IS_ERR(segs) || !segs) ++ if (IS_ERR(segs) || !segs) { ++ tnapi->tx_dropped++; + goto tg3_tso_bug_end; ++ } + + skb_list_walk_safe(segs, seg, next) { + skb_mark_not_on_list(seg); +-- +2.42.0 + diff --git a/queue-5.15/tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch b/queue-5.15/tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch new file mode 100644 index 00000000000..00660a0658f --- /dev/null +++ b/queue-5.15/tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch @@ -0,0 +1,139 @@ +From be54d3eb60e5e844692bc2869608e70b00909957 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Nov 2023 10:23:49 -0800 +Subject: tg3: Move the [rt]x_dropped counters to tg3_napi + +From: Alex Pakhunov + +[ Upstream commit 907d1bdb8b2cc0357d03a1c34d2a08d9943760b1 ] + +This change moves [rt]x_dropped counters to tg3_napi so that they can be +updated by a single writer, race-free. 
+ +Signed-off-by: Alex Pakhunov +Signed-off-by: Vincent Wong +Reviewed-by: Michael Chan +Link: https://lore.kernel.org/r/20231113182350.37472-1-alexey.pakhunov@spacex.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/tg3.c | 38 +++++++++++++++++++++++++---- + drivers/net/ethernet/broadcom/tg3.h | 4 +-- + 2 files changed, 35 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c +index 2c41852a082bb..946b4decac0ce 100644 +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -6854,7 +6854,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) + desc_idx, *post_ptr); + drop_it_no_recycle: + /* Other statistics kept track of by card. */ +- tp->rx_dropped++; ++ tnapi->rx_dropped++; + goto next_pkt; + } + +@@ -8152,7 +8152,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) + drop: + dev_kfree_skb_any(skb); + drop_nofree: +- tp->tx_dropped++; ++ tnapi->tx_dropped++; + return NETDEV_TX_OK; + } + +@@ -9331,7 +9331,7 @@ static void __tg3_set_rx_mode(struct net_device *); + /* tp->lock is held. 
*/ + static int tg3_halt(struct tg3 *tp, int kind, bool silent) + { +- int err; ++ int err, i; + + tg3_stop_fw(tp); + +@@ -9352,6 +9352,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent) + + /* And make sure the next sample is new data */ + memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats)); ++ ++ for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) { ++ struct tg3_napi *tnapi = &tp->napi[i]; ++ ++ tnapi->rx_dropped = 0; ++ tnapi->tx_dropped = 0; ++ } + } + + return err; +@@ -11906,6 +11913,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) + { + struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev; + struct tg3_hw_stats *hw_stats = tp->hw_stats; ++ unsigned long rx_dropped; ++ unsigned long tx_dropped; ++ int i; + + stats->rx_packets = old_stats->rx_packets + + get_stat64(&hw_stats->rx_ucast_packets) + +@@ -11952,8 +11962,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) + stats->rx_missed_errors = old_stats->rx_missed_errors + + get_stat64(&hw_stats->rx_discards); + +- stats->rx_dropped = tp->rx_dropped; +- stats->tx_dropped = tp->tx_dropped; ++ /* Aggregate per-queue counters. The per-queue counters are updated ++ * by a single writer, race-free. The result computed by this loop ++ * might not be 100% accurate (counters can be updated in the middle of ++ * the loop) but the next tg3_get_nstats() will recompute the current ++ * value so it is acceptable. ++ * ++ * Note that these counters wrap around at 4G on 32bit machines. 
++ */ ++ rx_dropped = (unsigned long)(old_stats->rx_dropped); ++ tx_dropped = (unsigned long)(old_stats->tx_dropped); ++ ++ for (i = 0; i < tp->irq_cnt; i++) { ++ struct tg3_napi *tnapi = &tp->napi[i]; ++ ++ rx_dropped += tnapi->rx_dropped; ++ tx_dropped += tnapi->tx_dropped; ++ } ++ ++ stats->rx_dropped = rx_dropped; ++ stats->tx_dropped = tx_dropped; + } + + static int tg3_get_regs_len(struct net_device *dev) +diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h +index 1000c894064f0..8d753f8c5b065 100644 +--- a/drivers/net/ethernet/broadcom/tg3.h ++++ b/drivers/net/ethernet/broadcom/tg3.h +@@ -3018,6 +3018,7 @@ struct tg3_napi { + u16 *rx_rcb_prod_idx; + struct tg3_rx_prodring_set prodring; + struct tg3_rx_buffer_desc *rx_rcb; ++ unsigned long rx_dropped; + + u32 tx_prod ____cacheline_aligned; + u32 tx_cons; +@@ -3026,6 +3027,7 @@ struct tg3_napi { + u32 prodmbox; + struct tg3_tx_buffer_desc *tx_ring; + struct tg3_tx_ring_info *tx_buffers; ++ unsigned long tx_dropped; + + dma_addr_t status_mapping; + dma_addr_t rx_rcb_mapping; +@@ -3219,8 +3221,6 @@ struct tg3 { + + + /* begin "everything else" cacheline(s) section */ +- unsigned long rx_dropped; +- unsigned long tx_dropped; + struct rtnl_link_stats64 net_stats_prev; + struct tg3_ethtool_stats estats_prev; + +-- +2.42.0 + diff --git a/queue-5.15/vdpa-mlx5-preserve-cvq-vringh-index.patch b/queue-5.15/vdpa-mlx5-preserve-cvq-vringh-index.patch new file mode 100644 index 00000000000..7b66b1c5e30 --- /dev/null +++ b/queue-5.15/vdpa-mlx5-preserve-cvq-vringh-index.patch @@ -0,0 +1,66 @@ +From cca1eef99ce3ffa38f7c2c5281d5d2657ff55619 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Nov 2023 05:26:27 -0700 +Subject: vdpa/mlx5: preserve CVQ vringh index +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Steve Sistare + +[ Upstream commit 480b3e73720f6b5d76bef2387b1f9d19ed67573b ] + +mlx5_vdpa does not preserve userland's view of 
vring base for the control +queue in the following sequence: + +ioctl VHOST_SET_VRING_BASE +ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK + mlx5_vdpa_set_status() + setup_cvq_vring() + vringh_init_iotlb() + vringh_init_kern() + vrh->last_avail_idx = 0; +ioctl VHOST_GET_VRING_BASE + +To fix, restore the value of cvq->vring.last_avail_idx after calling +vringh_init_iotlb. + +Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") + +Signed-off-by: Steve Sistare +Acked-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <1699014387-194368-1-git-send-email-steven.sistare@oracle.com> +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c +index e748c00789f04..46c72e6d3a29b 100644 +--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c ++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c +@@ -2178,13 +2178,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) + struct mlx5_control_vq *cvq = &mvdev->cvq; + int err = 0; + +- if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) ++ if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { ++ u16 idx = cvq->vring.last_avail_idx; ++ + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, + MLX5_CVQ_MAX_ENT, false, + (struct vring_desc *)(uintptr_t)cvq->desc_addr, + (struct vring_avail *)(uintptr_t)cvq->driver_addr, + (struct vring_used *)(uintptr_t)cvq->device_addr); + ++ if (!err) ++ cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; ++ } + return err; + } + +-- +2.42.0 +