--- /dev/null
+From 99972e3ccfcbc1d4510d399a313ad1ce9167ac94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 31 Oct 2023 10:32:37 +0800
+Subject: drm/amdgpu: correct chunk_ptr to a pointer to chunk.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: YuanShang <YuanShang.Mao@amd.com>
+
+[ Upstream commit 50d51374b498457c4dea26779d32ccfed12ddaff ]
+
+The variable "chunk_ptr" should be a pointer pointing
+to a struct drm_amdgpu_cs_chunk instead of to a pointer
+of that.
+
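+For context, a minimal sketch of the usage pattern this type fixes. The
+surrounding identifiers (chunk_array, u64_to_user_ptr(), the error path)
+are paraphrased from the loop in amdgpu_cs_parser_init(), not quoted
+verbatim from the driver:
+
+	struct drm_amdgpu_cs_chunk __user *chunk_ptr;
+	struct drm_amdgpu_cs_chunk user_chunk;
+
+	/* chunk_array[i] holds the user-space address of chunk i */
+	chunk_ptr = u64_to_user_ptr(chunk_array[i]);
+
+	/* the source of copy_from_user() is the chunk struct itself,
+	 * so a single level of indirection is the correct type
+	 */
+	if (copy_from_user(&user_chunk, chunk_ptr, sizeof(user_chunk)))
+		return -EFAULT;
+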
+Signed-off-by: YuanShang <YuanShang.Mao@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+index 2d8f71dde9803..f293d0dfec613 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -142,7 +142,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
+ }
+
+ for (i = 0; i < p->nchunks; i++) {
+- struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
++ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
+ struct drm_amdgpu_cs_chunk user_chunk;
+ uint32_t __user *cdata;
+
+--
+2.42.0
+
--- /dev/null
+From 811f4b10e55aacab186235156abf87a9a7397c5f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Nov 2023 15:57:13 +0100
+Subject: hrtimers: Push pending hrtimers away from outgoing CPU earlier
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 5c0930ccaad5a74d74e8b18b648c5eb21ed2fe94 ]
+
+2b8272ff4a70 ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug")
+solved the straight forward CPU hotplug deadlock vs. the scheduler
+bandwidth timer. Yu discovered a more involved variant where a task which
+has a bandwidth timer started on the outgoing CPU holds a lock and then
+gets throttled. If the lock required by one of the CPU hotplug callbacks
+the hotplug operation deadlocks because the unthrottling timer event is not
+handled on the dying CPU and can only be recovered once the control CPU
+reaches the hotplug state which pulls the pending hrtimers from the dead
+CPU.
+
+Solve this by pushing the hrtimers away from the dying CPU in the dying
+callbacks. Nothing can queue a hrtimer on the dying CPU at that point because
+all other CPUs spin in stop_machine() with interrupts disabled and once the
+operation is finished the CPU is marked offline.
+
+Reported-by: Yu Liao <liaoyu15@huawei.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Liu Tie <liutie4@huawei.com>
+Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/cpuhotplug.h | 1 +
+ include/linux/hrtimer.h | 4 ++--
+ kernel/cpu.c | 8 +++++++-
+ kernel/time/hrtimer.c | 33 ++++++++++++---------------------
+ 4 files changed, 22 insertions(+), 24 deletions(-)
+
+diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
+index c7156bb56e831..c1ecc843b97d2 100644
+--- a/include/linux/cpuhotplug.h
++++ b/include/linux/cpuhotplug.h
+@@ -193,6 +193,7 @@ enum cpuhp_state {
+ CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
+ CPUHP_AP_ARM64_ISNDEP_STARTING,
+ CPUHP_AP_SMPCFD_DYING,
++ CPUHP_AP_HRTIMERS_DYING,
+ CPUHP_AP_X86_TBOOT_DYING,
+ CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
+ CPUHP_AP_ONLINE,
+diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
+index 0ee140176f102..f2044d5a652b5 100644
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void);
+
+ int hrtimers_prepare_cpu(unsigned int cpu);
+ #ifdef CONFIG_HOTPLUG_CPU
+-int hrtimers_dead_cpu(unsigned int cpu);
++int hrtimers_cpu_dying(unsigned int cpu);
+ #else
+-#define hrtimers_dead_cpu NULL
++#define hrtimers_cpu_dying NULL
+ #endif
+
+ #endif
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 393114c10c285..0e786de993e01 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -1697,7 +1697,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ [CPUHP_HRTIMERS_PREPARE] = {
+ .name = "hrtimers:prepare",
+ .startup.single = hrtimers_prepare_cpu,
+- .teardown.single = hrtimers_dead_cpu,
++ .teardown.single = NULL,
+ },
+ [CPUHP_SMPCFD_PREPARE] = {
+ .name = "smpcfd:prepare",
+@@ -1764,6 +1764,12 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ .startup.single = NULL,
+ .teardown.single = smpcfd_dying_cpu,
+ },
++ [CPUHP_AP_HRTIMERS_DYING] = {
++ .name = "hrtimers:dying",
++ .startup.single = NULL,
++ .teardown.single = hrtimers_cpu_dying,
++ },
++
+ /* Entry state on starting. Interrupts enabled from here on. Transient
+ * state for synchronsization */
+ [CPUHP_AP_ONLINE] = {
+diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
+index 97409581e9dac..eebd0f31daa8b 100644
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -2216,29 +2216,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+ }
+ }
+
+-int hrtimers_dead_cpu(unsigned int scpu)
++int hrtimers_cpu_dying(unsigned int dying_cpu)
+ {
+ struct hrtimer_cpu_base *old_base, *new_base;
+- int i;
++ int i, ncpu = cpumask_first(cpu_active_mask);
+
+- BUG_ON(cpu_online(scpu));
+- tick_cancel_sched_timer(scpu);
++ tick_cancel_sched_timer(dying_cpu);
++
++ old_base = this_cpu_ptr(&hrtimer_bases);
++ new_base = &per_cpu(hrtimer_bases, ncpu);
+
+- /*
+- * this BH disable ensures that raise_softirq_irqoff() does
+- * not wakeup ksoftirqd (and acquire the pi-lock) while
+- * holding the cpu_base lock
+- */
+- local_bh_disable();
+- local_irq_disable();
+- old_base = &per_cpu(hrtimer_bases, scpu);
+- new_base = this_cpu_ptr(&hrtimer_bases);
+ /*
+ * The caller is globally serialized and nobody else
+ * takes two locks at once, deadlock is not possible.
+ */
+- raw_spin_lock(&new_base->lock);
+- raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
++ raw_spin_lock(&old_base->lock);
++ raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
+
+ for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+ migrate_hrtimer_list(&old_base->clock_base[i],
+@@ -2249,15 +2242,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
+ * The migration might have changed the first expiring softirq
+ * timer on this CPU. Update it.
+ */
+- hrtimer_update_softirq_timer(new_base, false);
++ __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
++ /* Tell the other CPU to retrigger the next event */
++ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
+
+- raw_spin_unlock(&old_base->lock);
+ raw_spin_unlock(&new_base->lock);
++ raw_spin_unlock(&old_base->lock);
+
+- /* Check, if we got expired work to do */
+- __hrtimer_peek_ahead_timers();
+- local_irq_enable();
+- local_bh_enable();
+ return 0;
+ }
+
+--
+2.42.0
+
--- /dev/null
+From 9a02b84b4d736ff27edf7cfb109f744387f4871b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Nov 2023 03:19:27 +0000
+Subject: i2c: designware: Fix corrupted memory seen in the ISR
+
+From: Jan Bottorff <janb@os.amperecomputing.com>
+
+[ Upstream commit f726eaa787e9f9bc858c902d18a09af6bcbfcdaf ]
+
+When running on a many core ARM64 server, errors were
+happening in the ISR that looked like corrupted memory. These
+corruptions would fix themselves if small delays were inserted
+in the ISR. Errors reported by the driver included "i2c_designware
+APMC0D0F:00: i2c_dw_xfer_msg: invalid target address" and
+"i2c_designware APMC0D0F:00:controller timed out" during
+in-band IPMI SSIF stress tests.
+
+The problem was determined to be memory writes in the driver were not
+becoming visible to all cores when execution rapidly shifted between
+cores, like when a register write immediately triggers an ISR.
+Processors with weak memory ordering, like ARM64, make no
+guarantees about the order normal memory writes become globally
+visible, unless barrier instructions are used to control ordering.
+
+To solve this, the regmap accessor functions configured by this driver
+were changed to use the non-relaxed forms of the low-level register
+access functions, which include a barrier on platforms that require
+it. This ensures that memory writes made before a controller register
+access are visible to all cores. The community concluded that defaulting
+to correct operation outweighed the small performance gain from using
+the relaxed access functions, and the fact that this is a low-speed
+device added weight to that choice of default register access behavior.
+
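+To illustrate the ordering concern, a minimal sketch (msg, len, base,
+DB_REG and DB_RING are hypothetical names for illustration, not this
+driver's code):
+
+	/* Broken on weakly ordered CPUs: nothing orders the plain store
+	 * against the doorbell write, so the ISR this write triggers on
+	 * another core may not yet see msg->len.
+	 */
+	msg->len = len;
+	writel_relaxed(DB_RING, base + DB_REG);
+
+	/* Safe: writel() issues a write barrier (__iowmb() on arm64)
+	 * before the MMIO store, so the store to msg->len is visible
+	 * before the doorbell can raise the interrupt.
+	 */
+	msg->len = len;
+	writel(DB_RING, base + DB_REG);
+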
+Signed-off-by: Jan Bottorff <janb@os.amperecomputing.com>
+Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+Tested-by: Serge Semin <fancer.lancer@gmail.com>
+Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i2c/busses/i2c-designware-common.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
+index 4e752321b95e0..cb1d8d192ac0c 100644
+--- a/drivers/i2c/busses/i2c-designware-common.c
++++ b/drivers/i2c/busses/i2c-designware-common.c
+@@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val)
+ {
+ struct dw_i2c_dev *dev = context;
+
+- *val = readl_relaxed(dev->base + reg);
++ *val = readl(dev->base + reg);
+
+ return 0;
+ }
+@@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val)
+ {
+ struct dw_i2c_dev *dev = context;
+
+- writel_relaxed(val, dev->base + reg);
++ writel(val, dev->base + reg);
+
+ return 0;
+ }
+@@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val)
+ {
+ struct dw_i2c_dev *dev = context;
+
+- *val = swab32(readl_relaxed(dev->base + reg));
++ *val = swab32(readl(dev->base + reg));
+
+ return 0;
+ }
+@@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val)
+ {
+ struct dw_i2c_dev *dev = context;
+
+- writel_relaxed(swab32(val), dev->base + reg);
++ writel(swab32(val), dev->base + reg);
+
+ return 0;
+ }
+@@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val)
+ {
+ struct dw_i2c_dev *dev = context;
+
+- *val = readw_relaxed(dev->base + reg) |
+- (readw_relaxed(dev->base + reg + 2) << 16);
++ *val = readw(dev->base + reg) |
++ (readw(dev->base + reg + 2) << 16);
+
+ return 0;
+ }
+@@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val)
+ {
+ struct dw_i2c_dev *dev = context;
+
+- writew_relaxed(val, dev->base + reg);
+- writew_relaxed(val >> 16, dev->base + reg + 2);
++ writew(val, dev->base + reg);
++ writew(val >> 16, dev->base + reg + 2);
+
+ return 0;
+ }
+--
+2.42.0
+
--- /dev/null
+From e3aa660c16d164ef0185c40ae6ffce3e939eb8ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Nov 2023 13:16:53 +0900
+Subject: kconfig: fix memory leak from range properties
+
+From: Masahiro Yamada <masahiroy@kernel.org>
+
+[ Upstream commit ae1eff0349f2e908fc083630e8441ea6dc434dc0 ]
+
+Currently, sym_validate_range() duplicates the range string using
+xstrdup(); the pointer to that copy is later overwritten by a subsequent
+sym_calc_value() call, so the copied string is leaked.
+
+Instead, only the pointer should be copied.
+
+Below is a test case, with a summary from Valgrind.
+
+[Test Kconfig]
+
+ config FOO
+ int "foo"
+ range 10 20
+
+[Test .config]
+
+ CONFIG_FOO=0
+
+[Before]
+
+ LEAK SUMMARY:
+ definitely lost: 3 bytes in 1 blocks
+ indirectly lost: 0 bytes in 0 blocks
+ possibly lost: 0 bytes in 0 blocks
+ still reachable: 17,465 bytes in 21 blocks
+ suppressed: 0 bytes in 0 blocks
+
+[After]
+
+ LEAK SUMMARY:
+ definitely lost: 0 bytes in 0 blocks
+ indirectly lost: 0 bytes in 0 blocks
+ possibly lost: 0 bytes in 0 blocks
+ still reachable: 17,462 bytes in 20 blocks
+ suppressed: 0 bytes in 0 blocks
+
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/kconfig/symbol.c | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
+index 5844d636d38f4..7f8013dcef002 100644
+--- a/scripts/kconfig/symbol.c
++++ b/scripts/kconfig/symbol.c
+@@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base)
+ static void sym_validate_range(struct symbol *sym)
+ {
+ struct property *prop;
++ struct symbol *range_sym;
+ int base;
+ long long val, val2;
+- char str[64];
+
+ switch (sym->type) {
+ case S_INT:
+@@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym)
+ if (!prop)
+ return;
+ val = strtoll(sym->curr.val, NULL, base);
+- val2 = sym_get_range_val(prop->expr->left.sym, base);
++ range_sym = prop->expr->left.sym;
++ val2 = sym_get_range_val(range_sym, base);
+ if (val >= val2) {
+- val2 = sym_get_range_val(prop->expr->right.sym, base);
++ range_sym = prop->expr->right.sym;
++ val2 = sym_get_range_val(range_sym, base);
+ if (val <= val2)
+ return;
+ }
+- if (sym->type == S_INT)
+- sprintf(str, "%lld", val2);
+- else
+- sprintf(str, "0x%llx", val2);
+- sym->curr.val = xstrdup(str);
++ sym->curr.val = range_sym->curr.val;
+ }
+
+ static void sym_set_changed(struct symbol *sym)
+--
+2.42.0
+
--- /dev/null
+From 2b6cee966dda601de6cf3f912a48cb9e40759d14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Nov 2023 21:13:23 +0100
+Subject: netfilter: ipset: fix race condition between swap/destroy and kernel
+ side add/del/test
+
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+
+[ Upstream commit 28628fa952fefc7f2072ce6e8016968cc452b1ba ]
+
+Linkui Xiao reported that there's a race condition when ipset swap and destroy are
+called, which can lead to a crash in add/del/test element operations. Swap followed
+by destroy is the usual sequence for replacing a set with another one in a
+production system. The issue can in some cases be reproduced with the script:
+
+ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576
+ipset add hash_ip1 172.20.0.0/16
+ipset add hash_ip1 192.168.0.0/16
+iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT
+while [ 1 ]
+do
+ # ... Ongoing traffic...
+ ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576
+ ipset add hash_ip2 172.20.0.0/16
+ ipset swap hash_ip1 hash_ip2
+ ipset destroy hash_ip2
+ sleep 0.05
+done
+
+In the race case, the possible order of the operations is:
+
+ CPU0 CPU1
+ ip_set_test
+ ipset swap hash_ip1 hash_ip2
+ ipset destroy hash_ip2
+ hash_net_kadt
+
+Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which
+is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy
+removed it, hash_net_kadt crashes.
+
+The fix is to force ip_set_swap() to wait for all readers to finish accessing the
+old set pointers by calling synchronize_rcu().
+
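+Sketched in RCU terms (simplified from the actual code paths in
+ip_set_core.c; the reader line paraphrases ip_set_test()):
+
+	/* Reader: packet path, rcu_read_lock() is already held by the
+	 * netfilter hooks (or by em_ipset around ip_set_test()).
+	 */
+	set = ip_set_rcu_get(net, index);
+	ret = set->variant->kadt(set, skb, par, IPSET_TEST, &opt);
+
+	/* Updater: IPSET_CMD_SWAP followed by IPSET_CMD_DESTROY */
+	write_lock_bh(&ip_set_ref_lock);
+	ip_set(inst, from_id) = to;
+	ip_set(inst, to_id) = from;
+	write_unlock_bh(&ip_set_ref_lock);
+
+	synchronize_rcu();	/* wait out every reader that may still
+				 * dereference one of the old set pointers */
+	/* only after this can the subsequent destroy free the old set */
+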
+The first version of the patch was written by Linkui Xiao <xiaolinkui@kylinos.cn>.
+
+v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden
+ ip_set_destroy() unnecessarily when all sets are destroyed.
+v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held
+    and em_ipset.c wraps the entire ip_set_test() in an rcu read lock/unlock pair,
+    so there's no need to extend the rcu read locked area in ipset itself.
+
+Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/
+Reported-by: Linkui Xiao <xiaolinkui@kylinos.cn>
+Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/ipset/ip_set_core.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
+index 33869db42bb6b..978014928d07a 100644
+--- a/net/netfilter/ipset/ip_set_core.c
++++ b/net/netfilter/ipset/ip_set_core.c
+@@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
+ ip_set_dereference((inst)->ip_set_list)[id]
+ #define ip_set_ref_netlink(inst,id) \
+ rcu_dereference_raw((inst)->ip_set_list)[id]
++#define ip_set_dereference_nfnl(p) \
++ rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+
+ /* The set types are implemented in modules and registered set types
+ * can be found in ip_set_type_list. Adding/deleting types is
+@@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set)
+ static struct ip_set *
+ ip_set_rcu_get(struct net *net, ip_set_id_t index)
+ {
+- struct ip_set *set;
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+- rcu_read_lock();
+- /* ip_set_list itself needs to be protected */
+- set = rcu_dereference(inst->ip_set_list)[index];
+- rcu_read_unlock();
+-
+- return set;
++ /* ip_set_list and the set pointer need to be protected */
++ return ip_set_dereference_nfnl(inst->ip_set_list)[index];
+ }
+
+ static inline void
+@@ -1399,6 +1396,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
+ ip_set(inst, to_id) = from;
+ write_unlock_bh(&ip_set_ref_lock);
+
++ /* Make sure all readers of the old set pointers are completed. */
++ synchronize_rcu();
++
+ return 0;
+ }
+
+--
+2.42.0
+
--- /dev/null
+vdpa-mlx5-preserve-cvq-vringh-index.patch
+hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch
+i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch
+netfilter-ipset-fix-race-condition-between-swap-dest.patch
+tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch
+tg3-increment-tx_dropped-in-tg3_tso_bug.patch
+kconfig-fix-memory-leak-from-range-properties.patch
+drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch
--- /dev/null
+From 1ef141cc14b757fe99f49407747b9f01c47f58cf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Nov 2023 10:23:50 -0800
+Subject: tg3: Increment tx_dropped in tg3_tso_bug()
+
+From: Alex Pakhunov <alexey.pakhunov@spacex.com>
+
+[ Upstream commit 17dd5efe5f36a96bd78012594fabe21efb01186b ]
+
+tg3_tso_bug() drops a packet if it cannot be segmented for any reason.
+The number of discarded frames should be incremented accordingly.
+
+Signed-off-by: Alex Pakhunov <alexey.pakhunov@spacex.com>
+Signed-off-by: Vincent Wong <vincent.wong2@spacex.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Link: https://lore.kernel.org/r/20231113182350.37472-2-alexey.pakhunov@spacex.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
+index 946b4decac0ce..fc487a6f050a2 100644
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -7880,8 +7880,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
+
+ segs = skb_gso_segment(skb, tp->dev->features &
+ ~(NETIF_F_TSO | NETIF_F_TSO6));
+- if (IS_ERR(segs) || !segs)
++ if (IS_ERR(segs) || !segs) {
++ tnapi->tx_dropped++;
+ goto tg3_tso_bug_end;
++ }
+
+ skb_list_walk_safe(segs, seg, next) {
+ skb_mark_not_on_list(seg);
+--
+2.42.0
+
--- /dev/null
+From be54d3eb60e5e844692bc2869608e70b00909957 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Nov 2023 10:23:49 -0800
+Subject: tg3: Move the [rt]x_dropped counters to tg3_napi
+
+From: Alex Pakhunov <alexey.pakhunov@spacex.com>
+
+[ Upstream commit 907d1bdb8b2cc0357d03a1c34d2a08d9943760b1 ]
+
+This change moves [rt]x_dropped counters to tg3_napi so that they can be
+updated by a single writer, race-free.
+
+Signed-off-by: Alex Pakhunov <alexey.pakhunov@spacex.com>
+Signed-off-by: Vincent Wong <vincent.wong2@spacex.com>
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://lore.kernel.org/r/20231113182350.37472-1-alexey.pakhunov@spacex.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c | 38 +++++++++++++++++++++++++----
+ drivers/net/ethernet/broadcom/tg3.h | 4 +--
+ 2 files changed, 35 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
+index 2c41852a082bb..946b4decac0ce 100644
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -6854,7 +6854,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
+ desc_idx, *post_ptr);
+ drop_it_no_recycle:
+ /* Other statistics kept track of by card. */
+- tp->rx_dropped++;
++ tnapi->rx_dropped++;
+ goto next_pkt;
+ }
+
+@@ -8152,7 +8152,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ drop:
+ dev_kfree_skb_any(skb);
+ drop_nofree:
+- tp->tx_dropped++;
++ tnapi->tx_dropped++;
+ return NETDEV_TX_OK;
+ }
+
+@@ -9331,7 +9331,7 @@ static void __tg3_set_rx_mode(struct net_device *);
+ /* tp->lock is held. */
+ static int tg3_halt(struct tg3 *tp, int kind, bool silent)
+ {
+- int err;
++ int err, i;
+
+ tg3_stop_fw(tp);
+
+@@ -9352,6 +9352,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent)
+
+ /* And make sure the next sample is new data */
+ memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
++
++ for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) {
++ struct tg3_napi *tnapi = &tp->napi[i];
++
++ tnapi->rx_dropped = 0;
++ tnapi->tx_dropped = 0;
++ }
+ }
+
+ return err;
+@@ -11906,6 +11913,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
+ {
+ struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev;
+ struct tg3_hw_stats *hw_stats = tp->hw_stats;
++ unsigned long rx_dropped;
++ unsigned long tx_dropped;
++ int i;
+
+ stats->rx_packets = old_stats->rx_packets +
+ get_stat64(&hw_stats->rx_ucast_packets) +
+@@ -11952,8 +11962,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
+ stats->rx_missed_errors = old_stats->rx_missed_errors +
+ get_stat64(&hw_stats->rx_discards);
+
+- stats->rx_dropped = tp->rx_dropped;
+- stats->tx_dropped = tp->tx_dropped;
++ /* Aggregate per-queue counters. The per-queue counters are updated
++ * by a single writer, race-free. The result computed by this loop
++ * might not be 100% accurate (counters can be updated in the middle of
++ * the loop) but the next tg3_get_nstats() will recompute the current
++ * value so it is acceptable.
++ *
++ * Note that these counters wrap around at 4G on 32bit machines.
++ */
++ rx_dropped = (unsigned long)(old_stats->rx_dropped);
++ tx_dropped = (unsigned long)(old_stats->tx_dropped);
++
++ for (i = 0; i < tp->irq_cnt; i++) {
++ struct tg3_napi *tnapi = &tp->napi[i];
++
++ rx_dropped += tnapi->rx_dropped;
++ tx_dropped += tnapi->tx_dropped;
++ }
++
++ stats->rx_dropped = rx_dropped;
++ stats->tx_dropped = tx_dropped;
+ }
+
+ static int tg3_get_regs_len(struct net_device *dev)
+diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
+index 1000c894064f0..8d753f8c5b065 100644
+--- a/drivers/net/ethernet/broadcom/tg3.h
++++ b/drivers/net/ethernet/broadcom/tg3.h
+@@ -3018,6 +3018,7 @@ struct tg3_napi {
+ u16 *rx_rcb_prod_idx;
+ struct tg3_rx_prodring_set prodring;
+ struct tg3_rx_buffer_desc *rx_rcb;
++ unsigned long rx_dropped;
+
+ u32 tx_prod ____cacheline_aligned;
+ u32 tx_cons;
+@@ -3026,6 +3027,7 @@ struct tg3_napi {
+ u32 prodmbox;
+ struct tg3_tx_buffer_desc *tx_ring;
+ struct tg3_tx_ring_info *tx_buffers;
++ unsigned long tx_dropped;
+
+ dma_addr_t status_mapping;
+ dma_addr_t rx_rcb_mapping;
+@@ -3219,8 +3221,6 @@ struct tg3 {
+
+
+ /* begin "everything else" cacheline(s) section */
+- unsigned long rx_dropped;
+- unsigned long tx_dropped;
+ struct rtnl_link_stats64 net_stats_prev;
+ struct tg3_ethtool_stats estats_prev;
+
+--
+2.42.0
+
--- /dev/null
+From cca1eef99ce3ffa38f7c2c5281d5d2657ff55619 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Nov 2023 05:26:27 -0700
+Subject: vdpa/mlx5: preserve CVQ vringh index
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+[ Upstream commit 480b3e73720f6b5d76bef2387b1f9d19ed67573b ]
+
+mlx5_vdpa does not preserve userland's view of vring base for the control
+queue in the following sequence:
+
+ioctl VHOST_SET_VRING_BASE
+ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK
+ mlx5_vdpa_set_status()
+ setup_cvq_vring()
+ vringh_init_iotlb()
+ vringh_init_kern()
+ vrh->last_avail_idx = 0;
+ioctl VHOST_GET_VRING_BASE
+
+To fix, restore the value of cvq->vring.last_avail_idx after calling
+vringh_init_iotlb.
+
+Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting")
+
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Acked-by: Eugenio Pérez <eperezma@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1699014387-194368-1-git-send-email-steven.sistare@oracle.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+index e748c00789f04..46c72e6d3a29b 100644
+--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+@@ -2178,13 +2178,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
+ struct mlx5_control_vq *cvq = &mvdev->cvq;
+ int err = 0;
+
+- if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
++ if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
++ u16 idx = cvq->vring.last_avail_idx;
++
+ err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
+ MLX5_CVQ_MAX_ENT, false,
+ (struct vring_desc *)(uintptr_t)cvq->desc_addr,
+ (struct vring_avail *)(uintptr_t)cvq->driver_addr,
+ (struct vring_used *)(uintptr_t)cvq->device_addr);
+
++ if (!err)
++ cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
++ }
+ return err;
+ }
+
+--
+2.42.0
+