]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author: Sasha Levin <sashal@kernel.org>
Fri, 8 Dec 2023 10:04:26 +0000 (05:04 -0500)
committer: Sasha Levin <sashal@kernel.org>
Fri, 8 Dec 2023 10:04:26 +0000 (05:04 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-6.1/drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch [new file with mode: 0644]
queue-6.1/hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch [new file with mode: 0644]
queue-6.1/i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch [new file with mode: 0644]
queue-6.1/kconfig-fix-memory-leak-from-range-properties.patch [new file with mode: 0644]
queue-6.1/netfilter-ipset-fix-race-condition-between-swap-dest.patch [new file with mode: 0644]
queue-6.1/series [new file with mode: 0644]
queue-6.1/tg3-increment-tx_dropped-in-tg3_tso_bug.patch [new file with mode: 0644]
queue-6.1/tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch [new file with mode: 0644]
queue-6.1/vdpa-mlx5-preserve-cvq-vringh-index.patch [new file with mode: 0644]
queue-6.1/x86-acpi-ignore-invalid-x2apic-entries.patch [new file with mode: 0644]
queue-6.1/zstd-fix-array-index-out-of-bounds-ubsan-warning.patch [new file with mode: 0644]

diff --git a/queue-6.1/drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch b/queue-6.1/drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch
new file mode 100644 (file)
index 0000000..bfab80c
--- /dev/null
@@ -0,0 +1,40 @@
+From e8dccefd160b4f00c0ebcd32c12abebb4d5ee140 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 31 Oct 2023 10:32:37 +0800
+Subject: drm/amdgpu: correct chunk_ptr to a pointer to chunk.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: YuanShang <YuanShang.Mao@amd.com>
+
+[ Upstream commit 50d51374b498457c4dea26779d32ccfed12ddaff ]
+
+The variable "chunk_ptr" should be a pointer to a
+struct drm_amdgpu_cs_chunk, not a pointer to a pointer
+to that struct.
+
+Signed-off-by: YuanShang <YuanShang.Mao@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+index ced4e7e8f98b5..133e4e03c143c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -201,7 +201,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
+       }
+       for (i = 0; i < p->nchunks; i++) {
+-              struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
++              struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
+               struct drm_amdgpu_cs_chunk user_chunk;
+               uint32_t __user *cdata;
+-- 
+2.42.0
+
diff --git a/queue-6.1/hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch b/queue-6.1/hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch
new file mode 100644 (file)
index 0000000..53352e4
--- /dev/null
@@ -0,0 +1,155 @@
+From afb8eb915ac12693aafc9d0704517463eb694248 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Nov 2023 15:57:13 +0100
+Subject: hrtimers: Push pending hrtimers away from outgoing CPU earlier
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 5c0930ccaad5a74d74e8b18b648c5eb21ed2fe94 ]
+
+2b8272ff4a70 ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug")
+solved the straightforward CPU hotplug deadlock vs. the scheduler
+bandwidth timer. Yu discovered a more involved variant where a task which
+has a bandwidth timer started on the outgoing CPU holds a lock and then
+gets throttled. If the lock is required by one of the CPU hotplug callbacks,
+the hotplug operation deadlocks because the unthrottling timer event is not
+handled on the dying CPU and can only be recovered once the control CPU
+reaches the hotplug state which pulls the pending hrtimers from the dead
+CPU.
+
+Solve this by pushing the hrtimers away from the dying CPU in the dying
+callbacks. Nothing can queue a hrtimer on the dying CPU at that point because
+all other CPUs spin in stop_machine() with interrupts disabled and once the
+operation is finished the CPU is marked offline.
+
+Reported-by: Yu Liao <liaoyu15@huawei.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Liu Tie <liutie4@huawei.com>
+Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/cpuhotplug.h |  1 +
+ include/linux/hrtimer.h    |  4 ++--
+ kernel/cpu.c               |  8 +++++++-
+ kernel/time/hrtimer.c      | 33 ++++++++++++---------------------
+ 4 files changed, 22 insertions(+), 24 deletions(-)
+
+diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
+index c7e0d80dbf6a5..67575bc8a7e29 100644
+--- a/include/linux/cpuhotplug.h
++++ b/include/linux/cpuhotplug.h
+@@ -196,6 +196,7 @@ enum cpuhp_state {
+       CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
+       CPUHP_AP_ARM64_ISNDEP_STARTING,
+       CPUHP_AP_SMPCFD_DYING,
++      CPUHP_AP_HRTIMERS_DYING,
+       CPUHP_AP_X86_TBOOT_DYING,
+       CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
+       CPUHP_AP_ONLINE,
+diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
+index 0ee140176f102..f2044d5a652b5 100644
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void);
+ int hrtimers_prepare_cpu(unsigned int cpu);
+ #ifdef CONFIG_HOTPLUG_CPU
+-int hrtimers_dead_cpu(unsigned int cpu);
++int hrtimers_cpu_dying(unsigned int cpu);
+ #else
+-#define hrtimers_dead_cpu     NULL
++#define hrtimers_cpu_dying    NULL
+ #endif
+ #endif
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 0e4d362e90825..551468d9c5a85 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -1733,7 +1733,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+       [CPUHP_HRTIMERS_PREPARE] = {
+               .name                   = "hrtimers:prepare",
+               .startup.single         = hrtimers_prepare_cpu,
+-              .teardown.single        = hrtimers_dead_cpu,
++              .teardown.single        = NULL,
+       },
+       [CPUHP_SMPCFD_PREPARE] = {
+               .name                   = "smpcfd:prepare",
+@@ -1800,6 +1800,12 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+               .startup.single         = NULL,
+               .teardown.single        = smpcfd_dying_cpu,
+       },
++      [CPUHP_AP_HRTIMERS_DYING] = {
++              .name                   = "hrtimers:dying",
++              .startup.single         = NULL,
++              .teardown.single        = hrtimers_cpu_dying,
++      },
++
+       /* Entry state on starting. Interrupts enabled from here on. Transient
+        * state for synchronsization */
+       [CPUHP_AP_ONLINE] = {
+diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
+index e4f0e3b0c4f4f..5561dabc9b225 100644
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -2216,29 +2216,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+       }
+ }
+-int hrtimers_dead_cpu(unsigned int scpu)
++int hrtimers_cpu_dying(unsigned int dying_cpu)
+ {
+       struct hrtimer_cpu_base *old_base, *new_base;
+-      int i;
++      int i, ncpu = cpumask_first(cpu_active_mask);
+-      BUG_ON(cpu_online(scpu));
+-      tick_cancel_sched_timer(scpu);
++      tick_cancel_sched_timer(dying_cpu);
++
++      old_base = this_cpu_ptr(&hrtimer_bases);
++      new_base = &per_cpu(hrtimer_bases, ncpu);
+-      /*
+-       * this BH disable ensures that raise_softirq_irqoff() does
+-       * not wakeup ksoftirqd (and acquire the pi-lock) while
+-       * holding the cpu_base lock
+-       */
+-      local_bh_disable();
+-      local_irq_disable();
+-      old_base = &per_cpu(hrtimer_bases, scpu);
+-      new_base = this_cpu_ptr(&hrtimer_bases);
+       /*
+        * The caller is globally serialized and nobody else
+        * takes two locks at once, deadlock is not possible.
+        */
+-      raw_spin_lock(&new_base->lock);
+-      raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
++      raw_spin_lock(&old_base->lock);
++      raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
+       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+               migrate_hrtimer_list(&old_base->clock_base[i],
+@@ -2249,15 +2242,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
+        * The migration might have changed the first expiring softirq
+        * timer on this CPU. Update it.
+        */
+-      hrtimer_update_softirq_timer(new_base, false);
++      __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
++      /* Tell the other CPU to retrigger the next event */
++      smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
+-      raw_spin_unlock(&old_base->lock);
+       raw_spin_unlock(&new_base->lock);
++      raw_spin_unlock(&old_base->lock);
+-      /* Check, if we got expired work to do */
+-      __hrtimer_peek_ahead_timers();
+-      local_irq_enable();
+-      local_bh_enable();
+       return 0;
+ }
+-- 
+2.42.0
+
diff --git a/queue-6.1/i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch b/queue-6.1/i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch
new file mode 100644 (file)
index 0000000..4bb85d4
--- /dev/null
@@ -0,0 +1,108 @@
+From 19686fbc1a118738c7acf12e034b15d139c054fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Nov 2023 03:19:27 +0000
+Subject: i2c: designware: Fix corrupted memory seen in the ISR
+
+From: Jan Bottorff <janb@os.amperecomputing.com>
+
+[ Upstream commit f726eaa787e9f9bc858c902d18a09af6bcbfcdaf ]
+
+When running on a many core ARM64 server, errors were
+happening in the ISR that looked like corrupted memory. These
+corruptions would fix themselves if small delays were inserted
+in the ISR. Errors reported by the driver included "i2c_designware
+APMC0D0F:00: i2c_dw_xfer_msg: invalid target address" and
+"i2c_designware APMC0D0F:00:controller timed out" during
+in-band IPMI SSIF stress tests.
+
+The problem was determined to be that memory writes in the driver were not
+becoming visible to all cores when execution rapidly shifted between
+cores, like when a register write immediately triggers an ISR.
+Processors with weak memory ordering, like ARM64, make no
+guarantees about the order normal memory writes become globally
+visible, unless barrier instructions are used to control ordering.
+
+To solve this, regmap accessor functions configured by this driver
+were changed to use non-relaxed forms of the low-level register
+access functions, which include a barrier on platforms that require
+it. This assures memory writes before a controller register access are
+visible to all cores. The community concluded defaulting to correct
+operation outweighed defaulting to the small performance gains from
+using relaxed access functions. Being a low speed device added weight to
+this choice of default register access behavior.
+
+Signed-off-by: Jan Bottorff <janb@os.amperecomputing.com>
+Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+Tested-by: Serge Semin <fancer.lancer@gmail.com>
+Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i2c/busses/i2c-designware-common.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
+index 6fdb25a5f8016..ad98c85ec2e7a 100644
+--- a/drivers/i2c/busses/i2c-designware-common.c
++++ b/drivers/i2c/busses/i2c-designware-common.c
+@@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val)
+ {
+       struct dw_i2c_dev *dev = context;
+-      *val = readl_relaxed(dev->base + reg);
++      *val = readl(dev->base + reg);
+       return 0;
+ }
+@@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val)
+ {
+       struct dw_i2c_dev *dev = context;
+-      writel_relaxed(val, dev->base + reg);
++      writel(val, dev->base + reg);
+       return 0;
+ }
+@@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val)
+ {
+       struct dw_i2c_dev *dev = context;
+-      *val = swab32(readl_relaxed(dev->base + reg));
++      *val = swab32(readl(dev->base + reg));
+       return 0;
+ }
+@@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val)
+ {
+       struct dw_i2c_dev *dev = context;
+-      writel_relaxed(swab32(val), dev->base + reg);
++      writel(swab32(val), dev->base + reg);
+       return 0;
+ }
+@@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val)
+ {
+       struct dw_i2c_dev *dev = context;
+-      *val = readw_relaxed(dev->base + reg) |
+-              (readw_relaxed(dev->base + reg + 2) << 16);
++      *val = readw(dev->base + reg) |
++              (readw(dev->base + reg + 2) << 16);
+       return 0;
+ }
+@@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val)
+ {
+       struct dw_i2c_dev *dev = context;
+-      writew_relaxed(val, dev->base + reg);
+-      writew_relaxed(val >> 16, dev->base + reg + 2);
++      writew(val, dev->base + reg);
++      writew(val >> 16, dev->base + reg + 2);
+       return 0;
+ }
+-- 
+2.42.0
+
diff --git a/queue-6.1/kconfig-fix-memory-leak-from-range-properties.patch b/queue-6.1/kconfig-fix-memory-leak-from-range-properties.patch
new file mode 100644 (file)
index 0000000..1ce67f6
--- /dev/null
@@ -0,0 +1,92 @@
+From 28fde5ce84f97b117c284395758aa9096611445e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Nov 2023 13:16:53 +0900
+Subject: kconfig: fix memory leak from range properties
+
+From: Masahiro Yamada <masahiroy@kernel.org>
+
+[ Upstream commit ae1eff0349f2e908fc083630e8441ea6dc434dc0 ]
+
+Currently, sym_validate_range() duplicates the range string using
+xstrdup(), which is overwritten by a subsequent sym_calc_value() call.
+It results in a memory leak.
+
+Instead, only the pointer should be copied.
+
+Below is a test case, with a summary from Valgrind.
+
+[Test Kconfig]
+
+  config FOO
+          int "foo"
+          range 10 20
+
+[Test .config]
+
+  CONFIG_FOO=0
+
+[Before]
+
+  LEAK SUMMARY:
+     definitely lost: 3 bytes in 1 blocks
+     indirectly lost: 0 bytes in 0 blocks
+       possibly lost: 0 bytes in 0 blocks
+     still reachable: 17,465 bytes in 21 blocks
+          suppressed: 0 bytes in 0 blocks
+
+[After]
+
+  LEAK SUMMARY:
+     definitely lost: 0 bytes in 0 blocks
+     indirectly lost: 0 bytes in 0 blocks
+       possibly lost: 0 bytes in 0 blocks
+     still reachable: 17,462 bytes in 20 blocks
+          suppressed: 0 bytes in 0 blocks
+
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/kconfig/symbol.c | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
+index 0572330bf8a78..a76925b46ce63 100644
+--- a/scripts/kconfig/symbol.c
++++ b/scripts/kconfig/symbol.c
+@@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base)
+ static void sym_validate_range(struct symbol *sym)
+ {
+       struct property *prop;
++      struct symbol *range_sym;
+       int base;
+       long long val, val2;
+-      char str[64];
+       switch (sym->type) {
+       case S_INT:
+@@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym)
+       if (!prop)
+               return;
+       val = strtoll(sym->curr.val, NULL, base);
+-      val2 = sym_get_range_val(prop->expr->left.sym, base);
++      range_sym = prop->expr->left.sym;
++      val2 = sym_get_range_val(range_sym, base);
+       if (val >= val2) {
+-              val2 = sym_get_range_val(prop->expr->right.sym, base);
++              range_sym = prop->expr->right.sym;
++              val2 = sym_get_range_val(range_sym, base);
+               if (val <= val2)
+                       return;
+       }
+-      if (sym->type == S_INT)
+-              sprintf(str, "%lld", val2);
+-      else
+-              sprintf(str, "0x%llx", val2);
+-      sym->curr.val = xstrdup(str);
++      sym->curr.val = range_sym->curr.val;
+ }
+ static void sym_set_changed(struct symbol *sym)
+-- 
+2.42.0
+
diff --git a/queue-6.1/netfilter-ipset-fix-race-condition-between-swap-dest.patch b/queue-6.1/netfilter-ipset-fix-race-condition-between-swap-dest.patch
new file mode 100644 (file)
index 0000000..af17fb7
--- /dev/null
@@ -0,0 +1,105 @@
+From 0eddebfa8f404b97512ae72a1173b04bd02a7f0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Nov 2023 21:13:23 +0100
+Subject: netfilter: ipset: fix race condition between swap/destroy and kernel
+ side add/del/test
+
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+
+[ Upstream commit 28628fa952fefc7f2072ce6e8016968cc452b1ba ]
+
+Linkui Xiao reported that there's a race condition when ipset swap and destroy are
+called, which can lead to a crash in add/del/test element operations. Swap then
+destroy are usual operations to replace a set with another one in a production
+system. The issue can in some cases be reproduced with the script:
+
+ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576
+ipset add hash_ip1 172.20.0.0/16
+ipset add hash_ip1 192.168.0.0/16
+iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT
+while [ 1 ]
+do
+       # ... Ongoing traffic...
+        ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576
+        ipset add hash_ip2 172.20.0.0/16
+        ipset swap hash_ip1 hash_ip2
+        ipset destroy hash_ip2
+        sleep 0.05
+done
+
+In the race case the possible order of the operations is
+
+       CPU0                    CPU1
+       ip_set_test
+                               ipset swap hash_ip1 hash_ip2
+                               ipset destroy hash_ip2
+       hash_net_kadt
+
+Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which
+is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy
+removed it, hash_net_kadt crashes.
+
+The fix is to force ip_set_swap() to wait for all readers to finish accessing the
+old set pointers by calling synchronize_rcu().
+
+The first version of the patch was written by Linkui Xiao <xiaolinkui@kylinos.cn>.
+
+v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden
+    ip_set_destroy() unnecessarily when all sets are destroyed.
+v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held
+    and em_ipset.c wraps the entire ip_set_test() in rcu read lock/unlock pair.
+    So there's no need to extend the rcu read locked area in ipset itself.
+
+Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/
+Reported-by: Linkui Xiao <xiaolinkui@kylinos.cn>
+Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/ipset/ip_set_core.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
+index 20eede37d5228..d47dfdcb899b0 100644
+--- a/net/netfilter/ipset/ip_set_core.c
++++ b/net/netfilter/ipset/ip_set_core.c
+@@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
+       ip_set_dereference((inst)->ip_set_list)[id]
+ #define ip_set_ref_netlink(inst,id)   \
+       rcu_dereference_raw((inst)->ip_set_list)[id]
++#define ip_set_dereference_nfnl(p)    \
++      rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+ /* The set types are implemented in modules and registered set types
+  * can be found in ip_set_type_list. Adding/deleting types is
+@@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set)
+ static struct ip_set *
+ ip_set_rcu_get(struct net *net, ip_set_id_t index)
+ {
+-      struct ip_set *set;
+       struct ip_set_net *inst = ip_set_pernet(net);
+-      rcu_read_lock();
+-      /* ip_set_list itself needs to be protected */
+-      set = rcu_dereference(inst->ip_set_list)[index];
+-      rcu_read_unlock();
+-
+-      return set;
++      /* ip_set_list and the set pointer need to be protected */
++      return ip_set_dereference_nfnl(inst->ip_set_list)[index];
+ }
+ static inline void
+@@ -1399,6 +1396,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
+       ip_set(inst, to_id) = from;
+       write_unlock_bh(&ip_set_ref_lock);
++      /* Make sure all readers of the old set pointers are completed. */
++      synchronize_rcu();
++
+       return 0;
+ }
+-- 
+2.42.0
+
diff --git a/queue-6.1/series b/queue-6.1/series
new file mode 100644 (file)
index 0000000..3ef6470
--- /dev/null
@@ -0,0 +1,10 @@
+vdpa-mlx5-preserve-cvq-vringh-index.patch
+x86-acpi-ignore-invalid-x2apic-entries.patch
+hrtimers-push-pending-hrtimers-away-from-outgoing-cp.patch
+i2c-designware-fix-corrupted-memory-seen-in-the-isr.patch
+netfilter-ipset-fix-race-condition-between-swap-dest.patch
+zstd-fix-array-index-out-of-bounds-ubsan-warning.patch
+tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch
+tg3-increment-tx_dropped-in-tg3_tso_bug.patch
+kconfig-fix-memory-leak-from-range-properties.patch
+drm-amdgpu-correct-chunk_ptr-to-a-pointer-to-chunk.patch
diff --git a/queue-6.1/tg3-increment-tx_dropped-in-tg3_tso_bug.patch b/queue-6.1/tg3-increment-tx_dropped-in-tg3_tso_bug.patch
new file mode 100644 (file)
index 0000000..2ad1c49
--- /dev/null
@@ -0,0 +1,41 @@
+From 97a4a7f39e52e919ace70ba0eca40ff7b423499b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Nov 2023 10:23:50 -0800
+Subject: tg3: Increment tx_dropped in tg3_tso_bug()
+
+From: Alex Pakhunov <alexey.pakhunov@spacex.com>
+
+[ Upstream commit 17dd5efe5f36a96bd78012594fabe21efb01186b ]
+
+tg3_tso_bug() drops a packet if it cannot be segmented for any reason.
+The number of discarded frames should be incremented accordingly.
+
+Signed-off-by: Alex Pakhunov <alexey.pakhunov@spacex.com>
+Signed-off-by: Vincent Wong <vincent.wong2@spacex.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Link: https://lore.kernel.org/r/20231113182350.37472-2-alexey.pakhunov@spacex.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
+index 830f542a7c6d2..f60a16de565ed 100644
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -7879,8 +7879,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
+       segs = skb_gso_segment(skb, tp->dev->features &
+                                   ~(NETIF_F_TSO | NETIF_F_TSO6));
+-      if (IS_ERR(segs) || !segs)
++      if (IS_ERR(segs) || !segs) {
++              tnapi->tx_dropped++;
+               goto tg3_tso_bug_end;
++      }
+       skb_list_walk_safe(segs, seg, next) {
+               skb_mark_not_on_list(seg);
+-- 
+2.42.0
+
diff --git a/queue-6.1/tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch b/queue-6.1/tg3-move-the-rt-x_dropped-counters-to-tg3_napi.patch
new file mode 100644 (file)
index 0000000..1138e91
--- /dev/null
@@ -0,0 +1,139 @@
+From 480385da3c6d2d6a3155cd383a93fbf2b9f44109 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Nov 2023 10:23:49 -0800
+Subject: tg3: Move the [rt]x_dropped counters to tg3_napi
+
+From: Alex Pakhunov <alexey.pakhunov@spacex.com>
+
+[ Upstream commit 907d1bdb8b2cc0357d03a1c34d2a08d9943760b1 ]
+
+This change moves [rt]x_dropped counters to tg3_napi so that they can be
+updated by a single writer, race-free.
+
+Signed-off-by: Alex Pakhunov <alexey.pakhunov@spacex.com>
+Signed-off-by: Vincent Wong <vincent.wong2@spacex.com>
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://lore.kernel.org/r/20231113182350.37472-1-alexey.pakhunov@spacex.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c | 38 +++++++++++++++++++++++++----
+ drivers/net/ethernet/broadcom/tg3.h |  4 +--
+ 2 files changed, 35 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
+index 85570e40c8e9b..830f542a7c6d2 100644
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -6853,7 +6853,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
+                                      desc_idx, *post_ptr);
+               drop_it_no_recycle:
+                       /* Other statistics kept track of by card. */
+-                      tp->rx_dropped++;
++                      tnapi->rx_dropped++;
+                       goto next_pkt;
+               }
+@@ -8151,7 +8151,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ drop:
+       dev_kfree_skb_any(skb);
+ drop_nofree:
+-      tp->tx_dropped++;
++      tnapi->tx_dropped++;
+       return NETDEV_TX_OK;
+ }
+@@ -9330,7 +9330,7 @@ static void __tg3_set_rx_mode(struct net_device *);
+ /* tp->lock is held. */
+ static int tg3_halt(struct tg3 *tp, int kind, bool silent)
+ {
+-      int err;
++      int err, i;
+       tg3_stop_fw(tp);
+@@ -9351,6 +9351,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent)
+               /* And make sure the next sample is new data */
+               memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
++
++              for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) {
++                      struct tg3_napi *tnapi = &tp->napi[i];
++
++                      tnapi->rx_dropped = 0;
++                      tnapi->tx_dropped = 0;
++              }
+       }
+       return err;
+@@ -11900,6 +11907,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
+ {
+       struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev;
+       struct tg3_hw_stats *hw_stats = tp->hw_stats;
++      unsigned long rx_dropped;
++      unsigned long tx_dropped;
++      int i;
+       stats->rx_packets = old_stats->rx_packets +
+               get_stat64(&hw_stats->rx_ucast_packets) +
+@@ -11946,8 +11956,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
+       stats->rx_missed_errors = old_stats->rx_missed_errors +
+               get_stat64(&hw_stats->rx_discards);
+-      stats->rx_dropped = tp->rx_dropped;
+-      stats->tx_dropped = tp->tx_dropped;
++      /* Aggregate per-queue counters. The per-queue counters are updated
++       * by a single writer, race-free. The result computed by this loop
++       * might not be 100% accurate (counters can be updated in the middle of
++       * the loop) but the next tg3_get_nstats() will recompute the current
++       * value so it is acceptable.
++       *
++       * Note that these counters wrap around at 4G on 32bit machines.
++       */
++      rx_dropped = (unsigned long)(old_stats->rx_dropped);
++      tx_dropped = (unsigned long)(old_stats->tx_dropped);
++
++      for (i = 0; i < tp->irq_cnt; i++) {
++              struct tg3_napi *tnapi = &tp->napi[i];
++
++              rx_dropped += tnapi->rx_dropped;
++              tx_dropped += tnapi->tx_dropped;
++      }
++
++      stats->rx_dropped = rx_dropped;
++      stats->tx_dropped = tx_dropped;
+ }
+ static int tg3_get_regs_len(struct net_device *dev)
+diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
+index 1000c894064f0..8d753f8c5b065 100644
+--- a/drivers/net/ethernet/broadcom/tg3.h
++++ b/drivers/net/ethernet/broadcom/tg3.h
+@@ -3018,6 +3018,7 @@ struct tg3_napi {
+       u16                             *rx_rcb_prod_idx;
+       struct tg3_rx_prodring_set      prodring;
+       struct tg3_rx_buffer_desc       *rx_rcb;
++      unsigned long                   rx_dropped;
+       u32                             tx_prod ____cacheline_aligned;
+       u32                             tx_cons;
+@@ -3026,6 +3027,7 @@ struct tg3_napi {
+       u32                             prodmbox;
+       struct tg3_tx_buffer_desc       *tx_ring;
+       struct tg3_tx_ring_info         *tx_buffers;
++      unsigned long                   tx_dropped;
+       dma_addr_t                      status_mapping;
+       dma_addr_t                      rx_rcb_mapping;
+@@ -3219,8 +3221,6 @@ struct tg3 {
+       /* begin "everything else" cacheline(s) section */
+-      unsigned long                   rx_dropped;
+-      unsigned long                   tx_dropped;
+       struct rtnl_link_stats64        net_stats_prev;
+       struct tg3_ethtool_stats        estats_prev;
+-- 
+2.42.0
+
diff --git a/queue-6.1/vdpa-mlx5-preserve-cvq-vringh-index.patch b/queue-6.1/vdpa-mlx5-preserve-cvq-vringh-index.patch
new file mode 100644 (file)
index 0000000..84a56ad
--- /dev/null
@@ -0,0 +1,66 @@
+From 5fca292e7b098d958239fe716411510baaaffc5f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Nov 2023 05:26:27 -0700
+Subject: vdpa/mlx5: preserve CVQ vringh index
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Steve Sistare <steven.sistare@oracle.com>
+
+[ Upstream commit 480b3e73720f6b5d76bef2387b1f9d19ed67573b ]
+
+mlx5_vdpa does not preserve userland's view of vring base for the control
+queue in the following sequence:
+
+ioctl VHOST_SET_VRING_BASE
+ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK
+  mlx5_vdpa_set_status()
+    setup_cvq_vring()
+      vringh_init_iotlb()
+        vringh_init_kern()
+          vrh->last_avail_idx = 0;
+ioctl VHOST_GET_VRING_BASE
+
+To fix, restore the value of cvq->vring.last_avail_idx after calling
+vringh_init_iotlb.
+
+Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting")
+
+Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
+Acked-by: Eugenio Pérez <eperezma@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1699014387-194368-1-git-send-email-steven.sistare@oracle.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+index bf99654371b35..2b7e796c48897 100644
+--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+@@ -2508,13 +2508,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
+       struct mlx5_control_vq *cvq = &mvdev->cvq;
+       int err = 0;
+-      if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
++      if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
++              u16 idx = cvq->vring.last_avail_idx;
++
+               err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
+                                       MLX5_CVQ_MAX_ENT, false,
+                                       (struct vring_desc *)(uintptr_t)cvq->desc_addr,
+                                       (struct vring_avail *)(uintptr_t)cvq->driver_addr,
+                                       (struct vring_used *)(uintptr_t)cvq->device_addr);
++              if (!err)
++                      cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
++      }
+       return err;
+ }
+-- 
+2.42.0
+
diff --git a/queue-6.1/x86-acpi-ignore-invalid-x2apic-entries.patch b/queue-6.1/x86-acpi-ignore-invalid-x2apic-entries.patch
new file mode 100644 (file)
index 0000000..76ac0c9
--- /dev/null
@@ -0,0 +1,130 @@
+From 4c7717939f49024466540344aa3463a934461ec2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jul 2023 00:28:02 +0800
+Subject: x86/acpi: Ignore invalid x2APIC entries
+
+From: Zhang Rui <rui.zhang@intel.com>
+
+[ Upstream commit ec9aedb2aa1ab7ac420c00b31f5edc5be15ec167 ]
+
+Currently, the kernel enumerates the possible CPUs by parsing both ACPI
+MADT Local APIC entries and x2APIC entries. So CPUs with "valid" APIC IDs,
+even if they have duplicated APIC IDs in Local APIC and x2APIC, are always
+enumerated.
+
+Below is what the ACPI MADT Local APIC and x2APIC entries describe on an
+Ivy Bridge-EP system:
+
+[02Ch 0044   1]                Subtable Type : 00 [Processor Local APIC]
+[02Fh 0047   1]                Local Apic ID : 00
+...
+[164h 0356   1]                Subtable Type : 00 [Processor Local APIC]
+[167h 0359   1]                Local Apic ID : 39
+[16Ch 0364   1]                Subtable Type : 00 [Processor Local APIC]
+[16Fh 0367   1]                Local Apic ID : FF
+...
+[3ECh 1004   1]                Subtable Type : 09 [Processor Local x2APIC]
+[3F0h 1008   4]                Processor x2Apic ID : 00000000
+...
+[B5Ch 2908   1]                Subtable Type : 09 [Processor Local x2APIC]
+[B60h 2912   4]                Processor x2Apic ID : 00000077
+
+As a result, the kernel shows "smpboot: Allowing 168 CPUs, 120 hotplug CPUs".
+This wastes a significant amount of memory for the per-cpu data.
+Plus this also breaks https://lore.kernel.org/all/87edm36qqb.ffs@tglx/,
+because __max_logical_packages is over-estimated by the APIC IDs in
+the x2APIC entries.
+
+According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure:
+
+  "[Compatibility note] On some legacy OSes, Logical processors with APIC
+   ID values less than 255 (whether in XAPIC or X2APIC mode) must use the
+   Processor Local APIC structure to convey their APIC information to OSPM,
+   and those processors must be declared in the DSDT using the Processor()
+   keyword. Logical processors with APIC ID values 255 and greater must use
+   the Processor Local x2APIC structure and be declared using the Device()
+   keyword."
+
+Therefore prevent the registration of x2APIC entries with an APIC ID less
+than 255 if the local APIC table enumerates valid APIC IDs.
+
+[ tglx: Simplify the logic ]
+
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20230702162802.344176-1-rui.zhang@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/acpi/boot.c | 34 +++++++++++++++-------------------
+ 1 file changed, 15 insertions(+), 19 deletions(-)
+
+diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
+index 2252340b2133e..14af7fbdc6b5e 100644
+--- a/arch/x86/kernel/acpi/boot.c
++++ b/arch/x86/kernel/acpi/boot.c
+@@ -62,6 +62,7 @@ int acpi_fix_pin2_polarity __initdata;
+ #ifdef CONFIG_X86_LOCAL_APIC
+ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
++static bool has_lapic_cpus __initdata;
+ static bool acpi_support_online_capable;
+ #endif
+@@ -235,6 +236,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
+       if (!acpi_is_processor_usable(processor->lapic_flags))
+               return 0;
++      /*
++       * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure
++       * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID
++       * in x2APIC must be equal or greater than 0xff.
++       */
++      if (has_lapic_cpus && apic_id < 0xff)
++              return 0;
++
+       /*
+        * We need to register disabled CPU as well to permit
+        * counting disabled CPUs. This allows us to size
+@@ -1114,10 +1123,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
+ static int __init acpi_parse_madt_lapic_entries(void)
+ {
+-      int count;
+-      int x2count = 0;
+-      int ret;
+-      struct acpi_subtable_proc madt_proc[2];
++      int count, x2count = 0;
+       if (!boot_cpu_has(X86_FEATURE_APIC))
+               return -ENODEV;
+@@ -1126,21 +1132,11 @@ static int __init acpi_parse_madt_lapic_entries(void)
+                                     acpi_parse_sapic, MAX_LOCAL_APIC);
+       if (!count) {
+-              memset(madt_proc, 0, sizeof(madt_proc));
+-              madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+-              madt_proc[0].handler = acpi_parse_lapic;
+-              madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+-              madt_proc[1].handler = acpi_parse_x2apic;
+-              ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
+-                              sizeof(struct acpi_table_madt),
+-                              madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+-              if (ret < 0) {
+-                      pr_err("Error parsing LAPIC/X2APIC entries\n");
+-                      return ret;
+-              }
+-
+-              count = madt_proc[0].count;
+-              x2count = madt_proc[1].count;
++              count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
++                                      acpi_parse_lapic, MAX_LOCAL_APIC);
++              has_lapic_cpus = count > 0;
++              x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
++                                      acpi_parse_x2apic, MAX_LOCAL_APIC);
+       }
+       if (!count && !x2count) {
+               pr_err("No LAPIC entries present\n");
+-- 
+2.42.0
+
diff --git a/queue-6.1/zstd-fix-array-index-out-of-bounds-ubsan-warning.patch b/queue-6.1/zstd-fix-array-index-out-of-bounds-ubsan-warning.patch
new file mode 100644 (file)
index 0000000..49f72ef
--- /dev/null
@@ -0,0 +1,43 @@
+From 2e29986816a0375b521ac0fd34294d36bca5384b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Oct 2023 12:55:34 -0700
+Subject: zstd: Fix array-index-out-of-bounds UBSAN warning
+
+From: Nick Terrell <terrelln@fb.com>
+
+[ Upstream commit 77618db346455129424fadbbaec596a09feaf3bb ]
+
+Zstd used an array of length 1 to mean a flexible array for C89
+compatibility. Switch to a C99 flexible array to fix the UBSAN warning.
+
+Tested locally by booting the kernel and writing to and reading from a
+BtrFS filesystem with zstd compression enabled. I was unable to reproduce
+the issue before the fix, however it is a trivial change.
+
+Link: https://lkml.kernel.org/r/20231012213428.1390905-1-nickrterrell@gmail.com
+Reported-by: syzbot+1f2eb3e8cd123ffce499@syzkaller.appspotmail.com
+Reported-by: Eric Biggers <ebiggers@kernel.org>
+Reported-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Nick Terrell <terrelln@fb.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/zstd/common/fse_decompress.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c
+index 2c8bbe3e4c148..f37b7aec088ec 100644
+--- a/lib/zstd/common/fse_decompress.c
++++ b/lib/zstd/common/fse_decompress.c
+@@ -312,7 +312,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
+ typedef struct {
+     short ncount[FSE_MAX_SYMBOL_VALUE + 1];
+-    FSE_DTable dtable[1]; /* Dynamically sized */
++    FSE_DTable dtable[]; /* Dynamically sized */
+ } FSE_DecompressWksp;
+-- 
+2.42.0
+