--- /dev/null
+From 049354d92774b515eee4d0942a55012adfdfd0ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Jun 2023 23:11:39 -0700
+Subject: blk-crypto: use dynamic lock class for blk_crypto_profile::lock
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit 2fb48d88e77f29bf9d278f25bcfe82cf59a0e09b ]
+
+When a device-mapper device is passing through the inline encryption
+support of an underlying device, calls to blk_crypto_evict_key() take
+the blk_crypto_profile::lock of the device-mapper device, then take the
+blk_crypto_profile::lock of the underlying device (nested). This isn't
+a real deadlock, but it causes a lockdep report because there is only
+one lock class for all instances of this lock.
+
+Lockdep subclasses don't really work here because the hierarchy of block
+devices is dynamic and could have more than 2 levels.
+
+Instead, register a dynamic lock class for each blk_crypto_profile, and
+associate that with the lock.
+
+This avoids false-positive lockdep reports like the following:
+
+ ============================================
+ WARNING: possible recursive locking detected
+ 6.4.0-rc5 #2 Not tainted
+ --------------------------------------------
+ fscryptctl/1421 is trying to acquire lock:
+ ffffff80829ca418 (&profile->lock){++++}-{3:3}, at: __blk_crypto_evict_key+0x44/0x1c0
+
+ but task is already holding lock:
+ ffffff8086b68ca8 (&profile->lock){++++}-{3:3}, at: __blk_crypto_evict_key+0xc8/0x1c0
+
+ other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+ CPU0
+ ----
+ lock(&profile->lock);
+ lock(&profile->lock);
+
+ *** DEADLOCK ***
+
+ May be due to missing lock nesting notation
+
+Fixes: 1b2628397058 ("block: Keyslot Manager for Inline Encryption")
+Reported-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Link: https://lore.kernel.org/r/20230610061139.212085-1-ebiggers@kernel.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-crypto-profile.c | 12 ++++++++++--
+ include/linux/blk-crypto-profile.h | 1 +
+ 2 files changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c
+index 2a67d3fb63e5c..7fabc883e39f1 100644
+--- a/block/blk-crypto-profile.c
++++ b/block/blk-crypto-profile.c
+@@ -79,7 +79,14 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile,
+ unsigned int slot_hashtable_size;
+
+ memset(profile, 0, sizeof(*profile));
+- init_rwsem(&profile->lock);
++
++ /*
++ * profile->lock of an underlying device can nest inside profile->lock
++ * of a device-mapper device, so use a dynamic lock class to avoid
++ * false-positive lockdep reports.
++ */
++ lockdep_register_key(&profile->lockdep_key);
++ __init_rwsem(&profile->lock, "&profile->lock", &profile->lockdep_key);
+
+ if (num_slots == 0)
+ return 0;
+@@ -89,7 +96,7 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile,
+ profile->slots = kvcalloc(num_slots, sizeof(profile->slots[0]),
+ GFP_KERNEL);
+ if (!profile->slots)
+- return -ENOMEM;
++ goto err_destroy;
+
+ profile->num_slots = num_slots;
+
+@@ -435,6 +442,7 @@ void blk_crypto_profile_destroy(struct blk_crypto_profile *profile)
+ {
+ if (!profile)
+ return;
++ lockdep_unregister_key(&profile->lockdep_key);
+ kvfree(profile->slot_hashtable);
+ kvfree_sensitive(profile->slots,
+ sizeof(profile->slots[0]) * profile->num_slots);
+diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h
+index e6802b69cdd64..90ab33cb5d0ef 100644
+--- a/include/linux/blk-crypto-profile.h
++++ b/include/linux/blk-crypto-profile.h
+@@ -111,6 +111,7 @@ struct blk_crypto_profile {
+ * keyslots while ensuring that they can't be changed concurrently.
+ */
+ struct rw_semaphore lock;
++ struct lock_class_key lockdep_key;
+
+ /* List of idle slots, with least recently used slot at front */
+ wait_queue_head_t idle_slots_wait_queue;
+--
+2.39.2
+
--- /dev/null
+From a4e1b6b58194575ab350ac438efcc0521fe963dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 19:58:48 +0800
+Subject: bpf: cpumap: Fix memory leak in cpu_map_update_elem
+
+From: Pu Lehui <pulehui@huawei.com>
+
+[ Upstream commit 4369016497319a9635702da010d02af1ebb1849d ]
+
+Syzkaller reported a memory leak as follows:
+
+BUG: memory leak
+unreferenced object 0xff110001198ef748 (size 192):
+ comm "syz-executor.3", pid 17672, jiffies 4298118891 (age 9.906s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 4a 19 00 00 80 ad e3 e4 fe ff c0 00 ....J...........
+ 00 b2 d3 0c 01 00 11 ff 28 f5 8e 19 01 00 11 ff ........(.......
+ backtrace:
+ [<ffffffffadd28087>] __cpu_map_entry_alloc+0xf7/0xb00
+ [<ffffffffadd28d8e>] cpu_map_update_elem+0x2fe/0x3d0
+ [<ffffffffadc6d0fd>] bpf_map_update_value.isra.0+0x2bd/0x520
+ [<ffffffffadc7349b>] map_update_elem+0x4cb/0x720
+ [<ffffffffadc7d983>] __se_sys_bpf+0x8c3/0xb90
+ [<ffffffffb029cc80>] do_syscall_64+0x30/0x40
+ [<ffffffffb0400099>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
+
+BUG: memory leak
+unreferenced object 0xff110001198ef528 (size 192):
+ comm "syz-executor.3", pid 17672, jiffies 4298118891 (age 9.906s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ [<ffffffffadd281f0>] __cpu_map_entry_alloc+0x260/0xb00
+ [<ffffffffadd28d8e>] cpu_map_update_elem+0x2fe/0x3d0
+ [<ffffffffadc6d0fd>] bpf_map_update_value.isra.0+0x2bd/0x520
+ [<ffffffffadc7349b>] map_update_elem+0x4cb/0x720
+ [<ffffffffadc7d983>] __se_sys_bpf+0x8c3/0xb90
+ [<ffffffffb029cc80>] do_syscall_64+0x30/0x40
+ [<ffffffffb0400099>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
+
+BUG: memory leak
+unreferenced object 0xff1100010fd93d68 (size 8):
+ comm "syz-executor.3", pid 17672, jiffies 4298118891 (age 9.906s)
+ hex dump (first 8 bytes):
+ 00 00 00 00 00 00 00 00 ........
+ backtrace:
+ [<ffffffffade5db3e>] kvmalloc_node+0x11e/0x170
+ [<ffffffffadd28280>] __cpu_map_entry_alloc+0x2f0/0xb00
+ [<ffffffffadd28d8e>] cpu_map_update_elem+0x2fe/0x3d0
+ [<ffffffffadc6d0fd>] bpf_map_update_value.isra.0+0x2bd/0x520
+ [<ffffffffadc7349b>] map_update_elem+0x4cb/0x720
+ [<ffffffffadc7d983>] __se_sys_bpf+0x8c3/0xb90
+ [<ffffffffb029cc80>] do_syscall_64+0x30/0x40
+ [<ffffffffb0400099>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
+
+In the cpu_map_update_elem flow, when kthread_stop is called before
+calling the threadfn of rcpu->kthread, since the KTHREAD_SHOULD_STOP bit
+of kthread has been set by kthread_stop, the threadfn of rcpu->kthread
+will never be executed, and rcpu->refcnt will never be 0, which will
+lead to the allocated rcpu, rcpu->queue and rcpu->queue->queue cannot be
+released.
+
+Calling kthread_stop before executing kthread's threadfn will return
+-EINTR. We can complete the release of memory resources in this state.
+
+Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP")
+Signed-off-by: Pu Lehui <pulehui@huawei.com>
+Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
+Acked-by: Hou Tao <houtao1@huawei.com>
+Link: https://lore.kernel.org/r/20230711115848.2701559-1-pulehui@huaweicloud.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/cpumap.c | 40 ++++++++++++++++++++++++----------------
+ 1 file changed, 24 insertions(+), 16 deletions(-)
+
+diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
+index 8ec18faa74ac3..3da63be602d1c 100644
+--- a/kernel/bpf/cpumap.c
++++ b/kernel/bpf/cpumap.c
+@@ -126,22 +126,6 @@ static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+ atomic_inc(&rcpu->refcnt);
+ }
+
+-/* called from workqueue, to workaround syscall using preempt_disable */
+-static void cpu_map_kthread_stop(struct work_struct *work)
+-{
+- struct bpf_cpu_map_entry *rcpu;
+-
+- rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
+-
+- /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier,
+- * as it waits until all in-flight call_rcu() callbacks complete.
+- */
+- rcu_barrier();
+-
+- /* kthread_stop will wake_up_process and wait for it to complete */
+- kthread_stop(rcpu->kthread);
+-}
+-
+ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
+ {
+ /* The tear-down procedure should have made sure that queue is
+@@ -169,6 +153,30 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+ }
+ }
+
++/* called from workqueue, to workaround syscall using preempt_disable */
++static void cpu_map_kthread_stop(struct work_struct *work)
++{
++ struct bpf_cpu_map_entry *rcpu;
++ int err;
++
++ rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
++
++ /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier,
++ * as it waits until all in-flight call_rcu() callbacks complete.
++ */
++ rcu_barrier();
++
++ /* kthread_stop will wake_up_process and wait for it to complete */
++ err = kthread_stop(rcpu->kthread);
++ if (err) {
++ /* kthread_stop may be called before cpu_map_kthread_run
++ * is executed, so we need to release the memory related
++ * to rcpu.
++ */
++ put_cpu_map_entry(rcpu);
++ }
++}
++
+ static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+ struct list_head *listp,
+ struct xdp_cpumap_stats *stats)
+--
+2.39.2
+
--- /dev/null
+From 13aba8cc9c46174b275c69e259ce56721c4fc8da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 20:17:29 +0530
+Subject: bpf: Fix max stack depth check for async callbacks
+
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+
+[ Upstream commit 5415ccd50a8620c8cbaa32d6f18c946c453566f5 ]
+
+The check_max_stack_depth pass happens after the verifier's symbolic
+execution, and attempts to walk the call graph of the BPF program,
+ensuring that the stack usage stays within bounds for all possible call
+chains. There are two cases to consider: bpf_pseudo_func and
+bpf_pseudo_call. In the former case, the callback pointer is loaded into
+a register, and is assumed that it is passed to some helper later which
+calls it (however there is no way to be sure), but the check remains
+conservative and accounts the stack usage anyway. For this particular
+case, asynchronous callbacks are skipped as they execute asynchronously
+when their corresponding event fires.
+
+The case of bpf_pseudo_call is simpler and we know that the call is
+definitely made, hence the stack depth of the subprog is accounted for.
+
+However, the current check still skips an asynchronous callback even if
+a bpf_pseudo_call was made for it. This is erroneous, as it will miss
+accounting for the stack usage of the asynchronous callback, which can
+be used to breach the maximum stack depth limit.
+
+Fix this by only skipping asynchronous callbacks when the instruction is
+not a pseudo call to the subprog.
+
+Fixes: 7ddc80a476c2 ("bpf: Teach stack depth check about async callbacks.")
+Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20230705144730.235802-2-memxor@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 30fabae47a07b..aac31e33323bb 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -5450,8 +5450,9 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
+ verbose(env, "verifier bug. subprog has tail_call and async cb\n");
+ return -EFAULT;
+ }
+- /* async callbacks don't increase bpf prog stack size */
+- continue;
++ /* async callbacks don't increase bpf prog stack size unless called directly */
++ if (!bpf_pseudo_call(insn + i))
++ continue;
+ }
+ i = next_insn;
+
+--
+2.39.2
+
--- /dev/null
+From 55ccff99ae176eb0c945916524cfd9719d1abd4c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Jun 2023 13:31:53 +0100
+Subject: drm: bridge: dw_hdmi: fix connector access for scdc
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Adrián Larumbe <adrian.larumbe@collabora.com>
+
+[ Upstream commit 98703e4e061fb8715c7613cd227e32cdfd136b23 ]
+
+Commit 5d844091f237 ("drm/scdc-helper: Pimp SCDC debugs") changed the scdc
+interface to pick up an i2c adapter from a connector instead. However, in
+the case of dw-hdmi, the wrong connector was being used to pass i2c adapter
+information, since dw-hdmi's embedded connector structure is only populated
+when the bridge attachment callback explicitly asks for it.
+
+drm-meson is handling connector creation, so this won't happen, leading to
+a NULL pointer dereference.
+
+Fix it by having scdc functions access dw-hdmi's current connector pointer
+instead, which is assigned during the bridge enablement stage.
+
+Fixes: 5d844091f237 ("drm/scdc-helper: Pimp SCDC debugs")
+Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
+Reported-by: Lukas F. Hartmann <lukas@mntre.com>
+Acked-by: Neil Armstrong <neil.armstrong@linaro.org>
+[narmstrong: moved Fixes tag before first S-o-b and added Reported-by tag]
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230601123153.196867-1-adrian.larumbe@collabora.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+index 603bb3c51027b..3b40e0fdca5cb 100644
+--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+@@ -1426,9 +1426,9 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi *hdmi,
+ /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */
+ if (dw_hdmi_support_scdc(hdmi, display)) {
+ if (mtmdsclock > HDMI14_MAX_TMDSCLK)
+- drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 1);
++ drm_scdc_set_high_tmds_clock_ratio(hdmi->curr_conn, 1);
+ else
+- drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 0);
++ drm_scdc_set_high_tmds_clock_ratio(hdmi->curr_conn, 0);
+ }
+ }
+ EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio);
+@@ -2116,7 +2116,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
+ min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION));
+
+ /* Enabled Scrambling in the Sink */
+- drm_scdc_set_scrambling(&hdmi->connector, 1);
++ drm_scdc_set_scrambling(hdmi->curr_conn, 1);
+
+ /*
+ * To activate the scrambler feature, you must ensure
+@@ -2132,7 +2132,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
+ hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL);
+ hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ,
+ HDMI_MC_SWRSTZ);
+- drm_scdc_set_scrambling(&hdmi->connector, 0);
++ drm_scdc_set_scrambling(hdmi->curr_conn, 0);
+ }
+ }
+
+@@ -3553,6 +3553,7 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev,
+ hdmi->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID
+ | DRM_BRIDGE_OP_HPD;
+ hdmi->bridge.interlace_allowed = true;
++ hdmi->bridge.ddc = hdmi->ddc;
+ #ifdef CONFIG_OF
+ hdmi->bridge.of_node = pdev->dev.of_node;
+ #endif
+--
+2.39.2
+
--- /dev/null
+From 270b8bee9af7ad5cd07b9d7e5e88c3dfaa772065 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Jun 2023 06:58:13 -0700
+Subject: drm/bridge: ti-sn65dsi86: Fix auxiliary bus lifetime
+
+From: Douglas Anderson <dianders@chromium.org>
+
+[ Upstream commit 7aa83fbd712a6f08ffa67890061f26d140c2a84f ]
+
+Memory for the "struct device" for any given device isn't supposed to
+be released until the device's release() is called. This is important
+because someone might be holding a kobject reference to the "struct
+device" and might try to access one of its members even after any
+other cleanup/uninitialization has happened.
+
+Code analysis of ti-sn65dsi86 shows that this isn't quite right. When
+the code was written, it was believed that we could rely on the fact
+that the child devices would all be freed before the parent devices
+and thus we didn't need to worry about a release() function. While I
+still believe that the parent's "struct device" is guaranteed to
+outlive the child's "struct device" (because the child holds a kobject
+reference to the parent), the parent's "devm" allocated memory is a
+different story. That appears to be freed much earlier.
+
+Let's make this better for ti-sn65dsi86 by allocating each auxiliary
+with kzalloc and then free that memory in the release().
+
+Fixes: bf73537f411b ("drm/bridge: ti-sn65dsi86: Break GPIO and MIPI-to-eDP bridge into sub-drivers")
+Suggested-by: Stephen Boyd <swboyd@chromium.org>
+Reviewed-by: Stephen Boyd <swboyd@chromium.org>
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230613065812.v2.1.I24b838a5b4151fb32bccd6f36397998ea2df9fbb@changeid
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/bridge/ti-sn65dsi86.c | 35 +++++++++++++++++----------
+ 1 file changed, 22 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+index 4676cf2900dfd..3c8fd6ea6d6a4 100644
+--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
++++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+@@ -170,10 +170,10 @@
+ * @pwm_refclk_freq: Cache for the reference clock input to the PWM.
+ */
+ struct ti_sn65dsi86 {
+- struct auxiliary_device bridge_aux;
+- struct auxiliary_device gpio_aux;
+- struct auxiliary_device aux_aux;
+- struct auxiliary_device pwm_aux;
++ struct auxiliary_device *bridge_aux;
++ struct auxiliary_device *gpio_aux;
++ struct auxiliary_device *aux_aux;
++ struct auxiliary_device *pwm_aux;
+
+ struct device *dev;
+ struct regmap *regmap;
+@@ -468,27 +468,34 @@ static void ti_sn65dsi86_delete_aux(void *data)
+ auxiliary_device_delete(data);
+ }
+
+-/*
+- * AUX bus docs say that a non-NULL release is mandatory, but it makes no
+- * sense for the model used here where all of the aux devices are allocated
+- * in the single shared structure. We'll use this noop as a workaround.
+- */
+-static void ti_sn65dsi86_noop(struct device *dev) {}
++static void ti_sn65dsi86_aux_device_release(struct device *dev)
++{
++ struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev);
++
++ kfree(aux);
++}
+
+ static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata,
+- struct auxiliary_device *aux,
++ struct auxiliary_device **aux_out,
+ const char *name)
+ {
+ struct device *dev = pdata->dev;
++ struct auxiliary_device *aux;
+ int ret;
+
++ aux = kzalloc(sizeof(*aux), GFP_KERNEL);
++ if (!aux)
++ return -ENOMEM;
++
+ aux->name = name;
+ aux->dev.parent = dev;
+- aux->dev.release = ti_sn65dsi86_noop;
++ aux->dev.release = ti_sn65dsi86_aux_device_release;
+ device_set_of_node_from_dev(&aux->dev, dev);
+ ret = auxiliary_device_init(aux);
+- if (ret)
++ if (ret) {
++ kfree(aux);
+ return ret;
++ }
+ ret = devm_add_action_or_reset(dev, ti_sn65dsi86_uninit_aux, aux);
+ if (ret)
+ return ret;
+@@ -497,6 +504,8 @@ static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata,
+ if (ret)
+ return ret;
+ ret = devm_add_action_or_reset(dev, ti_sn65dsi86_delete_aux, aux);
++ if (!ret)
++ *aux_out = aux;
+
+ return ret;
+ }
+--
+2.39.2
+
--- /dev/null
+From 9874576143edd04da17ddc4c1e193e5a747512f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jul 2023 17:30:31 +0200
+Subject: drm/fbdev-dma: Fix documented default preferred_bpp value
+
+From: Geert Uytterhoeven <geert+renesas@glider.be>
+
+[ Upstream commit 15008052b34efaa86c1d56190ac73c4bf8c462f9 ]
+
+As of commit 6c80a93be62d398e ("drm/fb-helper: Initialize fb-helper's
+preferred BPP in prepare function"), the preferred_bpp parameter of
+drm_fb_helper_prepare() defaults to 32 instead of
+drm_mode_config.preferred_depth. Hence this also applies to
+drm_fbdev_dma_setup(), which just passes its own preferred_bpp
+parameter.
+
+Fixes: b79fe9abd58bab73 ("drm/fbdev-dma: Implement fbdev emulation for GEM DMA helpers")
+Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/91f093ffe436a9f94d58fb2bfbc1407f1ebe8bb0.1688656591.git.geert+renesas@glider.be
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/drm_fbdev_dma.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c
+index 728deffcc0d92..e85cdf69cd6c4 100644
+--- a/drivers/gpu/drm/drm_fbdev_dma.c
++++ b/drivers/gpu/drm/drm_fbdev_dma.c
+@@ -218,7 +218,7 @@ static const struct drm_client_funcs drm_fbdev_dma_client_funcs = {
+ * drm_fbdev_dma_setup() - Setup fbdev emulation for GEM DMA helpers
+ * @dev: DRM device
+ * @preferred_bpp: Preferred bits per pixel for the device.
+- * @dev->mode_config.preferred_depth is used if this is zero.
++ * 32 is used if this is zero.
+ *
+ * This function sets up fbdev emulation for GEM DMA drivers that support
+ * dumb buffers with a virtual address and that can be mmap'ed.
+--
+2.39.2
+
--- /dev/null
+From c33a440dd21cd1cbd590b5f9cc48a1f556c59732 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 17:10:17 +0300
+Subject: drm/i915: Don't preserve dpll_hw_state for slave crtc in Bigjoiner
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
+
+[ Upstream commit 5c413188c68da0e4bffc93de1c80257e20741e69 ]
+
+If we are using Bigjoiner dpll_hw_state is supposed to be exactly
+same as for master crtc, so no need to save it's state for slave crtc.
+
+Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
+Fixes: 0ff0e219d9b8 ("drm/i915: Compute clocks earlier")
+Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230628141017.18937-1-stanislav.lisovskiy@intel.com
+(cherry picked from commit cbaf758809952c95ec00e796695049babb08bb60)
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/display/intel_display.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
+index 7749f95d5d02a..a805b57f3d912 100644
+--- a/drivers/gpu/drm/i915/display/intel_display.c
++++ b/drivers/gpu/drm/i915/display/intel_display.c
+@@ -4968,7 +4968,6 @@ copy_bigjoiner_crtc_state_modeset(struct intel_atomic_state *state,
+ saved_state->uapi = slave_crtc_state->uapi;
+ saved_state->scaler_state = slave_crtc_state->scaler_state;
+ saved_state->shared_dpll = slave_crtc_state->shared_dpll;
+- saved_state->dpll_hw_state = slave_crtc_state->dpll_hw_state;
+ saved_state->crc_enabled = slave_crtc_state->crc_enabled;
+
+ intel_crtc_free_hw_state(slave_crtc_state);
+--
+2.39.2
+
--- /dev/null
+From 1b7eae674754c36f9db7a4d8ba556602591973ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 13:55:03 +0100
+Subject: drm/i915: Fix one wrong caching mode enum usage
+
+From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+
+[ Upstream commit 113899c2669dff148b2a5bea4780123811aecc13 ]
+
+Commit a4d86249c773 ("drm/i915/gt: Provide a utility to create a scratch
+buffer") mistakenly passed in uapi I915_CACHING_CACHED as argument to
+i915_gem_object_set_cache_coherency(), which actually takes internal
+enum i915_cache_level.
+
+No functional issue since the value matches I915_CACHE_LLC (1 == 1), which
+is the intended caching mode, but lets clean it up nevertheless.
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Fixes: a4d86249c773 ("drm/i915/gt: Provide a utility to create a scratch buffer")
+Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230707125503.3965817-1-tvrtko.ursulin@linux.intel.com
+(cherry picked from commit 49c60b2f0867ac36fd54d513882a48431aeccae7)
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/intel_gtt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
+index 4f436ba7a3c83..123b82f29a1bf 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
++++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
+@@ -625,7 +625,7 @@ __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+- i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
++ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+--
+2.39.2
+
--- /dev/null
+From 68f7b5d7a40cfd63e381066ace1770379c8a7c51 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 22:18:38 +0200
+Subject: drm/nouveau/acr: Abort loading ACR if no firmware was found
+
+From: Karol Herbst <kherbst@redhat.com>
+
+[ Upstream commit 938a06c8b7913455073506c33ae3bff029c3c4ef ]
+
+This fixes a NULL pointer access inside nvkm_acr_oneinit in case necessary
+firmware files couldn't be loaded.
+
+Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/212
+Fixes: 4b569ded09fd ("drm/nouveau/acr/ga102: initial support")
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Reviewed-by: Dave Airlie <airlied@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230522201838.1496622-1-kherbst@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
+index 795f3a649b122..9b8ca4e898f90 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
+@@ -224,7 +224,7 @@ nvkm_acr_oneinit(struct nvkm_subdev *subdev)
+ u64 falcons;
+ int ret, i;
+
+- if (list_empty(&acr->hsfw)) {
++ if (list_empty(&acr->hsfw) || !acr->func || !acr->func->wpr_layout) {
+ nvkm_debug(subdev, "No HSFW(s)\n");
+ nvkm_acr_cleanup(acr);
+ return 0;
+--
+2.39.2
+
--- /dev/null
+From 6baa2918216778a5c148247d43684e5a2f83e6d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 May 2023 11:10:52 +0200
+Subject: drm/nouveau: bring back blit subchannel for pre nv50 GPUs
+
+From: Karol Herbst <kherbst@redhat.com>
+
+[ Upstream commit 835a65f51790e1f72b1ab106ec89db9ac15b47d6 ]
+
+1ba6113a90a0 removed a lot of the kernel GPU channel, but method 0x128
+was important as otherwise the GPU spams us with `CACHE_ERROR` messages.
+
+We use the blit subchannel inside our vblank handling, so we should keep
+at least this part.
+
+v2: Only do it for NV11+ GPUs
+
+Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/201
+Fixes: 4a16dd9d18a0 ("drm/nouveau/kms: switch to drm fbdev helpers")
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230526091052.2169044-1-kherbst@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_chan.c | 1 +
+ drivers/gpu/drm/nouveau/nouveau_chan.h | 1 +
+ drivers/gpu/drm/nouveau/nouveau_drm.c | 20 +++++++++++++++++---
+ 3 files changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
+index e648ecd0c1a03..3dfbc374478e6 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
++++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
+@@ -90,6 +90,7 @@ nouveau_channel_del(struct nouveau_channel **pchan)
+ if (cli)
+ nouveau_svmm_part(chan->vmm->svmm, chan->inst);
+
++ nvif_object_dtor(&chan->blit);
+ nvif_object_dtor(&chan->nvsw);
+ nvif_object_dtor(&chan->gart);
+ nvif_object_dtor(&chan->vram);
+diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
+index e06a8ffed31a8..bad7466bd0d59 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
++++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
+@@ -53,6 +53,7 @@ struct nouveau_channel {
+ u32 user_put;
+
+ struct nvif_object user;
++ struct nvif_object blit;
+
+ struct nvif_event kill;
+ atomic_t killed;
+diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
+index 7aac9384600ed..40fb9a8349180 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
++++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
+@@ -375,15 +375,29 @@ nouveau_accel_gr_init(struct nouveau_drm *drm)
+ ret = nvif_object_ctor(&drm->channel->user, "drmNvsw",
+ NVDRM_NVSW, nouveau_abi16_swclass(drm),
+ NULL, 0, &drm->channel->nvsw);
++
++ if (ret == 0 && device->info.chipset >= 0x11) {
++ ret = nvif_object_ctor(&drm->channel->user, "drmBlit",
++ 0x005f, 0x009f,
++ NULL, 0, &drm->channel->blit);
++ }
++
+ if (ret == 0) {
+ struct nvif_push *push = drm->channel->chan.push;
+- ret = PUSH_WAIT(push, 2);
+- if (ret == 0)
++ ret = PUSH_WAIT(push, 8);
++ if (ret == 0) {
++ if (device->info.chipset >= 0x11) {
++ PUSH_NVSQ(push, NV05F, 0x0000, drm->channel->blit.handle);
++ PUSH_NVSQ(push, NV09F, 0x0120, 0,
++ 0x0124, 1,
++ 0x0128, 2);
++ }
+ PUSH_NVSQ(push, NV_SW, 0x0000, drm->channel->nvsw.handle);
++ }
+ }
+
+ if (ret) {
+- NV_ERROR(drm, "failed to allocate sw class, %d\n", ret);
++ NV_ERROR(drm, "failed to allocate sw or blit class, %d\n", ret);
+ nouveau_accel_gr_fini(drm);
+ return;
+ }
+--
+2.39.2
+
--- /dev/null
+From d92dc097c5c959d8a0645d0fdc7f502852665962 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 23:22:46 +0200
+Subject: drm/nouveau/disp: fix HDMI on gt215+
+
+From: Karol Herbst <kherbst@redhat.com>
+
+[ Upstream commit d94303699921bda8141ad33554ae55b615ddd149 ]
+
+Cc: Ben Skeggs <bskeggs@redhat.com>
+Cc: Lyude Paul <lyude@redhat.com>
+Fixes: f530bc60a30b ("drm/nouveau/disp: move HDMI config into acquire + infoframe methods")
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230628212248.3798605-1-kherbst@redhat.com
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
+index a2c7c6f83dcdb..506ffbe7b8421 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
+@@ -125,7 +125,7 @@ gt215_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 siz
+ pack_hdmi_infoframe(&avi, data, size);
+
+ nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000000);
+- if (size)
++ if (!size)
+ return;
+
+ nvkm_wr32(device, 0x61c528 + soff, avi.header);
+--
+2.39.2
+
--- /dev/null
+From c4673406b30a28d2fa8ed21b37962828adf965e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jun 2023 18:06:45 +0200
+Subject: drm/nouveau/disp/g94: enable HDMI
+
+From: Karol Herbst <kherbst@redhat.com>
+
+[ Upstream commit c177872cb056e0b499af4717d8d1977017fd53df ]
+
+Cc: Ben Skeggs <bskeggs@redhat.com>
+Cc: Lyude Paul <lyude@redhat.com>
+Fixes: f530bc60a30b ("drm/nouveau/disp: move HDMI config into acquire + infoframe methods")
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230630160645.3984596-1-kherbst@redhat.com
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
+index a4853c4e5ee3a..67ef889a0c5f4 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
+@@ -295,6 +295,7 @@ g94_sor = {
+ .clock = nv50_sor_clock,
+ .war_2 = g94_sor_war_2,
+ .war_3 = g94_sor_war_3,
++ .hdmi = &g84_sor_hdmi,
+ .dp = &g94_sor_dp,
+ };
+
+--
+2.39.2
+
--- /dev/null
+From 6ca5c4bfdbae1b72a46946af102d9677577fcd6d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 08:22:02 -0300
+Subject: drm/panel: simple: Add connector_type for innolux_at043tn24
+
+From: Fabio Estevam <festevam@denx.de>
+
+[ Upstream commit 2c56a751845ddfd3078ebe79981aaaa182629163 ]
+
+The innolux at043tn24 display is a parallel LCD. Pass the 'connector_type'
+information to avoid the following warning:
+
+panel-simple panel: Specify missing connector_type
+
+Signed-off-by: Fabio Estevam <festevam@denx.de>
+Fixes: 41bcceb4de9c ("drm/panel: simple: Add support for Innolux AT043TN24")
+Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230620112202.654981-1-festevam@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/panel/panel-simple.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
+index d8efbcee9bc12..1927fef9aed67 100644
+--- a/drivers/gpu/drm/panel/panel-simple.c
++++ b/drivers/gpu/drm/panel/panel-simple.c
+@@ -2117,6 +2117,7 @@ static const struct panel_desc innolux_at043tn24 = {
+ .height = 54,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
++ .connector_type = DRM_MODE_CONNECTOR_DPI,
+ .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
+ };
+
+--
+2.39.2
+
--- /dev/null
+From e0bf3f1aa9d658c8219ebdd882c6aacb1bdf32ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 22:16:02 +0200
+Subject: drm/panel: simple: Add Powertip PH800480T013 drm_display_mode flags
+
+From: Marek Vasut <marex@denx.de>
+
+[ Upstream commit 1c519980aced3da1fae37c1339cf43b24eccdee7 ]
+
+Add missing drm_display_mode DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC
+flags. Those are used by various bridges in the pipeline to correctly
+configure its sync signals polarity.
+
+Fixes: d69de69f2be1 ("drm/panel: simple: Add Powertip PH800480T013 panel")
+Signed-off-by: Marek Vasut <marex@denx.de>
+Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230615201602.565948-1-marex@denx.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/panel/panel-simple.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
+index 1927fef9aed67..e02249b212c2a 100644
+--- a/drivers/gpu/drm/panel/panel-simple.c
++++ b/drivers/gpu/drm/panel/panel-simple.c
+@@ -3110,6 +3110,7 @@ static const struct drm_display_mode powertip_ph800480t013_idf02_mode = {
+ .vsync_start = 480 + 49,
+ .vsync_end = 480 + 49 + 2,
+ .vtotal = 480 + 49 + 2 + 22,
++ .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+ };
+
+ static const struct panel_desc powertip_ph800480t013_idf02 = {
+--
+2.39.2
+
--- /dev/null
+From 179c93cb4147df032e78c950833127e4fe6344e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 17:34:10 +0800
+Subject: erofs: avoid infinite loop in z_erofs_do_read_page() when reading
+ beyond EOF
+
+From: Chunhai Guo <guochunhai@vivo.com>
+
+[ Upstream commit 8191213a5835b0317c5e4d0d337ae1ae00c75253 ]
+
+z_erofs_do_read_page() may loop infinitely due to the inappropriate
+truncation in the statement below, since the offset is 64 bits and min_t()
+truncates the result to 32 bits. The solution is to replace unsigned int
+with a 64-bit type, such as erofs_off_t.
+ cur = end - min_t(unsigned int, offset + end - map->m_la, end);
+
+ - For example:
+ - offset = 0x400160000
+ - end = 0x370
+ - map->m_la = 0x160370
+ - offset + end - map->m_la = 0x400000000
+ - offset + end - map->m_la = 0x00000000 (truncated as unsigned int)
+ - Expected result:
+ - cur = 0
+ - Actual result:
+ - cur = 0x370
+
+Signed-off-by: Chunhai Guo <guochunhai@vivo.com>
+Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support")
+Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Link: https://lore.kernel.org/r/20230710093410.44071-1-guochunhai@vivo.com
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/zdata.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
+index bedfff5d45faf..997ca4b32e87f 100644
+--- a/fs/erofs/zdata.c
++++ b/fs/erofs/zdata.c
+@@ -990,7 +990,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
+ */
+ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
+
+- cur = end - min_t(unsigned int, offset + end - map->m_la, end);
++ cur = end - min_t(erofs_off_t, offset + end - map->m_la, end);
+ if (!(map->m_flags & EROFS_MAP_MAPPED)) {
+ zero_user_segment(page, cur, end);
+ goto next_part;
+--
+2.39.2
+
--- /dev/null
+From 201f9e4e49bcb83a199bb05c153545ff504140bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 12:25:31 +0800
+Subject: erofs: avoid useless loops in z_erofs_pcluster_readmore() when
+ reading beyond EOF
+
+From: Chunhai Guo <guochunhai@vivo.com>
+
+[ Upstream commit 936aa701d82d397c2d1afcd18ce2c739471d978d ]
+
+z_erofs_pcluster_readmore() may take a long time to loop when the page
+offset is large enough, which is unnecessary and should be prevented.
+
+For example, when the following case is encountered, it will loop 4691368
+times, taking about 27 seconds:
+ - offset = 19217289215
+ - inode_size = 1442672
+
+Signed-off-by: Chunhai Guo <guochunhai@vivo.com>
+Fixes: 386292919c25 ("erofs: introduce readmore decompression strategy")
+Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Reviewed-by: Yue Hu <huyue2@coolpad.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Link: https://lore.kernel.org/r/20230710042531.28761-1-guochunhai@vivo.com
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/zdata.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
+index 502893e3da010..bedfff5d45faf 100644
+--- a/fs/erofs/zdata.c
++++ b/fs/erofs/zdata.c
+@@ -1807,7 +1807,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
+ }
+
+ cur = map->m_la + map->m_llen - 1;
+- while (cur >= end) {
++ while ((cur >= end) && (cur < i_size_read(inode))) {
+ pgoff_t index = cur >> PAGE_SHIFT;
+ struct page *page;
+
+--
+2.39.2
+
--- /dev/null
+From 625c8bd6b37ff491ca8ee641313b1693e0a5d5f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 14:21:30 +0800
+Subject: erofs: fix fsdax unavailability for chunk-based regular files
+
+From: Xin Yin <yinxin.x@bytedance.com>
+
+[ Upstream commit 18bddc5b67038722cb88fcf51fbf41a0277092cb ]
+
+DAX can be used to share page cache between VMs, reducing guest memory
+overhead. And chunk based data format is widely used for VM and
+container image. So enable dax support for it, make erofs better used
+for VM scenarios.
+
+Fixes: c5aa903a59db ("erofs: support reading chunk-based uncompressed files")
+Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
+Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Link: https://lore.kernel.org/r/20230711062130.7860-1-yinxin.x@bytedance.com
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/inode.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
+index d70b12b81507f..e12592727a546 100644
+--- a/fs/erofs/inode.c
++++ b/fs/erofs/inode.c
+@@ -183,7 +183,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
+
+ inode->i_flags &= ~S_DAX;
+ if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
+- vi->datalayout == EROFS_INODE_FLAT_PLAIN)
++ (vi->datalayout == EROFS_INODE_FLAT_PLAIN ||
++ vi->datalayout == EROFS_INODE_CHUNK_BASED))
+ inode->i_flags |= S_DAX;
+
+ if (!nblks)
+--
+2.39.2
+
--- /dev/null
+From 9ea6b366f8d472e6814dbc9aad4e5bfb1d00ac39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jul 2023 17:23:36 +0800
+Subject: fprobe: add unlock to match a succeeded ftrace_test_recursion_trylock
+
+From: Ze Gao <zegao2021@gmail.com>
+
+[ Upstream commit 5f0c584daf7464f04114c65dd07269ee2bfedc13 ]
+
+Unlock ftrace recursion lock when fprobe_kprobe_handler() is failed
+because of some running kprobe.
+
+Link: https://lore.kernel.org/all/20230703092336.268371-1-zegao@tencent.com/
+
+Fixes: 3cc4e2c5fbae ("fprobe: make fprobe_kprobe_handler recursion free")
+Reported-by: Yafang <laoar.shao@gmail.com>
+Closes: https://lore.kernel.org/linux-trace-kernel/CALOAHbC6UpfFOOibdDiC7xFc5YFUgZnk3MZ=3Ny6we=AcrNbew@mail.gmail.com/
+Signed-off-by: Ze Gao <zegao@tencent.com>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Acked-by: Yafang Shao <laoar.shao@gmail.com>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/fprobe.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
+index 18d36842faf57..93b3e361bb97a 100644
+--- a/kernel/trace/fprobe.c
++++ b/kernel/trace/fprobe.c
+@@ -102,12 +102,14 @@ static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
+
+ if (unlikely(kprobe_running())) {
+ fp->nmissed++;
+- return;
++ goto recursion_unlock;
+ }
+
+ kprobe_busy_begin();
+ __fprobe_handler(ip, parent_ip, ops, fregs);
+ kprobe_busy_end();
++
++recursion_unlock:
+ ftrace_test_recursion_unlock(bit);
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 756748806f6ce7d1f3efe286826e4813f7725296 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jul 2023 12:41:28 +0800
+Subject: gve: Set default duplex configuration to full
+
+From: Junfeng Guo <junfeng.guo@intel.com>
+
+[ Upstream commit 0503efeadbf6bb8bf24397613a73b67e665eac5f ]
+
+Current duplex mode was unset in the driver, resulting in the default
+parameter being set to 0, which corresponds to half duplex. It might
+mislead users to have incorrect expectation about the driver's
+transmission capabilities.
+Set the default duplex configuration to full, as the driver runs in
+full duplex mode at this point.
+
+Fixes: 7e074d5a76ca ("gve: Enable Link Speed Reporting in the driver.")
+Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Message-ID: <20230706044128.2726747-1-junfeng.guo@intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
+index cfd4b8d284d12..50162ec9424df 100644
+--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
++++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
+@@ -590,6 +590,9 @@ static int gve_get_link_ksettings(struct net_device *netdev,
+ err = gve_adminq_report_link_speed(priv);
+
+ cmd->base.speed = priv->link_speed;
++
++ cmd->base.duplex = DUPLEX_FULL;
++
+ return err;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From b4bc4dd1ce538c2050276f4c029b3d91ea87604f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 16:02:24 +0200
+Subject: HID: hyperv: avoid struct memcpy overrun warning
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 5f151364b1da6bd217632fd4ee8cc24eaf66a497 ]
+
+A previous patch addressed the fortified memcpy warning for most
+builds, but I still see this one with gcc-9:
+
+In file included from include/linux/string.h:254,
+ from drivers/hid/hid-hyperv.c:8:
+In function 'fortify_memcpy_chk',
+ inlined from 'mousevsc_on_receive' at drivers/hid/hid-hyperv.c:272:3:
+include/linux/fortify-string.h:583:4: error: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror=attribute-warning]
+ 583 | __write_overflow_field(p_size_field, size);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+My guess is that the WARN_ON() itself is what confuses gcc, so it no
+longer sees that there is a correct range check. Rework the code in a
+way that helps readability and avoids the warning.
+
+Fixes: 542f25a94471 ("HID: hyperv: Replace one-element array with flexible-array member")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Link: https://lore.kernel.org/r/20230705140242.844167-1-arnd@kernel.org
+Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-hyperv.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
+index 49d4a26895e76..f33485d83d24f 100644
+--- a/drivers/hid/hid-hyperv.c
++++ b/drivers/hid/hid-hyperv.c
+@@ -258,19 +258,17 @@ static void mousevsc_on_receive(struct hv_device *device,
+
+ switch (hid_msg_hdr->type) {
+ case SYNTH_HID_PROTOCOL_RESPONSE:
++ len = struct_size(pipe_msg, data, pipe_msg->size);
++
+ /*
+ * While it will be impossible for us to protect against
+ * malicious/buggy hypervisor/host, add a check here to
+ * ensure we don't corrupt memory.
+ */
+- if (struct_size(pipe_msg, data, pipe_msg->size)
+- > sizeof(struct mousevsc_prt_msg)) {
+- WARN_ON(1);
++ if (WARN_ON(len > sizeof(struct mousevsc_prt_msg)))
+ break;
+- }
+
+- memcpy(&input_dev->protocol_resp, pipe_msg,
+- struct_size(pipe_msg, data, pipe_msg->size));
++ memcpy(&input_dev->protocol_resp, pipe_msg, len);
+ complete(&input_dev->wait_event);
+ break;
+
+--
+2.39.2
+
--- /dev/null
+From c426f5ca59f9bd7765ca00379db289f3b8213478 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Jun 2023 17:40:23 -0700
+Subject: ice: Fix max_rate check while configuring TX rate limits
+
+From: Sridhar Samudrala <sridhar.samudrala@intel.com>
+
+[ Upstream commit 5f16da6ee6ac32e6c8098bc4cfcc4f170694f9da ]
+
+Remove incorrect check in ice_validate_mqprio_opt() that limits
+filter configuration when sum of max_rates of all TCs exceeds
+the link speed. The max rate of each TC is unrelated to value
+used by other TCs and is valid as long as it is less than link
+speed.
+
+Fixes: fbc7b27af0f9 ("ice: enable ndo_setup_tc support for mqprio_qdisc")
+Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
+Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
+Tested-by: Bharathi Sreenivas <bharathi.sreenivas@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index fcc027c938fda..eef7c1224887a 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -8114,10 +8114,10 @@ static int
+ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
+ struct tc_mqprio_qopt_offload *mqprio_qopt)
+ {
+- u64 sum_max_rate = 0, sum_min_rate = 0;
+ int non_power_of_2_qcount = 0;
+ struct ice_pf *pf = vsi->back;
+ int max_rss_q_cnt = 0;
++ u64 sum_min_rate = 0;
+ struct device *dev;
+ int i, speed;
+ u8 num_tc;
+@@ -8133,6 +8133,7 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
+ dev = ice_pf_to_dev(pf);
+ vsi->ch_rss_size = 0;
+ num_tc = mqprio_qopt->qopt.num_tc;
++ speed = ice_get_link_speed_kbps(vsi);
+
+ for (i = 0; num_tc; i++) {
+ int qcount = mqprio_qopt->qopt.count[i];
+@@ -8173,7 +8174,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
+ */
+ max_rate = mqprio_qopt->max_rate[i];
+ max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR);
+- sum_max_rate += max_rate;
+
+ /* min_rate is minimum guaranteed rate and it can't be zero */
+ min_rate = mqprio_qopt->min_rate[i];
+@@ -8186,6 +8186,12 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
+ return -EINVAL;
+ }
+
++ if (max_rate && max_rate > speed) {
++ dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n",
++ i, max_rate, speed);
++ return -EINVAL;
++ }
++
+ iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem);
+ if (rem) {
+ dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
+@@ -8223,12 +8229,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
+ (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+ return -EINVAL;
+
+- speed = ice_get_link_speed_kbps(vsi);
+- if (sum_max_rate && sum_max_rate > (u64)speed) {
+- dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n",
+- sum_max_rate, speed);
+- return -EINVAL;
+- }
+ if (sum_min_rate && sum_min_rate > (u64)speed) {
+ dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
+ sum_min_rate, speed);
+--
+2.39.2
+
--- /dev/null
+From 1f0c01f03b4c0b4ebc5b97bf3e0679fc1e9b7c3b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Jun 2023 17:40:24 -0700
+Subject: ice: Fix tx queue rate limit when TCs are configured
+
+From: Sridhar Samudrala <sridhar.samudrala@intel.com>
+
+[ Upstream commit 479cdfe388a04a16fdd127f3e9e9e019e45e5573 ]
+
+Configuring tx_maxrate via sysfs interface
+/sys/class/net/eth0/queues/tx-1/tx_maxrate was not working when
+TCs are configured because always main VSI was being used. Fix by
+using correct VSI in ice_set_tx_maxrate when TCs are configured.
+
+Fixes: 1ddef455f4a8 ("ice: Add NDO callback to set the maximum per-queue bitrate")
+Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
+Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
+Tested-by: Bharathi Sreenivas <bharathi.sreenivas@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 7 +++++++
+ drivers/net/ethernet/intel/ice/ice_tc_lib.c | 22 ++++++++++-----------
+ drivers/net/ethernet/intel/ice/ice_tc_lib.h | 1 +
+ 3 files changed, 19 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index eef7c1224887a..1277e0a044ee4 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -5969,6 +5969,13 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
+ q_handle = vsi->tx_rings[queue_index]->q_handle;
+ tc = ice_dcb_get_tc(vsi, queue_index);
+
++ vsi = ice_locate_vsi_using_queue(vsi, queue_index);
++ if (!vsi) {
++ netdev_err(netdev, "Invalid VSI for given queue %d\n",
++ queue_index);
++ return -EINVAL;
++ }
++
+ /* Set BW back to default, when user set maxrate to 0 */
+ if (!maxrate)
+ status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
+diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+index d1a31f236d26a..8578dc1cb967d 100644
+--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+@@ -735,17 +735,16 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
+ /**
+ * ice_locate_vsi_using_queue - locate VSI using queue (forward to queue action)
+ * @vsi: Pointer to VSI
+- * @tc_fltr: Pointer to tc_flower_filter
++ * @queue: Queue index
+ *
+- * Locate the VSI using specified queue. When ADQ is not enabled, always
+- * return input VSI, otherwise locate corresponding VSI based on per channel
+- * offset and qcount
++ * Locate the VSI using specified "queue". When ADQ is not enabled,
++ * always return input VSI, otherwise locate corresponding
++ * VSI based on per channel "offset" and "qcount"
+ */
+-static struct ice_vsi *
+-ice_locate_vsi_using_queue(struct ice_vsi *vsi,
+- struct ice_tc_flower_fltr *tc_fltr)
++struct ice_vsi *
++ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue)
+ {
+- int num_tc, tc, queue;
++ int num_tc, tc;
+
+ /* if ADQ is not active, passed VSI is the candidate VSI */
+ if (!ice_is_adq_active(vsi->back))
+@@ -755,7 +754,6 @@ ice_locate_vsi_using_queue(struct ice_vsi *vsi,
+ * upon queue number)
+ */
+ num_tc = vsi->mqprio_qopt.qopt.num_tc;
+- queue = tc_fltr->action.fwd.q.queue;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ int qcount = vsi->mqprio_qopt.qopt.count[tc];
+@@ -797,6 +795,7 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr)
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ u32 tc_class;
++ int q;
+
+ dev = ice_pf_to_dev(pf);
+
+@@ -825,7 +824,8 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr)
+ /* Determine destination VSI even though the action is
+ * FWD_TO_QUEUE, because QUEUE is associated with VSI
+ */
+- dest_vsi = tc_fltr->dest_vsi;
++ q = tc_fltr->action.fwd.q.queue;
++ dest_vsi = ice_locate_vsi_using_queue(vsi, q);
+ break;
+ default:
+ dev_err(dev,
+@@ -1702,7 +1702,7 @@ ice_tc_forward_to_queue(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr,
+ /* If ADQ is configured, and the queue belongs to ADQ VSI, then prepare
+ * ADQ switch filter
+ */
+- ch_vsi = ice_locate_vsi_using_queue(vsi, fltr);
++ ch_vsi = ice_locate_vsi_using_queue(vsi, fltr->action.fwd.q.queue);
+ if (!ch_vsi)
+ return -EINVAL;
+ fltr->dest_vsi = ch_vsi;
+diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
+index 8d5e22ac7023c..189c73d885356 100644
+--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h
++++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
+@@ -203,6 +203,7 @@ static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf)
+ return pf->num_dmac_chnl_fltrs;
+ }
+
++struct ice_vsi *ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue);
+ int
+ ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
+ struct flow_cls_offload *cls_flower);
+--
+2.39.2
+
--- /dev/null
+From a22ee92cac3b600e237a63c814daa71e2083eb91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 18:43:27 -0700
+Subject: icmp6: Fix null-ptr-deref of ip6_null_entry->rt6i_idev in
+ icmp6_dev().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 2aaa8a15de73874847d62eb595c6683bface80fd ]
+
+With some IPv6 Ext Hdr (RPL, SRv6, etc.), we can send a packet that
+has the link-local address as src and dst IP and will be forwarded to
+an external IP in the IPv6 Ext Hdr.
+
+For example, the script below generates a packet whose src IP is the
+link-local address and dst is updated to 11::.
+
+ # for f in $(find /proc/sys/net/ -name *seg6_enabled*); do echo 1 > $f; done
+ # python3
+ >>> from socket import *
+ >>> from scapy.all import *
+ >>>
+ >>> SRC_ADDR = DST_ADDR = "fe80::5054:ff:fe12:3456"
+ >>>
+ >>> pkt = IPv6(src=SRC_ADDR, dst=DST_ADDR)
+ >>> pkt /= IPv6ExtHdrSegmentRouting(type=4, addresses=["11::", "22::"], segleft=1)
+ >>>
+ >>> sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW)
+ >>> sk.sendto(bytes(pkt), (DST_ADDR, 0))
+
+For such a packet, we call ip6_route_input() to look up a route for the
+next destination in these three functions depending on the header type.
+
+ * ipv6_rthdr_rcv()
+ * ipv6_rpl_srh_rcv()
+ * ipv6_srh_rcv()
+
+If no route is found, ip6_null_entry is set to skb, and the following
+dst_input(skb) calls ip6_pkt_drop().
+
+Finally, in icmp6_dev(), we dereference skb_rt6_info(skb)->rt6i_idev->dev
+as the input device is the loopback interface. Then, we have to check if
+skb_rt6_info(skb)->rt6i_idev is NULL or not to avoid NULL pointer deref
+for ip6_null_entry.
+
+BUG: kernel NULL pointer dereference, address: 0000000000000000
+ PF: supervisor read access in kernel mode
+ PF: error_code(0x0000) - not-present page
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+CPU: 0 PID: 157 Comm: python3 Not tainted 6.4.0-11996-gb121d614371c #35
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+RIP: 0010:icmp6_send (net/ipv6/icmp.c:436 net/ipv6/icmp.c:503)
+Code: fe ff ff 48 c7 40 30 c0 86 5d 83 e8 c6 44 1c 00 e9 c8 fc ff ff 49 8b 46 58 48 83 e0 fe 0f 84 4a fb ff ff 48 8b 80 d0 00 00 00 <48> 8b 00 44 8b 88 e0 00 00 00 e9 34 fb ff ff 4d 85 ed 0f 85 69 01
+RSP: 0018:ffffc90000003c70 EFLAGS: 00000286
+RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000000e0
+RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff888006d72a18
+RBP: ffffc90000003d80 R08: 0000000000000000 R09: 0000000000000001
+R10: ffffc90000003d98 R11: 0000000000000040 R12: ffff888006d72a10
+R13: 0000000000000000 R14: ffff8880057fb800 R15: ffffffff835d86c0
+FS: 00007f9dc72ee740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000000 CR3: 00000000057b2000 CR4: 00000000007506f0
+PKRU: 55555554
+Call Trace:
+ <IRQ>
+ ip6_pkt_drop (net/ipv6/route.c:4513)
+ ipv6_rthdr_rcv (net/ipv6/exthdrs.c:640 net/ipv6/exthdrs.c:686)
+ ip6_protocol_deliver_rcu (net/ipv6/ip6_input.c:437 (discriminator 5))
+ ip6_input_finish (./include/linux/rcupdate.h:781 net/ipv6/ip6_input.c:483)
+ __netif_receive_skb_one_core (net/core/dev.c:5455)
+ process_backlog (./include/linux/rcupdate.h:781 net/core/dev.c:5895)
+ __napi_poll (net/core/dev.c:6460)
+ net_rx_action (net/core/dev.c:6529 net/core/dev.c:6660)
+ __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:554)
+ do_softirq (kernel/softirq.c:454 kernel/softirq.c:441)
+ </IRQ>
+ <TASK>
+ __local_bh_enable_ip (kernel/softirq.c:381)
+ __dev_queue_xmit (net/core/dev.c:4231)
+ ip6_finish_output2 (./include/net/neighbour.h:544 net/ipv6/ip6_output.c:135)
+ rawv6_sendmsg (./include/net/dst.h:458 ./include/linux/netfilter.h:303 net/ipv6/raw.c:656 net/ipv6/raw.c:914)
+ sock_sendmsg (net/socket.c:725 net/socket.c:748)
+ __sys_sendto (net/socket.c:2134)
+ __x64_sys_sendto (net/socket.c:2146 net/socket.c:2142 net/socket.c:2142)
+ do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+ entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+RIP: 0033:0x7f9dc751baea
+Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89
+RSP: 002b:00007ffe98712c38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+RAX: ffffffffffffffda RBX: 00007ffe98712cf8 RCX: 00007f9dc751baea
+RDX: 0000000000000060 RSI: 00007f9dc6460b90 RDI: 0000000000000003
+RBP: 00007f9dc56e8be0 R08: 00007ffe98712d70 R09: 000000000000001c
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007f9dc6af5d1b
+ </TASK>
+Modules linked in:
+CR2: 0000000000000000
+ ---[ end trace 0000000000000000 ]---
+RIP: 0010:icmp6_send (net/ipv6/icmp.c:436 net/ipv6/icmp.c:503)
+Code: fe ff ff 48 c7 40 30 c0 86 5d 83 e8 c6 44 1c 00 e9 c8 fc ff ff 49 8b 46 58 48 83 e0 fe 0f 84 4a fb ff ff 48 8b 80 d0 00 00 00 <48> 8b 00 44 8b 88 e0 00 00 00 e9 34 fb ff ff 4d 85 ed 0f 85 69 01
+RSP: 0018:ffffc90000003c70 EFLAGS: 00000286
+RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000000e0
+RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff888006d72a18
+RBP: ffffc90000003d80 R08: 0000000000000000 R09: 0000000000000001
+R10: ffffc90000003d98 R11: 0000000000000040 R12: ffff888006d72a10
+R13: 0000000000000000 R14: ffff8880057fb800 R15: ffffffff835d86c0
+FS: 00007f9dc72ee740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000000 CR3: 00000000057b2000 CR4: 00000000007506f0
+PKRU: 55555554
+Kernel panic - not syncing: Fatal exception in interrupt
+Kernel Offset: disabled
+
+Fixes: 4832c30d5458 ("net: ipv6: put host and anycast routes on device with address")
+Reported-by: Wang Yufen <wangyufen@huawei.com>
+Closes: https://lore.kernel.org/netdev/c41403a9-c2f6-3b7e-0c96-e1901e605cd0@huawei.com/
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/icmp.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
+index 9edf1f45b1ed6..65fa5014bc85e 100644
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -424,7 +424,10 @@ static struct net_device *icmp6_dev(const struct sk_buff *skb)
+ if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
+ const struct rt6_info *rt6 = skb_rt6_info(skb);
+
+- if (rt6)
++ /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
++ * and ip6_null_entry could be set to skb if no route is found.
++ */
++ if (rt6 && rt6->rt6i_idev)
+ dev = rt6->rt6i_idev->dev;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From b34edb600747de56af3d2abd046a7ba09ca8a345 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 May 2023 14:03:36 +0800
+Subject: igc: Add condition for qbv_config_change_errors counter
+
+From: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+
+[ Upstream commit ed89b74d2dc920cb61d3094e0e97ec8775b13086 ]
+
+Add condition to increase the qbv counter during taprio qbv
+configuration only.
+
+There might be a case when TC already been setup then user configure
+the ETF/CBS qdisc and this counter will increase if no condition above.
+
+Fixes: ae4fe4698300 ("igc: Add qbv_config_change_errors counter")
+Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc.h | 1 +
+ drivers/net/ethernet/intel/igc/igc_main.c | 2 ++
+ drivers/net/ethernet/intel/igc/igc_tsn.c | 1 +
+ 3 files changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
+index 9dc9b982a7ea6..9902f726f06a9 100644
+--- a/drivers/net/ethernet/intel/igc/igc.h
++++ b/drivers/net/ethernet/intel/igc/igc.h
+@@ -184,6 +184,7 @@ struct igc_adapter {
+ u32 max_frame_size;
+ u32 min_frame_size;
+
++ int tc_setup_type;
+ ktime_t base_time;
+ ktime_t cycle_time;
+ bool qbv_enable;
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 5f2e8bcd75973..a8815ccf7887d 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -6295,6 +6295,8 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ {
+ struct igc_adapter *adapter = netdev_priv(dev);
+
++ adapter->tc_setup_type = type;
++
+ switch (type) {
+ case TC_QUERY_CAPS:
+ return igc_tc_query_caps(adapter, type_data);
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 94a2b0dfb54d4..6b299b83e7ef2 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -249,6 +249,7 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+ * Gate Control List (GCL) is running.
+ */
+ if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) &&
++ (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) &&
+ tsn_mode_reconfig)
+ adapter->qbv_config_change_errors++;
+ } else {
+--
+2.39.2
+
--- /dev/null
+From 8c665525c75977297efb957a7a7e51b89b5ca546 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Apr 2023 15:30:47 +0200
+Subject: igc: Add igc_xdp_buff wrapper for xdp_buff in driver
+
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+
+[ Upstream commit 73b7123de0cfa4f6609677e927ab02cb05b593c2 ]
+
+Driver specific metadata data for XDP-hints kfuncs are propagated via tail
+extending the struct xdp_buff with a locally scoped driver struct.
+
+Zero-Copy AF_XDP/XSK does similar tricks via struct xdp_buff_xsk. This
+xdp_buff_xsk struct contains a CB area (24 bytes) that can be used for
+extending the locally scoped driver into. The XSK_CHECK_PRIV_TYPE define
+catch size violations build time.
+
+The changes needed for AF_XDP zero-copy in igc_clean_rx_irq_zc()
+is done in next patch, because the member rx_desc isn't available
+at this point.
+
+Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Song Yoong Siang <yoong.siang.song@intel.com>
+Link: https://lore.kernel.org/bpf/168182464779.616355.3761989884165609387.stgit@firesoul
+Stable-dep-of: 175c241288c0 ("igc: Fix TX Hang issue when QBV Gate is closed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc.h | 5 +++++
+ drivers/net/ethernet/intel/igc/igc_main.c | 16 +++++++++-------
+ 2 files changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
+index 9902f726f06a9..3bb48840a249e 100644
+--- a/drivers/net/ethernet/intel/igc/igc.h
++++ b/drivers/net/ethernet/intel/igc/igc.h
+@@ -502,6 +502,11 @@ struct igc_rx_buffer {
+ };
+ };
+
++/* context wrapper around xdp_buff to provide access to descriptor metadata */
++struct igc_xdp_buff {
++ struct xdp_buff xdp;
++};
++
+ struct igc_q_vector {
+ struct igc_adapter *adapter; /* backlink */
+ void __iomem *itr_register;
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index b131c8f2b03df..c6169357f72fc 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -2246,6 +2246,8 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
+ if (!count)
+ return ok;
+
++ XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff);
++
+ desc = IGC_RX_DESC(ring, i);
+ bi = &ring->rx_buffer_info[i];
+ i -= ring->count;
+@@ -2530,8 +2532,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
+ union igc_adv_rx_desc *rx_desc;
+ struct igc_rx_buffer *rx_buffer;
+ unsigned int size, truesize;
++ struct igc_xdp_buff ctx;
+ ktime_t timestamp = 0;
+- struct xdp_buff xdp;
+ int pkt_offset = 0;
+ void *pktbuf;
+
+@@ -2565,13 +2567,13 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
+ }
+
+ if (!skb) {
+- xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
+- xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
++ xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq);
++ xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring),
+ igc_rx_offset(rx_ring) + pkt_offset,
+ size, true);
+- xdp_buff_clear_frags_flag(&xdp);
++ xdp_buff_clear_frags_flag(&ctx.xdp);
+
+- skb = igc_xdp_run_prog(adapter, &xdp);
++ skb = igc_xdp_run_prog(adapter, &ctx.xdp);
+ }
+
+ if (IS_ERR(skb)) {
+@@ -2593,9 +2595,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
+ } else if (skb)
+ igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
+ else if (ring_uses_build_skb(rx_ring))
+- skb = igc_build_skb(rx_ring, rx_buffer, &xdp);
++ skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp);
+ else
+- skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
++ skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp,
+ timestamp);
+
+ /* exit if we failed to retrieve a buffer */
+--
+2.39.2
+
--- /dev/null
+From 8f7e2ee37d15e7153091b9b0842b2e9cab18e3c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Apr 2023 15:30:52 +0200
+Subject: igc: Add XDP hints kfuncs for RX hash
+
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+
+[ Upstream commit 8416814fffa9cfa74c18da149f522dd9e1850987 ]
+
+This implements XDP hints kfunc for RX-hash (xmo_rx_hash).
+The HW rss hash type is handled via mapping table.
+
+This igc driver (default config) does L3 hashing for UDP packets
+(excludes UDP src/dest ports in hash calc). Meaning RSS hash type is
+L3 based. Tested that the igc_rss_type_num for UDP is either
+IGC_RSS_TYPE_HASH_IPV4 or IGC_RSS_TYPE_HASH_IPV6.
+
+This patch also updates AF_XDP zero-copy function igc_clean_rx_irq_zc()
+to use the xdp_buff wrapper struct igc_xdp_buff.
+
+Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Song Yoong Siang <yoong.siang.song@intel.com>
+Link: https://lore.kernel.org/bpf/168182465285.616355.2701740913376314790.stgit@firesoul
+Stable-dep-of: 175c241288c0 ("igc: Fix TX Hang issue when QBV Gate is closed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc.h | 1 +
+ drivers/net/ethernet/intel/igc/igc_main.c | 53 +++++++++++++++++++++++
+ 2 files changed, 54 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
+index 3bb48840a249e..f09c6a65e3ab8 100644
+--- a/drivers/net/ethernet/intel/igc/igc.h
++++ b/drivers/net/ethernet/intel/igc/igc.h
+@@ -505,6 +505,7 @@ struct igc_rx_buffer {
+ /* context wrapper around xdp_buff to provide access to descriptor metadata */
+ struct igc_xdp_buff {
+ struct xdp_buff xdp;
++ union igc_adv_rx_desc *rx_desc;
+ };
+
+ struct igc_q_vector {
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index c6169357f72fc..c0e21701e7817 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -2572,6 +2572,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
+ igc_rx_offset(rx_ring) + pkt_offset,
+ size, true);
+ xdp_buff_clear_frags_flag(&ctx.xdp);
++ ctx.rx_desc = rx_desc;
+
+ skb = igc_xdp_run_prog(adapter, &ctx.xdp);
+ }
+@@ -2698,6 +2699,15 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
+ napi_gro_receive(&q_vector->napi, skb);
+ }
+
++static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp)
++{
++ /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. The
++ * igc_xdp_buff shares its layout with xdp_buff_xsk and private
++ * igc_xdp_buff fields fall into xdp_buff_xsk->cb
++ */
++ return (struct igc_xdp_buff *)xdp;
++}
++
+ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
+ {
+ struct igc_adapter *adapter = q_vector->adapter;
+@@ -2716,6 +2726,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
+ while (likely(total_packets < budget)) {
+ union igc_adv_rx_desc *desc;
+ struct igc_rx_buffer *bi;
++ struct igc_xdp_buff *ctx;
+ ktime_t timestamp = 0;
+ unsigned int size;
+ int res;
+@@ -2733,6 +2744,9 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
+
+ bi = &ring->rx_buffer_info[ntc];
+
++ ctx = xsk_buff_to_igc_ctx(bi->xdp);
++ ctx->rx_desc = desc;
++
+ if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
+ timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
+ bi->xdp->data);
+@@ -6490,6 +6504,44 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
+ return value;
+ }
+
++/* Mapping HW RSS Type to enum xdp_rss_hash_type */
++static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = {
++ [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2,
++ [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP,
++ [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4,
++ [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP,
++ [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
++ [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6,
++ [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
++ [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP,
++ [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP,
++ [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX,
++ [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */
++ [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */
++ [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */
++ [13] = XDP_RSS_TYPE_NONE,
++ [14] = XDP_RSS_TYPE_NONE,
++ [15] = XDP_RSS_TYPE_NONE,
++};
++
++static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
++ enum xdp_rss_hash_type *rss_type)
++{
++ const struct igc_xdp_buff *ctx = (void *)_ctx;
++
++ if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))
++ return -ENODATA;
++
++ *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss);
++ *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)];
++
++ return 0;
++}
++
++static const struct xdp_metadata_ops igc_xdp_metadata_ops = {
++ .xmo_rx_hash = igc_xdp_rx_hash,
++};
++
+ /**
+ * igc_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+@@ -6563,6 +6615,7 @@ static int igc_probe(struct pci_dev *pdev,
+ hw->hw_addr = adapter->io_addr;
+
+ netdev->netdev_ops = &igc_netdev_ops;
++ netdev->xdp_metadata_ops = &igc_xdp_metadata_ops;
+ igc_ethtool_set_ops(netdev);
+ netdev->watchdog_timeo = 5 * HZ;
+
+--
+2.39.2
+
--- /dev/null
+From 2c4ff73ed926e50f603809bb4611a7f59b76753e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Jun 2023 16:07:10 +0200
+Subject: igc: Do not enable taprio offload for invalid arguments
+
+From: Florian Kauer <florian.kauer@linutronix.de>
+
+[ Upstream commit 82ff5f29b7377d614f0c01fd74b5d0cb225f0adc ]
+
+Only set adapter->taprio_offload_enable after validating the arguments.
+Otherwise, it stays set even if the offload was not enabled.
+Since the subsequent code does not get executed in case of invalid
+arguments, it will not be read at first.
+However, by activating and then deactivating another offload
+(e.g. ETF/TX launchtime offload), taprio_offload_enable is read
+and erroneously keeps the offload feature of the NIC enabled.
+
+This can be reproduced as follows:
+
+ # TAPRIO offload (flags == 0x2) and negative base-time leading to expected -ERANGE
+ sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \
+ num_tc 1 \
+ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \
+ queues 1@0 \
+ base-time -1000 \
+ sched-entry S 01 300000 \
+ flags 0x2
+
+ # IGC_TQAVCTRL is 0x0 as expected (iomem=relaxed for reading register)
+ sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1
+
+ # Activate ETF offload
+ sudo tc qdisc replace dev enp1s0 parent root handle 6666 mqprio \
+ num_tc 3 \
+ map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
+ queues 1@0 1@1 2@2 \
+ hw 0
+ sudo tc qdisc add dev enp1s0 parent 6666:1 etf \
+ clockid CLOCK_TAI \
+ delta 500000 \
+ offload
+
+ # IGC_TQAVCTRL is 0x9 as expected
+ sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1
+
+ # Deactivate ETF offload again
+ sudo tc qdisc delete dev enp1s0 parent 6666:1
+
+ # IGC_TQAVCTRL should now be 0x0 again, but is observed as 0x9
+ sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1
+
+Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv")
+Signed-off-by: Florian Kauer <florian.kauer@linutronix.de>
+Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 18 ++++++------------
+ 1 file changed, 6 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 6bed12224120f..f051ca733af1b 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -6090,6 +6090,7 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+
+ adapter->base_time = 0;
+ adapter->cycle_time = NSEC_PER_SEC;
++ adapter->taprio_offload_enable = false;
+ adapter->qbv_config_change_errors = 0;
+ adapter->qbv_transition = false;
+ adapter->qbv_count = 0;
+@@ -6117,20 +6118,12 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ size_t n;
+ int i;
+
+- switch (qopt->cmd) {
+- case TAPRIO_CMD_REPLACE:
+- adapter->taprio_offload_enable = true;
+- break;
+- case TAPRIO_CMD_DESTROY:
+- adapter->taprio_offload_enable = false;
+- break;
+- default:
+- return -EOPNOTSUPP;
+- }
+-
+- if (!adapter->taprio_offload_enable)
++ if (qopt->cmd == TAPRIO_CMD_DESTROY)
+ return igc_tsn_clear_schedule(adapter);
+
++ if (qopt->cmd != TAPRIO_CMD_REPLACE)
++ return -EOPNOTSUPP;
++
+ if (qopt->base_time < 0)
+ return -ERANGE;
+
+@@ -6142,6 +6135,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+
+ adapter->cycle_time = qopt->cycle_time;
+ adapter->base_time = qopt->base_time;
++ adapter->taprio_offload_enable = true;
+
+ igc_ptp_read(adapter, &now);
+
+--
+2.39.2
+
--- /dev/null
+From 6b68b0e96e9c61cab6fdc99d116c0f3373e355f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Jun 2023 16:07:14 +0200
+Subject: igc: Fix inserting of empty frame for launchtime
+
+From: Florian Kauer <florian.kauer@linutronix.de>
+
+[ Upstream commit 0bcc62858d6ba62cbade957d69745e6adeed5f3d ]
+
+The insertion of an empty frame was introduced with
+commit db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit")
+in order to ensure that the current cycle has at least one packet if
+there is some packet to be scheduled for the next cycle.
+
+However, the current implementation does not properly check if
+a packet is already scheduled for the current cycle. Currently,
+an empty packet is always inserted if and only if
+txtime >= end_of_cycle && txtime > last_tx_cycle
+but since last_tx_cycle is always either the end of the current
+cycle (end_of_cycle) or the end of a previous cycle, the
+second part (txtime > last_tx_cycle) is always true unless
+txtime == last_tx_cycle.
+
+What actually needs to be checked here is if the last_tx_cycle
+was already written within the current cycle, so an empty frame
+should only be inserted if and only if
+txtime >= end_of_cycle && end_of_cycle > last_tx_cycle.
+
+This patch does not only avoid an unnecessary insertion, but it
+can actually be harmful to insert an empty packet if packets
+are already scheduled in the current cycle, because it can lead
+to a situation where the empty packet is actually processed
+as the first packet in the upcoming cycle shifting the packet
+with the first_flag even one cycle into the future, finally leading
+to a TX hang.
+
+The TX hang can be reproduced on a i225 with:
+
+ sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \
+ num_tc 1 \
+ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \
+ queues 1@0 \
+ base-time 0 \
+ sched-entry S 01 300000 \
+ flags 0x1 \
+ txtime-delay 500000 \
+ clockid CLOCK_TAI
+ sudo tc qdisc replace dev enp1s0 parent 100:1 etf \
+ clockid CLOCK_TAI \
+ delta 500000 \
+ offload \
+ skip_sock_check
+
+and traffic generator
+
+ sudo trafgen -i traffic.cfg -o enp1s0 --cpp -n0 -q -t1400ns
+
+with traffic.cfg
+
+ #define ETH_P_IP 0x0800
+
+ {
+ /* Ethernet Header */
+ 0x30, 0x1f, 0x9a, 0xd0, 0xf0, 0x0e, # MAC Dest - adapt as needed
+ 0x24, 0x5e, 0xbe, 0x57, 0x2e, 0x36, # MAC Src - adapt as needed
+ const16(ETH_P_IP),
+
+ /* IPv4 Header */
+ 0b01000101, 0, # IPv4 version, IHL, TOS
+ const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header))
+ const16(2), # IPv4 ident
+ 0b01000000, 0, # IPv4 flags, fragmentation off
+ 64, # IPv4 TTL
+ 17, # Protocol UDP
+ csumip(14, 33), # IPv4 checksum
+
+ /* UDP Header */
+ 10, 0, 48, 1, # IP Src - adapt as needed
+ 10, 0, 48, 10, # IP Dest - adapt as needed
+ const16(5555), # UDP Src Port
+ const16(6666), # UDP Dest Port
+ const16(1008), # UDP length (UDP header 8 bytes + payload length)
+ csumudp(14, 34), # UDP checksum
+
+ /* Payload */
+ fill('W', 1000),
+ }
+
+and the observed message with that is for example
+
+ igc 0000:01:00.0 enp1s0: Detected Tx Unit Hang
+ Tx Queue <0>
+ TDH <32>
+ TDT <3c>
+ next_to_use <3c>
+ next_to_clean <32>
+ buffer_info[next_to_clean]
+ time_stamp <ffff26a8>
+ next_to_watch <00000000632a1828>
+ jiffies <ffff27f8>
+ desc.status <1048000>
+
+Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit")
+Signed-off-by: Florian Kauer <florian.kauer@linutronix.de>
+Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 96a2f6e6f6b8a..44aa4342cbbb5 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -1029,7 +1029,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
+ *first_flag = true;
+ ring->last_ff_cycle = baset_est;
+
+- if (ktime_compare(txtime, ring->last_tx_cycle) > 0)
++ if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
+ *insert_empty = true;
+ }
+ }
+--
+2.39.2
+
--- /dev/null
+From eb725bbffeb2dd4b8cf1e08c265041dc518ca66b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Jun 2023 16:07:13 +0200
+Subject: igc: Fix launchtime before start of cycle
+
+From: Florian Kauer <florian.kauer@linutronix.de>
+
+[ Upstream commit c1bca9ac0bcb355be11354c2e68bc7bf31f5ac5a ]
+
+It is possible (verified on a running system) that frames are processed
+by igc_tx_launchtime with a txtime before the start of the cycle
+(baset_est).
+
+However, the result of txtime - baset_est is written into a u32,
+leading to a wrap around to a positive number. The following
+launchtime > 0 check will only branch to executing launchtime = 0
+if launchtime is already 0.
+
+Fix it by using a s32 before checking launchtime > 0.
+
+Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit")
+Signed-off-by: Florian Kauer <florian.kauer@linutronix.de>
+Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 97eb3c390de9a..96a2f6e6f6b8a 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -1016,7 +1016,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
+ ktime_t base_time = adapter->base_time;
+ ktime_t now = ktime_get_clocktai();
+ ktime_t baset_est, end_of_cycle;
+- u32 launchtime;
++ s32 launchtime;
+ s64 n;
+
+ n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);
+--
+2.39.2
+
--- /dev/null
+From 42ea27f8a5e10b6150f8d50d0ee4ea639c8892e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 3 Jun 2023 20:59:34 +0800
+Subject: igc: Fix TX Hang issue when QBV Gate is closed
+
+From: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+
+[ Upstream commit 175c241288c09f81eb7b44d65c1ef6045efa4d1a ]
+
+If a user schedules a Gate Control List (GCL) to close one of
+the QBV gates while also transmitting a packet to that closed gate,
+TX Hang will be happen. HW would not drop any packet when the gate
+is closed and keep queuing up in HW TX FIFO until the gate is re-opened.
+This patch implements the solution to drop the packet for the closed
+gate.
+
+This patch will also reset the adapter to perform SW initialization
+for each 1st Gate Control List (GCL) to avoid hang.
+This is due to the HW design, where changing to TSN transmit mode
+requires SW initialization. Intel Discrete I225/6 transmit mode
+cannot be changed when in dynamic mode according to Software User
+Manual Section 7.5.2.1. Subsequent Gate Control List (GCL) operations
+will proceed without a reset, as they already are in TSN Mode.
+
+Step to reproduce:
+
+DUT:
+1) Configure GCL List with certain gate close.
+
+BASE=$(date +%s%N)
+tc qdisc replace dev $IFACE parent root handle 100 taprio \
+ num_tc 4 \
+ map 0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ base-time $BASE \
+ sched-entry S 0x8 500000 \
+ sched-entry S 0x4 500000 \
+ flags 0x2
+
+2) Transmit the packet to closed gate. You may use udp_tai
+application to transmit UDP packet to any of the closed gate.
+
+./udp_tai -i <interface> -P 100000 -p 90 -c 1 -t <0/1> -u 30004
+
+Fixes: ec50a9d437f0 ("igc: Add support for taprio offloading")
+Co-developed-by: Tan Tee Min <tee.min.tan@linux.intel.com>
+Signed-off-by: Tan Tee Min <tee.min.tan@linux.intel.com>
+Tested-by: Chwee Lin Choong <chwee.lin.choong@intel.com>
+Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc.h | 6 +++
+ drivers/net/ethernet/intel/igc/igc_main.c | 58 +++++++++++++++++++++--
+ drivers/net/ethernet/intel/igc/igc_tsn.c | 41 ++++++++++------
+ 3 files changed, 87 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
+index f09c6a65e3ab8..c0a07af36cb23 100644
+--- a/drivers/net/ethernet/intel/igc/igc.h
++++ b/drivers/net/ethernet/intel/igc/igc.h
+@@ -14,6 +14,7 @@
+ #include <linux/timecounter.h>
+ #include <linux/net_tstamp.h>
+ #include <linux/bitfield.h>
++#include <linux/hrtimer.h>
+
+ #include "igc_hw.h"
+
+@@ -101,6 +102,8 @@ struct igc_ring {
+ u32 start_time;
+ u32 end_time;
+ u32 max_sdu;
++ bool oper_gate_closed; /* Operating gate. True if the TX Queue is closed */
++ bool admin_gate_closed; /* Future gate. True if the TX Queue will be closed */
+
+ /* CBS parameters */
+ bool cbs_enable; /* indicates if CBS is enabled */
+@@ -160,6 +163,7 @@ struct igc_adapter {
+ struct timer_list watchdog_timer;
+ struct timer_list dma_err_timer;
+ struct timer_list phy_info_timer;
++ struct hrtimer hrtimer;
+
+ u32 wol;
+ u32 en_mng_pt;
+@@ -189,6 +193,8 @@ struct igc_adapter {
+ ktime_t cycle_time;
+ bool qbv_enable;
+ u32 qbv_config_change_errors;
++ bool qbv_transition;
++ unsigned int qbv_count;
+
+ /* OS defined structs */
+ struct pci_dev *pdev;
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index c0e21701e7817..826556e609800 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -1572,6 +1572,9 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+ first->bytecount = skb->len;
+ first->gso_segs = 1;
+
++ if (adapter->qbv_transition || tx_ring->oper_gate_closed)
++ goto out_drop;
++
+ if (tx_ring->max_sdu > 0) {
+ u32 max_sdu = 0;
+
+@@ -3004,8 +3007,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
+ time_after(jiffies, tx_buffer->time_stamp +
+ (adapter->tx_timeout_factor * HZ)) &&
+ !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) &&
+- (rd32(IGC_TDH(tx_ring->reg_idx)) !=
+- readl(tx_ring->tail))) {
++ (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) &&
++ !tx_ring->oper_gate_closed) {
+ /* detected Tx unit hang */
+ netdev_err(tx_ring->netdev,
+ "Detected Tx Unit Hang\n"
+@@ -6095,6 +6098,8 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+ adapter->base_time = 0;
+ adapter->cycle_time = NSEC_PER_SEC;
+ adapter->qbv_config_change_errors = 0;
++ adapter->qbv_transition = false;
++ adapter->qbv_count = 0;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *ring = adapter->tx_ring[i];
+@@ -6102,6 +6107,8 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+ ring->start_time = 0;
+ ring->end_time = NSEC_PER_SEC;
+ ring->max_sdu = 0;
++ ring->oper_gate_closed = false;
++ ring->admin_gate_closed = false;
+ }
+
+ return 0;
+@@ -6113,6 +6120,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ bool queue_configured[IGC_MAX_TX_QUEUES] = { };
+ struct igc_hw *hw = &adapter->hw;
+ u32 start_time = 0, end_time = 0;
++ struct timespec64 now;
+ size_t n;
+ int i;
+
+@@ -6133,6 +6141,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ adapter->cycle_time = qopt->cycle_time;
+ adapter->base_time = qopt->base_time;
+
++ igc_ptp_read(adapter, &now);
++
+ for (n = 0; n < qopt->num_entries; n++) {
+ struct tc_taprio_sched_entry *e = &qopt->entries[n];
+
+@@ -6167,7 +6177,10 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ ring->start_time = start_time;
+ ring->end_time = end_time;
+
+- queue_configured[i] = true;
++ if (ring->start_time >= adapter->cycle_time)
++ queue_configured[i] = false;
++ else
++ queue_configured[i] = true;
+ }
+
+ start_time += e->interval;
+@@ -6177,8 +6190,20 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ * If not, set the start and end time to be end time.
+ */
+ for (i = 0; i < adapter->num_tx_queues; i++) {
++ struct igc_ring *ring = adapter->tx_ring[i];
++
++ if (!is_base_time_past(qopt->base_time, &now)) {
++ ring->admin_gate_closed = false;
++ } else {
++ ring->oper_gate_closed = false;
++ ring->admin_gate_closed = false;
++ }
++
+ if (!queue_configured[i]) {
+- struct igc_ring *ring = adapter->tx_ring[i];
++ if (!is_base_time_past(qopt->base_time, &now))
++ ring->admin_gate_closed = true;
++ else
++ ring->oper_gate_closed = true;
+
+ ring->start_time = end_time;
+ ring->end_time = end_time;
+@@ -6542,6 +6567,27 @@ static const struct xdp_metadata_ops igc_xdp_metadata_ops = {
+ .xmo_rx_hash = igc_xdp_rx_hash,
+ };
+
++static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
++{
++ struct igc_adapter *adapter = container_of(timer, struct igc_adapter,
++ hrtimer);
++ unsigned int i;
++
++ adapter->qbv_transition = true;
++ for (i = 0; i < adapter->num_tx_queues; i++) {
++ struct igc_ring *tx_ring = adapter->tx_ring[i];
++
++ if (tx_ring->admin_gate_closed) {
++ tx_ring->admin_gate_closed = false;
++ tx_ring->oper_gate_closed = true;
++ } else {
++ tx_ring->oper_gate_closed = false;
++ }
++ }
++ adapter->qbv_transition = false;
++ return HRTIMER_NORESTART;
++}
++
+ /**
+ * igc_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+@@ -6720,6 +6766,9 @@ static int igc_probe(struct pci_dev *pdev,
+ INIT_WORK(&adapter->reset_task, igc_reset_task);
+ INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
+
++ hrtimer_init(&adapter->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
++ adapter->hrtimer.function = &igc_qbv_scheduling_timer;
++
+ /* Initialize link properties that are user-changeable */
+ adapter->fc_autoneg = true;
+ hw->mac.autoneg = true;
+@@ -6823,6 +6872,7 @@ static void igc_remove(struct pci_dev *pdev)
+
+ cancel_work_sync(&adapter->reset_task);
+ cancel_work_sync(&adapter->watchdog_task);
++ hrtimer_cancel(&adapter->hrtimer);
+
+ /* Release control of h/w to f/w. If f/w is AMT enabled, this
+ * would have already happened in close and is redundant.
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 6b299b83e7ef2..3cdb0c9887283 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -114,7 +114,6 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
+ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+ {
+ struct igc_hw *hw = &adapter->hw;
+- bool tsn_mode_reconfig = false;
+ u32 tqavctrl, baset_l, baset_h;
+ u32 sec, nsec, cycle;
+ ktime_t base_time, systim;
+@@ -228,11 +227,10 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+
+ tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS;
+
+- if (tqavctrl & IGC_TQAVCTRL_TRANSMIT_MODE_TSN)
+- tsn_mode_reconfig = true;
+-
+ tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV;
+
++ adapter->qbv_count++;
++
+ cycle = adapter->cycle_time;
+ base_time = adapter->base_time;
+
+@@ -250,17 +248,28 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+ */
+ if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) &&
+ (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) &&
+- tsn_mode_reconfig)
++ (adapter->qbv_count > 1))
+ adapter->qbv_config_change_errors++;
+ } else {
+- /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit
+- * has to be configured before the cycle time and base time.
+- * Tx won't hang if there is a GCL is already running,
+- * so in this case we don't need to set FutScdDis.
+- */
+- if (igc_is_device_id_i226(hw) &&
+- !(rd32(IGC_BASET_H) || rd32(IGC_BASET_L)))
+- tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS;
++ if (igc_is_device_id_i226(hw)) {
++ ktime_t adjust_time, expires_time;
++
++ /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit
++ * has to be configured before the cycle time and base time.
++ * Tx won't hang if a GCL is already running,
++ * so in this case we don't need to set FutScdDis.
++ */
++ if (!(rd32(IGC_BASET_H) || rd32(IGC_BASET_L)))
++ tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS;
++
++ nsec = rd32(IGC_SYSTIML);
++ sec = rd32(IGC_SYSTIMH);
++ systim = ktime_set(sec, nsec);
++
++ adjust_time = adapter->base_time;
++ expires_time = ktime_sub_ns(adjust_time, systim);
++ hrtimer_start(&adapter->hrtimer, expires_time, HRTIMER_MODE_REL);
++ }
+ }
+
+ wr32(IGC_TQAVCTRL, tqavctrl);
+@@ -306,7 +315,11 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter)
+ {
+ struct igc_hw *hw = &adapter->hw;
+
+- if (netif_running(adapter->netdev) && igc_is_device_id_i225(hw)) {
++ /* Per I225/6 HW Design Section 7.5.2.1, transmit mode
++ * cannot be changed dynamically. Require reset the adapter.
++ */
++ if (netif_running(adapter->netdev) &&
++ (igc_is_device_id_i225(hw) || !adapter->qbv_count)) {
+ schedule_work(&adapter->reset_task);
+ return 0;
+ }
+--
+2.39.2
+
--- /dev/null
+From 5b7662266cd1b15b3162ed4f1c1bf7509118f885 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Jun 2023 16:07:11 +0200
+Subject: igc: Handle already enabled taprio offload for basetime 0
+
+From: Florian Kauer <florian.kauer@linutronix.de>
+
+[ Upstream commit e5d88c53d03f8df864776431175d08c053645f50 ]
+
+Since commit e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv")
+it is possible to enable taprio offload with a basetime of 0.
+However, the check if taprio offload is already enabled (and thus -EALREADY
+should be returned for igc_save_qbv_schedule) still relied on
+adapter->base_time > 0.
+
+This can be reproduced as follows:
+
+ # TAPRIO offload (flags == 0x2) and base-time = 0
+ sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \
+ num_tc 1 \
+ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \
+ queues 1@0 \
+ base-time 0 \
+ sched-entry S 01 300000 \
+ flags 0x2
+
+ # The second call should fail with "Error: Device failed to setup taprio offload."
+ # But that only happens if base-time was != 0
+ sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \
+ num_tc 1 \
+ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \
+ queues 1@0 \
+ base-time 0 \
+ sched-entry S 01 300000 \
+ flags 0x2
+
+Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv")
+Signed-off-by: Florian Kauer <florian.kauer@linutronix.de>
+Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index f051ca733af1b..97eb3c390de9a 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -6127,7 +6127,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ if (qopt->base_time < 0)
+ return -ERANGE;
+
+- if (igc_is_device_id_i225(hw) && adapter->base_time)
++ if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable)
+ return -EALREADY;
+
+ if (!validate_schedule(adapter, qopt))
+--
+2.39.2
+
--- /dev/null
+From e831b1688eeeb1e282688d82ce7d06c77c59889d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 12:00:43 +0530
+Subject: igc: Handle PPS start time programming for past time values
+
+From: Aravindhan Gunasekaran <aravindhan.gunasekaran@intel.com>
+
+[ Upstream commit 84a192e46106355de1a314d709e657231d4b1026 ]
+
+I225/6 hardware can be programmed to start PPS output once
+the time in Target Time registers is reached. The time
+programmed in these registers should always be into future.
+Only then PPS output is triggered when SYSTIM register
+reaches the programmed value. There are two modes in i225/6
+hardware to program PPS, pulse and clock mode.
+
+There were issues reported where PPS is not generated when
+start time is in past.
+
+Example 1, "echo 0 0 0 2 0 > /sys/class/ptp/ptp0/period"
+
+In the current implementation, a value of '0' is programmed
+into Target time registers and PPS output is in pulse mode.
+Eventually an interrupt which is triggered upon SYSTIM
+register reaching Target time is not fired. Thus no PPS
+output is generated.
+
+Example 2, "echo 0 0 0 1 0 > /sys/class/ptp/ptp0/period"
+
+Above case, a value of '0' is programmed into Target time
+registers and PPS output is in clock mode. Here, HW tries to
+catch-up the current time by incrementing Target Time
+register. This catch-up time seem to vary according to
+programmed PPS period time as per the HW design. In my
+experiments, the delay ranged between few tens of seconds to
+few minutes. The PPS output is only generated after the
+Target time register reaches current time.
+
+In my experiments, I also observed PPS stopped working with
+below test and could not recover until module is removed and
+loaded again.
+
+1) echo 0 <future time> 0 1 0 > /sys/class/ptp/ptp1/period
+2) echo 0 0 0 1 0 > /sys/class/ptp/ptp1/period
+3) echo 0 0 0 1 0 > /sys/class/ptp/ptp1/period
+
+After this PPS did not work even if i re-program with proper
+values. I could only get this back working by reloading the
+driver.
+
+This patch takes care of calculating and programming
+appropriate future time value into Target Time registers.
+
+Fixes: 5e91c72e560c ("igc: Fix PPS delta between two synchronized end-points")
+Signed-off-by: Aravindhan Gunasekaran <aravindhan.gunasekaran@intel.com>
+Reviewed-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_ptp.c | 25 +++++++++++++++++++++---
+ 1 file changed, 22 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
+index 32ef112f8291a..f0b979a706552 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
++++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
+@@ -356,16 +356,35 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
+ tsim &= ~IGC_TSICR_TT0;
+ }
+ if (on) {
++ struct timespec64 safe_start;
+ int i = rq->perout.index;
+
+ igc_pin_perout(igc, i, pin, use_freq);
+- igc->perout[i].start.tv_sec = rq->perout.start.sec;
++ igc_ptp_read(igc, &safe_start);
++
++ /* PPS output start time is triggered by Target time(TT)
++ * register. Programming any past time value into TT
++ * register will cause PPS to never start. Need to make
++ * sure we program the TT register a time ahead in
++ * future. There isn't a stringent need to fire PPS out
++ * right away. Adding +2 seconds should take care of
++ * corner cases. Let's say if the SYSTIML is close to
++ * wrap up and the timer keeps ticking as we program the
++ * register, adding +2seconds is safe bet.
++ */
++ safe_start.tv_sec += 2;
++
++ if (rq->perout.start.sec < safe_start.tv_sec)
++ igc->perout[i].start.tv_sec = safe_start.tv_sec;
++ else
++ igc->perout[i].start.tv_sec = rq->perout.start.sec;
+ igc->perout[i].start.tv_nsec = rq->perout.start.nsec;
+ igc->perout[i].period.tv_sec = ts.tv_sec;
+ igc->perout[i].period.tv_nsec = ts.tv_nsec;
+- wr32(trgttimh, rq->perout.start.sec);
++ wr32(trgttimh, (u32)igc->perout[i].start.tv_sec);
+ /* For now, always select timer 0 as source. */
+- wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
++ wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec |
++ IGC_TT_IO_TIMER_SEL_SYSTIM0));
+ if (use_freq)
+ wr32(freqout, ns);
+ tsauxc |= tsauxc_mask;
+--
+2.39.2
+
--- /dev/null
+From 94e338939954b57bd1507d45a8dfdadaee968f5b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Jun 2023 11:28:42 +0800
+Subject: igc: Include the length/type field and VLAN tag in queueMaxSDU
+
+From: Tan Tee Min <tee.min.tan@linux.intel.com>
+
+[ Upstream commit 25102893e409bc02761ab82dbcfa092006404790 ]
+
+IEEE 802.1Q does not have clear definitions of what constitutes an
+SDU (Service Data Unit), but IEEE Std 802.3 clause 3.1.2 does define
+the MAC service primitives and clause 3.2.7 does define the MAC Client
+Data for Q-tagged frames.
+
+It shows that the mac_service_data_unit (MSDU) does NOT contain the
+preamble, destination and source address, or FCS. The MSDU does contain
+the length/type field, MAC client data, VLAN tag and any padding
+data (prior to the FCS).
+
+Thus, the maximum 802.3 frame size that is allowed to be transmitted
+should be QueueMaxSDU (MSDU) + 16 (6 byte SA + 6 byte DA + 4 byte FCS).
+
+Fixes: 92a0dcb8427d ("igc: offload queue max SDU from tc-taprio")
+Signed-off-by: Tan Tee Min <tee.min.tan@linux.intel.com>
+Reviewed-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 15 ++++-----------
+ 1 file changed, 4 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 826556e609800..e7bd2c60ee383 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -1575,16 +1575,9 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+ if (adapter->qbv_transition || tx_ring->oper_gate_closed)
+ goto out_drop;
+
+- if (tx_ring->max_sdu > 0) {
+- u32 max_sdu = 0;
+-
+- max_sdu = tx_ring->max_sdu +
+- (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0);
+-
+- if (first->bytecount > max_sdu) {
+- adapter->stats.txdrop++;
+- goto out_drop;
+- }
++ if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) {
++ adapter->stats.txdrop++;
++ goto out_drop;
+ }
+
+ if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
+@@ -6215,7 +6208,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ struct net_device *dev = adapter->netdev;
+
+ if (qopt->max_sdu[i])
+- ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len;
++ ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN;
+ else
+ ring->max_sdu = 0;
+ }
+--
+2.39.2
+
--- /dev/null
+From 61956d16d51fe7d6d2b7ae9f849d0920cdfc2e00 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Jun 2023 16:07:12 +0200
+Subject: igc: No strict mode in pure launchtime/CBS offload
+
+From: Florian Kauer <florian.kauer@linutronix.de>
+
+[ Upstream commit 8b86f10ab64eca0287ea8f7c94e9ad8b2e101c01 ]
+
+The flags IGC_TXQCTL_STRICT_CYCLE and IGC_TXQCTL_STRICT_END
+prevent the packet transmission over slot and cycle boundaries.
+This is important for taprio offload where the slots and
+cycles correspond to the slots and cycles configured for the
+network.
+
+However, the Qbv offload feature of the i225 is also used for
+enabling TX launchtime / ETF offload. In that case, however,
+the cycle has no meaning for the network and is only used
+internally to adapt the base time register after a second has
+passed.
+
+Enabling strict mode in this case would unnecessarily prevent
+the transmission of certain packets (i.e. at the boundary of a
+second) and thus interferes with the ETF qdisc that promises
+transmission at a certain point in time.
+
+Similar to ETF, this also applies to CBS offload that also should
+not be influenced by strict mode unless taprio offload would be
+enabled at the same time.
+
+This fully reverts
+commit d8f45be01dd9 ("igc: Use strict cycles for Qbv scheduling")
+but its commit message only describes what was already implemented
+before that commit. The difference to a plain revert of that commit
+is that it now copes with the base_time = 0 case that was fixed with
+commit e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv")
+
+In particular, enabling strict mode leads to TX hang situations
+under high traffic if taprio is applied WITHOUT taprio offload
+but WITH ETF offload, e.g. as in
+
+ sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \
+ num_tc 1 \
+ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \
+ queues 1@0 \
+ base-time 0 \
+ sched-entry S 01 300000 \
+ flags 0x1 \
+ txtime-delay 500000 \
+ clockid CLOCK_TAI
+ sudo tc qdisc replace dev enp1s0 parent 100:1 etf \
+ clockid CLOCK_TAI \
+ delta 500000 \
+ offload \
+ skip_sock_check
+
+and traffic generator
+
+ sudo trafgen -i traffic.cfg -o enp1s0 --cpp -n0 -q -t1400ns
+
+with traffic.cfg
+
+ #define ETH_P_IP 0x0800
+
+ {
+ /* Ethernet Header */
+ 0x30, 0x1f, 0x9a, 0xd0, 0xf0, 0x0e, # MAC Dest - adapt as needed
+ 0x24, 0x5e, 0xbe, 0x57, 0x2e, 0x36, # MAC Src - adapt as needed
+ const16(ETH_P_IP),
+
+ /* IPv4 Header */
+ 0b01000101, 0, # IPv4 version, IHL, TOS
+ const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header))
+ const16(2), # IPv4 ident
+ 0b01000000, 0, # IPv4 flags, fragmentation off
+ 64, # IPv4 TTL
+ 17, # Protocol UDP
+ csumip(14, 33), # IPv4 checksum
+
+ /* UDP Header */
+ 10, 0, 48, 1, # IP Src - adapt as needed
+ 10, 0, 48, 10, # IP Dest - adapt as needed
+ const16(5555), # UDP Src Port
+ const16(6666), # UDP Dest Port
+ const16(1008), # UDP length (UDP header 8 bytes + payload length)
+ csumudp(14, 34), # UDP checksum
+
+ /* Payload */
+ fill('W', 1000),
+ }
+
+and the observed message with that is for example
+
+ igc 0000:01:00.0 enp1s0: Detected Tx Unit Hang
+ Tx Queue <0>
+ TDH <d0>
+ TDT <f0>
+ next_to_use <f0>
+ next_to_clean <d0>
+ buffer_info[next_to_clean]
+ time_stamp <ffff661f>
+ next_to_watch <00000000245a4efb>
+ jiffies <ffff6e48>
+ desc.status <1048000>
+
+Fixes: d8f45be01dd9 ("igc: Use strict cycles for Qbv scheduling")
+Signed-off-by: Florian Kauer <florian.kauer@linutronix.de>
+Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_tsn.c | 24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index b76ebfc10b1d5..a9c08321aca90 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -132,8 +132,28 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+ wr32(IGC_STQT(i), ring->start_time);
+ wr32(IGC_ENDQT(i), ring->end_time);
+
+- txqctl |= IGC_TXQCTL_STRICT_CYCLE |
+- IGC_TXQCTL_STRICT_END;
++ if (adapter->taprio_offload_enable) {
++ /* If taprio_offload_enable is set we are in "taprio"
++ * mode and we need to be strict about the
++ * cycles: only transmit a packet if it can be
++ * completed during that cycle.
++ *
++ * If taprio_offload_enable is NOT true when
++ * enabling TSN offload, the cycle should have
++ * no external effects, but is only used internally
++ * to adapt the base time register after a second
++ * has passed.
++ *
++ * Enabling strict mode in this case would
++ * unnecessarily prevent the transmission of
++ * certain packets (i.e. at the boundary of a
++ * second) and thus interfere with the launchtime
++ * feature that promises transmission at a
++ * certain point in time.
++ */
++ txqctl |= IGC_TXQCTL_STRICT_CYCLE |
++ IGC_TXQCTL_STRICT_END;
++ }
+
+ if (ring->launchtime_enable)
+ txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
+--
+2.39.2
+
--- /dev/null
+From 73bfe462e0a94fef7a5dc2bb51a4d33dd891aa8e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 May 2023 08:18:12 +0800
+Subject: igc: Remove delay during TX ring configuration
+
+From: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+
+[ Upstream commit cca28ceac7c7857bc2d313777017585aef00bcc4 ]
+
+Remove unnecessary delay during the TX ring configuration.
+This will cause delay, especially during link down and
+link up activity.
+
+Furthermore, old SKUs like as I225 will call the reset_adapter
+to reset the controller during TSN mode Gate Control List (GCL)
+setting. This will add more time to the configuration of the
+real-time use case.
+
+It doesn't mentioned about this delay in the Software User Manual.
+It might have been ported from legacy code I210 in the past.
+
+Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings")
+Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+Acked-by: Sasha Neftin <sasha.neftin@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index a8815ccf7887d..b131c8f2b03df 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -711,7 +711,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter,
+ /* disable the queue */
+ wr32(IGC_TXDCTL(reg_idx), 0);
+ wrfl();
+- mdelay(10);
+
+ wr32(IGC_TDLEN(reg_idx),
+ ring->count * sizeof(union igc_adv_tx_desc));
+--
+2.39.2
+
--- /dev/null
+From d1a187de552f05b69cea2eec119c49c6b8d52d27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Jun 2023 16:07:09 +0200
+Subject: igc: Rename qbv_enable to taprio_offload_enable
+
+From: Florian Kauer <florian.kauer@linutronix.de>
+
+[ Upstream commit 8046063df887bee35c002224267ba46f41be7cf6 ]
+
+In the current implementation the flags adapter->qbv_enable
+and IGC_FLAG_TSN_QBV_ENABLED have a similar name, but do not
+have the same meaning. The first one is used only to indicate
+taprio offload (i.e. when igc_save_qbv_schedule was called),
+while the second one corresponds to the Qbv mode of the hardware.
+However, the second one is also used to support the TX launchtime
+feature, i.e. ETF qdisc offload. This leads to situations where
+adapter->qbv_enable is false, but the flag IGC_FLAG_TSN_QBV_ENABLED
+is set. This is prone to confusion.
+
+The rename should reduce this confusion. Since it is a pure
+rename, it has no impact on functionality.
+
+Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv")
+Signed-off-by: Florian Kauer <florian.kauer@linutronix.de>
+Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc.h | 2 +-
+ drivers/net/ethernet/intel/igc/igc_main.c | 6 +++---
+ drivers/net/ethernet/intel/igc/igc_tsn.c | 2 +-
+ 3 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
+index c0a07af36cb23..345d3a4e8ed44 100644
+--- a/drivers/net/ethernet/intel/igc/igc.h
++++ b/drivers/net/ethernet/intel/igc/igc.h
+@@ -191,7 +191,7 @@ struct igc_adapter {
+ int tc_setup_type;
+ ktime_t base_time;
+ ktime_t cycle_time;
+- bool qbv_enable;
++ bool taprio_offload_enable;
+ u32 qbv_config_change_errors;
+ bool qbv_transition;
+ unsigned int qbv_count;
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index ae986e44a4718..6bed12224120f 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -6119,16 +6119,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+
+ switch (qopt->cmd) {
+ case TAPRIO_CMD_REPLACE:
+- adapter->qbv_enable = true;
++ adapter->taprio_offload_enable = true;
+ break;
+ case TAPRIO_CMD_DESTROY:
+- adapter->qbv_enable = false;
++ adapter->taprio_offload_enable = false;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+- if (!adapter->qbv_enable)
++ if (!adapter->taprio_offload_enable)
+ return igc_tsn_clear_schedule(adapter);
+
+ if (qopt->base_time < 0)
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 3cdb0c9887283..b76ebfc10b1d5 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -37,7 +37,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter)
+ {
+ unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED;
+
+- if (adapter->qbv_enable)
++ if (adapter->taprio_offload_enable)
+ new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
+
+ if (is_any_launchtime(adapter))
+--
+2.39.2
+
--- /dev/null
+From 1d3082ab17cf6c7fcee80d799b5321636a266645 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Jun 2023 11:09:01 -0700
+Subject: igc: set TP bit in 'supported' and 'advertising' fields of
+ ethtool_link_ksettings
+
+From: Prasad Koya <prasad@arista.com>
+
+[ Upstream commit 9ac3fc2f42e5ffa1e927dcbffb71b15fa81459e2 ]
+
+set TP bit in the 'supported' and 'advertising' fields. i225/226 parts
+only support twisted pair copper.
+
+Fixes: 8c5ad0dae93c ("igc: Add ethtool support")
+Signed-off-by: Prasad Koya <prasad@arista.com>
+Acked-by: Sasha Neftin <sasha.neftin@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+index 0e2cb00622d1a..93bce729be76a 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
++++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+@@ -1708,6 +1708,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
+ /* twisted pair */
+ cmd->base.port = PORT_TP;
+ cmd->base.phy_address = hw->phy.addr;
++ ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
++ ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+
+ /* advertising link modes */
+ if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF)
+--
+2.39.2
+
--- /dev/null
+From 17988b459e31fad01e0ae817b91651d5da7ccab1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jul 2023 11:20:06 -0700
+Subject: ionic: remove WARN_ON to prevent panic_on_warn
+
+From: Nitya Sunkad <nitya.sunkad@amd.com>
+
+[ Upstream commit abfb2a58a5377ebab717d4362d6180f901b6e5c1 ]
+
+Remove unnecessary early code development check and the WARN_ON
+that it uses. The irq alloc and free paths have long been
+cleaned up and this check shouldn't have stuck around so long.
+
+Fixes: 77ceb68e29cc ("ionic: Add notifyq support")
+Signed-off-by: Nitya Sunkad <nitya.sunkad@amd.com>
+Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/pensando/ionic/ionic_lif.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+index 957027e546b30..e03a94f2469ab 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+@@ -474,11 +474,6 @@ static void ionic_qcqs_free(struct ionic_lif *lif)
+ static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq,
+ struct ionic_qcq *n_qcq)
+ {
+- if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) {
+- ionic_intr_free(n_qcq->cq.lif->ionic, n_qcq->intr.index);
+- n_qcq->flags &= ~IONIC_QCQ_F_INTR;
+- }
+-
+ n_qcq->intr.vector = src_qcq->intr.vector;
+ n_qcq->intr.index = src_qcq->intr.index;
+ n_qcq->napi_qcq = src_qcq->napi_qcq;
+--
+2.39.2
+
--- /dev/null
+From b5b491a50e6aee6415aa41b22d508b32edea7c17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 8 Jul 2023 14:59:10 +0800
+Subject: ipv6/addrconf: fix a potential refcount underflow for idev
+
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+
+[ Upstream commit 06a0716949c22e2aefb648526580671197151acc ]
+
+Now in addrconf_mod_rs_timer(), reference idev depends on whether
+rs_timer is not pending. Then modify rs_timer timeout.
+
+There is a time gap in [1], during which if the pending rs_timer
+becomes not pending. It will miss to hold idev, but the rs_timer
+is activated. Thus rs_timer callback function addrconf_rs_timer()
+will be executed and put idev later without holding idev. A refcount
+underflow issue for idev can be caused by this.
+
+ if (!timer_pending(&idev->rs_timer))
+ in6_dev_hold(idev);
+ <--------------[1]
+ mod_timer(&idev->rs_timer, jiffies + when);
+
+To fix the issue, hold idev if mod_timer() return 0.
+
+Fixes: b7b1bfce0bb6 ("ipv6: split duplicate address detection and router solicitation timer")
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/addrconf.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index 3797917237d03..5affca8e2f53a 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -318,9 +318,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
+ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
+ unsigned long when)
+ {
+- if (!timer_pending(&idev->rs_timer))
++ if (!mod_timer(&idev->rs_timer, jiffies + when))
+ in6_dev_hold(idev);
+- mod_timer(&idev->rs_timer, jiffies + when);
+ }
+
+ static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+--
+2.39.2
+
--- /dev/null
+From 8653c27f9ad4d0d299c6fdc3694facc1e45869ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jul 2023 07:28:53 +0300
+Subject: kernel/trace: Fix cleanup logic of enable_trace_eprobe
+
+From: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+
+[ Upstream commit cf0a624dc706c306294c14e6b3e7694702f25191 ]
+
+The enable_trace_eprobe() function enables all event probes, attached
+to given trace probe. If an error occurs in enabling one of the event
+probes, all others should be roll backed. There is a bug in that roll
+back logic - instead of all event probes, only the failed one is
+disabled.
+
+Link: https://lore.kernel.org/all/20230703042853.1427493-1-tz.stoyanov@gmail.com/
+
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
+Fixes: 7491e2c44278 ("tracing: Add a probe that attaches to trace events")
+Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_eprobe.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
+index 67e854979d53e..3f04f0ffe0d70 100644
+--- a/kernel/trace/trace_eprobe.c
++++ b/kernel/trace/trace_eprobe.c
+@@ -675,6 +675,7 @@ static int enable_trace_eprobe(struct trace_event_call *call,
+ struct trace_eprobe *ep;
+ bool enabled;
+ int ret = 0;
++ int cnt = 0;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+@@ -698,12 +699,25 @@ static int enable_trace_eprobe(struct trace_event_call *call,
+ if (ret)
+ break;
+ enabled = true;
++ cnt++;
+ }
+
+ if (ret) {
+ /* Failed to enable one of them. Roll back all */
+- if (enabled)
+- disable_eprobe(ep, file->tr);
++ if (enabled) {
++ /*
++ * It's a bug if one failed for something other than memory
++ * not being available but another eprobe succeeded.
++ */
++ WARN_ON_ONCE(ret != -ENOMEM);
++
++ list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
++ ep = container_of(pos, struct trace_eprobe, tp);
++ disable_eprobe(ep, file->tr);
++ if (!--cnt)
++ break;
++ }
++ }
+ if (file)
+ trace_probe_remove_file(tp, file);
+ else
+--
+2.39.2
+
--- /dev/null
+From da4d7c5cc9088c82294ef515fadc245827befeda Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 08:53:25 +0200
+Subject: net: bgmac: postpone turning IRQs off to avoid SoC hangs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rafał Miłecki <rafal@milecki.pl>
+
+[ Upstream commit e7731194fdf085f46d58b1adccfddbd0dfee4873 ]
+
+Turning IRQs off is done by accessing Ethernet controller registers.
+That can't be done until device's clock is enabled. It results in a SoC
+hang otherwise.
+
+This bug remained unnoticed for years as most bootloaders keep all
+Ethernet interfaces turned on. It seems to only affect a niche SoC
+family BCM47189. It has two Ethernet controllers but CFE bootloader uses
+only the first one.
+
+Fixes: 34322615cbaa ("net: bgmac: Mask interrupts during probe")
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bgmac.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
+index 1761df8fb7f96..10c7c232cc4ec 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -1492,8 +1492,6 @@ int bgmac_enet_probe(struct bgmac *bgmac)
+
+ bgmac->in_init = true;
+
+- bgmac_chip_intrs_off(bgmac);
+-
+ net_dev->irq = bgmac->irq;
+ SET_NETDEV_DEV(net_dev, bgmac->dev);
+ dev_set_drvdata(bgmac->dev, bgmac);
+@@ -1511,6 +1509,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
+ */
+ bgmac_clk_enable(bgmac, 0);
+
++ bgmac_chip_intrs_off(bgmac);
++
+ /* This seems to be fixing IRQ by assigning OOB #6 to the core */
+ if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) {
+ if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6)
+--
+2.39.2
+
--- /dev/null
+From 9c24e69609c9264256560c1ac114ed3bb51aed5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 13:44:21 +0300
+Subject: net: dsa: felix: make vsc9959_tas_guard_bands_update() visible to
+ ocelot->ops
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit c60819149b637d0f9f7f66e110d2a0d90a3993ea ]
+
+In a future change we will need to make
+ocelot_port_update_active_preemptible_tcs() call
+vsc9959_tas_guard_bands_update(), but that is currently not possible,
+since the ocelot switch lib does not have access to functions private to
+the DSA wrapper.
+
+Move the pointer to vsc9959_tas_guard_bands_update() from felix->info
+(which is private to the DSA driver) to ocelot->ops (which is also
+visible to the ocelot switch lib).
+
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Message-ID: <20230705104422.49025-3-vladimir.oltean@nxp.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: c6efb4ae387c ("net: mscc: ocelot: fix oversize frame dropping for preemptible TCs")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/ocelot/felix.c | 5 ++---
+ drivers/net/dsa/ocelot/felix.h | 1 -
+ drivers/net/dsa/ocelot/felix_vsc9959.c | 2 +-
+ include/soc/mscc/ocelot.h | 1 +
+ 4 files changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
+index 70c0e2b1936b3..8348da2b3c97a 100644
+--- a/drivers/net/dsa/ocelot/felix.c
++++ b/drivers/net/dsa/ocelot/felix.c
+@@ -1786,14 +1786,13 @@ static int felix_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+ {
+ struct ocelot *ocelot = ds->priv;
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+- struct felix *felix = ocelot_to_felix(ocelot);
+
+ ocelot_port_set_maxlen(ocelot, port, new_mtu);
+
+ mutex_lock(&ocelot->tas_lock);
+
+- if (ocelot_port->taprio && felix->info->tas_guard_bands_update)
+- felix->info->tas_guard_bands_update(ocelot, port);
++ if (ocelot_port->taprio && ocelot->ops->tas_guard_bands_update)
++ ocelot->ops->tas_guard_bands_update(ocelot, port);
+
+ mutex_unlock(&ocelot->tas_lock);
+
+diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
+index 96008c046da53..1d4befe7cfe8e 100644
+--- a/drivers/net/dsa/ocelot/felix.h
++++ b/drivers/net/dsa/ocelot/felix.h
+@@ -57,7 +57,6 @@ struct felix_info {
+ void (*mdio_bus_free)(struct ocelot *ocelot);
+ int (*port_setup_tc)(struct dsa_switch *ds, int port,
+ enum tc_setup_type type, void *type_data);
+- void (*tas_guard_bands_update)(struct ocelot *ocelot, int port);
+ void (*port_sched_speed_set)(struct ocelot *ocelot, int port,
+ u32 speed);
+ void (*phylink_mac_config)(struct ocelot *ocelot, int port,
+diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
+index d172a3e9736c4..219fb672a68d7 100644
+--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
+@@ -2600,6 +2600,7 @@ static const struct ocelot_ops vsc9959_ops = {
+ .cut_through_fwd = vsc9959_cut_through_fwd,
+ .tas_clock_adjust = vsc9959_tas_clock_adjust,
+ .update_stats = vsc9959_update_stats,
++ .tas_guard_bands_update = vsc9959_tas_guard_bands_update,
+ };
+
+ static const struct felix_info felix_info_vsc9959 = {
+@@ -2625,7 +2626,6 @@ static const struct felix_info felix_info_vsc9959 = {
+ .port_modes = vsc9959_port_modes,
+ .port_setup_tc = vsc9959_port_setup_tc,
+ .port_sched_speed_set = vsc9959_sched_speed_set,
+- .tas_guard_bands_update = vsc9959_tas_guard_bands_update,
+ };
+
+ /* The INTB interrupt is shared between for PTP TX timestamp availability
+diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
+index 22aae505c813b..85a726fb006ca 100644
+--- a/include/soc/mscc/ocelot.h
++++ b/include/soc/mscc/ocelot.h
+@@ -663,6 +663,7 @@ struct ocelot_ops {
+ struct flow_stats *stats);
+ void (*cut_through_fwd)(struct ocelot *ocelot);
+ void (*tas_clock_adjust)(struct ocelot *ocelot);
++ void (*tas_guard_bands_update)(struct ocelot *ocelot, int port);
+ void (*update_stats)(struct ocelot *ocelot);
+ };
+
+--
+2.39.2
+
--- /dev/null
+From b2967cb2e10684574c536ff370379d49e4f2d27b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 09:39:07 +0800
+Subject: net: dsa: qca8k: Add check for skb_copy
+
+From: Jiasheng Jiang <jiasheng@iscas.ac.cn>
+
+[ Upstream commit 87355b7c3da9bfd81935caba0ab763355147f7b0 ]
+
+Add check for the return value of skb_copy in order to avoid NULL pointer
+dereference.
+
+Fixes: 2cd548566384 ("net: dsa: qca8k: add support for phy read/write with mgmt Ethernet")
+Signed-off-by: Jiasheng Jiang <jiasheng@iscas.ac.cn>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/qca/qca8k-8xxx.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
+index 6d5ac7588a691..d775a14784f7e 100644
+--- a/drivers/net/dsa/qca/qca8k-8xxx.c
++++ b/drivers/net/dsa/qca/qca8k-8xxx.c
+@@ -588,6 +588,9 @@ qca8k_phy_eth_busy_wait(struct qca8k_mgmt_eth_data *mgmt_eth_data,
+ bool ack;
+ int ret;
+
++ if (!skb)
++ return -ENOMEM;
++
+ reinit_completion(&mgmt_eth_data->rw_done);
+
+ /* Increment seq_num and set it in the copy pkt */
+--
+2.39.2
+
--- /dev/null
+From e63a1d795279609a3479ddd881e2263853d8e98e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 11:18:59 +0800
+Subject: net: dsa: Removed unneeded of_node_put in felix_parse_ports_node
+
+From: Lu Hongfei <luhongfei@vivo.com>
+
+[ Upstream commit 04499f28b40bfc24f20b0e2331008bb90a54a6cf ]
+
+Remove unnecessary of_node_put from the continue path to prevent
+child node from being released twice, which could avoid resource
+leak or other unexpected issues.
+
+Signed-off-by: Lu Hongfei <luhongfei@vivo.com>
+Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Fixes: de879a016a94 ("net: dsa: felix: add functionality when not all ports are supported")
+Link: https://lore.kernel.org/r/20230710031859.36784-1-luhongfei@vivo.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/ocelot/felix.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
+index 8348da2b3c97a..d78b4bd4787e8 100644
+--- a/drivers/net/dsa/ocelot/felix.c
++++ b/drivers/net/dsa/ocelot/felix.c
+@@ -1286,7 +1286,6 @@ static int felix_parse_ports_node(struct felix *felix,
+ if (err < 0) {
+ dev_info(dev, "Unsupported PHY mode %s on port %d\n",
+ phy_modes(phy_mode), port);
+- of_node_put(child);
+
+ /* Leave port_phy_modes[port] = 0, which is also
+ * PHY_INTERFACE_MODE_NA. This will perform a
+--
+2.39.2
+
--- /dev/null
+From 04b9c610dfeb96ae1fa770e036bf8e8d46994305 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jul 2023 16:10:11 +0800
+Subject: net: fec: increase the size of tx ring and update tx_wake_threshold
+
+From: Wei Fang <wei.fang@nxp.com>
+
+[ Upstream commit 56b3c6ba53d0e9649ea5e4089b39cadde13aaef8 ]
+
+When the XDP feature is enabled and with heavy XDP frames to be
+transmitted, there is a considerable probability that available
+tx BDs are insufficient. This will lead to some XDP frames to be
+discarded and the "NOT enough BD for SG!" error log will appear
+in the console (as shown below).
+
+[ 160.013112] fec 30be0000.ethernet eth0: NOT enough BD for SG!
+[ 160.023116] fec 30be0000.ethernet eth0: NOT enough BD for SG!
+[ 160.028926] fec 30be0000.ethernet eth0: NOT enough BD for SG!
+[ 160.038946] fec 30be0000.ethernet eth0: NOT enough BD for SG!
+[ 160.044758] fec 30be0000.ethernet eth0: NOT enough BD for SG!
+
+In the case of heavy XDP traffic, sometimes the speed of recycling
+tx BDs may be slower than the speed of sending XDP frames. There
+may be several specific reasons, such as the interrupt is not
+responded to in time, the efficiency of the NAPI callback function is
+too low due to all the queues (tx queues and rx queues) share the
+same NAPI, and so on.
+
+After trying various methods, I think that increase the size of tx
+BD ring is simple and effective. Maybe the best resolution is that
+allocate NAPI for each queue to improve the efficiency of the NAPI
+callback, but this change is a bit big and I didn't try this method.
+Perhaps this method will be implemented in a future patch.
+
+This patch also updates the tx_wake_threshold of tx ring which is
+related to the size of tx ring in the previous logic. Otherwise,
+the tx_wake_threshold will be too high (403 BDs), which is more
+likely to impact the slow path in the case of heavy XDP traffic,
+because XDP path and slow path share the tx BD rings. According
+to Jakub's suggestion, the tx_wake_threshold is at least equal to
+tx_stop_threshold + 2 * MAX_SKB_FRAGS, if a queue of hundreds of
+entries is overflowing, we should be able to apply a hysteresis
+of a few tens of entries.
+
+Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support")
+Signed-off-by: Wei Fang <wei.fang@nxp.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec.h | 2 +-
+ drivers/net/ethernet/freescale/fec_main.c | 3 +--
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
+index 8c0226d061fec..63a053dea819d 100644
+--- a/drivers/net/ethernet/freescale/fec.h
++++ b/drivers/net/ethernet/freescale/fec.h
+@@ -355,7 +355,7 @@ struct bufdesc_ex {
+ #define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
+ #define FEC_ENET_TX_FRSIZE 2048
+ #define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE)
+-#define TX_RING_SIZE 512 /* Must be power of two */
++#define TX_RING_SIZE 1024 /* Must be power of two */
+ #define TX_RING_MOD_MASK 511 /* for this to work */
+
+ #define BD_ENET_RX_INT 0x00800000
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index e6ed36e5daefa..7659888a96917 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -3347,8 +3347,7 @@ static int fec_enet_alloc_queue(struct net_device *ndev)
+ fep->total_tx_ring_size += fep->tx_queue[i]->bd.ring_size;
+
+ txq->tx_stop_threshold = FEC_MAX_SKB_DESCS;
+- txq->tx_wake_threshold =
+- (txq->bd.ring_size - txq->tx_stop_threshold) / 2;
++ txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS;
+
+ txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev,
+ txq->bd.ring_size * TSO_HEADER_SIZE,
+--
+2.39.2
+
--- /dev/null
+From 9d13d0210d9db3c4192c3ce53045e1fe7e5217d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jul 2023 16:10:10 +0800
+Subject: net: fec: recycle pages for transmitted XDP frames
+
+From: Wei Fang <wei.fang@nxp.com>
+
+[ Upstream commit 20f797399035a8052dbd7297fdbe094079a9482e ]
+
+Once the XDP frames have been successfully transmitted through the
+ndo_xdp_xmit() interface, it's the driver responsibility to free
+the frames so that the page_pool can recycle the pages and reuse
+them. However, this action is not implemented in the fec driver.
+This leads to a user-visible problem that the console will print
+the following warning log.
+
+[ 157.568851] page_pool_release_retry() stalled pool shutdown 1389 inflight 60 sec
+[ 217.983446] page_pool_release_retry() stalled pool shutdown 1389 inflight 120 sec
+[ 278.399006] page_pool_release_retry() stalled pool shutdown 1389 inflight 181 sec
+[ 338.812885] page_pool_release_retry() stalled pool shutdown 1389 inflight 241 sec
+[ 399.226946] page_pool_release_retry() stalled pool shutdown 1389 inflight 302 sec
+
+Therefore, to solve this issue, we free XDP frames via xdp_return_frame()
+while cleaning the tx BD ring.
+
+Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support")
+Signed-off-by: Wei Fang <wei.fang@nxp.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec.h | 15 ++-
+ drivers/net/ethernet/freescale/fec_main.c | 148 +++++++++++++++-------
+ 2 files changed, 115 insertions(+), 48 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
+index 9939ccafb5566..8c0226d061fec 100644
+--- a/drivers/net/ethernet/freescale/fec.h
++++ b/drivers/net/ethernet/freescale/fec.h
+@@ -544,10 +544,23 @@ enum {
+ XDP_STATS_TOTAL,
+ };
+
++enum fec_txbuf_type {
++ FEC_TXBUF_T_SKB,
++ FEC_TXBUF_T_XDP_NDO,
++};
++
++struct fec_tx_buffer {
++ union {
++ struct sk_buff *skb;
++ struct xdp_frame *xdp;
++ };
++ enum fec_txbuf_type type;
++};
++
+ struct fec_enet_priv_tx_q {
+ struct bufdesc_prop bd;
+ unsigned char *tx_bounce[TX_RING_SIZE];
+- struct sk_buff *tx_skbuff[TX_RING_SIZE];
++ struct fec_tx_buffer tx_buf[TX_RING_SIZE];
+
+ unsigned short tx_stop_threshold;
+ unsigned short tx_wake_threshold;
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index 40d71be45f604..e6ed36e5daefa 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -397,7 +397,7 @@ static void fec_dump(struct net_device *ndev)
+ fec16_to_cpu(bdp->cbd_sc),
+ fec32_to_cpu(bdp->cbd_bufaddr),
+ fec16_to_cpu(bdp->cbd_datlen),
+- txq->tx_skbuff[index]);
++ txq->tx_buf[index].skb);
+ bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
+ index++;
+ } while (bdp != txq->bd.base);
+@@ -654,7 +654,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
+
+ index = fec_enet_get_bd_index(last_bdp, &txq->bd);
+ /* Save skb pointer */
+- txq->tx_skbuff[index] = skb;
++ txq->tx_buf[index].skb = skb;
+
+ /* Make sure the updates to rest of the descriptor are performed before
+ * transferring ownership.
+@@ -672,9 +672,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
+
+ skb_tx_timestamp(skb);
+
+- /* Make sure the update to bdp and tx_skbuff are performed before
+- * txq->bd.cur.
+- */
++ /* Make sure the update to bdp is performed before txq->bd.cur. */
+ wmb();
+ txq->bd.cur = bdp;
+
+@@ -862,7 +860,7 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq,
+ }
+
+ /* Save skb pointer */
+- txq->tx_skbuff[index] = skb;
++ txq->tx_buf[index].skb = skb;
+
+ skb_tx_timestamp(skb);
+ txq->bd.cur = bdp;
+@@ -952,16 +950,33 @@ static void fec_enet_bd_init(struct net_device *dev)
+ for (i = 0; i < txq->bd.ring_size; i++) {
+ /* Initialize the BD for every fragment in the page. */
+ bdp->cbd_sc = cpu_to_fec16(0);
+- if (bdp->cbd_bufaddr &&
+- !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+- dma_unmap_single(&fep->pdev->dev,
+- fec32_to_cpu(bdp->cbd_bufaddr),
+- fec16_to_cpu(bdp->cbd_datlen),
+- DMA_TO_DEVICE);
+- if (txq->tx_skbuff[i]) {
+- dev_kfree_skb_any(txq->tx_skbuff[i]);
+- txq->tx_skbuff[i] = NULL;
++ if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) {
++ if (bdp->cbd_bufaddr &&
++ !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
++ dma_unmap_single(&fep->pdev->dev,
++ fec32_to_cpu(bdp->cbd_bufaddr),
++ fec16_to_cpu(bdp->cbd_datlen),
++ DMA_TO_DEVICE);
++ if (txq->tx_buf[i].skb) {
++ dev_kfree_skb_any(txq->tx_buf[i].skb);
++ txq->tx_buf[i].skb = NULL;
++ }
++ } else {
++ if (bdp->cbd_bufaddr)
++ dma_unmap_single(&fep->pdev->dev,
++ fec32_to_cpu(bdp->cbd_bufaddr),
++ fec16_to_cpu(bdp->cbd_datlen),
++ DMA_TO_DEVICE);
++
++ if (txq->tx_buf[i].xdp) {
++ xdp_return_frame(txq->tx_buf[i].xdp);
++ txq->tx_buf[i].xdp = NULL;
++ }
++
++ /* restore default tx buffer type: FEC_TXBUF_T_SKB */
++ txq->tx_buf[i].type = FEC_TXBUF_T_SKB;
+ }
++
+ bdp->cbd_bufaddr = cpu_to_fec32(0);
+ bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
+ }
+@@ -1360,6 +1375,7 @@ static void
+ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
+ {
+ struct fec_enet_private *fep;
++ struct xdp_frame *xdpf;
+ struct bufdesc *bdp;
+ unsigned short status;
+ struct sk_buff *skb;
+@@ -1387,16 +1403,31 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
+
+ index = fec_enet_get_bd_index(bdp, &txq->bd);
+
+- skb = txq->tx_skbuff[index];
+- txq->tx_skbuff[index] = NULL;
+- if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+- dma_unmap_single(&fep->pdev->dev,
+- fec32_to_cpu(bdp->cbd_bufaddr),
+- fec16_to_cpu(bdp->cbd_datlen),
+- DMA_TO_DEVICE);
+- bdp->cbd_bufaddr = cpu_to_fec32(0);
+- if (!skb)
+- goto skb_done;
++ if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) {
++ skb = txq->tx_buf[index].skb;
++ txq->tx_buf[index].skb = NULL;
++ if (bdp->cbd_bufaddr &&
++ !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
++ dma_unmap_single(&fep->pdev->dev,
++ fec32_to_cpu(bdp->cbd_bufaddr),
++ fec16_to_cpu(bdp->cbd_datlen),
++ DMA_TO_DEVICE);
++ bdp->cbd_bufaddr = cpu_to_fec32(0);
++ if (!skb)
++ goto tx_buf_done;
++ } else {
++ xdpf = txq->tx_buf[index].xdp;
++ if (bdp->cbd_bufaddr)
++ dma_unmap_single(&fep->pdev->dev,
++ fec32_to_cpu(bdp->cbd_bufaddr),
++ fec16_to_cpu(bdp->cbd_datlen),
++ DMA_TO_DEVICE);
++ bdp->cbd_bufaddr = cpu_to_fec32(0);
++ if (!xdpf) {
++ txq->tx_buf[index].type = FEC_TXBUF_T_SKB;
++ goto tx_buf_done;
++ }
++ }
+
+ /* Check for errors. */
+ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
+@@ -1415,21 +1446,11 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
+ ndev->stats.tx_carrier_errors++;
+ } else {
+ ndev->stats.tx_packets++;
+- ndev->stats.tx_bytes += skb->len;
+- }
+
+- /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
+- * are to time stamp the packet, so we still need to check time
+- * stamping enabled flag.
+- */
+- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
+- fep->hwts_tx_en) &&
+- fep->bufdesc_ex) {
+- struct skb_shared_hwtstamps shhwtstamps;
+- struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+-
+- fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps);
+- skb_tstamp_tx(skb, &shhwtstamps);
++ if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB)
++ ndev->stats.tx_bytes += skb->len;
++ else
++ ndev->stats.tx_bytes += xdpf->len;
+ }
+
+ /* Deferred means some collisions occurred during transmit,
+@@ -1438,10 +1459,32 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
+ if (status & BD_ENET_TX_DEF)
+ ndev->stats.collisions++;
+
+- /* Free the sk buffer associated with this last transmit */
+- dev_kfree_skb_any(skb);
+-skb_done:
+- /* Make sure the update to bdp and tx_skbuff are performed
++ if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) {
++ /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
++ * are to time stamp the packet, so we still need to check time
++ * stamping enabled flag.
++ */
++ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
++ fep->hwts_tx_en) && fep->bufdesc_ex) {
++ struct skb_shared_hwtstamps shhwtstamps;
++ struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
++
++ fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps);
++ skb_tstamp_tx(skb, &shhwtstamps);
++ }
++
++ /* Free the sk buffer associated with this last transmit */
++ dev_kfree_skb_any(skb);
++ } else {
++ xdp_return_frame(xdpf);
++
++ txq->tx_buf[index].xdp = NULL;
++ /* restore default tx buffer type: FEC_TXBUF_T_SKB */
++ txq->tx_buf[index].type = FEC_TXBUF_T_SKB;
++ }
++
++tx_buf_done:
++ /* Make sure the update to bdp and tx_buf are performed
+ * before dirty_tx
+ */
+ wmb();
+@@ -3247,9 +3290,19 @@ static void fec_enet_free_buffers(struct net_device *ndev)
+ for (i = 0; i < txq->bd.ring_size; i++) {
+ kfree(txq->tx_bounce[i]);
+ txq->tx_bounce[i] = NULL;
+- skb = txq->tx_skbuff[i];
+- txq->tx_skbuff[i] = NULL;
+- dev_kfree_skb(skb);
++
++ if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) {
++ skb = txq->tx_buf[i].skb;
++ txq->tx_buf[i].skb = NULL;
++ dev_kfree_skb(skb);
++ } else {
++ if (txq->tx_buf[i].xdp) {
++ xdp_return_frame(txq->tx_buf[i].xdp);
++ txq->tx_buf[i].xdp = NULL;
++ }
++
++ txq->tx_buf[i].type = FEC_TXBUF_T_SKB;
++ }
+ }
+ }
+ }
+@@ -3809,7 +3862,8 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
+ ebdp->cbd_esc = cpu_to_fec32(estatus);
+ }
+
+- txq->tx_skbuff[index] = NULL;
++ txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO;
++ txq->tx_buf[index].xdp = frame;
+
+ /* Make sure the updates to rest of the descriptor are performed before
+ * transferring ownership.
+--
+2.39.2
+
--- /dev/null
+From b9d9804caf6a9776a5772f3c89ce55fe04f60fdc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 May 2023 10:26:15 +0800
+Subject: net: fec: remove last_bdp from fec_enet_txq_xmit_frame()
+
+From: Wei Fang <wei.fang@nxp.com>
+
+[ Upstream commit bc638eabfed90fdc798fd5765e67e41abea76152 ]
+
+The last_bdp is initialized to bdp, and both last_bdp and bdp are
+not changed. That is to say that last_bdp and bdp are always equal.
+So bdp can be used directly.
+
+Signed-off-by: Wei Fang <wei.fang@nxp.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/20230529022615.669589-1-wei.fang@nxp.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 20f797399035 ("net: fec: recycle pages for transmitted XDP frames")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index c08331f7da7b3..40d71be45f604 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -3770,7 +3770,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
+ struct xdp_frame *frame)
+ {
+ unsigned int index, status, estatus;
+- struct bufdesc *bdp, *last_bdp;
++ struct bufdesc *bdp;
+ dma_addr_t dma_addr;
+ int entries_free;
+
+@@ -3782,7 +3782,6 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
+
+ /* Fill in a Tx ring entry */
+ bdp = txq->bd.cur;
+- last_bdp = bdp;
+ status = fec16_to_cpu(bdp->cbd_sc);
+ status &= ~BD_ENET_TX_STATS;
+
+@@ -3810,7 +3809,6 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
+ ebdp->cbd_esc = cpu_to_fec32(estatus);
+ }
+
+- index = fec_enet_get_bd_index(last_bdp, &txq->bd);
+ txq->tx_skbuff[index] = NULL;
+
+ /* Make sure the updates to rest of the descriptor are performed before
+@@ -3825,7 +3823,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
+ bdp->cbd_sc = cpu_to_fec16(status);
+
+ /* If this was the last BD in the ring, start at the beginning again. */
+- bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd);
++ bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
+
+ /* Make sure the update to bdp are performed before txq->bd.cur. */
+ dma_wmb();
+--
+2.39.2
+
--- /dev/null
+From f09a2cd78c7abcfe06f26e33813370a6caced0b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 May 2023 10:01:13 +0800
+Subject: net: fec: remove useless fec_enet_reset_skb()
+
+From: Wei Fang <wei.fang@nxp.com>
+
+[ Upstream commit 2ae9c66b04554bf5b3eeaab8c12a0bfb9f28ebde ]
+
+This patch is a cleanup for fec driver. The fec_enet_reset_skb()
+is used to free skb buffers for tx queues and is only invoked in
+fec_restart(). However, fec_enet_bd_init() also resets skb buffers
+and is invoked in fec_restart() too. So fec_enet_reset_skb() is
+redundant and useless.
+
+Signed-off-by: Wei Fang <wei.fang@nxp.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 20f797399035 ("net: fec: recycle pages for transmitted XDP frames")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c | 21 ---------------------
+ 1 file changed, 21 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index 38e5b5abe067c..c08331f7da7b3 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -1011,24 +1011,6 @@ static void fec_enet_enable_ring(struct net_device *ndev)
+ }
+ }
+
+-static void fec_enet_reset_skb(struct net_device *ndev)
+-{
+- struct fec_enet_private *fep = netdev_priv(ndev);
+- struct fec_enet_priv_tx_q *txq;
+- int i, j;
+-
+- for (i = 0; i < fep->num_tx_queues; i++) {
+- txq = fep->tx_queue[i];
+-
+- for (j = 0; j < txq->bd.ring_size; j++) {
+- if (txq->tx_skbuff[j]) {
+- dev_kfree_skb_any(txq->tx_skbuff[j]);
+- txq->tx_skbuff[j] = NULL;
+- }
+- }
+- }
+-}
+-
+ /*
+ * This function is called to start or restart the FEC during a link
+ * change, transmit timeout, or to reconfigure the FEC. The network
+@@ -1071,9 +1053,6 @@ fec_restart(struct net_device *ndev)
+
+ fec_enet_enable_ring(ndev);
+
+- /* Reset tx SKB buffers. */
+- fec_enet_reset_skb(ndev);
+-
+ /* Enable MII mode */
+ if (fep->full_duplex == DUPLEX_FULL) {
+ /* FD enable */
+--
+2.39.2
+
--- /dev/null
+From a34d2ad002d2b201833052ecee081b78e238b9e3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 14:07:03 +0300
+Subject: net/mlx5: Query hca_cap_2 only when supported
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+[ Upstream commit 6496357aa5f710eec96f91345b9da1b37c3231f6 ]
+
+On vport enable, where fw's hca caps are queried, the driver queries
+hca_caps_2 without checking if fw truly supports them, causing a false
+failure of vfs vport load and blocking SRIOV enablement on old devices
+such as CX4 where hca_caps_2 support is missing.
+
+Thus, add a check for the said caps support before accessing them.
+
+Fixes: e5b9642a33be ("net/mlx5: E-Switch, Implement devlink port function cmds to control migratable")
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 901c53751b0aa..f81c6d8d5e0f4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -800,6 +800,9 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce);
+
++ if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2))
++ goto out_free;
++
+ memset(query_ctx, 0, query_out_sz);
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
+ MLX5_CAP_GENERAL_2);
+--
+2.39.2
+
--- /dev/null
+From 52acc5f90291c9483639677ab04e86981279f2ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Jun 2023 20:36:41 -0700
+Subject: net/mlx5: Register a unique thermal zone per device
+
+From: Saeed Mahameed <saeedm@nvidia.com>
+
+[ Upstream commit 631079e08aa4a20b73e70de4cf457886194f029f ]
+
+Prior to this patch only one "mlx5" thermal zone could have been
+registered regardless of the number of individual mlx5 devices in the
+system.
+
+To fix this setup a unique name per device to register its own thermal
+zone.
+
+In order to not register a thermal zone for a virtual device (VF/SF) add
+a check for PF device type.
+
+The new name is a concatenation between "mlx5_" and "<PCI_DEV_BDF>", which
+will also help associating a thermal zone with its PCI device.
+
+$ lspci | grep ConnectX
+00:04.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx]
+00:05.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx]
+
+$ cat /sys/devices/virtual/thermal/thermal_zone0/type
+mlx5_0000:00:04.0
+$ cat /sys/devices/virtual/thermal/thermal_zone1/type
+mlx5_0000:00:05.0
+
+Fixes: c1fef618d611 ("net/mlx5: Implement thermal zone")
+CC: Sandipan Patra <spatra@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/thermal.c | 19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c
+index e47fa6fb836f1..89a22ff04cb60 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c
+@@ -68,14 +68,19 @@ static struct thermal_zone_device_ops mlx5_thermal_ops = {
+
+ int mlx5_thermal_init(struct mlx5_core_dev *mdev)
+ {
++ char data[THERMAL_NAME_LENGTH];
+ struct mlx5_thermal *thermal;
+- struct thermal_zone_device *tzd;
+- const char *data = "mlx5";
++ int err;
+
+- tzd = thermal_zone_get_zone_by_name(data);
+- if (!IS_ERR(tzd))
++ if (!mlx5_core_is_pf(mdev) && !mlx5_core_is_ecpf(mdev))
+ return 0;
+
++ err = snprintf(data, sizeof(data), "mlx5_%s", dev_name(mdev->device));
++ if (err < 0 || err >= sizeof(data)) {
++ mlx5_core_err(mdev, "Failed to setup thermal zone name, %d\n", err);
++ return -EINVAL;
++ }
++
+ thermal = kzalloc(sizeof(*thermal), GFP_KERNEL);
+ if (!thermal)
+ return -ENOMEM;
+@@ -88,10 +93,10 @@ int mlx5_thermal_init(struct mlx5_core_dev *mdev)
+ &mlx5_thermal_ops,
+ NULL, 0, MLX5_THERMAL_POLL_INT_MSEC);
+ if (IS_ERR(thermal->tzdev)) {
+- dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n",
+- data, PTR_ERR(thermal->tzdev));
++ err = PTR_ERR(thermal->tzdev);
++ mlx5_core_err(mdev, "Failed to register thermal zone device (%s) %d\n", data, err);
+ kfree(thermal);
+- return -EINVAL;
++ return err;
+ }
+
+ mdev->thermal = thermal;
+--
+2.39.2
+
--- /dev/null
+From ff417c1989674958a9b8ca169580be9bde998850 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Jun 2023 09:32:10 +0200
+Subject: net/mlx5e: Check for NOT_READY flag state after locking
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit 65e64640e97c0f223e77f9ea69b5a46186b93470 ]
+
+Currently the check for NOT_READY flag is performed before obtaining the
+necessary lock. This opens a possibility for race condition when the flow
+is concurrently removed from unready_flows list by the workqueue task,
+which causes a double-removal from the list and a crash[0]. Fix the issue
+by moving the flag check inside the section protected by
+uplink_priv->unready_flows_lock mutex.
+
+[0]:
+[44376.389654] general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] SMP
+[44376.391665] CPU: 7 PID: 59123 Comm: tc Not tainted 6.4.0-rc4+ #1
+[44376.392984] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[44376.395342] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core]
+[44376.396857] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06
+[44376.399167] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246
+[44376.399680] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00
+[44376.400337] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0
+[44376.401001] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001
+[44376.401663] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000
+[44376.402342] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000
+[44376.402999] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000
+[44376.403787] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[44376.404343] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0
+[44376.405004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[44376.405665] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[44376.406339] Call Trace:
+[44376.406651] <TASK>
+[44376.406939] ? die_addr+0x33/0x90
+[44376.407311] ? exc_general_protection+0x192/0x390
+[44376.407795] ? asm_exc_general_protection+0x22/0x30
+[44376.408292] ? mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core]
+[44376.408876] __mlx5e_tc_del_fdb_peer_flow+0xbc/0xe0 [mlx5_core]
+[44376.409482] mlx5e_tc_del_flow+0x42/0x210 [mlx5_core]
+[44376.410055] mlx5e_flow_put+0x25/0x50 [mlx5_core]
+[44376.410529] mlx5e_delete_flower+0x24b/0x350 [mlx5_core]
+[44376.411043] tc_setup_cb_reoffload+0x22/0x80
+[44376.411462] fl_reoffload+0x261/0x2f0 [cls_flower]
+[44376.411907] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core]
+[44376.412481] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core]
+[44376.413044] tcf_block_playback_offloads+0x76/0x170
+[44376.413497] tcf_block_unbind+0x7b/0xd0
+[44376.413881] tcf_block_setup+0x17d/0x1c0
+[44376.414269] tcf_block_offload_cmd.isra.0+0xf1/0x130
+[44376.414725] tcf_block_offload_unbind+0x43/0x70
+[44376.415153] __tcf_block_put+0x82/0x150
+[44376.415532] ingress_destroy+0x22/0x30 [sch_ingress]
+[44376.415986] qdisc_destroy+0x3b/0xd0
+[44376.416343] qdisc_graft+0x4d0/0x620
+[44376.416706] tc_get_qdisc+0x1c9/0x3b0
+[44376.417074] rtnetlink_rcv_msg+0x29c/0x390
+[44376.419978] ? rep_movs_alternative+0x3a/0xa0
+[44376.420399] ? rtnl_calcit.isra.0+0x120/0x120
+[44376.420813] netlink_rcv_skb+0x54/0x100
+[44376.421192] netlink_unicast+0x1f6/0x2c0
+[44376.421573] netlink_sendmsg+0x232/0x4a0
+[44376.421980] sock_sendmsg+0x38/0x60
+[44376.422328] ____sys_sendmsg+0x1d0/0x1e0
+[44376.422709] ? copy_msghdr_from_user+0x6d/0xa0
+[44376.423127] ___sys_sendmsg+0x80/0xc0
+[44376.423495] ? ___sys_recvmsg+0x8b/0xc0
+[44376.423869] __sys_sendmsg+0x51/0x90
+[44376.424226] do_syscall_64+0x3d/0x90
+[44376.424587] entry_SYSCALL_64_after_hwframe+0x46/0xb0
+[44376.425046] RIP: 0033:0x7f045134f887
+[44376.425403] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
+[44376.426914] RSP: 002b:00007ffd63a82b98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+[44376.427592] RAX: ffffffffffffffda RBX: 000000006481955f RCX: 00007f045134f887
+[44376.428195] RDX: 0000000000000000 RSI: 00007ffd63a82c00 RDI: 0000000000000003
+[44376.428796] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
+[44376.429404] R10: 00007f0451208708 R11: 0000000000000246 R12: 0000000000000001
+[44376.430039] R13: 0000000000409980 R14: 000000000047e538 R15: 0000000000485400
+[44376.430644] </TASK>
+[44376.430907] Modules linked in: mlx5_ib mlx5_core act_mirred act_tunnel_key cls_flower vxlan dummy sch_ingress openvswitch nsh rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_g
+ss_krb5 auth_rpcgss oid_registry overlay zram zsmalloc fuse [last unloaded: mlx5_core]
+[44376.433936] ---[ end trace 0000000000000000 ]---
+[44376.434373] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core]
+[44376.434951] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06
+[44376.436452] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246
+[44376.436924] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00
+[44376.437530] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0
+[44376.438179] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001
+[44376.438786] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000
+[44376.439393] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000
+[44376.439998] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000
+[44376.440714] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[44376.441225] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0
+[44376.441843] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[44376.442471] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+
+Fixes: ad86755b18d5 ("net/mlx5e: Protect unready flows with dedicated lock")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index b9b1da751a3b8..ed05ac8ae1de5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1639,7 +1639,8 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
+ uplink_priv = &rpriv->uplink_priv;
+
+ mutex_lock(&uplink_priv->unready_flows_lock);
+- unready_flow_del(flow);
++ if (flow_flag_test(flow, NOT_READY))
++ unready_flow_del(flow);
+ mutex_unlock(&uplink_priv->unready_flows_lock);
+ }
+
+@@ -1932,8 +1933,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
+ esw_attr = attr->esw_attr;
+ mlx5e_put_flow_tunnel_id(flow);
+
+- if (flow_flag_test(flow, NOT_READY))
+- remove_unready_flow(flow);
++ remove_unready_flow(flow);
+
+ if (mlx5e_is_offloaded_flow(flow)) {
+ if (flow_flag_test(flow, SLOW))
+--
+2.39.2
+
--- /dev/null
+From d7e4e0b298f7e025a8d1d477793ef1fc0ddad78a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 08:59:34 +0800
+Subject: net/mlx5e: fix double free in mlx5e_destroy_flow_table
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit 884abe45a9014d0de2e6edb0630dfd64f23f1d1b ]
+
+In function accel_fs_tcp_create_groups(), when the ft->g memory is
+successfully allocated but the 'in' memory fails to be allocated, the
+memory pointed to by ft->g is released once. And in function
+accel_fs_tcp_create_table, mlx5e_destroy_flow_table is called to release
+the memory pointed to by ft->g again. This will cause double free problem.
+
+Fixes: c062d52ac24c ("net/mlx5e: Receive flow steering framework for accelerated TCP flows")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
+index 88a5aed9d6781..c7d191f66ad1b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
+@@ -190,6 +190,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft,
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
++ ft->g = NULL;
+ kvfree(in);
+ return -ENOMEM;
+ }
+--
+2.39.2
+
--- /dev/null
+From 98b9d970ea12c3f1cdc033d3db131647cf868f8c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jun 2023 09:49:02 +0800
+Subject: net/mlx5e: fix memory leak in mlx5e_fs_tt_redirect_any_create
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit 3250affdc658557a41df9c5fb567723e421f8bf2 ]
+
+The memory pointed to by the fs->any pointer is not freed in the error
+path of mlx5e_fs_tt_redirect_any_create, which can lead to a memory leak.
+Fix by freeing the memory in the error path, thereby making the error path
+identical to mlx5e_fs_tt_redirect_any_destroy().
+
+Fixes: 0f575c20bf06 ("net/mlx5e: Introduce Flow Steering ANY API")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Reviewed-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
+index 03cb79adf912f..be83ad9db82a4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
+@@ -594,7 +594,7 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
+
+ err = fs_any_create_table(fs);
+ if (err)
+- return err;
++ goto err_free_any;
+
+ err = fs_any_enable(fs);
+ if (err)
+@@ -606,8 +606,8 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
+
+ err_destroy_table:
+ fs_any_destroy_table(fs_any);
+-
+- kfree(fs_any);
++err_free_any:
+ mlx5e_fs_set_any(fs, NULL);
++ kfree(fs_any);
+ return err;
+ }
+--
+2.39.2
+
--- /dev/null
+From c7fe10d9e258be469ad14d38f7e422c7c5b53fc7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jun 2023 09:49:03 +0800
+Subject: net/mlx5e: fix memory leak in mlx5e_ptp_open
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit d543b649ffe58a0cb4b6948b3305069c5980a1fa ]
+
+When kvzalloc_node or kvzalloc failed in mlx5e_ptp_open, the memory
+pointed to by "c" or "cparams" is not freed, which can lead to a memory
+leak. Fix by freeing the array in the error path.
+
+Fixes: 145e5637d941 ("net/mlx5e: Add TX PTP port object support")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+Reviewed-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+index 3cbebfba582bd..b0b429a0321ed 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+@@ -729,8 +729,10 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev)));
+ cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL);
+- if (!c || !cparams)
+- return -ENOMEM;
++ if (!c || !cparams) {
++ err = -ENOMEM;
++ goto err_free;
++ }
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+--
+2.39.2
+
--- /dev/null
+From f7c41e83186a2321edeb0617b48d595a94c0f87b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 21:18:53 +0300
+Subject: net/mlx5e: RX, Fix flush and close release flow of regular rq for
+ legacy rq
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+[ Upstream commit 2e2d1965794d22fbe86df45bf4f933216743577d ]
+
+Regular (non-XSK) RQs get flushed on XSK setup and re-activated on XSK
+close. If the same regular RQ is closed (a config change for example)
+soon after the XSK close, a double release occurs because the missing
+wqes get released a second time.
+
+Fixes: 3f93f82988bc ("net/mlx5e: RX, Defer page release in legacy rq for better recycling")
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+index 69634829558e2..111f6a4a64b64 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -390,10 +390,18 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
+ {
+ struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
+
+- if (rq->xsk_pool)
++ if (rq->xsk_pool) {
+ mlx5e_xsk_free_rx_wqe(wi);
+- else
++ } else {
+ mlx5e_free_rx_wqe(rq, wi);
++
++ /* Avoid a second release of the wqe pages: dealloc is called
++ * for the same missing wqes on regular RQ flush and on regular
++ * RQ close. This happens when XSK RQs come into play.
++ */
++ for (int i = 0; i < rq->wqe.info.num_frags; i++, wi++)
++ wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
++ }
+ }
+
+ static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+--
+2.39.2
+
--- /dev/null
+From 59ad05f1f9dbd8088f1ca0e32352df75c7c095a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 May 2023 21:18:49 +0300
+Subject: net/mlx5e: RX, Fix page_pool page fragment tracking for XDP
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+[ Upstream commit 7abd955a58fb0fcd4e756fa2065c03ae488fcfa7 ]
+
+Currently mlx5e releases pages directly to the page_pool for XDP_TX and
+does page fragment counting for XDP_REDIRECT. RX pages from the
+page_pool are leaking on XDP_REDIRECT because the xdp core will release
+only one fragment out of MLX5E_PAGECNT_BIAS_MAX and subsequently the page
+is marked as "skip release" which avoids the driver release.
+
+A fix would be to take an extra fragment for XDP_REDIRECT and not set the
+"skip release" bit so that the release on the driver side can handle the
+remaining bias fragments. But this would be a shortsighted solution.
+Instead, this patch converges the two XDP paths (XDP_TX and XDP_REDIRECT) to
+always do fragment tracking. The "skip release" bit is no longer
+necessary for XDP.
+
+Fixes: 6f5742846053 ("net/mlx5e: RX, Enable skb page recycling through the page_pool")
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 3 +-
+ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 32 +++++++------------
+ 2 files changed, 13 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+index f0e6095809faf..40589cebb7730 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+@@ -662,8 +662,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
+ /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
+ * as we know this is a page_pool page.
+ */
+- page_pool_put_defragged_page(page->pp,
+- page, -1, true);
++ page_pool_recycle_direct(page->pp, page);
+ } while (++n < num);
+
+ break;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+index 111f6a4a64b64..08e08489f4220 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -1753,11 +1753,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+- if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
++ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ struct mlx5e_wqe_frag_info *pwi;
+
+ for (pwi = head_wi; pwi < wi; pwi++)
+- pwi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
++ pwi->frag_page->frags++;
+ }
+ return NULL; /* page/packet was consumed by XDP */
+ }
+@@ -1827,12 +1827,8 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+ rq, wi, cqe, cqe_bcnt);
+ if (!skb) {
+ /* probably for XDP */
+- if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+- /* do not return page to cache,
+- * it will be returned on XDP_TX completion.
+- */
+- wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
+- }
++ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
++ wi->frag_page->frags++;
+ goto wq_cyc_pop;
+ }
+
+@@ -1878,12 +1874,8 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+ rq, wi, cqe, cqe_bcnt);
+ if (!skb) {
+ /* probably for XDP */
+- if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+- /* do not return page to cache,
+- * it will be returned on XDP_TX completion.
+- */
+- wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
+- }
++ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
++ wi->frag_page->frags++;
+ goto wq_cyc_pop;
+ }
+
+@@ -2062,12 +2054,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
+ if (prog) {
+ if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+- int i;
++ struct mlx5e_frag_page *pfp;
++
++ for (pfp = head_page; pfp < frag_page; pfp++)
++ pfp->frags++;
+
+- for (i = 0; i < sinfo->nr_frags; i++)
+- /* non-atomic */
+- __set_bit(page_idx + i, wi->skip_release_bitmap);
+- return NULL;
++ wi->linear_page.frags++;
+ }
+ mlx5e_page_release_fragmented(rq, &wi->linear_page);
+ return NULL; /* page/packet was consumed by XDP */
+@@ -2165,7 +2157,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ cqe_bcnt, &mxbuf);
+ if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+- __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
++ frag_page->frags++;
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+--
+2.39.2
+
--- /dev/null
+From e31e91b5b2ccba22c8394e1c6632516613d95bbb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 4 Jun 2023 12:45:38 +0300
+Subject: net/mlx5e: TC, CT: Offload ct clear only once
+
+From: Yevgeny Kliteynik <kliteyn@nvidia.com>
+
+[ Upstream commit f7a485115ad4cfc560833942014bf791abf1f827 ]
+
+Non-clear CT action causes a flow rule split, while CT clear action
+doesn't and is just a header-rewrite to the current flow rule.
+But ct offload is done in post_parse and is per ct action instance,
+so ct clear offload is parsed multiple times, while it's deleted once.
+
+Fix this by post_parsing the ct action only once per flow attribute
+(which is per flow rule) by using an offloaded ct_attr flag.
+
+Fixes: 08fe94ec5f77 ("net/mlx5e: TC, Remove special handling of CT action")
+Signed-off-by: Paul Blakey <paulb@nvidia.com>
+Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 14 +++++++++++---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 1 +
+ 2 files changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+index a254e728ac954..fadfa8b50bebe 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+@@ -1545,7 +1545,8 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+
+ attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */
+ attr->ct_attr.zone = act->ct.zone;
+- attr->ct_attr.nf_ft = act->ct.flow_table;
++ if (!(act->ct.action & TCA_CT_ACT_CLEAR))
++ attr->ct_attr.nf_ft = act->ct.flow_table;
+ attr->ct_attr.act_miss_cookie = act->miss_cookie;
+
+ return 0;
+@@ -1990,6 +1991,9 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att
+ if (!priv)
+ return -EOPNOTSUPP;
+
++ if (attr->ct_attr.offloaded)
++ return 0;
++
+ if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) {
+ err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts,
+ 0, 0, 0, 0);
+@@ -1999,11 +2003,15 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ }
+
+- if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */
++ if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */
++ attr->ct_attr.offloaded = true;
+ return 0;
++ }
+
+ mutex_lock(&priv->control_lock);
+ err = __mlx5_tc_ct_flow_offload(priv, attr);
++ if (!err)
++ attr->ct_attr.offloaded = true;
+ mutex_unlock(&priv->control_lock);
+
+ return err;
+@@ -2021,7 +2029,7 @@ void
+ mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr)
+ {
+- if (!attr->ct_attr.ft) /* no ct action, return */
++ if (!attr->ct_attr.offloaded) /* no ct action, return */
+ return;
+ if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */
+ return;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+index 8e9316fa46d4b..b66c5f98067f7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+@@ -29,6 +29,7 @@ struct mlx5_ct_attr {
+ u32 ct_labels_id;
+ u32 act_miss_mapping;
+ u64 act_miss_cookie;
++ bool offloaded;
+ struct mlx5_ct_ft *ft;
+ };
+
+--
+2.39.2
+
--- /dev/null
+From fea32e0ddf5b2d1794d710488bd9e12dea65d7ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 13:44:22 +0300
+Subject: net: mscc: ocelot: fix oversize frame dropping for preemptible TCs
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit c6efb4ae387c79bf0d4da286108c810b7b40de3c ]
+
+This switch implements Hold/Release in a strange way, with no control
+from the user as required by IEEE 802.1Q-2018 through Set-And-Hold-MAC
+and Set-And-Release-MAC, but rather, it emits HOLD requests implicitly
+based on the schedule.
+
+Namely, when the gate of a preemptible TC is about to close (actually
+QSYS::PREEMPTION_CFG.HOLD_ADVANCE octet times in advance of this event),
+the QSYS seems to emit a HOLD request pulse towards the MAC which
+preempts the currently transmitted packet, and further packets are held
+back in the queue system.
+
+This allows large frames to be squeezed through small time slots,
+because HOLD requests initiated by the gate events result in the frame
+being segmented in multiple fragments, the bit time of which is equal to
+the size of the time slot.
+
+It has been reported that the vsc9959_tas_guard_bands_update() logic
+breaks this, because it doesn't take preemptible TCs into account, and
+enables oversized frame dropping when the time slot doesn't allow a full
+MTU to be sent, but it does allow 2*minFragSize to be sent (128B).
+Packets larger than 128B are dropped instead of being sent in multiple
+fragments.
+
+Confusingly, the manual says:
+
+| For guard band, SDU calculation of a traffic class of a port, if
+| preemption is enabled (through 'QSYS::PREEMPTION_CFG.P_QUEUES') then
+| QSYS::PREEMPTION_CFG.HOLD_ADVANCE is used, otherwise
+| QSYS::QMAXSDU_CFG_*.QMAXSDU_* is used.
+
+but this only refers to the static guard band durations, and the
+QMAXSDU_CFG_* registers have dual purpose - the other being oversized
+frame dropping, which takes place irrespective of whether frames are
+preemptible or express.
+
+So, to fix the problem, we need to call vsc9959_tas_guard_bands_update()
+from ocelot_port_update_active_preemptible_tcs(), and modify the guard
+band logic to consider a different (lower) oversize limit for
+preemptible traffic classes.
+
+Fixes: 403ffc2c34de ("net: mscc: ocelot: add support for preemptible traffic classes")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Message-ID: <20230705104422.49025-4-vladimir.oltean@nxp.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/ocelot/felix_vsc9959.c | 21 +++++++++++++++++----
+ drivers/net/ethernet/mscc/ocelot_mm.c | 7 +++++--
+ 2 files changed, 22 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
+index 219fb672a68d7..bd11f9fb95e54 100644
+--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
+@@ -1221,11 +1221,13 @@ static u32 vsc9959_tas_tc_max_sdu(struct tc_taprio_qopt_offload *taprio, int tc)
+ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
+ {
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
++ struct ocelot_mm_state *mm = &ocelot->mm[port];
+ struct tc_taprio_qopt_offload *taprio;
+ u64 min_gate_len[OCELOT_NUM_TC];
++ u32 val, maxlen, add_frag_size;
++ u64 needed_min_frag_time_ps;
+ int speed, picos_per_byte;
+ u64 needed_bit_time_ps;
+- u32 val, maxlen;
+ u8 tas_speed;
+ int tc;
+
+@@ -1265,9 +1267,18 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
+ */
+ needed_bit_time_ps = (u64)(maxlen + 24) * picos_per_byte;
+
++ /* Preemptible TCs don't need to pass a full MTU, the port will
++ * automatically emit a HOLD request when a preemptible TC gate closes
++ */
++ val = ocelot_read_rix(ocelot, QSYS_PREEMPTION_CFG, port);
++ add_frag_size = QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_X(val);
++ needed_min_frag_time_ps = picos_per_byte *
++ (u64)(24 + 2 * ethtool_mm_frag_size_add_to_min(add_frag_size));
++
+ dev_dbg(ocelot->dev,
+- "port %d: max frame size %d needs %llu ps at speed %d\n",
+- port, maxlen, needed_bit_time_ps, speed);
++ "port %d: max frame size %d needs %llu ps, %llu ps for mPackets at speed %d\n",
++ port, maxlen, needed_bit_time_ps, needed_min_frag_time_ps,
++ speed);
+
+ vsc9959_tas_min_gate_lengths(taprio, min_gate_len);
+
+@@ -1281,7 +1292,9 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
+ remaining_gate_len_ps =
+ vsc9959_tas_remaining_gate_len_ps(min_gate_len[tc]);
+
+- if (remaining_gate_len_ps > needed_bit_time_ps) {
++ if ((mm->active_preemptible_tcs & BIT(tc)) ?
++ remaining_gate_len_ps > needed_min_frag_time_ps :
++ remaining_gate_len_ps > needed_bit_time_ps) {
+ /* Setting QMAXSDU_CFG to 0 disables oversized frame
+ * dropping.
+ */
+diff --git a/drivers/net/ethernet/mscc/ocelot_mm.c b/drivers/net/ethernet/mscc/ocelot_mm.c
+index fb3145118d686..99b29d1e62449 100644
+--- a/drivers/net/ethernet/mscc/ocelot_mm.c
++++ b/drivers/net/ethernet/mscc/ocelot_mm.c
+@@ -67,10 +67,13 @@ void ocelot_port_update_active_preemptible_tcs(struct ocelot *ocelot, int port)
+ val = mm->preemptible_tcs;
+
+ /* Cut through switching doesn't work for preemptible priorities,
+- * so first make sure it is disabled.
++ * so first make sure it is disabled. Also, changing the preemptible
++ * TCs affects the oversized frame dropping logic, so that needs to be
++ * re-triggered. And since tas_guard_bands_update() also implicitly
++ * calls cut_through_fwd(), we don't need to explicitly call it.
+ */
+ mm->active_preemptible_tcs = val;
+- ocelot->ops->cut_through_fwd(ocelot);
++ ocelot->ops->tas_guard_bands_update(ocelot, port);
+
+ dev_dbg(ocelot->dev,
+ "port %d %s/%s, MM TX %s, preemptible TCs 0x%x, active 0x%x\n",
+--
+2.39.2
+
--- /dev/null
+From bbbecf69e697b454d49b4534c8578fac9b3928f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 07:37:12 +0200
+Subject: net: mvneta: fix txq_map in case of txq_number==1
+
+From: Klaus Kudielka <klaus.kudielka@gmail.com>
+
+[ Upstream commit 21327f81db6337c8843ce755b01523c7d3df715b ]
+
+If we boot with mvneta.txq_number=1, the txq_map is set incorrectly:
+MVNETA_CPU_TXQ_ACCESS(1) refers to TX queue 1, but only TX queue 0 is
+initialized. Fix this.
+
+Fixes: 50bf8cb6fc9c ("net: mvneta: Configure XPS support")
+Signed-off-by: Klaus Kudielka <klaus.kudielka@gmail.com>
+Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
+Link: https://lore.kernel.org/r/20230705053712.3914-1-klaus.kudielka@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
+index 2cad76d0a50ef..4401fad31fb98 100644
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1505,7 +1505,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
+ */
+ if (txq_number == 1)
+ txq_map = (cpu == pp->rxq_def) ?
+- MVNETA_CPU_TXQ_ACCESS(1) : 0;
++ MVNETA_CPU_TXQ_ACCESS(0) : 0;
+
+ } else {
+ txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+@@ -4295,7 +4295,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp)
+ */
+ if (txq_number == 1)
+ txq_map = (cpu == elected_cpu) ?
+- MVNETA_CPU_TXQ_ACCESS(1) : 0;
++ MVNETA_CPU_TXQ_ACCESS(0) : 0;
+ else
+ txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) &
+ MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+--
+2.39.2
+
--- /dev/null
+From e38314d12545f455b99b83eca2878449aefaef47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 10:11:10 +0200
+Subject: net: prevent skb corruption on frag list segmentation
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit c329b261afe71197d9da83c1f18eb45a7e97e089 ]
+
+Ian reported several skb corruptions triggered by rx-gro-list,
+collecting different oops alike:
+
+[ 62.624003] BUG: kernel NULL pointer dereference, address: 00000000000000c0
+[ 62.631083] #PF: supervisor read access in kernel mode
+[ 62.636312] #PF: error_code(0x0000) - not-present page
+[ 62.641541] PGD 0 P4D 0
+[ 62.644174] Oops: 0000 [#1] PREEMPT SMP NOPTI
+[ 62.648629] CPU: 1 PID: 913 Comm: napi/eno2-79 Not tainted 6.4.0 #364
+[ 62.655162] Hardware name: Supermicro Super Server/A2SDi-12C-HLN4F, BIOS 1.7a 10/13/2022
+[ 62.663344] RIP: 0010:__udp_gso_segment (./include/linux/skbuff.h:2858
+./include/linux/udp.h:23 net/ipv4/udp_offload.c:228 net/ipv4/udp_offload.c:261
+net/ipv4/udp_offload.c:277)
+[ 62.687193] RSP: 0018:ffffbd3a83b4f868 EFLAGS: 00010246
+[ 62.692515] RAX: 00000000000000ce RBX: 0000000000000000 RCX: 0000000000000000
+[ 62.699743] RDX: ffffa124def8a000 RSI: 0000000000000079 RDI: ffffa125952a14d4
+[ 62.706970] RBP: ffffa124def8a000 R08: 0000000000000022 R09: 00002000001558c9
+[ 62.714199] R10: 0000000000000000 R11: 00000000be554639 R12: 00000000000000e2
+[ 62.721426] R13: ffffa125952a1400 R14: ffffa125952a1400 R15: 00002000001558c9
+[ 62.728654] FS: 0000000000000000(0000) GS:ffffa127efa40000(0000)
+knlGS:0000000000000000
+[ 62.736852] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 62.742702] CR2: 00000000000000c0 CR3: 00000001034b0000 CR4: 00000000003526e0
+[ 62.749948] Call Trace:
+[ 62.752498] <TASK>
+[ 62.779267] inet_gso_segment (net/ipv4/af_inet.c:1398)
+[ 62.787605] skb_mac_gso_segment (net/core/gro.c:141)
+[ 62.791906] __skb_gso_segment (net/core/dev.c:3403 (discriminator 2))
+[ 62.800492] validate_xmit_skb (./include/linux/netdevice.h:4862
+net/core/dev.c:3659)
+[ 62.804695] validate_xmit_skb_list (net/core/dev.c:3710)
+[ 62.809158] sch_direct_xmit (net/sched/sch_generic.c:330)
+[ 62.813198] __dev_queue_xmit (net/core/dev.c:3805 net/core/dev.c:4210)
+net/netfilter/core.c:626)
+[ 62.821093] br_dev_queue_push_xmit (net/bridge/br_forward.c:55)
+[ 62.825652] maybe_deliver (net/bridge/br_forward.c:193)
+[ 62.829420] br_flood (net/bridge/br_forward.c:233)
+[ 62.832758] br_handle_frame_finish (net/bridge/br_input.c:215)
+[ 62.837403] br_handle_frame (net/bridge/br_input.c:298
+net/bridge/br_input.c:416)
+[ 62.851417] __netif_receive_skb_core.constprop.0 (net/core/dev.c:5387)
+[ 62.866114] __netif_receive_skb_list_core (net/core/dev.c:5570)
+[ 62.871367] netif_receive_skb_list_internal (net/core/dev.c:5638
+net/core/dev.c:5727)
+[ 62.876795] napi_complete_done (./include/linux/list.h:37
+./include/net/gro.h:434 ./include/net/gro.h:429 net/core/dev.c:6067)
+[ 62.881004] ixgbe_poll (drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:3191)
+[ 62.893534] __napi_poll (net/core/dev.c:6498)
+[ 62.897133] napi_threaded_poll (./include/linux/netpoll.h:89
+net/core/dev.c:6640)
+[ 62.905276] kthread (kernel/kthread.c:379)
+[ 62.913435] ret_from_fork (arch/x86/entry/entry_64.S:314)
+[ 62.917119] </TASK>
+
+In the critical scenario, rx-gro-list GRO-ed packets are fed, via a
+bridge, both to the local input path and to an egress device (tun).
+
+The segmentation of such packets unsafely writes to the cloned skbs
+with shared heads.
+
+This change addresses the issue by uncloning as needed the
+to-be-segmented skbs.
+
+Reported-by: Ian Kumlien <ian.kumlien@gmail.com>
+Tested-by: Ian Kumlien <ian.kumlien@gmail.com>
+Fixes: 3a1296a38d0c ("net: Support GRO/GSO fraglist chaining.")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skbuff.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index cea28d30abb55..1b6a1d99869dc 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4270,6 +4270,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
+
+ skb_push(skb, -skb_network_offset(skb) + offset);
+
++ /* Ensure the head is writeable before touching the shared info */
++ err = skb_unclone(skb, GFP_ATOMIC);
++ if (err)
++ goto err_linearize;
++
+ skb_shinfo(skb)->frag_list = NULL;
+
+ while (list_skb) {
+--
+2.39.2
+
--- /dev/null
+From f6a83e813f95e57de0c82a8c14e364de598faf03 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 12:15:30 -0400
+Subject: net/sched: cls_fw: Fix improper refcount update leads to
+ use-after-free
+
+From: M A Ramdhan <ramdhan@starlabs.sg>
+
+[ Upstream commit 0323bce598eea038714f941ce2b22541c46d488f ]
+
+In the event of a failure in tcf_change_indev(), fw_set_parms() will
+immediately return an error after incrementing or decrementing
+the reference counter in tcf_bind_filter(). If an attacker can drive
+the reference counter to zero, the reference is freed, leading to a
+use-after-free.
+
+In order to prevent this, move the point of possible failure above the
+point where the TC_FW_CLASSID is handled.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: M A Ramdhan <ramdhan@starlabs.sg>
+Signed-off-by: M A Ramdhan <ramdhan@starlabs.sg>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Message-ID: <20230705161530.52003-1-ramdhan@starlabs.sg>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_fw.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
+index ae9439a6c56c9..8641f80593179 100644
+--- a/net/sched/cls_fw.c
++++ b/net/sched/cls_fw.c
+@@ -212,11 +212,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
+ if (err < 0)
+ return err;
+
+- if (tb[TCA_FW_CLASSID]) {
+- f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
+- tcf_bind_filter(tp, &f->res, base);
+- }
+-
+ if (tb[TCA_FW_INDEV]) {
+ int ret;
+ ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
+@@ -233,6 +228,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
+ } else if (head->mask != 0xFFFFFFFF)
+ return err;
+
++ if (tb[TCA_FW_CLASSID]) {
++ f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
++ tcf_bind_filter(tp, &f->res, base);
++ }
++
+ return 0;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From deadee03222c098c10faf927753a1bb2e00a2762 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 10:08:09 +0300
+Subject: net/sched: flower: Ensure both minimum and maximum ports are
+ specified
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit d3f87278bcb80bd7f9519669d928b43320363d4f ]
+
+The kernel does not currently validate that both the minimum and maximum
+ports of a port range are specified. This can lead user space to think
+that a filter matching on a port range was successfully added, when in
+fact it was not. For example, with a patched (buggy) iproute2 that only
+sends the minimum port, the following commands do not return an error:
+
+ # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp src_port 100-200 action pass
+
+ # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp dst_port 100-200 action pass
+
+ # tc filter show dev swp1 ingress
+ filter protocol ip pref 1 flower chain 0
+ filter protocol ip pref 1 flower chain 0 handle 0x1
+ eth_type ipv4
+ ip_proto udp
+ not_in_hw
+ action order 1: gact action pass
+ random type none pass val 0
+ index 1 ref 1 bind 1
+
+ filter protocol ip pref 1 flower chain 0 handle 0x2
+ eth_type ipv4
+ ip_proto udp
+ not_in_hw
+ action order 1: gact action pass
+ random type none pass val 0
+ index 2 ref 1 bind 1
+
+Fix by returning an error unless both ports are specified:
+
+ # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp src_port 100-200 action pass
+ Error: Both min and max source ports must be specified.
+ We have an error talking to the kernel
+
+ # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp dst_port 100-200 action pass
+ Error: Both min and max destination ports must be specified.
+ We have an error talking to the kernel
+
+Fixes: 5c72299fba9d ("net: sched: cls_flower: Classify packets using port ranges")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_flower.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
+index 815c3e416bc54..652158f612fc2 100644
+--- a/net/sched/cls_flower.c
++++ b/net/sched/cls_flower.c
+@@ -799,6 +799,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
+ TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
+
++ if (mask->tp_range.tp_min.dst != mask->tp_range.tp_max.dst) {
++ NL_SET_ERR_MSG(extack,
++ "Both min and max destination ports must be specified");
++ return -EINVAL;
++ }
++ if (mask->tp_range.tp_min.src != mask->tp_range.tp_max.src) {
++ NL_SET_ERR_MSG(extack,
++ "Both min and max source ports must be specified");
++ return -EINVAL;
++ }
+ if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
+ ntohs(key->tp_range.tp_max.dst) <=
+ ntohs(key->tp_range.tp_min.dst)) {
+--
+2.39.2
+
--- /dev/null
+From 10d76bd0d19f2b3e7cf085d812e9f2ba3263efee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 23:16:34 -0300
+Subject: net/sched: make psched_mtu() RTNL-less safe
+
+From: Pedro Tammela <pctammela@mojatatu.com>
+
+[ Upstream commit 150e33e62c1fa4af5aaab02776b6c3812711d478 ]
+
+Eric Dumazet says[1]:
+-------
+Speaking of psched_mtu(), I see that net/sched/sch_pie.c is using it
+without holding RTNL, so dev->mtu can be changed underneath.
+KCSAN could issue a warning.
+-------
+
+Annotate dev->mtu with READ_ONCE() so KCSAN don't issue a warning.
+
+[1] https://lore.kernel.org/all/CANn89iJoJO5VtaJ-2=_d2aOQhb0Xw8iBT_Cxqp2HyuS-zj6azw@mail.gmail.com/
+
+v1 -> v2: Fix commit message
+
+Fixes: d4b36210c2e6 ("net: pkt_sched: PIE AQM scheme")
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/20230711021634.561598-1-pctammela@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/pkt_sched.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
+index 7dba1c3a7b801..2465d1e79d10e 100644
+--- a/include/net/pkt_sched.h
++++ b/include/net/pkt_sched.h
+@@ -134,7 +134,7 @@ extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
+ */
+ static inline unsigned int psched_mtu(const struct net_device *dev)
+ {
+- return dev->mtu + dev->hard_header_len;
++ return READ_ONCE(dev->mtu) + dev->hard_header_len;
+ }
+
+ static inline struct net *qdisc_net(struct Qdisc *q)
+--
+2.39.2
+
--- /dev/null
+From 6a853a023604a3d01a17866ce9b4d5a5355edc97 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 18:01:02 -0300
+Subject: net/sched: sch_qfq: account for stab overhead in qfq_enqueue
+
+From: Pedro Tammela <pctammela@mojatatu.com>
+
+[ Upstream commit 3e337087c3b5805fe0b8a46ba622a962880b5d64 ]
+
+Lion says:
+-------
+In the QFQ scheduler a similar issue to CVE-2023-31436
+persists.
+
+Consider the following code in net/sched/sch_qfq.c:
+
+static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ unsigned int len = qdisc_pkt_len(skb), gso_segs;
+
+ // ...
+
+ if (unlikely(cl->agg->lmax < len)) {
+ pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
+ cl->agg->lmax, len, cl->common.classid);
+ err = qfq_change_agg(sch, cl, cl->agg->class_weight, len);
+ if (err) {
+ cl->qstats.drops++;
+ return qdisc_drop(skb, sch, to_free);
+ }
+
+ // ...
+
+ }
+
+Similarly to CVE-2023-31436, "lmax" is increased without any bounds
+checks according to the packet length "len". Usually this would not
+impose a problem because packet sizes are naturally limited.
+
+This is however not the actual packet length, rather the
+"qdisc_pkt_len(skb)" which might apply size transformations according to
+"struct qdisc_size_table" as created by "qdisc_get_stab()" in
+net/sched/sch_api.c if the TCA_STAB option was set when modifying the qdisc.
+
+A user may choose virtually any size using such a table.
+
+As a result the same issue as in CVE-2023-31436 can occur, allowing heap
+out-of-bounds read / writes in the kmalloc-8192 cache.
+-------
+
+We can create the issue with the following commands:
+
+tc qdisc add dev $DEV root handle 1: stab mtu 2048 tsize 512 mpu 0 \
+overhead 999999999 linklayer ethernet qfq
+tc class add dev $DEV parent 1: classid 1:1 htb rate 6mbit burst 15k
+tc filter add dev $DEV parent 1: matchall classid 1:1
+ping -I $DEV 1.1.1.2
+
+This is caused by incorrectly assuming that qdisc_pkt_len() returns a
+length within the bounds QFQ_MIN_LMAX < len < QFQ_MAX_LMAX.
+
+Fixes: 462dbc9101ac ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost")
+Reported-by: Lion <nnamrec@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_qfq.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
+index 63a5b277c117f..befaf74b33caa 100644
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -381,8 +381,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
+ u32 lmax)
+ {
+ struct qfq_sched *q = qdisc_priv(sch);
+- struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
++ struct qfq_aggregate *new_agg;
+
++ /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */
++ if (lmax > QFQ_MAX_LMAX)
++ return -EINVAL;
++
++ new_agg = qfq_find_agg(q, lmax, weight);
+ if (new_agg == NULL) { /* create new aggregate */
+ new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
+ if (new_agg == NULL)
+--
+2.39.2
+
--- /dev/null
+From 995657ed88cb063b7ecfad81292ee1ca3ae6f64e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 18:01:00 -0300
+Subject: net/sched: sch_qfq: reintroduce lmax bound check for MTU
+
+From: Pedro Tammela <pctammela@mojatatu.com>
+
+[ Upstream commit 158810b261d02fc7dd92ca9c392d8f8a211a2401 ]
+
+25369891fcef deletes a check for the case where no 'lmax' is
+specified which 3037933448f6 previously fixed as 'lmax'
+could be set to the device's MTU without any bound checking
+for QFQ_MIN_LMAX and QFQ_MAX_LMAX. Therefore, reintroduce the check.
+
+Fixes: 25369891fcef ("net/sched: sch_qfq: refactor parsing of netlink parameters")
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_qfq.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
+index dfd9a99e62570..63a5b277c117f 100644
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -423,10 +423,17 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ else
+ weight = 1;
+
+- if (tb[TCA_QFQ_LMAX])
++ if (tb[TCA_QFQ_LMAX]) {
+ lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
+- else
++ } else {
++ /* MTU size is user controlled */
+ lmax = psched_mtu(qdisc_dev(sch));
++ if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) {
++ NL_SET_ERR_MSG_MOD(extack,
++ "MTU size out of bounds for qfq");
++ return -EINVAL;
++ }
++ }
+
+ inv_w = ONE_FP / weight;
+ weight = ONE_FP / inv_w;
+--
+2.39.2
+
--- /dev/null
+From b56c299a48f29f6799850dcc1ba7c0497b531ed9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 May 2023 12:19:45 +0300
+Subject: net/sched: taprio: replace tc_taprio_qopt_offload :: enable with a
+ "cmd" enum
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 2d800bc500fb3fb07a0fb42e2d0a1356fb9e1e8f ]
+
+Inspired from struct flow_cls_offload :: cmd, in order for taprio to be
+able to report statistics (which is future work), it seems that we need
+to drill one step further with the ndo_setup_tc(TC_SETUP_QDISC_TAPRIO)
+multiplexing, and pass the command as part of the common portion of the
+muxed structure.
+
+Since we already have an "enable" variable in tc_taprio_qopt_offload,
+refactor all drivers to check for "cmd" instead of "enable", and reject
+every other command except "replace" and "destroy" - to be future proof.
+
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> # for lan966x
+Acked-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
+Reviewed-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+Reviewed-by: Gerhard Engleder <gerhard@engleder-embedded.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 8046063df887 ("igc: Rename qbv_enable to taprio_offload_enable")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/hirschmann/hellcreek.c | 14 +++++++++-----
+ drivers/net/dsa/ocelot/felix_vsc9959.c | 4 +++-
+ drivers/net/dsa/sja1105/sja1105_tas.c | 7 +++++--
+ drivers/net/ethernet/engleder/tsnep_selftests.c | 12 ++++++------
+ drivers/net/ethernet/engleder/tsnep_tc.c | 4 +++-
+ drivers/net/ethernet/freescale/enetc/enetc_qos.c | 6 +++++-
+ drivers/net/ethernet/intel/igc/igc_main.c | 13 +++++++++++--
+ .../net/ethernet/microchip/lan966x/lan966x_tc.c | 10 ++++++++--
+ drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 7 +++++--
+ drivers/net/ethernet/ti/am65-cpsw-qos.c | 11 ++++++++---
+ include/net/pkt_sched.h | 7 ++++++-
+ net/sched/sch_taprio.c | 4 ++--
+ 12 files changed, 71 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
+index 595a548bb0a80..af50001ccdd4e 100644
+--- a/drivers/net/dsa/hirschmann/hellcreek.c
++++ b/drivers/net/dsa/hirschmann/hellcreek.c
+@@ -1885,13 +1885,17 @@ static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port,
+ case TC_SETUP_QDISC_TAPRIO: {
+ struct tc_taprio_qopt_offload *taprio = type_data;
+
+- if (!hellcreek_validate_schedule(hellcreek, taprio))
+- return -EOPNOTSUPP;
++ switch (taprio->cmd) {
++ case TAPRIO_CMD_REPLACE:
++ if (!hellcreek_validate_schedule(hellcreek, taprio))
++ return -EOPNOTSUPP;
+
+- if (taprio->enable)
+ return hellcreek_port_set_schedule(ds, port, taprio);
+-
+- return hellcreek_port_del_schedule(ds, port);
++ case TAPRIO_CMD_DESTROY:
++ return hellcreek_port_del_schedule(ds, port);
++ default:
++ return -EOPNOTSUPP;
++ }
+ }
+ default:
+ return -EOPNOTSUPP;
+diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
+index bd11f9fb95e54..772f8b817390b 100644
+--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
+@@ -1436,7 +1436,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
+
+ mutex_lock(&ocelot->tas_lock);
+
+- if (!taprio->enable) {
++ if (taprio->cmd == TAPRIO_CMD_DESTROY) {
+ ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
+ ocelot_rmw_rix(ocelot, 0, QSYS_TAG_CONFIG_ENABLE,
+ QSYS_TAG_CONFIG, port);
+@@ -1448,6 +1448,8 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
+
+ mutex_unlock(&ocelot->tas_lock);
+ return 0;
++ } else if (taprio->cmd != TAPRIO_CMD_REPLACE) {
++ return -EOPNOTSUPP;
+ }
+
+ ret = ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
+diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c
+index e6153848a9509..d7818710bc028 100644
+--- a/drivers/net/dsa/sja1105/sja1105_tas.c
++++ b/drivers/net/dsa/sja1105/sja1105_tas.c
+@@ -516,10 +516,11 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
+ /* Can't change an already configured port (must delete qdisc first).
+ * Can't delete the qdisc from an unconfigured port.
+ */
+- if (!!tas_data->offload[port] == admin->enable)
++ if ((!!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_REPLACE) ||
++ (!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_DESTROY))
+ return -EINVAL;
+
+- if (!admin->enable) {
++ if (admin->cmd == TAPRIO_CMD_DESTROY) {
+ taprio_offload_free(tas_data->offload[port]);
+ tas_data->offload[port] = NULL;
+
+@@ -528,6 +529,8 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
+ return rc;
+
+ return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
++ } else if (admin->cmd != TAPRIO_CMD_REPLACE) {
++ return -EOPNOTSUPP;
+ }
+
+ /* The cycle time extension is the amount of time the last cycle from
+diff --git a/drivers/net/ethernet/engleder/tsnep_selftests.c b/drivers/net/ethernet/engleder/tsnep_selftests.c
+index 1581d6b222320..8a9145f93147c 100644
+--- a/drivers/net/ethernet/engleder/tsnep_selftests.c
++++ b/drivers/net/ethernet/engleder/tsnep_selftests.c
+@@ -329,7 +329,7 @@ static bool disable_taprio(struct tsnep_adapter *adapter)
+ int retval;
+
+ memset(&qopt, 0, sizeof(qopt));
+- qopt.enable = 0;
++ qopt.cmd = TAPRIO_CMD_DESTROY;
+ retval = tsnep_tc_setup(adapter->netdev, TC_SETUP_QDISC_TAPRIO, &qopt);
+ if (retval)
+ return false;
+@@ -360,7 +360,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter)
+ for (i = 0; i < 255; i++)
+ qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES;
+
+- qopt->enable = 1;
++ qopt->cmd = TAPRIO_CMD_REPLACE;
+ qopt->base_time = ktime_set(0, 0);
+ qopt->cycle_time = 1500000;
+ qopt->cycle_time_extension = 0;
+@@ -382,7 +382,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter)
+ if (!run_taprio(adapter, qopt, 100))
+ goto failed;
+
+- qopt->enable = 1;
++ qopt->cmd = TAPRIO_CMD_REPLACE;
+ qopt->base_time = ktime_set(0, 0);
+ qopt->cycle_time = 411854;
+ qopt->cycle_time_extension = 0;
+@@ -406,7 +406,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter)
+ if (!run_taprio(adapter, qopt, 100))
+ goto failed;
+
+- qopt->enable = 1;
++ qopt->cmd = TAPRIO_CMD_REPLACE;
+ qopt->base_time = ktime_set(0, 0);
+ delay_base_time(adapter, qopt, 12);
+ qopt->cycle_time = 125000;
+@@ -457,7 +457,7 @@ static bool tsnep_test_taprio_change(struct tsnep_adapter *adapter)
+ for (i = 0; i < 255; i++)
+ qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES;
+
+- qopt->enable = 1;
++ qopt->cmd = TAPRIO_CMD_REPLACE;
+ qopt->base_time = ktime_set(0, 0);
+ qopt->cycle_time = 100000;
+ qopt->cycle_time_extension = 0;
+@@ -610,7 +610,7 @@ static bool tsnep_test_taprio_extension(struct tsnep_adapter *adapter)
+ for (i = 0; i < 255; i++)
+ qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES;
+
+- qopt->enable = 1;
++ qopt->cmd = TAPRIO_CMD_REPLACE;
+ qopt->base_time = ktime_set(0, 0);
+ qopt->cycle_time = 100000;
+ qopt->cycle_time_extension = 50000;
+diff --git a/drivers/net/ethernet/engleder/tsnep_tc.c b/drivers/net/ethernet/engleder/tsnep_tc.c
+index d083e6684f120..745b191a55402 100644
+--- a/drivers/net/ethernet/engleder/tsnep_tc.c
++++ b/drivers/net/ethernet/engleder/tsnep_tc.c
+@@ -325,7 +325,7 @@ static int tsnep_taprio(struct tsnep_adapter *adapter,
+ if (!adapter->gate_control)
+ return -EOPNOTSUPP;
+
+- if (!qopt->enable) {
++ if (qopt->cmd == TAPRIO_CMD_DESTROY) {
+ /* disable gate control if active */
+ mutex_lock(&adapter->gate_control_lock);
+
+@@ -337,6 +337,8 @@ static int tsnep_taprio(struct tsnep_adapter *adapter,
+ mutex_unlock(&adapter->gate_control_lock);
+
+ return 0;
++ } else if (qopt->cmd != TAPRIO_CMD_REPLACE) {
++ return -EOPNOTSUPP;
+ }
+
+ retval = tsnep_validate_gcl(qopt);
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+index 126007ab70f61..dfec50106106f 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+@@ -65,7 +65,7 @@ static int enetc_setup_taprio(struct net_device *ndev,
+ gcl_len = admin_conf->num_entries;
+
+ tge = enetc_rd(hw, ENETC_PTGCR);
+- if (!admin_conf->enable) {
++ if (admin_conf->cmd == TAPRIO_CMD_DESTROY) {
+ enetc_wr(hw, ENETC_PTGCR, tge & ~ENETC_PTGCR_TGE);
+ enetc_reset_ptcmsdur(hw);
+
+@@ -138,6 +138,10 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data)
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ int err, i;
+
++ if (taprio->cmd != TAPRIO_CMD_REPLACE &&
++ taprio->cmd != TAPRIO_CMD_DESTROY)
++ return -EOPNOTSUPP;
++
+ /* TSD and Qbv are mutually exclusive in hardware */
+ for (i = 0; i < priv->num_tx_rings; i++)
+ if (priv->tx_ring[i]->tsd_enable)
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index e7bd2c60ee383..ae986e44a4718 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -6117,9 +6117,18 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ size_t n;
+ int i;
+
+- adapter->qbv_enable = qopt->enable;
++ switch (qopt->cmd) {
++ case TAPRIO_CMD_REPLACE:
++ adapter->qbv_enable = true;
++ break;
++ case TAPRIO_CMD_DESTROY:
++ adapter->qbv_enable = false;
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
+
+- if (!qopt->enable)
++ if (!adapter->qbv_enable)
+ return igc_tsn_clear_schedule(adapter);
+
+ if (qopt->base_time < 0)
+diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c
+index cf0cc7562d042..ee652f2d23595 100644
+--- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c
++++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c
+@@ -21,8 +21,14 @@ static int lan966x_tc_setup_qdisc_mqprio(struct lan966x_port *port,
+ static int lan966x_tc_setup_qdisc_taprio(struct lan966x_port *port,
+ struct tc_taprio_qopt_offload *taprio)
+ {
+- return taprio->enable ? lan966x_taprio_add(port, taprio) :
+- lan966x_taprio_del(port);
++ switch (taprio->cmd) {
++ case TAPRIO_CMD_REPLACE:
++ return lan966x_taprio_add(port, taprio);
++ case TAPRIO_CMD_DESTROY:
++ return lan966x_taprio_del(port);
++ default:
++ return -EOPNOTSUPP;
++ }
+ }
+
+ static int lan966x_tc_setup_qdisc_tbf(struct lan966x_port *port,
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+index 9d55226479b4a..ac41ef4cbd2f0 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+@@ -966,8 +966,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
+ return -EOPNOTSUPP;
+ }
+
+- if (!qopt->enable)
++ if (qopt->cmd == TAPRIO_CMD_DESTROY)
+ goto disable;
++ else if (qopt->cmd != TAPRIO_CMD_REPLACE)
++ return -EOPNOTSUPP;
++
+ if (qopt->num_entries >= dep)
+ return -EINVAL;
+ if (!qopt->cycle_time)
+@@ -988,7 +991,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
+
+ mutex_lock(&priv->plat->est->lock);
+ priv->plat->est->gcl_size = size;
+- priv->plat->est->enable = qopt->enable;
++ priv->plat->est->enable = qopt->cmd == TAPRIO_CMD_REPLACE;
+ mutex_unlock(&priv->plat->est->lock);
+
+ for (i = 0; i < size; i++) {
+diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c
+index 3a908db6e5b22..eced87fa261c9 100644
+--- a/drivers/net/ethernet/ti/am65-cpsw-qos.c
++++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c
+@@ -450,7 +450,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev,
+
+ am65_cpsw_est_update_state(ndev);
+
+- if (!est_new->taprio.enable) {
++ if (est_new->taprio.cmd == TAPRIO_CMD_DESTROY) {
+ am65_cpsw_stop_est(ndev);
+ return ret;
+ }
+@@ -476,7 +476,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev,
+ am65_cpsw_est_set_sched_list(ndev, est_new);
+ am65_cpsw_port_est_assign_buf_num(ndev, est_new->buf);
+
+- am65_cpsw_est_set(ndev, est_new->taprio.enable);
++ am65_cpsw_est_set(ndev, est_new->taprio.cmd == TAPRIO_CMD_REPLACE);
+
+ if (tact == TACT_PROG) {
+ ret = am65_cpsw_timer_set(ndev, est_new);
+@@ -520,7 +520,7 @@ static int am65_cpsw_set_taprio(struct net_device *ndev, void *type_data)
+ am65_cpsw_cp_taprio(taprio, &est_new->taprio);
+ ret = am65_cpsw_configure_taprio(ndev, est_new);
+ if (!ret) {
+- if (taprio->enable) {
++ if (taprio->cmd == TAPRIO_CMD_REPLACE) {
+ devm_kfree(&ndev->dev, port->qos.est_admin);
+
+ port->qos.est_admin = est_new;
+@@ -564,8 +564,13 @@ static void am65_cpsw_est_link_up(struct net_device *ndev, int link_speed)
+ static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data)
+ {
+ struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
++ struct tc_taprio_qopt_offload *taprio = type_data;
+ struct am65_cpsw_common *common = port->common;
+
++ if (taprio->cmd != TAPRIO_CMD_REPLACE &&
++ taprio->cmd != TAPRIO_CMD_DESTROY)
++ return -EOPNOTSUPP;
++
+ if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS))
+ return -ENODEV;
+
+diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
+index 5722931d83d43..7dba1c3a7b801 100644
+--- a/include/net/pkt_sched.h
++++ b/include/net/pkt_sched.h
+@@ -187,6 +187,11 @@ struct tc_taprio_caps {
+ bool broken_mqprio:1;
+ };
+
++enum tc_taprio_qopt_cmd {
++ TAPRIO_CMD_REPLACE,
++ TAPRIO_CMD_DESTROY,
++};
++
+ struct tc_taprio_sched_entry {
+ u8 command; /* TC_TAPRIO_CMD_* */
+
+@@ -198,7 +203,7 @@ struct tc_taprio_sched_entry {
+ struct tc_taprio_qopt_offload {
+ struct tc_mqprio_qopt_offload mqprio;
+ struct netlink_ext_ack *extack;
+- u8 enable;
++ enum tc_taprio_qopt_cmd cmd;
+ ktime_t base_time;
+ u64 cycle_time;
+ u64 cycle_time_extension;
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index cf0e61ed92253..4caf80ddc6721 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1527,7 +1527,7 @@ static int taprio_enable_offload(struct net_device *dev,
+ "Not enough memory for enabling offload mode");
+ return -ENOMEM;
+ }
+- offload->enable = 1;
++ offload->cmd = TAPRIO_CMD_REPLACE;
+ offload->extack = extack;
+ mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
+ offload->mqprio.extack = extack;
+@@ -1575,7 +1575,7 @@ static int taprio_disable_offload(struct net_device *dev,
+ "Not enough memory to disable offload mode");
+ return -ENOMEM;
+ }
+- offload->enable = 0;
++ offload->cmd = TAPRIO_CMD_DESTROY;
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+ if (err < 0) {
+--
+2.39.2
+
--- /dev/null
+From a1a847163ec939ce74bf72f6a3e83a252842afac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 14:34:14 +0800
+Subject: net: txgbe: fix eeprom calculation error
+
+From: Jiawen Wu <jiawenwu@trustnetic.com>
+
+[ Upstream commit aa846677a9fb19a0f2c58154c140398aa92a87ba ]
+
+For some device types like TXGBE_ID_XAUI, *checksum computed in
+txgbe_calc_eeprom_checksum() is larger than TXGBE_EEPROM_SUM. Remove the
+limit on the size of *checksum.
+
+Fixes: 049fe5365324 ("net: txgbe: Add operations to interact with firmware")
+Fixes: 5e2ea7801fac ("net: txgbe: Fix unsigned comparison to zero in txgbe_calc_eeprom_checksum()")
+Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
+Link: https://lore.kernel.org/r/20230711063414.3311-1-jiawenwu@trustnetic.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
+index ebc46f3be0569..fc37af2e71ffc 100644
+--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
++++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
+@@ -196,9 +196,6 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum)
+ if (eeprom_ptrs)
+ kvfree(eeprom_ptrs);
+
+- if (*checksum > TXGBE_EEPROM_SUM)
+- return -EINVAL;
+-
+ *checksum = TXGBE_EEPROM_SUM - *checksum;
+
+ return 0;
+--
+2.39.2
+
--- /dev/null
+From 9904cc769d6876b45e8f23b7ac006d00e56bb023 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 11:52:26 +0300
+Subject: netdevsim: fix uninitialized data in nsim_dev_trap_fa_cookie_write()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit f72207a5c0dbaaf6921cf9a6c0d2fd0bc249ea78 ]
+
+The simple_write_to_buffer() function is designed to handle partial
+writes. It returns negatives on error, otherwise it returns the number
+of bytes that were able to be copied. This code doesn't check the
+return properly. We only know that the first byte is written, the rest
+of the buffer might be uninitialized.
+
+There is no need to use the simple_write_to_buffer() function.
+Partial writes are prohibited by the "if (*ppos != 0)" check at the
+start of the function. Just use memdup_user() and copy the whole
+buffer.
+
+Fixes: d3cbb907ae57 ("netdevsim: add ACL trap reporting cookie as a metadata")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Link: https://lore.kernel.org/r/7c1f950b-3a7d-4252-82a6-876e53078ef7@moroto.mountain
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/netdevsim/dev.c | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
+index 6045bece2654d..b4d3b9cde8bd6 100644
+--- a/drivers/net/netdevsim/dev.c
++++ b/drivers/net/netdevsim/dev.c
+@@ -184,13 +184,10 @@ static ssize_t nsim_dev_trap_fa_cookie_write(struct file *file,
+ cookie_len = (count - 1) / 2;
+ if ((count - 1) % 2)
+ return -EINVAL;
+- buf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
+- if (!buf)
+- return -ENOMEM;
+
+- ret = simple_write_to_buffer(buf, count, ppos, data, count);
+- if (ret < 0)
+- goto free_buf;
++ buf = memdup_user(data, count);
++ if (IS_ERR(buf))
++ return PTR_ERR(buf);
+
+ fa_cookie = kmalloc(sizeof(*fa_cookie) + cookie_len,
+ GFP_KERNEL | __GFP_NOWARN);
+--
+2.39.2
+
--- /dev/null
+From a0f10b653153e7204eca71f671829ee8f377403a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jul 2023 12:25:23 +0200
+Subject: netfilter: conntrack: don't fold port numbers into addresses before
+ hashing
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit eaf9e7192ec9af2fbf1b6eb2299dd0feca6c5f7e ]
+
+Originally this used jhash2() over tuple and folded the zone id,
+the pernet hash value, destination port and l4 protocol number into the
+32bit seed value.
+
+When the switch to siphash was done, I used an on-stack temporary
+buffer to build a suitable key to be hashed via siphash().
+
+But this showed up as performance regression, so I got rid of
+the temporary copy and collected to-be-hashed data in 4 u64 variables.
+
+This makes it easy to build tuples that produce the same hash, which isn't
+desirable even though chain lengths are limited.
+
+Switch back to plain siphash, but just like with jhash2(), take advantage
+of the fact that most of to-be-hashed data is already in a suitable order.
+
+Use an empty struct as annotation in 'struct nf_conntrack_tuple' to mark
+last member that can be used as hash input.
+
+The only remaining data that isn't present in the tuple structure are the
+zone identifier and the pernet hash: fold those into the key.
+
+Fixes: d2c806abcf0b ("netfilter: conntrack: use siphash_4u64")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_conntrack_tuple.h | 3 +++
+ net/netfilter/nf_conntrack_core.c | 20 +++++++-------------
+ 2 files changed, 10 insertions(+), 13 deletions(-)
+
+diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
+index 9334371c94e2b..f7dd950ff2509 100644
+--- a/include/net/netfilter/nf_conntrack_tuple.h
++++ b/include/net/netfilter/nf_conntrack_tuple.h
+@@ -67,6 +67,9 @@ struct nf_conntrack_tuple {
+ /* The protocol. */
+ u_int8_t protonum;
+
++ /* The direction must be ignored for the tuplehash */
++ struct { } __nfct_hash_offsetend;
++
+ /* The direction (for tuplehash) */
+ u_int8_t dir;
+ } dst;
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index d119f1d4c2fc8..992393102d5f5 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -211,24 +211,18 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
+ const struct net *net)
+ {
+- u64 a, b, c, d;
++ siphash_key_t key;
+
+ get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
+
+- /* The direction must be ignored, handle usable tuplehash members manually */
+- a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3];
+- b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3];
++ key = nf_conntrack_hash_rnd;
+
+- c = (__force u64)tuple->src.u.all << 32 | (__force u64)tuple->dst.u.all << 16;
+- c |= tuple->dst.protonum;
++ key.key[0] ^= zoneid;
++ key.key[1] ^= net_hash_mix(net);
+
+- d = (u64)zoneid << 32 | net_hash_mix(net);
+-
+- /* IPv4: u3.all[1,2,3] == 0 */
+- c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2];
+- d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2];
+-
+- return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd);
++ return siphash((void *)tuple,
++ offsetofend(struct nf_conntrack_tuple, dst.__nfct_hash_offsetend),
++ &key);
+ }
+
+ static u32 scale_hash(u32 hash)
+--
+2.39.2
+
--- /dev/null
+From 6020f819a7c3532db9f038c037830fd7e15d9058 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 16:24:27 +0200
+Subject: netfilter: nf_tables: report use refcount overflow
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 1689f25924ada8fe14a4a82c38925d04994c7142 ]
+
+Overflow use refcount checks are not complete.
+
+Add helper function to deal with object reference counter tracking.
+Report -EMFILE in case UINT_MAX is reached.
+
+nft_use_dec() splats in case that reference counter underflows,
+which should not ever happen.
+
+Add nft_use_inc_restore() and nft_use_dec_restore() which are used
+to restore reference counter from error and abort paths.
+
+Use u32 in nft_flowtable and nft_object since helper functions cannot
+work on bitfields.
+
+Remove the few early incomplete checks now that the helper functions
+are in place and used to check for refcount overflow.
+
+Fixes: 96518518cc41 ("netfilter: add nftables")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 31 +++++-
+ net/netfilter/nf_tables_api.c | 163 ++++++++++++++++++------------
+ net/netfilter/nft_flow_offload.c | 6 +-
+ net/netfilter/nft_immediate.c | 8 +-
+ net/netfilter/nft_objref.c | 8 +-
+ 5 files changed, 141 insertions(+), 75 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index ee47d7143d99f..1b0beb8f08aee 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1211,6 +1211,29 @@ int __nft_release_basechain(struct nft_ctx *ctx);
+
+ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
+
++static inline bool nft_use_inc(u32 *use)
++{
++ if (*use == UINT_MAX)
++ return false;
++
++ (*use)++;
++
++ return true;
++}
++
++static inline void nft_use_dec(u32 *use)
++{
++ WARN_ON_ONCE((*use)-- == 0);
++}
++
++/* For error and abort path: restore use counter to previous state. */
++static inline void nft_use_inc_restore(u32 *use)
++{
++ WARN_ON_ONCE(!nft_use_inc(use));
++}
++
++#define nft_use_dec_restore nft_use_dec
++
+ /**
+ * struct nft_table - nf_tables table
+ *
+@@ -1296,8 +1319,8 @@ struct nft_object {
+ struct list_head list;
+ struct rhlist_head rhlhead;
+ struct nft_object_hash_key key;
+- u32 genmask:2,
+- use:30;
++ u32 genmask:2;
++ u32 use;
+ u64 handle;
+ u16 udlen;
+ u8 *udata;
+@@ -1399,8 +1422,8 @@ struct nft_flowtable {
+ char *name;
+ int hooknum;
+ int ops_len;
+- u32 genmask:2,
+- use:30;
++ u32 genmask:2;
++ u32 use;
+ u64 handle;
+ /* runtime data below here */
+ struct list_head hook_list ____cacheline_aligned;
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 79719e8cda799..18546f9b2a63a 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -253,8 +253,10 @@ int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+ if (chain->bound)
+ return -EBUSY;
+
++ if (!nft_use_inc(&chain->use))
++ return -EMFILE;
++
+ chain->bound = true;
+- chain->use++;
+ nft_chain_trans_bind(ctx, chain);
+
+ return 0;
+@@ -437,7 +439,7 @@ static int nft_delchain(struct nft_ctx *ctx)
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+ nft_deactivate_next(ctx->net, ctx->chain);
+
+ return 0;
+@@ -476,7 +478,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
+ /* You cannot delete the same rule twice */
+ if (nft_is_active_next(ctx->net, rule)) {
+ nft_deactivate_next(ctx->net, rule);
+- ctx->chain->use--;
++ nft_use_dec(&ctx->chain->use);
+ return 0;
+ }
+ return -ENOENT;
+@@ -643,7 +645,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
+ nft_map_deactivate(ctx, set);
+
+ nft_deactivate_next(ctx->net, set);
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+
+ return err;
+ }
+@@ -675,7 +677,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
+ return err;
+
+ nft_deactivate_next(ctx->net, obj);
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+
+ return err;
+ }
+@@ -710,7 +712,7 @@ static int nft_delflowtable(struct nft_ctx *ctx,
+ return err;
+
+ nft_deactivate_next(ctx->net, flowtable);
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+
+ return err;
+ }
+@@ -2395,9 +2397,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ struct nft_chain *chain;
+ int err;
+
+- if (table->use == UINT_MAX)
+- return -EOVERFLOW;
+-
+ if (nla[NFTA_CHAIN_HOOK]) {
+ struct nft_stats __percpu *stats = NULL;
+ struct nft_chain_hook hook = {};
+@@ -2493,6 +2492,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ if (err < 0)
+ goto err_destroy_chain;
+
++ if (!nft_use_inc(&table->use)) {
++ err = -EMFILE;
++ goto err_use;
++ }
++
+ trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+@@ -2509,10 +2513,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ goto err_unregister_hook;
+ }
+
+- table->use++;
+-
+ return 0;
++
+ err_unregister_hook:
++ nft_use_dec_restore(&table->use);
++err_use:
+ nf_tables_unregister_hook(net, table, chain);
+ err_destroy_chain:
+ nf_tables_chain_destroy(ctx);
+@@ -3841,9 +3846,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ return -EINVAL;
+ handle = nf_tables_alloc_handle(table);
+
+- if (chain->use == UINT_MAX)
+- return -EOVERFLOW;
+-
+ if (nla[NFTA_RULE_POSITION]) {
+ pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+ old_rule = __nft_rule_lookup(chain, pos_handle);
+@@ -3937,6 +3939,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ }
+ }
+
++ if (!nft_use_inc(&chain->use)) {
++ err = -EMFILE;
++ goto err_release_rule;
++ }
++
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ err = nft_delrule(&ctx, old_rule);
+ if (err < 0)
+@@ -3968,7 +3975,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ }
+ }
+ kvfree(expr_info);
+- chain->use++;
+
+ if (flow)
+ nft_trans_flow_rule(trans) = flow;
+@@ -3979,6 +3985,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ return 0;
+
+ err_destroy_flow_rule:
++ nft_use_dec_restore(&chain->use);
+ if (flow)
+ nft_flow_rule_destroy(flow);
+ err_release_rule:
+@@ -5015,9 +5022,15 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ alloc_size = sizeof(*set) + size + udlen;
+ if (alloc_size < size || alloc_size > INT_MAX)
+ return -ENOMEM;
++
++ if (!nft_use_inc(&table->use))
++ return -EMFILE;
++
+ set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
+- if (!set)
+- return -ENOMEM;
++ if (!set) {
++ err = -ENOMEM;
++ goto err_alloc;
++ }
+
+ name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
+ if (!name) {
+@@ -5075,7 +5088,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ goto err_set_expr_alloc;
+
+ list_add_tail_rcu(&set->list, &table->sets);
+- table->use++;
++
+ return 0;
+
+ err_set_expr_alloc:
+@@ -5087,6 +5100,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ kfree(set->name);
+ err_set_name:
+ kvfree(set);
++err_alloc:
++ nft_use_dec_restore(&table->use);
++
+ return err;
+ }
+
+@@ -5225,9 +5241,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *i;
+ struct nft_set_iter iter;
+
+- if (set->use == UINT_MAX)
+- return -EOVERFLOW;
+-
+ if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
+ return -EBUSY;
+
+@@ -5255,10 +5268,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ return iter.err;
+ }
+ bind:
++ if (!nft_use_inc(&set->use))
++ return -EMFILE;
++
+ binding->chain = ctx->chain;
+ list_add_tail_rcu(&binding->list, &set->bindings);
+ nft_set_trans_bind(ctx, set);
+- set->use++;
+
+ return 0;
+ }
+@@ -5332,7 +5347,7 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
+ nft_clear(ctx->net, set);
+ }
+
+- set->use++;
++ nft_use_inc_restore(&set->use);
+ }
+ EXPORT_SYMBOL_GPL(nf_tables_activate_set);
+
+@@ -5348,7 +5363,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ else
+ list_del_rcu(&binding->list);
+
+- set->use--;
++ nft_use_dec(&set->use);
+ break;
+ case NFT_TRANS_PREPARE:
+ if (nft_set_is_anonymous(set)) {
+@@ -5357,7 +5372,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+
+ nft_deactivate_next(ctx->net, set);
+ }
+- set->use--;
++ nft_use_dec(&set->use);
+ return;
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+@@ -5365,7 +5380,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
+- set->use--;
++ nft_use_dec(&set->use);
+ fallthrough;
+ default:
+ nf_tables_unbind_set(ctx, set, binding,
+@@ -6134,7 +6149,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+- (*nft_set_ext_obj(ext))->use--;
++ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
+ kfree(elem);
+ }
+ EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+@@ -6636,8 +6651,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ set->objtype, genmask);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
++ obj = NULL;
+ goto err_parse_key_end;
+ }
++
++ if (!nft_use_inc(&obj->use)) {
++ err = -EMFILE;
++ obj = NULL;
++ goto err_parse_key_end;
++ }
++
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
+ if (err < 0)
+ goto err_parse_key_end;
+@@ -6706,10 +6729,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ if (flags)
+ *nft_set_ext_flags(ext) = flags;
+
+- if (obj) {
++ if (obj)
+ *nft_set_ext_obj(ext) = obj;
+- obj->use++;
+- }
++
+ if (ulen > 0) {
+ if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
+ err = -EINVAL;
+@@ -6774,12 +6796,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ kfree(trans);
+ err_elem_free:
+ nf_tables_set_elem_destroy(ctx, set, elem.priv);
+- if (obj)
+- obj->use--;
+ err_parse_data:
+ if (nla[NFTA_SET_ELEM_DATA] != NULL)
+ nft_data_release(&elem.data.val, desc.type);
+ err_parse_key_end:
++ if (obj)
++ nft_use_dec_restore(&obj->use);
++
+ nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
+ err_parse_key:
+ nft_data_release(&elem.key.val, NFT_DATA_VALUE);
+@@ -6859,7 +6882,7 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+- chain->use++;
++ nft_use_inc_restore(&chain->use);
+ break;
+ }
+ }
+@@ -6874,7 +6897,7 @@ static void nft_setelem_data_activate(const struct net *net,
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_hold(nft_set_ext_data(ext), set->dtype);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+- (*nft_set_ext_obj(ext))->use++;
++ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
+ }
+
+ static void nft_setelem_data_deactivate(const struct net *net,
+@@ -6886,7 +6909,7 @@ static void nft_setelem_data_deactivate(const struct net *net,
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_release(nft_set_ext_data(ext), set->dtype);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+- (*nft_set_ext_obj(ext))->use--;
++ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
+ }
+
+ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
+@@ -7429,9 +7452,14 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
++ if (!nft_use_inc(&table->use))
++ return -EMFILE;
++
+ type = nft_obj_type_get(net, objtype);
+- if (IS_ERR(type))
+- return PTR_ERR(type);
++ if (IS_ERR(type)) {
++ err = PTR_ERR(type);
++ goto err_type;
++ }
+
+ obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
+ if (IS_ERR(obj)) {
+@@ -7465,7 +7493,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
+ goto err_obj_ht;
+
+ list_add_tail_rcu(&obj->list, &table->objects);
+- table->use++;
++
+ return 0;
+ err_obj_ht:
+ /* queued in transaction log */
+@@ -7481,6 +7509,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
+ kfree(obj);
+ err_init:
+ module_put(type->owner);
++err_type:
++ nft_use_dec_restore(&table->use);
++
+ return err;
+ }
+
+@@ -7882,7 +7913,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+ case NFT_TRANS_PREPARE:
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+- flowtable->use--;
++ nft_use_dec(&flowtable->use);
+ fallthrough;
+ default:
+ return;
+@@ -8236,9 +8267,14 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
++ if (!nft_use_inc(&table->use))
++ return -EMFILE;
++
+ flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
+- if (!flowtable)
+- return -ENOMEM;
++ if (!flowtable) {
++ err = -ENOMEM;
++ goto flowtable_alloc;
++ }
+
+ flowtable->table = table;
+ flowtable->handle = nf_tables_alloc_handle(table);
+@@ -8293,7 +8329,6 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
+ goto err5;
+
+ list_add_tail_rcu(&flowtable->list, &table->flowtables);
+- table->use++;
+
+ return 0;
+ err5:
+@@ -8310,6 +8345,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
+ kfree(flowtable->name);
+ err1:
+ kfree(flowtable);
++flowtable_alloc:
++ nft_use_dec_restore(&table->use);
++
+ return err;
+ }
+
+@@ -9680,7 +9718,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ */
+ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !list_empty(&nft_trans_set(trans)->bindings))
+- trans->ctx.table->use--;
++ nft_use_dec(&trans->ctx.table->use);
+ }
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_NEWSET, GFP_KERNEL);
+@@ -9910,7 +9948,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nft_trans_destroy(trans);
+ break;
+ }
+- trans->ctx.table->use--;
++ nft_use_dec_restore(&trans->ctx.table->use);
+ nft_chain_del(trans->ctx.chain);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+@@ -9923,7 +9961,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ list_splice(&nft_trans_chain_hooks(trans),
+ &nft_trans_basechain(trans)->hook_list);
+ } else {
+- trans->ctx.table->use++;
++ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, trans->ctx.chain);
+ }
+ nft_trans_destroy(trans);
+@@ -9933,7 +9971,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nft_trans_destroy(trans);
+ break;
+ }
+- trans->ctx.chain->use--;
++ nft_use_dec_restore(&trans->ctx.chain->use);
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nft_rule_expr_deactivate(&trans->ctx,
+ nft_trans_rule(trans),
+@@ -9943,7 +9981,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ break;
+ case NFT_MSG_DELRULE:
+ case NFT_MSG_DESTROYRULE:
+- trans->ctx.chain->use++;
++ nft_use_inc_restore(&trans->ctx.chain->use);
+ nft_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+@@ -9956,7 +9994,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nft_trans_destroy(trans);
+ break;
+ }
+- trans->ctx.table->use--;
++ nft_use_dec_restore(&trans->ctx.table->use);
+ if (nft_trans_set_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+@@ -9965,7 +10003,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ break;
+ case NFT_MSG_DELSET:
+ case NFT_MSG_DESTROYSET:
+- trans->ctx.table->use++;
++ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, nft_trans_set(trans));
+ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(&trans->ctx, nft_trans_set(trans));
+@@ -10009,13 +10047,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
+ nft_trans_destroy(trans);
+ } else {
+- trans->ctx.table->use--;
++ nft_use_dec_restore(&trans->ctx.table->use);
+ nft_obj_del(nft_trans_obj(trans));
+ }
+ break;
+ case NFT_MSG_DELOBJ:
+ case NFT_MSG_DESTROYOBJ:
+- trans->ctx.table->use++;
++ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, nft_trans_obj(trans));
+ nft_trans_destroy(trans);
+ break;
+@@ -10024,7 +10062,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nft_unregister_flowtable_net_hooks(net,
+ &nft_trans_flowtable_hooks(trans));
+ } else {
+- trans->ctx.table->use--;
++ nft_use_dec_restore(&trans->ctx.table->use);
+ list_del_rcu(&nft_trans_flowtable(trans)->list);
+ nft_unregister_flowtable_net_hooks(net,
+ &nft_trans_flowtable(trans)->hook_list);
+@@ -10036,7 +10074,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ list_splice(&nft_trans_flowtable_hooks(trans),
+ &nft_trans_flowtable(trans)->hook_list);
+ } else {
+- trans->ctx.table->use++;
++ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+ }
+ nft_trans_destroy(trans);
+@@ -10486,8 +10524,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+ if (desc->flags & NFT_DATA_DESC_SETELEM &&
+ chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
++ if (!nft_use_inc(&chain->use))
++ return -EMFILE;
+
+- chain->use++;
+ data->verdict.chain = chain;
+ break;
+ }
+@@ -10505,7 +10544,7 @@ static void nft_verdict_uninit(const struct nft_data *data)
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+- chain->use--;
++ nft_use_dec(&chain->use);
+ break;
+ }
+ }
+@@ -10674,11 +10713,11 @@ int __nft_release_basechain(struct nft_ctx *ctx)
+ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
+ list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
+ list_del(&rule->list);
+- ctx->chain->use--;
++ nft_use_dec(&ctx->chain->use);
+ nf_tables_rule_release(ctx, rule);
+ }
+ nft_chain_del(ctx->chain);
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+ nf_tables_chain_destroy(ctx);
+
+ return 0;
+@@ -10728,18 +10767,18 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
+ ctx.chain = chain;
+ list_for_each_entry_safe(rule, nr, &chain->rules, list) {
+ list_del(&rule->list);
+- chain->use--;
++ nft_use_dec(&chain->use);
+ nf_tables_rule_release(&ctx, rule);
+ }
+ }
+ list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
+ list_del(&flowtable->list);
+- table->use--;
++ nft_use_dec(&table->use);
+ nf_tables_flowtable_destroy(flowtable);
+ }
+ list_for_each_entry_safe(set, ns, &table->sets, list) {
+ list_del(&set->list);
+- table->use--;
++ nft_use_dec(&table->use);
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(&ctx, set);
+
+@@ -10747,13 +10786,13 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
+ }
+ list_for_each_entry_safe(obj, ne, &table->objects, list) {
+ nft_obj_del(obj);
+- table->use--;
++ nft_use_dec(&table->use);
+ nft_obj_destroy(&ctx, obj);
+ }
+ list_for_each_entry_safe(chain, nc, &table->chains, list) {
+ ctx.chain = chain;
+ nft_chain_del(chain);
+- table->use--;
++ nft_use_dec(&table->use);
+ nf_tables_chain_destroy(&ctx);
+ }
+ nf_tables_table_destroy(&ctx);
+diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
+index e860d8fe0e5e2..03159c6c6c4b6 100644
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -404,8 +404,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+
++ if (!nft_use_inc(&flowtable->use))
++ return -EMFILE;
++
+ priv->flowtable = flowtable;
+- flowtable->use++;
+
+ return nf_ct_netns_get(ctx->net, ctx->family);
+ }
+@@ -424,7 +426,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx,
+ {
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+- priv->flowtable->use++;
++ nft_use_inc_restore(&priv->flowtable->use);
+ }
+
+ static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
+diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
+index 3d76ebfe8939b..407d7197f75bb 100644
+--- a/net/netfilter/nft_immediate.c
++++ b/net/netfilter/nft_immediate.c
+@@ -159,7 +159,7 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
+ default:
+ nft_chain_del(chain);
+ chain->bound = false;
+- chain->table->use--;
++ nft_use_dec(&chain->table->use);
+ break;
+ }
+ break;
+@@ -198,7 +198,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
+ * let the transaction records release this chain and its rules.
+ */
+ if (chain->bound) {
+- chain->use--;
++ nft_use_dec(&chain->use);
+ break;
+ }
+
+@@ -206,9 +206,9 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+- chain->use--;
++ nft_use_dec(&chain->use);
+ list_for_each_entry_safe(rule, n, &chain->rules, list) {
+- chain->use--;
++ nft_use_dec(&chain->use);
+ list_del(&rule->list);
+ nf_tables_rule_destroy(&chain_ctx, rule);
+ }
+diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
+index a48dd5b5d45b1..509011b1ef597 100644
+--- a/net/netfilter/nft_objref.c
++++ b/net/netfilter/nft_objref.c
+@@ -41,8 +41,10 @@ static int nft_objref_init(const struct nft_ctx *ctx,
+ if (IS_ERR(obj))
+ return -ENOENT;
+
++ if (!nft_use_inc(&obj->use))
++ return -EMFILE;
++
+ nft_objref_priv(expr) = obj;
+- obj->use++;
+
+ return 0;
+ }
+@@ -72,7 +74,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx,
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+
+- obj->use--;
++ nft_use_dec(&obj->use);
+ }
+
+ static void nft_objref_activate(const struct nft_ctx *ctx,
+@@ -80,7 +82,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx,
+ {
+ struct nft_object *obj = nft_objref_priv(expr);
+
+- obj->use++;
++ nft_use_inc_restore(&obj->use);
+ }
+
+ static const struct nft_expr_ops nft_objref_ops = {
+--
+2.39.2
+
--- /dev/null
+From 7e2c2a25a65dd8ef5fe16a5b71182c1181093d7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 5 Nov 2022 09:43:09 +0000
+Subject: NTB: amd: Fix error handling in amd_ntb_pci_driver_init()
+
+From: Yuan Can <yuancan@huawei.com>
+
+[ Upstream commit 98af0a33c1101c29b3ce4f0cf4715fd927c717f9 ]
+
+A problem about ntb_hw_amd create debugfs failed is triggered with the
+following log given:
+
+ [ 618.431232] AMD(R) PCI-E Non-Transparent Bridge Driver 1.0
+ [ 618.433284] debugfs: Directory 'ntb_hw_amd' with parent '/' already present!
+
+The reason is that amd_ntb_pci_driver_init() returns pci_register_driver()
+directly without checking its return value. If pci_register_driver()
+fails, it returns without destroying the newly created debugfs directory,
+so the debugfs directory of ntb_hw_amd can never be created later.
+
+ amd_ntb_pci_driver_init()
+ debugfs_create_dir() # create debugfs directory
+ pci_register_driver()
+ driver_register()
+ bus_add_driver()
+ priv = kzalloc(...) # OOM happened
+ # return without destroy debugfs directory
+
+Fix by removing debugfs when pci_register_driver() returns error.
+
+Fixes: a1b3695820aa ("NTB: Add support for AMD PCI-Express Non-Transparent Bridge")
+Signed-off-by: Yuan Can <yuancan@huawei.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ntb/hw/amd/ntb_hw_amd.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
+index 04550b1f984c6..730f2103b91d1 100644
+--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
++++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
+@@ -1338,12 +1338,17 @@ static struct pci_driver amd_ntb_pci_driver = {
+
+ static int __init amd_ntb_pci_driver_init(void)
+ {
++ int ret;
+ pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+ if (debugfs_initialized())
+ debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+- return pci_register_driver(&amd_ntb_pci_driver);
++ ret = pci_register_driver(&amd_ntb_pci_driver);
++ if (ret)
++ debugfs_remove_recursive(debugfs_dir);
++
++ return ret;
+ }
+ module_init(amd_ntb_pci_driver_init);
+
+--
+2.39.2
+
--- /dev/null
+From 180209186f21d1030d210c3ead622260d62483cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 5 Nov 2022 09:43:01 +0000
+Subject: ntb: idt: Fix error handling in idt_pci_driver_init()
+
+From: Yuan Can <yuancan@huawei.com>
+
+[ Upstream commit c012968259b451dc4db407f2310fe131eaefd800 ]
+
+A problem about ntb_hw_idt create debugfs failed is triggered with the
+following log given:
+
+ [ 1236.637636] IDT PCI-E Non-Transparent Bridge Driver 2.0
+ [ 1236.639292] debugfs: Directory 'ntb_hw_idt' with parent '/' already present!
+
+The reason is that idt_pci_driver_init() returns pci_register_driver()
+directly without checking its return value. If pci_register_driver()
+fails, it returns without destroying the newly created debugfs directory,
+so the debugfs directory of ntb_hw_idt can never be created later.
+
+ idt_pci_driver_init()
+ debugfs_create_dir() # create debugfs directory
+ pci_register_driver()
+ driver_register()
+ bus_add_driver()
+ priv = kzalloc(...) # OOM happened
+ # return without destroy debugfs directory
+
+Fix by removing debugfs when pci_register_driver() returns error.
+
+Fixes: bf2a952d31d2 ("NTB: Add IDT 89HPESxNTx PCIe-switches support")
+Signed-off-by: Yuan Can <yuancan@huawei.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ntb/hw/idt/ntb_hw_idt.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
+index 0ed6f809ff2ee..51799fccf8404 100644
+--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
++++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
+@@ -2891,6 +2891,7 @@ static struct pci_driver idt_pci_driver = {
+
+ static int __init idt_pci_driver_init(void)
+ {
++ int ret;
+ pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+ /* Create the top DebugFS directory if the FS is initialized */
+@@ -2898,7 +2899,11 @@ static int __init idt_pci_driver_init(void)
+ dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+ /* Register the NTB hardware driver to handle the PCI device */
+- return pci_register_driver(&idt_pci_driver);
++ ret = pci_register_driver(&idt_pci_driver);
++ if (ret)
++ debugfs_remove_recursive(dbgfs_topdir);
++
++ return ret;
+ }
+ module_init(idt_pci_driver_init);
+
+--
+2.39.2
+
--- /dev/null
+From 7b9f5f4f4ba4a81054b4298375bdf38a3828e4c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 5 Nov 2022 09:43:22 +0000
+Subject: ntb: intel: Fix error handling in intel_ntb_pci_driver_init()
+
+From: Yuan Can <yuancan@huawei.com>
+
+[ Upstream commit 4c3c796aca02883ad35bb117468938cc4022ca41 ]
+
+A problem about ntb_hw_intel create debugfs failed is triggered with the
+following log given:
+
+ [ 273.112733] Intel(R) PCI-E Non-Transparent Bridge Driver 2.0
+ [ 273.115342] debugfs: Directory 'ntb_hw_intel' with parent '/' already present!
+
+The reason is that intel_ntb_pci_driver_init() returns
+pci_register_driver() directly without checking its return value. If
+pci_register_driver() fails, it returns without destroying the newly created
+debugfs directory, so the debugfs directory of ntb_hw_intel can never be created later.
+
+ intel_ntb_pci_driver_init()
+ debugfs_create_dir() # create debugfs directory
+ pci_register_driver()
+ driver_register()
+ bus_add_driver()
+ priv = kzalloc(...) # OOM happened
+ # return without destroy debugfs directory
+
+Fix by removing debugfs when pci_register_driver() returns error.
+
+Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers")
+Signed-off-by: Yuan Can <yuancan@huawei.com>
+Acked-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ntb/hw/intel/ntb_hw_gen1.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c
+index 84772013812bf..60a4ebc7bf35a 100644
+--- a/drivers/ntb/hw/intel/ntb_hw_gen1.c
++++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c
+@@ -2064,12 +2064,17 @@ static struct pci_driver intel_ntb_pci_driver = {
+
+ static int __init intel_ntb_pci_driver_init(void)
+ {
++ int ret;
+ pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+ if (debugfs_initialized())
+ debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+- return pci_register_driver(&intel_ntb_pci_driver);
++ ret = pci_register_driver(&intel_ntb_pci_driver);
++ if (ret)
++ debugfs_remove_recursive(debugfs_dir);
++
++ return ret;
+ }
+ module_init(intel_ntb_pci_driver_init);
+
+--
+2.39.2
+
--- /dev/null
+From 2f49422fef2e45f2bafb822a8c585bf41de7b492 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Nov 2022 11:32:44 +0800
+Subject: NTB: ntb_tool: Add check for devm_kcalloc
+
+From: Jiasheng Jiang <jiasheng@iscas.ac.cn>
+
+[ Upstream commit 2790143f09938776a3b4f69685b380bae8fd06c7 ]
+
+Since devm_kcalloc() may return a NULL pointer,
+add a check for its return value, as is done
+for the other allocations.
+
+Fixes: 7f46c8b3a552 ("NTB: ntb_tool: Add full multi-port NTB API support")
+Signed-off-by: Jiasheng Jiang <jiasheng@iscas.ac.cn>
+Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ntb/test/ntb_tool.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
+index 5ee0afa621a95..eeeb4b1c97d2c 100644
+--- a/drivers/ntb/test/ntb_tool.c
++++ b/drivers/ntb/test/ntb_tool.c
+@@ -998,6 +998,8 @@ static int tool_init_mws(struct tool_ctx *tc)
+ tc->peers[pidx].outmws =
+ devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outmw_cnt,
+ sizeof(*tc->peers[pidx].outmws), GFP_KERNEL);
++ if (tc->peers[pidx].outmws == NULL)
++ return -ENOMEM;
+
+ for (widx = 0; widx < tc->peers[pidx].outmw_cnt; widx++) {
+ tc->peers[pidx].outmws[widx].pidx = pidx;
+--
+2.39.2
+
--- /dev/null
+From 1be077967706a5225aa70459dc1f29e59f16d173 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Nov 2022 23:19:17 +0800
+Subject: NTB: ntb_transport: fix possible memory leak while device_register()
+ fails
+
+From: Yang Yingliang <yangyingliang@huawei.com>
+
+[ Upstream commit 8623ccbfc55d962e19a3537652803676ad7acb90 ]
+
+If device_register() returns an error, the name allocated by
+dev_set_name() needs to be freed. As the comment for device_register()
+says, put_device() should be used to give up the reference in
+the error path. Fix this by calling put_device(), so that the
+name can be freed in kobject_cleanup() and client_dev is freed
+in ntb_transport_client_release().
+
+Fixes: fce8a7bb5b4b ("PCI-Express Non-Transparent Bridge Support")
+Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ntb/ntb_transport.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
+index a9b97ebc71ac5..2abd2235bbcab 100644
+--- a/drivers/ntb/ntb_transport.c
++++ b/drivers/ntb/ntb_transport.c
+@@ -410,7 +410,7 @@ int ntb_transport_register_client_dev(char *device_name)
+
+ rc = device_register(dev);
+ if (rc) {
+- kfree(client_dev);
++ put_device(dev);
+ goto err;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 3b568c0eb266f839b2b2d7a1072cb7c86cb4e780 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Jun 2023 18:08:05 +0530
+Subject: nvme: fix the NVME_ID_NS_NVM_STS_MASK definition
+
+From: Ankit Kumar <ankit.kumar@samsung.com>
+
+[ Upstream commit b938e6603660652dc3db66d3c915fbfed3bce21d ]
+
+As per NVMe command set specification 1.0c Storage tag size is 7 bits.
+
+Fixes: 4020aad85c67 ("nvme: add support for enhanced metadata")
+Signed-off-by: Ankit Kumar <ankit.kumar@samsung.com>
+Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/nvme.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/nvme.h b/include/linux/nvme.h
+index 779507ac750b8..2819d6c3a6b5d 100644
+--- a/include/linux/nvme.h
++++ b/include/linux/nvme.h
+@@ -473,7 +473,7 @@ struct nvme_id_ns_nvm {
+ };
+
+ enum {
+- NVME_ID_NS_NVM_STS_MASK = 0x3f,
++ NVME_ID_NS_NVM_STS_MASK = 0x7f,
+ NVME_ID_NS_NVM_GUARD_SHIFT = 7,
+ NVME_ID_NS_NVM_GUARD_MASK = 0x3,
+ };
+--
+2.39.2
+
--- /dev/null
+From 65b3c7307116caccad741bb3f16b88a32e5423ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 17:26:20 +0800
+Subject: nvme-pci: fix DMA direction of unmapping integrity data
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit b8f6446b6853768cb99e7c201bddce69ca60c15e ]
+
+The DMA direction should be passed to dma_unmap_page() when unmapping
+integrity data.
+
+Fix this DMA direction; the issue was reported in Guangwu's test.
+
+Reported-by: Guangwu Zhang <guazhang@redhat.com>
+Fixes: 4aedb705437f ("nvme-pci: split metadata handling from nvme_map_data / nvme_unmap_data")
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/pci.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 492f319ebdf37..5b5303f0e2c20 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -968,7 +968,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+ dma_unmap_page(dev->dev, iod->meta_dma,
+- rq_integrity_vec(req)->bv_len, rq_data_dir(req));
++ rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+ }
+
+ if (blk_rq_nr_phys_segments(req))
+--
+2.39.2
+
--- /dev/null
+From a369d12b3781cc00b48dea406925dc0c01d8c53e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jul 2023 13:59:36 +0530
+Subject: octeontx2-af: Move validation of ptp pointer before its usage
+
+From: Sai Krishna <saikrishnag@marvell.com>
+
+[ Upstream commit 7709fbd4922c197efabda03660d93e48a3e80323 ]
+
+Moved PTP pointer validation before its use to avoid smatch warning.
+Also used kzalloc/kfree instead of devm_kzalloc/devm_kfree.
+
+Fixes: 2ef4e45d99b1 ("octeontx2-af: Add PTP PPS Errata workaround on CN10K silicon")
+Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: Sai Krishna <saikrishnag@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/af/ptp.c | 19 +++++++++----------
+ .../net/ethernet/marvell/octeontx2/af/rvu.c | 2 +-
+ 2 files changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
+index 3411e2e47d46b..0ee420a489fc4 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
+@@ -208,7 +208,7 @@ struct ptp *ptp_get(void)
+ /* Check driver is bound to PTP block */
+ if (!ptp)
+ ptp = ERR_PTR(-EPROBE_DEFER);
+- else
++ else if (!IS_ERR(ptp))
+ pci_dev_get(ptp->pdev);
+
+ return ptp;
+@@ -388,11 +388,10 @@ static int ptp_extts_on(struct ptp *ptp, int on)
+ static int ptp_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+ {
+- struct device *dev = &pdev->dev;
+ struct ptp *ptp;
+ int err;
+
+- ptp = devm_kzalloc(dev, sizeof(*ptp), GFP_KERNEL);
++ ptp = kzalloc(sizeof(*ptp), GFP_KERNEL);
+ if (!ptp) {
+ err = -ENOMEM;
+ goto error;
+@@ -428,20 +427,19 @@ static int ptp_probe(struct pci_dev *pdev,
+ return 0;
+
+ error_free:
+- devm_kfree(dev, ptp);
++ kfree(ptp);
+
+ error:
+ /* For `ptp_get()` we need to differentiate between the case
+ * when the core has not tried to probe this device and the case when
+- * the probe failed. In the later case we pretend that the
+- * initialization was successful and keep the error in
++ * the probe failed. In the later case we keep the error in
+ * `dev->driver_data`.
+ */
+ pci_set_drvdata(pdev, ERR_PTR(err));
+ if (!first_ptp_block)
+ first_ptp_block = ERR_PTR(err);
+
+- return 0;
++ return err;
+ }
+
+ static void ptp_remove(struct pci_dev *pdev)
+@@ -449,16 +447,17 @@ static void ptp_remove(struct pci_dev *pdev)
+ struct ptp *ptp = pci_get_drvdata(pdev);
+ u64 clock_cfg;
+
+- if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer))
+- hrtimer_cancel(&ptp->hrtimer);
+-
+ if (IS_ERR_OR_NULL(ptp))
+ return;
+
++ if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer))
++ hrtimer_cancel(&ptp->hrtimer);
++
+ /* Disable PTP clock */
+ clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);
+ clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN;
+ writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
++ kfree(ptp);
+ }
+
+ static const struct pci_device_id ptp_id_table[] = {
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+index b26b013216933..73932e2755bca 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+@@ -3253,7 +3253,7 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ rvu->ptp = ptp_get();
+ if (IS_ERR(rvu->ptp)) {
+ err = PTR_ERR(rvu->ptp);
+- if (err == -EPROBE_DEFER)
++ if (err)
+ goto err_release_regions;
+ rvu->ptp = NULL;
+ }
+--
+2.39.2
+
--- /dev/null
+From 0ab9b97172a9cb991254a7818274eb8296aacd12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jul 2023 09:57:05 +0530
+Subject: octeontx2-af: Promisc enable/disable through mbox
+
+From: Ratheesh Kannoth <rkannoth@marvell.com>
+
+[ Upstream commit af42088bdaf292060b8d8a00d8644ca7b2b3f2d1 ]
+
+In legacy silicon, promiscuous mode is only modified
+through CGX mbox messages. In CN10KB silicon, it is modified
+from CGX mbox and NIX. This breaks legacy application
+behaviour. Fix this by removing call from NIX.
+
+Fixes: d6c9784baf59 ("octeontx2-af: Invoke exact match functions if supported")
+Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/af/rvu_nix.c | 11 ++-------
+ .../marvell/octeontx2/af/rvu_npc_hash.c | 23 +++++++++++++++++--
+ 2 files changed, 23 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index f01d057ad025a..8cdf91a5bf44f 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -3815,21 +3815,14 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req,
+ }
+
+ /* install/uninstall promisc entry */
+- if (promisc) {
++ if (promisc)
+ rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
+ pfvf->rx_chan_base,
+ pfvf->rx_chan_cnt);
+-
+- if (rvu_npc_exact_has_match_table(rvu))
+- rvu_npc_exact_promisc_enable(rvu, pcifunc);
+- } else {
++ else
+ if (!nix_rx_multicast)
+ rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf, false);
+
+- if (rvu_npc_exact_has_match_table(rvu))
+- rvu_npc_exact_promisc_disable(rvu, pcifunc);
+- }
+-
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
+index 9f11c1e407373..6fe67f3a7f6f1 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
+@@ -1164,8 +1164,10 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i
+ {
+ struct npc_exact_table *table;
+ u16 *cnt, old_cnt;
++ bool promisc;
+
+ table = rvu->hw->table;
++ promisc = table->promisc_mode[drop_mcam_idx];
+
+ cnt = &table->cnt_cmd_rules[drop_mcam_idx];
+ old_cnt = *cnt;
+@@ -1177,13 +1179,18 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i
+
+ *enable_or_disable_cam = false;
+
+- /* If all rules are deleted, disable cam */
++ if (promisc)
++ goto done;
++
++ /* If all rules are deleted and not already in promisc mode;
++ * disable cam
++ */
+ if (!*cnt && val < 0) {
+ *enable_or_disable_cam = true;
+ goto done;
+ }
+
+- /* If rule got added, enable cam */
++ /* If rule got added and not already in promisc mode; enable cam */
+ if (!old_cnt && val > 0) {
+ *enable_or_disable_cam = true;
+ goto done;
+@@ -1462,6 +1469,12 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc)
+ *promisc = false;
+ mutex_unlock(&table->lock);
+
++ /* Enable drop rule */
++ rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX,
++ true);
++
++ dev_dbg(rvu->dev, "%s: disabled promisc mode (cgx=%d lmac=%d)\n",
++ __func__, cgx_id, lmac_id);
+ return 0;
+ }
+
+@@ -1503,6 +1516,12 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc)
+ *promisc = true;
+ mutex_unlock(&table->lock);
+
++ /* disable drop rule */
++ rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX,
++ false);
++
++ dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d)\n",
++ __func__, cgx_id, lmac_id);
+ return 0;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From ea03ebec26393aa8d6ce2220cad7c458bcde9935 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 16:00:27 +0530
+Subject: octeontx2-pf: Add additional check for MCAM rules
+
+From: Suman Ghosh <sumang@marvell.com>
+
+[ Upstream commit 8278ee2a2646b9acf747317895e47a640ba933c9 ]
+
+Due to hardware limitation, MCAM drop rule with
+ether_type == 802.1Q and vlan_id == 0 is not supported. Hence rejecting
+such rules.
+
+Fixes: dce677da57c0 ("octeontx2-pf: Add vlan-etype to ntuple filters")
+Signed-off-by: Suman Ghosh <sumang@marvell.com>
+Link: https://lore.kernel.org/r/20230710103027.2244139-1-sumang@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/nic/otx2_flows.c | 8 ++++++++
+ .../net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 15 +++++++++++++++
+ 2 files changed, 23 insertions(+)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+index 10e11262d48a0..2d7713a1a1539 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+@@ -872,6 +872,14 @@ static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
+ return -EINVAL;
+
+ vlan_etype = be16_to_cpu(fsp->h_ext.vlan_etype);
++
++ /* Drop rule with vlan_etype == 802.1Q
++ * and vlan_id == 0 is not supported
++ */
++ if (vlan_etype == ETH_P_8021Q && !fsp->m_ext.vlan_tci &&
++ fsp->ring_cookie == RX_CLS_FLOW_DISC)
++ return -EINVAL;
++
+ /* Only ETH_P_8021Q and ETH_P_802AD types supported */
+ if (vlan_etype != ETH_P_8021Q &&
+ vlan_etype != ETH_P_8021AD)
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+index 8392f63e433fc..293bd3f29b077 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+@@ -604,6 +604,21 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
+ return -EOPNOTSUPP;
+ }
+
++ if (!match.mask->vlan_id) {
++ struct flow_action_entry *act;
++ int i;
++
++ flow_action_for_each(i, act, &rule->action) {
++ if (act->id == FLOW_ACTION_DROP) {
++ netdev_err(nic->netdev,
++ "vlan tpid 0x%x with vlan_id %d is not supported for DROP rule.\n",
++ ntohs(match.key->vlan_tpid),
++ match.key->vlan_id);
++ return -EOPNOTSUPP;
++ }
++ }
++ }
++
+ if (match.mask->vlan_id ||
+ match.mask->vlan_dei ||
+ match.mask->vlan_priority) {
+--
+2.39.2
+
--- /dev/null
+From 696ca48d335bae7910f2e1abae3078b90a63c6aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 17:54:40 +0100
+Subject: openrisc: Union fpcsr and oldmask in sigcontext to unbreak userspace
+ ABI
+
+From: Stafford Horne <shorne@gmail.com>
+
+[ Upstream commit dceaafd668812115037fc13a1893d068b7b880f5 ]
+
+With commit 27267655c531 ("openrisc: Support floating point user api") I
+added an entry to the struct sigcontext which caused an unwanted change
+to the userspace ABI.
+
+To fix this we use the previously unused oldmask field space for the
+floating point fpcsr state. We do this with a union to restore the ABI
+back to the pre kernel v6.4 ABI and keep API compatibility.
+
+This does mean if there is some code somewhere that is setting oldmask
+in an OpenRISC specific userspace sighandler it would end up setting the
+floating point register status, but I think it's unlikely as oldmask was
+never functional before.
+
+Fixes: 27267655c531 ("openrisc: Support floating point user api")
+Reported-by: Szabolcs Nagy <nsz@port70.net>
+Closes: https://lore.kernel.org/openrisc/20230626213840.GA1236108@port70.net/
+Signed-off-by: Stafford Horne <shorne@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/openrisc/include/uapi/asm/sigcontext.h | 6 ++++--
+ arch/openrisc/kernel/signal.c | 4 ++--
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/arch/openrisc/include/uapi/asm/sigcontext.h b/arch/openrisc/include/uapi/asm/sigcontext.h
+index ca585e4af6b8e..e7ffb58ff58fb 100644
+--- a/arch/openrisc/include/uapi/asm/sigcontext.h
++++ b/arch/openrisc/include/uapi/asm/sigcontext.h
+@@ -28,8 +28,10 @@
+
+ struct sigcontext {
+ struct user_regs_struct regs; /* needs to be first */
+- struct __or1k_fpu_state fpu;
+- unsigned long oldmask;
++ union {
++ unsigned long fpcsr;
++ unsigned long oldmask; /* unused */
++ };
+ };
+
+ #endif /* __ASM_OPENRISC_SIGCONTEXT_H */
+diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
+index 4664a18f0787d..2e7257a433ff4 100644
+--- a/arch/openrisc/kernel/signal.c
++++ b/arch/openrisc/kernel/signal.c
+@@ -50,7 +50,7 @@ static int restore_sigcontext(struct pt_regs *regs,
+ err |= __copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long));
+ err |= __copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long));
+ err |= __copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long));
+- err |= __copy_from_user(&regs->fpcsr, &sc->fpu.fpcsr, sizeof(unsigned long));
++ err |= __copy_from_user(&regs->fpcsr, &sc->fpcsr, sizeof(unsigned long));
+
+ /* make sure the SM-bit is cleared so user-mode cannot fool us */
+ regs->sr &= ~SPR_SR_SM;
+@@ -113,7 +113,7 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+ err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long));
+ err |= __copy_to_user(&sc->regs.pc, &regs->pc, sizeof(unsigned long));
+ err |= __copy_to_user(&sc->regs.sr, &regs->sr, sizeof(unsigned long));
+- err |= __copy_to_user(&sc->fpu.fpcsr, &regs->fpcsr, sizeof(unsigned long));
++ err |= __copy_to_user(&sc->fpcsr, &regs->fpcsr, sizeof(unsigned long));
+
+ return err;
+ }
+--
+2.39.2
+
--- /dev/null
+From fe769e2ae656339ebc5ea9b89b775402633f4d2d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jun 2023 18:11:54 +0300
+Subject: platform/x86: wmi: Break possible infinite loop when parsing GUID
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit 028e6e204ace1f080cfeacd72c50397eb8ae8883 ]
+
+The while-loop may break on one of the two conditions, either ID string
+is empty or GUID matches. The second one, may never be reached if the
+parsed string is not correct GUID. In such a case the loop will never
+advance to check the next ID.
+
+Break possible infinite loop by factoring out guid_parse_and_compare()
+helper which may be moved to the generic header for everyone later on
+and preventing from similar mistake in the future.
+
+Interestingly, this first appeared when WMI was turned into a bus
+driver; later, when duplicated GUIDs were checked, the while-loop
+was replaced by a for-loop and hence the mistake was not made again.
+
+Fixes: a48e23385fcf ("platform/x86: wmi: add context pointer field to struct wmi_device_id")
+Fixes: 844af950da94 ("platform/x86: wmi: Turn WMI into a bus driver")
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20230621151155.78279-1-andriy.shevchenko@linux.intel.com
+Tested-by: Armin Wolf <W_Armin@gmx.de>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/wmi.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
+index d81319a502efc..e1a3bfeeed529 100644
+--- a/drivers/platform/x86/wmi.c
++++ b/drivers/platform/x86/wmi.c
+@@ -136,6 +136,16 @@ static acpi_status find_guid(const char *guid_string, struct wmi_block **out)
+ return AE_NOT_FOUND;
+ }
+
++static bool guid_parse_and_compare(const char *string, const guid_t *guid)
++{
++ guid_t guid_input;
++
++ if (guid_parse(string, &guid_input))
++ return false;
++
++ return guid_equal(&guid_input, guid);
++}
++
+ static const void *find_guid_context(struct wmi_block *wblock,
+ struct wmi_driver *wdriver)
+ {
+@@ -146,11 +156,7 @@ static const void *find_guid_context(struct wmi_block *wblock,
+ return NULL;
+
+ while (*id->guid_string) {
+- guid_t guid_input;
+-
+- if (guid_parse(id->guid_string, &guid_input))
+- continue;
+- if (guid_equal(&wblock->gblock.guid, &guid_input))
++ if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid))
+ return id->context;
+ id++;
+ }
+@@ -827,11 +833,7 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver)
+ return 0;
+
+ while (*id->guid_string) {
+- guid_t driver_guid;
+-
+- if (WARN_ON(guid_parse(id->guid_string, &driver_guid)))
+- continue;
+- if (guid_equal(&driver_guid, &wblock->gblock.guid))
++ if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid))
+ return 1;
+
+ id++;
+--
+2.39.2
+
--- /dev/null
+From 7c62c3b0f47ff2d49c0b22727b5753f5b0e98584 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 09:41:31 +0200
+Subject: riscv, bpf: Fix inconsistent JIT image generation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Björn Töpel <bjorn@rivosinc.com>
+
+[ Upstream commit c56fb2aab23505bb7160d06097c8de100b82b851 ]
+
+In order to generate the prologue and epilogue, the BPF JIT needs to
+know which registers are clobbered. Therefore, during the pre-final
+passes, the prologue is generated after the body of the program:
+body-prologue-epilogue. Then, in the final pass, a proper
+prologue-body-epilogue JITted image is generated.
+
+This scheme has worked most of the time. However, for some large
+programs with many jumps, e.g. the test_kmod.sh BPF selftest with
+hardening enabled (blinding constants), this has shown to be
+incorrect. For the final pass, when the proper prologue-body-epilogue
+is generated, the image has not converged. This will lead to that the
+final image will have incorrect jump offsets. The following is an
+excerpt from an incorrect image:
+
+ | ...
+ | 3b8: 00c50663 beq a0,a2,3c4 <.text+0x3c4>
+ | 3bc: 0020e317 auipc t1,0x20e
+ | 3c0: 49630067 jalr zero,1174(t1) # 20e852 <.text+0x20e852>
+ | ...
+ | 20e84c: 8796 c.mv a5,t0
+ | 20e84e: 6422 c.ldsp s0,8(sp) # Epilogue start
+ | 20e850: 6141 c.addi16sp sp,16
+ | 20e852: 853e c.mv a0,a5 # Incorrect jump target
+ | 20e854: 8082 c.jr ra
+
+The image has shrunk, and the epilogue offset is incorrect in the
+final pass.
+
+Correct the problem by always generating proper prologue-body-epilogue
+outputs, which means that the first pass will only generate the body
+to track which registers are touched.
+
+Fixes: 2353ecc6f91f ("bpf, riscv: add BPF JIT for RV64G")
+Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20230710074131.19596-1-bjorn@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/net/bpf_jit.h | 6 +++---
+ arch/riscv/net/bpf_jit_core.c | 19 +++++++++++++------
+ 2 files changed, 16 insertions(+), 9 deletions(-)
+
+diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
+index bf9802a63061d..2717f54904287 100644
+--- a/arch/riscv/net/bpf_jit.h
++++ b/arch/riscv/net/bpf_jit.h
+@@ -69,7 +69,7 @@ struct rv_jit_context {
+ struct bpf_prog *prog;
+ u16 *insns; /* RV insns */
+ int ninsns;
+- int body_len;
++ int prologue_len;
+ int epilogue_offset;
+ int *offset; /* BPF to RV */
+ int nexentries;
+@@ -216,8 +216,8 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
+ int from, to;
+
+ off++; /* BPF branch is from PC+1, RV is from PC */
+- from = (insn > 0) ? ctx->offset[insn - 1] : 0;
+- to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
++ from = (insn > 0) ? ctx->offset[insn - 1] : ctx->prologue_len;
++ to = (insn + off > 0) ? ctx->offset[insn + off - 1] : ctx->prologue_len;
+ return ninsns_rvoff(to - from);
+ }
+
+diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
+index 737baf8715da7..7a26a3e1c73cf 100644
+--- a/arch/riscv/net/bpf_jit_core.c
++++ b/arch/riscv/net/bpf_jit_core.c
+@@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ unsigned int prog_size = 0, extable_size = 0;
+ bool tmp_blinded = false, extra_pass = false;
+ struct bpf_prog *tmp, *orig_prog = prog;
+- int pass = 0, prev_ninsns = 0, prologue_len, i;
++ int pass = 0, prev_ninsns = 0, i;
+ struct rv_jit_data *jit_data;
+ struct rv_jit_context *ctx;
+
+@@ -83,6 +83,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ prog = orig_prog;
+ goto out_offset;
+ }
++
++ if (build_body(ctx, extra_pass, NULL)) {
++ prog = orig_prog;
++ goto out_offset;
++ }
++
+ for (i = 0; i < prog->len; i++) {
+ prev_ninsns += 32;
+ ctx->offset[i] = prev_ninsns;
+@@ -91,12 +97,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ for (i = 0; i < NR_JIT_ITERATIONS; i++) {
+ pass++;
+ ctx->ninsns = 0;
++
++ bpf_jit_build_prologue(ctx);
++ ctx->prologue_len = ctx->ninsns;
++
+ if (build_body(ctx, extra_pass, ctx->offset)) {
+ prog = orig_prog;
+ goto out_offset;
+ }
+- ctx->body_len = ctx->ninsns;
+- bpf_jit_build_prologue(ctx);
++
+ ctx->epilogue_offset = ctx->ninsns;
+ bpf_jit_build_epilogue(ctx);
+
+@@ -162,10 +171,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+
+ if (!prog->is_func || extra_pass) {
+ bpf_jit_binary_lock_ro(jit_data->header);
+- prologue_len = ctx->epilogue_offset - ctx->body_len;
+ for (i = 0; i < prog->len; i++)
+- ctx->offset[i] = ninsns_rvoff(prologue_len +
+- ctx->offset[i]);
++ ctx->offset[i] = ninsns_rvoff(ctx->offset[i]);
+ bpf_prog_fill_jited_linfo(prog, ctx->offset);
+ out_offset:
+ kfree(ctx->offset);
+--
+2.39.2
+
--- /dev/null
+From 0711941654500c97465899b2d6b84b346d829f42 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 01:10:36 +0800
+Subject: riscv: mm: fix truncation warning on RV32
+
+From: Jisheng Zhang <jszhang@kernel.org>
+
+[ Upstream commit b690e266dae2f85f4dfea21fa6a05e3500a51054 ]
+
+lkp reports below sparse warning when building for RV32:
+arch/riscv/mm/init.c:1204:48: sparse: warning: cast truncates bits from
+constant value (100000000 becomes 0)
+
+IMO, the reason we didn't see this truncation bug in the real world is "0"
+means MEMBLOCK_ALLOC_ACCESSIBLE in memblock and there's no RV32 HW
+with more than 4GB memory.
+
+Fix it anyway to make sparse happy.
+
+Fixes: decf89f86ecd ("riscv: try to allocate crashkern region from 32bit addressible memory")
+Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202306080034.SLiCiOMn-lkp@intel.com/
+Link: https://lore.kernel.org/r/20230709171036.1906-1-jszhang@kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/mm/init.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
+index 1306149aad57a..93e7bb9f67fd4 100644
+--- a/arch/riscv/mm/init.c
++++ b/arch/riscv/mm/init.c
+@@ -1346,7 +1346,7 @@ static void __init reserve_crashkernel(void)
+ */
+ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
+ search_start,
+- min(search_end, (unsigned long) SZ_4G));
++ min(search_end, (unsigned long)(SZ_4G - 1)));
+ if (crash_base == 0) {
+ /* Try again without restricting region to 32bit addressible memory */
+ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
+--
+2.39.2
+
--- /dev/null
+From 8c9160ca105b8a4c5e9cc7c58c474696aba788c3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 12:56:22 +0200
+Subject: s390/ism: Do not unregister clients with registered DMBs
+
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+
+[ Upstream commit 266deeea34ffd28c6b6a63edf2af9b5a07161c24 ]
+
+When ism_unregister_client() is called but the client still has DMBs
+registered it returns -EBUSY and prints an error. This only happens
+after the client has already been unregistered however. This is
+unexpected as the unregister claims to have failed. Furthermore as this
+implies a client bug a WARN() is more appropriate. Thus move the
+deregistration after the check and use WARN().
+
+Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration")
+Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/s390/net/ism_drv.c | 23 +++++++++++++----------
+ 1 file changed, 13 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
+index d65571b3d5cad..6db5cf7e901f9 100644
+--- a/drivers/s390/net/ism_drv.c
++++ b/drivers/s390/net/ism_drv.c
+@@ -96,29 +96,32 @@ int ism_unregister_client(struct ism_client *client)
+ int rc = 0;
+
+ mutex_lock(&ism_dev_list.mutex);
+- mutex_lock(&clients_lock);
+- clients[client->id] = NULL;
+- if (client->id + 1 == max_client)
+- max_client--;
+- mutex_unlock(&clients_lock);
+ list_for_each_entry(ism, &ism_dev_list.list, list) {
+ spin_lock_irqsave(&ism->lock, flags);
+ /* Stop forwarding IRQs and events */
+ ism->subs[client->id] = NULL;
+ for (int i = 0; i < ISM_NR_DMBS; ++i) {
+ if (ism->sba_client_arr[i] == client->id) {
+- pr_err("%s: attempt to unregister client '%s'"
+- "with registered dmb(s)\n", __func__,
+- client->name);
++ WARN(1, "%s: attempt to unregister '%s' with registered dmb(s)\n",
++ __func__, client->name);
+ rc = -EBUSY;
+- goto out;
++ goto err_reg_dmb;
+ }
+ }
+ spin_unlock_irqrestore(&ism->lock, flags);
+ }
+-out:
+ mutex_unlock(&ism_dev_list.mutex);
+
++ mutex_lock(&clients_lock);
++ clients[client->id] = NULL;
++ if (client->id + 1 == max_client)
++ max_client--;
++ mutex_unlock(&clients_lock);
++ return rc;
++
++err_reg_dmb:
++ spin_unlock_irqrestore(&ism->lock, flags);
++ mutex_unlock(&ism_dev_list.mutex);
+ return rc;
+ }
+ EXPORT_SYMBOL_GPL(ism_unregister_client);
+--
+2.39.2
+
--- /dev/null
+From 82e901cb9437e020d30429e55f1be39ce1457fc7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 12:56:21 +0200
+Subject: s390/ism: Fix and simplify add()/remove() callback handling
+
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+
+[ Upstream commit 76631ffa2fd2d45bae5ad717eef716b94144e0e7 ]
+
+Previously the clients_lock was protecting the clients array against
+concurrent addition/removal of clients but was also accessed from IRQ
+context. This meant that it had to be a spinlock and that the add() and
+remove() callbacks in which clients need to do allocation and take
+mutexes can't be called under the clients_lock. To work around this these
+callbacks were moved to workqueues. This not only introduced significant
+complexity but is also subtly broken in at least one way.
+
+In ism_dev_init() and ism_dev_exit() clients[i]->tgt_ism is used to
+communicate the added/removed ISM device to the work function. While
+write access to client[i]->tgt_ism is protected by the clients_lock and
+the code waits that there is no pending add/remove work before and after
+setting clients[i]->tgt_ism this is not enough. The problem is that the
+wait happens based on per ISM device counters. Thus a concurrent
+ism_dev_init()/ism_dev_exit() for a different ISM device may overwrite
+a clients[i]->tgt_ism between unlocking the clients_lock and the
+subsequent wait for the work to finish.
+
+Thankfully with the clients_lock no longer held in IRQ context it can be
+turned into a mutex which can be held during the calls to add()/remove()
+completely removing the need for the workqueues and the associated
+broken housekeeping including the per ISM device counters and the
+clients[i]->tgt_ism.
+
+Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration")
+Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/s390/net/ism_drv.c | 86 +++++++++++---------------------------
+ include/linux/ism.h | 6 ---
+ 2 files changed, 24 insertions(+), 68 deletions(-)
+
+diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
+index 216eb4b386286..d65571b3d5cad 100644
+--- a/drivers/s390/net/ism_drv.c
++++ b/drivers/s390/net/ism_drv.c
+@@ -36,7 +36,7 @@ static const struct smcd_ops ism_ops;
+ static struct ism_client *clients[MAX_CLIENTS]; /* use an array rather than */
+ /* a list for fast mapping */
+ static u8 max_client;
+-static DEFINE_SPINLOCK(clients_lock);
++static DEFINE_MUTEX(clients_lock);
+ struct ism_dev_list {
+ struct list_head list;
+ struct mutex mutex; /* protects ism device list */
+@@ -59,11 +59,10 @@ static void ism_setup_forwarding(struct ism_client *client, struct ism_dev *ism)
+ int ism_register_client(struct ism_client *client)
+ {
+ struct ism_dev *ism;
+- unsigned long flags;
+ int i, rc = -ENOSPC;
+
+ mutex_lock(&ism_dev_list.mutex);
+- spin_lock_irqsave(&clients_lock, flags);
++ mutex_lock(&clients_lock);
+ for (i = 0; i < MAX_CLIENTS; ++i) {
+ if (!clients[i]) {
+ clients[i] = client;
+@@ -74,7 +73,8 @@ int ism_register_client(struct ism_client *client)
+ break;
+ }
+ }
+- spin_unlock_irqrestore(&clients_lock, flags);
++ mutex_unlock(&clients_lock);
++
+ if (i < MAX_CLIENTS) {
+ /* initialize with all devices that we got so far */
+ list_for_each_entry(ism, &ism_dev_list.list, list) {
+@@ -96,11 +96,11 @@ int ism_unregister_client(struct ism_client *client)
+ int rc = 0;
+
+ mutex_lock(&ism_dev_list.mutex);
+- spin_lock_irqsave(&clients_lock, flags);
++ mutex_lock(&clients_lock);
+ clients[client->id] = NULL;
+ if (client->id + 1 == max_client)
+ max_client--;
+- spin_unlock_irqrestore(&clients_lock, flags);
++ mutex_unlock(&clients_lock);
+ list_for_each_entry(ism, &ism_dev_list.list, list) {
+ spin_lock_irqsave(&ism->lock, flags);
+ /* Stop forwarding IRQs and events */
+@@ -571,21 +571,9 @@ static u64 ism_get_local_gid(struct ism_dev *ism)
+ return ism->local_gid;
+ }
+
+-static void ism_dev_add_work_func(struct work_struct *work)
+-{
+- struct ism_client *client = container_of(work, struct ism_client,
+- add_work);
+-
+- client->add(client->tgt_ism);
+- ism_setup_forwarding(client, client->tgt_ism);
+- atomic_dec(&client->tgt_ism->add_dev_cnt);
+- wake_up(&client->tgt_ism->waitq);
+-}
+-
+ static int ism_dev_init(struct ism_dev *ism)
+ {
+ struct pci_dev *pdev = ism->pdev;
+- unsigned long flags;
+ int i, ret;
+
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+@@ -618,25 +606,16 @@ static int ism_dev_init(struct ism_dev *ism)
+ /* hardware is V2 capable */
+ ism_create_system_eid();
+
+- init_waitqueue_head(&ism->waitq);
+- atomic_set(&ism->free_clients_cnt, 0);
+- atomic_set(&ism->add_dev_cnt, 0);
+-
+- wait_event(ism->waitq, !atomic_read(&ism->add_dev_cnt));
+- spin_lock_irqsave(&clients_lock, flags);
+- for (i = 0; i < max_client; ++i)
++ mutex_lock(&ism_dev_list.mutex);
++ mutex_lock(&clients_lock);
++ for (i = 0; i < max_client; ++i) {
+ if (clients[i]) {
+- INIT_WORK(&clients[i]->add_work,
+- ism_dev_add_work_func);
+- clients[i]->tgt_ism = ism;
+- atomic_inc(&ism->add_dev_cnt);
+- schedule_work(&clients[i]->add_work);
++ clients[i]->add(ism);
++ ism_setup_forwarding(clients[i], ism);
+ }
+- spin_unlock_irqrestore(&clients_lock, flags);
+-
+- wait_event(ism->waitq, !atomic_read(&ism->add_dev_cnt));
++ }
++ mutex_unlock(&clients_lock);
+
+- mutex_lock(&ism_dev_list.mutex);
+ list_add(&ism->list, &ism_dev_list.list);
+ mutex_unlock(&ism_dev_list.mutex);
+
+@@ -711,40 +690,24 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ return ret;
+ }
+
+-static void ism_dev_remove_work_func(struct work_struct *work)
+-{
+- struct ism_client *client = container_of(work, struct ism_client,
+- remove_work);
+- unsigned long flags;
+-
+- spin_lock_irqsave(&client->tgt_ism->lock, flags);
+- client->tgt_ism->subs[client->id] = NULL;
+- spin_unlock_irqrestore(&client->tgt_ism->lock, flags);
+- client->remove(client->tgt_ism);
+- atomic_dec(&client->tgt_ism->free_clients_cnt);
+- wake_up(&client->tgt_ism->waitq);
+-}
+-
+-/* Callers must hold ism_dev_list.mutex */
+ static void ism_dev_exit(struct ism_dev *ism)
+ {
+ struct pci_dev *pdev = ism->pdev;
+ unsigned long flags;
+ int i;
+
+- wait_event(ism->waitq, !atomic_read(&ism->free_clients_cnt));
+- spin_lock_irqsave(&clients_lock, flags);
++ spin_lock_irqsave(&ism->lock, flags);
+ for (i = 0; i < max_client; ++i)
+- if (clients[i]) {
+- INIT_WORK(&clients[i]->remove_work,
+- ism_dev_remove_work_func);
+- clients[i]->tgt_ism = ism;
+- atomic_inc(&ism->free_clients_cnt);
+- schedule_work(&clients[i]->remove_work);
+- }
+- spin_unlock_irqrestore(&clients_lock, flags);
++ ism->subs[i] = NULL;
++ spin_unlock_irqrestore(&ism->lock, flags);
+
+- wait_event(ism->waitq, !atomic_read(&ism->free_clients_cnt));
++ mutex_lock(&ism_dev_list.mutex);
++ mutex_lock(&clients_lock);
++ for (i = 0; i < max_client; ++i) {
++ if (clients[i])
++ clients[i]->remove(ism);
++ }
++ mutex_unlock(&clients_lock);
+
+ if (SYSTEM_EID.serial_number[0] != '0' ||
+ SYSTEM_EID.type[0] != '0')
+@@ -755,15 +718,14 @@ static void ism_dev_exit(struct ism_dev *ism)
+ kfree(ism->sba_client_arr);
+ pci_free_irq_vectors(pdev);
+ list_del_init(&ism->list);
++ mutex_unlock(&ism_dev_list.mutex);
+ }
+
+ static void ism_remove(struct pci_dev *pdev)
+ {
+ struct ism_dev *ism = dev_get_drvdata(&pdev->dev);
+
+- mutex_lock(&ism_dev_list.mutex);
+ ism_dev_exit(ism);
+- mutex_unlock(&ism_dev_list.mutex);
+
+ pci_release_mem_regions(pdev);
+ pci_disable_device(pdev);
+diff --git a/include/linux/ism.h b/include/linux/ism.h
+index 5160d47e5ea9e..9a4c204df3da1 100644
+--- a/include/linux/ism.h
++++ b/include/linux/ism.h
+@@ -45,9 +45,6 @@ struct ism_dev {
+ int ieq_idx;
+
+ struct ism_client *subs[MAX_CLIENTS];
+- atomic_t free_clients_cnt;
+- atomic_t add_dev_cnt;
+- wait_queue_head_t waitq;
+ };
+
+ struct ism_event {
+@@ -69,9 +66,6 @@ struct ism_client {
+ */
+ void (*handle_irq)(struct ism_dev *dev, unsigned int bit, u16 dmbemask);
+ /* Private area - don't touch! */
+- struct work_struct remove_work;
+- struct work_struct add_work;
+- struct ism_dev *tgt_ism;
+ u8 id;
+ };
+
+--
+2.39.2
+
--- /dev/null
+From 120b472bc2a8e8531d07e4cbb70b758a9ee6a46c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 12:56:20 +0200
+Subject: s390/ism: Fix locking for forwarding of IRQs and events to clients
+
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+
+[ Upstream commit 6b5c13b591d753c6022fbd12f8c0c0a9a07fc065 ]
+
+The clients array references all registered clients and is protected by
+the clients_lock. Besides its use as general list of clients the clients
+array is accessed in ism_handle_irq() to forward ISM device events to
+clients.
+
+While the clients_lock is taken in the IRQ handler when calling
+handle_event() it is however incorrectly not held during the
+client->handle_irq() call and for the preceding clients[] access leaving
+it unprotected against concurrent client (un-)registration.
+
+Furthermore the accesses to ism->sba_client_arr[] in ism_register_dmb()
+and ism_unregister_dmb() are not protected by any lock. This is
+especially problematic as the client ID from the ism->sba_client_arr[]
+is not checked against NO_CLIENT and neither is the client pointer
+checked.
+
+Instead of expanding the use of the clients_lock further add a separate
+array in struct ism_dev which references clients subscribed to the
+device's events and IRQs. This array is protected by ism->lock which is
+already taken in ism_handle_irq() and can be taken outside the IRQ
+handler when adding/removing subscribers or the accessing
+ism->sba_client_arr[]. This also means that the clients_lock is no
+longer taken in IRQ context.
+
+Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration")
+Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/s390/net/ism_drv.c | 44 +++++++++++++++++++++++++++++++-------
+ include/linux/ism.h | 1 +
+ 2 files changed, 37 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
+index c2096e4bba319..216eb4b386286 100644
+--- a/drivers/s390/net/ism_drv.c
++++ b/drivers/s390/net/ism_drv.c
+@@ -47,6 +47,15 @@ static struct ism_dev_list ism_dev_list = {
+ .mutex = __MUTEX_INITIALIZER(ism_dev_list.mutex),
+ };
+
++static void ism_setup_forwarding(struct ism_client *client, struct ism_dev *ism)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ism->lock, flags);
++ ism->subs[client->id] = client;
++ spin_unlock_irqrestore(&ism->lock, flags);
++}
++
+ int ism_register_client(struct ism_client *client)
+ {
+ struct ism_dev *ism;
+@@ -71,6 +80,7 @@ int ism_register_client(struct ism_client *client)
+ list_for_each_entry(ism, &ism_dev_list.list, list) {
+ ism->priv[i] = NULL;
+ client->add(ism);
++ ism_setup_forwarding(client, ism);
+ }
+ }
+ mutex_unlock(&ism_dev_list.mutex);
+@@ -92,6 +102,9 @@ int ism_unregister_client(struct ism_client *client)
+ max_client--;
+ spin_unlock_irqrestore(&clients_lock, flags);
+ list_for_each_entry(ism, &ism_dev_list.list, list) {
++ spin_lock_irqsave(&ism->lock, flags);
++ /* Stop forwarding IRQs and events */
++ ism->subs[client->id] = NULL;
+ for (int i = 0; i < ISM_NR_DMBS; ++i) {
+ if (ism->sba_client_arr[i] == client->id) {
+ pr_err("%s: attempt to unregister client '%s'"
+@@ -101,6 +114,7 @@ int ism_unregister_client(struct ism_client *client)
+ goto out;
+ }
+ }
++ spin_unlock_irqrestore(&ism->lock, flags);
+ }
+ out:
+ mutex_unlock(&ism_dev_list.mutex);
+@@ -328,6 +342,7 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
+ struct ism_client *client)
+ {
+ union ism_reg_dmb cmd;
++ unsigned long flags;
+ int ret;
+
+ ret = ism_alloc_dmb(ism, dmb);
+@@ -351,7 +366,9 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
+ goto out;
+ }
+ dmb->dmb_tok = cmd.response.dmb_tok;
++ spin_lock_irqsave(&ism->lock, flags);
+ ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = client->id;
++ spin_unlock_irqrestore(&ism->lock, flags);
+ out:
+ return ret;
+ }
+@@ -360,6 +377,7 @@ EXPORT_SYMBOL_GPL(ism_register_dmb);
+ int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
+ {
+ union ism_unreg_dmb cmd;
++ unsigned long flags;
+ int ret;
+
+ memset(&cmd, 0, sizeof(cmd));
+@@ -368,7 +386,9 @@ int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
+
+ cmd.request.dmb_tok = dmb->dmb_tok;
+
++ spin_lock_irqsave(&ism->lock, flags);
+ ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = NO_CLIENT;
++ spin_unlock_irqrestore(&ism->lock, flags);
+
+ ret = ism_cmd(ism, &cmd);
+ if (ret && ret != ISM_ERROR)
+@@ -491,6 +511,7 @@ static u16 ism_get_chid(struct ism_dev *ism)
+ static void ism_handle_event(struct ism_dev *ism)
+ {
+ struct ism_event *entry;
++ struct ism_client *clt;
+ int i;
+
+ while ((ism->ieq_idx + 1) != READ_ONCE(ism->ieq->header.idx)) {
+@@ -499,21 +520,21 @@ static void ism_handle_event(struct ism_dev *ism)
+
+ entry = &ism->ieq->entry[ism->ieq_idx];
+ debug_event(ism_debug_info, 2, entry, sizeof(*entry));
+- spin_lock(&clients_lock);
+- for (i = 0; i < max_client; ++i)
+- if (clients[i])
+- clients[i]->handle_event(ism, entry);
+- spin_unlock(&clients_lock);
++ for (i = 0; i < max_client; ++i) {
++ clt = ism->subs[i];
++ if (clt)
++ clt->handle_event(ism, entry);
++ }
+ }
+ }
+
+ static irqreturn_t ism_handle_irq(int irq, void *data)
+ {
+ struct ism_dev *ism = data;
+- struct ism_client *clt;
+ unsigned long bit, end;
+ unsigned long *bv;
+ u16 dmbemask;
++ u8 client_id;
+
+ bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET];
+ end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET;
+@@ -530,8 +551,10 @@ static irqreturn_t ism_handle_irq(int irq, void *data)
+ dmbemask = ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET];
+ ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0;
+ barrier();
+- clt = clients[ism->sba_client_arr[bit]];
+- clt->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask);
++ client_id = ism->sba_client_arr[bit];
++ if (unlikely(client_id == NO_CLIENT || !ism->subs[client_id]))
++ continue;
++ ism->subs[client_id]->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask);
+ }
+
+ if (ism->sba->e) {
+@@ -554,6 +577,7 @@ static void ism_dev_add_work_func(struct work_struct *work)
+ add_work);
+
+ client->add(client->tgt_ism);
++ ism_setup_forwarding(client, client->tgt_ism);
+ atomic_dec(&client->tgt_ism->add_dev_cnt);
+ wake_up(&client->tgt_ism->waitq);
+ }
+@@ -691,7 +715,11 @@ static void ism_dev_remove_work_func(struct work_struct *work)
+ {
+ struct ism_client *client = container_of(work, struct ism_client,
+ remove_work);
++ unsigned long flags;
+
++ spin_lock_irqsave(&client->tgt_ism->lock, flags);
++ client->tgt_ism->subs[client->id] = NULL;
++ spin_unlock_irqrestore(&client->tgt_ism->lock, flags);
+ client->remove(client->tgt_ism);
+ atomic_dec(&client->tgt_ism->free_clients_cnt);
+ wake_up(&client->tgt_ism->waitq);
+diff --git a/include/linux/ism.h b/include/linux/ism.h
+index ea2bcdae74012..5160d47e5ea9e 100644
+--- a/include/linux/ism.h
++++ b/include/linux/ism.h
+@@ -44,6 +44,7 @@ struct ism_dev {
+ u64 local_gid;
+ int ieq_idx;
+
++ struct ism_client *subs[MAX_CLIENTS];
+ atomic_t free_clients_cnt;
+ atomic_t add_dev_cnt;
+ wait_queue_head_t waitq;
+--
+2.39.2
+
--- /dev/null
+From ba6f8103f9b47e25147279392d0190719036955e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Jun 2023 13:58:47 +0300
+Subject: scsi: qla2xxx: Fix error code in qla2x00_start_sp()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit e579b007eff3ff8d29d59d16214cd85fb9e573f7 ]
+
+This should be negative -EAGAIN instead of positive. The callers treat
+non-zero error codes the same so it doesn't really impact runtime beyond
+some trivial differences to debug output.
+
+Fixes: 80676d054e5a ("scsi: qla2xxx: Fix session cleanup hang")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Link: https://lore.kernel.org/r/49866d28-4cfe-47b0-842b-78f110e61aab@moroto.mountain
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_iocb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
+index b9b3e6f80ea9b..1ed13199f27ce 100644
+--- a/drivers/scsi/qla2xxx/qla_iocb.c
++++ b/drivers/scsi/qla2xxx/qla_iocb.c
+@@ -3892,7 +3892,7 @@ qla2x00_start_sp(srb_t *sp)
+
+ pkt = __qla2x00_alloc_iocbs(sp->qpair, sp);
+ if (!pkt) {
+- rval = EAGAIN;
++ rval = -EAGAIN;
+ ql_log(ql_log_warn, vha, 0x700c,
+ "qla2x00_alloc_iocbs failed.\n");
+ goto done;
+--
+2.39.2
+
--- /dev/null
+From 577164d03134b5cdfa2ba8c54c8c325857472318 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jun 2023 22:23:48 -0700
+Subject: scsi: ufs: ufs-mediatek: Add dependency for RESET_CONTROLLER
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+[ Upstream commit 89f7ef7f2b23b2a7b8ce346c23161916eae5b15c ]
+
+When RESET_CONTROLLER is not set, kconfig complains about missing
+dependencies for RESET_TI_SYSCON, so add the missing dependency just as is
+done above for SCSI_UFS_QCOM.
+
+Silences this kconfig warning:
+
+WARNING: unmet direct dependencies detected for RESET_TI_SYSCON
+ Depends on [n]: RESET_CONTROLLER [=n] && HAS_IOMEM [=y]
+ Selected by [m]:
+ - SCSI_UFS_MEDIATEK [=m] && SCSI_UFSHCD [=y] && SCSI_UFSHCD_PLATFORM [=y] && ARCH_MEDIATEK [=y]
+
+Fixes: de48898d0cb6 ("scsi: ufs-mediatek: Create reset control device_link")
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Link: lore.kernel.org/r/202306020859.1wHg9AaT-lkp@intel.com
+Link: https://lore.kernel.org/r/20230701052348.28046-1-rdunlap@infradead.org
+Cc: Stanley Chu <stanley.chu@mediatek.com>
+Cc: Peter Wang <peter.wang@mediatek.com>
+Cc: Paul Gazzillo <paul@pgazz.com>
+Cc: Necip Fazil Yildiran <fazilyildiran@gmail.com>
+Cc: linux-scsi@vger.kernel.org
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: linux-mediatek@lists.infradead.org
+Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
+Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/host/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/ufs/host/Kconfig b/drivers/ufs/host/Kconfig
+index 8793e34335806..f11e98c9e6652 100644
+--- a/drivers/ufs/host/Kconfig
++++ b/drivers/ufs/host/Kconfig
+@@ -72,6 +72,7 @@ config SCSI_UFS_QCOM
+ config SCSI_UFS_MEDIATEK
+ tristate "Mediatek specific hooks to UFS controller platform driver"
+ depends on SCSI_UFSHCD_PLATFORM && ARCH_MEDIATEK
++ depends on RESET_CONTROLLER
+ select PHY_MTK_UFS
+ select RESET_TI_SYSCON
+ help
+--
+2.39.2
+
--- /dev/null
+drm-panel-simple-add-connector_type-for-innolux_at04.patch
+drm-bridge-dw_hdmi-fix-connector-access-for-scdc.patch
+drm-bridge-ti-sn65dsi86-fix-auxiliary-bus-lifetime.patch
+swiotlb-always-set-the-number-of-areas-before-alloca.patch
+swiotlb-reduce-the-number-of-areas-to-match-actual-m.patch
+drm-panel-simple-add-powertip-ph800480t013-drm_displ.patch
+xen-virtio-fix-null-deref-when-a-bridge-of-pci-root-.patch
+netfilter-nf_tables-report-use-refcount-overflow.patch
+netfilter-conntrack-don-t-fold-port-numbers-into-add.patch
+ice-fix-max_rate-check-while-configuring-tx-rate-lim.patch
+ice-fix-tx-queue-rate-limit-when-tcs-are-configured.patch
+igc-add-condition-for-qbv_config_change_errors-count.patch
+igc-remove-delay-during-tx-ring-configuration.patch
+igc-add-igc_xdp_buff-wrapper-for-xdp_buff-in-driver.patch
+igc-add-xdp-hints-kfuncs-for-rx-hash.patch
+igc-fix-tx-hang-issue-when-qbv-gate-is-closed.patch
+net-mlx5e-fix-double-free-in-mlx5e_destroy_flow_tabl.patch
+net-mlx5e-fix-memory-leak-in-mlx5e_fs_tt_redirect_an.patch
+net-mlx5e-fix-memory-leak-in-mlx5e_ptp_open.patch
+net-mlx5e-rx-fix-flush-and-close-release-flow-of-reg.patch
+net-mlx5-register-a-unique-thermal-zone-per-device.patch
+net-mlx5e-check-for-not_ready-flag-state-after-locki.patch
+net-mlx5e-tc-ct-offload-ct-clear-only-once.patch
+net-mlx5-query-hca_cap_2-only-when-supported.patch
+net-mlx5e-rx-fix-page_pool-page-fragment-tracking-fo.patch
+igc-set-tp-bit-in-supported-and-advertising-fields-o.patch
+igc-include-the-length-type-field-and-vlan-tag-in-qu.patch
+igc-handle-pps-start-time-programming-for-past-time-.patch
+blk-crypto-use-dynamic-lock-class-for-blk_crypto_pro.patch
+scsi-qla2xxx-fix-error-code-in-qla2x00_start_sp.patch
+scsi-ufs-ufs-mediatek-add-dependency-for-reset_contr.patch
+bpf-fix-max-stack-depth-check-for-async-callbacks.patch
+net-mvneta-fix-txq_map-in-case-of-txq_number-1.patch
+net-dsa-felix-make-vsc9959_tas_guard_bands_update-vi.patch
+net-mscc-ocelot-fix-oversize-frame-dropping-for-pree.patch
+net-sched-cls_fw-fix-improper-refcount-update-leads-.patch
+gve-set-default-duplex-configuration-to-full.patch
+drm-fbdev-dma-fix-documented-default-preferred_bpp-v.patch
+octeontx2-af-promisc-enable-disable-through-mbox.patch
+octeontx2-af-move-validation-of-ptp-pointer-before-i.patch
+ionic-remove-warn_on-to-prevent-panic_on_warn.patch
+udp6-add-a-missing-call-into-udp_fail_queue_rcv_skb-.patch
+net-bgmac-postpone-turning-irqs-off-to-avoid-soc-han.patch
+net-prevent-skb-corruption-on-frag-list-segmentation.patch
+s390-ism-fix-locking-for-forwarding-of-irqs-and-even.patch
+s390-ism-fix-and-simplify-add-remove-callback-handli.patch
+s390-ism-do-not-unregister-clients-with-registered-d.patch
+icmp6-fix-null-ptr-deref-of-ip6_null_entry-rt6i_idev.patch
+udp6-fix-udp6_ehashfn-typo.patch
+ntb-idt-fix-error-handling-in-idt_pci_driver_init.patch
+ntb-amd-fix-error-handling-in-amd_ntb_pci_driver_ini.patch
+ntb-intel-fix-error-handling-in-intel_ntb_pci_driver.patch
+ntb-ntb_transport-fix-possible-memory-leak-while-dev.patch
+ntb-ntb_tool-add-check-for-devm_kcalloc.patch
+ipv6-addrconf-fix-a-potential-refcount-underflow-for.patch
+hid-hyperv-avoid-struct-memcpy-overrun-warning.patch
+net-dsa-qca8k-add-check-for-skb_copy.patch
+x86-fineibt-poison-endbr-at-0.patch
+platform-x86-wmi-break-possible-infinite-loop-when-p.patch
+net-sched-taprio-replace-tc_taprio_qopt_offload-enab.patch
+igc-rename-qbv_enable-to-taprio_offload_enable.patch
+igc-do-not-enable-taprio-offload-for-invalid-argumen.patch
+igc-handle-already-enabled-taprio-offload-for-baseti.patch
+kernel-trace-fix-cleanup-logic-of-enable_trace_eprob.patch
+fprobe-add-unlock-to-match-a-succeeded-ftrace_test_r.patch
+igc-no-strict-mode-in-pure-launchtime-cbs-offload.patch
+igc-fix-launchtime-before-start-of-cycle.patch
+igc-fix-inserting-of-empty-frame-for-launchtime.patch
+nvme-fix-the-nvme_id_ns_nvm_sts_mask-definition.patch
+openrisc-union-fpcsr-and-oldmask-in-sigcontext-to-un.patch
+riscv-bpf-fix-inconsistent-jit-image-generation.patch
+net-fec-remove-useless-fec_enet_reset_skb.patch
+net-fec-remove-last_bdp-from-fec_enet_txq_xmit_frame.patch
+net-fec-recycle-pages-for-transmitted-xdp-frames.patch
+net-fec-increase-the-size-of-tx-ring-and-update-tx_w.patch
+drm-i915-don-t-preserve-dpll_hw_state-for-slave-crtc.patch
+drm-i915-fix-one-wrong-caching-mode-enum-usage.patch
+net-dsa-removed-unneeded-of_node_put-in-felix_parse_.patch
+octeontx2-pf-add-additional-check-for-mcam-rules.patch
+erofs-avoid-useless-loops-in-z_erofs_pcluster_readmo.patch
+erofs-avoid-infinite-loop-in-z_erofs_do_read_page-wh.patch
+erofs-fix-fsdax-unavailability-for-chunk-based-regul.patch
+wifi-airo-avoid-uninitialized-warning-in-airo_get_ra.patch
+bpf-cpumap-fix-memory-leak-in-cpu_map_update_elem.patch
+xdp-use-trusted-arguments-in-xdp-hints-kfuncs.patch
+net-sched-flower-ensure-both-minimum-and-maximum-por.patch
+riscv-mm-fix-truncation-warning-on-rv32.patch
+drm-nouveau-disp-fix-hdmi-on-gt215.patch
+drm-nouveau-disp-g94-enable-hdmi.patch
+netdevsim-fix-uninitialized-data-in-nsim_dev_trap_fa.patch
+drm-nouveau-acr-abort-loading-acr-if-no-firmware-was.patch
+drm-nouveau-bring-back-blit-subchannel-for-pre-nv50-.patch
+net-sched-make-psched_mtu-rtnl-less-safe.patch
+net-txgbe-fix-eeprom-calculation-error.patch
+wifi-rtw89-debug-fix-error-code-in-rtw89_debug_priv_.patch
+net-sched-sch_qfq-reintroduce-lmax-bound-check-for-m.patch
+net-sched-sch_qfq-account-for-stab-overhead-in-qfq_e.patch
+nvme-pci-fix-dma-direction-of-unmapping-integrity-da.patch
--- /dev/null
+From 3e0aead0125493b7ae55fcf63b7b58f4cd13b0aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Jun 2023 15:01:03 +0200
+Subject: swiotlb: always set the number of areas before allocating the pool
+
+From: Petr Tesarik <petr.tesarik.ext@huawei.com>
+
+[ Upstream commit aabd12609f91155f26584508b01f548215cc3c0c ]
+
+The number of areas defaults to the number of possible CPUs. However, the
+total number of slots may have to be increased after adjusting the number
+of areas. Consequently, the number of areas must be determined before
+allocating the memory pool. This is even explained with a comment in
+swiotlb_init_remap(), but swiotlb_init_late() adjusts the number of areas
+after slots are already allocated. The areas may end up being smaller than
+IO_TLB_SEGSIZE, which breaks per-area locking.
+
+While fixing swiotlb_init_late(), move all relevant comments before the
+definition of swiotlb_adjust_nareas() and convert them to kernel-doc.
+
+Fixes: 20347fca71a3 ("swiotlb: split up the global swiotlb lock")
+Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
+Reviewed-by: Roberto Sassu <roberto.sassu@huawei.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/dma/swiotlb.c | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
+index af2e304c672c4..16f53d8c51bcf 100644
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -115,9 +115,16 @@ static bool round_up_default_nslabs(void)
+ return true;
+ }
+
++/**
++ * swiotlb_adjust_nareas() - adjust the number of areas and slots
++ * @nareas: Desired number of areas. Zero is treated as 1.
++ *
++ * Adjust the default number of areas in a memory pool.
++ * The default size of the memory pool may also change to meet minimum area
++ * size requirements.
++ */
+ static void swiotlb_adjust_nareas(unsigned int nareas)
+ {
+- /* use a single area when non is specified */
+ if (!nareas)
+ nareas = 1;
+ else if (!is_power_of_2(nareas))
+@@ -298,10 +305,6 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+ if (swiotlb_force_disable)
+ return;
+
+- /*
+- * default_nslabs maybe changed when adjust area number.
+- * So allocate bounce buffer after adjusting area number.
+- */
+ if (!default_nareas)
+ swiotlb_adjust_nareas(num_possible_cpus());
+
+@@ -363,6 +366,9 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+ if (swiotlb_force_disable)
+ return 0;
+
++ if (!default_nareas)
++ swiotlb_adjust_nareas(num_possible_cpus());
++
+ retry:
+ order = get_order(nslabs << IO_TLB_SHIFT);
+ nslabs = SLABS_PER_PAGE << order;
+@@ -397,9 +403,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+ (PAGE_SIZE << order) >> 20);
+ }
+
+- if (!default_nareas)
+- swiotlb_adjust_nareas(num_possible_cpus());
+-
+ area_order = get_order(array_size(sizeof(*mem->areas),
+ default_nareas));
+ mem->areas = (struct io_tlb_area *)
+--
+2.39.2
+
--- /dev/null
+From 0745d7a9e77343a03937e00b50782642061849c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Jun 2023 15:01:04 +0200
+Subject: swiotlb: reduce the number of areas to match actual memory pool size
+
+From: Petr Tesarik <petr.tesarik.ext@huawei.com>
+
+[ Upstream commit 8ac04063354a01a484d2e55d20ed1958aa0d3392 ]
+
+Although the desired size of the SWIOTLB memory pool is increased in
+swiotlb_adjust_nareas() to match the number of areas, the actual allocation
+may be smaller, which may require reducing the number of areas.
+
+For example, Xen uses swiotlb_init_late(), which in turn uses the page
+allocator. On x86, page size is 4 KiB and MAX_ORDER is 10 (1024 pages),
+resulting in a maximum memory pool size of 4 MiB. This corresponds to 2048
+slots of 2 KiB each. The minimum area size is 128 (IO_TLB_SEGSIZE),
+allowing at most 2048 / 128 = 16 areas.
+
+If num_possible_cpus() is greater than the maximum number of areas, areas
+are smaller than IO_TLB_SEGSIZE and contiguous groups of free slots will
+span multiple areas. When allocating and freeing slots, only one area will
+be properly locked, causing race conditions on the unlocked slots and
+ultimately data corruption, kernel hangs and crashes.
+
+Fixes: 20347fca71a3 ("swiotlb: split up the global swiotlb lock")
+Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
+Reviewed-by: Roberto Sassu <roberto.sassu@huawei.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/dma/swiotlb.c | 27 ++++++++++++++++++++++++---
+ 1 file changed, 24 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
+index 16f53d8c51bcf..b1bbd6270ba79 100644
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -138,6 +138,23 @@ static void swiotlb_adjust_nareas(unsigned int nareas)
+ (default_nslabs << IO_TLB_SHIFT) >> 20);
+ }
+
++/**
++ * limit_nareas() - get the maximum number of areas for a given memory pool size
++ * @nareas: Desired number of areas.
++ * @nslots: Total number of slots in the memory pool.
++ *
++ * Limit the number of areas to the maximum possible number of areas in
++ * a memory pool of the given size.
++ *
++ * Return: Maximum possible number of areas.
++ */
++static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots)
++{
++ if (nslots < nareas * IO_TLB_SEGSIZE)
++ return nslots / IO_TLB_SEGSIZE;
++ return nareas;
++}
++
+ static int __init
+ setup_io_tlb_npages(char *str)
+ {
+@@ -297,6 +314,7 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+ {
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+ unsigned long nslabs;
++ unsigned int nareas;
+ size_t alloc_size;
+ void *tlb;
+
+@@ -309,10 +327,12 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+ swiotlb_adjust_nareas(num_possible_cpus());
+
+ nslabs = default_nslabs;
++ nareas = limit_nareas(default_nareas, nslabs);
+ while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) {
+ if (nslabs <= IO_TLB_MIN_SLABS)
+ return;
+ nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
++ nareas = limit_nareas(nareas, nslabs);
+ }
+
+ if (default_nslabs != nslabs) {
+@@ -358,6 +378,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+ {
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+ unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
++ unsigned int nareas;
+ unsigned char *vstart = NULL;
+ unsigned int order, area_order;
+ bool retried = false;
+@@ -403,8 +424,8 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+ (PAGE_SIZE << order) >> 20);
+ }
+
+- area_order = get_order(array_size(sizeof(*mem->areas),
+- default_nareas));
++ nareas = limit_nareas(default_nareas, nslabs);
++ area_order = get_order(array_size(sizeof(*mem->areas), nareas));
+ mem->areas = (struct io_tlb_area *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
+ if (!mem->areas)
+@@ -418,7 +439,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+ set_memory_decrypted((unsigned long)vstart,
+ (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+ swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
+- default_nareas);
++ nareas);
+
+ swiotlb_print_info();
+ return 0;
+--
+2.39.2
+
--- /dev/null
+From 8b9f1e208289fa8437d0478c29172a6ebf9f8da2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jul 2023 21:39:20 -0700
+Subject: udp6: add a missing call into udp_fail_queue_rcv_skb tracepoint
+
+From: Ivan Babrou <ivan@cloudflare.com>
+
+[ Upstream commit 8139dccd464aaee4a2c351506ff883733c6ca5a3 ]
+
+The tracepoint has existed for 12 years, but it only covered udp
+over the legacy IPv4 protocol. Having it enabled for udp6 removes
+the unnecessary difference in error visibility.
+
+Signed-off-by: Ivan Babrou <ivan@cloudflare.com>
+Fixes: 296f7ea75b45 ("udp: add tracepoints for queueing skb to rcvbuf")
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/net-traces.c | 2 ++
+ net/ipv6/udp.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/net/core/net-traces.c b/net/core/net-traces.c
+index 805b7385dd8da..6aef976bc1da2 100644
+--- a/net/core/net-traces.c
++++ b/net/core/net-traces.c
+@@ -63,4 +63,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
+
++EXPORT_TRACEPOINT_SYMBOL_GPL(udp_fail_queue_rcv_skb);
++
+ EXPORT_TRACEPOINT_SYMBOL_GPL(sk_data_ready);
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index e5a337e6b9705..debb98fb23c0b 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -45,6 +45,7 @@
+ #include <net/tcp_states.h>
+ #include <net/ip6_checksum.h>
+ #include <net/ip6_tunnel.h>
++#include <trace/events/udp.h>
+ #include <net/xfrm.h>
+ #include <net/inet_hashtables.h>
+ #include <net/inet6_hashtables.h>
+@@ -680,6 +681,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ }
+ UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ kfree_skb_reason(skb, drop_reason);
++ trace_udp_fail_queue_rcv_skb(rc, sk);
+ return -1;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From d63211de5fb52a2741b61eae43f615679bbddba7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 8 Jul 2023 08:29:58 +0000
+Subject: udp6: fix udp6_ehashfn() typo
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 51d03e2f2203e76ed02d33fb5ffbb5fc85ffaf54 ]
+
+Amit Klein reported that udp6_ehash_secret was initialized but never used.
+
+Fixes: 1bbdceef1e53 ("inet: convert inet_ehash_secret and ipv6_hash_secret to net_get_random_once")
+Reported-by: Amit Klein <aksecurity@gmail.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willy Tarreau <w@1wt.eu>
+Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+Cc: David Ahern <dsahern@kernel.org>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/udp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index debb98fb23c0b..d594a0425749b 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -91,7 +91,7 @@ static u32 udp6_ehashfn(const struct net *net,
+ fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
+
+ return __inet6_ehashfn(lhash, lport, fhash, fport,
+- udp_ipv6_hash_secret + net_hash_mix(net));
++ udp6_ehash_secret + net_hash_mix(net));
+ }
+
+ int udp_v6_get_port(struct sock *sk, unsigned short snum)
+--
+2.39.2
+
--- /dev/null
+From 2685e6ba08190492fa99432286f029afadcb06ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Jul 2023 06:31:54 -0700
+Subject: wifi: airo: avoid uninitialized warning in airo_get_rate()
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+[ Upstream commit 9373771aaed17f5c2c38485f785568abe3a9f8c1 ]
+
+Quieten a gcc (11.3.0) build error or warning by checking the function
+call status and returning -EBUSY if the function call failed.
+This is similar to what several other wireless drivers do for the
+SIOCGIWRATE ioctl call when there is a locking problem.
+
+drivers/net/wireless/cisco/airo.c: error: 'status_rid.currentXmitRate' is used uninitialized [-Werror=uninitialized]
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Link: https://lore.kernel.org/r/39abf2c7-24a-f167-91da-ed4c5435d1c4@linux-m68k.org
+Link: https://lore.kernel.org/r/20230709133154.26206-1-rdunlap@infradead.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/cisco/airo.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
+index 7c4cc5f5e1eb4..dbd13f7aa3e6e 100644
+--- a/drivers/net/wireless/cisco/airo.c
++++ b/drivers/net/wireless/cisco/airo.c
+@@ -6157,8 +6157,11 @@ static int airo_get_rate(struct net_device *dev,
+ struct iw_param *vwrq = &wrqu->bitrate;
+ struct airo_info *local = dev->ml_priv;
+ StatusRid status_rid; /* Card status info */
++ int ret;
+
+- readStatusRid(local, &status_rid, 1);
++ ret = readStatusRid(local, &status_rid, 1);
++ if (ret)
++ return -EBUSY;
+
+ vwrq->value = le16_to_cpu(status_rid.currentXmitRate) * 500000;
+ /* If more than one rate, set auto */
+--
+2.39.2
+
--- /dev/null
+From cf9f44752b619631f770e4dc300984d1dac80d17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jul 2023 10:45:00 +0800
+Subject: wifi: rtw89: debug: fix error code in rtw89_debug_priv_send_h2c_set()
+
+From: Zhang Shurong <zhang_shurong@foxmail.com>
+
+[ Upstream commit 4f4626cd049576af1276c7568d5b44eb3f7bb1b1 ]
+
+If there is a failure during rtw89_fw_h2c_raw() rtw89_debug_priv_send_h2c
+should return negative error code instead of a positive value count.
+Fix this bug by returning correct error code.
+
+Fixes: e3ec7017f6a2 ("rtw89: add Realtek 802.11ax driver")
+Signed-off-by: Zhang Shurong <zhang_shurong@foxmail.com>
+Acked-by: Ping-Ke Shih <pkshih@realtek.com>
+Link: https://lore.kernel.org/r/tencent_AD09A61BC4DA92AD1EB0790F5C850E544D07@qq.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/realtek/rtw89/debug.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c
+index 1e5b7a9987163..858494ddfb12e 100644
+--- a/drivers/net/wireless/realtek/rtw89/debug.c
++++ b/drivers/net/wireless/realtek/rtw89/debug.c
+@@ -2998,17 +2998,18 @@ static ssize_t rtw89_debug_priv_send_h2c_set(struct file *filp,
+ struct rtw89_debugfs_priv *debugfs_priv = filp->private_data;
+ struct rtw89_dev *rtwdev = debugfs_priv->rtwdev;
+ u8 *h2c;
++ int ret;
+ u16 h2c_len = count / 2;
+
+ h2c = rtw89_hex2bin_user(rtwdev, user_buf, count);
+ if (IS_ERR(h2c))
+ return -EFAULT;
+
+- rtw89_fw_h2c_raw(rtwdev, h2c, h2c_len);
++ ret = rtw89_fw_h2c_raw(rtwdev, h2c, h2c_len);
+
+ kfree(h2c);
+
+- return count;
++ return ret ? ret : count;
+ }
+
+ static int
+--
+2.39.2
+
--- /dev/null
+From 79f7f4bdb8ae801346e94933d8c848c76e4ea88b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 21:35:48 +0200
+Subject: x86/fineibt: Poison ENDBR at +0
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 04505bbbbb15da950ea0239e328a76a3ad2376e0 ]
+
+Alyssa noticed that when building the kernel with CFI_CLANG+IBT and
+booting on IBT enabled hardware to obtain FineIBT, the indirect
+functions look like:
+
+ __cfi_foo:
+ endbr64
+ subl $hash, %r10d
+ jz 1f
+ ud2
+ nop
+ 1:
+ foo:
+ endbr64
+
+This is because the compiler generates code for kCFI+IBT. In that case
+the caller does the hash check and will jump to +0, so there must be
+an ENDBR there. The compiler doesn't know about FineIBT at all; also
+it is possible to actually use kCFI+IBT when booting with 'cfi=kcfi'
+on IBT enabled hardware.
+
+Having this second ENDBR however makes it possible to elide the CFI
+check. Therefore, we should poison this second ENDBR when switching to
+FineIBT mode.
+
+Fixes: 931ab63664f0 ("x86/ibt: Implement FineIBT")
+Reported-by: "Milburn, Alyssa" <alyssa.milburn@intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
+Link: https://lore.kernel.org/r/20230615193722.194131053@infradead.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/alternative.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
+index f615e0cb6d932..4e2c70f88e05b 100644
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -940,6 +940,17 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
+ return 0;
+ }
+
++static void cfi_rewrite_endbr(s32 *start, s32 *end)
++{
++ s32 *s;
++
++ for (s = start; s < end; s++) {
++ void *addr = (void *)s + *s;
++
++ poison_endbr(addr+16, false);
++ }
++}
++
+ /* .retpoline_sites */
+ static int cfi_rand_callers(s32 *start, s32 *end)
+ {
+@@ -1034,14 +1045,19 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+ return;
+
+ case CFI_FINEIBT:
++ /* place the FineIBT preamble at func()-16 */
+ ret = cfi_rewrite_preamble(start_cfi, end_cfi);
+ if (ret)
+ goto err;
+
++ /* rewrite the callers to target func()-16 */
+ ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
+ if (ret)
+ goto err;
+
++ /* now that nobody targets func()+0, remove ENDBR there */
++ cfi_rewrite_endbr(start_cfi, end_cfi);
++
+ if (builtin)
+ pr_info("Using FineIBT CFI\n");
+ return;
+--
+2.39.2
+
--- /dev/null
+From ef7d0bc0116eed8b256212c207fd0aaa11f2a936 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 12:59:26 +0200
+Subject: xdp: use trusted arguments in XDP hints kfuncs
+
+From: Larysa Zaremba <larysa.zaremba@intel.com>
+
+[ Upstream commit 2e06c57d66d3f6c26faa5f5b479fb3add34ce85a ]
+
+Currently, verifier does not reject XDP programs that pass NULL pointer to
+hints functions. At the same time, this case is not handled in any driver
+implementation (including veth). For example, changing
+
+bpf_xdp_metadata_rx_timestamp(ctx, &timestamp);
+
+to
+
+bpf_xdp_metadata_rx_timestamp(ctx, NULL);
+
+in xdp_metadata test successfully crashes the system.
+
+Add KF_TRUSTED_ARGS flag to hints kfunc definitions, so driver code
+does not have to worry about getting invalid pointers.
+
+Fixes: 3d76a4d3d4e5 ("bpf: XDP metadata RX kfuncs")
+Reported-by: Stanislav Fomichev <sdf@google.com>
+Closes: https://lore.kernel.org/bpf/ZKWo0BbpLfkZHbyE@google.com/
+Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
+Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Link: https://lore.kernel.org/r/20230711105930.29170-1-larysa.zaremba@intel.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/xdp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/xdp.c b/net/core/xdp.c
+index 41e5ca8643ec9..8362130bf085d 100644
+--- a/net/core/xdp.c
++++ b/net/core/xdp.c
+@@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ __diag_pop();
+
+ BTF_SET8_START(xdp_metadata_kfunc_ids)
+-#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0)
++#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
+ XDP_METADATA_KFUNC_xxx
+ #undef XDP_METADATA_KFUNC
+ BTF_SET8_END(xdp_metadata_kfunc_ids)
+--
+2.39.2
+
--- /dev/null
+From 2eedacb8d561774f8457305e7eb3c924ce6f9ca9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jun 2023 15:12:13 +0200
+Subject: xen/virtio: Fix NULL deref when a bridge of PCI root bus has no
+ parent
+
+From: Petr Pavlu <petr.pavlu@suse.com>
+
+[ Upstream commit 21a235bce12361e64adfc2ef97e4ae2e51ad63d4 ]
+
+When attempting to run Xen on a QEMU/KVM virtual machine with virtio
+devices (all x86_64), function xen_dt_get_node() crashes on accessing
+bus->bridge->parent->of_node because a bridge of the PCI root bus has no
+parent set:
+
+[ 1.694192][ T1] BUG: kernel NULL pointer dereference, address: 0000000000000288
+[ 1.695688][ T1] #PF: supervisor read access in kernel mode
+[ 1.696297][ T1] #PF: error_code(0x0000) - not-present page
+[ 1.696297][ T1] PGD 0 P4D 0
+[ 1.696297][ T1] Oops: 0000 [#1] PREEMPT SMP NOPTI
+[ 1.696297][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.3.7-1-default #1 openSUSE Tumbleweed a577eae57964bb7e83477b5a5645a1781df990f0
+[ 1.696297][ T1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014
+[ 1.696297][ T1] RIP: e030:xen_virtio_restricted_mem_acc+0xd9/0x1c0
+[ 1.696297][ T1] Code: 45 0c 83 e8 c9 a3 ea ff 31 c0 eb d7 48 8b 87 40 ff ff ff 48 89 c2 48 8b 40 10 48 85 c0 75 f4 48 8b 82 10 01 00 00 48 8b 40 40 <48> 83 b8 88 02 00 00 00 0f 84 45 ff ff ff 66 90 31 c0 eb a5 48 89
+[ 1.696297][ T1] RSP: e02b:ffffc90040013cc8 EFLAGS: 00010246
+[ 1.696297][ T1] RAX: 0000000000000000 RBX: ffff888006c75000 RCX: 0000000000000029
+[ 1.696297][ T1] RDX: ffff888005ed1000 RSI: ffffc900400f100c RDI: ffff888005ee30d0
+[ 1.696297][ T1] RBP: ffff888006c75010 R08: 0000000000000001 R09: 0000000330000006
+[ 1.696297][ T1] R10: ffff888005850028 R11: 0000000000000002 R12: ffffffff830439a0
+[ 1.696297][ T1] R13: 0000000000000000 R14: ffff888005657900 R15: ffff888006e3e1e8
+[ 1.696297][ T1] FS: 0000000000000000(0000) GS:ffff88804a000000(0000) knlGS:0000000000000000
+[ 1.696297][ T1] CS: e030 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 1.696297][ T1] CR2: 0000000000000288 CR3: 0000000002e36000 CR4: 0000000000050660
+[ 1.696297][ T1] Call Trace:
+[ 1.696297][ T1] <TASK>
+[ 1.696297][ T1] virtio_features_ok+0x1b/0xd0
+[ 1.696297][ T1] virtio_dev_probe+0x19c/0x270
+[ 1.696297][ T1] really_probe+0x19b/0x3e0
+[ 1.696297][ T1] __driver_probe_device+0x78/0x160
+[ 1.696297][ T1] driver_probe_device+0x1f/0x90
+[ 1.696297][ T1] __driver_attach+0xd2/0x1c0
+[ 1.696297][ T1] bus_for_each_dev+0x74/0xc0
+[ 1.696297][ T1] bus_add_driver+0x116/0x220
+[ 1.696297][ T1] driver_register+0x59/0x100
+[ 1.696297][ T1] virtio_console_init+0x7f/0x110
+[ 1.696297][ T1] do_one_initcall+0x47/0x220
+[ 1.696297][ T1] kernel_init_freeable+0x328/0x480
+[ 1.696297][ T1] kernel_init+0x1a/0x1c0
+[ 1.696297][ T1] ret_from_fork+0x29/0x50
+[ 1.696297][ T1] </TASK>
+[ 1.696297][ T1] Modules linked in:
+[ 1.696297][ T1] CR2: 0000000000000288
+[ 1.696297][ T1] ---[ end trace 0000000000000000 ]---
+
+The PCI root bus is in this case created from ACPI description via
+acpi_pci_root_add() -> pci_acpi_scan_root() -> acpi_pci_root_create() ->
+pci_create_root_bus() where the last function is called with
+parent=NULL. It indicates that no parent is present and then
+bus->bridge->parent is NULL too.
+
+Fix the problem by checking bus->bridge->parent in xen_dt_get_node() for
+NULL first.
+
+Fixes: ef8ae384b4c9 ("xen/virtio: Handle PCI devices which Host controller is described in DT")
+Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Link: https://lore.kernel.org/r/20230621131214.9398-2-petr.pavlu@suse.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/grant-dma-ops.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
+index 9784a77fa3c99..76f6f26265a3b 100644
+--- a/drivers/xen/grant-dma-ops.c
++++ b/drivers/xen/grant-dma-ops.c
+@@ -303,6 +303,8 @@ static struct device_node *xen_dt_get_node(struct device *dev)
+ while (!pci_is_root_bus(bus))
+ bus = bus->parent;
+
++ if (!bus->bridge->parent)
++ return NULL;
+ return of_node_get(bus->bridge->parent->of_node);
+ }
+
+--
+2.39.2
+