git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.10
author: Sasha Levin <sashal@kernel.org>
Fri, 13 Sep 2024 23:37:04 +0000 (19:37 -0400)
committer: Sasha Levin <sashal@kernel.org>
Fri, 13 Sep 2024 23:37:04 +0000 (19:37 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
29 files changed:
queue-6.10/drivers-perf-fix-smp_processor_id-use-in-preemptible.patch [new file with mode: 0644]
queue-6.10/fou-fix-initialization-of-grc.patch [new file with mode: 0644]
queue-6.10/hwmon-pmbus-conditionally-clear-individual-status-bi.patch [new file with mode: 0644]
queue-6.10/ice-fix-accounting-for-filters-shared-by-multiple-vs.patch [new file with mode: 0644]
queue-6.10/ice-fix-lldp-packets-dropping-after-changing-the-num.patch [new file with mode: 0644]
queue-6.10/ice-fix-vsi-lists-confusion-when-adding-vlans.patch [new file with mode: 0644]
queue-6.10/igb-always-call-igb_xdp_ring_update_tail-under-tx-lo.patch [new file with mode: 0644]
queue-6.10/net-dpaa-pad-packets-to-eth_zlen.patch [new file with mode: 0644]
queue-6.10/net-dsa-felix-ignore-pending-status-of-tas-module-wh.patch [new file with mode: 0644]
queue-6.10/net-ftgmac100-enable-tx-interrupt-to-avoid-tx-timeou.patch [new file with mode: 0644]
queue-6.10/net-hsr-prevent-null-pointer-dereference-in-hsr_prox.patch [new file with mode: 0644]
queue-6.10/net-hsr-remove-seqnr_lock.patch [new file with mode: 0644]
queue-6.10/net-hsr-send-supervisory-frames-to-hsr-network-with-.patch [new file with mode: 0644]
queue-6.10/net-mlx5-add-missing-masks-and-qos-bit-masks-for-sch.patch [new file with mode: 0644]
queue-6.10/net-mlx5-correct-tasr-typo-into-tsar.patch [new file with mode: 0644]
queue-6.10/net-mlx5-explicitly-set-scheduling-element-and-tsar-.patch [new file with mode: 0644]
queue-6.10/net-mlx5-fix-bridge-mode-operations-when-there-are-n.patch [new file with mode: 0644]
queue-6.10/net-mlx5-update-the-list-of-the-pci-supported-device.patch [new file with mode: 0644]
queue-6.10/net-mlx5-verify-support-for-scheduling-element-and-t.patch [new file with mode: 0644]
queue-6.10/net-mlx5e-add-missing-link-mode-to-ptys2ext_ethtool_.patch [new file with mode: 0644]
queue-6.10/net-mlx5e-add-missing-link-modes-to-ptys2ethtool_map.patch [new file with mode: 0644]
queue-6.10/net-phy-dp83822-fix-null-pointer-dereference-on-dp83.patch [new file with mode: 0644]
queue-6.10/netfilter-nft_socket-fix-sk-refcount-leaks.patch [new file with mode: 0644]
queue-6.10/netfilter-nft_socket-make-cgroupsv2-matching-work-wi.patch [new file with mode: 0644]
queue-6.10/netlink-specs-mptcp-fix-port-endianness.patch [new file with mode: 0644]
queue-6.10/octeontx2-af-modify-smq-flush-sequence-to-drop-packe.patch [new file with mode: 0644]
queue-6.10/riscv-disable-preemption-while-handling-pr_riscv_ctx.patch [new file with mode: 0644]
queue-6.10/selftests-net-csum-fix-checksums-for-packets-with-no.patch [new file with mode: 0644]
queue-6.10/series

diff --git a/queue-6.10/drivers-perf-fix-smp_processor_id-use-in-preemptible.patch b/queue-6.10/drivers-perf-fix-smp_processor_id-use-in-preemptible.patch
new file mode 100644 (file)
index 0000000..464ffb5
--- /dev/null
@@ -0,0 +1,60 @@
+From e7e913873b300d0c5e12ad65ff58502315ff6b60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Aug 2024 18:52:10 +0200
+Subject: drivers: perf: Fix smp_processor_id() use in preemptible code
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+[ Upstream commit 2840dadf0dde92638d13b97998026c5fcddbdceb ]
+
+As reported in [1], the use of smp_processor_id() in
+pmu_sbi_device_probe() must be protected by disabling preemption, so
+simply use get_cpu()/put_cpu() instead.
+
+Reported-by: Nam Cao <namcao@linutronix.de>
+Closes: https://lore.kernel.org/linux-riscv/20240820074925.ReMKUPP3@linutronix.de/ [1]
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Reviewed-by: Anup Patel <anup@brainfault.org>
+Tested-by: Nam Cao <namcao@linutronix.de>
+Fixes: a8625217a054 ("drivers/perf: riscv: Implement SBI PMU snapshot function")
+Reported-by: Andrea Parri <parri.andrea@gmail.com>
+Tested-by: Andrea Parri <parri.andrea@gmail.com>
+Link: https://lore.kernel.org/r/20240826165210.124696-1-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/perf/riscv_pmu_sbi.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
+index 11c7c85047ed..765bda7924f7 100644
+--- a/drivers/perf/riscv_pmu_sbi.c
++++ b/drivers/perf/riscv_pmu_sbi.c
+@@ -1368,11 +1368,15 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
+       /* SBI PMU Snapsphot is only available in SBI v2.0 */
+       if (sbi_v2_available) {
++              int cpu;
++
+               ret = pmu_sbi_snapshot_alloc(pmu);
+               if (ret)
+                       goto out_unregister;
+-              ret = pmu_sbi_snapshot_setup(pmu, smp_processor_id());
++              cpu = get_cpu();
++
++              ret = pmu_sbi_snapshot_setup(pmu, cpu);
+               if (ret) {
+                       /* Snapshot is an optional feature. Continue if not available */
+                       pmu_sbi_snapshot_free(pmu);
+@@ -1386,6 +1390,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
+                        */
+                       static_branch_enable(&sbi_pmu_snapshot_available);
+               }
++              put_cpu();
+       }
+       register_sysctl("kernel", sbi_pmu_sysctl_table);
+-- 
+2.43.0
+
diff --git a/queue-6.10/fou-fix-initialization-of-grc.patch b/queue-6.10/fou-fix-initialization-of-grc.patch
new file mode 100644 (file)
index 0000000..d849a4a
--- /dev/null
@@ -0,0 +1,44 @@
+From f696b5656fe0cb6547e00a8c69dddd9e357374d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 15:28:39 +0500
+Subject: fou: fix initialization of grc
+
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+
+[ Upstream commit 4c8002277167125078e6b9b90137bdf443ebaa08 ]
+
+The grc must be initialized first. There can be a condition where if
+fou is NULL, goto out will be executed and grc would be used
+uninitialized.
+
+Fixes: 7e4196935069 ("fou: Fix null-ptr-deref in GRO.")
+Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20240906102839.202798-1-usama.anjum@collabora.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fou_core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
+index 78b869b31492..3e30745e2c09 100644
+--- a/net/ipv4/fou_core.c
++++ b/net/ipv4/fou_core.c
+@@ -336,11 +336,11 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
+       struct gro_remcsum grc;
+       u8 proto;
++      skb_gro_remcsum_init(&grc);
++
+       if (!fou)
+               goto out;
+-      skb_gro_remcsum_init(&grc);
+-
+       off = skb_gro_offset(skb);
+       len = off + sizeof(*guehdr);
+-- 
+2.43.0
+
diff --git a/queue-6.10/hwmon-pmbus-conditionally-clear-individual-status-bi.patch b/queue-6.10/hwmon-pmbus-conditionally-clear-individual-status-bi.patch
new file mode 100644 (file)
index 0000000..e565b05
--- /dev/null
@@ -0,0 +1,99 @@
+From d8d9bd714a52b6d2185c2bd9af89d89973d2747c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Sep 2024 11:30:28 +0200
+Subject: hwmon: (pmbus) Conditionally clear individual status bits for pmbus
+ rev >= 1.2
+
+From: Patryk Biel <pbiel7@gmail.com>
+
+[ Upstream commit 20471071f198c8626dbe3951ac9834055b387844 ]
+
+The current implementation of pmbus_show_boolean assumes that all devices
+support write-back operation of status register to clear pending warnings
+or faults. Since clearing individual bits in the status registers was only
+introduced in PMBus specification 1.2, this operation may not be supported
+by some older devices. This can result in an error while reading boolean
+attributes such as temp1_max_alarm.
+
+Fetch PMBus revision supported by the device and modify pmbus_show_boolean
+so that it only tries to clear individual status bits if the device is
+compliant with PMBus specs >= 1.2. Otherwise clear all fault indicators
+on the current page after a fault status was reported.
+
+Fixes: 35f165f08950a ("hwmon: (pmbus) Clear pmbus fault/warning bits after read")
+Signed-off-by: Patryk Biel <pbiel7@gmail.com>
+Message-ID: <20240909-pmbus-status-reg-clearing-v1-1-f1c0d68c6408@gmail.com>
+[groeck:
+ Rewrote description
+ Moved revision detection code ahead of clear faults command
+ Assigned revision if return value from PMBUS_REVISION command is 0
+ Improved return value check from calling _pmbus_write_byte_data()]
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwmon/pmbus/pmbus.h      |  6 ++++++
+ drivers/hwmon/pmbus/pmbus_core.c | 17 ++++++++++++++---
+ 2 files changed, 20 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h
+index fb442fae7b3e..0bea603994e7 100644
+--- a/drivers/hwmon/pmbus/pmbus.h
++++ b/drivers/hwmon/pmbus/pmbus.h
+@@ -418,6 +418,12 @@ enum pmbus_sensor_classes {
+ enum pmbus_data_format { linear = 0, ieee754, direct, vid };
+ enum vrm_version { vr11 = 0, vr12, vr13, imvp9, amd625mv };
++/* PMBus revision identifiers */
++#define PMBUS_REV_10 0x00     /* PMBus revision 1.0 */
++#define PMBUS_REV_11 0x11     /* PMBus revision 1.1 */
++#define PMBUS_REV_12 0x22     /* PMBus revision 1.2 */
++#define PMBUS_REV_13 0x33     /* PMBus revision 1.3 */
++
+ struct pmbus_driver_info {
+       int pages;              /* Total number of pages */
+       u8 phases[PMBUS_PAGES]; /* Number of phases per page */
+diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
+index cb4c65a7f288..e592446b2665 100644
+--- a/drivers/hwmon/pmbus/pmbus_core.c
++++ b/drivers/hwmon/pmbus/pmbus_core.c
+@@ -85,6 +85,8 @@ struct pmbus_data {
+       u32 flags;              /* from platform data */
++      u8 revision;    /* The PMBus revision the device is compliant with */
++
+       int exponent[PMBUS_PAGES];
+                               /* linear mode: exponent for output voltages */
+@@ -1095,9 +1097,14 @@ static int pmbus_get_boolean(struct i2c_client *client, struct pmbus_boolean *b,
+       regval = status & mask;
+       if (regval) {
+-              ret = _pmbus_write_byte_data(client, page, reg, regval);
+-              if (ret)
+-                      goto unlock;
++              if (data->revision >= PMBUS_REV_12) {
++                      ret = _pmbus_write_byte_data(client, page, reg, regval);
++                      if (ret)
++                              goto unlock;
++              } else {
++                      pmbus_clear_fault_page(client, page);
++              }
++
+       }
+       if (s1 && s2) {
+               s64 v1, v2;
+@@ -2640,6 +2647,10 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
+                       data->flags |= PMBUS_WRITE_PROTECTED | PMBUS_SKIP_STATUS_CHECK;
+       }
++      ret = i2c_smbus_read_byte_data(client, PMBUS_REVISION);
++      if (ret >= 0)
++              data->revision = ret;
++
+       if (data->info->pages)
+               pmbus_clear_faults(client);
+       else
+-- 
+2.43.0
+
diff --git a/queue-6.10/ice-fix-accounting-for-filters-shared-by-multiple-vs.patch b/queue-6.10/ice-fix-accounting-for-filters-shared-by-multiple-vs.patch
new file mode 100644 (file)
index 0000000..3e591f4
--- /dev/null
@@ -0,0 +1,69 @@
+From c3747646f201e206e47313224bda6924f6798eaa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Jul 2024 09:55:55 -0700
+Subject: ice: fix accounting for filters shared by multiple VSIs
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+[ Upstream commit e843cf7b34fe2e0c1afc55e1f3057375c9b77a14 ]
+
+When adding a switch filter (such as a MAC or VLAN filter), it is expected
+that the driver will detect the case where the filter already exists, and
+return -EEXIST. This is used by calling code such as ice_vc_add_mac_addr,
+and ice_vsi_add_vlan to avoid incrementing the accounting fields such as
+vsi->num_vlan or vf->num_mac.
+
+This logic works correctly for the case where only a single VSI has added a
+given switch filter.
+
+When a second VSI adds the same switch filter, the driver converts the
+existing filter from an ICE_FWD_TO_VSI filter into an ICE_FWD_TO_VSI_LIST
+filter. This saves switch resources, by ensuring that multiple VSIs can
+re-use the same filter.
+
+The ice_add_update_vsi_list() function is responsible for doing this
+conversion. When first converting a filter from the FWD_TO_VSI into
+FWD_TO_VSI_LIST, it checks if the VSI being added is the same as the
+existing rule's VSI. In such a case it returns -EEXIST.
+
+However, when the switch rule has already been converted to a
+FWD_TO_VSI_LIST, the logic is different. Adding a new VSI in this case just
+requires extending the VSI list entry. The logic for checking if the rule
+already exists in this case returns 0 instead of -EEXIST.
+
+This breaks the accounting logic mentioned above, so the counters for how
+many MAC and VLAN filters exist for a given VF or VSI no longer accurately
+reflect the actual count. This breaks other code which relies on these
+counts.
+
+In typical usage this primarily affects such filters generally shared by
+multiple VSIs such as VLAN 0, or broadcast and multicast MAC addresses.
+
+Fix this by correctly reporting -EEXIST in the case of adding the same VSI
+to a switch rule already converted to ICE_FWD_TO_VSI_LIST.
+
+Fixes: 9daf8208dd4d ("ice: Add support for switch filter programming")
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_switch.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index ffd6c42bda1e..17a8a0c553d2 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ b/drivers/net/ethernet/intel/ice/ice_switch.c
+@@ -3219,7 +3219,7 @@ ice_add_update_vsi_list(struct ice_hw *hw,
+               /* A rule already exists with the new VSI being added */
+               if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
+-                      return 0;
++                      return -EEXIST;
+               /* Update the previously created VSI list set with
+                * the new VSI ID passed in
+-- 
+2.43.0
+
diff --git a/queue-6.10/ice-fix-lldp-packets-dropping-after-changing-the-num.patch b/queue-6.10/ice-fix-lldp-packets-dropping-after-changing-the-num.patch
new file mode 100644 (file)
index 0000000..3f4f1f0
--- /dev/null
@@ -0,0 +1,70 @@
+From 12505620d518e3a7f0cd48e20de15fad4cdae66d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jun 2024 11:43:42 +0200
+Subject: ice: Fix lldp packets dropping after changing the number of channels
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com>
+
+[ Upstream commit 9debb703e14939dfafa5d403f27c4feb2e9f6501 ]
+
+After vsi setup refactor commit 6624e780a577 ("ice: split ice_vsi_setup
+into smaller functions") ice_cfg_sw_lldp function which removes rx rule
+directing LLDP packets to vsi is moved from ice_vsi_release to
+ice_vsi_decfg function. ice_vsi_decfg is used in more cases than just in
+vsi_release resulting in unnecessary removal of rx lldp packets handling
+switch rule. This leads to lldp packets being dropped after changing the
+number of channels via ethtool.
+This patch moves ice_cfg_sw_lldp function that removes rx lldp sw rule back
+to ice_vsi_release function.
+
+Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
+Reported-by: Matěj Grégr <mgregr@netx.as>
+Closes: https://lore.kernel.org/intel-wired-lan/1be45a76-90af-4813-824f-8398b69745a9@netx.as/T/#u
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_lib.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index 7076a7738864..c2ba58659347 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -2413,13 +2413,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi)
+       struct ice_pf *pf = vsi->back;
+       int err;
+-      /* The Rx rule will only exist to remove if the LLDP FW
+-       * engine is currently stopped
+-       */
+-      if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
+-          !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+-              ice_cfg_sw_lldp(vsi, false, false);
+-
+       ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+       err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
+       if (err)
+@@ -2764,6 +2757,14 @@ int ice_vsi_release(struct ice_vsi *vsi)
+               ice_rss_clean(vsi);
+       ice_vsi_close(vsi);
++
++      /* The Rx rule will only exist to remove if the LLDP FW
++       * engine is currently stopped
++       */
++      if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
++          !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
++              ice_cfg_sw_lldp(vsi, false, false);
++
+       ice_vsi_decfg(vsi);
+       /* retain SW VSI data structure since it is needed to unregister and
+-- 
+2.43.0
+
diff --git a/queue-6.10/ice-fix-vsi-lists-confusion-when-adding-vlans.patch b/queue-6.10/ice-fix-vsi-lists-confusion-when-adding-vlans.patch
new file mode 100644 (file)
index 0000000..e26973b
--- /dev/null
@@ -0,0 +1,91 @@
+From 956df7652a50a8a3e1166b27f6ba9ca3bae622d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Sep 2024 11:39:22 +0200
+Subject: ice: fix VSI lists confusion when adding VLANs
+
+From: Michal Schmidt <mschmidt@redhat.com>
+
+[ Upstream commit d2940002b0aa42898de815a1453b29d440292386 ]
+
+The description of function ice_find_vsi_list_entry says:
+  Search VSI list map with VSI count 1
+
+However, since the blamed commit (see Fixes below), the function no
+longer checks vsi_count. This causes a problem in ice_add_vlan_internal,
+where the decision to share VSI lists between filter rules relies on the
+vsi_count of the found existing VSI list being 1.
+
+The reproducing steps:
+1. Have a PF and two VFs.
+   There will be a filter rule for VLAN 0, referring to a VSI list
+   containing VSIs: 0 (PF), 2 (VF#0), 3 (VF#1).
+2. Add VLAN 1234 to VF#0.
+   ice will make the wrong decision to share the VSI list with the new
+   rule. The wrong behavior may not be immediately apparent, but it can
+   be observed with debug prints.
+3. Add VLAN 1234 to VF#1.
+   ice will unshare the VSI list for the VLAN 1234 rule. Due to the
+   earlier bad decision, the newly created VSI list will contain
+   VSIs 0 (PF) and 3 (VF#1), instead of expected 2 (VF#0) and 3 (VF#1).
+4. Try pinging a network peer over the VLAN interface on VF#0.
+   This fails.
+
+Reproducer script at:
+https://gitlab.com/mschmidt2/repro/-/blob/master/RHEL-46814/test-vlan-vsi-list-confusion.sh
+Commented debug trace:
+https://gitlab.com/mschmidt2/repro/-/blob/master/RHEL-46814/ice-vlan-vsi-lists-debug.txt
+Patch adding the debug prints:
+https://gitlab.com/mschmidt2/linux/-/commit/f8a8814623944a45091a77c6094c40bfe726bfdb
+(Unsafe, by the way. Lacks rule_lock when dumping in ice_remove_vlan.)
+
+Michal Swiatkowski added to the explanation that the bug is caused by
+reusing a VSI list created for VLAN 0. All created VFs' VSIs are added
+to VLAN 0 filter. When a non-zero VLAN is created on a VF which is already
+in VLAN 0 (normal case), the VSI list from VLAN 0 is reused.
+It leads to a problem because all VFs (VSIs to be specific) that are
+subscribed to VLAN 0 will now receive a new VLAN tag traffic. This is
+one bug, another is the bug described above. Removing filters from
+one VF will remove VLAN filter from the previous VF. It happens when a VF
+is reset. Example:
+- creation of 3 VFs
+- we have VSI list (used for VLAN 0) [0 (pf), 2 (vf1), 3 (vf2), 4 (vf3)]
+- we are adding VLAN 100 on VF1, we are reusing the previous list
+  because 2 is there
+- VLAN traffic works fine, but VLAN 100 tagged traffic can be received
+  on all VSIs from the list (for example broadcast or unicast)
+- trust is turning on VF2, VF2 is resetting, all filters from VF2 are
+  removed; the VLAN 100 filter is also removed because 3 is on the list
+- VLAN traffic to VF1 isn't working anymore, there is a need to recreate
+  VLAN interface to readd VLAN filter
+
+One thing I'm not certain about is the implications for the LAG feature,
+which is another caller of ice_find_vsi_list_entry. I don't have a
+LAG-capable card at hand to test.
+
+Fixes: 23ccae5ce15f ("ice: changes to the interface with the HW and FW for SRIOV_VF+LAG")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
+Reviewed-by: Dave Ertman <David.m.ertman@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_switch.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index 17a8a0c553d2..0b85b3653a68 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ b/drivers/net/ethernet/intel/ice/ice_switch.c
+@@ -3289,7 +3289,7 @@ ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle,
+       list_head = &sw->recp_list[recp_id].filt_rules;
+       list_for_each_entry(list_itr, list_head, list_entry) {
+-              if (list_itr->vsi_list_info) {
++              if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) {
+                       map_info = list_itr->vsi_list_info;
+                       if (test_bit(vsi_handle, map_info->vsi_map)) {
+                               *vsi_list_id = map_info->vsi_list_id;
+-- 
+2.43.0
+
diff --git a/queue-6.10/igb-always-call-igb_xdp_ring_update_tail-under-tx-lo.patch b/queue-6.10/igb-always-call-igb_xdp_ring_update_tail-under-tx-lo.patch
new file mode 100644 (file)
index 0000000..2a78209
--- /dev/null
@@ -0,0 +1,95 @@
+From 21afe6b03b58f74a36b1d022639b5ea7aaab380e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Aug 2024 09:42:07 +0200
+Subject: igb: Always call igb_xdp_ring_update_tail() under Tx lock
+
+From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
+
+[ Upstream commit 27717f8b17c098c4373ddb8fe89e1a1899c7779d ]
+
+Always call igb_xdp_ring_update_tail() under __netif_tx_lock, add a comment
+and lockdep assert to indicate that. This is needed to share the same TX
+ring between XDP, XSK and slow paths. Furthermore, the current XDP
+implementation is racy on tail updates.
+
+Fixes: 9cbc948b5a20 ("igb: add XDP support")
+Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
+[Kurt: Add lockdep assert and fixes tag]
+Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
+Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb_main.c | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index a27d0a4d3d9c..6dc5c11aebbd 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -33,6 +33,7 @@
+ #include <linux/bpf_trace.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/etherdevice.h>
++#include <linux/lockdep.h>
+ #ifdef CONFIG_IGB_DCA
+ #include <linux/dca.h>
+ #endif
+@@ -2915,8 +2916,11 @@ static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+       }
+ }
++/* This function assumes __netif_tx_lock is held by the caller. */
+ static void igb_xdp_ring_update_tail(struct igb_ring *ring)
+ {
++      lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
++
+       /* Force memory writes to complete before letting h/w know there
+        * are new descriptors to fetch.
+        */
+@@ -3001,11 +3005,11 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
+               nxmit++;
+       }
+-      __netif_tx_unlock(nq);
+-
+       if (unlikely(flags & XDP_XMIT_FLUSH))
+               igb_xdp_ring_update_tail(tx_ring);
++      __netif_tx_unlock(nq);
++
+       return nxmit;
+ }
+@@ -8865,12 +8869,14 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring,
+ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
+ {
++      unsigned int total_bytes = 0, total_packets = 0;
+       struct igb_adapter *adapter = q_vector->adapter;
+       struct igb_ring *rx_ring = q_vector->rx.ring;
+-      struct sk_buff *skb = rx_ring->skb;
+-      unsigned int total_bytes = 0, total_packets = 0;
+       u16 cleaned_count = igb_desc_unused(rx_ring);
++      struct sk_buff *skb = rx_ring->skb;
++      int cpu = smp_processor_id();
+       unsigned int xdp_xmit = 0;
++      struct netdev_queue *nq;
+       struct xdp_buff xdp;
+       u32 frame_sz = 0;
+       int rx_buf_pgcnt;
+@@ -8998,7 +9004,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
+       if (xdp_xmit & IGB_XDP_TX) {
+               struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
++              nq = txring_txq(tx_ring);
++              __netif_tx_lock(nq, cpu);
+               igb_xdp_ring_update_tail(tx_ring);
++              __netif_tx_unlock(nq);
+       }
+       u64_stats_update_begin(&rx_ring->rx_syncp);
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-dpaa-pad-packets-to-eth_zlen.patch b/queue-6.10/net-dpaa-pad-packets-to-eth_zlen.patch
new file mode 100644 (file)
index 0000000..0a1c6d0
--- /dev/null
@@ -0,0 +1,62 @@
+From 172b42b5a25aad818dcdaec096b5a9ff18f521e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Sep 2024 10:31:44 -0400
+Subject: net: dpaa: Pad packets to ETH_ZLEN
+
+From: Sean Anderson <sean.anderson@linux.dev>
+
+[ Upstream commit cbd7ec083413c6a2e0c326d49e24ec7d12c7a9e0 ]
+
+When sending packets under 60 bytes, up to three bytes of the buffer
+following the data may be leaked. Avoid this by extending all packets to
+ETH_ZLEN, ensuring nothing is leaked in the padding. This bug can be
+reproduced by running
+
+       $ ping -s 11 destination
+
+Fixes: 9ad1a3749333 ("dpaa_eth: add support for DPAA Ethernet")
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20240910143144.1439910-1-sean.anderson@linux.dev
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+index 946c3d3b69d9..669fb5804d3b 100644
+--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
++++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+@@ -2285,12 +2285,12 @@ static netdev_tx_t
+ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+ {
+       const int queue_mapping = skb_get_queue_mapping(skb);
+-      bool nonlinear = skb_is_nonlinear(skb);
+       struct rtnl_link_stats64 *percpu_stats;
+       struct dpaa_percpu_priv *percpu_priv;
+       struct netdev_queue *txq;
+       struct dpaa_priv *priv;
+       struct qm_fd fd;
++      bool nonlinear;
+       int offset = 0;
+       int err = 0;
+@@ -2300,6 +2300,13 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+       qm_fd_clear_fd(&fd);
++      /* Packet data is always read as 32-bit words, so zero out any part of
++       * the skb which might be sent if we have to pad the packet
++       */
++      if (__skb_put_padto(skb, ETH_ZLEN, false))
++              goto enomem;
++
++      nonlinear = skb_is_nonlinear(skb);
+       if (!nonlinear) {
+               /* We're going to store the skb backpointer at the beginning
+                * of the data buffer, so we need a privately owned skb
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-dsa-felix-ignore-pending-status-of-tas-module-wh.patch b/queue-6.10/net-dsa-felix-ignore-pending-status-of-tas-module-wh.patch
new file mode 100644 (file)
index 0000000..c892206
--- /dev/null
@@ -0,0 +1,59 @@
+From c29a0b92862994b714550e84bc062a05a02e83fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 17:35:50 +0800
+Subject: net: dsa: felix: ignore pending status of TAS module when it's
+ disabled
+
+From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
+
+[ Upstream commit 70654f4c212e83898feced125d91ebb3695950d8 ]
+
+The TAS module could not be configured when it's running in pending
+status. We need to disable the module and configure it again. However, the
+pending status is not cleared after the module is disabled. TC taprio set
+will always return busy even if it's disabled.
+
+For example, a user uses tc-taprio to configure Qbv and a future
+basetime. The TAS module will run in a pending status. There is no way
+to reconfigure Qbv, it always returns busy.
+
+Actually the TAS module can be reconfigured when it's disabled. So it
+doesn't need to check the pending status if the TAS module is disabled.
+
+After the patch, user can delete the tc taprio configuration to disable
+Qbv and reconfigure it again.
+
+Fixes: de143c0e274b ("net: dsa: felix: Configure Time-Aware Scheduler via taprio offload")
+Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
+Link: https://patch.msgid.link/20240906093550.29985-1-xiaoliang.yang_1@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/ocelot/felix_vsc9959.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
+index 85952d841f28..bd061997618d 100644
+--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
+@@ -1474,10 +1474,13 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
+       /* Hardware errata -  Admin config could not be overwritten if
+        * config is pending, need reset the TAS module
+        */
+-      val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
+-      if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
+-              ret = -EBUSY;
+-              goto err_reset_tc;
++      val = ocelot_read_rix(ocelot, QSYS_TAG_CONFIG, port);
++      if (val & QSYS_TAG_CONFIG_ENABLE) {
++              val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
++              if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
++                      ret = -EBUSY;
++                      goto err_reset_tc;
++              }
+       }
+       ocelot_rmw_rix(ocelot,
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-ftgmac100-enable-tx-interrupt-to-avoid-tx-timeou.patch b/queue-6.10/net-ftgmac100-enable-tx-interrupt-to-avoid-tx-timeou.patch
new file mode 100644 (file)
index 0000000..0611a44
--- /dev/null
@@ -0,0 +1,69 @@
+From 2d2594d912eae09fef8158d8a3855652bd21ba0c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 14:28:31 +0800
+Subject: net: ftgmac100: Enable TX interrupt to avoid TX timeout
+
+From: Jacky Chou <jacky_chou@aspeedtech.com>
+
+[ Upstream commit fef2843bb49f414d1523ca007d088071dee0e055 ]
+
+Currently, the driver only enables the RX interrupt to handle RX
+packets and TX resources. Sometimes there is no RX traffic,
+so TX resources have to wait for an RX interrupt to be freed.
+This situation will trigger the TX timeout watchdog when the MAC
+TX ring has no more resources to transmit packets.
+Therefore, enable TX interrupt to release TX resources at any time.
+
+When verifying iperf3 over UDP, the network hangs,
+as shown in the log below.
+
+root# iperf3 -c 192.168.100.100 -i1 -t10 -u -b0
+Connecting to host 192.168.100.100, port 5201
+[  4] local 192.168.100.101 port 35773 connected to 192.168.100.100 port 5201
+[ ID] Interval           Transfer     Bandwidth       Total Datagrams
+[  4]   0.00-20.42  sec   160 KBytes  64.2 Kbits/sec  20
+[  4]  20.42-20.42  sec  0.00 Bytes  0.00 bits/sec  0
+[  4]  20.42-20.42  sec  0.00 Bytes  0.00 bits/sec  0
+[  4]  20.42-20.42  sec  0.00 Bytes  0.00 bits/sec  0
+[  4]  20.42-20.42  sec  0.00 Bytes  0.00 bits/sec  0
+[  4]  20.42-20.42  sec  0.00 Bytes  0.00 bits/sec  0
+[  4]  20.42-20.42  sec  0.00 Bytes  0.00 bits/sec  0
+[  4]  20.42-20.42  sec  0.00 Bytes  0.00 bits/sec  0
+[  4]  20.42-20.42  sec  0.00 Bytes  0.00 bits/sec  0
+[  4]  20.42-20.42  sec  0.00 Bytes  0.00 bits/sec  0
+- - - - - - - - - - - - - - - - - - - - - - - - -
+[ ID] Interval          Transfer    Bandwidth      Jitter   Lost/Total Datagrams
+[  4]   0.00-20.42  sec  160 KBytes 64.2 Kbits/sec 0.000 ms 0/20 (0%)
+[  4] Sent 20 datagrams
+iperf3: error - the server has terminated
+
+In this network topology, the FTGMAC connects directly to a PC.
+UDP does not need to wait for ACK, unlike TCP.
+Therefore, FTGMAC needs to enable TX interrupt to release TX resources instead
+of waiting for the RX interrupt.
+
+Fixes: 10cbd6407609 ("ftgmac100: Rework NAPI & interrupts handling")
+Signed-off-by: Jacky Chou <jacky_chou@aspeedtech.com>
+Link: https://patch.msgid.link/20240906062831.2243399-1-jacky_chou@aspeedtech.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/faraday/ftgmac100.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h
+index 63b3e02fab16..4968f6f0bdbc 100644
+--- a/drivers/net/ethernet/faraday/ftgmac100.h
++++ b/drivers/net/ethernet/faraday/ftgmac100.h
+@@ -84,7 +84,7 @@
+                           FTGMAC100_INT_RPKT_BUF)
+ /* All the interrupts we care about */
+-#define FTGMAC100_INT_ALL (FTGMAC100_INT_RPKT_BUF  |  \
++#define FTGMAC100_INT_ALL (FTGMAC100_INT_RXTX  |  \
+                          FTGMAC100_INT_BAD)
+ /*
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-hsr-prevent-null-pointer-dereference-in-hsr_prox.patch b/queue-6.10/net-hsr-prevent-null-pointer-dereference-in-hsr_prox.patch
new file mode 100644 (file)
index 0000000..d16f504
--- /dev/null
@@ -0,0 +1,55 @@
+From 20662849c16c34f7437970c17e513ee4dcd16b81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 8 Sep 2024 04:03:41 +0900
+Subject: net: hsr: prevent NULL pointer dereference in hsr_proxy_announce()
+
+From: Jeongjun Park <aha310510@gmail.com>
+
+[ Upstream commit a7789fd4caaf96ecfed5e28c4cddb927e6bebadb ]
+
+In the function hsr_proxy_announce() added in the previous commit
+5f703ce5c981 ("net: hsr: Send supervisory frames to HSR network
+with ProxyNodeTable data"), the return value of the hsr_port_get_hsr()
+function is not checked to be a NULL pointer, which causes a NULL
+pointer dereference.
+
+To solve this, we need to add code to check whether the return value
+of hsr_port_get_hsr() is NULL.
+
+Reported-by: syzbot+02a42d9b1bd395cbcab4@syzkaller.appspotmail.com
+Fixes: 5f703ce5c981 ("net: hsr: Send supervisory frames to HSR network with ProxyNodeTable data")
+Signed-off-by: Jeongjun Park <aha310510@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Acked-by: Lukasz Majewski <lukma@denx.de>
+Link: https://patch.msgid.link/20240907190341.162289-1-aha310510@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_device.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
+index ac56784c327c..049e22bdaafb 100644
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -414,6 +414,9 @@ static void hsr_proxy_announce(struct timer_list *t)
+        * of SAN nodes stored in ProxyNodeTable.
+        */
+       interlink = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
++      if (!interlink)
++              goto done;
++
+       list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
+               if (hsr_addr_is_redbox(hsr, node->macaddress_A))
+                       continue;
+@@ -428,6 +431,7 @@ static void hsr_proxy_announce(struct timer_list *t)
+               mod_timer(&hsr->announce_proxy_timer, jiffies + interval);
+       }
++done:
+       rcu_read_unlock();
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-hsr-remove-seqnr_lock.patch b/queue-6.10/net-hsr-remove-seqnr_lock.patch
new file mode 100644 (file)
index 0000000..cd7c5a6
--- /dev/null
@@ -0,0 +1,213 @@
+From 2df9e4d98088c7ec91d973a55bb91ccaf4ac72c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Sep 2024 13:37:25 +0000
+Subject: net: hsr: remove seqnr_lock
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit b3c9e65eb227269ed72a115ba22f4f51b4e62b4d ]
+
+syzbot found a new splat [1].
+
+Instead of adding yet another spin_lock_bh(&hsr->seqnr_lock) /
+spin_unlock_bh(&hsr->seqnr_lock) pair, remove seqnr_lock
+and use atomic_t for hsr->sequence_nr and hsr->sup_sequence_nr.
+
+This also avoids a race in hsr_fill_info().
+
+Also remove interlink_sequence_nr which is unused.
+
+[1]
+ WARNING: CPU: 1 PID: 9723 at net/hsr/hsr_forward.c:602 handle_std_frame+0x247/0x2c0 net/hsr/hsr_forward.c:602
+Modules linked in:
+CPU: 1 UID: 0 PID: 9723 Comm: syz.0.1657 Not tainted 6.11.0-rc6-syzkaller-00026-g88fac17500f4 #0
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
+ RIP: 0010:handle_std_frame+0x247/0x2c0 net/hsr/hsr_forward.c:602
+Code: 49 8d bd b0 01 00 00 be ff ff ff ff e8 e2 58 25 00 31 ff 89 c5 89 c6 e8 47 53 a8 f6 85 ed 0f 85 5a ff ff ff e8 fa 50 a8 f6 90 <0f> 0b 90 e9 4c ff ff ff e8 cc e7 06 f7 e9 8f fe ff ff e8 52 e8 06
+RSP: 0018:ffffc90000598598 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: ffffc90000598670 RCX: ffffffff8ae2c919
+RDX: ffff888024e94880 RSI: ffffffff8ae2c926 RDI: 0000000000000005
+RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000003
+R13: ffff8880627a8cc0 R14: 0000000000000000 R15: ffff888012b03c3a
+FS:  0000000000000000(0000) GS:ffff88802b700000(0063) knlGS:00000000f5696b40
+CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
+CR2: 0000000020010000 CR3: 00000000768b4000 CR4: 0000000000350ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <IRQ>
+  hsr_fill_frame_info+0x2c8/0x360 net/hsr/hsr_forward.c:630
+  fill_frame_info net/hsr/hsr_forward.c:700 [inline]
+  hsr_forward_skb+0x7df/0x25c0 net/hsr/hsr_forward.c:715
+  hsr_handle_frame+0x603/0x850 net/hsr/hsr_slave.c:70
+  __netif_receive_skb_core.constprop.0+0xa3d/0x4330 net/core/dev.c:5555
+  __netif_receive_skb_list_core+0x357/0x950 net/core/dev.c:5737
+  __netif_receive_skb_list net/core/dev.c:5804 [inline]
+  netif_receive_skb_list_internal+0x753/0xda0 net/core/dev.c:5896
+  gro_normal_list include/net/gro.h:515 [inline]
+  gro_normal_list include/net/gro.h:511 [inline]
+  napi_complete_done+0x23f/0x9a0 net/core/dev.c:6247
+  gro_cell_poll+0x162/0x210 net/core/gro_cells.c:66
+  __napi_poll.constprop.0+0xb7/0x550 net/core/dev.c:6772
+  napi_poll net/core/dev.c:6841 [inline]
+  net_rx_action+0xa92/0x1010 net/core/dev.c:6963
+  handle_softirqs+0x216/0x8f0 kernel/softirq.c:554
+  do_softirq kernel/softirq.c:455 [inline]
+  do_softirq+0xb2/0xf0 kernel/softirq.c:442
+ </IRQ>
+ <TASK>
+
+Fixes: 06afd2c31d33 ("hsr: Synchronize sending frames to have always incremented outgoing seq nr.")
+Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_device.c  | 35 ++++++++++-------------------------
+ net/hsr/hsr_forward.c |  4 +---
+ net/hsr/hsr_main.h    |  6 ++----
+ net/hsr/hsr_netlink.c |  2 +-
+ 4 files changed, 14 insertions(+), 33 deletions(-)
+
+diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
+index e4cc6b78dcfc..ac56784c327c 100644
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -231,9 +231,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+               skb->dev = master->dev;
+               skb_reset_mac_header(skb);
+               skb_reset_mac_len(skb);
+-              spin_lock_bh(&hsr->seqnr_lock);
+               hsr_forward_skb(skb, master);
+-              spin_unlock_bh(&hsr->seqnr_lock);
+       } else {
+               dev_core_stats_tx_dropped_inc(dev);
+               dev_kfree_skb_any(skb);
+@@ -314,14 +312,10 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
+       set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version);
+       /* From HSRv1 on we have separate supervision sequence numbers. */
+-      spin_lock_bh(&hsr->seqnr_lock);
+-      if (hsr->prot_version > 0) {
+-              hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
+-              hsr->sup_sequence_nr++;
+-      } else {
+-              hsr_stag->sequence_nr = htons(hsr->sequence_nr);
+-              hsr->sequence_nr++;
+-      }
++      if (hsr->prot_version > 0)
++              hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr));
++      else
++              hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sequence_nr));
+       hsr_stag->tlv.HSR_TLV_type = type;
+       /* TODO: Why 12 in HSRv0? */
+@@ -343,13 +337,11 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
+               ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox);
+       }
+-      if (skb_put_padto(skb, ETH_ZLEN)) {
+-              spin_unlock_bh(&hsr->seqnr_lock);
++      if (skb_put_padto(skb, ETH_ZLEN))
+               return;
+-      }
+       hsr_forward_skb(skb, port);
+-      spin_unlock_bh(&hsr->seqnr_lock);
++
+       return;
+ }
+@@ -374,9 +366,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
+       set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0));
+       /* From HSRv1 on we have separate supervision sequence numbers. */
+-      spin_lock_bh(&hsr->seqnr_lock);
+-      hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
+-      hsr->sup_sequence_nr++;
++      hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr));
+       hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD;
+       hsr_stag->tlv.HSR_TLV_length = sizeof(struct hsr_sup_payload);
+@@ -384,13 +374,10 @@ static void send_prp_supervision_frame(struct hsr_port *master,
+       hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
+       ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
+-      if (skb_put_padto(skb, ETH_ZLEN)) {
+-              spin_unlock_bh(&hsr->seqnr_lock);
++      if (skb_put_padto(skb, ETH_ZLEN))
+               return;
+-      }
+       hsr_forward_skb(skb, master);
+-      spin_unlock_bh(&hsr->seqnr_lock);
+ }
+ /* Announce (supervision frame) timer function
+@@ -621,11 +608,9 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+       if (res < 0)
+               return res;
+-      spin_lock_init(&hsr->seqnr_lock);
+       /* Overflow soon to find bugs easier: */
+-      hsr->sequence_nr = HSR_SEQNR_START;
+-      hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
+-      hsr->interlink_sequence_nr = HSR_SEQNR_START;
++      atomic_set(&hsr->sequence_nr, HSR_SEQNR_START);
++      atomic_set(&hsr->sup_sequence_nr, HSR_SUP_SEQNR_START);
+       timer_setup(&hsr->announce_timer, hsr_announce, 0);
+       timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
+diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
+index 960ef386bc3a..9254037e9436 100644
+--- a/net/hsr/hsr_forward.c
++++ b/net/hsr/hsr_forward.c
+@@ -599,9 +599,7 @@ static void handle_std_frame(struct sk_buff *skb,
+       if (port->type == HSR_PT_MASTER ||
+           port->type == HSR_PT_INTERLINK) {
+               /* Sequence nr for the master/interlink node */
+-              lockdep_assert_held(&hsr->seqnr_lock);
+-              frame->sequence_nr = hsr->sequence_nr;
+-              hsr->sequence_nr++;
++              frame->sequence_nr = atomic_inc_return(&hsr->sequence_nr);
+       }
+ }
+diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
+index ab1f8d35d9dc..6f7bbf01f3e4 100644
+--- a/net/hsr/hsr_main.h
++++ b/net/hsr/hsr_main.h
+@@ -202,11 +202,9 @@ struct hsr_priv {
+       struct timer_list       prune_timer;
+       struct timer_list       prune_proxy_timer;
+       int announce_count;
+-      u16 sequence_nr;
+-      u16 interlink_sequence_nr; /* Interlink port seq_nr */
+-      u16 sup_sequence_nr;    /* For HSRv1 separate seq_nr for supervision */
++      atomic_t sequence_nr;
++      atomic_t sup_sequence_nr;       /* For HSRv1 separate seq_nr for supervision */
+       enum hsr_version prot_version;  /* Indicate if HSRv0, HSRv1 or PRPv1 */
+-      spinlock_t seqnr_lock;  /* locking for sequence_nr */
+       spinlock_t list_lock;   /* locking for node list */
+       struct hsr_proto_ops    *proto_ops;
+ #define PRP_LAN_ID    0x5     /* 0x1010 for A and 0x1011 for B. Bit 0 is set
+diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
+index f6ff0b61e08a..8aea4ff5f49e 100644
+--- a/net/hsr/hsr_netlink.c
++++ b/net/hsr/hsr_netlink.c
+@@ -163,7 +163,7 @@ static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
+       if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN,
+                   hsr->sup_multicast_addr) ||
+-          nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr))
++          nla_put_u16(skb, IFLA_HSR_SEQ_NR, atomic_read(&hsr->sequence_nr)))
+               goto nla_put_failure;
+       if (hsr->prot_version == PRP_V1)
+               proto = HSR_PROTOCOL_PRP;
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-hsr-send-supervisory-frames-to-hsr-network-with-.patch b/queue-6.10/net-hsr-send-supervisory-frames-to-hsr-network-with-.patch
new file mode 100644 (file)
index 0000000..04a3b99
--- /dev/null
@@ -0,0 +1,324 @@
+From 4f76ab73b1896f96c51bc1cd5c80832fc61cd243 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jun 2024 15:39:14 +0200
+Subject: net: hsr: Send supervisory frames to HSR network with ProxyNodeTable
+ data
+
+From: Lukasz Majewski <lukma@denx.de>
+
+[ Upstream commit 5f703ce5c981ee02c00e210d5b155bbbfbf11263 ]
+
+This patch provides support for sending supervision HSR frames with
+MAC addresses stored in ProxyNodeTable when RedBox (i.e. HSR-SAN) is
+enabled.
+
+Supervision frames with RedBox MAC address (appended as second TLV)
+are only sent for ProxyNodeTable nodes.
+
+This patch series shall be tested with hsr_redbox.sh script.
+
+Signed-off-by: Lukasz Majewski <lukma@denx.de>
+Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: b3c9e65eb227 ("net: hsr: remove seqnr_lock")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_device.c   | 63 ++++++++++++++++++++++++++++++++++--------
+ net/hsr/hsr_forward.c  | 37 +++++++++++++++++++++++--
+ net/hsr/hsr_framereg.c | 12 ++++++++
+ net/hsr/hsr_framereg.h |  2 ++
+ net/hsr/hsr_main.h     |  4 ++-
+ net/hsr/hsr_netlink.c  |  1 +
+ 6 files changed, 105 insertions(+), 14 deletions(-)
+
+diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
+index e6904288d40d..e4cc6b78dcfc 100644
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -73,9 +73,15 @@ static void hsr_check_announce(struct net_device *hsr_dev)
+                       mod_timer(&hsr->announce_timer, jiffies +
+                                 msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
+               }
++
++              if (hsr->redbox && !timer_pending(&hsr->announce_proxy_timer))
++                      mod_timer(&hsr->announce_proxy_timer, jiffies +
++                                msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL) / 2);
+       } else {
+               /* Deactivate the announce timer  */
+               timer_delete(&hsr->announce_timer);
++              if (hsr->redbox)
++                      timer_delete(&hsr->announce_proxy_timer);
+       }
+ }
+@@ -279,10 +285,11 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master)
+       return NULL;
+ }
+-static void send_hsr_supervision_frame(struct hsr_port *master,
+-                                     unsigned long *interval)
++static void send_hsr_supervision_frame(struct hsr_port *port,
++                                     unsigned long *interval,
++                                     const unsigned char *addr)
+ {
+-      struct hsr_priv *hsr = master->hsr;
++      struct hsr_priv *hsr = port->hsr;
+       __u8 type = HSR_TLV_LIFE_CHECK;
+       struct hsr_sup_payload *hsr_sp;
+       struct hsr_sup_tlv *hsr_stlv;
+@@ -296,9 +303,9 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
+               hsr->announce_count++;
+       }
+-      skb = hsr_init_skb(master);
++      skb = hsr_init_skb(port);
+       if (!skb) {
+-              netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n");
++              netdev_warn_once(port->dev, "HSR: Could not send supervision frame\n");
+               return;
+       }
+@@ -321,11 +328,12 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
+       hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ?
+                               sizeof(struct hsr_sup_payload) : 12;
+-      /* Payload: MacAddressA */
++      /* Payload: MacAddressA / SAN MAC from ProxyNodeTable */
+       hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
+-      ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
++      ether_addr_copy(hsr_sp->macaddress_A, addr);
+-      if (hsr->redbox) {
++      if (hsr->redbox &&
++          hsr_is_node_in_db(&hsr->proxy_node_db, addr)) {
+               hsr_stlv = skb_put(skb, sizeof(struct hsr_sup_tlv));
+               hsr_stlv->HSR_TLV_type = PRP_TLV_REDBOX_MAC;
+               hsr_stlv->HSR_TLV_length = sizeof(struct hsr_sup_payload);
+@@ -340,13 +348,14 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
+               return;
+       }
+-      hsr_forward_skb(skb, master);
++      hsr_forward_skb(skb, port);
+       spin_unlock_bh(&hsr->seqnr_lock);
+       return;
+ }
+ static void send_prp_supervision_frame(struct hsr_port *master,
+-                                     unsigned long *interval)
++                                     unsigned long *interval,
++                                     const unsigned char *addr)
+ {
+       struct hsr_priv *hsr = master->hsr;
+       struct hsr_sup_payload *hsr_sp;
+@@ -396,7 +405,7 @@ static void hsr_announce(struct timer_list *t)
+       rcu_read_lock();
+       master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+-      hsr->proto_ops->send_sv_frame(master, &interval);
++      hsr->proto_ops->send_sv_frame(master, &interval, master->dev->dev_addr);
+       if (is_admin_up(master->dev))
+               mod_timer(&hsr->announce_timer, jiffies + interval);
+@@ -404,6 +413,37 @@ static void hsr_announce(struct timer_list *t)
+       rcu_read_unlock();
+ }
++/* Announce (supervision frame) timer function for RedBox
++ */
++static void hsr_proxy_announce(struct timer_list *t)
++{
++      struct hsr_priv *hsr = from_timer(hsr, t, announce_proxy_timer);
++      struct hsr_port *interlink;
++      unsigned long interval = 0;
++      struct hsr_node *node;
++
++      rcu_read_lock();
++      /* RedBOX sends supervisory frames to HSR network with MAC addresses
++       * of SAN nodes stored in ProxyNodeTable.
++       */
++      interlink = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
++      list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
++              if (hsr_addr_is_redbox(hsr, node->macaddress_A))
++                      continue;
++              hsr->proto_ops->send_sv_frame(interlink, &interval,
++                                            node->macaddress_A);
++      }
++
++      if (is_admin_up(interlink->dev)) {
++              if (!interval)
++                      interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
++
++              mod_timer(&hsr->announce_proxy_timer, jiffies + interval);
++      }
++
++      rcu_read_unlock();
++}
++
+ void hsr_del_ports(struct hsr_priv *hsr)
+ {
+       struct hsr_port *port;
+@@ -590,6 +630,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+       timer_setup(&hsr->announce_timer, hsr_announce, 0);
+       timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
+       timer_setup(&hsr->prune_proxy_timer, hsr_prune_proxy_nodes, 0);
++      timer_setup(&hsr->announce_proxy_timer, hsr_proxy_announce, 0);
+       ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
+       hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
+diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
+index 05a61b8286ec..960ef386bc3a 100644
+--- a/net/hsr/hsr_forward.c
++++ b/net/hsr/hsr_forward.c
+@@ -117,6 +117,35 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
+       return true;
+ }
++static bool is_proxy_supervision_frame(struct hsr_priv *hsr,
++                                     struct sk_buff *skb)
++{
++      struct hsr_sup_payload *payload;
++      struct ethhdr *eth_hdr;
++      u16 total_length = 0;
++
++      eth_hdr = (struct ethhdr *)skb_mac_header(skb);
++
++      /* Get the HSR protocol revision. */
++      if (eth_hdr->h_proto == htons(ETH_P_HSR))
++              total_length = sizeof(struct hsrv1_ethhdr_sp);
++      else
++              total_length = sizeof(struct hsrv0_ethhdr_sp);
++
++      if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_payload)))
++              return false;
++
++      skb_pull(skb, total_length);
++      payload = (struct hsr_sup_payload *)skb->data;
++      skb_push(skb, total_length);
++
++      /* For RedBox (HSR-SAN) check if we have received the supervision
++       * frame with MAC addresses from own ProxyNodeTable.
++       */
++      return hsr_is_node_in_db(&hsr->proxy_node_db,
++                               payload->macaddress_A);
++}
++
+ static struct sk_buff *create_stripped_skb_hsr(struct sk_buff *skb_in,
+                                              struct hsr_frame_info *frame)
+ {
+@@ -499,7 +528,8 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
+                                          frame->sequence_nr))
+                       continue;
+-              if (frame->is_supervision && port->type == HSR_PT_MASTER) {
++              if (frame->is_supervision && port->type == HSR_PT_MASTER &&
++                  !frame->is_proxy_supervision) {
+                       hsr_handle_sup_frame(frame);
+                       continue;
+               }
+@@ -637,6 +667,9 @@ static int fill_frame_info(struct hsr_frame_info *frame,
+       memset(frame, 0, sizeof(*frame));
+       frame->is_supervision = is_supervision_frame(port->hsr, skb);
++      if (frame->is_supervision && hsr->redbox)
++              frame->is_proxy_supervision =
++                      is_proxy_supervision_frame(port->hsr, skb);
+       n_db = &hsr->node_db;
+       if (port->type == HSR_PT_INTERLINK)
+@@ -688,7 +721,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
+       /* Gets called for ingress frames as well as egress from master port.
+        * So check and increment stats for master port only here.
+        */
+-      if (port->type == HSR_PT_MASTER) {
++      if (port->type == HSR_PT_MASTER || port->type == HSR_PT_INTERLINK) {
+               port->dev->stats.tx_packets++;
+               port->dev->stats.tx_bytes += skb->len;
+       }
+diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
+index 614df9649794..73bc6f659812 100644
+--- a/net/hsr/hsr_framereg.c
++++ b/net/hsr/hsr_framereg.c
+@@ -36,6 +36,14 @@ static bool seq_nr_after(u16 a, u16 b)
+ #define seq_nr_before(a, b)           seq_nr_after((b), (a))
+ #define seq_nr_before_or_eq(a, b)     (!seq_nr_after((a), (b)))
++bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr)
++{
++      if (!hsr->redbox || !is_valid_ether_addr(hsr->macaddress_redbox))
++              return false;
++
++      return ether_addr_equal(addr, hsr->macaddress_redbox);
++}
++
+ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
+ {
+       struct hsr_self_node *sn;
+@@ -591,6 +599,10 @@ void hsr_prune_proxy_nodes(struct timer_list *t)
+       spin_lock_bh(&hsr->list_lock);
+       list_for_each_entry_safe(node, tmp, &hsr->proxy_node_db, mac_list) {
++              /* Don't prune RedBox node. */
++              if (hsr_addr_is_redbox(hsr, node->macaddress_A))
++                      continue;
++
+               timestamp = node->time_in[HSR_PT_INTERLINK];
+               /* Prune old entries */
+diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
+index 7619e31c1d2d..993fa950d814 100644
+--- a/net/hsr/hsr_framereg.h
++++ b/net/hsr/hsr_framereg.h
+@@ -22,6 +22,7 @@ struct hsr_frame_info {
+       struct hsr_node *node_src;
+       u16 sequence_nr;
+       bool is_supervision;
++      bool is_proxy_supervision;
+       bool is_vlan;
+       bool is_local_dest;
+       bool is_local_exclusive;
+@@ -35,6 +36,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
+                             enum hsr_port_type rx_port);
+ void hsr_handle_sup_frame(struct hsr_frame_info *frame);
+ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr);
++bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr);
+ void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb);
+ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
+diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
+index 23850b16d1ea..ab1f8d35d9dc 100644
+--- a/net/hsr/hsr_main.h
++++ b/net/hsr/hsr_main.h
+@@ -170,7 +170,8 @@ struct hsr_node;
+ struct hsr_proto_ops {
+       /* format and send supervision frame */
+-      void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval);
++      void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval,
++                            const unsigned char addr[ETH_ALEN]);
+       void (*handle_san_frame)(bool san, enum hsr_port_type port,
+                                struct hsr_node *node);
+       bool (*drop_frame)(struct hsr_frame_info *frame, struct hsr_port *port);
+@@ -197,6 +198,7 @@ struct hsr_priv {
+       struct list_head        proxy_node_db;  /* RedBox HSR proxy nodes */
+       struct hsr_self_node    __rcu *self_node;       /* MACs of slaves */
+       struct timer_list       announce_timer; /* Supervision frame dispatch */
++      struct timer_list       announce_proxy_timer;
+       struct timer_list       prune_timer;
+       struct timer_list       prune_proxy_timer;
+       int announce_count;
+diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
+index 898f18c6da53..f6ff0b61e08a 100644
+--- a/net/hsr/hsr_netlink.c
++++ b/net/hsr/hsr_netlink.c
+@@ -131,6 +131,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
+       del_timer_sync(&hsr->prune_timer);
+       del_timer_sync(&hsr->prune_proxy_timer);
+       del_timer_sync(&hsr->announce_timer);
++      timer_delete_sync(&hsr->announce_proxy_timer);
+       hsr_debugfs_term(hsr);
+       hsr_del_ports(hsr);
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5-add-missing-masks-and-qos-bit-masks-for-sch.patch b/queue-6.10/net-mlx5-add-missing-masks-and-qos-bit-masks-for-sch.patch
new file mode 100644 (file)
index 0000000..062dead
--- /dev/null
@@ -0,0 +1,62 @@
+From 6ea723bd0b1fab76d60b4238fba4f75f69dbb5f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Aug 2024 10:03:20 +0300
+Subject: net/mlx5: Add missing masks and QoS bit masks for scheduling elements
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit 452ef7f86036392005940de54228d42ca0044192 ]
+
+Add the missing masks for supported element types and Transmit
+Scheduling Arbiter (TSAR) types in scheduling elements.
+
+Also, add the corresponding bit masks for these types in the QoS
+capabilities of a NIC scheduler.
+
+Fixes: 214baf22870c ("net/mlx5e: Support HTB offload")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mlx5/mlx5_ifc.h | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index d45bfb7cf81d..d4dd7e2d8ffe 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -1027,7 +1027,8 @@ struct mlx5_ifc_qos_cap_bits {
+       u8         max_tsar_bw_share[0x20];
+-      u8         reserved_at_100[0x20];
++      u8         nic_element_type[0x10];
++      u8         nic_tsar_type[0x10];
+       u8         reserved_at_120[0x3];
+       u8         log_meter_aso_granularity[0x5];
+@@ -3916,6 +3917,7 @@ enum {
+       ELEMENT_TYPE_CAP_MASK_VPORT             = 1 << 1,
+       ELEMENT_TYPE_CAP_MASK_VPORT_TC          = 1 << 2,
+       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC     = 1 << 3,
++      ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP       = 1 << 4,
+ };
+ struct mlx5_ifc_scheduling_context_bits {
+@@ -4623,6 +4625,12 @@ enum {
+       TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
+ };
++enum {
++      TSAR_TYPE_CAP_MASK_DWRR         = 1 << 0,
++      TSAR_TYPE_CAP_MASK_ROUND_ROBIN  = 1 << 1,
++      TSAR_TYPE_CAP_MASK_ETS          = 1 << 2,
++};
++
+ struct mlx5_ifc_tsar_element_bits {
+       u8         reserved_at_0[0x8];
+       u8         tsar_type[0x8];
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5-correct-tasr-typo-into-tsar.patch b/queue-6.10/net-mlx5-correct-tasr-typo-into-tsar.patch
new file mode 100644 (file)
index 0000000..7b2f40f
--- /dev/null
@@ -0,0 +1,52 @@
+From 24ff2f07002405d42cbfb7d10334a92cd047661c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jun 2024 00:00:31 +0300
+Subject: net/mlx5: Correct TASR typo into TSAR
+
+From: Cosmin Ratiu <cratiu@nvidia.com>
+
+[ Upstream commit e575d3a6dd22123888defb622b1742aa2d45b942 ]
+
+TSAR is the correct spelling (Transmit Scheduling ARbiter).
+
+Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
+Reviewed-by: Gal Pressman <gal@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240613210036.1125203-2-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 861cd9b9cb62 ("net/mlx5: Verify support for scheduling element and TSAR type")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +-
+ include/linux/mlx5/mlx5_ifc.h                     | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+index bcea5f06807a..997c412a81af 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+@@ -538,7 +538,7 @@ static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+       switch (type) {
+       case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+               return MLX5_CAP_QOS(dev, esw_element_type) &
+-                     ELEMENT_TYPE_CAP_MASK_TASR;
++                     ELEMENT_TYPE_CAP_MASK_TSAR;
+       case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+               return MLX5_CAP_QOS(dev, esw_element_type) &
+                      ELEMENT_TYPE_CAP_MASK_VPORT;
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index d4dd7e2d8ffe..6ffafd596d39 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -3913,7 +3913,7 @@ enum {
+ };
+ enum {
+-      ELEMENT_TYPE_CAP_MASK_TASR              = 1 << 0,
++      ELEMENT_TYPE_CAP_MASK_TSAR              = 1 << 0,
+       ELEMENT_TYPE_CAP_MASK_VPORT             = 1 << 1,
+       ELEMENT_TYPE_CAP_MASK_VPORT_TC          = 1 << 2,
+       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC     = 1 << 3,
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5-explicitly-set-scheduling-element-and-tsar-.patch b/queue-6.10/net-mlx5-explicitly-set-scheduling-element-and-tsar-.patch
new file mode 100644 (file)
index 0000000..d6e40d8
--- /dev/null
@@ -0,0 +1,51 @@
+From de735d74ecf03745c7f3fadb3342f8b920603dad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Sep 2024 11:46:14 +0300
+Subject: net/mlx5: Explicitly set scheduling element and TSAR type
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit c88146abe4d0f8cf659b2b8883fdc33936d2e3b8 ]
+
+Ensure the scheduling element type and TSAR type are explicitly
+initialized in the QoS rate group creation.
+
+This prevents potential issues due to default values.
+
+Fixes: 1ae258f8b343 ("net/mlx5: E-switch, Introduce rate limiting groups API")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+index d2ebe56c3977..bcea5f06807a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+@@ -421,6 +421,7 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
+ {
+       u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       struct mlx5_esw_rate_group *group;
++      __be32 *attr;
+       u32 divider;
+       int err;
+@@ -428,6 +429,12 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
+       if (!group)
+               return ERR_PTR(-ENOMEM);
++      MLX5_SET(scheduling_context, tsar_ctx, element_type,
++               SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
++
++      attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
++      *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
++
+       MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
+                esw->qos.root_tsar_ix);
+       err = mlx5_create_scheduling_element_cmd(esw->dev,
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5-fix-bridge-mode-operations-when-there-are-n.patch b/queue-6.10/net-mlx5-fix-bridge-mode-operations-when-there-are-n.patch
new file mode 100644 (file)
index 0000000..9f6ade3
--- /dev/null
@@ -0,0 +1,78 @@
+From 3b0962b60e0e852afbba0170e103ae0a89624d27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Aug 2024 08:39:27 -0400
+Subject: net/mlx5: Fix bridge mode operations when there are no VFs
+
+From: Benjamin Poirier <bpoirier@nvidia.com>
+
+[ Upstream commit b1d305abef4640af1b4f1b4774d513cd81b10cfc ]
+
+Currently, trying to set the bridge mode attribute when numvfs=0 leads to a
+crash:
+
+bridge link set dev eth2 hwmode vepa
+
+[  168.967392] BUG: kernel NULL pointer dereference, address: 0000000000000030
+[...]
+[  168.969989] RIP: 0010:mlx5_add_flow_rules+0x1f/0x300 [mlx5_core]
+[...]
+[  168.976037] Call Trace:
+[  168.976188]  <TASK>
+[  168.978620]  _mlx5_eswitch_set_vepa_locked+0x113/0x230 [mlx5_core]
+[  168.979074]  mlx5_eswitch_set_vepa+0x7f/0xa0 [mlx5_core]
+[  168.979471]  rtnl_bridge_setlink+0xe9/0x1f0
+[  168.979714]  rtnetlink_rcv_msg+0x159/0x400
+[  168.980451]  netlink_rcv_skb+0x54/0x100
+[  168.980675]  netlink_unicast+0x241/0x360
+[  168.980918]  netlink_sendmsg+0x1f6/0x430
+[  168.981162]  ____sys_sendmsg+0x3bb/0x3f0
+[  168.982155]  ___sys_sendmsg+0x88/0xd0
+[  168.985036]  __sys_sendmsg+0x59/0xa0
+[  168.985477]  do_syscall_64+0x79/0x150
+[  168.987273]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
+[  168.987773] RIP: 0033:0x7f8f7950f917
+
+(esw->fdb_table.legacy.vepa_fdb is null)
+
+The bridge mode is only relevant when there are multiple functions per
+port. Therefore, prevent setting and getting this setting when there are no
+VFs.
+
+Note that after this change, there are no settings to change on the PF
+interface using `bridge link` when there are no VFs, so the interface no
+longer appears in the `bridge link` output.
+
+Fixes: 4b89251de024 ("net/mlx5: Support ndo bridge_setlink and getlink")
+Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+index 255bc8b749f9..8587cd572da5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+@@ -319,7 +319,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
+               return -EPERM;
+       mutex_lock(&esw->state_lock);
+-      if (esw->mode != MLX5_ESWITCH_LEGACY) {
++      if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw)) {
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+@@ -339,7 +339,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
+       if (!mlx5_esw_allowed(esw))
+               return -EPERM;
+-      if (esw->mode != MLX5_ESWITCH_LEGACY)
++      if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw))
+               return -EOPNOTSUPP;
+       *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5-update-the-list-of-the-pci-supported-device.patch b/queue-6.10/net-mlx5-update-the-list-of-the-pci-supported-device.patch
new file mode 100644 (file)
index 0000000..f7b8181
--- /dev/null
@@ -0,0 +1,36 @@
+From dda1638ee637719e667513d8ca323c294659126d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Aug 2024 11:02:34 +0300
+Subject: net/mlx5: Update the list of the PCI supported devices
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+[ Upstream commit 7472d157cb8014103105433bcc0705af2e6f7184 ]
+
+Add the upcoming ConnectX-9 device ID to the table of supported
+PCI device IDs.
+
+Fixes: f908a35b2218 ("net/mlx5: Update the list of the PCI supported devices")
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 3e55a6c6a7c9..211194df9619 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -2215,6 +2215,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
+       { PCI_VDEVICE(MELLANOX, 0x101f) },                      /* ConnectX-6 LX */
+       { PCI_VDEVICE(MELLANOX, 0x1021) },                      /* ConnectX-7 */
+       { PCI_VDEVICE(MELLANOX, 0x1023) },                      /* ConnectX-8 */
++      { PCI_VDEVICE(MELLANOX, 0x1025) },                      /* ConnectX-9 */
+       { PCI_VDEVICE(MELLANOX, 0xa2d2) },                      /* BlueField integrated ConnectX-5 network controller */
+       { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF},   /* BlueField integrated ConnectX-5 network controller VF */
+       { PCI_VDEVICE(MELLANOX, 0xa2d6) },                      /* BlueField-2 integrated ConnectX-6 Dx network controller */
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5-verify-support-for-scheduling-element-and-t.patch b/queue-6.10/net-mlx5-verify-support-for-scheduling-element-and-t.patch
new file mode 100644 (file)
index 0000000..1babd1e
--- /dev/null
@@ -0,0 +1,130 @@
+From ae8a076fc3f860784f4c98a658732096bd06e649 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Aug 2024 13:13:03 +0300
+Subject: net/mlx5: Verify support for scheduling element and TSAR type
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit 861cd9b9cb62feb244b8d77e68fd6ddedbbf66e9 ]
+
+Before creating a scheduling element in a NIC or E-Switch scheduler,
+ensure that the requested element type is supported. If the element is
+of type Transmit Scheduling Arbiter (TSAR), also verify that the
+specific TSAR type is supported.
+
+Fixes: 214baf22870c ("net/mlx5e: Support HTB offload")
+Fixes: 85c5f7c9200e ("net/mlx5: E-switch, Create QoS on demand")
+Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 44 ++++++++++---------
+ drivers/net/ethernet/mellanox/mlx5/core/qos.c |  7 +++
+ 2 files changed, 31 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+index 997c412a81af..02a3563f51ad 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+@@ -312,6 +312,25 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
+       return err;
+ }
++static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
++{
++      switch (type) {
++      case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
++              return MLX5_CAP_QOS(dev, esw_element_type) &
++                     ELEMENT_TYPE_CAP_MASK_TSAR;
++      case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
++              return MLX5_CAP_QOS(dev, esw_element_type) &
++                     ELEMENT_TYPE_CAP_MASK_VPORT;
++      case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
++              return MLX5_CAP_QOS(dev, esw_element_type) &
++                     ELEMENT_TYPE_CAP_MASK_VPORT_TC;
++      case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
++              return MLX5_CAP_QOS(dev, esw_element_type) &
++                     ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
++      }
++      return false;
++}
++
+ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+                                             struct mlx5_vport *vport,
+                                             u32 max_rate, u32 bw_share)
+@@ -323,6 +342,9 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+       void *vport_elem;
+       int err;
++      if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
++              return -EOPNOTSUPP;
++
+       parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+       MLX5_SET(scheduling_context, sched_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+@@ -533,25 +555,6 @@ static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+       return err;
+ }
+-static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+-{
+-      switch (type) {
+-      case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+-              return MLX5_CAP_QOS(dev, esw_element_type) &
+-                     ELEMENT_TYPE_CAP_MASK_TSAR;
+-      case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+-              return MLX5_CAP_QOS(dev, esw_element_type) &
+-                     ELEMENT_TYPE_CAP_MASK_VPORT;
+-      case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+-              return MLX5_CAP_QOS(dev, esw_element_type) &
+-                     ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+-      case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+-              return MLX5_CAP_QOS(dev, esw_element_type) &
+-                     ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+-      }
+-      return false;
+-}
+-
+ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+ {
+       u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+@@ -562,7 +565,8 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
+       if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+               return -EOPNOTSUPP;
+-      if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
++      if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) ||
++          !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
+               return -EOPNOTSUPP;
+       MLX5_SET(scheduling_context, tsar_ctx, element_type,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
+index 8bce730b5c5b..db2bd3ad63ba 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
+@@ -28,6 +28,9 @@ int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ {
+       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
++      if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP))
++              return -EOPNOTSUPP;
++
+       MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+       MLX5_SET(scheduling_context, sched_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP);
+@@ -44,6 +47,10 @@ int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
+       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+       void *attr;
++      if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_TSAR) ||
++          !(MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
++              return -EOPNOTSUPP;
++
+       MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+       MLX5_SET(scheduling_context, sched_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5e-add-missing-link-mode-to-ptys2ext_ethtool_.patch b/queue-6.10/net-mlx5e-add-missing-link-mode-to-ptys2ext_ethtool_.patch
new file mode 100644 (file)
index 0000000..371fa58
--- /dev/null
@@ -0,0 +1,42 @@
+From 2dc2a3681366c1b170a801787def9723825a99e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 11 Aug 2024 13:58:04 +0300
+Subject: net/mlx5e: Add missing link mode to ptys2ext_ethtool_map
+
+From: Shahar Shitrit <shshitrit@nvidia.com>
+
+[ Upstream commit 80bf474242b21d64a514fd2bb65faa7a17ca8d8d ]
+
+Add MLX5E_400GAUI_8_400GBASE_CR8 to the extended modes
+in ptys2ext_ethtool_table, since it was missing.
+
+Fixes: 6a897372417e ("net/mlx5: ethtool, Add ethtool support for 50Gbps per lane link modes")
+Signed-off-by: Shahar Shitrit <shshitrit@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+index 5b3b442c4a58..9d2d67e24205 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -208,6 +208,12 @@ void mlx5e_build_ptys2ethtool_map(void)
+                                      ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT,
+                                      ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT,
+                                      ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT);
++      MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_8_400GBASE_CR8, ext,
++                                     ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT,
++                                     ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT,
++                                     ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT,
++                                     ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT,
++                                     ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT);
+       MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_1_100GBASE_CR_KR, ext,
+                                      ETHTOOL_LINK_MODE_100000baseKR_Full_BIT,
+                                      ETHTOOL_LINK_MODE_100000baseSR_Full_BIT,
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-mlx5e-add-missing-link-modes-to-ptys2ethtool_map.patch b/queue-6.10/net-mlx5e-add-missing-link-modes-to-ptys2ethtool_map.patch
new file mode 100644 (file)
index 0000000..3c2382f
--- /dev/null
@@ -0,0 +1,40 @@
+From c206c0bcbbd62e683a5773947c1e83742f0c7b7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 11 Aug 2024 13:56:13 +0300
+Subject: net/mlx5e: Add missing link modes to ptys2ethtool_map
+
+From: Shahar Shitrit <shshitrit@nvidia.com>
+
+[ Upstream commit 7617d62cba4a8a3ff3ed3fda0171c43f135c142e ]
+
+Add MLX5E_1000BASE_T and MLX5E_100BASE_TX to the legacy
+modes in ptys2legacy_ethtool_table, since they were missing.
+
+Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API")
+Signed-off-by: Shahar Shitrit <shshitrit@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+index 58eb96a68853..5b3b442c4a58 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -139,6 +139,10 @@ void mlx5e_build_ptys2ethtool_map(void)
+                                      ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
+       MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy,
+                                      ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
++      MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100BASE_TX, legacy,
++                                     ETHTOOL_LINK_MODE_100baseT_Full_BIT);
++      MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_T, legacy,
++                                     ETHTOOL_LINK_MODE_1000baseT_Full_BIT);
+       MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy,
+                                      ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
+       MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy,
+-- 
+2.43.0
+
diff --git a/queue-6.10/net-phy-dp83822-fix-null-pointer-dereference-on-dp83.patch b/queue-6.10/net-phy-dp83822-fix-null-pointer-dereference-on-dp83.patch
new file mode 100644 (file)
index 0000000..94063f4
--- /dev/null
@@ -0,0 +1,115 @@
+From 4c0f3a34675bd5ff6d25a48c234e73543810930d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 12:52:40 +0200
+Subject: net: phy: dp83822: Fix NULL pointer dereference on DP83825 devices
+
+From: Tomas Paukrt <tomaspaukrt@email.cz>
+
+[ Upstream commit 3f62ea572b3e8e3f10c39a9cb4f04ca9ae5f2952 ]
+
+The probe() function is only used for DP83822 and DP83826 PHY,
+leaving the private data pointer uninitialized for the DP83825 models
+which causes a NULL pointer dereference in the recently introduced/changed
+functions dp8382x_config_init() and dp83822_set_wol().
+
+Add the dp8382x_probe() function, so all PHY models will have a valid
+private data pointer to fix this issue and also prevent similar issues
+in the future.
+
+Fixes: 9ef9ecfa9e9f ("net: phy: dp8382x: keep WOL settings across suspends")
+Signed-off-by: Tomas Paukrt <tomaspaukrt@email.cz>
+Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
+Link: https://patch.msgid.link/66w.ZbGt.65Ljx42yHo5.1csjxu@seznam.cz
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/dp83822.c | 35 ++++++++++++++++++++++-------------
+ 1 file changed, 22 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
+index efeb643c1373..fc247f479257 100644
+--- a/drivers/net/phy/dp83822.c
++++ b/drivers/net/phy/dp83822.c
+@@ -271,8 +271,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
+                               DP83822_ENERGY_DET_INT_EN |
+                               DP83822_LINK_QUAL_INT_EN);
+-              /* Private data pointer is NULL on DP83825 */
+-              if (!dp83822 || !dp83822->fx_enabled)
++              if (!dp83822->fx_enabled)
+                       misr_status |= DP83822_ANEG_COMPLETE_INT_EN |
+                                      DP83822_DUP_MODE_CHANGE_INT_EN |
+                                      DP83822_SPEED_CHANGED_INT_EN;
+@@ -292,8 +291,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
+                               DP83822_PAGE_RX_INT_EN |
+                               DP83822_EEE_ERROR_CHANGE_INT_EN);
+-              /* Private data pointer is NULL on DP83825 */
+-              if (!dp83822 || !dp83822->fx_enabled)
++              if (!dp83822->fx_enabled)
+                       misr_status |= DP83822_ANEG_ERR_INT_EN |
+                                      DP83822_WOL_PKT_INT_EN;
+@@ -691,10 +689,9 @@ static int dp83822_read_straps(struct phy_device *phydev)
+       return 0;
+ }
+-static int dp83822_probe(struct phy_device *phydev)
++static int dp8382x_probe(struct phy_device *phydev)
+ {
+       struct dp83822_private *dp83822;
+-      int ret;
+       dp83822 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83822),
+                              GFP_KERNEL);
+@@ -703,6 +700,20 @@ static int dp83822_probe(struct phy_device *phydev)
+       phydev->priv = dp83822;
++      return 0;
++}
++
++static int dp83822_probe(struct phy_device *phydev)
++{
++      struct dp83822_private *dp83822;
++      int ret;
++
++      ret = dp8382x_probe(phydev);
++      if (ret)
++              return ret;
++
++      dp83822 = phydev->priv;
++
+       ret = dp83822_read_straps(phydev);
+       if (ret)
+               return ret;
+@@ -717,14 +728,11 @@ static int dp83822_probe(struct phy_device *phydev)
+ static int dp83826_probe(struct phy_device *phydev)
+ {
+-      struct dp83822_private *dp83822;
+-
+-      dp83822 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83822),
+-                             GFP_KERNEL);
+-      if (!dp83822)
+-              return -ENOMEM;
++      int ret;
+-      phydev->priv = dp83822;
++      ret = dp8382x_probe(phydev);
++      if (ret)
++              return ret;
+       dp83826_of_init(phydev);
+@@ -795,6 +803,7 @@ static int dp83822_resume(struct phy_device *phydev)
+               PHY_ID_MATCH_MODEL(_id),                        \
+               .name           = (_name),                      \
+               /* PHY_BASIC_FEATURES */                        \
++              .probe          = dp8382x_probe,                \
+               .soft_reset     = dp83822_phy_reset,            \
+               .config_init    = dp8382x_config_init,          \
+               .get_wol = dp83822_get_wol,                     \
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfilter-nft_socket-fix-sk-refcount-leaks.patch b/queue-6.10/netfilter-nft_socket-fix-sk-refcount-leaks.patch
new file mode 100644 (file)
index 0000000..349f273
--- /dev/null
@@ -0,0 +1,59 @@
+From 1411e0bb438586745d6a59f09c18e324402daaf3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Sep 2024 12:54:46 +0200
+Subject: netfilter: nft_socket: fix sk refcount leaks
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 8b26ff7af8c32cb4148b3e147c52f9e4c695209c ]
+
+We must put 'sk' reference before returning.
+
+Fixes: 039b1f4f24ec ("netfilter: nft_socket: fix erroneous socket assignment")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_socket.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
+index f30163e2ca62..765ffd6e06bc 100644
+--- a/net/netfilter/nft_socket.c
++++ b/net/netfilter/nft_socket.c
+@@ -110,13 +110,13 @@ static void nft_socket_eval(const struct nft_expr *expr,
+                       *dest = READ_ONCE(sk->sk_mark);
+               } else {
+                       regs->verdict.code = NFT_BREAK;
+-                      return;
++                      goto out_put_sk;
+               }
+               break;
+       case NFT_SOCKET_WILDCARD:
+               if (!sk_fullsock(sk)) {
+                       regs->verdict.code = NFT_BREAK;
+-                      return;
++                      goto out_put_sk;
+               }
+               nft_socket_wildcard(pkt, regs, sk, dest);
+               break;
+@@ -124,7 +124,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
+       case NFT_SOCKET_CGROUPV2:
+               if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
+                       regs->verdict.code = NFT_BREAK;
+-                      return;
++                      goto out_put_sk;
+               }
+               break;
+ #endif
+@@ -133,6 +133,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
+               regs->verdict.code = NFT_BREAK;
+       }
++out_put_sk:
+       if (sk != skb->sk)
+               sock_gen_put(sk);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.10/netfilter-nft_socket-make-cgroupsv2-matching-work-wi.patch b/queue-6.10/netfilter-nft_socket-make-cgroupsv2-matching-work-wi.patch
new file mode 100644 (file)
index 0000000..eef715c
--- /dev/null
@@ -0,0 +1,149 @@
+From 3666ebb403366008bbbfaf723124e54828ea4d3b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 Sep 2024 16:07:49 +0200
+Subject: netfilter: nft_socket: make cgroupsv2 matching work with namespaces
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 7f3287db654395f9c5ddd246325ff7889f550286 ]
+
+When running in a container environment, /sys/fs/cgroup/ might not be
+the real root node of the sk-attached cgroup.
+
+Example:
+
+In container:
+% stat /sys//fs/cgroup/
+Device: 0,21    Inode: 2214  ..
+% stat /sys/fs/cgroup/foo
+Device: 0,21    Inode: 2264  ..
+
+The expectation would be for:
+
+  nft add rule .. socket cgroupv2 level 1 "foo" counter
+
+to match traffic from a process that got added to "foo" via
+"echo $pid > /sys/fs/cgroup/foo/cgroup.procs".
+
+However, 'level 3' is needed to make this work.
+
+Seen from initial namespace, the complete hierarchy is:
+
+% stat /sys/fs/cgroup/system.slice/docker-.../foo
+  Device: 0,21    Inode: 2264 ..
+
+i.e. hierarchy is
+0    1               2              3
+/ -> system.slice -> docker-1... -> foo
+
+... but the container doesn't know that its "/" is the "docker-1.."
+cgroup.  Current code will retrieve the 'system.slice' cgroup node
+and store its kn->id in the destination register, so compare with
+2264 ("foo" cgroup id) will not match.
+
+Fetch "/" cgroup from ->init() and add its level to the level we try to
+extract.  cgroup root-level is 0 for the init-namespace or the level
+of the ancestor that is exposed as the cgroup root inside the container.
+
+In the above case, cgrp->level of "/" resolved in the container is 2
+(docker-1...scope/) and request for 'level 1' will get adjusted
+to fetch the actual level (3).
+
+v2: use CONFIG_SOCK_CGROUP_DATA, eval function depends on it.
+    (kernel test robot)
+
+Cc: cgroups@vger.kernel.org
+Fixes: e0bb96db96f8 ("netfilter: nft_socket: add support for cgroupsv2")
+Reported-by: Nadia Pinaeva <n.m.pinaeva@gmail.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_socket.c | 41 +++++++++++++++++++++++++++++++++++---
+ 1 file changed, 38 insertions(+), 3 deletions(-)
+
+diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
+index 765ffd6e06bc..12cdff640492 100644
+--- a/net/netfilter/nft_socket.c
++++ b/net/netfilter/nft_socket.c
+@@ -9,7 +9,8 @@
+ struct nft_socket {
+       enum nft_socket_keys            key:8;
+-      u8                              level;
++      u8                              level;          /* cgroupv2 level to extract */
++      u8                              level_user;     /* cgroupv2 level provided by userspace */
+       u8                              len;
+       union {
+               u8                      dreg;
+@@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
+       memcpy(dest, &cgid, sizeof(u64));
+       return true;
+ }
++
++/* process context only, uses current->nsproxy. */
++static noinline int nft_socket_cgroup_subtree_level(void)
++{
++      struct cgroup *cgrp = cgroup_get_from_path("/");
++      int level;
++
++      if (!cgrp)
++              return -ENOENT;
++
++      level = cgrp->level;
++
++      cgroup_put(cgrp);
++
++      if (WARN_ON_ONCE(level > 255))
++              return -ERANGE;
++
++      if (WARN_ON_ONCE(level < 0))
++              return -EINVAL;
++
++      return level;
++}
+ #endif
+ static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+@@ -174,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx,
+       case NFT_SOCKET_MARK:
+               len = sizeof(u32);
+               break;
+-#ifdef CONFIG_CGROUPS
++#ifdef CONFIG_SOCK_CGROUP_DATA
+       case NFT_SOCKET_CGROUPV2: {
+               unsigned int level;
++              int err;
+               if (!tb[NFTA_SOCKET_LEVEL])
+                       return -EINVAL;
+@@ -185,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx,
+               if (level > 255)
+                       return -EOPNOTSUPP;
++              err = nft_socket_cgroup_subtree_level();
++              if (err < 0)
++                      return err;
++
++              priv->level_user = level;
++
++              level += err;
++              /* Implies a giant cgroup tree */
++              if (WARN_ON_ONCE(level > 255))
++                      return -EOPNOTSUPP;
++
+               priv->level = level;
+               len = sizeof(u64);
+               break;
+@@ -209,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb,
+       if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
+               return -1;
+       if (priv->key == NFT_SOCKET_CGROUPV2 &&
+-          nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
++          nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user)))
+               return -1;
+       return 0;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.10/netlink-specs-mptcp-fix-port-endianness.patch b/queue-6.10/netlink-specs-mptcp-fix-port-endianness.patch
new file mode 100644 (file)
index 0000000..7f25080
--- /dev/null
@@ -0,0 +1,54 @@
+From e44556ec017f93ebeedfdff39fe3aaac874a392c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Sep 2024 09:10:02 +0000
+Subject: netlink: specs: mptcp: fix port endianness
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Asbjørn Sloth Tønnesen <ast@fiberby.net>
+
+[ Upstream commit 09a45a5553792bbf20beba0a1ac90b4692324d06 ]
+
+The MPTCP port attribute is in host endianness, but was documented
+as big-endian in the ynl specification.
+
+Below are two examples from net/mptcp/pm_netlink.c showing that the
+attribute is converted to/from host endianness for use with netlink.
+
+Import from netlink:
+  addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]))
+
+Export to netlink:
+  nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_PORT, ntohs(addr->port))
+
+Where addr->port is defined as __be16.
+
+No functional change intended.
+
+Fixes: bc8aeb2045e2 ("Documentation: netlink: add a YAML spec for mptcp")
+Signed-off-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
+Reviewed-by: Davide Caratti <dcaratti@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20240911091003.1112179-1-ast@fiberby.net
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/netlink/specs/mptcp_pm.yaml | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
+index af525ed29792..30d8342cacc8 100644
+--- a/Documentation/netlink/specs/mptcp_pm.yaml
++++ b/Documentation/netlink/specs/mptcp_pm.yaml
+@@ -109,7 +109,6 @@ attribute-sets:
+       -
+         name: port
+         type: u16
+-        byte-order: big-endian
+       -
+         name: flags
+         type: u32
+-- 
+2.43.0
+
diff --git a/queue-6.10/octeontx2-af-modify-smq-flush-sequence-to-drop-packe.patch b/queue-6.10/octeontx2-af-modify-smq-flush-sequence-to-drop-packe.patch
new file mode 100644 (file)
index 0000000..94d13bd
--- /dev/null
@@ -0,0 +1,179 @@
+From fdc9a16eeaf9386c37cbfea53c23c6f4b65f8ca4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 10:28:38 +0530
+Subject: octeontx2-af: Modify SMQ flush sequence to drop packets
+
+From: Naveen Mamindlapalli <naveenm@marvell.com>
+
+[ Upstream commit 019aba04f08c2102b35ce7fee9d4628d349f56c0 ]
+
+The current implementation of SMQ flush sequence waits for the packets
+in the TM pipeline to be transmitted out of the link. This sequence
+doesn't succeed in HW when there is any issue with link such as lack of
+link credits, link down or any other traffic that is fully occupying the
+link bandwidth (QoS). This patch modifies the SMQ flush sequence to
+drop the packets after TL1 level (SQM) instead of polling for the packets
+to be sent out of RPM/CGX link.
+
+Fixes: 5d9b976d4480 ("octeontx2-af: Support fixed transmit scheduler topology")
+Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
+Reviewed-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Link: https://patch.msgid.link/20240906045838.1620308-1-naveenm@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/af/rvu.h   |  3 +-
+ .../ethernet/marvell/octeontx2/af/rvu_nix.c   | 59 +++++++++++++++----
+ 2 files changed, 48 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+index 35834687e40f..96a7b23428be 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+@@ -318,6 +318,7 @@ struct nix_mark_format {
+ /* smq(flush) to tl1 cir/pir info */
+ struct nix_smq_tree_ctx {
++      u16 schq;
+       u64 cir_off;
+       u64 cir_val;
+       u64 pir_off;
+@@ -327,8 +328,6 @@ struct nix_smq_tree_ctx {
+ /* smq flush context */
+ struct nix_smq_flush_ctx {
+       int smq;
+-      u16 tl1_schq;
+-      u16 tl2_schq;
+       struct nix_smq_tree_ctx smq_tree_ctx[NIX_TXSCH_LVL_CNT];
+ };
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 3dc828cf6c5a..10f8efff7843 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -2259,14 +2259,13 @@ static void nix_smq_flush_fill_ctx(struct rvu *rvu, int blkaddr, int smq,
+       schq = smq;
+       for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) {
+               smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl];
++              smq_tree_ctx->schq = schq;
+               if (lvl == NIX_TXSCH_LVL_TL1) {
+-                      smq_flush_ctx->tl1_schq = schq;
+                       smq_tree_ctx->cir_off = NIX_AF_TL1X_CIR(schq);
+                       smq_tree_ctx->pir_off = 0;
+                       smq_tree_ctx->pir_val = 0;
+                       parent_off = 0;
+               } else if (lvl == NIX_TXSCH_LVL_TL2) {
+-                      smq_flush_ctx->tl2_schq = schq;
+                       smq_tree_ctx->cir_off = NIX_AF_TL2X_CIR(schq);
+                       smq_tree_ctx->pir_off = NIX_AF_TL2X_PIR(schq);
+                       parent_off = NIX_AF_TL2X_PARENT(schq);
+@@ -2301,8 +2300,8 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr,
+ {
+       struct nix_txsch *txsch;
+       struct nix_hw *nix_hw;
++      int tl2, tl2_schq;
+       u64 regoff;
+-      int tl2;
+       nix_hw = get_nix_hw(rvu->hw, blkaddr);
+       if (!nix_hw)
+@@ -2310,16 +2309,17 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr,
+       /* loop through all TL2s with matching PF_FUNC */
+       txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2];
++      tl2_schq = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL2].schq;
+       for (tl2 = 0; tl2 < txsch->schq.max; tl2++) {
+               /* skip the smq(flush) TL2 */
+-              if (tl2 == smq_flush_ctx->tl2_schq)
++              if (tl2 == tl2_schq)
+                       continue;
+               /* skip unused TL2s */
+               if (TXSCH_MAP_FLAGS(txsch->pfvf_map[tl2]) & NIX_TXSCHQ_FREE)
+                       continue;
+               /* skip if PF_FUNC doesn't match */
+               if ((TXSCH_MAP_FUNC(txsch->pfvf_map[tl2]) & ~RVU_PFVF_FUNC_MASK) !=
+-                  (TXSCH_MAP_FUNC(txsch->pfvf_map[smq_flush_ctx->tl2_schq] &
++                  (TXSCH_MAP_FUNC(txsch->pfvf_map[tl2_schq] &
+                                   ~RVU_PFVF_FUNC_MASK)))
+                       continue;
+               /* enable/disable XOFF */
+@@ -2361,10 +2361,12 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+                        int smq, u16 pcifunc, int nixlf)
+ {
+       struct nix_smq_flush_ctx *smq_flush_ctx;
++      int err, restore_tx_en = 0, i;
+       int pf = rvu_get_pf(pcifunc);
+       u8 cgx_id = 0, lmac_id = 0;
+-      int err, restore_tx_en = 0;
+-      u64 cfg;
++      u16 tl2_tl3_link_schq;
++      u8 link, link_level;
++      u64 cfg, bmap = 0;
+       if (!is_rvu_otx2(rvu)) {
+               /* Skip SMQ flush if pkt count is zero */
+@@ -2388,16 +2390,38 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+       nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, true);
+       nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, false);
+-      cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
+-      /* Do SMQ flush and set enqueue xoff */
+-      cfg |= BIT_ULL(50) | BIT_ULL(49);
+-      rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
+-
+       /* Disable backpressure from physical link,
+        * otherwise SMQ flush may stall.
+        */
+       rvu_cgx_enadis_rx_bp(rvu, pf, false);
++      link_level = rvu_read64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL) & 0x01 ?
++                      NIX_TXSCH_LVL_TL3 : NIX_TXSCH_LVL_TL2;
++      tl2_tl3_link_schq = smq_flush_ctx->smq_tree_ctx[link_level].schq;
++      link = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL1].schq;
++
++      /* SMQ set enqueue xoff */
++      cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
++      cfg |= BIT_ULL(50);
++      rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
++
++      /* Clear all NIX_AF_TL3_TL2_LINK_CFG[ENA] for the TL3/TL2 queue */
++      for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) {
++              cfg = rvu_read64(rvu, blkaddr,
++                               NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link));
++              if (!(cfg & BIT_ULL(12)))
++                      continue;
++              bmap |= (1 << i);
++              cfg &= ~BIT_ULL(12);
++              rvu_write64(rvu, blkaddr,
++                          NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg);
++      }
++
++      /* Do SMQ flush and set enqueue xoff */
++      cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
++      cfg |= BIT_ULL(50) | BIT_ULL(49);
++      rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
++
+       /* Wait for flush to complete */
+       err = rvu_poll_reg(rvu, blkaddr,
+                          NIX_AF_SMQX_CFG(smq), BIT_ULL(49), true);
+@@ -2406,6 +2430,17 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+                        "NIXLF%d: SMQ%d flush failed, txlink might be busy\n",
+                        nixlf, smq);
++      /* Set NIX_AF_TL3_TL2_LINKX_CFG[ENA] for the TL3/TL2 queue */
++      for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) {
++              if (!(bmap & (1 << i)))
++                      continue;
++              cfg = rvu_read64(rvu, blkaddr,
++                               NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link));
++              cfg |= BIT_ULL(12);
++              rvu_write64(rvu, blkaddr,
++                          NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg);
++      }
++
+       /* clear XOFF on TL2s */
+       nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, true);
+       nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, false);
+-- 
+2.43.0
+
diff --git a/queue-6.10/riscv-disable-preemption-while-handling-pr_riscv_ctx.patch b/queue-6.10/riscv-disable-preemption-while-handling-pr_riscv_ctx.patch
new file mode 100644 (file)
index 0000000..e1d1eab
--- /dev/null
@@ -0,0 +1,83 @@
+From 3db126dcdd5dee7030c9cd2a9062cce5842a88ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Sep 2024 15:52:34 -0700
+Subject: riscv: Disable preemption while handling PR_RISCV_CTX_SW_FENCEI_OFF
+
+From: Charlie Jenkins <charlie@rivosinc.com>
+
+[ Upstream commit 7c1e5b9690b0e14acead4ff98d8a6c40f2dff54b ]
+
+The icache will be flushed in switch_to() if force_icache_flush is true,
+or in flush_icache_deferred() if icache_stale_mask is set. Between
+setting force_icache_flush to false and calculating the new
+icache_stale_mask, preemption needs to be disabled. There are two
+reasons for this:
+
+1. If CPU migration happens between force_icache_flush = false, and the
+   icache_stale_mask is set, an icache flush will not be emitted.
+2. smp_processor_id() is used in set_icache_stale_mask() to mark the
+   current CPU as not needing another flush since a flush will have
+   happened either by userspace or by the kernel when performing the
+   migration. smp_processor_id() is currently called twice with preemption
+   enabled which causes a race condition. It allows
+   icache_stale_mask to be populated with inconsistent CPU ids.
+
+Resolve these two issues by setting the icache_stale_mask before setting
+force_icache_flush to false, and using get_cpu()/put_cpu() to obtain the
+smp_processor_id().
+
+Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
+Fixes: 6b9391b581fd ("riscv: Include riscv_set_icache_flush_ctx prctl")
+Link: https://lore.kernel.org/r/20240903-fix_fencei_optimization-v2-1-8025f20171fc@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/mm/cacheflush.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
+index a03c994eed3b..b81672729887 100644
+--- a/arch/riscv/mm/cacheflush.c
++++ b/arch/riscv/mm/cacheflush.c
+@@ -158,6 +158,7 @@ void __init riscv_init_cbo_blocksizes(void)
+ #ifdef CONFIG_SMP
+ static void set_icache_stale_mask(void)
+ {
++      int cpu = get_cpu();
+       cpumask_t *mask;
+       bool stale_cpu;
+@@ -168,10 +169,11 @@ static void set_icache_stale_mask(void)
+        * concurrently on different harts.
+        */
+       mask = &current->mm->context.icache_stale_mask;
+-      stale_cpu = cpumask_test_cpu(smp_processor_id(), mask);
++      stale_cpu = cpumask_test_cpu(cpu, mask);
+       cpumask_setall(mask);
+-      cpumask_assign_cpu(smp_processor_id(), mask, stale_cpu);
++      cpumask_assign_cpu(cpu, mask, stale_cpu);
++      put_cpu();
+ }
+ #endif
+@@ -239,14 +241,12 @@ int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long scope)
+       case PR_RISCV_CTX_SW_FENCEI_OFF:
+               switch (scope) {
+               case PR_RISCV_SCOPE_PER_PROCESS:
+-                      current->mm->context.force_icache_flush = false;
+-
+                       set_icache_stale_mask();
++                      current->mm->context.force_icache_flush = false;
+                       break;
+               case PR_RISCV_SCOPE_PER_THREAD:
+-                      current->thread.force_icache_flush = false;
+-
+                       set_icache_stale_mask();
++                      current->thread.force_icache_flush = false;
+                       break;
+               default:
+                       return -EINVAL;
+-- 
+2.43.0
+
diff --git a/queue-6.10/selftests-net-csum-fix-checksums-for-packets-with-no.patch b/queue-6.10/selftests-net-csum-fix-checksums-for-packets-with-no.patch
new file mode 100644 (file)
index 0000000..2b18b34
--- /dev/null
@@ -0,0 +1,79 @@
+From 760403a07ddf3f6f73307e8faf6900b6e805e9cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 17:07:43 -0400
+Subject: selftests: net: csum: Fix checksums for packets with non-zero padding
+
+From: Sean Anderson <sean.anderson@linux.dev>
+
+[ Upstream commit e8a63d473b49011a68a748aea1c8aefa046ebacf ]
+
+Padding is not included in UDP and TCP checksums. Therefore, reduce the
+length of the checksummed data to include only the data in the IP
+payload. This fixes spurious reported checksum failures like
+
+rx: pkt: sport=33000 len=26 csum=0xc850 verify=0xf9fe
+pkt: bad csum
+
+Technically it is possible for there to be trailing bytes after the UDP
+data but before the Ethernet padding (e.g. if sizeof(ip) + sizeof(udp) +
+udp.len < ip.len). However, we don't generate such packets.
+
+Fixes: 91a7de85600d ("selftests/net: add csum offload test")
+Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://patch.msgid.link/20240906210743.627413-1-sean.anderson@linux.dev
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/lib/csum.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
+index b9f3fc3c3426..e0a34e5e8dd5 100644
+--- a/tools/testing/selftests/net/lib/csum.c
++++ b/tools/testing/selftests/net/lib/csum.c
+@@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len)
+ {
+       struct iphdr *iph = nh;
+       uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
++      uint16_t ip_len;
+       if (len < sizeof(*iph) || iph->protocol != proto)
+               return -1;
++      ip_len = ntohs(iph->tot_len);
++      if (ip_len > len || ip_len < sizeof(*iph))
++              return -1;
++
++      len = ip_len;
+       iph_addr_p = &iph->saddr;
+       if (proto == IPPROTO_TCP)
+               return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
+@@ -669,16 +675,22 @@ static int recv_verify_packet_ipv6(void *nh, int len)
+ {
+       struct ipv6hdr *ip6h = nh;
+       uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
++      uint16_t ip_len;
+       if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
+               return -1;
++      ip_len = ntohs(ip6h->payload_len);
++      if (ip_len > len - sizeof(*ip6h))
++              return -1;
++
++      len = ip_len;
+       iph_addr_p = &ip6h->saddr;
+       if (proto == IPPROTO_TCP)
+-              return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
++              return recv_verify_packet_tcp(ip6h + 1, len);
+       else
+-              return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
++              return recv_verify_packet_udp(ip6h + 1, len);
+ }
+ /* return whether auxdata includes TP_STATUS_CSUM_VALID */
+-- 
+2.43.0
+
index bebc4bcdd4915b938c11dce938403ff00e32093f..183b7697616c8582e3758a3169d88afe59898d60 100644 (file)
@@ -66,3 +66,31 @@ clk-sophgo-using-bug-instead-of-unreachable-in-mmux_.patch
 cxl-core-fix-incorrect-vendor-debug-uuid-define.patch
 cxl-restore-xor-d-position-bits-during-address-trans.patch
 selftests-bpf-support-sock_stream-in-unix_inet_redir.patch
+net-hsr-send-supervisory-frames-to-hsr-network-with-.patch
+net-hsr-remove-seqnr_lock.patch
+hwmon-pmbus-conditionally-clear-individual-status-bi.patch
+ice-fix-lldp-packets-dropping-after-changing-the-num.patch
+ice-fix-accounting-for-filters-shared-by-multiple-vs.patch
+ice-fix-vsi-lists-confusion-when-adding-vlans.patch
+igb-always-call-igb_xdp_ring_update_tail-under-tx-lo.patch
+net-mlx5-update-the-list-of-the-pci-supported-device.patch
+net-mlx5e-add-missing-link-modes-to-ptys2ethtool_map.patch
+net-mlx5e-add-missing-link-mode-to-ptys2ext_ethtool_.patch
+net-mlx5-explicitly-set-scheduling-element-and-tsar-.patch
+net-mlx5-add-missing-masks-and-qos-bit-masks-for-sch.patch
+net-mlx5-correct-tasr-typo-into-tsar.patch
+net-mlx5-verify-support-for-scheduling-element-and-t.patch
+net-mlx5-fix-bridge-mode-operations-when-there-are-n.patch
+fou-fix-initialization-of-grc.patch
+octeontx2-af-modify-smq-flush-sequence-to-drop-packe.patch
+net-ftgmac100-enable-tx-interrupt-to-avoid-tx-timeou.patch
+net-phy-dp83822-fix-null-pointer-dereference-on-dp83.patch
+selftests-net-csum-fix-checksums-for-packets-with-no.patch
+drivers-perf-fix-smp_processor_id-use-in-preemptible.patch
+riscv-disable-preemption-while-handling-pr_riscv_ctx.patch
+netfilter-nft_socket-fix-sk-refcount-leaks.patch
+netfilter-nft_socket-make-cgroupsv2-matching-work-wi.patch
+net-hsr-prevent-null-pointer-dereference-in-hsr_prox.patch
+net-dsa-felix-ignore-pending-status-of-tas-module-wh.patch
+net-dpaa-pad-packets-to-eth_zlen.patch
+netlink-specs-mptcp-fix-port-endianness.patch