--- /dev/null
+From 121fb09447aa9df6c35eda3b4aa2f7a68b939f28 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 15:28:39 +0500
+Subject: fou: fix initialization of grc
+
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+
+[ Upstream commit 4c8002277167125078e6b9b90137bdf443ebaa08 ]
+
+The grc must be initialized first. There can be a condition where if
+fou is NULL, goto out will be executed and grc would be used
+uninitialized.
+
+Fixes: 7e4196935069 ("fou: Fix null-ptr-deref in GRO.")
+Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20240906102839.202798-1-usama.anjum@collabora.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fou_core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
+index e0b8d6b17a34..4e0a7d038e21 100644
+--- a/net/ipv4/fou_core.c
++++ b/net/ipv4/fou_core.c
+@@ -336,11 +336,11 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
+ struct gro_remcsum grc;
+ u8 proto;
+
++ skb_gro_remcsum_init(&grc);
++
+ if (!fou)
+ goto out;
+
+- skb_gro_remcsum_init(&grc);
+-
+ off = skb_gro_offset(skb);
+ len = off + sizeof(*guehdr);
+
+--
+2.43.0
+
--- /dev/null
+From 203e2bd7162c1056d8dee4dd28377411a415266f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Sep 2024 11:30:28 +0200
+Subject: hwmon: (pmbus) Conditionally clear individual status bits for pmbus
+ rev >= 1.2
+
+From: Patryk Biel <pbiel7@gmail.com>
+
+[ Upstream commit 20471071f198c8626dbe3951ac9834055b387844 ]
+
+The current implementation of pmbus_show_boolean assumes that all devices
+support write-back operation of status register to clear pending warnings
+or faults. Since clearing individual bits in the status registers was only
+introduced in PMBus specification 1.2, this operation may not be supported
+by some older devices. This can result in an error while reading boolean
+attributes such as temp1_max_alarm.
+
+Fetch PMBus revision supported by the device and modify pmbus_show_boolean
+so that it only tries to clear individual status bits if the device is
+compliant with PMBus specs >= 1.2. Otherwise clear all fault indicators
+on the current page after a fault status was reported.
+
+Fixes: 35f165f08950a ("hwmon: (pmbus) Clear pmbus fault/warning bits after read")
+Signed-off-by: Patryk Biel <pbiel7@gmail.com>
+Message-ID: <20240909-pmbus-status-reg-clearing-v1-1-f1c0d68c6408@gmail.com>
+[groeck:
+ Rewrote description
+ Moved revision detection code ahead of clear faults command
+ Assigned revision if return value from PMBUS_REVISION command is 0
+ Improved return value check from calling _pmbus_write_byte_data()]
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwmon/pmbus/pmbus.h | 6 ++++++
+ drivers/hwmon/pmbus/pmbus_core.c | 17 ++++++++++++++---
+ 2 files changed, 20 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h
+index b0832a4c690d..76c2b364c3fe 100644
+--- a/drivers/hwmon/pmbus/pmbus.h
++++ b/drivers/hwmon/pmbus/pmbus.h
+@@ -409,6 +409,12 @@ enum pmbus_sensor_classes {
+ enum pmbus_data_format { linear = 0, ieee754, direct, vid };
+ enum vrm_version { vr11 = 0, vr12, vr13, imvp9, amd625mv };
+
++/* PMBus revision identifiers */
++#define PMBUS_REV_10 0x00 /* PMBus revision 1.0 */
++#define PMBUS_REV_11 0x11 /* PMBus revision 1.1 */
++#define PMBUS_REV_12 0x22 /* PMBus revision 1.2 */
++#define PMBUS_REV_13 0x33 /* PMBus revision 1.3 */
++
+ struct pmbus_driver_info {
+ int pages; /* Total number of pages */
+ u8 phases[PMBUS_PAGES]; /* Number of phases per page */
+diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
+index 1363d9f89181..728c07c42651 100644
+--- a/drivers/hwmon/pmbus/pmbus_core.c
++++ b/drivers/hwmon/pmbus/pmbus_core.c
+@@ -85,6 +85,8 @@ struct pmbus_data {
+
+ u32 flags; /* from platform data */
+
++ u8 revision; /* The PMBus revision the device is compliant with */
++
+ int exponent[PMBUS_PAGES];
+ /* linear mode: exponent for output voltages */
+
+@@ -1095,9 +1097,14 @@ static int pmbus_get_boolean(struct i2c_client *client, struct pmbus_boolean *b,
+
+ regval = status & mask;
+ if (regval) {
+- ret = _pmbus_write_byte_data(client, page, reg, regval);
+- if (ret)
+- goto unlock;
++ if (data->revision >= PMBUS_REV_12) {
++ ret = _pmbus_write_byte_data(client, page, reg, regval);
++ if (ret)
++ goto unlock;
++ } else {
++ pmbus_clear_fault_page(client, page);
++ }
++
+ }
+ if (s1 && s2) {
+ s64 v1, v2;
+@@ -2640,6 +2647,10 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
+ data->flags |= PMBUS_WRITE_PROTECTED | PMBUS_SKIP_STATUS_CHECK;
+ }
+
++ ret = i2c_smbus_read_byte_data(client, PMBUS_REVISION);
++ if (ret >= 0)
++ data->revision = ret;
++
+ if (data->info->pages)
+ pmbus_clear_faults(client);
+ else
+--
+2.43.0
+
--- /dev/null
+From 7febc43545512929ccd647399bc64983bfcf1e0d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Sep 2023 13:07:43 +0300
+Subject: IB/mlx5: Rename 400G_8X speed to comply to naming convention
+
+From: Patrisious Haddad <phaddad@nvidia.com>
+
+[ Upstream commit b28ad32442bec2f0d9cb660d7d698a1a53c13d08 ]
+
+Rename 400G_8X speed to comply to naming convention.
+
+Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
+Reviewed-by: Mark Zhang <markzhang@nvidia.com>
+Link: https://lore.kernel.org/r/ac98447cac8379a43fbdb36d56e5fb2b741a97ff.1695204156.git.leon@kernel.org
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Stable-dep-of: 80bf474242b2 ("net/mlx5e: Add missing link mode to ptys2ext_ethtool_map")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/main.c | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/port.c | 2 +-
+ include/linux/mlx5/port.h | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
+index 45a497c0258b..2d179bc56ce6 100644
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -444,7 +444,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+- case MLX5E_PROT_MASK(MLX5E_400GAUI_8):
++ case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8):
+ *active_width = IB_WIDTH_8X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+index be70d1f23a5d..749f0fc2c189 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+@@ -1098,7 +1098,7 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
+ [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000,
+ [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000,
+ [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000,
+- [MLX5E_400GAUI_8] = 400000,
++ [MLX5E_400GAUI_8_400GBASE_CR8] = 400000,
+ [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000,
+ [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
+ [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
+diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
+index 98b2e1e149f9..5cc34216f23c 100644
+--- a/include/linux/mlx5/port.h
++++ b/include/linux/mlx5/port.h
+@@ -115,7 +115,7 @@ enum mlx5e_ext_link_mode {
+ MLX5E_100GAUI_1_100GBASE_CR_KR = 11,
+ MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12,
+ MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13,
+- MLX5E_400GAUI_8 = 15,
++ MLX5E_400GAUI_8_400GBASE_CR8 = 15,
+ MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16,
+ MLX5E_EXT_LINK_MODES_NUMBER,
+ };
+--
+2.43.0
+
--- /dev/null
+From 7c685fbc610e97e129113a48358a005458efc293 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Jul 2024 09:55:55 -0700
+Subject: ice: fix accounting for filters shared by multiple VSIs
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+[ Upstream commit e843cf7b34fe2e0c1afc55e1f3057375c9b77a14 ]
+
+When adding a switch filter (such as a MAC or VLAN filter), it is expected
+that the driver will detect the case where the filter already exists, and
+return -EEXIST. This is used by calling code such as ice_vc_add_mac_addr,
+and ice_vsi_add_vlan to avoid incrementing the accounting fields such as
+vsi->num_vlan or vf->num_mac.
+
+This logic works correctly for the case where only a single VSI has added a
+given switch filter.
+
+When a second VSI adds the same switch filter, the driver converts the
+existing filter from an ICE_FWD_TO_VSI filter into an ICE_FWD_TO_VSI_LIST
+filter. This saves switch resources, by ensuring that multiple VSIs can
+re-use the same filter.
+
+The ice_add_update_vsi_list() function is responsible for doing this
+conversion. When first converting a filter from the FWD_TO_VSI into
+FWD_TO_VSI_LIST, it checks if the VSI being added is the same as the
+existing rule's VSI. In such a case it returns -EEXIST.
+
+However, when the switch rule has already been converted to a
+FWD_TO_VSI_LIST, the logic is different. Adding a new VSI in this case just
+requires extending the VSI list entry. The logic for checking if the rule
+already exists in this case returns 0 instead of -EEXIST.
+
+This breaks the accounting logic mentioned above, so the counters for how
+many MAC and VLAN filters exist for a given VF or VSI no longer accurately
+reflect the actual count. This breaks other code which relies on these
+counts.
+
+In typical usage this primarily affects such filters generally shared by
+multiple VSIs such as VLAN 0, or broadcast and multicast MAC addresses.
+
+Fix this by correctly reporting -EEXIST in the case of adding the same VSI
+to a switch rule already converted to ICE_FWD_TO_VSI_LIST.
+
+Fixes: 9daf8208dd4d ("ice: Add support for switch filter programming")
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_switch.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index 88ee2491312a..1b48fa8c435d 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ b/drivers/net/ethernet/intel/ice/ice_switch.c
+@@ -3072,7 +3072,7 @@ ice_add_update_vsi_list(struct ice_hw *hw,
+
+ /* A rule already exists with the new VSI being added */
+ if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
+- return 0;
++ return -EEXIST;
+
+ /* Update the previously created VSI list set with
+ * the new VSI ID passed in
+--
+2.43.0
+
--- /dev/null
+From 847a0b62e574a75a299160f86feeb38971739a94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jun 2024 11:43:42 +0200
+Subject: ice: Fix lldp packets dropping after changing the number of channels
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com>
+
+[ Upstream commit 9debb703e14939dfafa5d403f27c4feb2e9f6501 ]
+
+After vsi setup refactor commit 6624e780a577 ("ice: split ice_vsi_setup
+into smaller functions") ice_cfg_sw_lldp function which removes rx rule
+directing LLDP packets to vsi is moved from ice_vsi_release to
+ice_vsi_decfg function. ice_vsi_decfg is used in more cases than just in
+vsi_release resulting in unnecessary removal of rx lldp packets handling
+switch rule. This leads to lldp packets being dropped after changing the
+number of channels via ethtool.
+This patch moves ice_cfg_sw_lldp function that removes rx lldp sw rule back
+to ice_vsi_release function.
+
+Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
+Reported-by: Matěj Grégr <mgregr@netx.as>
+Closes: https://lore.kernel.org/intel-wired-lan/1be45a76-90af-4813-824f-8398b69745a9@netx.as/T/#u
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_lib.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index b3010a53f1b4..3a0ef56d3edc 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -2600,13 +2600,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi)
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+- /* The Rx rule will only exist to remove if the LLDP FW
+- * engine is currently stopped
+- */
+- if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
+- !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+- ice_cfg_sw_lldp(vsi, false, false);
+-
+ ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+ err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
+ if (err)
+@@ -2953,6 +2946,14 @@ int ice_vsi_release(struct ice_vsi *vsi)
+ ice_rss_clean(vsi);
+
+ ice_vsi_close(vsi);
++
++ /* The Rx rule will only exist to remove if the LLDP FW
++ * engine is currently stopped
++ */
++ if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
++ !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
++ ice_cfg_sw_lldp(vsi, false, false);
++
+ ice_vsi_decfg(vsi);
+
+ /* retain SW VSI data structure since it is needed to unregister and
+--
+2.43.0
+
--- /dev/null
+From 3337259b21e7841263285a342511c59cc15e1bdb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Sep 2024 11:39:22 +0200
+Subject: ice: fix VSI lists confusion when adding VLANs
+
+From: Michal Schmidt <mschmidt@redhat.com>
+
+[ Upstream commit d2940002b0aa42898de815a1453b29d440292386 ]
+
+The description of function ice_find_vsi_list_entry says:
+ Search VSI list map with VSI count 1
+
+However, since the blamed commit (see Fixes below), the function no
+longer checks vsi_count. This causes a problem in ice_add_vlan_internal,
+where the decision to share VSI lists between filter rules relies on the
+vsi_count of the found existing VSI list being 1.
+
+The reproducing steps:
+1. Have a PF and two VFs.
+ There will be a filter rule for VLAN 0, referring to a VSI list
+ containing VSIs: 0 (PF), 2 (VF#0), 3 (VF#1).
+2. Add VLAN 1234 to VF#0.
+ ice will make the wrong decision to share the VSI list with the new
+ rule. The wrong behavior may not be immediately apparent, but it can
+ be observed with debug prints.
+3. Add VLAN 1234 to VF#1.
+ ice will unshare the VSI list for the VLAN 1234 rule. Due to the
+ earlier bad decision, the newly created VSI list will contain
+ VSIs 0 (PF) and 3 (VF#1), instead of expected 2 (VF#0) and 3 (VF#1).
+4. Try pinging a network peer over the VLAN interface on VF#0.
+ This fails.
+
+Reproducer script at:
+https://gitlab.com/mschmidt2/repro/-/blob/master/RHEL-46814/test-vlan-vsi-list-confusion.sh
+Commented debug trace:
+https://gitlab.com/mschmidt2/repro/-/blob/master/RHEL-46814/ice-vlan-vsi-lists-debug.txt
+Patch adding the debug prints:
+https://gitlab.com/mschmidt2/linux/-/commit/f8a8814623944a45091a77c6094c40bfe726bfdb
+(Unsafe, by the way. Lacks rule_lock when dumping in ice_remove_vlan.)
+
+Michal Swiatkowski added to the explanation that the bug is caused by
+reusing a VSI list created for VLAN 0. All created VFs' VSIs are added
+to VLAN 0 filter. When a non-zero VLAN is created on a VF which is already
+in VLAN 0 (normal case), the VSI list from VLAN 0 is reused.
+It leads to a problem because all VFs (VSIs to be specific) that are
+subscribed to VLAN 0 will now receive a new VLAN tag traffic. This is
+one bug, another is the bug described above. Removing filters from
+one VF will remove VLAN filter from the previous VF. It happens when a VF is
+reset. Example:
+- creation of 3 VFs
+- we have VSI list (used for VLAN 0) [0 (pf), 2 (vf1), 3 (vf2), 4 (vf3)]
+- we are adding VLAN 100 on VF1, we are reusing the previous list
+ because 2 is there
+- VLAN traffic works fine, but VLAN 100 tagged traffic can be received
+ on all VSIs from the list (for example broadcast or unicast)
+- trust is turning on VF2, VF2 is resetting, all filters from VF2 are
+ removed; the VLAN 100 filter is also removed because 3 is on the list
+- VLAN traffic to VF1 isn't working anymore, there is a need to recreate
+ VLAN interface to readd VLAN filter
+
+One thing I'm not certain about is the implications for the LAG feature,
+which is another caller of ice_find_vsi_list_entry. I don't have a
+LAG-capable card at hand to test.
+
+Fixes: 23ccae5ce15f ("ice: changes to the interface with the HW and FW for SRIOV_VF+LAG")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
+Reviewed-by: Dave Ertman <David.m.ertman@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_switch.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index 1b48fa8c435d..355716e6bcc8 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ b/drivers/net/ethernet/intel/ice/ice_switch.c
+@@ -3142,7 +3142,7 @@ ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle,
+
+ list_head = &sw->recp_list[recp_id].filt_rules;
+ list_for_each_entry(list_itr, list_head, list_entry) {
+- if (list_itr->vsi_list_info) {
++ if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) {
+ map_info = list_itr->vsi_list_info;
+ if (test_bit(vsi_handle, map_info->vsi_map)) {
+ *vsi_list_id = map_info->vsi_list_id;
+--
+2.43.0
+
--- /dev/null
+From a60cac960d3a741f57734c81e0fb661e3f50ecf5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Aug 2024 09:42:07 +0200
+Subject: igb: Always call igb_xdp_ring_update_tail() under Tx lock
+
+From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
+
+[ Upstream commit 27717f8b17c098c4373ddb8fe89e1a1899c7779d ]
+
+Always call igb_xdp_ring_update_tail() under __netif_tx_lock, add a comment
+and lockdep assert to indicate that. This is needed to share the same TX
+ring between XDP, XSK and slow paths. Furthermore, the current XDP
+implementation is racy on tail updates.
+
+Fixes: 9cbc948b5a20 ("igb: add XDP support")
+Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
+[Kurt: Add lockdep assert and fixes tag]
+Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
+Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb_main.c | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index fa268d7bd1bc..986bcbf0a6ab 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -33,6 +33,7 @@
+ #include <linux/bpf_trace.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/etherdevice.h>
++#include <linux/lockdep.h>
+ #ifdef CONFIG_IGB_DCA
+ #include <linux/dca.h>
+ #endif
+@@ -2939,8 +2940,11 @@ static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+ }
+ }
+
++/* This function assumes __netif_tx_lock is held by the caller. */
+ static void igb_xdp_ring_update_tail(struct igb_ring *ring)
+ {
++ lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
++
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch.
+ */
+@@ -3025,11 +3029,11 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
+ nxmit++;
+ }
+
+- __netif_tx_unlock(nq);
+-
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ igb_xdp_ring_update_tail(tx_ring);
+
++ __netif_tx_unlock(nq);
++
+ return nxmit;
+ }
+
+@@ -8889,12 +8893,14 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring,
+
+ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
+ {
++ unsigned int total_bytes = 0, total_packets = 0;
+ struct igb_adapter *adapter = q_vector->adapter;
+ struct igb_ring *rx_ring = q_vector->rx.ring;
+- struct sk_buff *skb = rx_ring->skb;
+- unsigned int total_bytes = 0, total_packets = 0;
+ u16 cleaned_count = igb_desc_unused(rx_ring);
++ struct sk_buff *skb = rx_ring->skb;
++ int cpu = smp_processor_id();
+ unsigned int xdp_xmit = 0;
++ struct netdev_queue *nq;
+ struct xdp_buff xdp;
+ u32 frame_sz = 0;
+ int rx_buf_pgcnt;
+@@ -9022,7 +9028,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
+ if (xdp_xmit & IGB_XDP_TX) {
+ struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
+
++ nq = txring_txq(tx_ring);
++ __netif_tx_lock(nq, cpu);
+ igb_xdp_ring_update_tail(tx_ring);
++ __netif_tx_unlock(nq);
+ }
+
+ u64_stats_update_begin(&rx_ring->rx_syncp);
+--
+2.43.0
+
--- /dev/null
+From 05beecce07488966cfd794c219a3b83399f1ee54 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Sep 2024 10:31:44 -0400
+Subject: net: dpaa: Pad packets to ETH_ZLEN
+
+From: Sean Anderson <sean.anderson@linux.dev>
+
+[ Upstream commit cbd7ec083413c6a2e0c326d49e24ec7d12c7a9e0 ]
+
+When sending packets under 60 bytes, up to three bytes of the buffer
+following the data may be leaked. Avoid this by extending all packets to
+ETH_ZLEN, ensuring nothing is leaked in the padding. This bug can be
+reproduced by running
+
+ $ ping -s 11 destination
+
+Fixes: 9ad1a3749333 ("dpaa_eth: add support for DPAA Ethernet")
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20240910143144.1439910-1-sean.anderson@linux.dev
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+index c6a3eefd83bf..e7bf70ac9a4c 100644
+--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
++++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+@@ -2285,12 +2285,12 @@ static netdev_tx_t
+ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+ {
+ const int queue_mapping = skb_get_queue_mapping(skb);
+- bool nonlinear = skb_is_nonlinear(skb);
+ struct rtnl_link_stats64 *percpu_stats;
+ struct dpaa_percpu_priv *percpu_priv;
+ struct netdev_queue *txq;
+ struct dpaa_priv *priv;
+ struct qm_fd fd;
++ bool nonlinear;
+ int offset = 0;
+ int err = 0;
+
+@@ -2300,6 +2300,13 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+
+ qm_fd_clear_fd(&fd);
+
++ /* Packet data is always read as 32-bit words, so zero out any part of
++ * the skb which might be sent if we have to pad the packet
++ */
++ if (__skb_put_padto(skb, ETH_ZLEN, false))
++ goto enomem;
++
++ nonlinear = skb_is_nonlinear(skb);
+ if (!nonlinear) {
+ /* We're going to store the skb backpointer at the beginning
+ * of the data buffer, so we need a privately owned skb
+--
+2.43.0
+
--- /dev/null
+From fa0f91a3956e4fda6534cefc711b6155367c8bcf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 17:35:50 +0800
+Subject: net: dsa: felix: ignore pending status of TAS module when it's
+ disabled
+
+From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
+
+[ Upstream commit 70654f4c212e83898feced125d91ebb3695950d8 ]
+
+The TAS module cannot be configured while it is running in pending
+status. We need to disable the module and configure it again. However, the
+pending status is not cleared after the module is disabled. TC taprio set
+will always return busy even if it's disabled.
+
+For example, a user uses tc-taprio to configure Qbv and a future
+basetime. The TAS module will run in a pending status. There is no way
+to reconfigure Qbv, it always returns busy.
+
+Actually the TAS module can be reconfigured when it's disabled. So it
+doesn't need to check the pending status if the TAS module is disabled.
+
+After the patch, user can delete the tc taprio configuration to disable
+Qbv and reconfigure it again.
+
+Fixes: de143c0e274b ("net: dsa: felix: Configure Time-Aware Scheduler via taprio offload")
+Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
+Link: https://patch.msgid.link/20240906093550.29985-1-xiaoliang.yang_1@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/ocelot/felix_vsc9959.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
+index 3c5509e75a54..afb5dae4439c 100644
+--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
+@@ -1474,10 +1474,13 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
+ /* Hardware errata - Admin config could not be overwritten if
+ * config is pending, need reset the TAS module
+ */
+- val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
+- if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
+- ret = -EBUSY;
+- goto err_reset_tc;
++ val = ocelot_read_rix(ocelot, QSYS_TAG_CONFIG, port);
++ if (val & QSYS_TAG_CONFIG_ENABLE) {
++ val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
++ if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
++ ret = -EBUSY;
++ goto err_reset_tc;
++ }
+ }
+
+ ocelot_rmw_rix(ocelot,
+--
+2.43.0
+
--- /dev/null
+From 0efd68d7df3a5d9fc371358d465d20bce4362ada Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 14:28:31 +0800
+Subject: net: ftgmac100: Enable TX interrupt to avoid TX timeout
+
+From: Jacky Chou <jacky_chou@aspeedtech.com>
+
+[ Upstream commit fef2843bb49f414d1523ca007d088071dee0e055 ]
+
+Currently, the driver only enables RX interrupt to handle RX
+packets and TX resources. Sometimes there is no RX traffic,
+so the TX resource needs to wait for RX interrupt to free.
+This situation will toggle the TX timeout watchdog when the MAC
+TX ring has no more resources to transmit packets.
+Therefore, enable TX interrupt to release TX resources at any time.
+
+When I am verifying iperf3 over UDP, the network hangs.
+Like the log below.
+
+root# iperf3 -c 192.168.100.100 -i1 -t10 -u -b0
+Connecting to host 192.168.100.100, port 5201
+[ 4] local 192.168.100.101 port 35773 connected to 192.168.100.100 port 5201
+[ ID] Interval Transfer Bandwidth Total Datagrams
+[ 4] 0.00-20.42 sec 160 KBytes 64.2 Kbits/sec 20
+[ 4] 20.42-20.42 sec 0.00 Bytes 0.00 bits/sec 0
+[ 4] 20.42-20.42 sec 0.00 Bytes 0.00 bits/sec 0
+[ 4] 20.42-20.42 sec 0.00 Bytes 0.00 bits/sec 0
+[ 4] 20.42-20.42 sec 0.00 Bytes 0.00 bits/sec 0
+[ 4] 20.42-20.42 sec 0.00 Bytes 0.00 bits/sec 0
+[ 4] 20.42-20.42 sec 0.00 Bytes 0.00 bits/sec 0
+[ 4] 20.42-20.42 sec 0.00 Bytes 0.00 bits/sec 0
+[ 4] 20.42-20.42 sec 0.00 Bytes 0.00 bits/sec 0
+[ 4] 20.42-20.42 sec 0.00 Bytes 0.00 bits/sec 0
+- - - - - - - - - - - - - - - - - - - - - - - - -
+[ ID] Interval Transfer Bandwidth Jitter Lost/Total Datagrams
+[ 4] 0.00-20.42 sec 160 KBytes 64.2 Kbits/sec 0.000 ms 0/20 (0%)
+[ 4] Sent 20 datagrams
+iperf3: error - the server has terminated
+
+The network topology is FTGMAC connects directly to a PC.
+UDP does not need to wait for ACK, unlike TCP.
+Therefore, FTGMAC needs to enable TX interrupt to release TX resources instead
+of waiting for the RX interrupt.
+
+Fixes: 10cbd6407609 ("ftgmac100: Rework NAPI & interrupts handling")
+Signed-off-by: Jacky Chou <jacky_chou@aspeedtech.com>
+Link: https://patch.msgid.link/20240906062831.2243399-1-jacky_chou@aspeedtech.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/faraday/ftgmac100.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h
+index 63b3e02fab16..4968f6f0bdbc 100644
+--- a/drivers/net/ethernet/faraday/ftgmac100.h
++++ b/drivers/net/ethernet/faraday/ftgmac100.h
+@@ -84,7 +84,7 @@
+ FTGMAC100_INT_RPKT_BUF)
+
+ /* All the interrupts we care about */
+-#define FTGMAC100_INT_ALL (FTGMAC100_INT_RPKT_BUF | \
++#define FTGMAC100_INT_ALL (FTGMAC100_INT_RXTX | \
+ FTGMAC100_INT_BAD)
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 543884f2772cb5abb9946b2ca35f9f2fa8d5764d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Aug 2024 10:03:20 +0300
+Subject: net/mlx5: Add missing masks and QoS bit masks for scheduling elements
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit 452ef7f86036392005940de54228d42ca0044192 ]
+
+Add the missing masks for supported element types and Transmit
+Scheduling Arbiter (TSAR) types in scheduling elements.
+
+Also, add the corresponding bit masks for these types in the QoS
+capabilities of a NIC scheduler.
+
+Fixes: 214baf22870c ("net/mlx5e: Support HTB offload")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mlx5/mlx5_ifc.h | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index 3d1cd726df34..0e20d7109028 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -1010,7 +1010,8 @@ struct mlx5_ifc_qos_cap_bits {
+
+ u8 max_tsar_bw_share[0x20];
+
+- u8 reserved_at_100[0x20];
++ u8 nic_element_type[0x10];
++ u8 nic_tsar_type[0x10];
+
+ u8 reserved_at_120[0x3];
+ u8 log_meter_aso_granularity[0x5];
+@@ -3847,6 +3848,7 @@ enum {
+ ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1,
+ ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2,
+ ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3,
++ ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4,
+ };
+
+ struct mlx5_ifc_scheduling_context_bits {
+@@ -4546,6 +4548,12 @@ enum {
+ TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
+ };
+
++enum {
++ TSAR_TYPE_CAP_MASK_DWRR = 1 << 0,
++ TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1,
++ TSAR_TYPE_CAP_MASK_ETS = 1 << 2,
++};
++
+ struct mlx5_ifc_tsar_element_bits {
+ u8 reserved_at_0[0x8];
+ u8 tsar_type[0x8];
+--
+2.43.0
+
--- /dev/null
+From 9a4d6ae060356c1ac1b08c21d914e9c2244807bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jun 2024 00:00:31 +0300
+Subject: net/mlx5: Correct TASR typo into TSAR
+
+From: Cosmin Ratiu <cratiu@nvidia.com>
+
+[ Upstream commit e575d3a6dd22123888defb622b1742aa2d45b942 ]
+
+TSAR is the correct spelling (Transmit Scheduling ARbiter).
+
+Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
+Reviewed-by: Gal Pressman <gal@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240613210036.1125203-2-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 861cd9b9cb62 ("net/mlx5: Verify support for scheduling element and TSAR type")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +-
+ include/linux/mlx5/mlx5_ifc.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+index 627cdb072573..f4cd6bb1870d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+@@ -537,7 +537,7 @@ static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+ switch (type) {
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+- ELEMENT_TYPE_CAP_MASK_TASR;
++ ELEMENT_TYPE_CAP_MASK_TSAR;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_VPORT;
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index 0e20d7109028..9106771bb92f 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -3844,7 +3844,7 @@ enum {
+ };
+
+ enum {
+- ELEMENT_TYPE_CAP_MASK_TASR = 1 << 0,
++ ELEMENT_TYPE_CAP_MASK_TSAR = 1 << 0,
+ ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1,
+ ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2,
+ ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3,
+--
+2.43.0
+
--- /dev/null
+From e7f3e3eeec442fb468d8ac07d243c8324c517781 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Sep 2024 11:46:14 +0300
+Subject: net/mlx5: Explicitly set scheduling element and TSAR type
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit c88146abe4d0f8cf659b2b8883fdc33936d2e3b8 ]
+
+Ensure the scheduling element type and TSAR type are explicitly
+initialized in the QoS rate group creation.
+
+This prevents potential issues due to default values.
+
+Fixes: 1ae258f8b343 ("net/mlx5: E-switch, Introduce rate limiting groups API")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+index 1887a24ee414..627cdb072573 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+@@ -420,6 +420,7 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
+ {
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group;
++ __be32 *attr;
+ u32 divider;
+ int err;
+
+@@ -427,6 +428,12 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
++ MLX5_SET(scheduling_context, tsar_ctx, element_type,
++ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
++
++ attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
++ *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
++
+ MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
+ esw->qos.root_tsar_ix);
+ err = mlx5_create_scheduling_element_cmd(esw->dev,
+--
+2.43.0
+
--- /dev/null
+From 60a9c60ab71b5bbea99fc5da76d5ecbd820a41ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Aug 2024 08:39:27 -0400
+Subject: net/mlx5: Fix bridge mode operations when there are no VFs
+
+From: Benjamin Poirier <bpoirier@nvidia.com>
+
+[ Upstream commit b1d305abef4640af1b4f1b4774d513cd81b10cfc ]
+
+Currently, trying to set the bridge mode attribute when numvfs=0 leads to a
+crash:
+
+bridge link set dev eth2 hwmode vepa
+
+[ 168.967392] BUG: kernel NULL pointer dereference, address: 0000000000000030
+[...]
+[ 168.969989] RIP: 0010:mlx5_add_flow_rules+0x1f/0x300 [mlx5_core]
+[...]
+[ 168.976037] Call Trace:
+[ 168.976188] <TASK>
+[ 168.978620] _mlx5_eswitch_set_vepa_locked+0x113/0x230 [mlx5_core]
+[ 168.979074] mlx5_eswitch_set_vepa+0x7f/0xa0 [mlx5_core]
+[ 168.979471] rtnl_bridge_setlink+0xe9/0x1f0
+[ 168.979714] rtnetlink_rcv_msg+0x159/0x400
+[ 168.980451] netlink_rcv_skb+0x54/0x100
+[ 168.980675] netlink_unicast+0x241/0x360
+[ 168.980918] netlink_sendmsg+0x1f6/0x430
+[ 168.981162] ____sys_sendmsg+0x3bb/0x3f0
+[ 168.982155] ___sys_sendmsg+0x88/0xd0
+[ 168.985036] __sys_sendmsg+0x59/0xa0
+[ 168.985477] do_syscall_64+0x79/0x150
+[ 168.987273] entry_SYSCALL_64_after_hwframe+0x76/0x7e
+[ 168.987773] RIP: 0033:0x7f8f7950f917
+
+(esw->fdb_table.legacy.vepa_fdb is null)
+
+The bridge mode is only relevant when there are multiple functions per
+port. Therefore, prevent setting and getting this setting when there are no
+VFs.
+
+Note that after this change, there are no settings to change on the PF
+interface using `bridge link` when there are no VFs, so the interface no
+longer appears in the `bridge link` output.
+
+Fixes: 4b89251de024 ("net/mlx5: Support ndo bridge_setlink and getlink")
+Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+index 255bc8b749f9..8587cd572da5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+@@ -319,7 +319,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
+ return -EPERM;
+
+ mutex_lock(&esw->state_lock);
+- if (esw->mode != MLX5_ESWITCH_LEGACY) {
++ if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw)) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+@@ -339,7 +339,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+- if (esw->mode != MLX5_ESWITCH_LEGACY)
++ if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw))
+ return -EOPNOTSUPP;
+
+ *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
+--
+2.43.0
+
--- /dev/null
+From d5b0f2b37513f36d670a91ac27dfe0943b83ed95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Aug 2024 11:02:34 +0300
+Subject: net/mlx5: Update the list of the PCI supported devices
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+[ Upstream commit 7472d157cb8014103105433bcc0705af2e6f7184 ]
+
+Add the upcoming ConnectX-9 device ID to the table of supported
+PCI device IDs.
+
+Fixes: f908a35b2218 ("net/mlx5: Update the list of the PCI supported devices")
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 11f11248feb8..96136229b1b0 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -2205,6 +2205,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
+ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */
+ { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */
+ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */
++ { PCI_VDEVICE(MELLANOX, 0x1025) }, /* ConnectX-9 */
+ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
+ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
+--
+2.43.0
+
--- /dev/null
+From 405b329a60ff74f77aa8b11eb2b51927fbff679a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Aug 2024 13:13:03 +0300
+Subject: net/mlx5: Verify support for scheduling element and TSAR type
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit 861cd9b9cb62feb244b8d77e68fd6ddedbbf66e9 ]
+
+Before creating a scheduling element in a NIC or E-Switch scheduler,
+ensure that the requested element type is supported. If the element is
+of type Transmit Scheduling Arbiter (TSAR), also verify that the
+specific TSAR type is supported.
+
+Fixes: 214baf22870c ("net/mlx5e: Support HTB offload")
+Fixes: 85c5f7c9200e ("net/mlx5: E-switch, Create QoS on demand")
+Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 44 ++++++++++---------
+ drivers/net/ethernet/mellanox/mlx5/core/qos.c | 7 +++
+ 2 files changed, 31 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+index f4cd6bb1870d..cc0f2be21a26 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+@@ -311,6 +311,25 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
+ return err;
+ }
+
++static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
++{
++ switch (type) {
++ case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
++ return MLX5_CAP_QOS(dev, esw_element_type) &
++ ELEMENT_TYPE_CAP_MASK_TSAR;
++ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
++ return MLX5_CAP_QOS(dev, esw_element_type) &
++ ELEMENT_TYPE_CAP_MASK_VPORT;
++ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
++ return MLX5_CAP_QOS(dev, esw_element_type) &
++ ELEMENT_TYPE_CAP_MASK_VPORT_TC;
++ case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
++ return MLX5_CAP_QOS(dev, esw_element_type) &
++ ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
++ }
++ return false;
++}
++
+ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share)
+@@ -322,6 +341,9 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+ void *vport_elem;
+ int err;
+
++ if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
++ return -EOPNOTSUPP;
++
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+@@ -532,25 +554,6 @@ static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+ return err;
+ }
+
+-static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+-{
+- switch (type) {
+- case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+- return MLX5_CAP_QOS(dev, esw_element_type) &
+- ELEMENT_TYPE_CAP_MASK_TSAR;
+- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+- return MLX5_CAP_QOS(dev, esw_element_type) &
+- ELEMENT_TYPE_CAP_MASK_VPORT;
+- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+- return MLX5_CAP_QOS(dev, esw_element_type) &
+- ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+- case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+- return MLX5_CAP_QOS(dev, esw_element_type) &
+- ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+- }
+- return false;
+-}
+-
+ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+ {
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+@@ -561,7 +564,8 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+- if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
++ if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) ||
++ !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(scheduling_context, tsar_ctx, element_type,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
+index 8bce730b5c5b..db2bd3ad63ba 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
+@@ -28,6 +28,9 @@ int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ {
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+
++ if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP))
++ return -EOPNOTSUPP;
++
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP);
+@@ -44,6 +47,10 @@ int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ void *attr;
+
++ if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_TSAR) ||
++ !(MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
++ return -EOPNOTSUPP;
++
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+--
+2.43.0
+
--- /dev/null
+From 86f66441a18fa085b688aa05db4cb490ca699143 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 11 Aug 2024 13:58:04 +0300
+Subject: net/mlx5e: Add missing link mode to ptys2ext_ethtool_map
+
+From: Shahar Shitrit <shshitrit@nvidia.com>
+
+[ Upstream commit 80bf474242b21d64a514fd2bb65faa7a17ca8d8d ]
+
+Add MLX5E_400GAUI_8_400GBASE_CR8 to the extended modes
+in ptys2ext_ethtool_table, since it was missing.
+
+Fixes: 6a897372417e ("net/mlx5: ethtool, Add ethtool support for 50Gbps per lane link modes")
+Signed-off-by: Shahar Shitrit <shshitrit@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+index f973314b1724..54379297a748 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -205,6 +205,12 @@ void mlx5e_build_ptys2ethtool_map(void)
+ ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT);
++ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_8_400GBASE_CR8, ext,
++ ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT,
++ ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT,
++ ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT,
++ ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT,
++ ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_1_100GBASE_CR_KR, ext,
+ ETHTOOL_LINK_MODE_100000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseSR_Full_BIT,
+--
+2.43.0
+
--- /dev/null
+From dd6513d8f9d839af8f04ad109a94b9c387a87a27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 11 Aug 2024 13:56:13 +0300
+Subject: net/mlx5e: Add missing link modes to ptys2ethtool_map
+
+From: Shahar Shitrit <shshitrit@nvidia.com>
+
+[ Upstream commit 7617d62cba4a8a3ff3ed3fda0171c43f135c142e ]
+
+Add MLX5E_1000BASE_T and MLX5E_100BASE_TX to the legacy
+modes in ptys2legacy_ethtool_table, since they were missing.
+
+Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API")
+Signed-off-by: Shahar Shitrit <shshitrit@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+index 50db127e6371..f973314b1724 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -136,6 +136,10 @@ void mlx5e_build_ptys2ethtool_map(void)
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
++ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100BASE_TX, legacy,
++ ETHTOOL_LINK_MODE_100baseT_Full_BIT);
++ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_T, legacy,
++ ETHTOOL_LINK_MODE_1000baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy,
+--
+2.43.0
+
--- /dev/null
+From 82043c23763a5a619b0222591327d1c55c106b39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Sep 2024 12:54:46 +0200
+Subject: netfilter: nft_socket: fix sk refcount leaks
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 8b26ff7af8c32cb4148b3e147c52f9e4c695209c ]
+
+We must put 'sk' reference before returning.
+
+Fixes: 039b1f4f24ec ("netfilter: nft_socket: fix erroneous socket assignment")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_socket.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
+index f30163e2ca62..765ffd6e06bc 100644
+--- a/net/netfilter/nft_socket.c
++++ b/net/netfilter/nft_socket.c
+@@ -110,13 +110,13 @@ static void nft_socket_eval(const struct nft_expr *expr,
+ *dest = READ_ONCE(sk->sk_mark);
+ } else {
+ regs->verdict.code = NFT_BREAK;
+- return;
++ goto out_put_sk;
+ }
+ break;
+ case NFT_SOCKET_WILDCARD:
+ if (!sk_fullsock(sk)) {
+ regs->verdict.code = NFT_BREAK;
+- return;
++ goto out_put_sk;
+ }
+ nft_socket_wildcard(pkt, regs, sk, dest);
+ break;
+@@ -124,7 +124,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
+ case NFT_SOCKET_CGROUPV2:
+ if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
+ regs->verdict.code = NFT_BREAK;
+- return;
++ goto out_put_sk;
+ }
+ break;
+ #endif
+@@ -133,6 +133,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
+ regs->verdict.code = NFT_BREAK;
+ }
+
++out_put_sk:
+ if (sk != skb->sk)
+ sock_gen_put(sk);
+ }
+--
+2.43.0
+
--- /dev/null
+From 5dd0738faba0150678d02eb8e04277fa8c242a77 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 Sep 2024 16:07:49 +0200
+Subject: netfilter: nft_socket: make cgroupsv2 matching work with namespaces
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 7f3287db654395f9c5ddd246325ff7889f550286 ]
+
+When running in a container environment, /sys/fs/cgroup/ might not be
+the real root node of the sk-attached cgroup.
+
+Example:
+
+In container:
+% stat /sys/fs/cgroup/
+Device: 0,21 Inode: 2214 ..
+% stat /sys/fs/cgroup/foo
+Device: 0,21 Inode: 2264 ..
+
+The expectation would be for:
+
+ nft add rule .. socket cgroupv2 level 1 "foo" counter
+
+to match traffic from a process that got added to "foo" via
+"echo $pid > /sys/fs/cgroup/foo/cgroup.procs".
+
+However, 'level 3' is needed to make this work.
+
+Seen from initial namespace, the complete hierarchy is:
+
+% stat /sys/fs/cgroup/system.slice/docker-.../foo
+ Device: 0,21 Inode: 2264 ..
+
+i.e. hierarchy is
+0 1 2 3
+/ -> system.slice -> docker-1... -> foo
+
+... but the container doesn't know that its "/" is the "docker-1.."
+cgroup. Current code will retrieve the 'system.slice' cgroup node
+and store its kn->id in the destination register, so compare with
+2264 ("foo" cgroup id) will not match.
+
+Fetch "/" cgroup from ->init() and add its level to the level we try to
+extract. cgroup root-level is 0 for the init-namespace or the level
+of the ancestor that is exposed as the cgroup root inside the container.
+
+In the above case, cgrp->level of "/" resolved in the container is 2
+(docker-1...scope/) and request for 'level 1' will get adjusted
+to fetch the actual level (3).
+
+v2: use CONFIG_SOCK_CGROUP_DATA, eval function depends on it.
+ (kernel test robot)
+
+Cc: cgroups@vger.kernel.org
+Fixes: e0bb96db96f8 ("netfilter: nft_socket: add support for cgroupsv2")
+Reported-by: Nadia Pinaeva <n.m.pinaeva@gmail.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_socket.c | 41 +++++++++++++++++++++++++++++++++++---
+ 1 file changed, 38 insertions(+), 3 deletions(-)
+
+diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
+index 765ffd6e06bc..12cdff640492 100644
+--- a/net/netfilter/nft_socket.c
++++ b/net/netfilter/nft_socket.c
+@@ -9,7 +9,8 @@
+
+ struct nft_socket {
+ enum nft_socket_keys key:8;
+- u8 level;
++ u8 level; /* cgroupv2 level to extract */
++ u8 level_user; /* cgroupv2 level provided by userspace */
+ u8 len;
+ union {
+ u8 dreg;
+@@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
+ memcpy(dest, &cgid, sizeof(u64));
+ return true;
+ }
++
++/* process context only, uses current->nsproxy. */
++static noinline int nft_socket_cgroup_subtree_level(void)
++{
++ struct cgroup *cgrp = cgroup_get_from_path("/");
++ int level;
++
++ if (!cgrp)
++ return -ENOENT;
++
++ level = cgrp->level;
++
++ cgroup_put(cgrp);
++
++ if (WARN_ON_ONCE(level > 255))
++ return -ERANGE;
++
++ if (WARN_ON_ONCE(level < 0))
++ return -EINVAL;
++
++ return level;
++}
+ #endif
+
+ static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+@@ -174,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx,
+ case NFT_SOCKET_MARK:
+ len = sizeof(u32);
+ break;
+-#ifdef CONFIG_CGROUPS
++#ifdef CONFIG_SOCK_CGROUP_DATA
+ case NFT_SOCKET_CGROUPV2: {
+ unsigned int level;
++ int err;
+
+ if (!tb[NFTA_SOCKET_LEVEL])
+ return -EINVAL;
+@@ -185,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx,
+ if (level > 255)
+ return -EOPNOTSUPP;
+
++ err = nft_socket_cgroup_subtree_level();
++ if (err < 0)
++ return err;
++
++ priv->level_user = level;
++
++ level += err;
++ /* Implies a giant cgroup tree */
++ if (WARN_ON_ONCE(level > 255))
++ return -EOPNOTSUPP;
++
+ priv->level = level;
+ len = sizeof(u64);
+ break;
+@@ -209,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb,
+ if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
+ return -1;
+ if (priv->key == NFT_SOCKET_CGROUPV2 &&
+- nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
++ nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user)))
+ return -1;
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 26676598870fd0c364fb98e5ccf50223b6f7e22e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 10:28:38 +0530
+Subject: octeontx2-af: Modify SMQ flush sequence to drop packets
+
+From: Naveen Mamindlapalli <naveenm@marvell.com>
+
+[ Upstream commit 019aba04f08c2102b35ce7fee9d4628d349f56c0 ]
+
+The current implementation of SMQ flush sequence waits for the packets
+in the TM pipeline to be transmitted out of the link. This sequence
+doesn't succeed in HW when there is any issue with link such as lack of
+link credits, link down or any other traffic that is fully occupying the
+link bandwidth (QoS). This patch modifies the SMQ flush sequence to
+drop the packets after TL1 level (SQM) instead of polling for the packets
+to be sent out of RPM/CGX link.
+
+Fixes: 5d9b976d4480 ("octeontx2-af: Support fixed transmit scheduler topology")
+Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
+Reviewed-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Link: https://patch.msgid.link/20240906045838.1620308-1-naveenm@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/af/rvu.h | 3 +-
+ .../ethernet/marvell/octeontx2/af/rvu_nix.c | 59 +++++++++++++++----
+ 2 files changed, 48 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+index 185c296eaaf0..e81cfcaf9ce4 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+@@ -290,6 +290,7 @@ struct nix_mark_format {
+
+ /* smq(flush) to tl1 cir/pir info */
+ struct nix_smq_tree_ctx {
++ u16 schq;
+ u64 cir_off;
+ u64 cir_val;
+ u64 pir_off;
+@@ -299,8 +300,6 @@ struct nix_smq_tree_ctx {
+ /* smq flush context */
+ struct nix_smq_flush_ctx {
+ int smq;
+- u16 tl1_schq;
+- u16 tl2_schq;
+ struct nix_smq_tree_ctx smq_tree_ctx[NIX_TXSCH_LVL_CNT];
+ };
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index a07e5c8786c4..224a025283ca 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -2146,14 +2146,13 @@ static void nix_smq_flush_fill_ctx(struct rvu *rvu, int blkaddr, int smq,
+ schq = smq;
+ for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) {
+ smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl];
++ smq_tree_ctx->schq = schq;
+ if (lvl == NIX_TXSCH_LVL_TL1) {
+- smq_flush_ctx->tl1_schq = schq;
+ smq_tree_ctx->cir_off = NIX_AF_TL1X_CIR(schq);
+ smq_tree_ctx->pir_off = 0;
+ smq_tree_ctx->pir_val = 0;
+ parent_off = 0;
+ } else if (lvl == NIX_TXSCH_LVL_TL2) {
+- smq_flush_ctx->tl2_schq = schq;
+ smq_tree_ctx->cir_off = NIX_AF_TL2X_CIR(schq);
+ smq_tree_ctx->pir_off = NIX_AF_TL2X_PIR(schq);
+ parent_off = NIX_AF_TL2X_PARENT(schq);
+@@ -2188,8 +2187,8 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr,
+ {
+ struct nix_txsch *txsch;
+ struct nix_hw *nix_hw;
++ int tl2, tl2_schq;
+ u64 regoff;
+- int tl2;
+
+ nix_hw = get_nix_hw(rvu->hw, blkaddr);
+ if (!nix_hw)
+@@ -2197,16 +2196,17 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr,
+
+ /* loop through all TL2s with matching PF_FUNC */
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2];
++ tl2_schq = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL2].schq;
+ for (tl2 = 0; tl2 < txsch->schq.max; tl2++) {
+ /* skip the smq(flush) TL2 */
+- if (tl2 == smq_flush_ctx->tl2_schq)
++ if (tl2 == tl2_schq)
+ continue;
+ /* skip unused TL2s */
+ if (TXSCH_MAP_FLAGS(txsch->pfvf_map[tl2]) & NIX_TXSCHQ_FREE)
+ continue;
+ /* skip if PF_FUNC doesn't match */
+ if ((TXSCH_MAP_FUNC(txsch->pfvf_map[tl2]) & ~RVU_PFVF_FUNC_MASK) !=
+- (TXSCH_MAP_FUNC(txsch->pfvf_map[smq_flush_ctx->tl2_schq] &
++ (TXSCH_MAP_FUNC(txsch->pfvf_map[tl2_schq] &
+ ~RVU_PFVF_FUNC_MASK)))
+ continue;
+ /* enable/disable XOFF */
+@@ -2248,10 +2248,12 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+ int smq, u16 pcifunc, int nixlf)
+ {
+ struct nix_smq_flush_ctx *smq_flush_ctx;
++ int err, restore_tx_en = 0, i;
+ int pf = rvu_get_pf(pcifunc);
+ u8 cgx_id = 0, lmac_id = 0;
+- int err, restore_tx_en = 0;
+- u64 cfg;
++ u16 tl2_tl3_link_schq;
++ u8 link, link_level;
++ u64 cfg, bmap = 0;
+
+ if (!is_rvu_otx2(rvu)) {
+ /* Skip SMQ flush if pkt count is zero */
+@@ -2275,16 +2277,38 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+ nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, true);
+ nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, false);
+
+- cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
+- /* Do SMQ flush and set enqueue xoff */
+- cfg |= BIT_ULL(50) | BIT_ULL(49);
+- rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
+-
+ /* Disable backpressure from physical link,
+ * otherwise SMQ flush may stall.
+ */
+ rvu_cgx_enadis_rx_bp(rvu, pf, false);
+
++ link_level = rvu_read64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL) & 0x01 ?
++ NIX_TXSCH_LVL_TL3 : NIX_TXSCH_LVL_TL2;
++ tl2_tl3_link_schq = smq_flush_ctx->smq_tree_ctx[link_level].schq;
++ link = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL1].schq;
++
++ /* SMQ set enqueue xoff */
++ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
++ cfg |= BIT_ULL(50);
++ rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
++
++ /* Clear all NIX_AF_TL3_TL2_LINK_CFG[ENA] for the TL3/TL2 queue */
++ for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) {
++ cfg = rvu_read64(rvu, blkaddr,
++ NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link));
++ if (!(cfg & BIT_ULL(12)))
++ continue;
++ bmap |= (1 << i);
++ cfg &= ~BIT_ULL(12);
++ rvu_write64(rvu, blkaddr,
++ NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg);
++ }
++
++ /* Do SMQ flush and set enqueue xoff */
++ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
++ cfg |= BIT_ULL(50) | BIT_ULL(49);
++ rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
++
+ /* Wait for flush to complete */
+ err = rvu_poll_reg(rvu, blkaddr,
+ NIX_AF_SMQX_CFG(smq), BIT_ULL(49), true);
+@@ -2293,6 +2317,17 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+ "NIXLF%d: SMQ%d flush failed, txlink might be busy\n",
+ nixlf, smq);
+
++ /* Set NIX_AF_TL3_TL2_LINKX_CFG[ENA] for the TL3/TL2 queue */
++ for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) {
++ if (!(bmap & (1 << i)))
++ continue;
++ cfg = rvu_read64(rvu, blkaddr,
++ NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link));
++ cfg |= BIT_ULL(12);
++ rvu_write64(rvu, blkaddr,
++ NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg);
++ }
++
+ /* clear XOFF on TL2s */
+ nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, true);
+ nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, false);
+--
+2.43.0
+
--- /dev/null
+From ff041ad604a628609aa09049281dee315f45fb69 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Sep 2024 17:07:43 -0400
+Subject: selftests: net: csum: Fix checksums for packets with non-zero padding
+
+From: Sean Anderson <sean.anderson@linux.dev>
+
+[ Upstream commit e8a63d473b49011a68a748aea1c8aefa046ebacf ]
+
+Padding is not included in UDP and TCP checksums. Therefore, reduce the
+length of the checksummed data to include only the data in the IP
+payload. This fixes spurious reported checksum failures like
+
+rx: pkt: sport=33000 len=26 csum=0xc850 verify=0xf9fe
+pkt: bad csum
+
+Technically it is possible for there to be trailing bytes after the UDP
+data but before the Ethernet padding (e.g. if sizeof(ip) + sizeof(udp) +
+udp.len < ip.len). However, we don't generate such packets.
+
+Fixes: 91a7de85600d ("selftests/net: add csum offload test")
+Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://patch.msgid.link/20240906210743.627413-1-sean.anderson@linux.dev
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/csum.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c
+index 90eb06fefa59..eef72b50270c 100644
+--- a/tools/testing/selftests/net/csum.c
++++ b/tools/testing/selftests/net/csum.c
+@@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len)
+ {
+ struct iphdr *iph = nh;
+ uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
++ uint16_t ip_len;
+
+ if (len < sizeof(*iph) || iph->protocol != proto)
+ return -1;
+
++ ip_len = ntohs(iph->tot_len);
++ if (ip_len > len || ip_len < sizeof(*iph))
++ return -1;
++
++ len = ip_len;
+ iph_addr_p = &iph->saddr;
+ if (proto == IPPROTO_TCP)
+ return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
+@@ -669,16 +675,22 @@ static int recv_verify_packet_ipv6(void *nh, int len)
+ {
+ struct ipv6hdr *ip6h = nh;
+ uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
++ uint16_t ip_len;
+
+ if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
+ return -1;
+
++ ip_len = ntohs(ip6h->payload_len);
++ if (ip_len > len - sizeof(*ip6h))
++ return -1;
++
++ len = ip_len;
+ iph_addr_p = &ip6h->saddr;
+
+ if (proto == IPPROTO_TCP)
+- return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
++ return recv_verify_packet_tcp(ip6h + 1, len);
+ else
+- return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
++ return recv_verify_packet_udp(ip6h + 1, len);
+ }
+
+ /* return whether auxdata includes TP_STATUS_CSUM_VALID */
+--
+2.43.0
+
eeprom-digsy_mtc-fix-93xx46-driver-probe-failure.patch
cxl-core-fix-incorrect-vendor-debug-uuid-define.patch
selftests-bpf-support-sock_stream-in-unix_inet_redir.patch
+hwmon-pmbus-conditionally-clear-individual-status-bi.patch
+ice-fix-lldp-packets-dropping-after-changing-the-num.patch
+ice-fix-accounting-for-filters-shared-by-multiple-vs.patch
+ice-fix-vsi-lists-confusion-when-adding-vlans.patch
+igb-always-call-igb_xdp_ring_update_tail-under-tx-lo.patch
+net-mlx5-update-the-list-of-the-pci-supported-device.patch
+net-mlx5e-add-missing-link-modes-to-ptys2ethtool_map.patch
+ib-mlx5-rename-400g_8x-speed-to-comply-to-naming-con.patch
+net-mlx5e-add-missing-link-mode-to-ptys2ext_ethtool_.patch
+net-mlx5-explicitly-set-scheduling-element-and-tsar-.patch
+net-mlx5-add-missing-masks-and-qos-bit-masks-for-sch.patch
+net-mlx5-correct-tasr-typo-into-tsar.patch
+net-mlx5-verify-support-for-scheduling-element-and-t.patch
+net-mlx5-fix-bridge-mode-operations-when-there-are-n.patch
+fou-fix-initialization-of-grc.patch
+octeontx2-af-modify-smq-flush-sequence-to-drop-packe.patch
+net-ftgmac100-enable-tx-interrupt-to-avoid-tx-timeou.patch
+selftests-net-csum-fix-checksums-for-packets-with-no.patch
+netfilter-nft_socket-fix-sk-refcount-leaks.patch
+netfilter-nft_socket-make-cgroupsv2-matching-work-wi.patch
+net-dsa-felix-ignore-pending-status-of-tas-module-wh.patch
+net-dpaa-pad-packets-to-eth_zlen.patch