--- /dev/null
+From 12d6b0c64a6edd2d782074a926a63be1bdcd8330 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 21:46:29 +0200
+Subject: ACPI: property: Fix return value for nval == 0 in
+ acpi_data_prop_read()
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit ab930483eca9f3e816c35824b5868599af0c61d7 ]
+
+While analysing the software node and OF node code for the corner case
+where the caller asks to read zero items from what is supposed to be an
+array of values, I found that ACPI behaves differently from OF, i.e.
+
+ 1. It returns -EINVAL when the caller asks to read zero items from an
+    integer array, while OF returns 0, if no other errors happened.
+
+ 2. It returns -EINVAL when the caller asks to read zero items from a
+    string array, while OF returns -ENODATA, if no other errors happened.
+
+Amend the ACPI implementation to follow what OF does.
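+
+A minimal illustration of the resulting caller-visible semantics (a
+sketch with hypothetical property names, not part of this patch):
+
+  u32 vals[4];
+  const char *names[4];
+  int ret;
+
+  /* Reading zero integers is now a successful no-op, as in OF */
+  ret = device_property_read_u32_array(dev, "channels", vals, 0);  /* 0 */
+
+  /* Reading zero strings now reports missing data, as in OF */
+  ret = device_property_read_string_array(dev, "names", names, 0); /* -ENODATA */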
+
+Fixes: b31384fa5de3 ("Driver core: Unified device properties interface for platform firmware")
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://patch.msgid.link/20250203194629.3731895-1-andriy.shevchenko@linux.intel.com
+[ rjw: Added empty line after a conditional ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/acpi/property.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
+index 80a52a4e66dd1..e9186339f6e6b 100644
+--- a/drivers/acpi/property.c
++++ b/drivers/acpi/property.c
+@@ -1187,8 +1187,6 @@ static int acpi_data_prop_read(const struct acpi_device_data *data,
+ }
+ break;
+ }
+- if (nval == 0)
+- return -EINVAL;
+
+ if (obj->type == ACPI_TYPE_BUFFER) {
+ if (proptype != DEV_PROP_U8)
+@@ -1212,9 +1210,11 @@ static int acpi_data_prop_read(const struct acpi_device_data *data,
+ ret = acpi_copy_property_array_uint(items, (u64 *)val, nval);
+ break;
+ case DEV_PROP_STRING:
+- ret = acpi_copy_property_array_string(
+- items, (char **)val,
+- min_t(u32, nval, obj->package.count));
++ nval = min_t(u32, nval, obj->package.count);
++ if (nval == 0)
++ return -ENODATA;
++
++ ret = acpi_copy_property_array_string(items, (char **)val, nval);
+ break;
+ default:
+ ret = -EINVAL;
+--
+2.39.5
+
--- /dev/null
+From c2990ac2b24b3c5afc53c58d2cbf9896d5c1c95d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Jan 2025 10:46:06 +0530
+Subject: drm/i915/dp: fix the Adaptive sync Operation mode for SDP
+
+From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
+
+[ Upstream commit 4466302262b38f5e6c65325035b4036a42efc934 ]
+
+Currently we support the Adaptive Sync operation mode with a dynamic
+frame rate, but the operation mode with a fixed rate is set instead.
+This was set correctly in an earlier version of the changes but was
+inadvertently changed later, while defining a macro for it.
+
+Fixes: a5bd5991cb8a ("drm/i915/display: Compute AS SDP parameters")
+Cc: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
+Cc: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
+Cc: Jani Nikula <jani.nikula@linux.intel.com>
+Reviewed-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
+Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20250130051609.1796524-4-mitulkumar.ajitkumar.golani@intel.com
+(cherry picked from commit c5806862543ff6c2ad242409fcdf0667eac26dae)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/display/intel_dp.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
+index 90fa73575feb1..7befd260f5949 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp.c
++++ b/drivers/gpu/drm/i915/display/intel_dp.c
+@@ -2738,7 +2738,6 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp,
+
+ crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC);
+
+- /* Currently only DP_AS_SDP_AVT_FIXED_VTOTAL mode supported */
+ as_sdp->sdp_type = DP_SDP_ADAPTIVE_SYNC;
+ as_sdp->length = 0x9;
+ as_sdp->duration_incr_ms = 0;
+@@ -2750,7 +2749,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp,
+ as_sdp->target_rr = drm_mode_vrefresh(adjusted_mode);
+ as_sdp->target_rr_divider = true;
+ } else {
+- as_sdp->mode = DP_AS_SDP_AVT_FIXED_VTOTAL;
++ as_sdp->mode = DP_AS_SDP_AVT_DYNAMIC_VTOTAL;
+ as_sdp->vtotal = adjusted_mode->vtotal;
+ as_sdp->target_rr = 0;
+ }
+--
+2.39.5
+
--- /dev/null
+From 83cb8c5c6fff22d00a7c7ec51ad0092a6dd4301e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 Jan 2025 17:30:37 -0800
+Subject: ethtool: rss: fix hiding unsupported fields in dumps
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 244f8aa46fa9e2f4ea5fe0e04988b395d5e30fc7 ]
+
+Commit ec6e57beaf8b ("ethtool: rss: don't report key if device
+doesn't support it") intended to stop reporting key fields for
+additional rss contexts if the device has a global hashing key.
+
+Later we added dump support, but the filtering wasn't carried
+over there. So we end up reporting the key fields in dumps
+but not in do requests:
+
+ # ./pyynl/cli.py --spec netlink/specs/ethtool.yaml --do rss-get \
+ --json '{"header": {"dev-index":2}, "context": 1 }'
+ {
+ "header": { ... },
+ "context": 1,
+ "indir": [0, 1, 2, 3, ...]]
+ }
+
+ # ./pyynl/cli.py --spec netlink/specs/ethtool.yaml --dump rss-get
+ [
+ ... snip context 0 ...
+ { "header": { ... },
+ "context": 1,
+ "indir": [0, 1, 2, 3, ...],
+ -> "input_xfrm": 255,
+ -> "hfunc": 1,
+ -> "hkey": "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
+ }
+ ]
+
+Hide these fields correctly.
+
+The drivers/net/hw/rss_ctx.py selftest already catches this when run
+on a device with a single key:
+
+ # Check| At /root/./ksft-net-drv/drivers/net/hw/rss_ctx.py, line 381, in test_rss_context_dump:
+ # Check| ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
+ # Check failed {0} == {0} key is all zero
+ not ok 8 rss_ctx.test_rss_context_dump
+
+Fixes: f6122900f4e2 ("ethtool: rss: support dumping RSS contexts")
+Reviewed-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Joe Damato <jdamato@fastly.com>
+Link: https://patch.msgid.link/20250201013040.725123-2-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ethtool/rss.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
+index e07386275e142..8aa45f3fdfdf0 100644
+--- a/net/ethtool/rss.c
++++ b/net/ethtool/rss.c
+@@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
+ u32 total_size, indir_bytes;
+ u8 *rss_config;
+
++ data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key;
++
+ ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
+ if (!ctx)
+ return -ENOENT;
+@@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
+ if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context)
+ return -EOPNOTSUPP;
+
+- data->no_key_fields = !ops->rxfh_per_ctx_key;
+ return rss_prepare_ctx(request, dev, data, info);
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 3bd3c99764473753848647844d0bbddebdb9aa60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Mar 2024 16:21:22 +0530
+Subject: firmware: iscsi_ibft: fix ISCSI_IBFT Kconfig entry
+
+From: Prasad Pandit <pjp@fedoraproject.org>
+
+[ Upstream commit e1e17a1715982201034024863efbf238bee2bdf9 ]
+
+Fix the ISCSI_IBFT Kconfig entry by replacing a tab with a space character.
+
+Fixes: 138fe4e0697 ("Firmware: add iSCSI iBFT Support")
+Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
+index 71d8b26c4103b..9f35f69e0f9e2 100644
+--- a/drivers/firmware/Kconfig
++++ b/drivers/firmware/Kconfig
+@@ -106,7 +106,7 @@ config ISCSI_IBFT
+ select ISCSI_BOOT_SYSFS
+ select ISCSI_IBFT_FIND if X86
+ depends on ACPI && SCSI && SCSI_LOWLEVEL
+- default n
++ default n
+ help
+ This option enables support for detection and exposing of iSCSI
+ Boot Firmware Table (iBFT) via sysfs to userspace. If you wish to
+--
+2.39.5
+
--- /dev/null
+From ccbbac2cec99a5200d09bfecdac89cb68000d953 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jun 2024 15:31:02 +1200
+Subject: gpio: pca953x: Improve interrupt support
+
+From: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
+
+[ Upstream commit d6179f6c6204f9932aed3a7a2100b4a295dfed9d ]
+
+The GPIO drivers with latch interrupt support (typically types starting
+with PCAL) have interrupt status registers to determine which particular
+inputs have caused an interrupt. Unfortunately there is no atomic
+operation to read these registers and clear the interrupt. Clearing the
+interrupt is done by reading the input registers.
+
+The code was reading the interrupt status registers, and then reading
+the input registers. If an input changed between these two events it was
+lost.
+
+The solution in this patch is to revert to the non-latch version of the
+code, i.e. remembering the previous input status and looking for the
+changes. This approach requires no additional I2C transfers, so it is no
+slower. Thanks to the latch property of the device, interrupts will
+still be noticed if the input changes back to its initial state.
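+
+A simplified sketch of the retained approach (illustrative only, not
+the literal driver code): edges are derived by diffing the current
+input state against the cached one.
+
+  DECLARE_BITMAP(changed, MAX_LINE);
+
+  /* changed = current inputs XOR previously cached inputs */
+  bitmap_xor(changed, cur_stat, chip->irq_stat, gc->ngpio);
+  /* keep only the lines armed for the observed edge */
+  bitmap_and(pending, changed, trigger, gc->ngpio);
+  /* cache the new input state for the next interrupt */
+  bitmap_copy(chip->irq_stat, cur_stat, gc->ngpio);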
+
+Fixes: 44896beae605 ("gpio: pca953x: add PCAL9535 interrupt support for Galileo Gen2")
+Signed-off-by: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20240606033102.2271916-1-mark.tomlinson@alliedtelesis.co.nz
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-pca953x.c | 19 -------------------
+ 1 file changed, 19 deletions(-)
+
+diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
+index e49802f26e07f..d764a3af63467 100644
+--- a/drivers/gpio/gpio-pca953x.c
++++ b/drivers/gpio/gpio-pca953x.c
+@@ -841,25 +841,6 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin
+ DECLARE_BITMAP(trigger, MAX_LINE);
+ int ret;
+
+- if (chip->driver_data & PCA_PCAL) {
+- /* Read the current interrupt status from the device */
+- ret = pca953x_read_regs(chip, PCAL953X_INT_STAT, trigger);
+- if (ret)
+- return false;
+-
+- /* Check latched inputs and clear interrupt status */
+- ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
+- if (ret)
+- return false;
+-
+- /* Apply filter for rising/falling edge selection */
+- bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio);
+-
+- bitmap_and(pending, new_stat, trigger, gc->ngpio);
+-
+- return !bitmap_empty(pending, gc->ngpio);
+- }
+-
+ ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
+ if (ret)
+ return false;
+--
+2.39.5
+
--- /dev/null
+From cb74157dc98c075c5bb905ad002c7f6d7424423a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 12:01:23 +0100
+Subject: gpio: sim: lock hog configfs items if present
+
+From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+
+[ Upstream commit 015b7dae084fa95465ff89f6cbf15fe49906a370 ]
+
+Depending on the user config, the leaf entry may be the hog directory,
+not the line directory. Check for this and lock the correct item.
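+
+For reference, a sketch of the configfs layout in question (paths are
+illustrative):
+
+  /sys/kernel/config/gpio-sim/dev0/bank0/line0/      <- leaf is the line
+  /sys/kernel/config/gpio-sim/dev0/bank0/line0/hog/  <- leaf is the hog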
+
+Fixes: 8bd76b3d3f3a ("gpio: sim: lock up configfs that an instantiated device depends on")
+Tested-by: Koichiro Den <koichiro.den@canonical.com>
+Link: https://lore.kernel.org/r/20250203110123.87701-1-brgl@bgdev.pl
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-sim.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
+index deedacdeb2395..f83a8b5a51d0d 100644
+--- a/drivers/gpio/gpio-sim.c
++++ b/drivers/gpio/gpio-sim.c
+@@ -1036,20 +1036,23 @@ gpio_sim_device_lockup_configfs(struct gpio_sim_device *dev, bool lock)
+ struct configfs_subsystem *subsys = dev->group.cg_subsys;
+ struct gpio_sim_bank *bank;
+ struct gpio_sim_line *line;
++ struct config_item *item;
+
+ /*
+- * The device only needs to depend on leaf line entries. This is
++ * The device only needs to depend on leaf entries. This is
+ * sufficient to lock up all the configfs entries that the
+ * instantiated, alive device depends on.
+ */
+ list_for_each_entry(bank, &dev->bank_list, siblings) {
+ list_for_each_entry(line, &bank->line_list, siblings) {
++ item = line->hog ? &line->hog->item
++ : &line->group.cg_item;
++
+ if (lock)
+- WARN_ON(configfs_depend_item_unlocked(
+- subsys, &line->group.cg_item));
++ WARN_ON(configfs_depend_item_unlocked(subsys,
++ item));
+ else
+- configfs_undepend_item_unlocked(
+- &line->group.cg_item);
++ configfs_undepend_item_unlocked(item);
+ }
+ }
+ }
+--
+2.39.5
+
--- /dev/null
+From 091a2f1a43f99622cea65514897c5344445f6b94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jan 2025 10:51:48 +0100
+Subject: gpu: drm_dp_cec: fix broken CEC adapter properties check
+
+From: Hans Verkuil <hverkuil@xs4all.nl>
+
+[ Upstream commit 6daaae5ff7f3b23a2dacc9c387ff3d4f95b67cad ]
+
+If the hotplug detect of a display is low for longer than one second
+(configurable through drm_dp_cec_unregister_delay), then the CEC adapter
+is unregistered since we assume the display was disconnected. If the
+HPD went low for less than one second, then we check whether the
+properties of the CEC adapter have changed, since that indicates that we
+actually switched to new hardware and have to unregister the old CEC
+device and register a new one.
+
+Unfortunately, the test for changed properties was written poorly, and
+after a new CEC capability was added to the CEC core code the test always
+returned true (i.e. the properties had changed).
+
+As a result the CEC device was unregistered and re-registered for every
+HPD toggle. If the CEC remote controller integration was also enabled
+(CONFIG_MEDIA_CEC_RC was set), then the corresponding input device was
+also unregistered and re-registered. As a result the input device in
+/sys would keep incrementing its number, e.g.:
+
+/sys/devices/pci0000:00/0000:00:08.1/0000:e7:00.0/rc/rc0/input20
+
+Since short HPD toggles are common, the number could over time get into
+the thousands.
+
+While not a serious issue (i.e. nothing crashes), it is not intended
+to work that way.
+
+This patch changes the test so that it only checks the single CEC
+capability that can actually change and ignores any other capabilities,
+which keeps the check correct if new caps are added in the future.
+
+With the changed test the bit under #ifndef CONFIG_MEDIA_CEC_RC can be
+dropped as well, so that's a nice cleanup.
+
+Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
+Reported-by: Farblos <farblos@vodafonemail.de>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Fixes: 2c6d1fffa1d9 ("drm: add support for DisplayPort CEC-Tunneling-over-AUX")
+Tested-by: Farblos <farblos@vodafonemail.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/361bb03d-1691-4e23-84da-0861ead5dbdc@xs4all.nl
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/display/drm_dp_cec.c | 14 +++-----------
+ 1 file changed, 3 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c
+index 007ceb281d00d..56a4965e518cc 100644
+--- a/drivers/gpu/drm/display/drm_dp_cec.c
++++ b/drivers/gpu/drm/display/drm_dp_cec.c
+@@ -311,16 +311,6 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address)
+ if (!aux->transfer)
+ return;
+
+-#ifndef CONFIG_MEDIA_CEC_RC
+- /*
+- * CEC_CAP_RC is part of CEC_CAP_DEFAULTS, but it is stripped by
+- * cec_allocate_adapter() if CONFIG_MEDIA_CEC_RC is undefined.
+- *
+- * Do this here as well to ensure the tests against cec_caps are
+- * correct.
+- */
+- cec_caps &= ~CEC_CAP_RC;
+-#endif
+ cancel_delayed_work_sync(&aux->cec.unregister_work);
+
+ mutex_lock(&aux->cec.lock);
+@@ -337,7 +327,9 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address)
+ num_las = CEC_MAX_LOG_ADDRS;
+
+ if (aux->cec.adap) {
+- if (aux->cec.adap->capabilities == cec_caps &&
++ /* Check if the adapter properties have changed */
++ if ((aux->cec.adap->capabilities & CEC_CAP_MONITOR_ALL) ==
++ (cec_caps & CEC_CAP_MONITOR_ALL) &&
+ aux->cec.adap->available_log_addrs == num_las) {
+ /* Unchanged, so just set the phys addr */
+ cec_s_phys_addr(aux->cec.adap, source_physical_address, false);
+--
+2.39.5
+
--- /dev/null
+From c36a194ff0b1e72ae274dc25dd266c3ea4b4dcec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 Jan 2025 01:38:32 +0000
+Subject: ice: Add check for devm_kzalloc()
+
+From: Jiasheng Jiang <jiashengjiangcool@gmail.com>
+
+[ Upstream commit a8aa6a6ddce9b5585f2b74f27f3feea1427fb4e7 ]
+
+Add a check for the return value of devm_kzalloc() to guarantee that
+the allocation succeeded.
+
+Fixes: 42c2eb6b1f43 ("ice: Implement devlink-rate API")
+Signed-off-by: Jiasheng Jiang <jiashengjiangcool@gmail.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/20250131013832.24805-1-jiashengjiangcool@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/devlink/devlink.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c
+index 415445cefdb2a..b1efd287b3309 100644
+--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c
++++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c
+@@ -977,6 +977,9 @@ static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv
+
+ /* preallocate memory for ice_sched_node */
+ node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL);
++ if (!node)
++ return -ENOMEM;
++
+ *priv = node;
+
+ return 0;
+--
+2.39.5
+
--- /dev/null
+From 7ca9856191d44cd7902508d34512628529635415 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jan 2025 16:01:17 +0100
+Subject: ice: gather page_count()'s of each frag right before XDP prog call
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 11c4aa074d547d825b19cd8d9f288254d89d805c ]
+
+If we store the pgcnt on a few fragments while in the middle of
+gathering the whole frame and we stumble upon the DD bit not being set,
+we terminate the NAPI Rx processing loop and come back later. Then, on
+the next NAPI execution, we work on the previously stored pgcnt.
+
+Imagine that the second half of the page was actively used by the
+networking stack and, by the time we came back, the stack was no longer
+busy with this page and had decremented the refcnt. In this case the
+page reuse algorithm should be free to reuse the page, but given the old
+refcnt it will not do so and will instead attempt to release the page
+via page_frag_cache_drain() with pagecnt_bias used as an arg. This in
+turn results in a negative refcnt on the struct page, which was
+initially observed by Xu Du.
+
+Therefore, move the page count storage from ice_get_rx_buf() to a place
+where we are sure that the whole frame has been collected, but before
+calling the XDP program, as it can internally change the page count of
+fragments belonging to the xdp_buff.
+
+Fixes: ac0753391195 ("ice: Store page count inside ice_rx_buf")
+Reported-and-tested-by: Xu Du <xudu@redhat.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Co-developed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_txrx.c | 27 ++++++++++++++++++++++-
+ 1 file changed, 26 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
+index 4660e2302e2ae..e2150d2c093bf 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
+@@ -924,7 +924,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
+ struct ice_rx_buf *rx_buf;
+
+ rx_buf = &rx_ring->rx_buf[ntc];
+- rx_buf->pgcnt = page_count(rx_buf->page);
+ prefetchw(rx_buf->page);
+
+ if (!size)
+@@ -940,6 +939,31 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
+ return rx_buf;
+ }
+
++/**
++ * ice_get_pgcnts - grab page_count() for gathered fragments
++ * @rx_ring: Rx descriptor ring to store the page counts on
++ *
++ * This function is intended to be called right before running XDP
++ * program so that the page recycling mechanism will be able to take
++ * a correct decision regarding underlying pages; this is done in such
++ * way as XDP program can change the refcount of page
++ */
++static void ice_get_pgcnts(struct ice_rx_ring *rx_ring)
++{
++ u32 nr_frags = rx_ring->nr_frags + 1;
++ u32 idx = rx_ring->first_desc;
++ struct ice_rx_buf *rx_buf;
++ u32 cnt = rx_ring->count;
++
++ for (int i = 0; i < nr_frags; i++) {
++ rx_buf = &rx_ring->rx_buf[idx];
++ rx_buf->pgcnt = page_count(rx_buf->page);
++
++ if (++idx == cnt)
++ idx = 0;
++ }
++}
++
+ /**
+ * ice_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+@@ -1241,6 +1265,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ if (ice_is_non_eop(rx_ring, rx_desc))
+ continue;
+
++ ice_get_pgcnts(rx_ring);
+ ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
+ if (rx_buf->act == ICE_XDP_PASS)
+ goto construct_skb;
+--
+2.39.5
+
--- /dev/null
+From abde409aab46b7bca81dee8543007d679c2cff61 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jan 2025 16:01:16 +0100
+Subject: ice: put Rx buffers after being done with current frame
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 743bbd93cf29f653fae0e1416a31f03231689911 ]
+
+Introduce a new helper, ice_put_rx_mbuf(), that goes through the
+gathered frags of the current frame and calls ice_put_rx_buf() on them.
+The current logic, which was supposed to simplify and optimize the
+driver by going through a batch of all buffers processed in the current
+NAPI instance, turned out to be broken for jumbo frames under very heavy
+load coming from both a multi-threaded iperf and an nginx/wrk pair
+between server and client. The delay introduced by the approach we are
+dropping is simply too big, and we need to make the page
+recycling/releasing decision as quickly as we can.
+
+While at it, address an error path of ice_add_xdp_frag() - we have been
+missing the buffer putting there since day 1.
+
+As a nice side effect, we get rid of the annoying and repetitive
+three-liner:
+
+ xdp->data = NULL;
+ rx_ring->first_desc = ntc;
+ rx_ring->nr_frags = 0;
+
+by embedding it within introduced routine.
+
+Fixes: 1dc1a7e7f410 ("ice: Centrallize Rx buffer recycling")
+Reported-and-tested-by: Xu Du <xudu@redhat.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Co-developed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_txrx.c | 79 ++++++++++++++---------
+ 1 file changed, 50 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
+index 8208055d6e7fc..4660e2302e2ae 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
+@@ -1103,6 +1103,49 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
+ rx_buf->page = NULL;
+ }
+
++/**
++ * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags
++ * @rx_ring: Rx ring with all the auxiliary data
++ * @xdp: XDP buffer carrying linear + frags part
++ * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage
++ * @ntc: a current next_to_clean value to be stored at rx_ring
++ *
++ * Walk through gathered fragments and satisfy internal page
++ * recycle mechanism; we take here an action related to verdict
++ * returned by XDP program;
++ */
++static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
++ u32 *xdp_xmit, u32 ntc)
++{
++ u32 nr_frags = rx_ring->nr_frags + 1;
++ u32 idx = rx_ring->first_desc;
++ u32 cnt = rx_ring->count;
++ struct ice_rx_buf *buf;
++ int i;
++
++ for (i = 0; i < nr_frags; i++) {
++ buf = &rx_ring->rx_buf[idx];
++
++ if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
++ ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
++ *xdp_xmit |= buf->act;
++ } else if (buf->act & ICE_XDP_CONSUMED) {
++ buf->pagecnt_bias++;
++ } else if (buf->act == ICE_XDP_PASS) {
++ ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
++ }
++
++ ice_put_rx_buf(rx_ring, buf);
++
++ if (++idx == cnt)
++ idx = 0;
++ }
++
++ xdp->data = NULL;
++ rx_ring->first_desc = ntc;
++ rx_ring->nr_frags = 0;
++}
++
+ /**
+ * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: Rx descriptor ring to transact packets on
+@@ -1120,7 +1163,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+ unsigned int offset = rx_ring->rx_offset;
+ struct xdp_buff *xdp = &rx_ring->xdp;
+- u32 cached_ntc = rx_ring->first_desc;
+ struct ice_tx_ring *xdp_ring = NULL;
+ struct bpf_prog *xdp_prog = NULL;
+ u32 ntc = rx_ring->next_to_clean;
+@@ -1128,7 +1170,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ u32 xdp_xmit = 0;
+ u32 cached_ntu;
+ bool failure;
+- u32 first;
+
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ if (xdp_prog) {
+@@ -1190,6 +1231,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
+ xdp_buff_clear_frags_flag(xdp);
+ } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
++ ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc);
+ break;
+ }
+ if (++ntc == cnt)
+@@ -1205,9 +1247,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ total_rx_bytes += xdp_get_buff_len(xdp);
+ total_rx_pkts++;
+
+- xdp->data = NULL;
+- rx_ring->first_desc = ntc;
+- rx_ring->nr_frags = 0;
++ ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
++
+ continue;
+ construct_skb:
+ if (likely(ice_ring_uses_build_skb(rx_ring)))
+@@ -1221,14 +1262,11 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ ice_set_rx_bufs_act(xdp, rx_ring,
+ ICE_XDP_CONSUMED);
+- xdp->data = NULL;
+- rx_ring->first_desc = ntc;
+- rx_ring->nr_frags = 0;
+- break;
+ }
+- xdp->data = NULL;
+- rx_ring->first_desc = ntc;
+- rx_ring->nr_frags = 0;
++ ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
++
++ if (!skb)
++ break;
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
+ if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
+@@ -1257,23 +1295,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ total_rx_pkts++;
+ }
+
+- first = rx_ring->first_desc;
+- while (cached_ntc != first) {
+- struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc];
+-
+- if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+- ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+- xdp_xmit |= buf->act;
+- } else if (buf->act & ICE_XDP_CONSUMED) {
+- buf->pagecnt_bias++;
+- } else if (buf->act == ICE_XDP_PASS) {
+- ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+- }
+-
+- ice_put_rx_buf(rx_ring, buf);
+- if (++cached_ntc >= cnt)
+- cached_ntc = 0;
+- }
+ rx_ring->next_to_clean = ntc;
+ /* return up to cleaned_count buffers to hardware */
+ failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring));
+--
+2.39.5
+
--- /dev/null
+From fb3aca8dee1b56a69e784eaacd7f728159dcd4e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jan 2025 16:01:18 +0100
+Subject: ice: stop storing XDP verdict within ice_rx_buf
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 468a1952df78f65c5991b7ac885c8b5b7dd87bab ]
+
+The idea behind having ice_rx_buf::act was to simplify and speed up the
+Rx data path by walking through buffers that represented cleaned HW Rx
+descriptors. Since it caused us a major headache recently and we rolled
+back to the old approach that 'puts' Rx buffers right after running the
+XDP prog/creating the skb, this is useless now and should be removed.
+
+Get rid of ice_rx_buf::act and the related logic. We still need to take
+care of a corner case where the XDP program releases a particular
+fragment.
+
+Make ice_run_xdp() return its result and use it within
+ice_put_rx_mbuf().
+
+Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_txrx.c | 62 +++++++++++--------
+ drivers/net/ethernet/intel/ice/ice_txrx.h | 1 -
+ drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 43 -------------
+ 3 files changed, 36 insertions(+), 70 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
+index e2150d2c093bf..f12fb3a2b6ad9 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
+@@ -527,15 +527,14 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ * @xdp_ring: ring to be used for XDP_TX action
+- * @rx_buf: Rx buffer to store the XDP action
+ * @eop_desc: Last descriptor in packet to read metadata from
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+-static void
++static u32
+ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
+- struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
++ union ice_32b_rx_flex_desc *eop_desc)
+ {
+ unsigned int ret = ICE_XDP_PASS;
+ u32 act;
+@@ -574,7 +573,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+ ret = ICE_XDP_CONSUMED;
+ }
+ exit:
+- ice_set_rx_bufs_act(xdp, rx_ring, ret);
++ return ret;
+ }
+
+ /**
+@@ -860,10 +859,8 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+ xdp_buff_set_frags_flag(xdp);
+ }
+
+- if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+- ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
++ if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS))
+ return -ENOMEM;
+- }
+
+ __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
+ rx_buf->page_offset, size);
+@@ -1075,12 +1072,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
+ rx_buf->page_offset + headlen, size,
+ xdp->frame_sz);
+ } else {
+- /* buffer is unused, change the act that should be taken later
+- * on; data was copied onto skb's linear part so there's no
++ /* buffer is unused, restore biased page count in Rx buffer;
++ * data was copied onto skb's linear part so there's no
+ * need for adjusting page offset and we can reuse this buffer
+ * as-is
+ */
+- rx_buf->act = ICE_SKB_CONSUMED;
++ rx_buf->pagecnt_bias++;
+ }
+
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+@@ -1133,29 +1130,34 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
+ * @xdp: XDP buffer carrying linear + frags part
+ * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage
+ * @ntc: a current next_to_clean value to be stored at rx_ring
++ * @verdict: return code from XDP program execution
+ *
+ * Walk through gathered fragments and satisfy internal page
+ * recycle mechanism; we take here an action related to verdict
+ * returned by XDP program;
+ */
+ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+- u32 *xdp_xmit, u32 ntc)
++ u32 *xdp_xmit, u32 ntc, u32 verdict)
+ {
+ u32 nr_frags = rx_ring->nr_frags + 1;
+ u32 idx = rx_ring->first_desc;
+ u32 cnt = rx_ring->count;
++ u32 post_xdp_frags = 1;
+ struct ice_rx_buf *buf;
+ int i;
+
+- for (i = 0; i < nr_frags; i++) {
++ if (unlikely(xdp_buff_has_frags(xdp)))
++ post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags;
++
++ for (i = 0; i < post_xdp_frags; i++) {
+ buf = &rx_ring->rx_buf[idx];
+
+- if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
++ if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+- *xdp_xmit |= buf->act;
+- } else if (buf->act & ICE_XDP_CONSUMED) {
++ *xdp_xmit |= verdict;
++ } else if (verdict & ICE_XDP_CONSUMED) {
+ buf->pagecnt_bias++;
+- } else if (buf->act == ICE_XDP_PASS) {
++ } else if (verdict == ICE_XDP_PASS) {
+ ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+ }
+
+@@ -1164,6 +1166,17 @@ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+ if (++idx == cnt)
+ idx = 0;
+ }
++ /* handle buffers that represented frags released by XDP prog;
++ * for these we keep pagecnt_bias as-is; refcount from struct page
++ * has been decremented within XDP prog and we do not have to increase
++ * the biased refcnt
++ */
++ for (; i < nr_frags; i++) {
++ buf = &rx_ring->rx_buf[idx];
++ ice_put_rx_buf(rx_ring, buf);
++ if (++idx == cnt)
++ idx = 0;
++ }
+
+ xdp->data = NULL;
+ rx_ring->first_desc = ntc;
+@@ -1190,9 +1203,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ struct ice_tx_ring *xdp_ring = NULL;
+ struct bpf_prog *xdp_prog = NULL;
+ u32 ntc = rx_ring->next_to_clean;
++ u32 cached_ntu, xdp_verdict;
+ u32 cnt = rx_ring->count;
+ u32 xdp_xmit = 0;
+- u32 cached_ntu;
+ bool failure;
+
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+@@ -1255,7 +1268,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
+ xdp_buff_clear_frags_flag(xdp);
+ } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
+- ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc);
++ ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED);
+ break;
+ }
+ if (++ntc == cnt)
+@@ -1266,13 +1279,13 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ continue;
+
+ ice_get_pgcnts(rx_ring);
+- ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
+- if (rx_buf->act == ICE_XDP_PASS)
++ xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc);
++ if (xdp_verdict == ICE_XDP_PASS)
+ goto construct_skb;
+ total_rx_bytes += xdp_get_buff_len(xdp);
+ total_rx_pkts++;
+
+- ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
++ ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
+
+ continue;
+ construct_skb:
+@@ -1283,12 +1296,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+ /* exit if we failed to retrieve a buffer */
+ if (!skb) {
+ rx_ring->ring_stats->rx_stats.alloc_page_failed++;
+- rx_buf->act = ICE_XDP_CONSUMED;
+- if (unlikely(xdp_buff_has_frags(xdp)))
+- ice_set_rx_bufs_act(xdp, rx_ring,
+- ICE_XDP_CONSUMED);
++ xdp_verdict = ICE_XDP_CONSUMED;
+ }
+- ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
++ ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
+
+ if (!skb)
+ break;
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
+index feba314a3fe44..7130992d41779 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
+@@ -201,7 +201,6 @@ struct ice_rx_buf {
+ struct page *page;
+ unsigned int page_offset;
+ unsigned int pgcnt;
+- unsigned int act;
+ unsigned int pagecnt_bias;
+ };
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+index afcead4baef4b..f6c2b16ab4567 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
++++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+@@ -5,49 +5,6 @@
+ #define _ICE_TXRX_LIB_H_
+ #include "ice.h"
+
+-/**
+- * ice_set_rx_bufs_act - propagate Rx buffer action to frags
+- * @xdp: XDP buffer representing frame (linear and frags part)
+- * @rx_ring: Rx ring struct
+- * act: action to store onto Rx buffers related to XDP buffer parts
+- *
+- * Set action that should be taken before putting Rx buffer from first frag
+- * to the last.
+- */
+-static inline void
+-ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
+- const unsigned int act)
+-{
+- u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+- u32 nr_frags = rx_ring->nr_frags + 1;
+- u32 idx = rx_ring->first_desc;
+- u32 cnt = rx_ring->count;
+- struct ice_rx_buf *buf;
+-
+- for (int i = 0; i < nr_frags; i++) {
+- buf = &rx_ring->rx_buf[idx];
+- buf->act = act;
+-
+- if (++idx == cnt)
+- idx = 0;
+- }
+-
+- /* adjust pagecnt_bias on frags freed by XDP prog */
+- if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
+- u32 delta = rx_ring->nr_frags - sinfo_frags;
+-
+- while (delta) {
+- if (idx == 0)
+- idx = cnt - 1;
+- else
+- idx--;
+- buf = &rx_ring->rx_buf[idx];
+- buf->pagecnt_bias--;
+- delta--;
+- }
+- }
+-}
+-
+ /**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @status_err_n: Rx descriptor status_error0 or status_error1 bits
+--
+2.39.5
+
--- /dev/null
+From a4315eb6daaf4b61bcd7168f478bf2d3c6eab7ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 09:36:05 -0500
+Subject: net: atlantic: fix warning during hot unplug
+
+From: Jacob Moroni <mail@jakemoroni.com>
+
+[ Upstream commit 028676bb189ed6d1b550a0fc570a9d695b6acfd3 ]
+
+Firmware deinitialization performs MMIO accesses which are not
+necessary if the device has already been removed. In some cases,
+these accesses happen via readx_poll_timeout_atomic which ends up
+timing out, resulting in a warning at hw_atl2_utils_fw.c:112:
+
+[ 104.595913] Call Trace:
+[ 104.595915] <TASK>
+[ 104.595918] ? show_regs+0x6c/0x80
+[ 104.595923] ? __warn+0x8d/0x150
+[ 104.595925] ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic]
+[ 104.595934] ? report_bug+0x182/0x1b0
+[ 104.595938] ? handle_bug+0x6e/0xb0
+[ 104.595940] ? exc_invalid_op+0x18/0x80
+[ 104.595942] ? asm_exc_invalid_op+0x1b/0x20
+[ 104.595944] ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic]
+[ 104.595952] ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic]
+[ 104.595959] aq_nic_deinit.part.0+0xbd/0xf0 [atlantic]
+[ 104.595964] aq_nic_deinit+0x17/0x30 [atlantic]
+[ 104.595970] aq_ndev_close+0x2b/0x40 [atlantic]
+[ 104.595975] __dev_close_many+0xad/0x160
+[ 104.595978] dev_close_many+0x99/0x170
+[ 104.595979] unregister_netdevice_many_notify+0x18b/0xb20
+[ 104.595981] ? __call_rcu_common+0xcd/0x700
+[ 104.595984] unregister_netdevice_queue+0xc6/0x110
+[ 104.595986] unregister_netdev+0x1c/0x30
+[ 104.595988] aq_pci_remove+0xb1/0xc0 [atlantic]
+
+Fix this by skipping firmware deinitialization altogether if the
+PCI device is no longer present.
+
+Tested with an AQC113 attached via Thunderbolt by performing
+repeated unplug cycles while traffic was running via iperf.
+
+Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code")
+Signed-off-by: Jacob Moroni <mail@jakemoroni.com>
+Reviewed-by: Igor Russkikh <irusskikh@marvell.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250203143604.24930-3-mail@jakemoroni.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+index fe0e3e2a81171..71e50fc65c147 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+@@ -1441,7 +1441,9 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down)
+ aq_ptp_ring_free(self);
+ aq_ptp_free(self);
+
+- if (likely(self->aq_fw_ops->deinit) && link_down) {
++ /* May be invoked during hot unplug. */
++ if (pci_device_is_present(self->pdev) &&
++ likely(self->aq_fw_ops->deinit) && link_down) {
+ mutex_lock(&self->fwreq_mutex);
+ self->aq_fw_ops->deinit(self->aq_hw);
+ mutex_unlock(&self->fwreq_mutex);
+--
+2.39.5
+
--- /dev/null
+From 6ca0b78343b97dd8592ff782fb94d763ed647957 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jan 2025 15:13:42 -0800
+Subject: net: bcmgenet: Correct overlaying of PHY and MAC Wake-on-LAN
+
+From: Florian Fainelli <florian.fainelli@broadcom.com>
+
+[ Upstream commit 46ded709232344b5750a852747a8881763c721ab ]
+
+Some Wake-on-LAN modes such as WAKE_FILTER may only be supported by the MAC,
+while others might only be supported by the PHY. Make sure that .get_wol()
+returns the union of both rather than only that of the PHY if the PHY
+supports Wake-on-LAN.
+
+When disabling Wake-on-LAN, make sure that this is done at both the PHY
+and MAC level, rather than doing an early return from the PHY driver.
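+
+An illustrative outcome with hypothetical capabilities: if the PHY
+supports WAKE_MAGIC and the MAC supports WAKE_FILTER, .get_wol() must
+report the union of the two:
+
+  wol->supported == WAKE_MAGIC | WAKE_FILTER   /* PHY caps | MAC caps */
+  wol->wolopts   == phy_wolopts | priv->wolopts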
+
+Fixes: 7e400ff35cbe ("net: bcmgenet: Add support for PHY-based Wake-on-LAN")
+Fixes: 9ee09edc05f2 ("net: bcmgenet: Properly overlay PHY and MAC Wake-on-LAN capabilities")
+Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Link: https://patch.msgid.link/20250129231342.35013-1-florian.fainelli@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/broadcom/genet/bcmgenet_wol.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+index 0715ea5bf13ed..3b082114f2e53 100644
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+@@ -41,9 +41,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+ {
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct device *kdev = &priv->pdev->dev;
++ u32 phy_wolopts = 0;
+
+- if (dev->phydev)
++ if (dev->phydev) {
+ phy_ethtool_get_wol(dev->phydev, wol);
++ phy_wolopts = wol->wolopts;
++ }
+
+ /* MAC is not wake-up capable, return what the PHY does */
+ if (!device_can_wakeup(kdev))
+@@ -51,9 +54,14 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+
+ /* Overlay MAC capabilities with that of the PHY queried before */
+ wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
+- wol->wolopts = priv->wolopts;
+- memset(wol->sopass, 0, sizeof(wol->sopass));
++ wol->wolopts |= priv->wolopts;
+
++ /* Return the PHY configured magic password */
++ if (phy_wolopts & WAKE_MAGICSECURE)
++ return;
++
++ /* Otherwise the MAC one */
++ memset(wol->sopass, 0, sizeof(wol->sopass));
+ if (wol->wolopts & WAKE_MAGICSECURE)
+ memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass));
+ }
+@@ -70,7 +78,7 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+ /* Try Wake-on-LAN from the PHY first */
+ if (dev->phydev) {
+ ret = phy_ethtool_set_wol(dev->phydev, wol);
+- if (ret != -EOPNOTSUPP)
++ if (ret != -EOPNOTSUPP && wol->wolopts)
+ return ret;
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 728eb0dbccab6bb8e4ce5932dd4beba6b8db91a2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 17:08:38 +0000
+Subject: net: rose: lock the socket in rose_bind()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a1300691aed9ee852b0a9192e29e2bdc2411a7e6 ]
+
+syzbot reported a soft lockup in rose_loopback_timer(),
+with a repro calling bind() from multiple threads.
+
+rose_bind() must lock the socket to avoid this issue.
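+
+An illustrative interleaving without the socket lock (hypothetical
+timeline, for clarity):
+
+  thread A: rose_bind() sees SOCK_ZAPPED set, proceeds
+  thread B: rose_bind() sees SOCK_ZAPPED still set, proceeds
+  both:     rose_insert_socket() + sock_reset_flag(sk, SOCK_ZAPPED)
+
+With the socket inserted twice, rose_loopback_timer() can end up
+walking inconsistent state and soft lock up.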
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzbot+7ff41b5215f0c534534e@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/67a0f78d.050a0220.d7c5a.00a0.GAE@google.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Link: https://patch.msgid.link/20250203170838.3521361-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rose/af_rose.c | 24 ++++++++++++++++--------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
+index 72c65d938a150..a4a668b88a8f2 100644
+--- a/net/rose/af_rose.c
++++ b/net/rose/af_rose.c
+@@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ struct net_device *dev;
+ ax25_address *source;
+ ax25_uid_assoc *user;
++ int err = -EINVAL;
+ int n;
+
+- if (!sock_flag(sk, SOCK_ZAPPED))
+- return -EINVAL;
+-
+ if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose))
+ return -EINVAL;
+
+@@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS)
+ return -EINVAL;
+
+- if ((dev = rose_dev_get(&addr->srose_addr)) == NULL)
+- return -EADDRNOTAVAIL;
++ lock_sock(sk);
++
++ if (!sock_flag(sk, SOCK_ZAPPED))
++ goto out_release;
++
++ err = -EADDRNOTAVAIL;
++ dev = rose_dev_get(&addr->srose_addr);
++ if (!dev)
++ goto out_release;
+
+ source = &addr->srose_call;
+
+@@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ } else {
+ if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
+ dev_put(dev);
+- return -EACCES;
++ err = -EACCES;
++ goto out_release;
+ }
+ rose->source_call = *source;
+ }
+@@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ rose_insert_socket(sk);
+
+ sock_reset_flag(sk, SOCK_ZAPPED);
+-
+- return 0;
++ err = 0;
++out_release:
++ release_sock(sk);
++ return err;
+ }
+
+ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
+--
+2.39.5
+
--- /dev/null
+From 9fdc22cd92f4521e8203fc2e0027f53449d61237 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Feb 2025 14:38:39 +0200
+Subject: net: sched: Fix truncation of offloaded action statistics
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 811b8f534fd85e17077bd2ac0413bcd16cc8fb9b ]
+
+In case of tc offload, when user space queries the kernel for tc action
+statistics, tc will query the offloaded statistics from device drivers.
+Among other statistics, drivers are expected to pass the number of
+packets that hit the action since the last query as a 64-bit number.
+
+Unfortunately, tc treats the number of packets as a 32-bit number,
+leading to truncation and incorrect statistics when the number of
+packets since the last query exceeds 0xffffffff:
+
+$ tc -s filter show dev swp2 ingress
+filter protocol all pref 1 flower chain 0
+filter protocol all pref 1 flower chain 0 handle 0x1
+ skip_sw
+ in_hw in_hw_count 1
+ action order 1: mirred (Egress Redirect to device swp1) stolen
+ index 1 ref 1 bind 1 installed 58 sec used 0 sec
+ Action statistics:
+ Sent 1133877034176 bytes 536959475 pkt (dropped 0, overlimits 0 requeues 0)
+[...]
+
+According to the above, 2111-byte packets were redirected which is
+impossible as only 64-byte packets were transmitted and the MTU was
+1500.
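+
+A worked illustration of the truncation (values hypothetical):
+
+  u64 packets = 0x500000001ULL;   /* 21474836481 packets since last query */
+  u32 truncated = packets;        /* 1 - the upper 32 bits are lost */
+
+Since bytes are still accumulated as a full 64-bit value, the
+bytes/packets ratio becomes nonsensical, which is how the impossible
+2111-byte average above shows up.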
+
+Fix by treating packets as a 64-bit number:
+
+$ tc -s filter show dev swp2 ingress
+filter protocol all pref 1 flower chain 0
+filter protocol all pref 1 flower chain 0 handle 0x1
+ skip_sw
+ in_hw in_hw_count 1
+ action order 1: mirred (Egress Redirect to device swp1) stolen
+ index 1 ref 1 bind 1 installed 61 sec used 0 sec
+ Action statistics:
+ Sent 1370624380864 bytes 21416005951 pkt (dropped 0, overlimits 0 requeues 0)
+[...]
+
+Which shows that only 64-byte packets were redirected (1370624380864 /
+21416005951 = 64).
+
+Fixes: 380407023526 ("net/sched: Enable netdev drivers to update statistics of offloaded actions")
+Reported-by: Joe Botha <joe@atomic.ac>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250204123839.1151804-1-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sch_generic.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
+index 1e6324f0d4efd..24e48af7e8f74 100644
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -851,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ }
+
+ static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
+- __u64 bytes, __u32 packets)
++ __u64 bytes, __u64 packets)
+ {
+ u64_stats_update_begin(&bstats->syncp);
+ u64_stats_add(&bstats->bytes, bytes);
+--
+2.39.5
+
--- /dev/null
+From 944010266fa6b448259f14df40c7314cd75653a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 16:58:40 -0800
+Subject: netem: Update sch->q.qlen before qdisc_tree_reduce_backlog()
+
+From: Cong Wang <cong.wang@bytedance.com>
+
+[ Upstream commit 638ba5089324796c2ee49af10427459c2de35f71 ]
+
+qdisc_tree_reduce_backlog() notifies the parent qdisc only if the child
+qdisc becomes empty, therefore we need to reduce the backlog of the
+child qdisc before calling it. Otherwise it would miss the opportunity
+to call cops->qlen_notify(); in the case of DRR, this resulted in a UAF
+since DRR uses ->qlen_notify() to maintain its active list.
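+
+The required ordering, as a sketch (this is exactly what the hunk below
+establishes):
+
+  sch->qstats.backlog -= pkt_len;
+  sch->q.qlen--;                              /* child may now be empty */
+  qdisc_tree_reduce_backlog(sch, 1, pkt_len); /* parents can get
+                                                 ->qlen_notify() */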
+
+Fixes: f8d4bc455047 ("net/sched: netem: account for backlog updates from child qdisc")
+Cc: Martin Ottens <martin.ottens@fau.de>
+Reported-by: Mingi Cho <mincho@theori.io>
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Link: https://patch.msgid.link/20250204005841.223511-4-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_netem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
+index 3b519adc01259..68a08f6d1fbce 100644
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -748,9 +748,9 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
+ if (err != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(err))
+ qdisc_qstats_drop(sch);
+- qdisc_tree_reduce_backlog(sch, 1, pkt_len);
+ sch->qstats.backlog -= pkt_len;
+ sch->q.qlen--;
++ qdisc_tree_reduce_backlog(sch, 1, pkt_len);
+ }
+ goto tfifo_dequeue;
+ }
+--
+2.39.5
+
--- /dev/null
+From 7683ba62df9bc12135959e982b062d39f93d25f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jan 2025 17:34:47 +0100
+Subject: nvme-fc: use ctrl state getter
+
+From: Daniel Wagner <wagi@kernel.org>
+
+[ Upstream commit c8ed6cb5d37bc09c7e25e49a670e9fd1a3bd1dfa ]
+
+Do not access the state variable directly; instead, use proper
+synchronization so that no stale data is read.
+
+Fixes: e6e7f7ac03e4 ("nvme: ensure reset state check ordering")
+Signed-off-by: Daniel Wagner <wagi@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/fc.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
+index b81af7919e94c..682234da2fabe 100644
+--- a/drivers/nvme/host/fc.c
++++ b/drivers/nvme/host/fc.c
+@@ -2080,7 +2080,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
+ nvme_fc_complete_rq(rq);
+
+ check_error:
+- if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
++ if (terminate_assoc &&
++ nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_RESETTING)
+ queue_work(nvme_reset_wq, &ctrl->ioerr_work);
+ }
+
+@@ -2534,6 +2535,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
+ static void
+ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
+ {
++ enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
++
+ /*
+ * if an error (io timeout, etc) while (re)connecting, the remote
+ * port requested terminating of the association (disconnect_ls)
+@@ -2541,7 +2544,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
+ * the controller. Abort any ios on the association and let the
+ * create_association error path resolve things.
+ */
+- if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
++ if (state == NVME_CTRL_CONNECTING) {
+ __nvme_fc_abort_outstanding_ios(ctrl, true);
+ set_bit(ASSOC_FAILED, &ctrl->flags);
+ dev_warn(ctrl->ctrl.device,
+@@ -2551,7 +2554,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
+ }
+
+ /* Otherwise, only proceed if in LIVE state - e.g. on first error */
+- if (ctrl->ctrl.state != NVME_CTRL_LIVE)
++ if (state != NVME_CTRL_LIVE)
+ return;
+
+ dev_warn(ctrl->ctrl.device,
+--
+2.39.5
+
--- /dev/null
+From db1af470ff8e29ba4693dca2c7ae4efb80618cc5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Jan 2025 14:30:48 +0100
+Subject: nvme: handle connectivity loss in nvme_set_queue_count
+
+From: Daniel Wagner <wagi@kernel.org>
+
+[ Upstream commit 294b2b7516fd06a8dd82e4a6118f318ec521e706 ]
+
+When the set-features attempt fails with any NVMe status code in
+nvme_set_queue_count, the function still reports success, though the
+number of queues is set to 0. This is done to support controllers in a
+degraded state (the admin queue is still up and running but there are
+no IO queues).
+
+There is an exception, though. When nvme_set_features reports a host
+path error, nvme_set_queue_count should propagate this error, as
+connectivity is lost, which means the admin queue is not working
+anymore either.
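+
+The caller-visible semantics after this change, as a sketch:
+
+  status < 0                        -> propagate (kernel error)
+  status == NVME_SC_HOST_PATH_ERROR -> propagate (connectivity lost)
+  any other NVMe status             -> *count = 0, return 0 (degraded
+                                       controller, admin queue only)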
+
+Fixes: 9a0be7abb62f ("nvme: refactor set_queue_count")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Daniel Wagner <wagi@kernel.org>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 4c409efd8cec1..8da50df56b079 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -1691,7 +1691,13 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
+
+ status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0,
+ &result);
+- if (status < 0)
++
++ /*
++ * It's either a kernel error or the host observed a connection
++	 * loss. In either case it's not possible to communicate with the
++	 * controller and thus we enter the error code path.
++ */
++ if (status < 0 || status == NVME_SC_HOST_PATH_ERROR)
+ return status;
+
+ /*
+--
+2.39.5
+
--- /dev/null
+From 93bea40a76a7c53e72245db77dda957cbc8195ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jan 2025 07:22:31 -0800
+Subject: nvme: make nvme_tls_attrs_group static
+
+From: Keith Busch <kbusch@kernel.org>
+
+[ Upstream commit 2d1a2dab95cdc6f2e0c6af3c0514b0bea94af482 ]
+
+To suppress the compiler "warning: symbol 'nvme_tls_attrs_group' was not
+declared. Should it be static?"
+
+Fixes: 1e48b34c9bc79a ("nvme: split off TLS sysfs attributes into a separate group")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/sysfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
+index b68a9e5f1ea39..3a41b9ab0f13c 100644
+--- a/drivers/nvme/host/sysfs.c
++++ b/drivers/nvme/host/sysfs.c
+@@ -792,7 +792,7 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
+ return a->mode;
+ }
+
+-const struct attribute_group nvme_tls_attrs_group = {
++static const struct attribute_group nvme_tls_attrs_group = {
+ .attrs = nvme_tls_attrs,
+ .is_visible = nvme_tls_attrs_are_visible,
+ };
+--
+2.39.5
+
--- /dev/null
+From d0e2deb424437c88a1a176d41e8ae0be3ca31c69 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 16:58:38 -0800
+Subject: pfifo_tail_enqueue: Drop new packet when sch->limit == 0
+
+From: Quang Le <quanglex97@gmail.com>
+
+[ Upstream commit 647cef20e649c576dff271e018d5d15d998b629d ]
+
+Expected behaviour:
+In case we reach the scheduler's limit, pfifo_tail_enqueue() will drop a
+packet in the scheduler's queue and decrease the scheduler's qlen by one.
+Then, pfifo_tail_enqueue() enqueues the new packet and increases the
+scheduler's qlen by one. Finally, pfifo_tail_enqueue() returns the
+`NET_XMIT_CN` status code.
+
+Weird behaviour:
+In case we set `sch->limit == 0` and trigger pfifo_tail_enqueue() on a
+scheduler that has no packets, the 'drop a packet' step will do nothing.
+This means the scheduler's qlen still equals 0. Then, we continue to
+enqueue the new packet and increase the scheduler's qlen by one. In
+summary, we can leverage pfifo_tail_enqueue() to increase qlen by one
+and return the `NET_XMIT_CN` status code.
+
+The problem is:
+Let's say we have two qdiscs: Qdisc_A and Qdisc_B.
+ - Qdisc_A's type must have a '->graft()' function to create a parent/child relationship.
+   Let's say Qdisc_A's type is `hfsc`. Enqueueing a packet to this qdisc triggers `hfsc_enqueue`.
+ - Qdisc_B's type is pfifo_head_drop. Enqueueing a packet to this qdisc triggers `pfifo_tail_enqueue`.
+ - Qdisc_B is configured to have `sch->limit == 0`.
+ - Qdisc_A is configured to route enqueued packets to Qdisc_B.
+
+Enqueueing a packet through Qdisc_A will lead to:
+ - hfsc_enqueue(Qdisc_A) -> pfifo_tail_enqueue(Qdisc_B)
+ - Qdisc_B->q.qlen += 1
+ - pfifo_tail_enqueue() returns `NET_XMIT_CN`
+ - hfsc_enqueue() checks for `NET_XMIT_SUCCESS`, sees `NET_XMIT_CN` => hfsc_enqueue() doesn't increase the qlen of Qdisc_A (see the sketch below).
+
+The whole process leads to a situation where Qdisc_A->q.qlen == 0 and Qdisc_B->q.qlen == 1.
+Replacing 'hfsc' with another type (for example, 'drr') still leads to the same problem.
+This violates the design, where a parent's qlen should equal the sum of its children's qlen.
+
+Bug impact: This issue can be used for user->kernel privilege escalation when it is reachable.
+
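+For reference, a hedged sketch of the parent-side accounting that makes
+this dangerous, patterned on hfsc_enqueue() (simplified, illustrative):
+
+	ret = qdisc_enqueue(skb, child_qdisc, to_free);
+	if (ret != NET_XMIT_SUCCESS) {
+		if (net_xmit_drop_count(ret))	/* false for NET_XMIT_CN */
+			qdisc_qstats_drop(sch);
+		return ret;	/* parent's qlen is NOT incremented */
+	}
+	sch->q.qlen++;	/* only reached on NET_XMIT_SUCCESS */
+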
+Fixes: 57dbb2d83d10 ("sched: add head drop fifo queue")
+Reported-by: Quang Le <quanglex97@gmail.com>
+Signed-off-by: Quang Le <quanglex97@gmail.com>
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Link: https://patch.msgid.link/20250204005841.223511-2-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_fifo.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
+index b50b2c2cc09bc..e6bfd39ff3396 100644
+--- a/net/sched/sch_fifo.c
++++ b/net/sched/sch_fifo.c
+@@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ {
+ unsigned int prev_backlog;
+
++ if (unlikely(READ_ONCE(sch->limit) == 0))
++ return qdisc_drop(skb, sch, to_free);
++
+ if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
+ return qdisc_enqueue_tail(skb, sch);
+
+--
+2.39.5
+
--- /dev/null
+From 6db16ac161df205cc7e9320b31be56389682f50a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Feb 2025 23:05:53 +0000
+Subject: rxrpc: Fix call state set to not include the SERVER_SECURING state
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 41b996ce83bf944de5569d6263c8dbd5513e7ed0 ]
+
+The RXRPC_CALL_SERVER_SECURING state doesn't really belong with the other
+states in the call's state set, as the other states govern the call's Rx/Tx
+phase transitions and determine when packets can and can't be received or
+transmitted. The "Securing" state doesn't actually govern the reception of
+packets and would need to be split depending on whether or not we've
+received the last packet yet (to mirror RECV_REQUEST/ACK_REQUEST).
+
+The "Securing" state is more about whether or not we can start forwarding
+packets to the application as recvmsg will need to decode them and the
+decoding can't take place until the challenge/response exchange has
+completed.
+
+Fix this by removing the RXRPC_CALL_SERVER_SECURING state from the state
+set and, instead, using a flag, RXRPC_CALL_CONN_CHALLENGING, to track
+whether or not we can queue the call for reception by recvmsg() or notify
+the kernel app that data is ready. In the event that we've already
+received all the packets, the connection event handler will poke the app
+layer in the appropriate manner.
+
+Also there's a race whereby the app layer sees the last packet before rxrpc
+has managed to end the rx phase and change the state to one amenable to
+allowing a reply. Fix this by queuing the packet after calling
+rxrpc_end_rx_phase().
+
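+A minimal sketch of the resulting flag lifecycle, condensed from the
+hunks below (no new API is introduced beyond the flag itself):
+
+	/* incoming call on a connection that is still being secured */
+	__set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags);
+
+	/* once the challenge/response exchange has completed */
+	if (__test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags))
+		rxrpc_notify_socket(call);	/* recvmsg() may now proceed */
+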
+Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: Simon Horman <horms@kernel.org>
+cc: linux-afs@lists.infradead.org
+Link: https://patch.msgid.link/20250204230558.712536-2-dhowells@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rxrpc/ar-internal.h | 2 +-
+ net/rxrpc/call_object.c | 6 ++----
+ net/rxrpc/conn_event.c | 4 +---
+ net/rxrpc/input.c | 2 +-
+ net/rxrpc/sendmsg.c | 2 +-
+ 5 files changed, 6 insertions(+), 10 deletions(-)
+
+diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
+index d0fd37bdcfe9c..6b036c0564c7a 100644
+--- a/net/rxrpc/ar-internal.h
++++ b/net/rxrpc/ar-internal.h
+@@ -567,6 +567,7 @@ enum rxrpc_call_flag {
+ RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */
+ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */
+ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */
++ RXRPC_CALL_CONN_CHALLENGING, /* The connection is being challenged */
+ };
+
+ /*
+@@ -587,7 +588,6 @@ enum rxrpc_call_state {
+ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
+ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
+ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */
+- RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
+ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
+ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
+ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
+diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
+index f9e983a12c149..e379a2a9375ae 100644
+--- a/net/rxrpc/call_object.c
++++ b/net/rxrpc/call_object.c
+@@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
+ [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
+ [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
+ [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc",
+- [RXRPC_CALL_SERVER_SECURING] = "SvSecure",
+ [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq",
+ [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq",
+ [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl",
+@@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
+ call->cong_tstamp = skb->tstamp;
+
+ __set_bit(RXRPC_CALL_EXPOSED, &call->flags);
+- rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
++ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+
+ spin_lock(&conn->state_lock);
+
+ switch (conn->state) {
+ case RXRPC_CONN_SERVICE_UNSECURED:
+ case RXRPC_CONN_SERVICE_CHALLENGING:
+- rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
++ __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags);
+ break;
+ case RXRPC_CONN_SERVICE:
+- rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+ break;
+
+ case RXRPC_CONN_ABORTED:
+diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
+index ca5e694ab858b..c4eb7986efddf 100644
+--- a/net/rxrpc/conn_event.c
++++ b/net/rxrpc/conn_event.c
+@@ -222,10 +222,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn)
+ */
+ static void rxrpc_call_is_secure(struct rxrpc_call *call)
+ {
+- if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) {
+- rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
++ if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags))
+ rxrpc_notify_socket(call);
+- }
+ }
+
+ /*
+diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
+index 16d49a861dbb5..6a075a7c190db 100644
+--- a/net/rxrpc/input.c
++++ b/net/rxrpc/input.c
+@@ -573,7 +573,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
+ rxrpc_propose_delay_ACK(call, sp->hdr.serial,
+ rxrpc_propose_ack_input_data);
+ }
+- if (notify) {
++ if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) {
+ trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
+ rxrpc_notify_socket(call);
+ }
+diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
+index 23d18fe5de9f0..154f650efb0ab 100644
+--- a/net/rxrpc/sendmsg.c
++++ b/net/rxrpc/sendmsg.c
+@@ -654,7 +654,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+ } else {
+ switch (rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_AWAIT_CONN:
+- case RXRPC_CALL_SERVER_SECURING:
++ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ if (p.command == RXRPC_CMD_SEND_ABORT)
+ break;
+ fallthrough;
+--
+2.39.5
+
--- /dev/null
+From d89bce9836de246776d7b324bdf3d6170cf25c2b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 11:03:04 +0000
+Subject: rxrpc: Fix the rxrpc_connection attend queue handling
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 4241a702e0d0c2ca9364cfac08dbf134264962de ]
+
+The rxrpc_connection attend queue is never used because conn::attend_link
+is never initialised and so is always NULL'd out and thus always appears to
+be busy. This requires the following fix:
+
+ (1) Fix the attend queue problem by initialising conn::attend_link.
+
+And, consequently, two further fixes for things masked by the above bug:
+
+ (2) Fix rxrpc_input_conn_event() to handle being invoked with a NULL
+ sk_buff pointer - something that can now happen with the above change.
+
+ (3) Fix the RXRPC_SKB_MARK_SERVICE_CONN_SECURED message to carry a pointer
+ to the connection and a ref on it.
+
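+For context, a hedged sketch of why a never-initialised list_head makes
+the poke path think the connection is already queued (field names as in
+the kernel source; simplified):
+
+	/* kzalloc() leaves conn->attend_link as { NULL, NULL }, but an
+	 * empty list requires attend_link.next == &attend_link, so
+	 * list_empty() is false and the connection looks busy:
+	 */
+	if (!list_empty(&conn->attend_link))
+		return;	/* wrongly taken before the fix */
+	list_add_tail(&conn->attend_link, &conn->local->conn_attend_q);
+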
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: Jakub Kicinski <kuba@kernel.org>
+cc: "David S. Miller" <davem@davemloft.net>
+cc: Eric Dumazet <edumazet@google.com>
+cc: Paolo Abeni <pabeni@redhat.com>
+cc: Simon Horman <horms@kernel.org>
+cc: linux-afs@lists.infradead.org
+cc: netdev@vger.kernel.org
+Fixes: f2cce89a074e ("rxrpc: Implement a mechanism to send an event notification to a connection")
+Link: https://patch.msgid.link/20250203110307.7265-3-dhowells@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/rxrpc.h | 1 +
+ net/rxrpc/conn_event.c | 17 ++++++++++-------
+ net/rxrpc/conn_object.c | 1 +
+ 3 files changed, 12 insertions(+), 7 deletions(-)
+
+diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
+index 666fe1779ccc6..e1a37e9c2d42d 100644
+--- a/include/trace/events/rxrpc.h
++++ b/include/trace/events/rxrpc.h
+@@ -218,6 +218,7 @@
+ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \
+ EM(rxrpc_conn_get_idle, "GET idle ") \
+ EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \
++ EM(rxrpc_conn_get_poke_secured, "GET secured ") \
+ EM(rxrpc_conn_get_poke_timer, "GET poke ") \
+ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \
+ EM(rxrpc_conn_new_client, "NEW client ") \
+diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
+index 2a1396cd892f3..ca5e694ab858b 100644
+--- a/net/rxrpc/conn_event.c
++++ b/net/rxrpc/conn_event.c
+@@ -266,6 +266,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
+ * we've already received the packet, put it on the
+ * front of the queue.
+ */
++ sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured);
+ skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
+ rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
+ skb_queue_head(&conn->local->rx_queue, skb);
+@@ -431,14 +432,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
+ if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
+ rxrpc_abort_calls(conn);
+
+- switch (skb->mark) {
+- case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+- if (conn->state != RXRPC_CONN_SERVICE)
+- break;
++ if (skb) {
++ switch (skb->mark) {
++ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
++ if (conn->state != RXRPC_CONN_SERVICE)
++ break;
+
+- for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+- rxrpc_call_is_secure(conn->channels[loop].call);
+- break;
++ for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
++ rxrpc_call_is_secure(conn->channels[loop].call);
++ break;
++ }
+ }
+
+ /* Process delayed ACKs whose time has come. */
+diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
+index 1539d315afe74..7bc68135966e2 100644
+--- a/net/rxrpc/conn_object.c
++++ b/net/rxrpc/conn_object.c
+@@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
+ INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
+ INIT_LIST_HEAD(&conn->proc_link);
+ INIT_LIST_HEAD(&conn->link);
++ INIT_LIST_HEAD(&conn->attend_link);
+ mutex_init(&conn->security_lock);
+ mutex_init(&conn->tx_data_alloc_lock);
+ skb_queue_head_init(&conn->rx_queue);
+--
+2.39.5
+
--- /dev/null
+From 1f0309b88874381903bd24c0abe1a99875124349 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Jan 2025 10:58:52 +0000
+Subject: sched/fair: Fix inaccurate h_nr_runnable accounting with delayed
+ dequeue
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit 3429dd57f0deb1a602c2624a1dd7c4c11b6c4734 ]
+
+set_delayed() adjusts cfs_rq->h_nr_runnable for the hierarchy when an
+entity is delayed irrespective of whether the entity corresponds to a
+task or a cfs_rq.
+
+Consider the following scenario:
+
+ root
+ / \
+ A B (*) delayed since B is no longer eligible on root
+ | |
+ Task0 Task1 <--- dequeue_task_fair() - task blocks
+
+When Task1 blocks (dequeue_entity() for task's se returns true),
+dequeue_entities() will continue adjusting cfs_rq->h_nr_* for the
+hierarchy of Task1. However, when the sched_entity corresponding to
+cfs_rq B is delayed, set_delayed() will adjust the h_nr_runnable for the
+hierarchy too leading to both dequeue_entity() and set_delayed()
+decrementing h_nr_runnable for the dequeue of the same task.
+
+A SCHED_WARN_ON() to inspect h_nr_runnable after its update in
+dequeue_entities(), like the one below:
+
+ cfs_rq->h_nr_runnable -= h_nr_runnable;
+ SCHED_WARN_ON(((int) cfs_rq->h_nr_runnable) < 0);
+
+is consistently tripped when running wakeup-intensive workloads like
+hackbench in a cgroup.
+
+This error is self-correcting since cfs_rqs are per-CPU and cannot
+migrate. The entity is either picked for full dequeue or is requeued
+when a task wakes up below it. Both those paths call clear_delayed()
+which again increments h_nr_runnable of the hierarchy without
+considering if the entity corresponds to a task or not.
+
+h_nr_runnable will eventually reflect the correct value however in the
+interim, the incorrect values can still influence PELT calculation which
+uses se->runnable_weight or cfs_rq->h_nr_runnable.
+
+Since only delayed tasks take the early return path in
+dequeue_entities() and enqueue_task_fair(), adjust the
+h_nr_runnable in {set,clear}_delayed() only when a task is delayed as
+this path skips the h_nr_* update loops and returns early.
+
+For entities corresponding to cfs_rq, the h_nr_* update loop in the
+caller will do the right thing.
+
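+For reference, the task/cfs_rq distinction relied on above is the group
+scheduling definition from kernel/sched/sched.h (shown here for
+illustration):
+
+	/* An entity is a task iff it does not own a run queue. */
+	#define entity_is_task(se)	(!se->my_q)
+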
+Fixes: 76f2f783294d ("sched/eevdf: More PELT vs DELAYED_DEQUEUE")
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
+Tested-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
+Link: https://lkml.kernel.org/r/20250117105852.23908-1-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 65e7be6448720..ddc096d6b0c20 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5481,6 +5481,15 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+ static void set_delayed(struct sched_entity *se)
+ {
+ se->sched_delayed = 1;
++
++ /*
++ * Delayed se of cfs_rq have no tasks queued on them.
++ * Do not adjust h_nr_runnable since dequeue_entities()
++ * will account it for blocked tasks.
++ */
++ if (!entity_is_task(se))
++ return;
++
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+@@ -5493,6 +5502,16 @@ static void set_delayed(struct sched_entity *se)
+ static void clear_delayed(struct sched_entity *se)
+ {
+ se->sched_delayed = 0;
++
++ /*
++ * Delayed se of cfs_rq have no tasks queued on them.
++ * Do not adjust h_nr_runnable since a dequeue has
++ * already accounted for it or an enqueue of a task
++ * below it will account for it in enqueue_task_fair().
++ */
++ if (!entity_is_task(se))
++ return;
++
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+--
+2.39.5
+
tty-xilinx_uartps-split-sysrq-handling.patch
tty-permit-some-tiocl_setsel-modes-without-cap_sys_admin.patch
platform-x86-serdev_helpers-check-for-serial_ctrl_uid-null.patch
+sched-fair-fix-inaccurate-h_nr_runnable-accounting-w.patch
+nvme-handle-connectivity-loss-in-nvme_set_queue_coun.patch
+firmware-iscsi_ibft-fix-iscsi_ibft-kconfig-entry.patch
+gpu-drm_dp_cec-fix-broken-cec-adapter-properties-che.patch
+ice-put-rx-buffers-after-being-done-with-current-fra.patch
+ice-gather-page_count-s-of-each-frag-right-before-xd.patch
+ice-stop-storing-xdp-verdict-within-ice_rx_buf.patch
+nvme-make-nvme_tls_attrs_group-static.patch
+nvme-fc-use-ctrl-state-getter.patch
+net-bcmgenet-correct-overlaying-of-phy-and-mac-wake-.patch
+ice-add-check-for-devm_kzalloc.patch
+vmxnet3-fix-tx-queue-race-condition-with-xdp.patch
+tg3-disable-tg3-pcie-aer-on-system-reboot.patch
+udp-gso-do-not-drop-small-packets-when-pmtu-reduces.patch
+drm-i915-dp-fix-the-adaptive-sync-operation-mode-for.patch
+ethtool-rss-fix-hiding-unsupported-fields-in-dumps.patch
+rxrpc-fix-the-rxrpc_connection-attend-queue-handling.patch
+gpio-pca953x-improve-interrupt-support.patch
+net-atlantic-fix-warning-during-hot-unplug.patch
+net-rose-lock-the-socket-in-rose_bind.patch
+gpio-sim-lock-hog-configfs-items-if-present.patch
+x86-xen-fix-xen_hypercall_hvm-to-not-clobber-rbx.patch
+x86-xen-add-frame_end-to-xen_hypercall_hvm.patch
+acpi-property-fix-return-value-for-nval-0-in-acpi_da.patch
+pfifo_tail_enqueue-drop-new-packet-when-sch-limit-0.patch
+netem-update-sch-q.qlen-before-qdisc_tree_reduce_bac.patch
+tun-revert-fix-group-permission-check.patch
+net-sched-fix-truncation-of-offloaded-action-statist.patch
+rxrpc-fix-call-state-set-to-not-include-the-server_s.patch
--- /dev/null
+From 0ed4e6cc91ac1720c0ba1fa86bddff19c03a2712 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Jan 2025 16:57:54 -0500
+Subject: tg3: Disable tg3 PCIe AER on system reboot
+
+From: Lenny Szubowicz <lszubowi@redhat.com>
+
+[ Upstream commit e0efe83ed325277bb70f9435d4d9fc70bebdcca8 ]
+
+Disable PCIe AER on the tg3 device on system reboot on a limited
+list of Dell PowerEdge systems. This prevents a fatal PCIe AER event
+on the tg3 device during the ACPI _PTS (prepare to sleep) method for
+S5 on those systems. The _PTS is invoked by acpi_enter_sleep_state_prep()
+as part of the kernel's reboot sequence as a result of commit
+38f34dba806a ("PM: ACPI: reboot: Reinstate S5 for reboot").
+
+There was an earlier fix for this problem by commit 2ca1c94ce0b6
+("tg3: Disable tg3 device on system reboot to avoid triggering AER").
+But it was discovered that this earlier fix caused a reboot hang
+when some Dell PowerEdge servers were booted via ipxe. To address
+this reboot hang, the earlier fix was essentially reverted by commit
+9fc3bc764334 ("tg3: power down device only on SYSTEM_POWER_OFF").
+This re-exposed the tg3 PCIe AER on reboot problem.
+
+This fix is not an ideal solution because the root cause of the AER
+is in system firmware. Instead, it's a targeted work-around in the
+tg3 driver.
+
+Note also that the PCIe AER must be disabled on the tg3 device even
+if the system is configured to use "firmware first" error handling.
+
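+For reference, the four Device Control bits cleared by this quirk are
+the PCIe error reporting enables (definitions from
+include/uapi/linux/pci_regs.h):
+
+	PCI_EXP_DEVCTL_CERE	/* Correctable Error Reporting Enable */
+	PCI_EXP_DEVCTL_NFERE	/* Non-Fatal Error Reporting Enable */
+	PCI_EXP_DEVCTL_FERE	/* Fatal Error Reporting Enable */
+	PCI_EXP_DEVCTL_URRE	/* Unsupported Request Reporting Enable */
+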
+V3:
+ - Fix sparse warning on improper comparison of pdev->current_state
+ - Adhere to netdev comment style
+
+Fixes: 9fc3bc764334 ("tg3: power down device only on SYSTEM_POWER_OFF")
+Signed-off-by: Lenny Szubowicz <lszubowi@redhat.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c | 58 +++++++++++++++++++++++++++++
+ 1 file changed, 58 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
+index d178138981a96..717e110d23c91 100644
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -55,6 +55,7 @@
+ #include <linux/hwmon.h>
+ #include <linux/hwmon-sysfs.h>
+ #include <linux/crc32poly.h>
++#include <linux/dmi.h>
+
+ #include <net/checksum.h>
+ #include <net/gso.h>
+@@ -18154,6 +18155,50 @@ static int tg3_resume(struct device *device)
+
+ static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume);
+
++/* Systems where ACPI _PTS (Prepare To Sleep) S5 will result in a fatal
++ * PCIe AER event on the tg3 device if the tg3 device is not, or cannot
++ * be, powered down.
++ */
++static const struct dmi_system_id tg3_restart_aer_quirk_table[] = {
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R440"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R540"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R640"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R650"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R740"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R750"),
++ },
++ },
++ {}
++};
++
+ static void tg3_shutdown(struct pci_dev *pdev)
+ {
+ struct net_device *dev = pci_get_drvdata(pdev);
+@@ -18170,6 +18215,19 @@ static void tg3_shutdown(struct pci_dev *pdev)
+
+ if (system_state == SYSTEM_POWER_OFF)
+ tg3_power_down(tp);
++ else if (system_state == SYSTEM_RESTART &&
++ dmi_first_match(tg3_restart_aer_quirk_table) &&
++ pdev->current_state != PCI_D3cold &&
++ pdev->current_state != PCI_UNKNOWN) {
++ /* Disable PCIe AER on the tg3 to avoid a fatal
++ * error during this system restart.
++ */
++ pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL,
++ PCI_EXP_DEVCTL_CERE |
++ PCI_EXP_DEVCTL_NFERE |
++ PCI_EXP_DEVCTL_FERE |
++ PCI_EXP_DEVCTL_URRE);
++ }
+
+ rtnl_unlock();
+
+--
+2.39.5
+
--- /dev/null
+From 4f698b896e8b8d9bd0a41085295205e6df3e69a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Feb 2025 11:10:06 -0500
+Subject: tun: revert fix group permission check
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit a70c7b3cbc0688016810bb2e0b9b8a0d6a530045 ]
+
+This reverts commit 3ca459eaba1bf96a8c7878de84fa8872259a01e3.
+
+The blamed commit caused a regression when neither tun->owner nor
+tun->group is set. This is intended to be allowed, but now requires
+CAP_NET_ADMIN.
+
+Discussion in the referenced thread pointed out that the original
+issue that prompted this patch can be resolved in userspace.
+
+The relaxed access control may also make a device accessible when it
+previously wasn't, while existing users may depend on it to not be.
+
+This is a clean pure git revert, except for fixing the indentation on
+the gid_valid line that checkpatch correctly flagged.
+
+Fixes: 3ca459eaba1b ("tun: fix group permission check")
+Link: https://lore.kernel.org/netdev/CAFqZXNtkCBT4f+PwyVRmQGoT3p1eVa01fCG_aNtpt6dakXncUg@mail.gmail.com/
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Cc: Ondrej Mosnacek <omosnace@redhat.com>
+Cc: Stas Sergeev <stsp2@yandex.ru>
+Link: https://patch.msgid.link/20250204161015.739430-1-willemdebruijn.kernel@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/tun.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index 6c24a9ce6c155..fae1a0ab36bdf 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -574,18 +574,14 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
+ return ret;
+ }
+
+-static inline bool tun_capable(struct tun_struct *tun)
++static inline bool tun_not_capable(struct tun_struct *tun)
+ {
+ const struct cred *cred = current_cred();
+ struct net *net = dev_net(tun->dev);
+
+- if (ns_capable(net->user_ns, CAP_NET_ADMIN))
+- return 1;
+- if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner))
+- return 1;
+- if (gid_valid(tun->group) && in_egroup_p(tun->group))
+- return 1;
+- return 0;
++ return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
++ (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
++ !ns_capable(net->user_ns, CAP_NET_ADMIN);
+ }
+
+ static void tun_set_real_num_queues(struct tun_struct *tun)
+@@ -2782,7 +2778,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
+ !!(tun->flags & IFF_MULTI_QUEUE))
+ return -EINVAL;
+
+- if (!tun_capable(tun))
++ if (tun_not_capable(tun))
+ return -EPERM;
+ err = security_tun_dev_open(tun->security);
+ if (err < 0)
+--
+2.39.5
+
--- /dev/null
+From 8c70fdd73bebbed1d19ad85be82f8a1f2e212fac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 Jan 2025 00:31:39 -0800
+Subject: udp: gso: do not drop small packets when PMTU reduces
+
+From: Yan Zhai <yan@cloudflare.com>
+
+[ Upstream commit 235174b2bed88501fda689c113c55737f99332d8 ]
+
+Commit 4094871db1d6 ("udp: only do GSO if # of segs > 1") avoided GSO
+for small packets. But the kernel currently dismisses GSO requests only
+after checking MTU/PMTU on gso_size. This means any packets, regardless
+of their payload sizes, could be dropped when PMTU becomes smaller than
+requested gso_size. We encountered this issue in production and it
+caused a reliability problem in that new QUIC connections could not be
+established before the PMTU cache expired, while non-GSO sockets still
+worked fine at the same time.
+
+Ideally, do not check any GSO-related constraints when the payload size
+is smaller than the requested gso_size, and return EMSGSIZE instead of
+EINVAL on MTU/PMTU check failure to be more specific about the error
+cause.
+
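+A worked example with illustrative numbers (IPv4: 20-byte IP header plus
+8-byte UDP header, so hlen = 28; the values below are hypothetical):
+
+	PMTU (cork->fragsize)  = 1300
+	requested gso_size     = 1400, payload datalen = 500
+	old check: 28 + 1400 > 1300            -> -EINVAL, packet dropped
+	new check: 28 + min(500, 1400) = 528   -> fits, sent without GSO
+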
+Fixes: 4094871db1d6 ("udp: only do GSO if # of segs > 1")
+Signed-off-by: Yan Zhai <yan@cloudflare.com>
+Suggested-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/udp.c | 4 ++--
+ net/ipv6/udp.c | 4 ++--
+ tools/testing/selftests/net/udpgso.c | 26 ++++++++++++++++++++++++++
+ 3 files changed, 30 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index d2eeb6fc49b38..8da74dc63061c 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -985,9 +985,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
+ const int hlen = skb_network_header_len(skb) +
+ sizeof(struct udphdr);
+
+- if (hlen + cork->gso_size > cork->fragsize) {
++ if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
+ kfree_skb(skb);
+- return -EINVAL;
++ return -EMSGSIZE;
+ }
+ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
+ kfree_skb(skb);
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 896c9c827a288..197d0ac47592a 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1294,9 +1294,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+ const int hlen = skb_network_header_len(skb) +
+ sizeof(struct udphdr);
+
+- if (hlen + cork->gso_size > cork->fragsize) {
++ if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
+ kfree_skb(skb);
+- return -EINVAL;
++ return -EMSGSIZE;
+ }
+ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
+ kfree_skb(skb);
+diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
+index 3f2fca02fec53..36ff28af4b190 100644
+--- a/tools/testing/selftests/net/udpgso.c
++++ b/tools/testing/selftests/net/udpgso.c
+@@ -102,6 +102,19 @@ struct testcase testcases_v4[] = {
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 1,
+ },
++ {
++ /* datalen <= MSS < gso_len: will fall back to no GSO */
++ .tlen = CONST_MSS_V4,
++ .gso_len = CONST_MSS_V4 + 1,
++ .r_num_mss = 0,
++ .r_len_last = CONST_MSS_V4,
++ },
++ {
++ /* MSS < datalen < gso_len: fail */
++ .tlen = CONST_MSS_V4 + 1,
++ .gso_len = CONST_MSS_V4 + 2,
++ .tfail = true,
++ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V4 + 1,
+@@ -205,6 +218,19 @@ struct testcase testcases_v6[] = {
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 1,
+ },
++ {
++ /* datalen <= MSS < gso_len: will fall back to no GSO */
++ .tlen = CONST_MSS_V6,
++ .gso_len = CONST_MSS_V6 + 1,
++ .r_num_mss = 0,
++ .r_len_last = CONST_MSS_V6,
++ },
++ {
++ /* MSS < datalen < gso_len: fail */
++ .tlen = CONST_MSS_V6 + 1,
++ .gso_len = CONST_MSS_V6 + 2,
++ .tfail = true
++ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V6 + 1,
+--
+2.39.5
+
--- /dev/null
+From 259ca2ca1b2fd657fdb94df286a9c3e010801fc3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 Jan 2025 09:53:41 +0530
+Subject: vmxnet3: Fix tx queue race condition with XDP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
+
+[ Upstream commit 3f1baa91a1fdf3de9dbad4bd615b35fab347874b ]
+
+If XDP traffic runs on a CPU which is greater than or equal to
+the number of the Tx queues of the NIC, then vmxnet3_xdp_get_tq()
+always picks up queue 0 for transmission as it uses reciprocal scale
+instead of simple modulo operation.
+
+vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() use the above
+returned queue without any locking, which can lead to race conditions
+when multiple XDP xmits run in parallel on different CPUs.
+
+This patch uses a simple modulo scheme when the current CPU equals or
+exceeds the number of Tx queues on the NIC. It also adds locking in the
+vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions.
+
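+For reference, why reciprocal_scale() degenerates here (helper as
+defined in the kernel; CPU numbers illustrative):
+
+	reciprocal_scale(x, n) == (u32)((u64)x * n >> 32)
+	e.g. cpu = 9, tq_number = 4:  (9 * 4) >> 32 == 0  -> always queue 0
+	whereas 9 % 4 == 1 spreads transmissions across the queues
+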
+Fixes: 54f00cce1178 ("vmxnet3: Add XDP support.")
+Signed-off-by: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
+Signed-off-by: Ronak Doshi <ronak.doshi@broadcom.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250131042340.156547-1-sankararaman.jayaraman@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vmxnet3/vmxnet3_xdp.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c
+index 1341374a4588a..616ecc38d1726 100644
+--- a/drivers/net/vmxnet3/vmxnet3_xdp.c
++++ b/drivers/net/vmxnet3/vmxnet3_xdp.c
+@@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
+ if (likely(cpu < tq_number))
+ tq = &adapter->tx_queue[cpu];
+ else
+- tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];
++ tq = &adapter->tx_queue[cpu % tq_number];
+
+ return tq;
+ }
+@@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+ u32 buf_size;
+ u32 dw2;
+
++ spin_lock_irq(&tq->tx_lock);
+ dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
+ dw2 |= xdpf->len;
+ ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
+@@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+
+ if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
+ tq->stats.tx_ring_full++;
++ spin_unlock_irq(&tq->tx_lock);
+ return -ENOSPC;
+ }
+
+@@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+ tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
+ xdpf->data, buf_size,
+ DMA_TO_DEVICE);
+- if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
++ if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) {
++ spin_unlock_irq(&tq->tx_lock);
+ return -EFAULT;
++ }
+ tbi->map_type |= VMXNET3_MAP_SINGLE;
+ } else { /* XDP buffer from page pool */
+ page = virt_to_page(xdpf->data);
+@@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+ dma_wmb();
+ gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
+ VMXNET3_TXD_GEN);
++ spin_unlock_irq(&tq->tx_lock);
+
+ /* No need to handle the case when tx_num_deferred doesn't reach
+ * threshold. Backend driver at hypervisor side will poll and reset
+@@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
+ {
+ struct vmxnet3_adapter *adapter = netdev_priv(dev);
+ struct vmxnet3_tx_queue *tq;
++ struct netdev_queue *nq;
+ int i;
+
+ if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
+@@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev,
+ if (tq->stopped)
+ return -ENETDOWN;
+
++ nq = netdev_get_tx_queue(adapter->netdev, tq->qid);
++
++ __netif_tx_lock(nq, smp_processor_id());
+ for (i = 0; i < n; i++) {
+ if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
+ tq->stats.xdp_xmit_err++;
+@@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
+ }
+ }
+ tq->stats.xdp_xmit += i;
++ __netif_tx_unlock(nq);
+
+ return i;
+ }
+--
+2.39.5
+
--- /dev/null
+From 0d950c23a37d041289c301e693643e5dd0a13d59 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 10:07:56 +0100
+Subject: x86/xen: add FRAME_END to xen_hypercall_hvm()
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 0bd797b801bd8ee06c822844e20d73aaea0878dd ]
+
+xen_hypercall_hvm() is missing a FRAME_END at the end, add it.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202502030848.HTNTTuo9-lkp@intel.com/
+Fixes: b4845bb63838 ("x86/xen: add central hypercall functions")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/xen/xen-head.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
+index ce96877c3c4fe..55978e0dc1755 100644
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -132,6 +132,7 @@ SYM_FUNC_START(xen_hypercall_hvm)
+ pop %rcx
+ pop %rax
+ #endif
++ FRAME_END
+ /* Use correct hypercall function. */
+ jz xen_hypercall_amd
+ jmp xen_hypercall_intel
+--
+2.39.5
+
--- /dev/null
+From 92662edb9522c377f8a707fac35e2ba2cd8b98d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 09:43:31 +0100
+Subject: x86/xen: fix xen_hypercall_hvm() to not clobber %rbx
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 98a5cfd2320966f40fe049a9855f8787f0126825 ]
+
+xen_hypercall_hvm(), which is used at most once during early boot when
+running as a Xen PVH guest, clobbers %rbx. Depending on whether the
+caller relies on %rbx being preserved across the call, this clobbering
+might result in an early crash of the system.
+
+This can be avoided by using an already saved register instead of %rbx.
+
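+For reference (x86-64 SysV ABI, which kernel C code follows): the fix
+below swaps the scratch register to one that callers may not assume is
+preserved anyway:
+
+	/* callee-saved (must survive a call): %rbx, %rbp, %r12-%r15 */
+	/* caller-saved (free to clobber):     %rax, %rcx, %rdx, %rsi,
+	 *                                     %rdi, %r8-%r11
+	 */
+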
+Fixes: b4845bb63838 ("x86/xen: add central hypercall functions")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/xen/xen-head.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
+index 721a57700a3b0..ce96877c3c4fe 100644
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -117,8 +117,8 @@ SYM_FUNC_START(xen_hypercall_hvm)
+ pop %ebx
+ pop %eax
+ #else
+- lea xen_hypercall_amd(%rip), %rbx
+- cmp %rax, %rbx
++ lea xen_hypercall_amd(%rip), %rcx
++ cmp %rax, %rcx
+ #ifdef CONFIG_FRAME_POINTER
+ pop %rax /* Dummy pop. */
+ #endif
+--
+2.39.5
+