git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.12
author    Sasha Levin <sashal@kernel.org>
          Mon, 10 Feb 2025 03:58:34 +0000 (22:58 -0500)
committer Sasha Levin <sashal@kernel.org>
          Mon, 10 Feb 2025 03:58:34 +0000 (22:58 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
30 files changed:
queue-6.12/acpi-property-fix-return-value-for-nval-0-in-acpi_da.patch [new file with mode: 0644]
queue-6.12/drm-i915-dp-fix-the-adaptive-sync-operation-mode-for.patch [new file with mode: 0644]
queue-6.12/ethtool-rss-fix-hiding-unsupported-fields-in-dumps.patch [new file with mode: 0644]
queue-6.12/firmware-iscsi_ibft-fix-iscsi_ibft-kconfig-entry.patch [new file with mode: 0644]
queue-6.12/gpio-pca953x-improve-interrupt-support.patch [new file with mode: 0644]
queue-6.12/gpio-sim-lock-hog-configfs-items-if-present.patch [new file with mode: 0644]
queue-6.12/gpu-drm_dp_cec-fix-broken-cec-adapter-properties-che.patch [new file with mode: 0644]
queue-6.12/ice-add-check-for-devm_kzalloc.patch [new file with mode: 0644]
queue-6.12/ice-gather-page_count-s-of-each-frag-right-before-xd.patch [new file with mode: 0644]
queue-6.12/ice-put-rx-buffers-after-being-done-with-current-fra.patch [new file with mode: 0644]
queue-6.12/ice-stop-storing-xdp-verdict-within-ice_rx_buf.patch [new file with mode: 0644]
queue-6.12/net-atlantic-fix-warning-during-hot-unplug.patch [new file with mode: 0644]
queue-6.12/net-bcmgenet-correct-overlaying-of-phy-and-mac-wake-.patch [new file with mode: 0644]
queue-6.12/net-rose-lock-the-socket-in-rose_bind.patch [new file with mode: 0644]
queue-6.12/net-sched-fix-truncation-of-offloaded-action-statist.patch [new file with mode: 0644]
queue-6.12/netem-update-sch-q.qlen-before-qdisc_tree_reduce_bac.patch [new file with mode: 0644]
queue-6.12/nvme-fc-use-ctrl-state-getter.patch [new file with mode: 0644]
queue-6.12/nvme-handle-connectivity-loss-in-nvme_set_queue_coun.patch [new file with mode: 0644]
queue-6.12/nvme-make-nvme_tls_attrs_group-static.patch [new file with mode: 0644]
queue-6.12/pfifo_tail_enqueue-drop-new-packet-when-sch-limit-0.patch [new file with mode: 0644]
queue-6.12/rxrpc-fix-call-state-set-to-not-include-the-server_s.patch [new file with mode: 0644]
queue-6.12/rxrpc-fix-the-rxrpc_connection-attend-queue-handling.patch [new file with mode: 0644]
queue-6.12/sched-fair-fix-inaccurate-h_nr_runnable-accounting-w.patch [new file with mode: 0644]
queue-6.12/series
queue-6.12/tg3-disable-tg3-pcie-aer-on-system-reboot.patch [new file with mode: 0644]
queue-6.12/tun-revert-fix-group-permission-check.patch [new file with mode: 0644]
queue-6.12/udp-gso-do-not-drop-small-packets-when-pmtu-reduces.patch [new file with mode: 0644]
queue-6.12/vmxnet3-fix-tx-queue-race-condition-with-xdp.patch [new file with mode: 0644]
queue-6.12/x86-xen-add-frame_end-to-xen_hypercall_hvm.patch [new file with mode: 0644]
queue-6.12/x86-xen-fix-xen_hypercall_hvm-to-not-clobber-rbx.patch [new file with mode: 0644]

diff --git a/queue-6.12/acpi-property-fix-return-value-for-nval-0-in-acpi_da.patch b/queue-6.12/acpi-property-fix-return-value-for-nval-0-in-acpi_da.patch
new file mode 100644 (file)
index 0000000..4d8651b
--- /dev/null
@@ -0,0 +1,63 @@
+From 12d6b0c64a6edd2d782074a926a63be1bdcd8330 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 21:46:29 +0200
+Subject: ACPI: property: Fix return value for nval == 0 in
+ acpi_data_prop_read()
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit ab930483eca9f3e816c35824b5868599af0c61d7 ]
+
+While analysing the software node and OF node code for the corner case
+where the caller asks to read zero items from what is supposed to be an
+array of values, I found that ACPI behaves differently from OF, i.e.
+
+ 1. It returns -EINVAL when the caller asks to read zero items from an
+    integer array, while OF returns 0 if no other errors happened.
+
+ 2. It returns -EINVAL when the caller asks to read zero items from a
+    string array, while OF returns -ENODATA if no other errors happened.
+
+Amend the ACPI implementation to follow what OF does.
+
+Fixes: b31384fa5de3 ("Driver core: Unified device properties interface for platform firmware")
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://patch.msgid.link/20250203194629.3731895-1-andriy.shevchenko@linux.intel.com
+[ rjw: Added empty line after a conditional ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/acpi/property.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
+index 80a52a4e66dd1..e9186339f6e6b 100644
+--- a/drivers/acpi/property.c
++++ b/drivers/acpi/property.c
+@@ -1187,8 +1187,6 @@ static int acpi_data_prop_read(const struct acpi_device_data *data,
+               }
+               break;
+       }
+-      if (nval == 0)
+-              return -EINVAL;
+       if (obj->type == ACPI_TYPE_BUFFER) {
+               if (proptype != DEV_PROP_U8)
+@@ -1212,9 +1210,11 @@ static int acpi_data_prop_read(const struct acpi_device_data *data,
+               ret = acpi_copy_property_array_uint(items, (u64 *)val, nval);
+               break;
+       case DEV_PROP_STRING:
+-              ret = acpi_copy_property_array_string(
+-                      items, (char **)val,
+-                      min_t(u32, nval, obj->package.count));
++              nval = min_t(u32, nval, obj->package.count);
++              if (nval == 0)
++                      return -ENODATA;
++
++              ret = acpi_copy_property_array_string(items, (char **)val, nval);
+               break;
+       default:
+               ret = -EINVAL;
+-- 
+2.39.5
+
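A hedged sketch of the return-value contract this patch establishes, in
plain C. This is a simplified stand-in for acpi_data_prop_read(), not
the kernel function itself; the enum and helper names are illustrative:

  #include <errno.h>
  #include <stddef.h>

  enum prop_type { PROP_U8, PROP_U16, PROP_U32, PROP_U64, PROP_STRING };

  /* Model of the fixed semantics for a zero-item request: integer
   * arrays now succeed (0, as OF does) and string arrays return
   * -ENODATA (again matching OF). */
  static int prop_read_array(enum prop_type type, void *val, size_t nval,
                             size_t count)
  {
          if (type == PROP_STRING) {
                  /* nval = min_t(u32, nval, obj->package.count) */
                  if (nval > count)
                          nval = count;
                  if (nval == 0)
                          return -ENODATA;
          }

          /* integer types: nval == 0 falls through and succeeds */
          /* ... copy nval items into val ... */
          return 0;
  }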
diff --git a/queue-6.12/drm-i915-dp-fix-the-adaptive-sync-operation-mode-for.patch b/queue-6.12/drm-i915-dp-fix-the-adaptive-sync-operation-mode-for.patch
new file mode 100644 (file)
index 0000000..13acfbb
--- /dev/null
@@ -0,0 +1,52 @@
+From c2990ac2b24b3c5afc53c58d2cbf9896d5c1c95d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Jan 2025 10:46:06 +0530
+Subject: drm/i915/dp: fix the Adaptive sync Operation mode for SDP
+
+From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
+
+[ Upstream commit 4466302262b38f5e6c65325035b4036a42efc934 ]
+
+Currently we support the Adaptive Sync operation mode with a dynamic
+frame rate, but the operation mode with a fixed rate is set instead.
+This was set correctly in an earlier version of the changes but was
+later broken while defining a macro for it.
+
+Fixes: a5bd5991cb8a ("drm/i915/display: Compute AS SDP parameters")
+Cc: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
+Cc: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
+Cc: Jani Nikula <jani.nikula@linux.intel.com>
+Reviewed-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
+Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20250130051609.1796524-4-mitulkumar.ajitkumar.golani@intel.com
+(cherry picked from commit c5806862543ff6c2ad242409fcdf0667eac26dae)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/display/intel_dp.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
+index 90fa73575feb1..7befd260f5949 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp.c
++++ b/drivers/gpu/drm/i915/display/intel_dp.c
+@@ -2738,7 +2738,6 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp,
+       crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC);
+-      /* Currently only DP_AS_SDP_AVT_FIXED_VTOTAL mode supported */
+       as_sdp->sdp_type = DP_SDP_ADAPTIVE_SYNC;
+       as_sdp->length = 0x9;
+       as_sdp->duration_incr_ms = 0;
+@@ -2750,7 +2749,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp,
+               as_sdp->target_rr = drm_mode_vrefresh(adjusted_mode);
+               as_sdp->target_rr_divider = true;
+       } else {
+-              as_sdp->mode = DP_AS_SDP_AVT_FIXED_VTOTAL;
++              as_sdp->mode = DP_AS_SDP_AVT_DYNAMIC_VTOTAL;
+               as_sdp->vtotal = adjusted_mode->vtotal;
+               as_sdp->target_rr = 0;
+       }
+-- 
+2.39.5
+
diff --git a/queue-6.12/ethtool-rss-fix-hiding-unsupported-fields-in-dumps.patch b/queue-6.12/ethtool-rss-fix-hiding-unsupported-fields-in-dumps.patch
new file mode 100644 (file)
index 0000000..b0a7bcd
--- /dev/null
@@ -0,0 +1,81 @@
+From 83cb8c5c6fff22d00a7c7ec51ad0092a6dd4301e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 Jan 2025 17:30:37 -0800
+Subject: ethtool: rss: fix hiding unsupported fields in dumps
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 244f8aa46fa9e2f4ea5fe0e04988b395d5e30fc7 ]
+
+Commit ec6e57beaf8b ("ethtool: rss: don't report key if device
+doesn't support it") intended to stop reporting key fields for
+additional rss contexts if device has a global hashing key.
+
+Later we added dump support, but the filtering wasn't properly
+wired up there. So we end up reporting the key fields in dumps
+but not in do requests:
+
+  # ./pyynl/cli.py --spec netlink/specs/ethtool.yaml --do rss-get \
+               --json '{"header": {"dev-index":2}, "context": 1 }'
+  {
+     "header": { ... },
+     "context": 1,
+     "indir": [0, 1, 2, 3, ...]]
+  }
+
+  # ./pyynl/cli.py --spec netlink/specs/ethtool.yaml --dump rss-get
+  [
+     ... snip context 0 ...
+     { "header": { ... },
+       "context": 1,
+       "indir": [0, 1, 2, 3, ...],
+ ->    "input_xfrm": 255,
+ ->    "hfunc": 1,
+ ->    "hkey": "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
+     }
+  ]
+
+Hide these fields correctly.
+
+The drivers/net/hw/rss_ctx.py selftest catches this when run on
+a device with single key, already:
+
+  # Check| At /root/./ksft-net-drv/drivers/net/hw/rss_ctx.py, line 381, in test_rss_context_dump:
+  # Check|     ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
+  # Check failed {0} == {0} key is all zero
+  not ok 8 rss_ctx.test_rss_context_dump
+
+Fixes: f6122900f4e2 ("ethtool: rss: support dumping RSS contexts")
+Reviewed-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Joe Damato <jdamato@fastly.com>
+Link: https://patch.msgid.link/20250201013040.725123-2-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ethtool/rss.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
+index e07386275e142..8aa45f3fdfdf0 100644
+--- a/net/ethtool/rss.c
++++ b/net/ethtool/rss.c
+@@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
+       u32 total_size, indir_bytes;
+       u8 *rss_config;
++      data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key;
++
+       ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
+       if (!ctx)
+               return -ENOENT;
+@@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
+               if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context)
+                       return -EOPNOTSUPP;
+-              data->no_key_fields = !ops->rxfh_per_ctx_key;
+               return rss_prepare_ctx(request, dev, data, info);
+       }
+-- 
+2.39.5
+
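The shape of the fix - making the hide-the-key decision once, in the
helper that both request paths share - in a minimal sketch. The struct
and function names below are illustrative stand-ins, not the ethtool
core's actual API:

  #include <stdbool.h>

  struct rss_reply_data {
          bool no_key_fields;     /* when set, hfunc/input_xfrm/hkey are omitted */
  };

  struct dev_ops {
          bool rxfh_per_ctx_key;  /* device supports per-context hash keys */
  };

  /* Shared by the DO and DUMP entry points: deciding here means the
   * two paths can no longer disagree about the key fields. */
  static int rss_prepare_ctx(const struct dev_ops *ops,
                             struct rss_reply_data *data)
  {
          data->no_key_fields = !ops->rxfh_per_ctx_key;
          /* ... look up the context and fill the indirection table ... */
          return 0;
  }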
diff --git a/queue-6.12/firmware-iscsi_ibft-fix-iscsi_ibft-kconfig-entry.patch b/queue-6.12/firmware-iscsi_ibft-fix-iscsi_ibft-kconfig-entry.patch
new file mode 100644 (file)
index 0000000..0e6c76b
--- /dev/null
@@ -0,0 +1,35 @@
+From 3bd3c99764473753848647844d0bbddebdb9aa60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Mar 2024 16:21:22 +0530
+Subject: firmware: iscsi_ibft: fix ISCSI_IBFT Kconfig entry
+
+From: Prasad Pandit <pjp@fedoraproject.org>
+
+[ Upstream commit e1e17a1715982201034024863efbf238bee2bdf9 ]
+
+Fix ISCSI_IBFT Kconfig entry, replace tab with a space character.
+
+Fixes: 138fe4e0697 ("Firmware: add iSCSI iBFT Support")
+Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
+index 71d8b26c4103b..9f35f69e0f9e2 100644
+--- a/drivers/firmware/Kconfig
++++ b/drivers/firmware/Kconfig
+@@ -106,7 +106,7 @@ config ISCSI_IBFT
+       select ISCSI_BOOT_SYSFS
+       select ISCSI_IBFT_FIND if X86
+       depends on ACPI && SCSI && SCSI_LOWLEVEL
+-      default n
++      default n
+       help
+         This option enables support for detection and exposing of iSCSI
+         Boot Firmware Table (iBFT) via sysfs to userspace. If you wish to
+-- 
+2.39.5
+
diff --git a/queue-6.12/gpio-pca953x-improve-interrupt-support.patch b/queue-6.12/gpio-pca953x-improve-interrupt-support.patch
new file mode 100644 (file)
index 0000000..077034b
--- /dev/null
@@ -0,0 +1,68 @@
+From ccbbac2cec99a5200d09bfecdac89cb68000d953 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jun 2024 15:31:02 +1200
+Subject: gpio: pca953x: Improve interrupt support
+
+From: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
+
+[ Upstream commit d6179f6c6204f9932aed3a7a2100b4a295dfed9d ]
+
+The GPIO drivers with latch interrupt support (typically types starting
+with PCAL) have interrupt status registers to determine which particular
+inputs have caused an interrupt. Unfortunately there is no atomic
+operation to read these registers and clear the interrupt. Clearing the
+interrupt is done by reading the input registers.
+
+The code was reading the interrupt status registers, and then reading
+the input registers. If an input changed between these two events it was
+lost.
+
+The solution in this patch is to revert to the non-latch version of
+the code, i.e. remembering the previous input status and looking for
+changes. This approach requires no more I2C transfers, so it is no
+slower. The latch property of the device means interrupts will still
+be noticed even if the input changes back to its initial state.
+
+Fixes: 44896beae605 ("gpio: pca953x: add PCAL9535 interrupt support for Galileo Gen2")
+Signed-off-by: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20240606033102.2271916-1-mark.tomlinson@alliedtelesis.co.nz
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-pca953x.c | 19 -------------------
+ 1 file changed, 19 deletions(-)
+
+diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
+index e49802f26e07f..d764a3af63467 100644
+--- a/drivers/gpio/gpio-pca953x.c
++++ b/drivers/gpio/gpio-pca953x.c
+@@ -841,25 +841,6 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin
+       DECLARE_BITMAP(trigger, MAX_LINE);
+       int ret;
+-      if (chip->driver_data & PCA_PCAL) {
+-              /* Read the current interrupt status from the device */
+-              ret = pca953x_read_regs(chip, PCAL953X_INT_STAT, trigger);
+-              if (ret)
+-                      return false;
+-
+-              /* Check latched inputs and clear interrupt status */
+-              ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
+-              if (ret)
+-                      return false;
+-
+-              /* Apply filter for rising/falling edge selection */
+-              bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio);
+-
+-              bitmap_and(pending, new_stat, trigger, gc->ngpio);
+-
+-              return !bitmap_empty(pending, gc->ngpio);
+-      }
+-
+       ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
+       if (ret)
+               return false;
+-- 
+2.39.5
+
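The edge-detection scheme the driver reverts to - remember the previous
input state, diff it against the current read, and filter by per-line
trigger masks - modelled with plain 32-bit masks standing in for the
kernel's bitmap_*() helpers. A self-contained sketch, not the driver
code itself:

  #include <stdint.h>
  #include <stdio.h>

  static uint32_t pending_irqs(uint32_t old_stat, uint32_t cur_stat,
                               uint32_t trig_rise, uint32_t trig_fall)
  {
          uint32_t changed = old_stat ^ cur_stat;
          /* rising edge: line changed and is now high;
           * falling edge: line changed and is now low */
          uint32_t edges = (cur_stat & trig_rise) | (~cur_stat & trig_fall);

          return changed & edges;
  }

  int main(void)
  {
          /* line 0 rises, line 1 falls; only line 1 has a falling trigger */
          uint32_t pend = pending_irqs(0x2, 0x1, 0x0, 0x2);

          printf("pending: 0x%x\n", pend);        /* 0x2: line 1 fired */
          return 0;
  }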
diff --git a/queue-6.12/gpio-sim-lock-hog-configfs-items-if-present.patch b/queue-6.12/gpio-sim-lock-hog-configfs-items-if-present.patch
new file mode 100644 (file)
index 0000000..11772ee
--- /dev/null
@@ -0,0 +1,57 @@
+From cb74157dc98c075c5bb905ad002c7f6d7424423a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 12:01:23 +0100
+Subject: gpio: sim: lock hog configfs items if present
+
+From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+
+[ Upstream commit 015b7dae084fa95465ff89f6cbf15fe49906a370 ]
+
+Depending on the user config, the leaf entry may be the hog directory,
+not the line. Check for it and lock the correct item.
+
+Fixes: 8bd76b3d3f3a ("gpio: sim: lock up configfs that an instantiated device depends on")
+Tested-by: Koichiro Den <koichiro.den@canonical.com>
+Link: https://lore.kernel.org/r/20250203110123.87701-1-brgl@bgdev.pl
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-sim.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
+index deedacdeb2395..f83a8b5a51d0d 100644
+--- a/drivers/gpio/gpio-sim.c
++++ b/drivers/gpio/gpio-sim.c
+@@ -1036,20 +1036,23 @@ gpio_sim_device_lockup_configfs(struct gpio_sim_device *dev, bool lock)
+       struct configfs_subsystem *subsys = dev->group.cg_subsys;
+       struct gpio_sim_bank *bank;
+       struct gpio_sim_line *line;
++      struct config_item *item;
+       /*
+-       * The device only needs to depend on leaf line entries. This is
++       * The device only needs to depend on leaf entries. This is
+        * sufficient to lock up all the configfs entries that the
+        * instantiated, alive device depends on.
+        */
+       list_for_each_entry(bank, &dev->bank_list, siblings) {
+               list_for_each_entry(line, &bank->line_list, siblings) {
++                      item = line->hog ? &line->hog->item
++                                       : &line->group.cg_item;
++
+                       if (lock)
+-                              WARN_ON(configfs_depend_item_unlocked(
+-                                              subsys, &line->group.cg_item));
++                              WARN_ON(configfs_depend_item_unlocked(subsys,
++                                                                    item));
+                       else
+-                              configfs_undepend_item_unlocked(
+-                                              &line->group.cg_item);
++                              configfs_undepend_item_unlocked(item);
+               }
+       }
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.12/gpu-drm_dp_cec-fix-broken-cec-adapter-properties-che.patch b/queue-6.12/gpu-drm_dp_cec-fix-broken-cec-adapter-properties-che.patch
new file mode 100644 (file)
index 0000000..0040d16
--- /dev/null
@@ -0,0 +1,90 @@
+From 091a2f1a43f99622cea65514897c5344445f6b94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jan 2025 10:51:48 +0100
+Subject: gpu: drm_dp_cec: fix broken CEC adapter properties check
+
+From: Hans Verkuil <hverkuil@xs4all.nl>
+
+[ Upstream commit 6daaae5ff7f3b23a2dacc9c387ff3d4f95b67cad ]
+
+If the hotplug detect of a display is low for longer than one second
+(configurable through drm_dp_cec_unregister_delay), then the CEC adapter
+is unregistered since we assume the display was disconnected. If the
+HPD went low for less than one second, then we check if the properties
+of the CEC adapter have changed, since that indicates that we actually
+switched to new hardware and have to unregister the old CEC device and
+register a new one.
+
+Unfortunately, the test for changed properties was written poorly, and
+after a new CEC capability was added to the CEC core code the test always
+returned true (i.e. the properties had changed).
+
+As a result the CEC device was unregistered and re-registered for every
+HPD toggle. If the CEC remote controller integration was also enabled
+(CONFIG_MEDIA_CEC_RC was set), then the corresponding input device was
+also unregistered and re-registered. As a result the input device in
+/sys would keep incrementing its number, e.g.:
+
+/sys/devices/pci0000:00/0000:00:08.1/0000:e7:00.0/rc/rc0/input20
+
+Since short HPD toggles are common, the number could over time get into
+the thousands.
+
+While not a serious issue (i.e. nothing crashes), it is not intended
+to work that way.
+
+This patch changes the test so that it only checks for the single CEC
+capability that can actually change, and it ignores any other
+capabilities, so this is now safe as well if new caps are added in
+the future.
+
+With the changed test the bit under #ifndef CONFIG_MEDIA_CEC_RC can be
+dropped as well, so that's a nice cleanup.
+
+Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
+Reported-by: Farblos <farblos@vodafonemail.de>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Fixes: 2c6d1fffa1d9 ("drm: add support for DisplayPort CEC-Tunneling-over-AUX")
+Tested-by: Farblos <farblos@vodafonemail.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/361bb03d-1691-4e23-84da-0861ead5dbdc@xs4all.nl
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/display/drm_dp_cec.c | 14 +++-----------
+ 1 file changed, 3 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c
+index 007ceb281d00d..56a4965e518cc 100644
+--- a/drivers/gpu/drm/display/drm_dp_cec.c
++++ b/drivers/gpu/drm/display/drm_dp_cec.c
+@@ -311,16 +311,6 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address)
+       if (!aux->transfer)
+               return;
+-#ifndef CONFIG_MEDIA_CEC_RC
+-      /*
+-       * CEC_CAP_RC is part of CEC_CAP_DEFAULTS, but it is stripped by
+-       * cec_allocate_adapter() if CONFIG_MEDIA_CEC_RC is undefined.
+-       *
+-       * Do this here as well to ensure the tests against cec_caps are
+-       * correct.
+-       */
+-      cec_caps &= ~CEC_CAP_RC;
+-#endif
+       cancel_delayed_work_sync(&aux->cec.unregister_work);
+       mutex_lock(&aux->cec.lock);
+@@ -337,7 +327,9 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address)
+               num_las = CEC_MAX_LOG_ADDRS;
+       if (aux->cec.adap) {
+-              if (aux->cec.adap->capabilities == cec_caps &&
++              /* Check if the adapter properties have changed */
++              if ((aux->cec.adap->capabilities & CEC_CAP_MONITOR_ALL) ==
++                  (cec_caps & CEC_CAP_MONITOR_ALL) &&
+                   aux->cec.adap->available_log_addrs == num_las) {
+                       /* Unchanged, so just set the phys addr */
+                       cec_s_phys_addr(aux->cec.adap, source_physical_address, false);
+-- 
+2.39.5
+
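Why comparing the full capability word is fragile, in a few lines: any
new cap the CEC core adds makes old != new even though nothing the DP
code cares about changed. The bit values below are illustrative, not
the real CEC_CAP_* defines:

  #include <stdbool.h>
  #include <stdio.h>

  #define CAP_MONITOR_ALL 0x01    /* the one cap that can actually differ */
  #define CAP_RC          0x02
  #define CAP_NEW_FEATURE 0x04    /* hypothetical cap added later by the core */

  int main(void)
  {
          unsigned int old_caps = CAP_MONITOR_ALL | CAP_RC;
          unsigned int new_caps = CAP_MONITOR_ALL | CAP_RC | CAP_NEW_FEATURE;

          /* full-word compare: spurious "changed", adapter re-registered */
          bool broken = (old_caps == new_caps);

          /* masked compare: only the relevant bit is checked */
          bool fixed = (old_caps & CAP_MONITOR_ALL) ==
                       (new_caps & CAP_MONITOR_ALL);

          printf("full compare unchanged:   %d\n", broken);   /* 0 */
          printf("masked compare unchanged: %d\n", fixed);    /* 1 */
          return 0;
  }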
diff --git a/queue-6.12/ice-add-check-for-devm_kzalloc.patch b/queue-6.12/ice-add-check-for-devm_kzalloc.patch
new file mode 100644 (file)
index 0000000..a2c1072
--- /dev/null
@@ -0,0 +1,39 @@
+From c36a194ff0b1e72ae274dc25dd266c3ea4b4dcec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 Jan 2025 01:38:32 +0000
+Subject: ice: Add check for devm_kzalloc()
+
+From: Jiasheng Jiang <jiashengjiangcool@gmail.com>
+
+[ Upstream commit a8aa6a6ddce9b5585f2b74f27f3feea1427fb4e7 ]
+
+Add check for the return value of devm_kzalloc() to guarantee the success
+of allocation.
+
+Fixes: 42c2eb6b1f43 ("ice: Implement devlink-rate API")
+Signed-off-by: Jiasheng Jiang <jiashengjiangcool@gmail.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/20250131013832.24805-1-jiashengjiangcool@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/devlink/devlink.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c
+index 415445cefdb2a..b1efd287b3309 100644
+--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c
++++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c
+@@ -977,6 +977,9 @@ static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv
+       /* preallocate memory for ice_sched_node */
+       node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL);
++      if (!node)
++              return -ENOMEM;
++
+       *priv = node;
+       return 0;
+-- 
+2.39.5
+
diff --git a/queue-6.12/ice-gather-page_count-s-of-each-frag-right-before-xd.patch b/queue-6.12/ice-gather-page_count-s-of-each-frag-right-before-xd.patch
new file mode 100644 (file)
index 0000000..7c34d37
--- /dev/null
@@ -0,0 +1,96 @@
+From 7ca9856191d44cd7902508d34512628529635415 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jan 2025 16:01:17 +0100
+Subject: ice: gather page_count()'s of each frag right before XDP prog call
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 11c4aa074d547d825b19cd8d9f288254d89d805c ]
+
+If we store the pgcnt on a few fragments while in the middle of
+gathering the whole frame and we stumble upon the DD bit not being set,
+we terminate the NAPI Rx processing loop and come back later. Then, on
+the next NAPI execution, we work on the previously stored pgcnt.
+
+Imagine that the second half of the page was actively used by the
+networking stack and, by the time we came back, the stack was no longer
+busy with this page and had decremented the refcnt. The page reuse
+algorithm should be free to reuse the page in this case, but given the
+stale refcnt it will not do so; instead it will attempt to release the
+page via page_frag_cache_drain() with pagecnt_bias used as an arg. This
+in turn results in a negative refcnt on struct page, which was
+initially observed by Xu Du.
+
+Therefore, move the page count storage from ice_get_rx_buf() to a place
+where we are sure that whole frame has been collected, but before
+calling XDP program as it internally can also change the page count of
+fragments belonging to xdp_buff.
+
+Fixes: ac0753391195 ("ice: Store page count inside ice_rx_buf")
+Reported-and-tested-by: Xu Du <xudu@redhat.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Co-developed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_txrx.c | 27 ++++++++++++++++++++++-
+ 1 file changed, 26 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
+index 4660e2302e2ae..e2150d2c093bf 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
+@@ -924,7 +924,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
+       struct ice_rx_buf *rx_buf;
+       rx_buf = &rx_ring->rx_buf[ntc];
+-      rx_buf->pgcnt = page_count(rx_buf->page);
+       prefetchw(rx_buf->page);
+       if (!size)
+@@ -940,6 +939,31 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
+       return rx_buf;
+ }
++/**
++ * ice_get_pgcnts - grab page_count() for gathered fragments
++ * @rx_ring: Rx descriptor ring to store the page counts on
++ *
++ * This function is intended to be called right before running XDP
++ * program so that the page recycling mechanism will be able to take
++ * a correct decision regarding underlying pages; this is done in such
++ * way as XDP program can change the refcount of page
++ */
++static void ice_get_pgcnts(struct ice_rx_ring *rx_ring)
++{
++      u32 nr_frags = rx_ring->nr_frags + 1;
++      u32 idx = rx_ring->first_desc;
++      struct ice_rx_buf *rx_buf;
++      u32 cnt = rx_ring->count;
++
++      for (int i = 0; i < nr_frags; i++) {
++              rx_buf = &rx_ring->rx_buf[idx];
++              rx_buf->pgcnt = page_count(rx_buf->page);
++
++              if (++idx == cnt)
++                      idx = 0;
++      }
++}
++
+ /**
+  * ice_build_skb - Build skb around an existing buffer
+  * @rx_ring: Rx descriptor ring to transact packets on
+@@ -1241,6 +1265,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+               if (ice_is_non_eop(rx_ring, rx_desc))
+                       continue;
++              ice_get_pgcnts(rx_ring);
+               ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
+               if (rx_buf->act == ICE_XDP_PASS)
+                       goto construct_skb;
+-- 
+2.39.5
+
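The race can be modelled in userspace with plain integers. The field
names and "extra users" arithmetic below are illustrative - the real
heuristic lives in the ice page-recycling code - but the timing problem
is the same: a reuse decision made against a page_count() snapshot
taken one NAPI poll too early:

  #include <stdio.h>

  int main(void)
  {
          int refcnt = 2;         /* driver's refs + the stack's ref */
          int pagecnt_bias = 1;   /* refs the driver accounts for itself */

          int snapshot = refcnt;  /* taken early, in ice_get_rx_buf() */

          refcnt--;               /* stack drops its ref before the next poll */

          /* stale view: "someone else still holds the page", so the page
           * is drained with pagecnt_bias instead of being recycled, and
           * the refcnt can underflow */
          printf("stale: extra users = %d\n", snapshot - pagecnt_bias);

          /* fresh view, as after the fix (counts gathered right before
           * the XDP program runs): the page is free to recycle */
          printf("fresh: extra users = %d\n", refcnt - pagecnt_bias);
          return 0;
  }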
diff --git a/queue-6.12/ice-put-rx-buffers-after-being-done-with-current-fra.patch b/queue-6.12/ice-put-rx-buffers-after-being-done-with-current-fra.patch
new file mode 100644 (file)
index 0000000..4cdb5d6
--- /dev/null
@@ -0,0 +1,180 @@
+From abde409aab46b7bca81dee8543007d679c2cff61 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jan 2025 16:01:16 +0100
+Subject: ice: put Rx buffers after being done with current frame
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 743bbd93cf29f653fae0e1416a31f03231689911 ]
+
+Introduce a new helper, ice_put_rx_mbuf(), that goes through the
+gathered frags of the current frame and calls ice_put_rx_buf() on them.
+The current logic, which was supposed to simplify and optimize the
+driver by going through a batch of all buffers processed in the current
+NAPI instance, turned out to be broken for jumbo frames under very
+heavy load coming from both multi-threaded iperf and an nginx/wrk pair
+between server and client. The delay introduced by the approach we are
+dropping is simply too big, and we need to make the page
+recycling/releasing decision as quickly as we can.
+
+While at it, address an error path in ice_add_xdp_frag() - buffer
+putting has been missing there since day 1.
+
+As a nice side effect we get rid of annoying and repetitive three-liner:
+
+       xdp->data = NULL;
+       rx_ring->first_desc = ntc;
+       rx_ring->nr_frags = 0;
+
+by embedding it within introduced routine.
+
+Fixes: 1dc1a7e7f410 ("ice: Centrallize Rx buffer recycling")
+Reported-and-tested-by: Xu Du <xudu@redhat.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Co-developed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_txrx.c | 79 ++++++++++++++---------
+ 1 file changed, 50 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
+index 8208055d6e7fc..4660e2302e2ae 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
+@@ -1103,6 +1103,49 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
+       rx_buf->page = NULL;
+ }
++/**
++ * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags
++ * @rx_ring: Rx ring with all the auxiliary data
++ * @xdp: XDP buffer carrying linear + frags part
++ * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage
++ * @ntc: a current next_to_clean value to be stored at rx_ring
++ *
++ * Walk through gathered fragments and satisfy internal page
++ * recycle mechanism; we take here an action related to verdict
++ * returned by XDP program;
++ */
++static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
++                          u32 *xdp_xmit, u32 ntc)
++{
++      u32 nr_frags = rx_ring->nr_frags + 1;
++      u32 idx = rx_ring->first_desc;
++      u32 cnt = rx_ring->count;
++      struct ice_rx_buf *buf;
++      int i;
++
++      for (i = 0; i < nr_frags; i++) {
++              buf = &rx_ring->rx_buf[idx];
++
++              if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
++                      ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
++                      *xdp_xmit |= buf->act;
++              } else if (buf->act & ICE_XDP_CONSUMED) {
++                      buf->pagecnt_bias++;
++              } else if (buf->act == ICE_XDP_PASS) {
++                      ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
++              }
++
++              ice_put_rx_buf(rx_ring, buf);
++
++              if (++idx == cnt)
++                      idx = 0;
++      }
++
++      xdp->data = NULL;
++      rx_ring->first_desc = ntc;
++      rx_ring->nr_frags = 0;
++}
++
+ /**
+  * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+  * @rx_ring: Rx descriptor ring to transact packets on
+@@ -1120,7 +1163,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+       unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+       unsigned int offset = rx_ring->rx_offset;
+       struct xdp_buff *xdp = &rx_ring->xdp;
+-      u32 cached_ntc = rx_ring->first_desc;
+       struct ice_tx_ring *xdp_ring = NULL;
+       struct bpf_prog *xdp_prog = NULL;
+       u32 ntc = rx_ring->next_to_clean;
+@@ -1128,7 +1170,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+       u32 xdp_xmit = 0;
+       u32 cached_ntu;
+       bool failure;
+-      u32 first;
+       xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+       if (xdp_prog) {
+@@ -1190,6 +1231,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+                       xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
+                       xdp_buff_clear_frags_flag(xdp);
+               } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
++                      ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc);
+                       break;
+               }
+               if (++ntc == cnt)
+@@ -1205,9 +1247,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+               total_rx_bytes += xdp_get_buff_len(xdp);
+               total_rx_pkts++;
+-              xdp->data = NULL;
+-              rx_ring->first_desc = ntc;
+-              rx_ring->nr_frags = 0;
++              ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
++
+               continue;
+ construct_skb:
+               if (likely(ice_ring_uses_build_skb(rx_ring)))
+@@ -1221,14 +1262,11 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+                       if (unlikely(xdp_buff_has_frags(xdp)))
+                               ice_set_rx_bufs_act(xdp, rx_ring,
+                                                   ICE_XDP_CONSUMED);
+-                      xdp->data = NULL;
+-                      rx_ring->first_desc = ntc;
+-                      rx_ring->nr_frags = 0;
+-                      break;
+               }
+-              xdp->data = NULL;
+-              rx_ring->first_desc = ntc;
+-              rx_ring->nr_frags = 0;
++              ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
++
++              if (!skb)
++                      break;
+               stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
+               if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
+@@ -1257,23 +1295,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+               total_rx_pkts++;
+       }
+-      first = rx_ring->first_desc;
+-      while (cached_ntc != first) {
+-              struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc];
+-
+-              if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+-                      ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+-                      xdp_xmit |= buf->act;
+-              } else if (buf->act & ICE_XDP_CONSUMED) {
+-                      buf->pagecnt_bias++;
+-              } else if (buf->act == ICE_XDP_PASS) {
+-                      ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+-              }
+-
+-              ice_put_rx_buf(rx_ring, buf);
+-              if (++cached_ntc >= cnt)
+-                      cached_ntc = 0;
+-      }
+       rx_ring->next_to_clean = ntc;
+       /* return up to cleaned_count buffers to hardware */
+       failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring));
+-- 
+2.39.5
+
diff --git a/queue-6.12/ice-stop-storing-xdp-verdict-within-ice_rx_buf.patch b/queue-6.12/ice-stop-storing-xdp-verdict-within-ice_rx_buf.patch
new file mode 100644 (file)
index 0000000..50c5ee7
--- /dev/null
@@ -0,0 +1,273 @@
+From fb3aca8dee1b56a69e784eaacd7f728159dcd4e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jan 2025 16:01:18 +0100
+Subject: ice: stop storing XDP verdict within ice_rx_buf
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 468a1952df78f65c5991b7ac885c8b5b7dd87bab ]
+
+The idea behind having ice_rx_buf::act was to simplify and speed up the
+Rx data path by walking through buffers that represented cleaned HW Rx
+descriptors. Since it caused us a major headache recently and we rolled
+back to the old approach that 'puts' Rx buffers right after running the
+XDP prog/creating the skb, it is useless now and should be removed.
+
+Get rid of ice_rx_buf::act and related logic. We still need to take
+care of the corner case where the XDP program releases a particular
+fragment.
+
+Make ice_run_xdp() return its result and use it within
+ice_put_rx_mbuf().
+
+Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_txrx.c     | 62 +++++++++++--------
+ drivers/net/ethernet/intel/ice/ice_txrx.h     |  1 -
+ drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 43 -------------
+ 3 files changed, 36 insertions(+), 70 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
+index e2150d2c093bf..f12fb3a2b6ad9 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
+@@ -527,15 +527,14 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
+  * @xdp: xdp_buff used as input to the XDP program
+  * @xdp_prog: XDP program to run
+  * @xdp_ring: ring to be used for XDP_TX action
+- * @rx_buf: Rx buffer to store the XDP action
+  * @eop_desc: Last descriptor in packet to read metadata from
+  *
+  * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+  */
+-static void
++static u32
+ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+           struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
+-          struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
++          union ice_32b_rx_flex_desc *eop_desc)
+ {
+       unsigned int ret = ICE_XDP_PASS;
+       u32 act;
+@@ -574,7 +573,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+               ret = ICE_XDP_CONSUMED;
+       }
+ exit:
+-      ice_set_rx_bufs_act(xdp, rx_ring, ret);
++      return ret;
+ }
+ /**
+@@ -860,10 +859,8 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+               xdp_buff_set_frags_flag(xdp);
+       }
+-      if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+-              ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
++      if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS))
+               return -ENOMEM;
+-      }
+       __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
+                                  rx_buf->page_offset, size);
+@@ -1075,12 +1072,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
+                               rx_buf->page_offset + headlen, size,
+                               xdp->frame_sz);
+       } else {
+-              /* buffer is unused, change the act that should be taken later
+-               * on; data was copied onto skb's linear part so there's no
++              /* buffer is unused, restore biased page count in Rx buffer;
++               * data was copied onto skb's linear part so there's no
+                * need for adjusting page offset and we can reuse this buffer
+                * as-is
+                */
+-              rx_buf->act = ICE_SKB_CONSUMED;
++              rx_buf->pagecnt_bias++;
+       }
+       if (unlikely(xdp_buff_has_frags(xdp))) {
+@@ -1133,29 +1130,34 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
+  * @xdp: XDP buffer carrying linear + frags part
+  * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage
+  * @ntc: a current next_to_clean value to be stored at rx_ring
++ * @verdict: return code from XDP program execution
+  *
+  * Walk through gathered fragments and satisfy internal page
+  * recycle mechanism; we take here an action related to verdict
+  * returned by XDP program;
+  */
+ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+-                          u32 *xdp_xmit, u32 ntc)
++                          u32 *xdp_xmit, u32 ntc, u32 verdict)
+ {
+       u32 nr_frags = rx_ring->nr_frags + 1;
+       u32 idx = rx_ring->first_desc;
+       u32 cnt = rx_ring->count;
++      u32 post_xdp_frags = 1;
+       struct ice_rx_buf *buf;
+       int i;
+-      for (i = 0; i < nr_frags; i++) {
++      if (unlikely(xdp_buff_has_frags(xdp)))
++              post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags;
++
++      for (i = 0; i < post_xdp_frags; i++) {
+               buf = &rx_ring->rx_buf[idx];
+-              if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
++              if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+                       ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+-                      *xdp_xmit |= buf->act;
+-              } else if (buf->act & ICE_XDP_CONSUMED) {
++                      *xdp_xmit |= verdict;
++              } else if (verdict & ICE_XDP_CONSUMED) {
+                       buf->pagecnt_bias++;
+-              } else if (buf->act == ICE_XDP_PASS) {
++              } else if (verdict == ICE_XDP_PASS) {
+                       ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+               }
+@@ -1164,6 +1166,17 @@ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+               if (++idx == cnt)
+                       idx = 0;
+       }
++      /* handle buffers that represented frags released by XDP prog;
++       * for these we keep pagecnt_bias as-is; refcount from struct page
++       * has been decremented within XDP prog and we do not have to increase
++       * the biased refcnt
++       */
++      for (; i < nr_frags; i++) {
++              buf = &rx_ring->rx_buf[idx];
++              ice_put_rx_buf(rx_ring, buf);
++              if (++idx == cnt)
++                      idx = 0;
++      }
+       xdp->data = NULL;
+       rx_ring->first_desc = ntc;
+@@ -1190,9 +1203,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+       struct ice_tx_ring *xdp_ring = NULL;
+       struct bpf_prog *xdp_prog = NULL;
+       u32 ntc = rx_ring->next_to_clean;
++      u32 cached_ntu, xdp_verdict;
+       u32 cnt = rx_ring->count;
+       u32 xdp_xmit = 0;
+-      u32 cached_ntu;
+       bool failure;
+       xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+@@ -1255,7 +1268,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+                       xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
+                       xdp_buff_clear_frags_flag(xdp);
+               } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
+-                      ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc);
++                      ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED);
+                       break;
+               }
+               if (++ntc == cnt)
+@@ -1266,13 +1279,13 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+                       continue;
+               ice_get_pgcnts(rx_ring);
+-              ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
+-              if (rx_buf->act == ICE_XDP_PASS)
++              xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc);
++              if (xdp_verdict == ICE_XDP_PASS)
+                       goto construct_skb;
+               total_rx_bytes += xdp_get_buff_len(xdp);
+               total_rx_pkts++;
+-              ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
++              ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
+               continue;
+ construct_skb:
+@@ -1283,12 +1296,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+               /* exit if we failed to retrieve a buffer */
+               if (!skb) {
+                       rx_ring->ring_stats->rx_stats.alloc_page_failed++;
+-                      rx_buf->act = ICE_XDP_CONSUMED;
+-                      if (unlikely(xdp_buff_has_frags(xdp)))
+-                              ice_set_rx_bufs_act(xdp, rx_ring,
+-                                                  ICE_XDP_CONSUMED);
++                      xdp_verdict = ICE_XDP_CONSUMED;
+               }
+-              ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc);
++              ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
+               if (!skb)
+                       break;
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
+index feba314a3fe44..7130992d41779 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
+@@ -201,7 +201,6 @@ struct ice_rx_buf {
+       struct page *page;
+       unsigned int page_offset;
+       unsigned int pgcnt;
+-      unsigned int act;
+       unsigned int pagecnt_bias;
+ };
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+index afcead4baef4b..f6c2b16ab4567 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
++++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+@@ -5,49 +5,6 @@
+ #define _ICE_TXRX_LIB_H_
+ #include "ice.h"
+-/**
+- * ice_set_rx_bufs_act - propagate Rx buffer action to frags
+- * @xdp: XDP buffer representing frame (linear and frags part)
+- * @rx_ring: Rx ring struct
+- * act: action to store onto Rx buffers related to XDP buffer parts
+- *
+- * Set action that should be taken before putting Rx buffer from first frag
+- * to the last.
+- */
+-static inline void
+-ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
+-                  const unsigned int act)
+-{
+-      u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+-      u32 nr_frags = rx_ring->nr_frags + 1;
+-      u32 idx = rx_ring->first_desc;
+-      u32 cnt = rx_ring->count;
+-      struct ice_rx_buf *buf;
+-
+-      for (int i = 0; i < nr_frags; i++) {
+-              buf = &rx_ring->rx_buf[idx];
+-              buf->act = act;
+-
+-              if (++idx == cnt)
+-                      idx = 0;
+-      }
+-
+-      /* adjust pagecnt_bias on frags freed by XDP prog */
+-      if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
+-              u32 delta = rx_ring->nr_frags - sinfo_frags;
+-
+-              while (delta) {
+-                      if (idx == 0)
+-                              idx = cnt - 1;
+-                      else
+-                              idx--;
+-                      buf = &rx_ring->rx_buf[idx];
+-                      buf->pagecnt_bias--;
+-                      delta--;
+-              }
+-      }
+-}
+-
+ /**
+  * ice_test_staterr - tests bits in Rx descriptor status and error fields
+  * @status_err_n: Rx descriptor status_error0 or status_error1 bits
+-- 
+2.39.5
+
diff --git a/queue-6.12/net-atlantic-fix-warning-during-hot-unplug.patch b/queue-6.12/net-atlantic-fix-warning-during-hot-unplug.patch
new file mode 100644 (file)
index 0000000..5f86c98
--- /dev/null
@@ -0,0 +1,71 @@
+From a4315eb6daaf4b61bcd7168f478bf2d3c6eab7ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 09:36:05 -0500
+Subject: net: atlantic: fix warning during hot unplug
+
+From: Jacob Moroni <mail@jakemoroni.com>
+
+[ Upstream commit 028676bb189ed6d1b550a0fc570a9d695b6acfd3 ]
+
+Firmware deinitialization performs MMIO accesses which are not
+necessary if the device has already been removed. In some cases,
+these accesses happen via readx_poll_timeout_atomic which ends up
+timing out, resulting in a warning at hw_atl2_utils_fw.c:112:
+
+[  104.595913] Call Trace:
+[  104.595915]  <TASK>
+[  104.595918]  ? show_regs+0x6c/0x80
+[  104.595923]  ? __warn+0x8d/0x150
+[  104.595925]  ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic]
+[  104.595934]  ? report_bug+0x182/0x1b0
+[  104.595938]  ? handle_bug+0x6e/0xb0
+[  104.595940]  ? exc_invalid_op+0x18/0x80
+[  104.595942]  ? asm_exc_invalid_op+0x1b/0x20
+[  104.595944]  ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic]
+[  104.595952]  ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic]
+[  104.595959]  aq_nic_deinit.part.0+0xbd/0xf0 [atlantic]
+[  104.595964]  aq_nic_deinit+0x17/0x30 [atlantic]
+[  104.595970]  aq_ndev_close+0x2b/0x40 [atlantic]
+[  104.595975]  __dev_close_many+0xad/0x160
+[  104.595978]  dev_close_many+0x99/0x170
+[  104.595979]  unregister_netdevice_many_notify+0x18b/0xb20
+[  104.595981]  ? __call_rcu_common+0xcd/0x700
+[  104.595984]  unregister_netdevice_queue+0xc6/0x110
+[  104.595986]  unregister_netdev+0x1c/0x30
+[  104.595988]  aq_pci_remove+0xb1/0xc0 [atlantic]
+
+Fix this by skipping firmware deinitialization altogether if the
+PCI device is no longer present.
+
+Tested with an AQC113 attached via Thunderbolt by performing
+repeated unplug cycles while traffic was running via iperf.
+
+Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code")
+Signed-off-by: Jacob Moroni <mail@jakemoroni.com>
+Reviewed-by: Igor Russkikh <irusskikh@marvell.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250203143604.24930-3-mail@jakemoroni.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+index fe0e3e2a81171..71e50fc65c147 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+@@ -1441,7 +1441,9 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down)
+       aq_ptp_ring_free(self);
+       aq_ptp_free(self);
+-      if (likely(self->aq_fw_ops->deinit) && link_down) {
++      /* May be invoked during hot unplug. */
++      if (pci_device_is_present(self->pdev) &&
++          likely(self->aq_fw_ops->deinit) && link_down) {
+               mutex_lock(&self->fwreq_mutex);
+               self->aq_fw_ops->deinit(self->aq_hw);
+               mutex_unlock(&self->fwreq_mutex);
+-- 
+2.39.5
+
diff --git a/queue-6.12/net-bcmgenet-correct-overlaying-of-phy-and-mac-wake-.patch b/queue-6.12/net-bcmgenet-correct-overlaying-of-phy-and-mac-wake-.patch
new file mode 100644 (file)
index 0000000..6541b43
--- /dev/null
@@ -0,0 +1,74 @@
+From 6ca0b78343b97dd8592ff782fb94d763ed647957 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jan 2025 15:13:42 -0800
+Subject: net: bcmgenet: Correct overlaying of PHY and MAC Wake-on-LAN
+
+From: Florian Fainelli <florian.fainelli@broadcom.com>
+
+[ Upstream commit 46ded709232344b5750a852747a8881763c721ab ]
+
+Some Wake-on-LAN modes such as WAKE_FILTER may only be supported by the MAC,
+while others might only be supported by the PHY. Make sure that .get_wol()
+returns the union of both rather than only that of the PHY if the PHY supports
+Wake-on-LAN.
+
+When disabling Wake-on-LAN, make sure that this is done at both the PHY
+and MAC level, rather than doing an early return from the PHY driver.
+
+Fixes: 7e400ff35cbe ("net: bcmgenet: Add support for PHY-based Wake-on-LAN")
+Fixes: 9ee09edc05f2 ("net: bcmgenet: Properly overlay PHY and MAC Wake-on-LAN capabilities")
+Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Link: https://patch.msgid.link/20250129231342.35013-1-florian.fainelli@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/broadcom/genet/bcmgenet_wol.c   | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+index 0715ea5bf13ed..3b082114f2e53 100644
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+@@ -41,9 +41,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+ {
+       struct bcmgenet_priv *priv = netdev_priv(dev);
+       struct device *kdev = &priv->pdev->dev;
++      u32 phy_wolopts = 0;
+-      if (dev->phydev)
++      if (dev->phydev) {
+               phy_ethtool_get_wol(dev->phydev, wol);
++              phy_wolopts = wol->wolopts;
++      }
+       /* MAC is not wake-up capable, return what the PHY does */
+       if (!device_can_wakeup(kdev))
+@@ -51,9 +54,14 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+       /* Overlay MAC capabilities with that of the PHY queried before */
+       wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
+-      wol->wolopts = priv->wolopts;
+-      memset(wol->sopass, 0, sizeof(wol->sopass));
++      wol->wolopts |= priv->wolopts;
++      /* Return the PHY configured magic password */
++      if (phy_wolopts & WAKE_MAGICSECURE)
++              return;
++
++      /* Otherwise the MAC one */
++      memset(wol->sopass, 0, sizeof(wol->sopass));
+       if (wol->wolopts & WAKE_MAGICSECURE)
+               memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass));
+ }
+@@ -70,7 +78,7 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+       /* Try Wake-on-LAN from the PHY first */
+       if (dev->phydev) {
+               ret = phy_ethtool_set_wol(dev->phydev, wol);
+-              if (ret != -EOPNOTSUPP)
++              if (ret != -EOPNOTSUPP && wol->wolopts)
+                       return ret;
+       }
+-- 
+2.39.5
+
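The essence of the getter fix is one operator: OR the MAC's modes into
whatever the PHY already reported instead of assigning over it. A
minimal sketch with illustrative flag values (not the real WAKE_*
defines):

  #include <stdio.h>

  #define WAKE_MAGIC  0x1         /* PHY-supported mode in this example */
  #define WAKE_FILTER 0x2         /* MAC-only mode in this example */

  int main(void)
  {
          unsigned int wolopts = WAKE_MAGIC;      /* PHY filled this in */
          unsigned int mac_wolopts = WAKE_FILTER;

          /* broken: wolopts = mac_wolopts; drops the PHY's WAKE_MAGIC */
          wolopts |= mac_wolopts;                 /* union of PHY and MAC */

          printf("wolopts: 0x%x\n", wolopts);     /* 0x3 */
          return 0;
  }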
diff --git a/queue-6.12/net-rose-lock-the-socket-in-rose_bind.patch b/queue-6.12/net-rose-lock-the-socket-in-rose_bind.patch
new file mode 100644 (file)
index 0000000..4037197
--- /dev/null
@@ -0,0 +1,87 @@
+From 728eb0dbccab6bb8e4ce5932dd4beba6b8db91a2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 17:08:38 +0000
+Subject: net: rose: lock the socket in rose_bind()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a1300691aed9ee852b0a9192e29e2bdc2411a7e6 ]
+
+syzbot reported a soft lockup in rose_loopback_timer(),
+with a repro calling bind() from multiple threads.
+
+rose_bind() must lock the socket to avoid this issue.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzbot+7ff41b5215f0c534534e@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/67a0f78d.050a0220.d7c5a.00a0.GAE@google.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Link: https://patch.msgid.link/20250203170838.3521361-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rose/af_rose.c | 24 ++++++++++++++++--------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
+index 72c65d938a150..a4a668b88a8f2 100644
+--- a/net/rose/af_rose.c
++++ b/net/rose/af_rose.c
+@@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+       struct net_device *dev;
+       ax25_address *source;
+       ax25_uid_assoc *user;
++      int err = -EINVAL;
+       int n;
+-      if (!sock_flag(sk, SOCK_ZAPPED))
+-              return -EINVAL;
+-
+       if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose))
+               return -EINVAL;
+@@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+       if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS)
+               return -EINVAL;
+-      if ((dev = rose_dev_get(&addr->srose_addr)) == NULL)
+-              return -EADDRNOTAVAIL;
++      lock_sock(sk);
++
++      if (!sock_flag(sk, SOCK_ZAPPED))
++              goto out_release;
++
++      err = -EADDRNOTAVAIL;
++      dev = rose_dev_get(&addr->srose_addr);
++      if (!dev)
++              goto out_release;
+       source = &addr->srose_call;
+@@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+       } else {
+               if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
+                       dev_put(dev);
+-                      return -EACCES;
++                      err = -EACCES;
++                      goto out_release;
+               }
+               rose->source_call   = *source;
+       }
+@@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+       rose_insert_socket(sk);
+       sock_reset_flag(sk, SOCK_ZAPPED);
+-
+-      return 0;
++      err = 0;
++out_release:
++      release_sock(sk);
++      return err;
+ }
+ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
+-- 
+2.39.5
+
diff --git a/queue-6.12/net-sched-fix-truncation-of-offloaded-action-statist.patch b/queue-6.12/net-sched-fix-truncation-of-offloaded-action-statist.patch
new file mode 100644 (file)
index 0000000..c8d3a56
--- /dev/null
@@ -0,0 +1,77 @@
+From 9fdc22cd92f4521e8203fc2e0027f53449d61237 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Feb 2025 14:38:39 +0200
+Subject: net: sched: Fix truncation of offloaded action statistics
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 811b8f534fd85e17077bd2ac0413bcd16cc8fb9b ]
+
+In case of tc offload, when user space queries the kernel for tc action
+statistics, tc will query the offloaded statistics from device drivers.
+Among other statistics, drivers are expected to pass the number of
+packets that hit the action since the last query as a 64-bit number.
+
+Unfortunately, tc treats the number of packets as a 32-bit number,
+leading to truncation and incorrect statistics when the number of
+packets since the last query exceeds 0xffffffff:
+
+$ tc -s filter show dev swp2 ingress
+filter protocol all pref 1 flower chain 0
+filter protocol all pref 1 flower chain 0 handle 0x1
+  skip_sw
+  in_hw in_hw_count 1
+        action order 1: mirred (Egress Redirect to device swp1) stolen
+        index 1 ref 1 bind 1 installed 58 sec used 0 sec
+        Action statistics:
+        Sent 1133877034176 bytes 536959475 pkt (dropped 0, overlimits 0 requeues 0)
+[...]
+
+According to the above, 2111-byte packets were redirected, which is
+impossible as only 64-byte packets were transmitted and the MTU was
+1500.
+
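+The truncation is easy to reproduce in a few lines of userspace C (a
+minimal sketch with a made-up count, not a value from the runs above):
+
+    #include <stdio.h>
+    #include <stdint.h>
+
+    /* Old prototype: the packet count is silently narrowed to 32 bits. */
+    static void bstats_update_old(uint64_t bytes, uint32_t packets)
+    {
+        printf("old: %llu bytes, %u pkt\n",
+               (unsigned long long)bytes, packets);
+    }
+
+    /* Fixed prototype: the packet count stays 64 bits wide. */
+    static void bstats_update_new(uint64_t bytes, uint64_t packets)
+    {
+        printf("new: %llu bytes, %llu pkt\n",
+               (unsigned long long)bytes, (unsigned long long)packets);
+    }
+
+    int main(void)
+    {
+        uint64_t pkts = 21416005951ULL;      /* > 0xffffffff */
+
+        bstats_update_old(pkts * 64, pkts);  /* prints 4236136767 pkt */
+        bstats_update_new(pkts * 64, pkts);  /* prints 21416005951 pkt */
+        return 0;
+    }
+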
+Fix by treating packets as a 64-bit number:
+
+$ tc -s filter show dev swp2 ingress
+filter protocol all pref 1 flower chain 0
+filter protocol all pref 1 flower chain 0 handle 0x1
+  skip_sw
+  in_hw in_hw_count 1
+        action order 1: mirred (Egress Redirect to device swp1) stolen
+        index 1 ref 1 bind 1 installed 61 sec used 0 sec
+        Action statistics:
+        Sent 1370624380864 bytes 21416005951 pkt (dropped 0, overlimits 0 requeues 0)
+[...]
+
+Which shows that only 64-byte packets were redirected (1370624380864 /
+21416005951 = 64).
+
+Fixes: 380407023526 ("net/sched: Enable netdev drivers to update statistics of offloaded actions")
+Reported-by: Joe Botha <joe@atomic.ac>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250204123839.1151804-1-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sch_generic.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
+index 1e6324f0d4efd..24e48af7e8f74 100644
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -851,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ }
+ static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
+-                                __u64 bytes, __u32 packets)
++                                __u64 bytes, __u64 packets)
+ {
+       u64_stats_update_begin(&bstats->syncp);
+       u64_stats_add(&bstats->bytes, bytes);
+-- 
+2.39.5
+
diff --git a/queue-6.12/netem-update-sch-q.qlen-before-qdisc_tree_reduce_bac.patch b/queue-6.12/netem-update-sch-q.qlen-before-qdisc_tree_reduce_bac.patch
new file mode 100644 (file)
index 0000000..57cbd0e
--- /dev/null
@@ -0,0 +1,44 @@
+From 944010266fa6b448259f14df40c7314cd75653a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 16:58:40 -0800
+Subject: netem: Update sch->q.qlen before qdisc_tree_reduce_backlog()
+
+From: Cong Wang <cong.wang@bytedance.com>
+
+[ Upstream commit 638ba5089324796c2ee49af10427459c2de35f71 ]
+
+qdisc_tree_reduce_backlog() notifies the parent qdisc only if the child
+qdisc becomes empty; therefore, we need to reduce the backlog of the
+child qdisc before calling it. Otherwise it would miss the opportunity
+to call cops->qlen_notify(); in the case of DRR, this resulted in a UAF
+since DRR uses ->qlen_notify() to maintain its active list.
+
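+For reference, the notify decision inside qdisc_tree_reduce_backlog()
+reads the child's qlen at call time (a condensed sketch of the relevant
+line in net/sched/sch_api.c, not the full function):
+
+    /* Notify parent qdisc only if child qdisc becomes empty. */
+    notify = !sch->q.qlen && ...;
+
+Hence the child must drop its own q.qlen and backlog first; otherwise a
+now-empty DRR class is never removed from the active list and can be
+dereferenced after it is freed.
+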
+Fixes: f8d4bc455047 ("net/sched: netem: account for backlog updates from child qdisc")
+Cc: Martin Ottens <martin.ottens@fau.de>
+Reported-by: Mingi Cho <mincho@theori.io>
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Link: https://patch.msgid.link/20250204005841.223511-4-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_netem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
+index 3b519adc01259..68a08f6d1fbce 100644
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -748,9 +748,9 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
+                               if (err != NET_XMIT_SUCCESS) {
+                                       if (net_xmit_drop_count(err))
+                                               qdisc_qstats_drop(sch);
+-                                      qdisc_tree_reduce_backlog(sch, 1, pkt_len);
+                                       sch->qstats.backlog -= pkt_len;
+                                       sch->q.qlen--;
++                                      qdisc_tree_reduce_backlog(sch, 1, pkt_len);
+                               }
+                               goto tfifo_dequeue;
+                       }
+-- 
+2.39.5
+
diff --git a/queue-6.12/nvme-fc-use-ctrl-state-getter.patch b/queue-6.12/nvme-fc-use-ctrl-state-getter.patch
new file mode 100644 (file)
index 0000000..57ed350
--- /dev/null
@@ -0,0 +1,66 @@
+From 7683ba62df9bc12135959e982b062d39f93d25f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jan 2025 17:34:47 +0100
+Subject: nvme-fc: use ctrl state getter
+
+From: Daniel Wagner <wagi@kernel.org>
+
+[ Upstream commit c8ed6cb5d37bc09c7e25e49a670e9fd1a3bd1dfa ]
+
+Do not access the state variable directly; instead, use proper
+synchronization so that no stale data is read.
+
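+For context, the getter is essentially a READ_ONCE() wrapper (a
+paraphrased sketch of the helper in drivers/nvme/host/nvme.h):
+
+    static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
+    {
+        return READ_ONCE(ctrl->state);
+    }
+
+A plain ctrl->ctrl.state read lets the compiler re-read or tear the
+load; the accessor pairs with the WRITE_ONCE() done on the state-change
+side.
+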
+Fixes: e6e7f7ac03e4 ("nvme: ensure reset state check ordering")
+Signed-off-by: Daniel Wagner <wagi@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/fc.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
+index b81af7919e94c..682234da2fabe 100644
+--- a/drivers/nvme/host/fc.c
++++ b/drivers/nvme/host/fc.c
+@@ -2080,7 +2080,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
+               nvme_fc_complete_rq(rq);
+ check_error:
+-      if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
++      if (terminate_assoc &&
++          nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_RESETTING)
+               queue_work(nvme_reset_wq, &ctrl->ioerr_work);
+ }
+@@ -2534,6 +2535,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
+ static void
+ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
+ {
++      enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
++
+       /*
+        * if an error (io timeout, etc) while (re)connecting, the remote
+        * port requested terminating of the association (disconnect_ls)
+@@ -2541,7 +2544,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
+        * the controller.  Abort any ios on the association and let the
+        * create_association error path resolve things.
+        */
+-      if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
++      if (state == NVME_CTRL_CONNECTING) {
+               __nvme_fc_abort_outstanding_ios(ctrl, true);
+               set_bit(ASSOC_FAILED, &ctrl->flags);
+               dev_warn(ctrl->ctrl.device,
+@@ -2551,7 +2554,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
+       }
+       /* Otherwise, only proceed if in LIVE state - e.g. on first error */
+-      if (ctrl->ctrl.state != NVME_CTRL_LIVE)
++      if (state != NVME_CTRL_LIVE)
+               return;
+       dev_warn(ctrl->ctrl.device,
+-- 
+2.39.5
+
diff --git a/queue-6.12/nvme-handle-connectivity-loss-in-nvme_set_queue_coun.patch b/queue-6.12/nvme-handle-connectivity-loss-in-nvme_set_queue_coun.patch
new file mode 100644 (file)
index 0000000..4627f6c
--- /dev/null
@@ -0,0 +1,53 @@
+From db1af470ff8e29ba4693dca2c7ae4efb80618cc5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Jan 2025 14:30:48 +0100
+Subject: nvme: handle connectivity loss in nvme_set_queue_count
+
+From: Daniel Wagner <wagi@kernel.org>
+
+[ Upstream commit 294b2b7516fd06a8dd82e4a6118f318ec521e706 ]
+
+When the set features attempt fails with any NVMe status code set in
+nvme_set_queue_count, the function still reports success, though the
+number of queues is set to 0. This is done to support controllers in a
+degraded state (the admin queue is still up and running, but there are
+no I/O queues).
+
+There is an exception, though. When nvme_set_features reports a host
+path error, nvme_set_queue_count should propagate this error, as
+connectivity is lost, which means the admin queue is not working anymore
+either.
+
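+A condensed sketch of the resulting control flow (simplified from
+nvme_set_queue_count(), not the verbatim function):
+
+    status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count,
+                               NULL, 0, &result);
+    if (status < 0 || status == NVME_SC_HOST_PATH_ERROR)
+        return status;   /* kernel error or connectivity loss */
+    if (status > 0) {
+        *count = 0;      /* degraded controller: admin queue only */
+        return 0;
+    }
+    *count = min(*count, nr_io_queues);  /* normal path */
+    return 0;
+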
+Fixes: 9a0be7abb62f ("nvme: refactor set_queue_count")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Daniel Wagner <wagi@kernel.org>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 4c409efd8cec1..8da50df56b079 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -1691,7 +1691,13 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
+       status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0,
+                       &result);
+-      if (status < 0)
++
++      /*
++       * It's either a kernel error or the host observed a connection
++       * lost. In either case it's not possible communicate with the
++       * controller and thus enter the error code path.
++       */
++      if (status < 0 || status == NVME_SC_HOST_PATH_ERROR)
+               return status;
+       /*
+-- 
+2.39.5
+
diff --git a/queue-6.12/nvme-make-nvme_tls_attrs_group-static.patch b/queue-6.12/nvme-make-nvme_tls_attrs_group-static.patch
new file mode 100644 (file)
index 0000000..8229b67
--- /dev/null
@@ -0,0 +1,37 @@
+From 93bea40a76a7c53e72245db77dda957cbc8195ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jan 2025 07:22:31 -0800
+Subject: nvme: make nvme_tls_attrs_group static
+
+From: Keith Busch <kbusch@kernel.org>
+
+[ Upstream commit 2d1a2dab95cdc6f2e0c6af3c0514b0bea94af482 ]
+
+To suppress the compiler "warning: symbol 'nvme_tls_attrs_group' was not
+declared. Should it be static?"
+
+Fixes: 1e48b34c9bc79a ("nvme: split off TLS sysfs attributes into a separate group")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/sysfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
+index b68a9e5f1ea39..3a41b9ab0f13c 100644
+--- a/drivers/nvme/host/sysfs.c
++++ b/drivers/nvme/host/sysfs.c
+@@ -792,7 +792,7 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
+       return a->mode;
+ }
+-const struct attribute_group nvme_tls_attrs_group = {
++static const struct attribute_group nvme_tls_attrs_group = {
+       .attrs          = nvme_tls_attrs,
+       .is_visible     = nvme_tls_attrs_are_visible,
+ };
+-- 
+2.39.5
+
diff --git a/queue-6.12/pfifo_tail_enqueue-drop-new-packet-when-sch-limit-0.patch b/queue-6.12/pfifo_tail_enqueue-drop-new-packet-when-sch-limit-0.patch
new file mode 100644 (file)
index 0000000..a65e32b
--- /dev/null
@@ -0,0 +1,72 @@
+From d0e2deb424437c88a1a176d41e8ae0be3ca31c69 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 16:58:38 -0800
+Subject: pfifo_tail_enqueue: Drop new packet when sch->limit == 0
+
+From: Quang Le <quanglex97@gmail.com>
+
+[ Upstream commit 647cef20e649c576dff271e018d5d15d998b629d ]
+
+Expected behaviour:
+In case we reach the scheduler's limit, pfifo_tail_enqueue() will drop
+a packet from the scheduler's queue and decrease the scheduler's qlen
+by one. Then, pfifo_tail_enqueue() enqueues the new packet and
+increases the scheduler's qlen by one. Finally, pfifo_tail_enqueue()
+returns the `NET_XMIT_CN` status code.
+
+Weird behaviour:
+In case we set `sch->limit == 0` and trigger pfifo_tail_enqueue() on a
+scheduler that has no packets, the 'drop a packet' step does nothing.
+This means the scheduler's qlen still equals 0. Then, we continue to
+enqueue the new packet and increase the scheduler's qlen by one. In
+summary, we can leverage pfifo_tail_enqueue() to increase qlen by one
+while returning the `NET_XMIT_CN` status code.
+
+The problem is:
+Let's say we have two qdiscs: Qdisc_A and Qdisc_B.
+ - Qdisc_A's type must have a '->graft()' function to create a parent/child relationship.
+   Let's say Qdisc_A's type is `hfsc`. Enqueueing a packet to this qdisc will trigger `hfsc_enqueue`.
+ - Qdisc_B's type is pfifo_head_drop. Enqueueing a packet to this qdisc will trigger `pfifo_tail_enqueue`.
+ - Qdisc_B is configured to have `sch->limit == 0`.
+ - Qdisc_A is configured to route the enqueued packet to Qdisc_B.
+
+Enqueueing a packet through Qdisc_A will lead to:
+ - hfsc_enqueue(Qdisc_A) -> pfifo_tail_enqueue(Qdisc_B)
+ - Qdisc_B->q.qlen += 1
+ - pfifo_tail_enqueue() returns `NET_XMIT_CN`
+ - hfsc_enqueue() checks for `NET_XMIT_SUCCESS`, sees `NET_XMIT_CN`, and therefore does not increase the qlen of Qdisc_A.
+
+The whole process leads to a situation where Qdisc_A->q.qlen == 0 and Qdisc_B->q.qlen == 1.
+Replacing 'hfsc' with another type (for example 'drr') still leads to the same problem.
+This violates the design where a parent's qlen should equal the sum of its children's qlens.
+
+Bug impact: This issue can be used for user->kernel privilege escalation when it is reachable.
+
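+An annotated walk-through of the pre-patch path with sch->limit == 0
+and an empty queue (paraphrased from pfifo_tail_enqueue() in
+net/sched/sch_fifo.c):
+
+    if (likely(sch->q.qlen < READ_ONCE(sch->limit)))  /* 0 < 0: false */
+        return qdisc_enqueue_tail(skb, sch);
+
+    prev_backlog = sch->qstats.backlog;
+    __qdisc_queue_drop_head(sch, &sch->q, to_free);   /* empty: no-op */
+    qdisc_qstats_drop(sch);
+    qdisc_enqueue_tail(skb, sch);                     /* qlen: 0 -> 1 */
+
+    sch->qstats.backlog = prev_backlog;
+    return NET_XMIT_CN;        /* parent skips its own qlen update */
+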
+Fixes: 57dbb2d83d10 ("sched: add head drop fifo queue")
+Reported-by: Quang Le <quanglex97@gmail.com>
+Signed-off-by: Quang Le <quanglex97@gmail.com>
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Link: https://patch.msgid.link/20250204005841.223511-2-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_fifo.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
+index b50b2c2cc09bc..e6bfd39ff3396 100644
+--- a/net/sched/sch_fifo.c
++++ b/net/sched/sch_fifo.c
+@@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ {
+       unsigned int prev_backlog;
++      if (unlikely(READ_ONCE(sch->limit) == 0))
++              return qdisc_drop(skb, sch, to_free);
++
+       if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
+               return qdisc_enqueue_tail(skb, sch);
+-- 
+2.39.5
+
diff --git a/queue-6.12/rxrpc-fix-call-state-set-to-not-include-the-server_s.patch b/queue-6.12/rxrpc-fix-call-state-set-to-not-include-the-server_s.patch
new file mode 100644 (file)
index 0000000..9d4cf54
--- /dev/null
@@ -0,0 +1,146 @@
+From 6db16ac161df205cc7e9320b31be56389682f50a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Feb 2025 23:05:53 +0000
+Subject: rxrpc: Fix call state set to not include the SERVER_SECURING state
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 41b996ce83bf944de5569d6263c8dbd5513e7ed0 ]
+
+The RXRPC_CALL_SERVER_SECURING state doesn't really belong with the other
+states in the call's state set, as the other states govern the call's Rx/Tx
+phase transitions and determine when packets can and can't be received or
+transmitted.  The "Securing" state doesn't actually govern the reception of
+packets and would need to be split depending on whether or not we've
+received the last packet yet (to mirror RECV_REQUEST/ACK_REQUEST).
+
+The "Securing" state is more about whether or not we can start forwarding
+packets to the application as recvmsg will need to decode them and the
+decoding can't take place until the challenge/response exchange has
+completed.
+
+Fix this by removing the RXRPC_CALL_SERVER_SECURING state from the state
+set and, instead, using a flag, RXRPC_CALL_CONN_CHALLENGING, to track
+whether or not we can queue the call for reception by recvmsg() or notify
+the kernel app that data is ready.  In the event that we've already
+received all the packets, the connection event handler will poke the app
+layer in the appropriate manner.
+
+Also there's a race whereby the app layer sees the last packet before rxrpc
+has managed to end the rx phase and change the state to one amenable to
+allowing a reply.  Fix this by queuing the packet after calling
+rxrpc_end_rx_phase().
+
+Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: Simon Horman <horms@kernel.org>
+cc: linux-afs@lists.infradead.org
+Link: https://patch.msgid.link/20250204230558.712536-2-dhowells@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rxrpc/ar-internal.h | 2 +-
+ net/rxrpc/call_object.c | 6 ++----
+ net/rxrpc/conn_event.c  | 4 +---
+ net/rxrpc/input.c       | 2 +-
+ net/rxrpc/sendmsg.c     | 2 +-
+ 5 files changed, 6 insertions(+), 10 deletions(-)
+
+diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
+index d0fd37bdcfe9c..6b036c0564c7a 100644
+--- a/net/rxrpc/ar-internal.h
++++ b/net/rxrpc/ar-internal.h
+@@ -567,6 +567,7 @@ enum rxrpc_call_flag {
+       RXRPC_CALL_EXCLUSIVE,           /* The call uses a once-only connection */
+       RXRPC_CALL_RX_IS_IDLE,          /* recvmsg() is idle - send an ACK */
+       RXRPC_CALL_RECVMSG_READ_ALL,    /* recvmsg() read all of the received data */
++      RXRPC_CALL_CONN_CHALLENGING,    /* The connection is being challenged */
+ };
+ /*
+@@ -587,7 +588,6 @@ enum rxrpc_call_state {
+       RXRPC_CALL_CLIENT_AWAIT_REPLY,  /* - client awaiting reply */
+       RXRPC_CALL_CLIENT_RECV_REPLY,   /* - client receiving reply phase */
+       RXRPC_CALL_SERVER_PREALLOC,     /* - service preallocation */
+-      RXRPC_CALL_SERVER_SECURING,     /* - server securing request connection */
+       RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
+       RXRPC_CALL_SERVER_ACK_REQUEST,  /* - server pending ACK of request */
+       RXRPC_CALL_SERVER_SEND_REPLY,   /* - server sending reply */
+diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
+index f9e983a12c149..e379a2a9375ae 100644
+--- a/net/rxrpc/call_object.c
++++ b/net/rxrpc/call_object.c
+@@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
+       [RXRPC_CALL_CLIENT_AWAIT_REPLY]         = "ClAwtRpl",
+       [RXRPC_CALL_CLIENT_RECV_REPLY]          = "ClRcvRpl",
+       [RXRPC_CALL_SERVER_PREALLOC]            = "SvPrealc",
+-      [RXRPC_CALL_SERVER_SECURING]            = "SvSecure",
+       [RXRPC_CALL_SERVER_RECV_REQUEST]        = "SvRcvReq",
+       [RXRPC_CALL_SERVER_ACK_REQUEST]         = "SvAckReq",
+       [RXRPC_CALL_SERVER_SEND_REPLY]          = "SvSndRpl",
+@@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
+       call->cong_tstamp       = skb->tstamp;
+       __set_bit(RXRPC_CALL_EXPOSED, &call->flags);
+-      rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
++      rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+       spin_lock(&conn->state_lock);
+       switch (conn->state) {
+       case RXRPC_CONN_SERVICE_UNSECURED:
+       case RXRPC_CONN_SERVICE_CHALLENGING:
+-              rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
++              __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags);
+               break;
+       case RXRPC_CONN_SERVICE:
+-              rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+               break;
+       case RXRPC_CONN_ABORTED:
+diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
+index ca5e694ab858b..c4eb7986efddf 100644
+--- a/net/rxrpc/conn_event.c
++++ b/net/rxrpc/conn_event.c
+@@ -222,10 +222,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn)
+  */
+ static void rxrpc_call_is_secure(struct rxrpc_call *call)
+ {
+-      if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) {
+-              rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
++      if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags))
+               rxrpc_notify_socket(call);
+-      }
+ }
+ /*
+diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
+index 16d49a861dbb5..6a075a7c190db 100644
+--- a/net/rxrpc/input.c
++++ b/net/rxrpc/input.c
+@@ -573,7 +573,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
+               rxrpc_propose_delay_ACK(call, sp->hdr.serial,
+                                       rxrpc_propose_ack_input_data);
+       }
+-      if (notify) {
++      if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) {
+               trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
+               rxrpc_notify_socket(call);
+       }
+diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
+index 23d18fe5de9f0..154f650efb0ab 100644
+--- a/net/rxrpc/sendmsg.c
++++ b/net/rxrpc/sendmsg.c
+@@ -654,7 +654,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+       } else {
+               switch (rxrpc_call_state(call)) {
+               case RXRPC_CALL_CLIENT_AWAIT_CONN:
+-              case RXRPC_CALL_SERVER_SECURING:
++              case RXRPC_CALL_SERVER_RECV_REQUEST:
+                       if (p.command == RXRPC_CMD_SEND_ABORT)
+                               break;
+                       fallthrough;
+-- 
+2.39.5
+
diff --git a/queue-6.12/rxrpc-fix-the-rxrpc_connection-attend-queue-handling.patch b/queue-6.12/rxrpc-fix-the-rxrpc_connection-attend-queue-handling.patch
new file mode 100644 (file)
index 0000000..f14aa61
--- /dev/null
@@ -0,0 +1,105 @@
+From d89bce9836de246776d7b324bdf3d6170cf25c2b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 11:03:04 +0000
+Subject: rxrpc: Fix the rxrpc_connection attend queue handling
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 4241a702e0d0c2ca9364cfac08dbf134264962de ]
+
+The rxrpc_connection attend queue is never used because conn::attend_link
+is never initialised and so is always NULL'd out and thus always appears to
+be busy.  This requires the following fix:
+
+ (1) Fix this the attend queue problem by initialising conn::attend_link.
+
+And, consequently, two further fixes for things masked by the above bug:
+
+ (2) Fix rxrpc_input_conn_event() to handle being invoked with a NULL
+     sk_buff pointer - something that can now happen with the above change.
+
+ (3) Fix the RXRPC_SKB_MARK_SERVICE_CONN_SECURED message to carry a pointer
+     to the connection and a ref on it.
+
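+Regarding (1), a zero-initialised list_head is indistinguishable from a
+queued one; here is a minimal userspace sketch of the list_empty() test
+(simplified from include/linux/list.h):
+
+    #include <stdio.h>
+
+    struct list_head { struct list_head *next, *prev; };
+
+    static int list_empty(const struct list_head *head)
+    {
+        return head->next == head;
+    }
+
+    int main(void)
+    {
+        struct list_head link = { NULL, NULL };  /* kzalloc()ed state */
+
+        /* next is NULL, not &link, so the link never reads as empty
+         * and the connection always looks like it is already queued.
+         */
+        printf("empty: %d\n", list_empty(&link));  /* prints 0 */
+
+        link.next = link.prev = &link;             /* INIT_LIST_HEAD() */
+        printf("empty: %d\n", list_empty(&link));  /* prints 1 */
+        return 0;
+    }
+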
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: Jakub Kicinski <kuba@kernel.org>
+cc: "David S. Miller" <davem@davemloft.net>
+cc: Eric Dumazet <edumazet@google.com>
+cc: Paolo Abeni <pabeni@redhat.com>
+cc: Simon Horman <horms@kernel.org>
+cc: linux-afs@lists.infradead.org
+cc: netdev@vger.kernel.org
+Fixes: f2cce89a074e ("rxrpc: Implement a mechanism to send an event notification to a connection")
+Link: https://patch.msgid.link/20250203110307.7265-3-dhowells@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/rxrpc.h |  1 +
+ net/rxrpc/conn_event.c       | 17 ++++++++++-------
+ net/rxrpc/conn_object.c      |  1 +
+ 3 files changed, 12 insertions(+), 7 deletions(-)
+
+diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
+index 666fe1779ccc6..e1a37e9c2d42d 100644
+--- a/include/trace/events/rxrpc.h
++++ b/include/trace/events/rxrpc.h
+@@ -218,6 +218,7 @@
+       EM(rxrpc_conn_get_conn_input,           "GET inp-conn") \
+       EM(rxrpc_conn_get_idle,                 "GET idle    ") \
+       EM(rxrpc_conn_get_poke_abort,           "GET pk-abort") \
++      EM(rxrpc_conn_get_poke_secured,         "GET secured ") \
+       EM(rxrpc_conn_get_poke_timer,           "GET poke    ") \
+       EM(rxrpc_conn_get_service_conn,         "GET svc-conn") \
+       EM(rxrpc_conn_new_client,               "NEW client  ") \
+diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
+index 2a1396cd892f3..ca5e694ab858b 100644
+--- a/net/rxrpc/conn_event.c
++++ b/net/rxrpc/conn_event.c
+@@ -266,6 +266,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
+                        * we've already received the packet, put it on the
+                        * front of the queue.
+                        */
++                      sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured);
+                       skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
+                       rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
+                       skb_queue_head(&conn->local->rx_queue, skb);
+@@ -431,14 +432,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
+       if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
+               rxrpc_abort_calls(conn);
+-      switch (skb->mark) {
+-      case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+-              if (conn->state != RXRPC_CONN_SERVICE)
+-                      break;
++      if (skb) {
++              switch (skb->mark) {
++              case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
++                      if (conn->state != RXRPC_CONN_SERVICE)
++                              break;
+-              for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+-                      rxrpc_call_is_secure(conn->channels[loop].call);
+-              break;
++                      for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
++                              rxrpc_call_is_secure(conn->channels[loop].call);
++                      break;
++              }
+       }
+       /* Process delayed ACKs whose time has come. */
+diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
+index 1539d315afe74..7bc68135966e2 100644
+--- a/net/rxrpc/conn_object.c
++++ b/net/rxrpc/conn_object.c
+@@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
+               INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
+               INIT_LIST_HEAD(&conn->proc_link);
+               INIT_LIST_HEAD(&conn->link);
++              INIT_LIST_HEAD(&conn->attend_link);
+               mutex_init(&conn->security_lock);
+               mutex_init(&conn->tx_data_alloc_lock);
+               skb_queue_head_init(&conn->rx_queue);
+-- 
+2.39.5
+
diff --git a/queue-6.12/sched-fair-fix-inaccurate-h_nr_runnable-accounting-w.patch b/queue-6.12/sched-fair-fix-inaccurate-h_nr_runnable-accounting-w.patch
new file mode 100644 (file)
index 0000000..1d29cb1
--- /dev/null
@@ -0,0 +1,107 @@
+From 1f0309b88874381903bd24c0abe1a99875124349 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Jan 2025 10:58:52 +0000
+Subject: sched/fair: Fix inaccurate h_nr_runnable accounting with delayed
+ dequeue
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit 3429dd57f0deb1a602c2624a1dd7c4c11b6c4734 ]
+
+set_delayed() adjusts cfs_rq->h_nr_runnable for the hierarchy when an
+entity is delayed irrespective of whether the entity corresponds to a
+task or a cfs_rq.
+
+Consider the following scenario:
+
+       root
+       /    \
+      A             B (*) delayed since B is no longer eligible on root
+      |             |
+    Task0  Task1 <--- dequeue_task_fair() - task blocks
+
+When Task1 blocks (dequeue_entity() for task's se returns true),
+dequeue_entities() will continue adjusting cfs_rq->h_nr_* for the
+hierarchy of Task1. However, when the sched_entity corresponding to
+cfs_rq B is delayed, set_delayed() will adjust the h_nr_runnable for the
+hierarchy too, leading to both dequeue_entity() and set_delayed()
+decrementing h_nr_runnable for the dequeue of the same task.
+
+A SCHED_WARN_ON() to inspect h_nr_runnable post its update in
+dequeue_entities() like below:
+
+    cfs_rq->h_nr_runnable -= h_nr_runnable;
+    SCHED_WARN_ON(((int) cfs_rq->h_nr_runnable) < 0);
+
+is consistently tripped when running wakeup intensive workloads like
+hackbench in a cgroup.
+
+This error is self-correcting since cfs_rq are per-CPU and cannot
+migrate. The entity is either picked for a full dequeue or is requeued
+when a task wakes up below it. Both of those paths call clear_delayed(),
+which again increments h_nr_runnable of the hierarchy without
+considering whether the entity corresponds to a task or not.
+
+h_nr_runnable will eventually reflect the correct value; however, in the
+interim, the incorrect values can still influence PELT calculations,
+which use se->runnable_weight or cfs_rq->h_nr_runnable.
+
+Since only delayed tasks take the early return path in
+dequeue_entities() and enqueue_task_fair(), adjust the
+h_nr_runnable in {set,clear}_delayed() only when a task is delayed as
+this path skips the h_nr_* update loops and returns early.
+
+For entities corresponding to cfs_rq, the h_nr_* update loop in the
+caller will do the right thing.
+
+Fixes: 76f2f783294d ("sched/eevdf: More PELT vs DELAYED_DEQUEUE")
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
+Tested-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
+Link: https://lkml.kernel.org/r/20250117105852.23908-1-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 65e7be6448720..ddc096d6b0c20 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5481,6 +5481,15 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+ static void set_delayed(struct sched_entity *se)
+ {
+       se->sched_delayed = 1;
++
++      /*
++       * Delayed se of cfs_rq have no tasks queued on them.
++       * Do not adjust h_nr_runnable since dequeue_entities()
++       * will account it for blocked tasks.
++       */
++      if (!entity_is_task(se))
++              return;
++
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+@@ -5493,6 +5502,16 @@ static void set_delayed(struct sched_entity *se)
+ static void clear_delayed(struct sched_entity *se)
+ {
+       se->sched_delayed = 0;
++
++      /*
++       * Delayed se of cfs_rq have no tasks queued on them.
++       * Do not adjust h_nr_runnable since a dequeue has
++       * already accounted for it or an enqueue of a task
++       * below it will account for it in enqueue_task_fair().
++       */
++      if (!entity_is_task(se))
++              return;
++
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+-- 
+2.39.5
+
index cfed7574e99dbe1fdac7052d8ecfae63af87054d..975997fcf4cd50090dfc4a930018077f393518be 100644 (file)
@@ -96,3 +96,32 @@ xfs-don-t-over-report-free-space-or-inodes-in-statvf.patch
 tty-xilinx_uartps-split-sysrq-handling.patch
 tty-permit-some-tiocl_setsel-modes-without-cap_sys_admin.patch
 platform-x86-serdev_helpers-check-for-serial_ctrl_uid-null.patch
+sched-fair-fix-inaccurate-h_nr_runnable-accounting-w.patch
+nvme-handle-connectivity-loss-in-nvme_set_queue_coun.patch
+firmware-iscsi_ibft-fix-iscsi_ibft-kconfig-entry.patch
+gpu-drm_dp_cec-fix-broken-cec-adapter-properties-che.patch
+ice-put-rx-buffers-after-being-done-with-current-fra.patch
+ice-gather-page_count-s-of-each-frag-right-before-xd.patch
+ice-stop-storing-xdp-verdict-within-ice_rx_buf.patch
+nvme-make-nvme_tls_attrs_group-static.patch
+nvme-fc-use-ctrl-state-getter.patch
+net-bcmgenet-correct-overlaying-of-phy-and-mac-wake-.patch
+ice-add-check-for-devm_kzalloc.patch
+vmxnet3-fix-tx-queue-race-condition-with-xdp.patch
+tg3-disable-tg3-pcie-aer-on-system-reboot.patch
+udp-gso-do-not-drop-small-packets-when-pmtu-reduces.patch
+drm-i915-dp-fix-the-adaptive-sync-operation-mode-for.patch
+ethtool-rss-fix-hiding-unsupported-fields-in-dumps.patch
+rxrpc-fix-the-rxrpc_connection-attend-queue-handling.patch
+gpio-pca953x-improve-interrupt-support.patch
+net-atlantic-fix-warning-during-hot-unplug.patch
+net-rose-lock-the-socket-in-rose_bind.patch
+gpio-sim-lock-hog-configfs-items-if-present.patch
+x86-xen-fix-xen_hypercall_hvm-to-not-clobber-rbx.patch
+x86-xen-add-frame_end-to-xen_hypercall_hvm.patch
+acpi-property-fix-return-value-for-nval-0-in-acpi_da.patch
+pfifo_tail_enqueue-drop-new-packet-when-sch-limit-0.patch
+netem-update-sch-q.qlen-before-qdisc_tree_reduce_bac.patch
+tun-revert-fix-group-permission-check.patch
+net-sched-fix-truncation-of-offloaded-action-statist.patch
+rxrpc-fix-call-state-set-to-not-include-the-server_s.patch
diff --git a/queue-6.12/tg3-disable-tg3-pcie-aer-on-system-reboot.patch b/queue-6.12/tg3-disable-tg3-pcie-aer-on-system-reboot.patch
new file mode 100644 (file)
index 0000000..ef3cdc0
--- /dev/null
@@ -0,0 +1,131 @@
+From 0ed4e6cc91ac1720c0ba1fa86bddff19c03a2712 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Jan 2025 16:57:54 -0500
+Subject: tg3: Disable tg3 PCIe AER on system reboot
+
+From: Lenny Szubowicz <lszubowi@redhat.com>
+
+[ Upstream commit e0efe83ed325277bb70f9435d4d9fc70bebdcca8 ]
+
+Disable PCIe AER on the tg3 device on system reboot on a limited
+list of Dell PowerEdge systems. This prevents a fatal PCIe AER event
+on the tg3 device during the ACPI _PTS (prepare to sleep) method for
+S5 on those systems. The _PTS is invoked by acpi_enter_sleep_state_prep()
+as part of the kernel's reboot sequence as a result of commit
+38f34dba806a ("PM: ACPI: reboot: Reinstate S5 for reboot").
+
+There was an earlier fix for this problem by commit 2ca1c94ce0b6
+("tg3: Disable tg3 device on system reboot to avoid triggering AER").
+But it was discovered that this earlier fix caused a reboot hang
+when some Dell PowerEdge servers were booted via ipxe. To address
+this reboot hang, the earlier fix was essentially reverted by commit
+9fc3bc764334 ("tg3: power down device only on SYSTEM_POWER_OFF").
+This re-exposed the tg3 PCIe AER on reboot problem.
+
+This fix is not an ideal solution because the root cause of the AER
+is in system firmware. Instead, it's a targeted work-around in the
+tg3 driver.
+
+Note also that the PCIe AER must be disabled on the tg3 device even
+if the system is configured to use "firmware first" error handling.
+
+V3:
+   - Fix sparse warning on improper comparison of pdev->current_state
+   - Adhere to netdev comment style
+
+Fixes: 9fc3bc764334 ("tg3: power down device only on SYSTEM_POWER_OFF")
+Signed-off-by: Lenny Szubowicz <lszubowi@redhat.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c | 58 +++++++++++++++++++++++++++++
+ 1 file changed, 58 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
+index d178138981a96..717e110d23c91 100644
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -55,6 +55,7 @@
+ #include <linux/hwmon.h>
+ #include <linux/hwmon-sysfs.h>
+ #include <linux/crc32poly.h>
++#include <linux/dmi.h>
+ #include <net/checksum.h>
+ #include <net/gso.h>
+@@ -18154,6 +18155,50 @@ static int tg3_resume(struct device *device)
+ static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume);
++/* Systems where ACPI _PTS (Prepare To Sleep) S5 will result in a fatal
++ * PCIe AER event on the tg3 device if the tg3 device is not, or cannot
++ * be, powered down.
++ */
++static const struct dmi_system_id tg3_restart_aer_quirk_table[] = {
++      {
++              .matches = {
++                      DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++                      DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R440"),
++              },
++      },
++      {
++              .matches = {
++                      DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++                      DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R540"),
++              },
++      },
++      {
++              .matches = {
++                      DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++                      DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R640"),
++              },
++      },
++      {
++              .matches = {
++                      DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++                      DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R650"),
++              },
++      },
++      {
++              .matches = {
++                      DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++                      DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R740"),
++              },
++      },
++      {
++              .matches = {
++                      DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++                      DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R750"),
++              },
++      },
++      {}
++};
++
+ static void tg3_shutdown(struct pci_dev *pdev)
+ {
+       struct net_device *dev = pci_get_drvdata(pdev);
+@@ -18170,6 +18215,19 @@ static void tg3_shutdown(struct pci_dev *pdev)
+       if (system_state == SYSTEM_POWER_OFF)
+               tg3_power_down(tp);
++      else if (system_state == SYSTEM_RESTART &&
++               dmi_first_match(tg3_restart_aer_quirk_table) &&
++               pdev->current_state != PCI_D3cold &&
++               pdev->current_state != PCI_UNKNOWN) {
++              /* Disable PCIe AER on the tg3 to avoid a fatal
++               * error during this system restart.
++               */
++              pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL,
++                                         PCI_EXP_DEVCTL_CERE |
++                                         PCI_EXP_DEVCTL_NFERE |
++                                         PCI_EXP_DEVCTL_FERE |
++                                         PCI_EXP_DEVCTL_URRE);
++      }
+       rtnl_unlock();
+-- 
+2.39.5
+
diff --git a/queue-6.12/tun-revert-fix-group-permission-check.patch b/queue-6.12/tun-revert-fix-group-permission-check.patch
new file mode 100644 (file)
index 0000000..13f3cab
--- /dev/null
@@ -0,0 +1,75 @@
+From 4f698b896e8b8d9bd0a41085295205e6df3e69a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Feb 2025 11:10:06 -0500
+Subject: tun: revert fix group permission check
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit a70c7b3cbc0688016810bb2e0b9b8a0d6a530045 ]
+
+This reverts commit 3ca459eaba1bf96a8c7878de84fa8872259a01e3.
+
+The blamed commit caused a regression when neither tun->owner nor
+tun->group is set. This is intended to be allowed, but now requires
+CAP_NET_ADMIN.
+
+Discussion in the referenced thread pointed out that the original
+issue that prompted this patch can be resolved in userspace.
+
+The relaxed access control may also make a device accessible when it
+previously wasn't, while existing users may depend on it to not be.
+
+This is a clean pure git revert, except for fixing the indentation on
+the gid_valid line that checkpatch correctly flagged.
+
+Fixes: 3ca459eaba1b ("tun: fix group permission check")
+Link: https://lore.kernel.org/netdev/CAFqZXNtkCBT4f+PwyVRmQGoT3p1eVa01fCG_aNtpt6dakXncUg@mail.gmail.com/
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Cc: Ondrej Mosnacek <omosnace@redhat.com>
+Cc: Stas Sergeev <stsp2@yandex.ru>
+Link: https://patch.msgid.link/20250204161015.739430-1-willemdebruijn.kernel@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/tun.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index 6c24a9ce6c155..fae1a0ab36bdf 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -574,18 +574,14 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
+       return ret;
+ }
+-static inline bool tun_capable(struct tun_struct *tun)
++static inline bool tun_not_capable(struct tun_struct *tun)
+ {
+       const struct cred *cred = current_cred();
+       struct net *net = dev_net(tun->dev);
+-      if (ns_capable(net->user_ns, CAP_NET_ADMIN))
+-              return 1;
+-      if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner))
+-              return 1;
+-      if (gid_valid(tun->group) && in_egroup_p(tun->group))
+-              return 1;
+-      return 0;
++      return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
++              (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
++              !ns_capable(net->user_ns, CAP_NET_ADMIN);
+ }
+ static void tun_set_real_num_queues(struct tun_struct *tun)
+@@ -2782,7 +2778,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
+                   !!(tun->flags & IFF_MULTI_QUEUE))
+                       return -EINVAL;
+-              if (!tun_capable(tun))
++              if (tun_not_capable(tun))
+                       return -EPERM;
+               err = security_tun_dev_open(tun->security);
+               if (err < 0)
+-- 
+2.39.5
+
diff --git a/queue-6.12/udp-gso-do-not-drop-small-packets-when-pmtu-reduces.patch b/queue-6.12/udp-gso-do-not-drop-small-packets-when-pmtu-reduces.patch
new file mode 100644 (file)
index 0000000..90d7501
--- /dev/null
@@ -0,0 +1,113 @@
+From 8c70fdd73bebbed1d19ad85be82f8a1f2e212fac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 Jan 2025 00:31:39 -0800
+Subject: udp: gso: do not drop small packets when PMTU reduces
+
+From: Yan Zhai <yan@cloudflare.com>
+
+[ Upstream commit 235174b2bed88501fda689c113c55737f99332d8 ]
+
+Commit 4094871db1d6 ("udp: only do GSO if # of segs > 1") avoided GSO
+for small packets. But the kernel currently dismisses GSO requests only
+after checking MTU/PMTU on gso_size. This means any packet, regardless
+of its payload size, could be dropped when the PMTU becomes smaller than
+the requested gso_size. We encountered this issue in production, where
+it caused a reliability problem: new QUIC connections could not be
+established until the PMTU cache expired, while non-GSO sockets kept
+working fine at the same time.
+
+Ideally, do not check any GSO related constraints when payload size is
+smaller than requested gso_size, and return EMSGSIZE instead of EINVAL
+on MTU/PMTU check failure to be more specific on the error cause.
+
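+Worked example with illustrative numbers: for IPv4, hlen = 20 + 8 = 28.
+With a cached PMTU fragsize of 1400 and a requested gso_size of 1450, a
+100-byte datagram used to fail the old check (28 + 1450 > 1400) and was
+dropped with EINVAL; with this patch the check becomes
+28 + min(100, 1450) = 128 <= 1400, so the small packet goes out without
+GSO.
+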
+Fixes: 4094871db1d6 ("udp: only do GSO if # of segs > 1")
+Signed-off-by: Yan Zhai <yan@cloudflare.com>
+Suggested-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/udp.c                       |  4 ++--
+ net/ipv6/udp.c                       |  4 ++--
+ tools/testing/selftests/net/udpgso.c | 26 ++++++++++++++++++++++++++
+ 3 files changed, 30 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index d2eeb6fc49b38..8da74dc63061c 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -985,9 +985,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
+               const int hlen = skb_network_header_len(skb) +
+                                sizeof(struct udphdr);
+-              if (hlen + cork->gso_size > cork->fragsize) {
++              if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
+                       kfree_skb(skb);
+-                      return -EINVAL;
++                      return -EMSGSIZE;
+               }
+               if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
+                       kfree_skb(skb);
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 896c9c827a288..197d0ac47592a 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1294,9 +1294,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+               const int hlen = skb_network_header_len(skb) +
+                                sizeof(struct udphdr);
+-              if (hlen + cork->gso_size > cork->fragsize) {
++              if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
+                       kfree_skb(skb);
+-                      return -EINVAL;
++                      return -EMSGSIZE;
+               }
+               if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
+                       kfree_skb(skb);
+diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
+index 3f2fca02fec53..36ff28af4b190 100644
+--- a/tools/testing/selftests/net/udpgso.c
++++ b/tools/testing/selftests/net/udpgso.c
+@@ -102,6 +102,19 @@ struct testcase testcases_v4[] = {
+               .gso_len = CONST_MSS_V4,
+               .r_num_mss = 1,
+       },
++      {
++              /* datalen <= MSS < gso_len: will fall back to no GSO */
++              .tlen = CONST_MSS_V4,
++              .gso_len = CONST_MSS_V4 + 1,
++              .r_num_mss = 0,
++              .r_len_last = CONST_MSS_V4,
++      },
++      {
++              /* MSS < datalen < gso_len: fail */
++              .tlen = CONST_MSS_V4 + 1,
++              .gso_len = CONST_MSS_V4 + 2,
++              .tfail = true,
++      },
+       {
+               /* send a single MSS + 1B */
+               .tlen = CONST_MSS_V4 + 1,
+@@ -205,6 +218,19 @@ struct testcase testcases_v6[] = {
+               .gso_len = CONST_MSS_V6,
+               .r_num_mss = 1,
+       },
++      {
++              /* datalen <= MSS < gso_len: will fall back to no GSO */
++              .tlen = CONST_MSS_V6,
++              .gso_len = CONST_MSS_V6 + 1,
++              .r_num_mss = 0,
++              .r_len_last = CONST_MSS_V6,
++      },
++      {
++              /* MSS < datalen < gso_len: fail */
++              .tlen = CONST_MSS_V6 + 1,
++              .gso_len = CONST_MSS_V6 + 2,
++              .tfail = true
++      },
+       {
+               /* send a single MSS + 1B */
+               .tlen = CONST_MSS_V6 + 1,
+-- 
+2.39.5
+
diff --git a/queue-6.12/vmxnet3-fix-tx-queue-race-condition-with-xdp.patch b/queue-6.12/vmxnet3-fix-tx-queue-race-condition-with-xdp.patch
new file mode 100644 (file)
index 0000000..294d716
--- /dev/null
@@ -0,0 +1,114 @@
+From 259ca2ca1b2fd657fdb94df286a9c3e010801fc3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 Jan 2025 09:53:41 +0530
+Subject: vmxnet3: Fix tx queue race condition with XDP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
+
+[ Upstream commit 3f1baa91a1fdf3de9dbad4bd615b35fab347874b ]
+
+If XDP traffic runs on a CPU whose number is greater than or equal to
+the number of Tx queues on the NIC, then vmxnet3_xdp_get_tq()
+always picks queue 0 for transmission, as it uses reciprocal scaling
+instead of a simple modulo operation.
+
+vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() use the returned queue
+without any locking, which can lead to race conditions when multiple
+XDP xmits run in parallel on different CPUs.
+
+This patch uses a simple modulo scheme when the current CPU equals or
+exceeds the number of Tx queues on the NIC. It also adds locking in the
+vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions.
+
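+A quick illustration of why reciprocal scaling always lands on queue 0
+for small inputs (userspace sketch; reciprocal_scale() open-coded from
+include/linux/kernel.h):
+
+    #include <stdio.h>
+    #include <stdint.h>
+
+    /* Maps val into [0, ep_ro) by multiply-and-shift; meant for
+     * full-range 32-bit hashes, not tiny CPU numbers.
+     */
+    static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
+    {
+        return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
+    }
+
+    int main(void)
+    {
+        uint32_t tq_number = 4;
+
+        for (uint32_t cpu = 4; cpu < 12; cpu++)
+            printf("cpu %u -> rs %u, mod %u\n", cpu,
+                   reciprocal_scale(cpu, tq_number), cpu % tq_number);
+        /* reciprocal_scale() yields 0 for every cpu here, while
+         * cpu % tq_number spreads the queues 0..3 as intended.
+         */
+        return 0;
+    }
+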
+Fixes: 54f00cce1178 ("vmxnet3: Add XDP support.")
+Signed-off-by: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
+Signed-off-by: Ronak Doshi <ronak.doshi@broadcom.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250131042340.156547-1-sankararaman.jayaraman@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vmxnet3/vmxnet3_xdp.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c
+index 1341374a4588a..616ecc38d1726 100644
+--- a/drivers/net/vmxnet3/vmxnet3_xdp.c
++++ b/drivers/net/vmxnet3/vmxnet3_xdp.c
+@@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
+       if (likely(cpu < tq_number))
+               tq = &adapter->tx_queue[cpu];
+       else
+-              tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];
++              tq = &adapter->tx_queue[cpu % tq_number];
+       return tq;
+ }
+@@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+       u32 buf_size;
+       u32 dw2;
++      spin_lock_irq(&tq->tx_lock);
+       dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
+       dw2 |= xdpf->len;
+       ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
+@@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+       if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
+               tq->stats.tx_ring_full++;
++              spin_unlock_irq(&tq->tx_lock);
+               return -ENOSPC;
+       }
+@@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+               tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
+                                              xdpf->data, buf_size,
+                                              DMA_TO_DEVICE);
+-              if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
++              if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) {
++                      spin_unlock_irq(&tq->tx_lock);
+                       return -EFAULT;
++              }
+               tbi->map_type |= VMXNET3_MAP_SINGLE;
+       } else { /* XDP buffer from page pool */
+               page = virt_to_page(xdpf->data);
+@@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+       dma_wmb();
+       gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
+                                                 VMXNET3_TXD_GEN);
++      spin_unlock_irq(&tq->tx_lock);
+       /* No need to handle the case when tx_num_deferred doesn't reach
+        * threshold. Backend driver at hypervisor side will poll and reset
+@@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
+ {
+       struct vmxnet3_adapter *adapter = netdev_priv(dev);
+       struct vmxnet3_tx_queue *tq;
++      struct netdev_queue *nq;
+       int i;
+       if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
+@@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev,
+       if (tq->stopped)
+               return -ENETDOWN;
++      nq = netdev_get_tx_queue(adapter->netdev, tq->qid);
++
++      __netif_tx_lock(nq, smp_processor_id());
+       for (i = 0; i < n; i++) {
+               if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
+                       tq->stats.xdp_xmit_err++;
+@@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
+               }
+       }
+       tq->stats.xdp_xmit += i;
++      __netif_tx_unlock(nq);
+       return i;
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.12/x86-xen-add-frame_end-to-xen_hypercall_hvm.patch b/queue-6.12/x86-xen-add-frame_end-to-xen_hypercall_hvm.patch
new file mode 100644 (file)
index 0000000..ab13f28
--- /dev/null
@@ -0,0 +1,38 @@
+From 0d950c23a37d041289c301e693643e5dd0a13d59 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 10:07:56 +0100
+Subject: x86/xen: add FRAME_END to xen_hypercall_hvm()
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 0bd797b801bd8ee06c822844e20d73aaea0878dd ]
+
+xen_hypercall_hvm() is missing a FRAME_END at the end, add it.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202502030848.HTNTTuo9-lkp@intel.com/
+Fixes: b4845bb63838 ("x86/xen: add central hypercall functions")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/xen/xen-head.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
+index ce96877c3c4fe..55978e0dc1755 100644
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -132,6 +132,7 @@ SYM_FUNC_START(xen_hypercall_hvm)
+       pop %rcx
+       pop %rax
+ #endif
++      FRAME_END
+       /* Use correct hypercall function. */
+       jz xen_hypercall_amd
+       jmp xen_hypercall_intel
+-- 
+2.39.5
+
diff --git a/queue-6.12/x86-xen-fix-xen_hypercall_hvm-to-not-clobber-rbx.patch b/queue-6.12/x86-xen-fix-xen_hypercall_hvm-to-not-clobber-rbx.patch
new file mode 100644 (file)
index 0000000..7c9bda6
--- /dev/null
@@ -0,0 +1,44 @@
+From 92662edb9522c377f8a707fac35e2ba2cd8b98d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 09:43:31 +0100
+Subject: x86/xen: fix xen_hypercall_hvm() to not clobber %rbx
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 98a5cfd2320966f40fe049a9855f8787f0126825 ]
+
+xen_hypercall_hvm(), which is used at most once during early boot when
+running as a Xen PVH guest, clobbers %rbx. Depending on whether the
+caller relies on %rbx being preserved across the call, this clobbering
+might result in an early crash of the system.
+
+This can be avoided by using an already saved register instead of %rbx.
+
+Fixes: b4845bb63838 ("x86/xen: add central hypercall functions")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/xen/xen-head.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
+index 721a57700a3b0..ce96877c3c4fe 100644
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -117,8 +117,8 @@ SYM_FUNC_START(xen_hypercall_hvm)
+       pop %ebx
+       pop %eax
+ #else
+-      lea xen_hypercall_amd(%rip), %rbx
+-      cmp %rax, %rbx
++      lea xen_hypercall_amd(%rip), %rcx
++      cmp %rax, %rcx
+ #ifdef CONFIG_FRAME_POINTER
+       pop %rax        /* Dummy pop. */
+ #endif
+-- 
+2.39.5
+