]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.16-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 21 Sep 2025 13:01:24 +0000 (15:01 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 21 Sep 2025 13:01:24 +0000 (15:01 +0200)
added patches:
alsa-hda-realtek-fix-mute-led-for-hp-laptop-15-dw4xx.patch
asoc-qcom-audioreach-fix-lpaif_type-configuration-for-the-i2s-interface.patch
asoc-qcom-q6apm-lpass-dais-fix-missing-set_fmt-dai-op-for-i2s.patch
asoc-qcom-q6apm-lpass-dais-fix-null-pointer-dereference-if-source-graph-failed.patch
asoc-sdca-add-quirk-for-incorrect-function-types-for-3-systems.patch
btrfs-initialize-inode-file_extent_tree-after-i_mode-has-been-set.patch
btrfs-tree-checker-fix-the-incorrect-inode-ref-size-check.patch
crypto-af_alg-disallow-concurrent-writes-in-af_alg_sendmsg.patch
crypto-ccp-always-pass-in-an-error-pointer-to-__sev_platform_shutdown_locked.patch
dm-raid-don-t-set-io_min-and-io_opt-for-raid1.patch
dm-stripe-fix-a-possible-integer-overflow.patch
drm-amd-display-allow-rx6xxx-rx7700-to-invoke-amdgpu_irq_get-put.patch
drm-amd-only-restore-cached-manual-clock-settings-in-restore-if-od-enabled.patch
drm-amdgpu-suspend-kfd-and-kgd-user-queues-for-s0ix.patch
drm-amdkfd-add-proper-handling-for-s0ix.patch
gpiolib-acpi-initialize-acpi_gpio_info-struct.patch
gup-optimize-longterm-pin_user_pages-for-large-folio.patch
io_uring-include-dying-ring-in-task_work-should-cancel-state.patch
io_uring-io-wq-fix-max_workers-breakage-and-nr_workers-underflow.patch
iommu-amd-fix-ivrs_base-memleak-in-early_amd_iommu_init.patch
iommu-amd-pgtbl-fix-possible-race-while-increase-page-table-level.patch
iommu-s390-fix-memory-corruption-when-using-identity-domain.patch
iommu-s390-make-attach-succeed-when-the-device-was-surprise-removed.patch
iommu-vt-d-fix-__domain_mapping-s-usage-of-switch_to_super_page.patch
ksmbd-smbdirect-validate-data_offset-and-data_length-field-of-smb_direct_data_transfer.patch
ksmbd-smbdirect-verify-remaining_data_length-respects-max_fragmented_recv_size.patch
kvm-svm-sync-tpr-from-lapic-into-vmcb-v_tpr-even-if-avic-is-active.patch
loongarch-align-acpi-structures-if-arch_strict_align-enabled.patch
loongarch-check-the-return-value-when-creating-kobj.patch
loongarch-fix-unreliable-stack-for-live-patching.patch
loongarch-handle-jump-tables-options-for-rust.patch
loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_ctrl_access.patch
loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_regs_access.patch
loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_sw_status_access.patch
loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_pch_pic_regs_access.patch
loongarch-kvm-fix-vm-migration-failure-with-ptw-enabled.patch
loongarch-make-lto-case-independent-in-makefile.patch
loongarch-update-help-info-of-arch_strict_align.patch
loongarch-vdso-check-kcalloc-result-in-init_vdso.patch
mm-folio_may_be_lru_cached-unless-folio_test_large.patch
mm-gup-check-ref_count-instead-of-lru-before-migration.patch
mm-gup-local-lru_add_drain-to-avoid-lru_add_drain_all.patch
mm-revert-mm-gup-clear-the-lru-flag-of-a-page-before-adding-to-lru-batch.patch
mm-revert-mm-vmscan.c-fix-oom-on-swap-stress-test.patch
mmc-mvsdio-fix-dma_unmap_sg-nents-value.patch
mmc-sdhci-move-the-code-related-to-setting-the-clock-from-sdhci_set_ios_common-into-sdhci_set_ios.patch
mmc-sdhci-pci-gli-gl9767-fix-initializing-the-uhs-ii-interface-during-a-power-on.patch
mmc-sdhci-uhs2-fix-calling-incorrect-sdhci_set_clock-function.patch
mptcp-propagate-shutdown-to-subflows-when-possible.patch
net-rfkill-gpio-fix-crash-due-to-dereferencering-uninitialized-pointer.patch
nilfs2-fix-cfi-failure-when-accessing-sys-fs-nilfs2-features.patch
objtool-loongarch-mark-special-atomic-instruction-as-insn_bug-type.patch
objtool-loongarch-mark-types-based-on-break-immediate-code.patch
power-supply-bq27xxx-fix-error-return-in-case-of-no-bq27000-hdq-battery.patch
power-supply-bq27xxx-restrict-no-battery-detection-to-bq27000.patch
rds-ib-increment-i_fastreg_wrs-before-bailing-out.patch
revert-sched_ext-skip-per-cpu-tasks-in-scx_bpf_reenqueue_local.patch
selftests-mptcp-avoid-spurious-errors-on-tcp-disconnect.patch
selftests-mptcp-connect-catch-io-errors-on-listen-side.patch
x86-sev-guard-sev_evict_cache-with-config_amd_mem_encrypt.patch
zram-fix-slot-write-race-condition.patch

63 files changed:
queue-6.16/alsa-hda-realtek-fix-mute-led-for-hp-laptop-15-dw4xx.patch [new file with mode: 0644]
queue-6.16/asoc-qcom-audioreach-fix-lpaif_type-configuration-for-the-i2s-interface.patch [new file with mode: 0644]
queue-6.16/asoc-qcom-q6apm-lpass-dais-fix-missing-set_fmt-dai-op-for-i2s.patch [new file with mode: 0644]
queue-6.16/asoc-qcom-q6apm-lpass-dais-fix-null-pointer-dereference-if-source-graph-failed.patch [new file with mode: 0644]
queue-6.16/asoc-sdca-add-quirk-for-incorrect-function-types-for-3-systems.patch [new file with mode: 0644]
queue-6.16/btrfs-initialize-inode-file_extent_tree-after-i_mode-has-been-set.patch [new file with mode: 0644]
queue-6.16/btrfs-tree-checker-fix-the-incorrect-inode-ref-size-check.patch [new file with mode: 0644]
queue-6.16/crypto-af_alg-disallow-concurrent-writes-in-af_alg_sendmsg.patch [new file with mode: 0644]
queue-6.16/crypto-ccp-always-pass-in-an-error-pointer-to-__sev_platform_shutdown_locked.patch [new file with mode: 0644]
queue-6.16/dm-raid-don-t-set-io_min-and-io_opt-for-raid1.patch [new file with mode: 0644]
queue-6.16/dm-stripe-fix-a-possible-integer-overflow.patch [new file with mode: 0644]
queue-6.16/drm-amd-display-allow-rx6xxx-rx7700-to-invoke-amdgpu_irq_get-put.patch [new file with mode: 0644]
queue-6.16/drm-amd-only-restore-cached-manual-clock-settings-in-restore-if-od-enabled.patch [new file with mode: 0644]
queue-6.16/drm-amdgpu-suspend-kfd-and-kgd-user-queues-for-s0ix.patch [new file with mode: 0644]
queue-6.16/drm-amdkfd-add-proper-handling-for-s0ix.patch [new file with mode: 0644]
queue-6.16/gpiolib-acpi-initialize-acpi_gpio_info-struct.patch [new file with mode: 0644]
queue-6.16/gup-optimize-longterm-pin_user_pages-for-large-folio.patch [new file with mode: 0644]
queue-6.16/io_uring-include-dying-ring-in-task_work-should-cancel-state.patch [new file with mode: 0644]
queue-6.16/io_uring-io-wq-fix-max_workers-breakage-and-nr_workers-underflow.patch [new file with mode: 0644]
queue-6.16/iommu-amd-fix-ivrs_base-memleak-in-early_amd_iommu_init.patch [new file with mode: 0644]
queue-6.16/iommu-amd-pgtbl-fix-possible-race-while-increase-page-table-level.patch [new file with mode: 0644]
queue-6.16/iommu-s390-fix-memory-corruption-when-using-identity-domain.patch [new file with mode: 0644]
queue-6.16/iommu-s390-make-attach-succeed-when-the-device-was-surprise-removed.patch [new file with mode: 0644]
queue-6.16/iommu-vt-d-fix-__domain_mapping-s-usage-of-switch_to_super_page.patch [new file with mode: 0644]
queue-6.16/ksmbd-smbdirect-validate-data_offset-and-data_length-field-of-smb_direct_data_transfer.patch [new file with mode: 0644]
queue-6.16/ksmbd-smbdirect-verify-remaining_data_length-respects-max_fragmented_recv_size.patch [new file with mode: 0644]
queue-6.16/kvm-svm-sync-tpr-from-lapic-into-vmcb-v_tpr-even-if-avic-is-active.patch [new file with mode: 0644]
queue-6.16/loongarch-align-acpi-structures-if-arch_strict_align-enabled.patch [new file with mode: 0644]
queue-6.16/loongarch-check-the-return-value-when-creating-kobj.patch [new file with mode: 0644]
queue-6.16/loongarch-fix-unreliable-stack-for-live-patching.patch [new file with mode: 0644]
queue-6.16/loongarch-handle-jump-tables-options-for-rust.patch [new file with mode: 0644]
queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_ctrl_access.patch [new file with mode: 0644]
queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_regs_access.patch [new file with mode: 0644]
queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_sw_status_access.patch [new file with mode: 0644]
queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_pch_pic_regs_access.patch [new file with mode: 0644]
queue-6.16/loongarch-kvm-fix-vm-migration-failure-with-ptw-enabled.patch [new file with mode: 0644]
queue-6.16/loongarch-make-lto-case-independent-in-makefile.patch [new file with mode: 0644]
queue-6.16/loongarch-update-help-info-of-arch_strict_align.patch [new file with mode: 0644]
queue-6.16/loongarch-vdso-check-kcalloc-result-in-init_vdso.patch [new file with mode: 0644]
queue-6.16/mm-folio_may_be_lru_cached-unless-folio_test_large.patch [new file with mode: 0644]
queue-6.16/mm-gup-check-ref_count-instead-of-lru-before-migration.patch [new file with mode: 0644]
queue-6.16/mm-gup-local-lru_add_drain-to-avoid-lru_add_drain_all.patch [new file with mode: 0644]
queue-6.16/mm-revert-mm-gup-clear-the-lru-flag-of-a-page-before-adding-to-lru-batch.patch [new file with mode: 0644]
queue-6.16/mm-revert-mm-vmscan.c-fix-oom-on-swap-stress-test.patch [new file with mode: 0644]
queue-6.16/mmc-mvsdio-fix-dma_unmap_sg-nents-value.patch [new file with mode: 0644]
queue-6.16/mmc-sdhci-move-the-code-related-to-setting-the-clock-from-sdhci_set_ios_common-into-sdhci_set_ios.patch [new file with mode: 0644]
queue-6.16/mmc-sdhci-pci-gli-gl9767-fix-initializing-the-uhs-ii-interface-during-a-power-on.patch [new file with mode: 0644]
queue-6.16/mmc-sdhci-uhs2-fix-calling-incorrect-sdhci_set_clock-function.patch [new file with mode: 0644]
queue-6.16/mptcp-propagate-shutdown-to-subflows-when-possible.patch [new file with mode: 0644]
queue-6.16/net-rfkill-gpio-fix-crash-due-to-dereferencering-uninitialized-pointer.patch [new file with mode: 0644]
queue-6.16/nilfs2-fix-cfi-failure-when-accessing-sys-fs-nilfs2-features.patch [new file with mode: 0644]
queue-6.16/objtool-loongarch-mark-special-atomic-instruction-as-insn_bug-type.patch [new file with mode: 0644]
queue-6.16/objtool-loongarch-mark-types-based-on-break-immediate-code.patch [new file with mode: 0644]
queue-6.16/octeontx2-pf-fix-use-after-free-bugs-in-otx2_sync_ts.patch
queue-6.16/power-supply-bq27xxx-fix-error-return-in-case-of-no-bq27000-hdq-battery.patch [new file with mode: 0644]
queue-6.16/power-supply-bq27xxx-restrict-no-battery-detection-to-bq27000.patch [new file with mode: 0644]
queue-6.16/rds-ib-increment-i_fastreg_wrs-before-bailing-out.patch [new file with mode: 0644]
queue-6.16/revert-sched_ext-skip-per-cpu-tasks-in-scx_bpf_reenqueue_local.patch [new file with mode: 0644]
queue-6.16/selftests-mptcp-avoid-spurious-errors-on-tcp-disconnect.patch [new file with mode: 0644]
queue-6.16/selftests-mptcp-connect-catch-io-errors-on-listen-side.patch [new file with mode: 0644]
queue-6.16/series
queue-6.16/x86-sev-guard-sev_evict_cache-with-config_amd_mem_encrypt.patch [new file with mode: 0644]
queue-6.16/zram-fix-slot-write-race-condition.patch [new file with mode: 0644]

diff --git a/queue-6.16/alsa-hda-realtek-fix-mute-led-for-hp-laptop-15-dw4xx.patch b/queue-6.16/alsa-hda-realtek-fix-mute-led-for-hp-laptop-15-dw4xx.patch
new file mode 100644 (file)
index 0000000..5bdd08c
--- /dev/null
@@ -0,0 +1,30 @@
+From d33c3471047fc54966621d19329e6a23ebc8ec50 Mon Sep 17 00:00:00 2001
+From: Praful Adiga <praful.adiga@gmail.com>
+Date: Thu, 18 Sep 2025 12:40:18 -0400
+Subject: ALSA: hda/realtek: Fix mute led for HP Laptop 15-dw4xx
+
+From: Praful Adiga <praful.adiga@gmail.com>
+
+commit d33c3471047fc54966621d19329e6a23ebc8ec50 upstream.
+
+This laptop uses the ALC236 codec with COEF 0x7 and idx 1 to
+control the mute LED. Enable the existing quirk for this device.
+
+Signed-off-by: Praful Adiga <praful.adiga@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/pci/hda/patch_realtek.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -10752,6 +10752,7 @@ static const struct hda_quirk alc269_fix
+       SND_PCI_QUIRK(0x103c, 0x8992, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x103c, 0x8994, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8995, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2),
++      SND_PCI_QUIRK(0x103c, 0x89a0, "HP Laptop 15-dw4xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
+       SND_PCI_QUIRK(0x103c, 0x89a4, "HP ProBook 440 G9", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x89a6, "HP ProBook 450 G9", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED),
diff --git a/queue-6.16/asoc-qcom-audioreach-fix-lpaif_type-configuration-for-the-i2s-interface.patch b/queue-6.16/asoc-qcom-audioreach-fix-lpaif_type-configuration-for-the-i2s-interface.patch
new file mode 100644 (file)
index 0000000..d7307ef
--- /dev/null
@@ -0,0 +1,34 @@
+From 5f1af203ef964e7f7bf9d32716dfa5f332cc6f09 Mon Sep 17 00:00:00 2001
+From: Mohammad Rafi Shaik <mohammad.rafi.shaik@oss.qualcomm.com>
+Date: Mon, 8 Sep 2025 11:06:29 +0530
+Subject: ASoC: qcom: audioreach: Fix lpaif_type configuration for the I2S interface
+
+From: Mohammad Rafi Shaik <mohammad.rafi.shaik@oss.qualcomm.com>
+
+commit 5f1af203ef964e7f7bf9d32716dfa5f332cc6f09 upstream.
+
+Fix missing lpaif_type configuration for the I2S interface.
+The proper lpaif interface type required to allow DSP to vote
+appropriate clock setting for I2S interface.
+
+Fixes: 25ab80db6b133 ("ASoC: qdsp6: audioreach: add module configuration command helpers")
+Cc: stable@vger.kernel.org
+Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@oss.qualcomm.com>
+Signed-off-by: Mohammad Rafi Shaik <mohammad.rafi.shaik@oss.qualcomm.com>
+Message-ID: <20250908053631.70978-2-mohammad.rafi.shaik@oss.qualcomm.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/qcom/qdsp6/audioreach.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/sound/soc/qcom/qdsp6/audioreach.c
++++ b/sound/soc/qcom/qdsp6/audioreach.c
+@@ -971,6 +971,7 @@ static int audioreach_i2s_set_media_form
+       param_data->param_id = PARAM_ID_I2S_INTF_CFG;
+       param_data->param_size = ic_sz - APM_MODULE_PARAM_DATA_SIZE;
++      intf_cfg->cfg.lpaif_type = module->hw_interface_type;
+       intf_cfg->cfg.intf_idx = module->hw_interface_idx;
+       intf_cfg->cfg.sd_line_idx = module->sd_line_idx;
diff --git a/queue-6.16/asoc-qcom-q6apm-lpass-dais-fix-missing-set_fmt-dai-op-for-i2s.patch b/queue-6.16/asoc-qcom-q6apm-lpass-dais-fix-missing-set_fmt-dai-op-for-i2s.patch
new file mode 100644 (file)
index 0000000..ce8a42c
--- /dev/null
@@ -0,0 +1,35 @@
+From 33b55b94bca904ca25a9585e3cd43d15f0467969 Mon Sep 17 00:00:00 2001
+From: Mohammad Rafi Shaik <mohammad.rafi.shaik@oss.qualcomm.com>
+Date: Mon, 8 Sep 2025 11:06:30 +0530
+Subject: ASoC: qcom: q6apm-lpass-dais: Fix missing set_fmt DAI op for I2S
+
+From: Mohammad Rafi Shaik <mohammad.rafi.shaik@oss.qualcomm.com>
+
+commit 33b55b94bca904ca25a9585e3cd43d15f0467969 upstream.
+
+The q6i2s_set_fmt() function was defined but never linked into the
+I2S DAI operations, resulting DAI format settings is being ignored
+during stream setup. This change fixes the issue by properly linking
+the .set_fmt handler within the DAI ops.
+
+Fixes: 30ad723b93ade ("ASoC: qdsp6: audioreach: add q6apm lpass dai support")
+Cc: stable@vger.kernel.org
+Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@oss.qualcomm.com>
+Signed-off-by: Mohammad Rafi Shaik <mohammad.rafi.shaik@oss.qualcomm.com>
+Message-ID: <20250908053631.70978-3-mohammad.rafi.shaik@oss.qualcomm.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/qcom/qdsp6/q6apm-lpass-dais.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
++++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
+@@ -262,6 +262,7 @@ static const struct snd_soc_dai_ops q6i2
+       .shutdown       = q6apm_lpass_dai_shutdown,
+       .set_channel_map  = q6dma_set_channel_map,
+       .hw_params        = q6dma_hw_params,
++      .set_fmt        = q6i2s_set_fmt,
+ };
+ static const struct snd_soc_dai_ops q6hdmi_ops = {
diff --git a/queue-6.16/asoc-qcom-q6apm-lpass-dais-fix-null-pointer-dereference-if-source-graph-failed.patch b/queue-6.16/asoc-qcom-q6apm-lpass-dais-fix-null-pointer-dereference-if-source-graph-failed.patch
new file mode 100644 (file)
index 0000000..eb8f40b
--- /dev/null
@@ -0,0 +1,55 @@
+From 68f27f7c7708183e7873c585ded2f1b057ac5b97 Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Date: Thu, 4 Sep 2025 12:18:50 +0200
+Subject: ASoC: qcom: q6apm-lpass-dais: Fix NULL pointer dereference if source graph failed
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+commit 68f27f7c7708183e7873c585ded2f1b057ac5b97 upstream.
+
+If earlier opening of source graph fails (e.g. ADSP rejects due to
+incorrect audioreach topology), the graph is closed and
+"dai_data->graph[dai->id]" is assigned NULL.  Preparing the DAI for sink
+graph continues though and next call to q6apm_lpass_dai_prepare()
+receives dai_data->graph[dai->id]=NULL leading to NULL pointer
+exception:
+
+  qcom-apm gprsvc:service:2:1: Error (1) Processing 0x01001002 cmd
+  qcom-apm gprsvc:service:2:1: DSP returned error[1001002] 1
+  q6apm-lpass-dais 30000000.remoteproc:glink-edge:gpr:service@1:bedais: fail to start APM port 78
+  q6apm-lpass-dais 30000000.remoteproc:glink-edge:gpr:service@1:bedais: ASoC: error at snd_soc_pcm_dai_prepare on TX_CODEC_DMA_TX_3: -22
+  Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a8
+  ...
+  Call trace:
+   q6apm_graph_media_format_pcm+0x48/0x120 (P)
+   q6apm_lpass_dai_prepare+0x110/0x1b4
+   snd_soc_pcm_dai_prepare+0x74/0x108
+   __soc_pcm_prepare+0x44/0x160
+   dpcm_be_dai_prepare+0x124/0x1c0
+
+Fixes: 30ad723b93ad ("ASoC: qdsp6: audioreach: add q6apm lpass dai support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@oss.qualcomm.com>
+Message-ID: <20250904101849.121503-2-krzysztof.kozlowski@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/qcom/qdsp6/q6apm-lpass-dais.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
++++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
+@@ -213,8 +213,10 @@ static int q6apm_lpass_dai_prepare(struc
+       return 0;
+ err:
+-      q6apm_graph_close(dai_data->graph[dai->id]);
+-      dai_data->graph[dai->id] = NULL;
++      if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
++              q6apm_graph_close(dai_data->graph[dai->id]);
++              dai_data->graph[dai->id] = NULL;
++      }
+       return rc;
+ }
diff --git a/queue-6.16/asoc-sdca-add-quirk-for-incorrect-function-types-for-3-systems.patch b/queue-6.16/asoc-sdca-add-quirk-for-incorrect-function-types-for-3-systems.patch
new file mode 100644 (file)
index 0000000..7929fc5
--- /dev/null
@@ -0,0 +1,105 @@
+From 28edfaa10ca1b370b1a27fde632000d35c43402c Mon Sep 17 00:00:00 2001
+From: Maciej Strozek <mstrozek@opensource.cirrus.com>
+Date: Mon, 1 Sep 2025 16:15:07 +0100
+Subject: ASoC: SDCA: Add quirk for incorrect function types for 3 systems
+
+From: Maciej Strozek <mstrozek@opensource.cirrus.com>
+
+commit 28edfaa10ca1b370b1a27fde632000d35c43402c upstream.
+
+Certain systems have CS42L43 DisCo that claims to conform to version 0.6.28
+but uses the function types from the 1.0 spec. Add a quirk as a workaround.
+
+Closes: https://github.com/thesofproject/linux/issues/5515
+Cc: stable@vger.kernel.org
+Signed-off-by: Maciej Strozek <mstrozek@opensource.cirrus.com>
+Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.dev>
+Link: https://patch.msgid.link/20250901151518.3197941-1-mstrozek@opensource.cirrus.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/sound/sdca.h            |    1 +
+ sound/soc/sdca/sdca_device.c    |   20 ++++++++++++++++++++
+ sound/soc/sdca/sdca_functions.c |   13 ++++++++-----
+ 3 files changed, 29 insertions(+), 5 deletions(-)
+
+--- a/include/sound/sdca.h
++++ b/include/sound/sdca.h
+@@ -46,6 +46,7 @@ struct sdca_device_data {
+ enum sdca_quirk {
+       SDCA_QUIRKS_RT712_VB,
++      SDCA_QUIRKS_SKIP_FUNC_TYPE_PATCHING,
+ };
+ #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SDCA)
+--- a/sound/soc/sdca/sdca_device.c
++++ b/sound/soc/sdca/sdca_device.c
+@@ -7,6 +7,7 @@
+  */
+ #include <linux/acpi.h>
++#include <linux/dmi.h>
+ #include <linux/module.h>
+ #include <linux/property.h>
+ #include <linux/soundwire/sdw.h>
+@@ -55,11 +56,30 @@ static bool sdca_device_quirk_rt712_vb(s
+       return false;
+ }
++static bool sdca_device_quirk_skip_func_type_patching(struct sdw_slave *slave)
++{
++      const char *vendor, *sku;
++
++      vendor = dmi_get_system_info(DMI_SYS_VENDOR);
++      sku = dmi_get_system_info(DMI_PRODUCT_SKU);
++
++      if (vendor && sku &&
++          !strcmp(vendor, "Dell Inc.") &&
++          (!strcmp(sku, "0C62") || !strcmp(sku, "0C63") || !strcmp(sku, "0C6B")) &&
++          slave->sdca_data.interface_revision == 0x061c &&
++          slave->id.mfg_id == 0x01fa && slave->id.part_id == 0x4243)
++              return true;
++
++      return false;
++}
++
+ bool sdca_device_quirk_match(struct sdw_slave *slave, enum sdca_quirk quirk)
+ {
+       switch (quirk) {
+       case SDCA_QUIRKS_RT712_VB:
+               return sdca_device_quirk_rt712_vb(slave);
++      case SDCA_QUIRKS_SKIP_FUNC_TYPE_PATCHING:
++              return sdca_device_quirk_skip_func_type_patching(slave);
+       default:
+               break;
+       }
+--- a/sound/soc/sdca/sdca_functions.c
++++ b/sound/soc/sdca/sdca_functions.c
+@@ -89,6 +89,7 @@ static int find_sdca_function(struct acp
+ {
+       struct fwnode_handle *function_node = acpi_fwnode_handle(adev);
+       struct sdca_device_data *sdca_data = data;
++      struct sdw_slave *slave = container_of(sdca_data, struct sdw_slave, sdca_data);
+       struct device *dev = &adev->dev;
+       struct fwnode_handle *control5; /* used to identify function type */
+       const char *function_name;
+@@ -136,11 +137,13 @@ static int find_sdca_function(struct acp
+               return ret;
+       }
+-      ret = patch_sdca_function_type(sdca_data->interface_revision, &function_type);
+-      if (ret < 0) {
+-              dev_err(dev, "SDCA version %#x invalid function type %d\n",
+-                      sdca_data->interface_revision, function_type);
+-              return ret;
++      if (!sdca_device_quirk_match(slave, SDCA_QUIRKS_SKIP_FUNC_TYPE_PATCHING)) {
++              ret = patch_sdca_function_type(sdca_data->interface_revision, &function_type);
++              if (ret < 0) {
++                      dev_err(dev, "SDCA version %#x invalid function type %d\n",
++                              sdca_data->interface_revision, function_type);
++                      return ret;
++              }
+       }
+       function_name = get_sdca_function_name(function_type);
diff --git a/queue-6.16/btrfs-initialize-inode-file_extent_tree-after-i_mode-has-been-set.patch b/queue-6.16/btrfs-initialize-inode-file_extent_tree-after-i_mode-has-been-set.patch
new file mode 100644 (file)
index 0000000..0e00f62
--- /dev/null
@@ -0,0 +1,82 @@
+From 8679d2687c351824d08cf1f0e86f3b65f22a00fe Mon Sep 17 00:00:00 2001
+From: austinchang <austinchang@synology.com>
+Date: Thu, 11 Sep 2025 06:06:29 +0000
+Subject: btrfs: initialize inode::file_extent_tree after i_mode has been set
+
+From: austinchang <austinchang@synology.com>
+
+commit 8679d2687c351824d08cf1f0e86f3b65f22a00fe upstream.
+
+btrfs_init_file_extent_tree() uses S_ISREG() to determine if the file is
+a regular file. In the beginning of btrfs_read_locked_inode(), the i_mode
+hasn't been read from inode item, then file_extent_tree won't be used at
+all in volumes without NO_HOLES.
+
+Fix this by calling btrfs_init_file_extent_tree() after i_mode is
+initialized in btrfs_read_locked_inode().
+
+Fixes: 3d7db6e8bd22e6 ("btrfs: don't allocate file extent tree for non regular files")
+CC: stable@vger.kernel.org # 6.12+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: austinchang <austinchang@synology.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/delayed-inode.c |    3 ---
+ fs/btrfs/inode.c         |   11 +++++------
+ 2 files changed, 5 insertions(+), 9 deletions(-)
+
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1843,7 +1843,6 @@ static void fill_stack_inode_item(struct
+ int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
+ {
+-      struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       struct btrfs_delayed_node *delayed_node;
+       struct btrfs_inode_item *inode_item;
+       struct inode *vfs_inode = &inode->vfs_inode;
+@@ -1864,8 +1863,6 @@ int btrfs_fill_inode(struct btrfs_inode
+       i_uid_write(vfs_inode, btrfs_stack_inode_uid(inode_item));
+       i_gid_write(vfs_inode, btrfs_stack_inode_gid(inode_item));
+       btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
+-      btrfs_inode_set_file_extent_range(inode, 0,
+-                      round_up(i_size_read(vfs_inode), fs_info->sectorsize));
+       vfs_inode->i_mode = btrfs_stack_inode_mode(inode_item);
+       set_nlink(vfs_inode, btrfs_stack_inode_nlink(inode_item));
+       inode_set_bytes(vfs_inode, btrfs_stack_inode_nbytes(inode_item));
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3881,10 +3881,6 @@ static int btrfs_read_locked_inode(struc
+       bool filled = false;
+       int first_xattr_slot;
+-      ret = btrfs_init_file_extent_tree(inode);
+-      if (ret)
+-              goto out;
+-
+       ret = btrfs_fill_inode(inode, &rdev);
+       if (!ret)
+               filled = true;
+@@ -3916,8 +3912,6 @@ static int btrfs_read_locked_inode(struc
+       i_uid_write(vfs_inode, btrfs_inode_uid(leaf, inode_item));
+       i_gid_write(vfs_inode, btrfs_inode_gid(leaf, inode_item));
+       btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
+-      btrfs_inode_set_file_extent_range(inode, 0,
+-                      round_up(i_size_read(vfs_inode), fs_info->sectorsize));
+       inode_set_atime(vfs_inode, btrfs_timespec_sec(leaf, &inode_item->atime),
+                       btrfs_timespec_nsec(leaf, &inode_item->atime));
+@@ -3948,6 +3942,11 @@ static int btrfs_read_locked_inode(struc
+       btrfs_update_inode_mapping_flags(inode);
+ cache_index:
++      ret = btrfs_init_file_extent_tree(inode);
++      if (ret)
++              goto out;
++      btrfs_inode_set_file_extent_range(inode, 0,
++                      round_up(i_size_read(vfs_inode), fs_info->sectorsize));
+       /*
+        * If we were modified in the current generation and evicted from memory
+        * and then re-read we need to do a full sync since we don't have any
diff --git a/queue-6.16/btrfs-tree-checker-fix-the-incorrect-inode-ref-size-check.patch b/queue-6.16/btrfs-tree-checker-fix-the-incorrect-inode-ref-size-check.patch
new file mode 100644 (file)
index 0000000..55ec66a
--- /dev/null
@@ -0,0 +1,52 @@
+From 96fa515e70f3e4b98685ef8cac9d737fc62f10e1 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 16 Sep 2025 07:54:06 +0930
+Subject: btrfs: tree-checker: fix the incorrect inode ref size check
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 96fa515e70f3e4b98685ef8cac9d737fc62f10e1 upstream.
+
+[BUG]
+Inside check_inode_ref(), we need to make sure every structure,
+including the btrfs_inode_extref header, is covered by the item.  But
+our code is incorrectly using "sizeof(iref)", where @iref is just a
+pointer.
+
+This means "sizeof(iref)" will always be "sizeof(void *)", which is much
+smaller than "sizeof(struct btrfs_inode_extref)".
+
+This will allow some bad inode extrefs to sneak in, defeating tree-checker.
+
+[FIX]
+Fix the typo by calling "sizeof(*iref)", which is the same as
+"sizeof(struct btrfs_inode_extref)", and will be the correct behavior we
+want.
+
+Fixes: 71bf92a9b877 ("btrfs: tree-checker: Add check for INODE_REF")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -1756,10 +1756,10 @@ static int check_inode_ref(struct extent
+       while (ptr < end) {
+               u16 namelen;
+-              if (unlikely(ptr + sizeof(iref) > end)) {
++              if (unlikely(ptr + sizeof(*iref) > end)) {
+                       inode_ref_err(leaf, slot,
+                       "inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
+-                              ptr, end, sizeof(iref));
++                              ptr, end, sizeof(*iref));
+                       return -EUCLEAN;
+               }
diff --git a/queue-6.16/crypto-af_alg-disallow-concurrent-writes-in-af_alg_sendmsg.patch b/queue-6.16/crypto-af_alg-disallow-concurrent-writes-in-af_alg_sendmsg.patch
new file mode 100644 (file)
index 0000000..b0a6ff0
--- /dev/null
@@ -0,0 +1,76 @@
+From 1b34cbbf4f011a121ef7b2d7d6e6920a036d5285 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 16 Sep 2025 17:20:59 +0800
+Subject: crypto: af_alg - Disallow concurrent writes in af_alg_sendmsg
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+commit 1b34cbbf4f011a121ef7b2d7d6e6920a036d5285 upstream.
+
+Issuing two writes to the same af_alg socket is bogus as the
+data will be interleaved in an unpredictable fashion.  Furthermore,
+concurrent writes may create inconsistencies in the internal
+socket state.
+
+Disallow this by adding a new ctx->write field that indiciates
+exclusive ownership for writing.
+
+Fixes: 8ff590903d5 ("crypto: algif_skcipher - User-space interface for skcipher operations")
+Reported-by: Muhammad Alifa Ramdhan <ramdhan@starlabs.sg>
+Reported-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ crypto/af_alg.c         |    7 +++++++
+ include/crypto/if_alg.h |   10 ++++++----
+ 2 files changed, 13 insertions(+), 4 deletions(-)
+
+--- a/crypto/af_alg.c
++++ b/crypto/af_alg.c
+@@ -970,6 +970,12 @@ int af_alg_sendmsg(struct socket *sock,
+       }
+       lock_sock(sk);
++      if (ctx->write) {
++              release_sock(sk);
++              return -EBUSY;
++      }
++      ctx->write = true;
++
+       if (ctx->init && !ctx->more) {
+               if (ctx->used) {
+                       err = -EINVAL;
+@@ -1104,6 +1110,7 @@ int af_alg_sendmsg(struct socket *sock,
+ unlock:
+       af_alg_data_wakeup(sk);
++      ctx->write = false;
+       release_sock(sk);
+       return copied ?: err;
+--- a/include/crypto/if_alg.h
++++ b/include/crypto/if_alg.h
+@@ -135,6 +135,7 @@ struct af_alg_async_req {
+  *                    SG?
+  * @enc:              Cryptographic operation to be performed when
+  *                    recvmsg is invoked.
++ * @write:            True if we are in the middle of a write.
+  * @init:             True if metadata has been sent.
+  * @len:              Length of memory allocated for this data structure.
+  * @inflight:         Non-zero when AIO requests are in flight.
+@@ -151,10 +152,11 @@ struct af_alg_ctx {
+       size_t used;
+       atomic_t rcvused;
+-      bool more;
+-      bool merge;
+-      bool enc;
+-      bool init;
++      u32             more:1,
++                      merge:1,
++                      enc:1,
++                      write:1,
++                      init:1;
+       unsigned int len;
diff --git a/queue-6.16/crypto-ccp-always-pass-in-an-error-pointer-to-__sev_platform_shutdown_locked.patch b/queue-6.16/crypto-ccp-always-pass-in-an-error-pointer-to-__sev_platform_shutdown_locked.patch
new file mode 100644 (file)
index 0000000..46301ba
--- /dev/null
@@ -0,0 +1,102 @@
+From 46834d90a9a13549264b9581067d8f746b4b36cc Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Sat, 6 Sep 2025 14:21:45 +0200
+Subject: crypto: ccp - Always pass in an error pointer to __sev_platform_shutdown_locked()
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit 46834d90a9a13549264b9581067d8f746b4b36cc upstream.
+
+When
+
+  9770b428b1a2 ("crypto: ccp - Move dev_info/err messages for SEV/SNP init and shutdown")
+
+moved the error messages dumping so that they don't need to be issued by
+the callers, it missed the case where __sev_firmware_shutdown() calls
+__sev_platform_shutdown_locked() with a NULL argument which leads to
+a NULL ptr deref on the shutdown path, during suspend to disk:
+
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 0 P4D 0
+  Oops: Oops: 0000 [#1] SMP NOPTI
+  CPU: 0 UID: 0 PID: 983 Comm: hib.sh Not tainted 6.17.0-rc4+ #1 PREEMPT(voluntary)
+  Hardware name: Supermicro Super Server/H12SSL-i, BIOS 2.5 09/08/2022
+  RIP: 0010:__sev_platform_shutdown_locked.cold+0x0/0x21 [ccp]
+
+That rIP is:
+
+  00000000000006fd <__sev_platform_shutdown_locked.cold>:
+   6fd:   8b 13                   mov    (%rbx),%edx
+   6ff:   48 8b 7d 00             mov    0x0(%rbp),%rdi
+   703:   89 c1                   mov    %eax,%ecx
+
+  Code: 74 05 31 ff 41 89 3f 49 8b 3e 89 ea 48 c7 c6 a0 8e 54 a0 41 bf 92 ff ff ff e8 e5 2e 09 e1 c6 05 2a d4 38 00 01 e9 26 af ff ff <8b> 13 48 8b 7d 00 89 c1 48 c7 c6 18 90 54 a0 89 44 24 04 e8 c1 2e
+  RSP: 0018:ffffc90005467d00 EFLAGS: 00010282
+  RAX: 00000000ffffff92 RBX: 0000000000000000 RCX: 0000000000000000
+                            ^^^^^^^^^^^^^^^^
+and %rbx is nice and clean.
+
+  Call Trace:
+   <TASK>
+   __sev_firmware_shutdown.isra.0
+   sev_dev_destroy
+   psp_dev_destroy
+   sp_destroy
+   pci_device_shutdown
+   device_shutdown
+   kernel_power_off
+   hibernate.cold
+   state_store
+   kernfs_fop_write_iter
+   vfs_write
+   ksys_write
+   do_syscall_64
+   entry_SYSCALL_64_after_hwframe
+
+Pass in a pointer to the function-local error var in the caller.
+
+With that addressed, suspending the ccp shows the error properly at
+least:
+
+  ccp 0000:47:00.1: sev command 0x2 timed out, disabling PSP
+  ccp 0000:47:00.1: SEV: failed to SHUTDOWN error 0x0, rc -110
+  SEV-SNP: Leaking PFN range 0x146800-0x146a00
+  SEV-SNP: PFN 0x146800 unassigned, dumping non-zero entries in 2M PFN region: [0x146800 - 0x146a00]
+  ...
+  ccp 0000:47:00.1: SEV-SNP firmware shutdown failed, rc -16, error 0x0
+  ACPI: PM: Preparing to enter system sleep state S5
+  kvm: exiting hardware virtualization
+  reboot: Power down
+
+Btw, this driver is crying to be cleaned up to pass in a proper I/O
+struct which can be used to store information between the different
+functions, otherwise stuff like that will happen in the future again.
+
+Fixes: 9770b428b1a2 ("crypto: ccp - Move dev_info/err messages for SEV/SNP init and shutdown")
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: <stable@kernel.org>
+Reviewed-by: Ashish Kalra <ashish.kalra@amd.com>
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/ccp/sev-dev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
+index e058ba027792..9f5ccc1720cb 100644
+--- a/drivers/crypto/ccp/sev-dev.c
++++ b/drivers/crypto/ccp/sev-dev.c
+@@ -2430,7 +2430,7 @@ static void __sev_firmware_shutdown(struct sev_device *sev, bool panic)
+ {
+       int error;
+-      __sev_platform_shutdown_locked(NULL);
++      __sev_platform_shutdown_locked(&error);
+       if (sev_es_tmr) {
+               /*
+-- 
+2.51.0
+
diff --git a/queue-6.16/dm-raid-don-t-set-io_min-and-io_opt-for-raid1.patch b/queue-6.16/dm-raid-don-t-set-io_min-and-io_opt-for-raid1.patch
new file mode 100644 (file)
index 0000000..fe73958
--- /dev/null
@@ -0,0 +1,46 @@
+From a86556264696b797d94238d99d8284d0d34ed960 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 15 Sep 2025 16:12:40 +0200
+Subject: dm-raid: don't set io_min and io_opt for raid1
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit a86556264696b797d94238d99d8284d0d34ed960 upstream.
+
+These commands
+ modprobe brd rd_size=1048576
+ vgcreate vg /dev/ram*
+ lvcreate -m4 -L10 -n lv vg
+trigger the following warnings:
+device-mapper: table: 252:10: adding target device (start sect 0 len 24576) caused an alignment inconsistency
+device-mapper: table: 252:10: adding target device (start sect 0 len 24576) caused an alignment inconsistency
+
+The warnings are caused by the fact that io_min is 512 and physical block
+size is 4096.
+
+If there's chunk-less raid, such as raid1, io_min shouldn't be set to zero
+because it would be raised to 512 and it would trigger the warning.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-raid.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3810,8 +3810,10 @@ static void raid_io_hints(struct dm_targ
+       struct raid_set *rs = ti->private;
+       unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors);
+-      limits->io_min = chunk_size_bytes;
+-      limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs);
++      if (chunk_size_bytes) {
++              limits->io_min = chunk_size_bytes;
++              limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs);
++      }
+ }
+ static void raid_presuspend(struct dm_target *ti)
diff --git a/queue-6.16/dm-stripe-fix-a-possible-integer-overflow.patch b/queue-6.16/dm-stripe-fix-a-possible-integer-overflow.patch
new file mode 100644 (file)
index 0000000..aa7c767
--- /dev/null
@@ -0,0 +1,43 @@
+From 1071d560afb4c245c2076494226df47db5a35708 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 11 Aug 2025 13:17:32 +0200
+Subject: dm-stripe: fix a possible integer overflow
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 1071d560afb4c245c2076494226df47db5a35708 upstream.
+
+There's a possible integer overflow in stripe_io_hints if we have too
+large chunk size. Test if the overflow happened, and if it did, don't set
+limits->io_min and limits->io_opt.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Suggested-by: Dongsheng Yang <dongsheng.yang@linux.dev>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-stripe.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/md/dm-stripe.c
++++ b/drivers/md/dm-stripe.c
+@@ -456,11 +456,15 @@ static void stripe_io_hints(struct dm_ta
+                           struct queue_limits *limits)
+ {
+       struct stripe_c *sc = ti->private;
+-      unsigned int chunk_size = sc->chunk_size << SECTOR_SHIFT;
++      unsigned int io_min, io_opt;
+       limits->chunk_sectors = sc->chunk_size;
+-      limits->io_min = chunk_size;
+-      limits->io_opt = chunk_size * sc->stripes;
++
++      if (!check_shl_overflow(sc->chunk_size, SECTOR_SHIFT, &io_min) &&
++          !check_mul_overflow(io_min, sc->stripes, &io_opt)) {
++              limits->io_min = io_min;
++              limits->io_opt = io_opt;
++      }
+ }
+ static struct target_type stripe_target = {
diff --git a/queue-6.16/drm-amd-display-allow-rx6xxx-rx7700-to-invoke-amdgpu_irq_get-put.patch b/queue-6.16/drm-amd-display-allow-rx6xxx-rx7700-to-invoke-amdgpu_irq_get-put.patch
new file mode 100644 (file)
index 0000000..a27d3c2
--- /dev/null
@@ -0,0 +1,92 @@
+From 29a2f430475357f760679b249f33e7282688e292 Mon Sep 17 00:00:00 2001
+From: Ivan Lipski <ivan.lipski@amd.com>
+Date: Tue, 2 Sep 2025 16:20:09 -0400
+Subject: drm/amd/display: Allow RX6xxx & RX7700 to invoke amdgpu_irq_get/put
+
+From: Ivan Lipski <ivan.lipski@amd.com>
+
+commit 29a2f430475357f760679b249f33e7282688e292 upstream.
+
+[Why&How]
+As reported on https://gitlab.freedesktop.org/drm/amd/-/issues/3936,
+SMU hang can occur if the interrupts are not enabled appropriately,
+causing a vblank timeout.
+
+This patch reverts commit 5009628d8509 ("drm/amd/display: Remove unnecessary
+amdgpu_irq_get/put"), but only for RX6xxx & RX7700 GPUs, on which the
+issue was observed.
+
+This will re-enable interrupts regardless of whether the user space needed
+it or not.
+
+Fixes: 5009628d8509 ("drm/amd/display: Remove unnecessary amdgpu_irq_get/put")
+Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3936
+Suggested-by: Sun peng Li <sunpeng.li@amd.com>
+Reviewed-by: Sun peng Li <sunpeng.li@amd.com>
+Signed-off-by: Ivan Lipski <ivan.lipski@amd.com>
+Signed-off-by: Ray Wu <ray.wu@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit 95d168b367aa28a59f94fc690ff76ebf69312c6d)
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   39 +++++++++++++++++++++-
+ 1 file changed, 38 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -8689,7 +8689,16 @@ static int amdgpu_dm_encoder_init(struct
+ static void manage_dm_interrupts(struct amdgpu_device *adev,
+                                struct amdgpu_crtc *acrtc,
+                                struct dm_crtc_state *acrtc_state)
+-{
++{     /*
++       * We cannot be sure that the frontend index maps to the same
++       * backend index - some even map to more than one.
++       * So we have to go through the CRTC to find the right IRQ.
++       */
++      int irq_type = amdgpu_display_crtc_idx_to_irq_type(
++                      adev,
++                      acrtc->crtc_id);
++      struct drm_device *dev = adev_to_drm(adev);
++
+       struct drm_vblank_crtc_config config = {0};
+       struct dc_crtc_timing *timing;
+       int offdelay;
+@@ -8742,7 +8751,35 @@ static void manage_dm_interrupts(struct
+               drm_crtc_vblank_on_config(&acrtc->base,
+                                         &config);
++              /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_get.*/
++              switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
++              case IP_VERSION(3, 0, 0):
++              case IP_VERSION(3, 0, 2):
++              case IP_VERSION(3, 0, 3):
++              case IP_VERSION(3, 2, 0):
++                      if (amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type))
++                              drm_err(dev, "DM_IRQ: Cannot get pageflip irq!\n");
++#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
++                      if (amdgpu_irq_get(adev, &adev->vline0_irq, irq_type))
++                              drm_err(dev, "DM_IRQ: Cannot get vline0 irq!\n");
++#endif
++              }
++
+       } else {
++              /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_put.*/
++              switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
++              case IP_VERSION(3, 0, 0):
++              case IP_VERSION(3, 0, 2):
++              case IP_VERSION(3, 0, 3):
++              case IP_VERSION(3, 2, 0):
++#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
++                      if (amdgpu_irq_put(adev, &adev->vline0_irq, irq_type))
++                              drm_err(dev, "DM_IRQ: Cannot put vline0 irq!\n");
++#endif
++                      if (amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type))
++                              drm_err(dev, "DM_IRQ: Cannot put pageflip irq!\n");
++              }
++
+               drm_crtc_vblank_off(&acrtc->base);
+       }
+ }
diff --git a/queue-6.16/drm-amd-only-restore-cached-manual-clock-settings-in-restore-if-od-enabled.patch b/queue-6.16/drm-amd-only-restore-cached-manual-clock-settings-in-restore-if-od-enabled.patch
new file mode 100644 (file)
index 0000000..2b2c776
--- /dev/null
@@ -0,0 +1,42 @@
+From f9b80514a7227c589291792cb6743b0ddf41c2bc Mon Sep 17 00:00:00 2001
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Mon, 15 Sep 2025 20:59:02 -0500
+Subject: drm/amd: Only restore cached manual clock settings in restore if OD enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+commit f9b80514a7227c589291792cb6743b0ddf41c2bc upstream.
+
+If OD is not enabled then restoring cached clock settings doesn't make
+sense and actually leads to errors in resume.
+
+Check if enabled before restoring settings.
+
+Fixes: 4e9526924d09 ("drm/amd: Restore cached manual clock settings during resume")
+Reported-by: Jérôme Lécuyer <jerome.4a4c@gmail.com>
+Closes: https://lore.kernel.org/amd-gfx/0ffe2692-7bfa-4821-856e-dd0f18e2c32b@amd.com/T/#me6db8ddb192626360c462b7570ed7eba0c6c9733
+Suggested-by: Jérôme Lécuyer <jerome.4a4c@gmail.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit 1a4dd33cc6e1baaa81efdbe68227a19f51c50f20)
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -2185,7 +2185,7 @@ static int smu_resume(struct amdgpu_ip_b
+                       return ret;
+       }
+-      if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) {
++      if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL && smu->od_enabled) {
+               ret = smu_od_edit_dpm_table(smu, PP_OD_COMMIT_DPM_TABLE, NULL, 0);
+               if (ret)
+                       return ret;
diff --git a/queue-6.16/drm-amdgpu-suspend-kfd-and-kgd-user-queues-for-s0ix.patch b/queue-6.16/drm-amdgpu-suspend-kfd-and-kgd-user-queues-for-s0ix.patch
new file mode 100644 (file)
index 0000000..9824914
--- /dev/null
@@ -0,0 +1,77 @@
+From 9272bb34b066993f5f468b219b4a26ba3f2b25a1 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Wed, 17 Sep 2025 12:42:11 -0400
+Subject: drm/amdgpu: suspend KFD and KGD user queues for S0ix
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 9272bb34b066993f5f468b219b4a26ba3f2b25a1 upstream.
+
+We need to make sure the user queues are preempted so
+GFX can enter gfxoff.
+
+Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
+Tested-by: David Perry <david.perry@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit f8b367e6fa1716cab7cc232b9e3dff29187fc99d)
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   24 ++++++++++--------------
+ 1 file changed, 10 insertions(+), 14 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -5055,7 +5055,7 @@ int amdgpu_device_suspend(struct drm_dev
+       adev->in_suspend = true;
+       if (amdgpu_sriov_vf(adev)) {
+-              if (!adev->in_s0ix && !adev->in_runpm)
++              if (!adev->in_runpm)
+                       amdgpu_amdkfd_suspend_process(adev);
+               amdgpu_virt_fini_data_exchange(adev);
+               r = amdgpu_virt_request_full_gpu(adev, false);
+@@ -5075,10 +5075,8 @@ int amdgpu_device_suspend(struct drm_dev
+       amdgpu_device_ip_suspend_phase1(adev);
+-      if (!adev->in_s0ix) {
+-              amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+-              amdgpu_userq_suspend(adev);
+-      }
++      amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
++      amdgpu_userq_suspend(adev);
+       r = amdgpu_device_evict_resources(adev);
+       if (r)
+@@ -5141,15 +5139,13 @@ int amdgpu_device_resume(struct drm_devi
+               goto exit;
+       }
+-      if (!adev->in_s0ix) {
+-              r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+-              if (r)
+-                      goto exit;
++      r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
++      if (r)
++              goto exit;
+-              r = amdgpu_userq_resume(adev);
+-              if (r)
+-                      goto exit;
+-      }
++      r = amdgpu_userq_resume(adev);
++      if (r)
++              goto exit;
+       r = amdgpu_device_ip_late_init(adev);
+       if (r)
+@@ -5162,7 +5158,7 @@ exit:
+               amdgpu_virt_init_data_exchange(adev);
+               amdgpu_virt_release_full_gpu(adev, true);
+-              if (!adev->in_s0ix && !r && !adev->in_runpm)
++              if (!r && !adev->in_runpm)
+                       r = amdgpu_amdkfd_resume_process(adev);
+       }
diff --git a/queue-6.16/drm-amdkfd-add-proper-handling-for-s0ix.patch b/queue-6.16/drm-amdkfd-add-proper-handling-for-s0ix.patch
new file mode 100644 (file)
index 0000000..a98e883
--- /dev/null
@@ -0,0 +1,140 @@
+From 2ade36eaa9ac05e4913e9785df19c2cde8f912fb Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Wed, 17 Sep 2025 12:42:09 -0400
+Subject: drm/amdkfd: add proper handling for S0ix
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 2ade36eaa9ac05e4913e9785df19c2cde8f912fb upstream.
+
+When in S0i3, the GFX state is retained, so all we need to do
+is stop the runlist so GFX can enter gfxoff.
+
+Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
+Tested-by: David Perry <david.perry@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit 4bfa8609934dbf39bbe6e75b4f971469384b50b1)
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |   16 +++++++++---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |   12 +++++++++
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c    |   36 +++++++++++++++++++++++++++++
+ 3 files changed, 60 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+@@ -250,16 +250,24 @@ void amdgpu_amdkfd_interrupt(struct amdg
+ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
+ {
+-      if (adev->kfd.dev)
+-              kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
++      if (adev->kfd.dev) {
++              if (adev->in_s0ix)
++                      kgd2kfd_stop_sched_all_nodes(adev->kfd.dev);
++              else
++                      kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
++      }
+ }
+ int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc)
+ {
+       int r = 0;
+-      if (adev->kfd.dev)
+-              r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
++      if (adev->kfd.dev) {
++              if (adev->in_s0ix)
++                      r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev);
++              else
++                      r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
++      }
+       return r;
+ }
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+@@ -426,7 +426,9 @@ void kgd2kfd_smi_event_throttle(struct k
+ int kgd2kfd_check_and_lock_kfd(void);
+ void kgd2kfd_unlock_kfd(void);
+ int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
++int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd);
+ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
++int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd);
+ bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
+ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+                              bool retry_fault);
+@@ -516,10 +518,20 @@ static inline int kgd2kfd_start_sched(st
+       return 0;
+ }
++static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
++{
++      return 0;
++}
++
+ static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
+ {
+       return 0;
+ }
++
++static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
++{
++      return 0;
++}
+ static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+ {
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -1501,6 +1501,25 @@ int kgd2kfd_start_sched(struct kfd_dev *
+       return ret;
+ }
++int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
++{
++      struct kfd_node *node;
++      int i, r;
++
++      if (!kfd->init_complete)
++              return 0;
++
++      for (i = 0; i < kfd->num_nodes; i++) {
++              node = kfd->nodes[i];
++              r = node->dqm->ops.unhalt(node->dqm);
++              if (r) {
++                      dev_err(kfd_device, "Error in starting scheduler\n");
++                      return r;
++              }
++      }
++      return 0;
++}
++
+ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
+ {
+       struct kfd_node *node;
+@@ -1518,6 +1537,23 @@ int kgd2kfd_stop_sched(struct kfd_dev *k
+       return node->dqm->ops.halt(node->dqm);
+ }
++int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
++{
++      struct kfd_node *node;
++      int i, r;
++
++      if (!kfd->init_complete)
++              return 0;
++
++      for (i = 0; i < kfd->num_nodes; i++) {
++              node = kfd->nodes[i];
++              r = node->dqm->ops.halt(node->dqm);
++              if (r)
++                      return r;
++      }
++      return 0;
++}
++
+ bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+ {
+       struct kfd_node *node;
diff --git a/queue-6.16/gpiolib-acpi-initialize-acpi_gpio_info-struct.patch b/queue-6.16/gpiolib-acpi-initialize-acpi_gpio_info-struct.patch
new file mode 100644 (file)
index 0000000..2432bb7
--- /dev/null
@@ -0,0 +1,57 @@
+From 19c839a98c731169f06d32e7c9e00c78a0086ebe Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?=
+ <sebastien.szymanski@armadeus.com>
+Date: Fri, 12 Sep 2025 22:18:50 +0200
+Subject: gpiolib: acpi: initialize acpi_gpio_info struct
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Sébastien Szymanski <sebastien.szymanski@armadeus.com>
+
+commit 19c839a98c731169f06d32e7c9e00c78a0086ebe upstream.
+
+Since commit 7c010d463372 ("gpiolib: acpi: Make sure we fill struct
+acpi_gpio_info"), uninitialized acpi_gpio_info struct are passed to
+__acpi_find_gpio() and later in the call stack info->quirks is used in
+acpi_populate_gpio_lookup. This breaks the i2c_hid_acpi driver:
+
+[   58.122916] i2c_hid_acpi i2c-UNIW0001:00: HID over i2c has not been provided an Int IRQ
+[   58.123097] i2c_hid_acpi i2c-UNIW0001:00: probe with driver i2c_hid_acpi failed with error -22
+
+Fix this by initializing the acpi_gpio_info passed to __acpi_find_gpio().
+
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220388
+Fixes: 7c010d463372 ("gpiolib: acpi: Make sure we fill struct acpi_gpio_info")
+Signed-off-by: Sébastien Szymanski <sebastien.szymanski@armadeus.com>
+Tested-by: Hans de Goede <hansg@kernel.org>
+Reviewed-by: Hans de Goede <hansg@kernel.org>
+Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Tested-By: Calvin Owens <calvin@wbinvd.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpio/gpiolib-acpi-core.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpio/gpiolib-acpi-core.c
++++ b/drivers/gpio/gpiolib-acpi-core.c
+@@ -942,7 +942,7 @@ struct gpio_desc *acpi_find_gpio(struct
+ {
+       struct acpi_device *adev = to_acpi_device_node(fwnode);
+       bool can_fallback = acpi_can_fallback_to_crs(adev, con_id);
+-      struct acpi_gpio_info info;
++      struct acpi_gpio_info info = {};
+       struct gpio_desc *desc;
+       desc = __acpi_find_gpio(fwnode, con_id, idx, can_fallback, &info);
+@@ -992,7 +992,7 @@ int acpi_dev_gpio_irq_wake_get_by(struct
+       int ret;
+       for (i = 0, idx = 0; idx <= index; i++) {
+-              struct acpi_gpio_info info;
++              struct acpi_gpio_info info = {};
+               struct gpio_desc *desc;
+               /* Ignore -EPROBE_DEFER, it only matters if idx matches */
diff --git a/queue-6.16/gup-optimize-longterm-pin_user_pages-for-large-folio.patch b/queue-6.16/gup-optimize-longterm-pin_user_pages-for-large-folio.patch
new file mode 100644 (file)
index 0000000..f99ff33
--- /dev/null
@@ -0,0 +1,132 @@
+From a03db236aebfaeadf79396dbd570896b870bda01 Mon Sep 17 00:00:00 2001
+From: Li Zhe <lizhe.67@bytedance.com>
+Date: Fri, 6 Jun 2025 10:37:42 +0800
+Subject: gup: optimize longterm pin_user_pages() for large folio
+
+From: Li Zhe <lizhe.67@bytedance.com>
+
+commit a03db236aebfaeadf79396dbd570896b870bda01 upstream.
+
+In the current implementation of longterm pin_user_pages(), we invoke
+collect_longterm_unpinnable_folios().  This function iterates through the
+list to check whether each folio belongs to the "longterm_unpinnabled"
+category.  The folios in this list essentially correspond to a contiguous
+region of userspace addresses, with each folio representing a physical
+address in increments of PAGESIZE.
+
+If this userspace address range is mapped with large folio, we can
+optimize the performance of function collect_longterm_unpinnable_folios()
+by reducing the using of READ_ONCE() invoked in
+pofs_get_folio()->page_folio()->_compound_head().
+
+Also, we can simplify the logic of collect_longterm_unpinnable_folios().
+Instead of comparing with prev_folio after calling pofs_get_folio(), we
+can check whether the next page is within the same folio.
+
+The performance test results, based on v6.15, obtained through the
+gup_test tool from the kernel source tree are as follows.  We achieve an
+improvement of over 66% for large folio with pagesize=2M.  For small
+folio, we have only observed a very slight degradation in performance.
+
+Without this patch:
+
+    [root@localhost ~] ./gup_test -HL -m 8192 -n 512
+    TAP version 13
+    1..1
+    # PIN_LONGTERM_BENCHMARK: Time: get:14391 put:10858 us#
+    ok 1 ioctl status 0
+    # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0
+    [root@localhost ~]# ./gup_test -LT -m 8192 -n 512
+    TAP version 13
+    1..1
+    # PIN_LONGTERM_BENCHMARK: Time: get:130538 put:31676 us#
+    ok 1 ioctl status 0
+    # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0
+
+With this patch:
+
+    [root@localhost ~] ./gup_test -HL -m 8192 -n 512
+    TAP version 13
+    1..1
+    # PIN_LONGTERM_BENCHMARK: Time: get:4867 put:10516 us#
+    ok 1 ioctl status 0
+    # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0
+    [root@localhost ~]# ./gup_test -LT -m 8192 -n 512
+    TAP version 13
+    1..1
+    # PIN_LONGTERM_BENCHMARK: Time: get:131798 put:31328 us#
+    ok 1 ioctl status 0
+    # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0
+
+[lizhe.67@bytedance.com: whitespace fix, per David]
+  Link: https://lkml.kernel.org/r/20250606091917.91384-1-lizhe.67@bytedance.com
+Link: https://lkml.kernel.org/r/20250606023742.58344-1-lizhe.67@bytedance.com
+Signed-off-by: Li Zhe <lizhe.67@bytedance.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c |   38 ++++++++++++++++++++++++++++++--------
+ 1 file changed, 30 insertions(+), 8 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -2300,6 +2300,31 @@ static void pofs_unpin(struct pages_or_f
+               unpin_user_pages(pofs->pages, pofs->nr_entries);
+ }
++static struct folio *pofs_next_folio(struct folio *folio,
++              struct pages_or_folios *pofs, long *index_ptr)
++{
++      long i = *index_ptr + 1;
++
++      if (!pofs->has_folios && folio_test_large(folio)) {
++              const unsigned long start_pfn = folio_pfn(folio);
++              const unsigned long end_pfn = start_pfn + folio_nr_pages(folio);
++
++              for (; i < pofs->nr_entries; i++) {
++                      unsigned long pfn = page_to_pfn(pofs->pages[i]);
++
++                      /* Is this page part of this folio? */
++                      if (pfn < start_pfn || pfn >= end_pfn)
++                              break;
++              }
++      }
++
++      if (unlikely(i == pofs->nr_entries))
++              return NULL;
++      *index_ptr = i;
++
++      return pofs_get_folio(pofs, i);
++}
++
+ /*
+  * Returns the number of collected folios. Return value is always >= 0.
+  */
+@@ -2307,16 +2332,13 @@ static unsigned long collect_longterm_un
+               struct list_head *movable_folio_list,
+               struct pages_or_folios *pofs)
+ {
+-      unsigned long i, collected = 0;
+-      struct folio *prev_folio = NULL;
++      unsigned long collected = 0;
+       bool drain_allow = true;
++      struct folio *folio;
++      long i = 0;
+-      for (i = 0; i < pofs->nr_entries; i++) {
+-              struct folio *folio = pofs_get_folio(pofs, i);
+-
+-              if (folio == prev_folio)
+-                      continue;
+-              prev_folio = folio;
++      for (folio = pofs_get_folio(pofs, i); folio;
++           folio = pofs_next_folio(folio, pofs, &i)) {
+               if (folio_is_longterm_pinnable(folio))
+                       continue;
diff --git a/queue-6.16/io_uring-include-dying-ring-in-task_work-should-cancel-state.patch b/queue-6.16/io_uring-include-dying-ring-in-task_work-should-cancel-state.patch
new file mode 100644 (file)
index 0000000..4db6bfd
--- /dev/null
@@ -0,0 +1,94 @@
+From 3539b1467e94336d5854ebf976d9627bfb65d6c3 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Thu, 18 Sep 2025 10:21:14 -0600
+Subject: io_uring: include dying ring in task_work "should cancel" state
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 3539b1467e94336d5854ebf976d9627bfb65d6c3 upstream.
+
+When running task_work for an exiting task, rather than perform the
+issue retry attempt, the task_work is canceled. However, this isn't
+done for a ring that has been closed. This can lead to requests being
+successfully completed post the ring being closed, which is somewhat
+confusing and surprising to an application.
+
+Rather than just check the task exit state, also include the ring
+ref state in deciding whether or not to terminate a given request when
+run from task_work.
+
+Cc: stable@vger.kernel.org # 6.1+
+Link: https://github.com/axboe/liburing/discussions/1459
+Reported-by: Benedek Thaler <thaler@thaler.hu>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c  |    6 ++++--
+ io_uring/io_uring.h  |    4 ++--
+ io_uring/poll.c      |    2 +-
+ io_uring/timeout.c   |    2 +-
+ io_uring/uring_cmd.c |    2 +-
+ 5 files changed, 9 insertions(+), 7 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -1371,8 +1371,10 @@ static void io_req_task_cancel(struct io
+ void io_req_task_submit(struct io_kiocb *req, io_tw_token_t tw)
+ {
+-      io_tw_lock(req->ctx, tw);
+-      if (unlikely(io_should_terminate_tw()))
++      struct io_ring_ctx *ctx = req->ctx;
++
++      io_tw_lock(ctx, tw);
++      if (unlikely(io_should_terminate_tw(ctx)))
+               io_req_defer_failed(req, -EFAULT);
+       else if (req->flags & REQ_F_FORCE_ASYNC)
+               io_queue_iowq(req);
+--- a/io_uring/io_uring.h
++++ b/io_uring/io_uring.h
+@@ -470,9 +470,9 @@ static inline bool io_allowed_run_tw(str
+  * 2) PF_KTHREAD is set, in which case the invoker of the task_work is
+  *    our fallback task_work.
+  */
+-static inline bool io_should_terminate_tw(void)
++static inline bool io_should_terminate_tw(struct io_ring_ctx *ctx)
+ {
+-      return current->flags & (PF_KTHREAD | PF_EXITING);
++      return (current->flags & (PF_KTHREAD | PF_EXITING)) || percpu_ref_is_dying(&ctx->refs);
+ }
+ static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
+--- a/io_uring/poll.c
++++ b/io_uring/poll.c
+@@ -224,7 +224,7 @@ static int io_poll_check_events(struct i
+ {
+       int v;
+-      if (unlikely(io_should_terminate_tw()))
++      if (unlikely(io_should_terminate_tw(req->ctx)))
+               return -ECANCELED;
+       do {
+--- a/io_uring/timeout.c
++++ b/io_uring/timeout.c
+@@ -324,7 +324,7 @@ static void io_req_task_link_timeout(str
+       int ret;
+       if (prev) {
+-              if (!io_should_terminate_tw()) {
++              if (!io_should_terminate_tw(req->ctx)) {
+                       struct io_cancel_data cd = {
+                               .ctx            = req->ctx,
+                               .data           = prev->cqe.user_data,
+--- a/io_uring/uring_cmd.c
++++ b/io_uring/uring_cmd.c
+@@ -123,7 +123,7 @@ static void io_uring_cmd_work(struct io_
+       struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
+       unsigned int flags = IO_URING_F_COMPLETE_DEFER;
+-      if (io_should_terminate_tw())
++      if (io_should_terminate_tw(req->ctx))
+               flags |= IO_URING_F_TASK_DEAD;
+       /* task_work executor checks the deffered list completion */
diff --git a/queue-6.16/io_uring-io-wq-fix-max_workers-breakage-and-nr_workers-underflow.patch b/queue-6.16/io_uring-io-wq-fix-max_workers-breakage-and-nr_workers-underflow.patch
new file mode 100644 (file)
index 0000000..6adbdf2
--- /dev/null
@@ -0,0 +1,63 @@
+From cd4ea81be3eb94047ad023c631afd9bd6c295400 Mon Sep 17 00:00:00 2001
+From: Max Kellermann <max.kellermann@ionos.com>
+Date: Fri, 12 Sep 2025 02:06:09 +0200
+Subject: io_uring/io-wq: fix `max_workers` breakage and `nr_workers` underflow
+
+From: Max Kellermann <max.kellermann@ionos.com>
+
+commit cd4ea81be3eb94047ad023c631afd9bd6c295400 upstream.
+
+Commit 88e6c42e40de ("io_uring/io-wq: add check free worker before
+create new worker") reused the variable `do_create` for something
+else, abusing it for the free worker check.
+
+This caused the value to effectively always be `true` at the time
+`nr_workers < max_workers` was checked, but it should really be
+`false`.  This means the `max_workers` setting was ignored, and worse:
+if the limit had already been reached, incrementing `nr_workers` was
+skipped even though another worker would be created.
+
+When later lots of workers exit, the `nr_workers` field could easily
+underflow, making the problem worse because more and more workers
+would be created without incrementing `nr_workers`.
+
+The simple solution is to use a different variable for the free worker
+check instead of using one variable for two different things.
+
+Cc: stable@vger.kernel.org
+Fixes: 88e6c42e40de ("io_uring/io-wq: add check free worker before create new worker")
+Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
+Reviewed-by: Fengnan Chang <changfengnan@bytedance.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io-wq.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
+index 17dfaa0395c4..1d03b2fc4b25 100644
+--- a/io_uring/io-wq.c
++++ b/io_uring/io-wq.c
+@@ -352,16 +352,16 @@ static void create_worker_cb(struct callback_head *cb)
+       struct io_wq *wq;
+       struct io_wq_acct *acct;
+-      bool do_create = false;
++      bool activated_free_worker, do_create = false;
+       worker = container_of(cb, struct io_worker, create_work);
+       wq = worker->wq;
+       acct = worker->acct;
+       rcu_read_lock();
+-      do_create = !io_acct_activate_free_worker(acct);
++      activated_free_worker = io_acct_activate_free_worker(acct);
+       rcu_read_unlock();
+-      if (!do_create)
++      if (activated_free_worker)
+               goto no_need_create;
+       raw_spin_lock(&acct->workers_lock);
+-- 
+2.51.0
+
diff --git a/queue-6.16/iommu-amd-fix-ivrs_base-memleak-in-early_amd_iommu_init.patch b/queue-6.16/iommu-amd-fix-ivrs_base-memleak-in-early_amd_iommu_init.patch
new file mode 100644 (file)
index 0000000..e7ce805
--- /dev/null
@@ -0,0 +1,35 @@
+From 923b70581cb6acede90f8aaf4afe5d1c58c67b71 Mon Sep 17 00:00:00 2001
+From: Zhen Ni <zhen.ni@easystack.cn>
+Date: Fri, 22 Aug 2025 10:49:15 +0800
+Subject: iommu/amd: Fix ivrs_base memleak in early_amd_iommu_init()
+
+From: Zhen Ni <zhen.ni@easystack.cn>
+
+commit 923b70581cb6acede90f8aaf4afe5d1c58c67b71 upstream.
+
+Fix a permanent ACPI table memory leak in early_amd_iommu_init() when
+CMPXCHG16B feature is not supported
+
+Fixes: 82582f85ed22 ("iommu/amd: Disable AMD IOMMU if CMPXCHG16B feature is not supported")
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhen Ni <zhen.ni@easystack.cn>
+Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Link: https://lore.kernel.org/r/20250822024915.673427-1-zhen.ni@easystack.cn
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/amd/init.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/iommu/amd/init.c
++++ b/drivers/iommu/amd/init.c
+@@ -3048,7 +3048,8 @@ static int __init early_amd_iommu_init(v
+       if (!boot_cpu_has(X86_FEATURE_CX16)) {
+               pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
+-              return -EINVAL;
++              ret = -EINVAL;
++              goto out;
+       }
+       /*
diff --git a/queue-6.16/iommu-amd-pgtbl-fix-possible-race-while-increase-page-table-level.patch b/queue-6.16/iommu-amd-pgtbl-fix-possible-race-while-increase-page-table-level.patch
new file mode 100644 (file)
index 0000000..84e66ae
--- /dev/null
@@ -0,0 +1,139 @@
+From 1e56310b40fd2e7e0b9493da9ff488af145bdd0c Mon Sep 17 00:00:00 2001
+From: Vasant Hegde <vasant.hegde@amd.com>
+Date: Sat, 13 Sep 2025 06:26:57 +0000
+Subject: iommu/amd/pgtbl: Fix possible race while increase page table level
+
+From: Vasant Hegde <vasant.hegde@amd.com>
+
+commit 1e56310b40fd2e7e0b9493da9ff488af145bdd0c upstream.
+
+The AMD IOMMU host page table implementation supports dynamic page table levels
+(up to 6 levels), starting with a 3-level configuration that expands based on
+IOVA address. The kernel maintains a root pointer and current page table level
+to enable proper page table walks in alloc_pte()/fetch_pte() operations.
+
+The IOMMU IOVA allocator initially starts with 32-bit address and once it's
+exhausted it switches to 64-bit address (max address is determined based
+on IOMMU and device DMA capability). To support larger IOVA, AMD IOMMU
+driver increases page table level.
+
+But in unmap path (iommu_v1_unmap_pages()), fetch_pte() reads
+pgtable->[root/mode] without lock. So it's possible that in an extreme corner case,
+when increase_address_space() is updating pgtable->[root/mode], fetch_pte()
+reads wrong page table level (pgtable->mode). It does compare the value with
+level encoded in page table and returns NULL. This will result in
+iommu_unmap ops failing and upper layer may retry/log WARN_ON.
+
+CPU 0                                         CPU 1
+------                                       ------
+map pages                                    unmap pages
+alloc_pte() -> increase_address_space()      iommu_v1_unmap_pages() -> fetch_pte()
+  pgtable->root = pte (new root value)
+                                             READ pgtable->[mode/root]
+                                              Reads new root, old mode
+  Updates mode (pgtable->mode += 1)
+
+Since Page table level updates are infrequent and already synchronized with a
+spinlock, implement seqcount to enable lock-free read operations on the read path.
+
+Fixes: 754265bcab7 ("iommu/amd: Fix race in increase_address_space()")
+Reported-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
+Cc: stable@vger.kernel.org
+Cc: Joao Martins <joao.m.martins@oracle.com>
+Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/amd/amd_iommu_types.h |    1 +
+ drivers/iommu/amd/io_pgtable.c      |   25 +++++++++++++++++++++----
+ 2 files changed, 22 insertions(+), 4 deletions(-)
+
+--- a/drivers/iommu/amd/amd_iommu_types.h
++++ b/drivers/iommu/amd/amd_iommu_types.h
+@@ -551,6 +551,7 @@ struct gcr3_tbl_info {
+ };
+ struct amd_io_pgtable {
++      seqcount_t              seqcount;       /* Protects root/mode update */
+       struct io_pgtable       pgtbl;
+       int                     mode;
+       u64                     *root;
+--- a/drivers/iommu/amd/io_pgtable.c
++++ b/drivers/iommu/amd/io_pgtable.c
+@@ -17,6 +17,7 @@
+ #include <linux/slab.h>
+ #include <linux/types.h>
+ #include <linux/dma-mapping.h>
++#include <linux/seqlock.h>
+ #include <asm/barrier.h>
+@@ -130,8 +131,11 @@ static bool increase_address_space(struc
+       *pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
++      write_seqcount_begin(&pgtable->seqcount);
+       pgtable->root  = pte;
+       pgtable->mode += 1;
++      write_seqcount_end(&pgtable->seqcount);
++
+       amd_iommu_update_and_flush_device_table(domain);
+       pte = NULL;
+@@ -153,6 +157,7 @@ static u64 *alloc_pte(struct amd_io_pgta
+ {
+       unsigned long last_addr = address + (page_size - 1);
+       struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
++      unsigned int seqcount;
+       int level, end_lvl;
+       u64 *pte, *page;
+@@ -170,8 +175,14 @@ static u64 *alloc_pte(struct amd_io_pgta
+       }
+-      level   = pgtable->mode - 1;
+-      pte     = &pgtable->root[PM_LEVEL_INDEX(level, address)];
++      do {
++              seqcount = read_seqcount_begin(&pgtable->seqcount);
++
++              level   = pgtable->mode - 1;
++              pte     = &pgtable->root[PM_LEVEL_INDEX(level, address)];
++      } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
++
++
+       address = PAGE_SIZE_ALIGN(address, page_size);
+       end_lvl = PAGE_SIZE_LEVEL(page_size);
+@@ -249,6 +260,7 @@ static u64 *fetch_pte(struct amd_io_pgta
+                     unsigned long *page_size)
+ {
+       int level;
++      unsigned int seqcount;
+       u64 *pte;
+       *page_size = 0;
+@@ -256,8 +268,12 @@ static u64 *fetch_pte(struct amd_io_pgta
+       if (address > PM_LEVEL_SIZE(pgtable->mode))
+               return NULL;
+-      level      =  pgtable->mode - 1;
+-      pte        = &pgtable->root[PM_LEVEL_INDEX(level, address)];
++      do {
++              seqcount = read_seqcount_begin(&pgtable->seqcount);
++              level      =  pgtable->mode - 1;
++              pte        = &pgtable->root[PM_LEVEL_INDEX(level, address)];
++      } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
++
+       *page_size =  PTE_LEVEL_PAGE_SIZE(level);
+       while (level > 0) {
+@@ -541,6 +557,7 @@ static struct io_pgtable *v1_alloc_pgtab
+       if (!pgtable->root)
+               return NULL;
+       pgtable->mode = PAGE_MODE_3_LEVEL;
++      seqcount_init(&pgtable->seqcount);
+       cfg->pgsize_bitmap  = amd_iommu_pgsize_bitmap;
+       cfg->ias            = IOMMU_IN_ADDR_BIT_SIZE;
diff --git a/queue-6.16/iommu-s390-fix-memory-corruption-when-using-identity-domain.patch b/queue-6.16/iommu-s390-fix-memory-corruption-when-using-identity-domain.patch
new file mode 100644 (file)
index 0000000..8e1cf4d
--- /dev/null
@@ -0,0 +1,47 @@
+From b3506e9bcc777ed6af2ab631c86a9990ed97b474 Mon Sep 17 00:00:00 2001
+From: Matthew Rosato <mjrosato@linux.ibm.com>
+Date: Wed, 27 Aug 2025 17:08:27 -0400
+Subject: iommu/s390: Fix memory corruption when using identity domain
+
+From: Matthew Rosato <mjrosato@linux.ibm.com>
+
+commit b3506e9bcc777ed6af2ab631c86a9990ed97b474 upstream.
+
+zpci_get_iommu_ctrs() returns counter information to be reported as part
+of device statistics; these counters are stored as part of the s390_domain.
+The problem, however, is that the identity domain is not backed by an
+s390_domain and so the conversion via to_s390_domain() yields a bad address
+that is zero'd initially and read on-demand later via a sysfs read.
+These counters aren't necessary for the identity domain; just return NULL
+in this case.
+
+This issue was discovered via KASAN with reports that look like:
+BUG: KASAN: global-out-of-bounds in zpci_fmb_enable_device
+when using the identity domain for a device on s390.
+
+Cc: stable@vger.kernel.org
+Fixes: 64af12c6ec3a ("iommu/s390: implement iommu passthrough via identity domain")
+Reported-by: Cam Miller <cam@linux.ibm.com>
+Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
+Tested-by: Cam Miller <cam@linux.ibm.com>
+Reviewed-by: Farhan Ali <alifm@linux.ibm.com>
+Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Link: https://lore.kernel.org/r/20250827210828.274527-1-mjrosato@linux.ibm.com
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/s390-iommu.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/iommu/s390-iommu.c
++++ b/drivers/iommu/s390-iommu.c
+@@ -1031,7 +1031,8 @@ struct zpci_iommu_ctrs *zpci_get_iommu_c
+       lockdep_assert_held(&zdev->dom_lock);
+-      if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
++      if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED ||
++          zdev->s390_domain->type == IOMMU_DOMAIN_IDENTITY)
+               return NULL;
+       s390_domain = to_s390_domain(zdev->s390_domain);
diff --git a/queue-6.16/iommu-s390-make-attach-succeed-when-the-device-was-surprise-removed.patch b/queue-6.16/iommu-s390-make-attach-succeed-when-the-device-was-surprise-removed.patch
new file mode 100644 (file)
index 0000000..e2f000d
--- /dev/null
@@ -0,0 +1,120 @@
+From 9ffaf5229055fcfbb3b3d6f1c7e58d63715c3f73 Mon Sep 17 00:00:00 2001
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+Date: Thu, 4 Sep 2025 10:59:49 +0200
+Subject: iommu/s390: Make attach succeed when the device was surprise removed
+
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+
+commit 9ffaf5229055fcfbb3b3d6f1c7e58d63715c3f73 upstream.
+
+When a PCI device is removed with surprise hotplug, there may still be
+attempts to attach the device to the default domain as part of tear down
+via (__iommu_release_dma_ownership()), or because the removal happens
+during probe (__iommu_probe_device()). In both cases zpci_register_ioat()
+fails with a cc value indicating that the device handle is invalid. This
+is because the device is no longer part of the instance as far as the
+hypervisor is concerned.
+
+Currently this leads to an error return and s390_iommu_attach_device()
+fails. This triggers the WARN_ON() in __iommu_group_set_domain_nofail()
+because attaching to the default domain must never fail.
+
+With the device fenced by the hypervisor no DMAs to or from memory are
+possible and the IOMMU translations have no effect. Proceed as if the
+registration was successful and let the hotplug event handling clean up
+the device.
+
+This is similar to how devices in the error state are handled since
+commit 59bbf596791b ("iommu/s390: Make attach succeed even if the device
+is in error state") except that for removal the domain will not be
+registered later. This approach was also previously discussed at the
+link.
+
+Handle both cases, error state and removal, in a helper which checks if
+the error needs to be propagated or ignored. Avoid magic number
+condition codes by using the pre-existing, but never used, defines for
+PCI load/store condition codes and rename them to reflect that they
+apply to all PCI instructions.
+
+Cc: stable@vger.kernel.org # v6.2
+Link: https://lore.kernel.org/linux-iommu/20240808194155.GD1985367@ziepe.ca/
+Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
+Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
+Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
+Link: https://lore.kernel.org/r/20250904-iommu_succeed_attach_removed-v1-1-e7f333d2f80f@linux.ibm.com
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/include/asm/pci_insn.h |   10 +++++-----
+ drivers/iommu/s390-iommu.c       |   26 +++++++++++++++++++-------
+ 2 files changed, 24 insertions(+), 12 deletions(-)
+
+--- a/arch/s390/include/asm/pci_insn.h
++++ b/arch/s390/include/asm/pci_insn.h
+@@ -16,11 +16,11 @@
+ #define ZPCI_PCI_ST_FUNC_NOT_AVAIL            40
+ #define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE               44
+-/* Load/Store return codes */
+-#define ZPCI_PCI_LS_OK                                0
+-#define ZPCI_PCI_LS_ERR                               1
+-#define ZPCI_PCI_LS_BUSY                      2
+-#define ZPCI_PCI_LS_INVAL_HANDLE              3
++/* PCI instruction condition codes */
++#define ZPCI_CC_OK                            0
++#define ZPCI_CC_ERR                           1
++#define ZPCI_CC_BUSY                          2
++#define ZPCI_CC_INVAL_HANDLE                  3
+ /* Load/Store address space identifiers */
+ #define ZPCI_PCIAS_MEMIO_0                    0
+--- a/drivers/iommu/s390-iommu.c
++++ b/drivers/iommu/s390-iommu.c
+@@ -611,6 +611,23 @@ static u64 get_iota_region_flag(struct s
+       }
+ }
++static bool reg_ioat_propagate_error(int cc, u8 status)
++{
++      /*
++       * If the device is in the error state the reset routine
++       * will register the IOAT of the newly set domain on re-enable
++       */
++      if (cc == ZPCI_CC_ERR && status == ZPCI_PCI_ST_FUNC_NOT_AVAIL)
++              return false;
++      /*
++       * If the device was removed treat registration as success
++       * and let the subsequent error event trigger tear down.
++       */
++      if (cc == ZPCI_CC_INVAL_HANDLE)
++              return false;
++      return cc != ZPCI_CC_OK;
++}
++
+ static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
+                                     struct iommu_domain *domain, u8 *status)
+ {
+@@ -695,7 +712,7 @@ static int s390_iommu_attach_device(stru
+       /* If we fail now DMA remains blocked via blocking domain */
+       cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
+-      if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
++      if (reg_ioat_propagate_error(cc, status))
+               return -EIO;
+       zdev->dma_table = s390_domain->dma_table;
+       zdev_s390_domain_update(zdev, domain);
+@@ -1123,12 +1140,7 @@ static int s390_attach_dev_identity(stru
+       /* If we fail now DMA remains blocked via blocking domain */
+       cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
+-
+-      /*
+-       * If the device is undergoing error recovery the reset code
+-       * will re-establish the new domain.
+-       */
+-      if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
++      if (reg_ioat_propagate_error(cc, status))
+               return -EIO;
+       zdev_s390_domain_update(zdev, domain);
diff --git a/queue-6.16/iommu-vt-d-fix-__domain_mapping-s-usage-of-switch_to_super_page.patch b/queue-6.16/iommu-vt-d-fix-__domain_mapping-s-usage-of-switch_to_super_page.patch
new file mode 100644 (file)
index 0000000..197bacd
--- /dev/null
@@ -0,0 +1,67 @@
+From dce043c07ca1ac19cfbe2844a6dc71e35c322353 Mon Sep 17 00:00:00 2001
+From: Eugene Koira <eugkoira@amazon.com>
+Date: Wed, 3 Sep 2025 13:53:29 +0800
+Subject: iommu/vt-d: Fix __domain_mapping()'s usage of switch_to_super_page()
+
+From: Eugene Koira <eugkoira@amazon.com>
+
+commit dce043c07ca1ac19cfbe2844a6dc71e35c322353 upstream.
+
+switch_to_super_page() assumes the memory range it's working on is aligned
+to the target large page level. Unfortunately, __domain_mapping() doesn't
+take this into account when using it, and will pass unaligned ranges
+ultimately freeing a PTE range larger than expected.
+
+Take for example a mapping with the following iov_pfn range [0x3fe400,
+0x4c0600), which should be backed by the following mappings:
+
+   iov_pfn [0x3fe400, 0x3fffff] covered by 2MiB pages
+   iov_pfn [0x400000, 0x4bffff] covered by 1GiB pages
+   iov_pfn [0x4c0000, 0x4c05ff] covered by 2MiB pages
+
+Under this circumstance, __domain_mapping() will pass [0x400000, 0x4c05ff]
+to switch_to_super_page() at a 1 GiB granularity, which will in turn
+free PTEs all the way to iov_pfn 0x4fffff.
+
+Mitigate this by rounding down the iov_pfn range passed to
+switch_to_super_page() in __domain_mapping()
+to the target large page level.
+
+Additionally add range alignment checks to switch_to_super_page.
+
+Fixes: 9906b9352a35 ("iommu/vt-d: Avoid duplicate removing in __domain_mapping()")
+Signed-off-by: Eugene Koira <eugkoira@amazon.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Nicolas Saenz Julienne <nsaenz@amazon.com>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lore.kernel.org/r/20250826143816.38686-1-eugkoira@amazon.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/intel/iommu.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -1592,6 +1592,10 @@ static void switch_to_super_page(struct
+       unsigned long lvl_pages = lvl_to_nr_pages(level);
+       struct dma_pte *pte = NULL;
++      if (WARN_ON(!IS_ALIGNED(start_pfn, lvl_pages) ||
++                  !IS_ALIGNED(end_pfn + 1, lvl_pages)))
++              return;
++
+       while (start_pfn <= end_pfn) {
+               if (!pte)
+                       pte = pfn_to_dma_pte(domain, start_pfn, &level,
+@@ -1667,7 +1671,8 @@ __domain_mapping(struct dmar_domain *dom
+                               unsigned long pages_to_remove;
+                               pteval |= DMA_PTE_LARGE_PAGE;
+-                              pages_to_remove = min_t(unsigned long, nr_pages,
++                              pages_to_remove = min_t(unsigned long,
++                                                      round_down(nr_pages, lvl_pages),
+                                                       nr_pte_to_next_page(pte) * lvl_pages);
+                               end_pfn = iov_pfn + pages_to_remove - 1;
+                               switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
diff --git a/queue-6.16/ksmbd-smbdirect-validate-data_offset-and-data_length-field-of-smb_direct_data_transfer.patch b/queue-6.16/ksmbd-smbdirect-validate-data_offset-and-data_length-field-of-smb_direct_data_transfer.patch
new file mode 100644 (file)
index 0000000..5a76de1
--- /dev/null
@@ -0,0 +1,58 @@
+From 5282491fc49d5614ac6ddcd012e5743eecb6a67c Mon Sep 17 00:00:00 2001
+From: Namjae Jeon <linkinjeon@kernel.org>
+Date: Wed, 10 Sep 2025 11:22:52 +0900
+Subject: ksmbd: smbdirect: validate data_offset and data_length field of smb_direct_data_transfer
+
+From: Namjae Jeon <linkinjeon@kernel.org>
+
+commit 5282491fc49d5614ac6ddcd012e5743eecb6a67c upstream.
+
+If data_offset and data_length of smb_direct_data_transfer struct are
+invalid, out of bounds issue could happen.
+This patch validate data_offset and data_length field in recv_done.
+
+Cc: stable@vger.kernel.org
+Fixes: 2ea086e35c3d ("ksmbd: add buffer validation for smb direct")
+Reviewed-by: Stefan Metzmacher <metze@samba.org>
+Reported-by: Luigino Camastra, Aisle Research <luigino.camastra@aisle.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/server/transport_rdma.c |   17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/fs/smb/server/transport_rdma.c
++++ b/fs/smb/server/transport_rdma.c
+@@ -554,7 +554,7 @@ static void recv_done(struct ib_cq *cq,
+       case SMB_DIRECT_MSG_DATA_TRANSFER: {
+               struct smb_direct_data_transfer *data_transfer =
+                       (struct smb_direct_data_transfer *)recvmsg->packet;
+-              unsigned int data_length;
++              unsigned int data_offset, data_length;
+               int avail_recvmsg_count, receive_credits;
+               if (wc->byte_len <
+@@ -565,14 +565,15 @@ static void recv_done(struct ib_cq *cq,
+               }
+               data_length = le32_to_cpu(data_transfer->data_length);
+-              if (data_length) {
+-                      if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
+-                          (u64)data_length) {
+-                              put_recvmsg(t, recvmsg);
+-                              smb_direct_disconnect_rdma_connection(t);
+-                              return;
+-                      }
++              data_offset = le32_to_cpu(data_transfer->data_offset);
++              if (wc->byte_len < data_offset ||
++                  wc->byte_len < (u64)data_offset + data_length) {
++                      put_recvmsg(t, recvmsg);
++                      smb_direct_disconnect_rdma_connection(t);
++                      return;
++              }
++              if (data_length) {
+                       if (t->full_packet_received)
+                               recvmsg->first_segment = true;
diff --git a/queue-6.16/ksmbd-smbdirect-verify-remaining_data_length-respects-max_fragmented_recv_size.patch b/queue-6.16/ksmbd-smbdirect-verify-remaining_data_length-respects-max_fragmented_recv_size.patch
new file mode 100644 (file)
index 0000000..bfb8fa8
--- /dev/null
@@ -0,0 +1,59 @@
+From e1868ba37fd27c6a68e31565402b154beaa65df0 Mon Sep 17 00:00:00 2001
+From: Stefan Metzmacher <metze@samba.org>
+Date: Thu, 11 Sep 2025 10:05:23 +0900
+Subject: ksmbd: smbdirect: verify remaining_data_length respects max_fragmented_recv_size
+
+From: Stefan Metzmacher <metze@samba.org>
+
+commit e1868ba37fd27c6a68e31565402b154beaa65df0 upstream.
+
+This is inspired by the check for data_offset + data_length.
+
+Cc: Steve French <smfrench@gmail.com>
+Cc: Tom Talpey <tom@talpey.com>
+Cc: linux-cifs@vger.kernel.org
+Cc: samba-technical@lists.samba.org
+Cc: stable@vger.kernel.org
+Fixes: 2ea086e35c3d ("ksmbd: add buffer validation for smb direct")
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Stefan Metzmacher <metze@samba.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/server/transport_rdma.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/smb/server/transport_rdma.c
++++ b/fs/smb/server/transport_rdma.c
+@@ -554,7 +554,7 @@ static void recv_done(struct ib_cq *cq,
+       case SMB_DIRECT_MSG_DATA_TRANSFER: {
+               struct smb_direct_data_transfer *data_transfer =
+                       (struct smb_direct_data_transfer *)recvmsg->packet;
+-              unsigned int data_offset, data_length;
++              u32 remaining_data_length, data_offset, data_length;
+               int avail_recvmsg_count, receive_credits;
+               if (wc->byte_len <
+@@ -564,6 +564,7 @@ static void recv_done(struct ib_cq *cq,
+                       return;
+               }
++              remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length);
+               data_length = le32_to_cpu(data_transfer->data_length);
+               data_offset = le32_to_cpu(data_transfer->data_offset);
+               if (wc->byte_len < data_offset ||
+@@ -571,6 +572,14 @@ static void recv_done(struct ib_cq *cq,
+                       put_recvmsg(t, recvmsg);
+                       smb_direct_disconnect_rdma_connection(t);
+                       return;
++              }
++              if (remaining_data_length > t->max_fragmented_recv_size ||
++                  data_length > t->max_fragmented_recv_size ||
++                  (u64)remaining_data_length + (u64)data_length >
++                  (u64)t->max_fragmented_recv_size) {
++                      put_recvmsg(t, recvmsg);
++                      smb_direct_disconnect_rdma_connection(t);
++                      return;
+               }
+               if (data_length) {
diff --git a/queue-6.16/kvm-svm-sync-tpr-from-lapic-into-vmcb-v_tpr-even-if-avic-is-active.patch b/queue-6.16/kvm-svm-sync-tpr-from-lapic-into-vmcb-v_tpr-even-if-avic-is-active.patch
new file mode 100644 (file)
index 0000000..e7af783
--- /dev/null
@@ -0,0 +1,56 @@
+From d02e48830e3fce9701265f6c5a58d9bdaf906a76 Mon Sep 17 00:00:00 2001
+From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
+Date: Mon, 25 Aug 2025 18:44:28 +0200
+Subject: KVM: SVM: Sync TPR from LAPIC into VMCB::V_TPR even if AVIC is active
+
+From: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+
+commit d02e48830e3fce9701265f6c5a58d9bdaf906a76 upstream.
+
+Commit 3bbf3565f48c ("svm: Do not intercept CR8 when enable AVIC")
+inhibited pre-VMRUN sync of TPR from LAPIC into VMCB::V_TPR in
+sync_lapic_to_cr8() when AVIC is active.
+
+AVIC does automatically sync between these two fields, however it does
+so only on explicit guest writes to one of these fields, not on a bare
+VMRUN.
+
+This meant that when AVIC is enabled host changes to TPR in the LAPIC
+state might not get automatically copied into the V_TPR field of VMCB.
+
+This is especially true when it is the userspace setting LAPIC state via
+KVM_SET_LAPIC ioctl() since userspace does not have access to the guest
+VMCB.
+
+Practice shows that it is the V_TPR that is actually used by the AVIC to
+decide whether to issue pending interrupts to the CPU (not TPR in TASKPRI),
+so any leftover value in V_TPR will cause serious interrupt delivery issues
+in the guest when AVIC is enabled.
+
+Fix this issue by doing pre-VMRUN TPR sync from LAPIC into VMCB::V_TPR
+even when AVIC is enabled.
+
+Fixes: 3bbf3565f48c ("svm: Do not intercept CR8 when enable AVIC")
+Cc: stable@vger.kernel.org
+Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+Reviewed-by: Naveen N Rao (AMD) <naveen@kernel.org>
+Link: https://lore.kernel.org/r/c231be64280b1461e854e1ce3595d70cde3a2e9d.1756139678.git.maciej.szmigiero@oracle.com
+[sean: tag for stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4204,8 +4204,7 @@ static inline void sync_lapic_to_cr8(str
+       struct vcpu_svm *svm = to_svm(vcpu);
+       u64 cr8;
+-      if (nested_svm_virtualize_tpr(vcpu) ||
+-          kvm_vcpu_apicv_active(vcpu))
++      if (nested_svm_virtualize_tpr(vcpu))
+               return;
+       cr8 = kvm_get_cr8(vcpu);
diff --git a/queue-6.16/loongarch-align-acpi-structures-if-arch_strict_align-enabled.patch b/queue-6.16/loongarch-align-acpi-structures-if-arch_strict_align-enabled.patch
new file mode 100644 (file)
index 0000000..a68db66
--- /dev/null
@@ -0,0 +1,41 @@
+From a9d13433fe17be0e867e51e71a1acd2731fbef8d Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Thu, 18 Sep 2025 19:44:01 +0800
+Subject: LoongArch: Align ACPI structures if ARCH_STRICT_ALIGN enabled
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit a9d13433fe17be0e867e51e71a1acd2731fbef8d upstream.
+
+ARCH_STRICT_ALIGN is used for hardware without UAL, now it only controls
+the -mstrict-align flag. However, ACPI structures are packed by default
+so will cause unaligned accesses.
+
+To avoid this, define ACPI_MISALIGNMENT_NOT_SUPPORTED in asm/acenv.h to
+align ACPI structures if ARCH_STRICT_ALIGN enabled.
+
+Cc: stable@vger.kernel.org
+Reported-by: Binbin Zhou <zhoubinbin@loongson.cn>
+Suggested-by: Xi Ruoyao <xry111@xry111.site>
+Suggested-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/acenv.h |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/arch/loongarch/include/asm/acenv.h
++++ b/arch/loongarch/include/asm/acenv.h
+@@ -10,9 +10,8 @@
+ #ifndef _ASM_LOONGARCH_ACENV_H
+ #define _ASM_LOONGARCH_ACENV_H
+-/*
+- * This header is required by ACPI core, but we have nothing to fill in
+- * right now. Will be updated later when needed.
+- */
++#ifdef CONFIG_ARCH_STRICT_ALIGN
++#define ACPI_MISALIGNMENT_NOT_SUPPORTED
++#endif /* CONFIG_ARCH_STRICT_ALIGN */
+ #endif /* _ASM_LOONGARCH_ACENV_H */
diff --git a/queue-6.16/loongarch-check-the-return-value-when-creating-kobj.patch b/queue-6.16/loongarch-check-the-return-value-when-creating-kobj.patch
new file mode 100644 (file)
index 0000000..fb03df3
--- /dev/null
@@ -0,0 +1,31 @@
+From 51adb03e6b865c0c6790f29659ff52d56742de2e Mon Sep 17 00:00:00 2001
+From: Tao Cui <cuitao@kylinos.cn>
+Date: Thu, 18 Sep 2025 19:44:04 +0800
+Subject: LoongArch: Check the return value when creating kobj
+
+From: Tao Cui <cuitao@kylinos.cn>
+
+commit 51adb03e6b865c0c6790f29659ff52d56742de2e upstream.
+
+Add a check for the return value of kobject_create_and_add(), to ensure
+that the kobj allocation succeeds for later use.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Tao Cui <cuitao@kylinos.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kernel/env.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/loongarch/kernel/env.c
++++ b/arch/loongarch/kernel/env.c
+@@ -109,6 +109,8 @@ static int __init boardinfo_init(void)
+       struct kobject *loongson_kobj;
+       loongson_kobj = kobject_create_and_add("loongson", firmware_kobj);
++      if (!loongson_kobj)
++              return -ENOMEM;
+       return sysfs_create_file(loongson_kobj, &boardinfo_attr.attr);
+ }
diff --git a/queue-6.16/loongarch-fix-unreliable-stack-for-live-patching.patch b/queue-6.16/loongarch-fix-unreliable-stack-for-live-patching.patch
new file mode 100644 (file)
index 0000000..c865aac
--- /dev/null
@@ -0,0 +1,93 @@
+From 677d4a52d4dc4a147d5e84af9ff207832578be70 Mon Sep 17 00:00:00 2001
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+Date: Thu, 18 Sep 2025 19:44:08 +0800
+Subject: LoongArch: Fix unreliable stack for live patching
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+commit 677d4a52d4dc4a147d5e84af9ff207832578be70 upstream.
+
+When testing the kernel live patching with "modprobe livepatch-sample",
+there is a timeout over 15 seconds from "starting patching transition"
+to "patching complete". The dmesg command shows "unreliable stack" for
+user tasks in debug mode, here is one of the messages:
+
+  livepatch: klp_try_switch_task: bash:1193 has an unreliable stack
+
+The "unreliable stack" is because it can not unwind from do_syscall()
+to its previous frame handle_syscall(). It should use fp to find the
+original stack top due to secondary stack in do_syscall(), but fp is
+not used for some other functions, then fp can not be restored by the
+next frame of do_syscall(), so it is necessary to save fp if task is
+not current, in order to get the stack top of do_syscall().
+
+Here are the call chains:
+
+  klp_enable_patch()
+    klp_try_complete_transition()
+      klp_try_switch_task()
+        klp_check_and_switch_task()
+          klp_check_stack()
+            stack_trace_save_tsk_reliable()
+              arch_stack_walk_reliable()
+
+When executing "rmmod livepatch-sample", there exists a similar issue.
+With this patch, it takes a short time for patching and unpatching.
+
+Before:
+
+  # modprobe livepatch-sample
+  # dmesg -T | tail -3
+  [Sat Sep  6 11:00:20 2025] livepatch: 'livepatch_sample': starting patching transition
+  [Sat Sep  6 11:00:35 2025] livepatch: signaling remaining tasks
+  [Sat Sep  6 11:00:36 2025] livepatch: 'livepatch_sample': patching complete
+
+  # echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled
+  # rmmod livepatch_sample
+  rmmod: ERROR: Module livepatch_sample is in use
+  # rmmod livepatch_sample
+  # dmesg -T | tail -3
+  [Sat Sep  6 11:06:05 2025] livepatch: 'livepatch_sample': starting unpatching transition
+  [Sat Sep  6 11:06:20 2025] livepatch: signaling remaining tasks
+  [Sat Sep  6 11:06:21 2025] livepatch: 'livepatch_sample': unpatching complete
+
+After:
+
+  # modprobe livepatch-sample
+  # dmesg -T | tail -2
+  [Tue Sep 16 16:19:30 2025] livepatch: 'livepatch_sample': starting patching transition
+  [Tue Sep 16 16:19:31 2025] livepatch: 'livepatch_sample': patching complete
+
+  # echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled
+  # rmmod livepatch_sample
+  # dmesg -T | tail -2
+  [Tue Sep 16 16:19:36 2025] livepatch: 'livepatch_sample': starting unpatching transition
+  [Tue Sep 16 16:19:37 2025] livepatch: 'livepatch_sample': unpatching complete
+
+Cc: stable@vger.kernel.org # v6.9+
+Fixes: 199cc14cb4f1 ("LoongArch: Add kernel livepatching support")
+Reported-by: Xi Zhang <zhangxi@kylinos.cn>
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kernel/stacktrace.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/loongarch/kernel/stacktrace.c
++++ b/arch/loongarch/kernel/stacktrace.c
+@@ -51,12 +51,13 @@ int arch_stack_walk_reliable(stack_trace
+       if (task == current) {
+               regs->regs[3] = (unsigned long)__builtin_frame_address(0);
+               regs->csr_era = (unsigned long)__builtin_return_address(0);
++              regs->regs[22] = 0;
+       } else {
+               regs->regs[3] = thread_saved_fp(task);
+               regs->csr_era = thread_saved_ra(task);
++              regs->regs[22] = task->thread.reg22;
+       }
+       regs->regs[1] = 0;
+-      regs->regs[22] = 0;
+       for (unwind_start(&state, task, regs);
+            !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
diff --git a/queue-6.16/loongarch-handle-jump-tables-options-for-rust.patch b/queue-6.16/loongarch-handle-jump-tables-options-for-rust.patch
new file mode 100644 (file)
index 0000000..fce84e7
--- /dev/null
@@ -0,0 +1,79 @@
+From 74f8295c6fb8436bec9995baf6ba463151b6fb68 Mon Sep 17 00:00:00 2001
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+Date: Thu, 18 Sep 2025 19:43:42 +0800
+Subject: LoongArch: Handle jump tables options for RUST
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+commit 74f8295c6fb8436bec9995baf6ba463151b6fb68 upstream.
+
+When compiling with LLVM and CONFIG_RUST is set, there exist objtool
+warnings in rust/core.o and rust/kernel.o, like this:
+
+    rust/core.o: warning: objtool:
+_RNvXs1_NtNtCs5QSdWC790r4_4core5ascii10ascii_charNtB5_9AsciiCharNtNtB9_3fmt5Debug3fmt+0x54:
+sibling call from callable instruction with modified stack frame
+
+For this special case, the related object file shows that there is no
+generated relocation section '.rela.discard.tablejump_annotate' for the
+table jump instruction jirl, thus objtool can not know that what is the
+actual destination address.
+
+If rustc has the option "-Cllvm-args=--loongarch-annotate-tablejump",
+pass the option to enable jump tables for objtool, otherwise it should
+pass "-Zno-jump-tables" to keep compatibility with older rustc.
+
+How to test:
+
+  $ rustup component add rust-src
+  $ make LLVM=1 rustavailable
+  $ make ARCH=loongarch LLVM=1 clean defconfig
+  $ scripts/config -d MODVERSIONS \
+    -e RUST -e SAMPLES -e SAMPLES_RUST \
+    -e SAMPLE_RUST_CONFIGFS -e SAMPLE_RUST_MINIMAL \
+    -e SAMPLE_RUST_MISC_DEVICE -e SAMPLE_RUST_PRINT \
+    -e SAMPLE_RUST_DMA -e SAMPLE_RUST_DRIVER_PCI \
+    -e SAMPLE_RUST_DRIVER_PLATFORM -e SAMPLE_RUST_DRIVER_FAUX \
+    -e SAMPLE_RUST_DRIVER_AUXILIARY -e SAMPLE_RUST_HOSTPROGS
+  $ make ARCH=loongarch LLVM=1 olddefconfig all
+
+Cc: stable@vger.kernel.org
+Acked-by: Miguel Ojeda <ojeda@kernel.org>
+Reported-by: Miguel Ojeda <ojeda@kernel.org>
+Closes: https://lore.kernel.org/rust-for-linux/CANiq72mNeCuPkCDrG2db3w=AX+O-zYrfprisDPmRac_qh65Dmg@mail.gmail.com/
+Suggested-by: WANG Rui <wangrui@loongson.cn>
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/Kconfig  |    4 ++++
+ arch/loongarch/Makefile |    5 +++++
+ 2 files changed, 9 insertions(+)
+
+--- a/arch/loongarch/Kconfig
++++ b/arch/loongarch/Kconfig
+@@ -301,6 +301,10 @@ config AS_HAS_LVZ_EXTENSION
+ config CC_HAS_ANNOTATE_TABLEJUMP
+       def_bool $(cc-option,-mannotate-tablejump)
++config RUSTC_HAS_ANNOTATE_TABLEJUMP
++      depends on RUST
++      def_bool $(rustc-option,-Cllvm-args=--loongarch-annotate-tablejump)
++
+ menu "Kernel type and options"
+ source "kernel/Kconfig.hz"
+--- a/arch/loongarch/Makefile
++++ b/arch/loongarch/Makefile
+@@ -106,6 +106,11 @@ KBUILD_CFLAGS                     += -mannotate-tablejump
+ else
+ KBUILD_CFLAGS                 += -fno-jump-tables # keep compatibility with older compilers
+ endif
++ifdef CONFIG_RUSTC_HAS_ANNOTATE_TABLEJUMP
++KBUILD_RUSTFLAGS              += -Cllvm-args=--loongarch-annotate-tablejump
++else
++KBUILD_RUSTFLAGS              += -Zno-jump-tables # keep compatibility with older compilers
++endif
+ ifdef CONFIG_LTO_CLANG
+ # The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled.
+ # Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' also needs to
diff --git a/queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_ctrl_access.patch b/queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_ctrl_access.patch
new file mode 100644 (file)
index 0000000..0369690
--- /dev/null
@@ -0,0 +1,97 @@
+From 47256c4c8b1bfbc63223a0da2d4fa90b6ede5cbb Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Thu, 18 Sep 2025 19:44:22 +0800
+Subject: LoongArch: KVM: Avoid copy_*_user() with lock hold in kvm_eiointc_ctrl_access()
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 47256c4c8b1bfbc63223a0da2d4fa90b6ede5cbb upstream.
+
+Function copy_from_user() and copy_to_user() may sleep because of page
+fault, and they cannot be called in spin_lock hold context. Here move
+function calling of copy_from_user() and copy_to_user() before spinlock
+context in function kvm_eiointc_ctrl_access().
+
+Otherwise there will be possible warning such as:
+
+BUG: sleeping function called from invalid context at include/linux/uaccess.h:192
+in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 6292, name: qemu-system-loo
+preempt_count: 1, expected: 0
+RCU nest depth: 0, expected: 0
+INFO: lockdep is turned off.
+irq event stamp: 0
+hardirqs last  enabled at (0): [<0000000000000000>] 0x0
+hardirqs last disabled at (0): [<9000000004c4a554>] copy_process+0x90c/0x1d40
+softirqs last  enabled at (0): [<9000000004c4a554>] copy_process+0x90c/0x1d40
+softirqs last disabled at (0): [<0000000000000000>] 0x0
+CPU: 41 UID: 0 PID: 6292 Comm: qemu-system-loo Tainted: G W 6.17.0-rc3+ #31 PREEMPT(full)
+Tainted: [W]=WARN
+Stack : 0000000000000076 0000000000000000 9000000004c28264 9000100092ff4000
+        9000100092ff7b80 9000100092ff7b88 0000000000000000 9000100092ff7cc8
+        9000100092ff7cc0 9000100092ff7cc0 9000100092ff7a00 0000000000000001
+        0000000000000001 9000100092ff7b88 947d2f9216a5e8b9 900010008773d880
+        00000000ffff8b9f fffffffffffffffe 0000000000000ba1 fffffffffffffffe
+        000000000000003e 900000000825a15b 000010007ad38000 9000100092ff7ec0
+        0000000000000000 0000000000000000 9000000006f3ac60 9000000007252000
+        0000000000000000 00007ff746ff2230 0000000000000053 9000200088a021b0
+        0000555556c9d190 0000000000000000 9000000004c2827c 000055556cfb5f40
+        00000000000000b0 0000000000000007 0000000000000007 0000000000071c1d
+Call Trace:
+[<9000000004c2827c>] show_stack+0x5c/0x180
+[<9000000004c20fac>] dump_stack_lvl+0x94/0xe4
+[<9000000004c99c7c>] __might_resched+0x26c/0x290
+[<9000000004f68968>] __might_fault+0x20/0x88
+[<ffff800002311de0>] kvm_eiointc_ctrl_access.isra.0+0x88/0x380 [kvm]
+[<ffff8000022f8514>] kvm_device_ioctl+0x194/0x290 [kvm]
+[<900000000506b0d8>] sys_ioctl+0x388/0x1010
+[<90000000063ed210>] do_syscall+0xb0/0x2d8
+[<9000000004c25ef8>] handle_syscall+0xb8/0x158
+
+Cc: stable@vger.kernel.org
+Fixes: 1ad7efa552fd5 ("LoongArch: KVM: Add EIOINTC user mode read and write functions")
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/intc/eiointc.c |   25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/arch/loongarch/kvm/intc/eiointc.c
++++ b/arch/loongarch/kvm/intc/eiointc.c
+@@ -810,21 +810,26 @@ static int kvm_eiointc_ctrl_access(struc
+       struct loongarch_eiointc *s = dev->kvm->arch.eiointc;
+       data = (void __user *)attr->addr;
+-      spin_lock_irqsave(&s->lock, flags);
+       switch (type) {
+       case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU:
++      case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE:
+               if (copy_from_user(&val, data, 4))
+-                      ret = -EFAULT;
+-              else {
+-                      if (val >= EIOINTC_ROUTE_MAX_VCPUS)
+-                              ret = -EINVAL;
+-                      else
+-                              s->num_cpu = val;
+-              }
++                      return -EFAULT;
++              break;
++      default:
++              break;
++      }
++
++      spin_lock_irqsave(&s->lock, flags);
++      switch (type) {
++      case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU:
++              if (val >= EIOINTC_ROUTE_MAX_VCPUS)
++                      ret = -EINVAL;
++              else
++                      s->num_cpu = val;
+               break;
+       case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE:
+-              if (copy_from_user(&s->features, data, 4))
+-                      ret = -EFAULT;
++              s->features = val;
+               if (!(s->features & BIT(EIOINTC_HAS_VIRT_EXTENSION)))
+                       s->status |= BIT(EIOINTC_ENABLE);
+               break;
diff --git a/queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_regs_access.patch b/queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_regs_access.patch
new file mode 100644 (file)
index 0000000..cf490ab
--- /dev/null
@@ -0,0 +1,137 @@
+From 62f11796a0dfa1a2ef5f50a2d1bc81c81628fb8e Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Thu, 18 Sep 2025 19:44:22 +0800
+Subject: LoongArch: KVM: Avoid copy_*_user() with lock hold in kvm_eiointc_regs_access()
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 62f11796a0dfa1a2ef5f50a2d1bc81c81628fb8e upstream.
+
+Function copy_from_user() and copy_to_user() may sleep because of page
+fault, and they cannot be called in spin_lock hold context. Here move
+function calling of copy_from_user() and copy_to_user() before spinlock
+context in function kvm_eiointc_regs_access().
+
+Otherwise there will be possible warning such as:
+
+BUG: sleeping function called from invalid context at include/linux/uaccess.h:192
+in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 6292, name: qemu-system-loo
+preempt_count: 1, expected: 0
+RCU nest depth: 0, expected: 0
+INFO: lockdep is turned off.
+irq event stamp: 0
+hardirqs last  enabled at (0): [<0000000000000000>] 0x0
+hardirqs last disabled at (0): [<9000000004c4a554>] copy_process+0x90c/0x1d40
+softirqs last  enabled at (0): [<9000000004c4a554>] copy_process+0x90c/0x1d40
+softirqs last disabled at (0): [<0000000000000000>] 0x0
+CPU: 41 UID: 0 PID: 6292 Comm: qemu-system-loo Tainted: G W 6.17.0-rc3+ #31 PREEMPT(full)
+Tainted: [W]=WARN
+Stack : 0000000000000076 0000000000000000 9000000004c28264 9000100092ff4000
+        9000100092ff7b80 9000100092ff7b88 0000000000000000 9000100092ff7cc8
+        9000100092ff7cc0 9000100092ff7cc0 9000100092ff7a00 0000000000000001
+        0000000000000001 9000100092ff7b88 947d2f9216a5e8b9 900010008773d880
+        00000000ffff8b9f fffffffffffffffe 0000000000000ba1 fffffffffffffffe
+        000000000000003e 900000000825a15b 000010007ad38000 9000100092ff7ec0
+        0000000000000000 0000000000000000 9000000006f3ac60 9000000007252000
+        0000000000000000 00007ff746ff2230 0000000000000053 9000200088a021b0
+        0000555556c9d190 0000000000000000 9000000004c2827c 000055556cfb5f40
+        00000000000000b0 0000000000000007 0000000000000007 0000000000071c1d
+Call Trace:
+[<9000000004c2827c>] show_stack+0x5c/0x180
+[<9000000004c20fac>] dump_stack_lvl+0x94/0xe4
+[<9000000004c99c7c>] __might_resched+0x26c/0x290
+[<9000000004f68968>] __might_fault+0x20/0x88
+[<ffff800002311de0>] kvm_eiointc_regs_access.isra.0+0x88/0x380 [kvm]
+[<ffff8000022f8514>] kvm_device_ioctl+0x194/0x290 [kvm]
+[<900000000506b0d8>] sys_ioctl+0x388/0x1010
+[<90000000063ed210>] do_syscall+0xb0/0x2d8
+[<9000000004c25ef8>] handle_syscall+0xb8/0x158
+
+Cc: stable@vger.kernel.org
+Fixes: 1ad7efa552fd5 ("LoongArch: KVM: Add EIOINTC user mode read and write functions")
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/intc/eiointc.c |   33 +++++++++++++++++++++------------
+ 1 file changed, 21 insertions(+), 12 deletions(-)
+
+--- a/arch/loongarch/kvm/intc/eiointc.c
++++ b/arch/loongarch/kvm/intc/eiointc.c
+@@ -851,19 +851,17 @@ static int kvm_eiointc_ctrl_access(struc
+ static int kvm_eiointc_regs_access(struct kvm_device *dev,
+                                       struct kvm_device_attr *attr,
+-                                      bool is_write)
++                                      bool is_write, int *data)
+ {
+       int addr, cpu, offset, ret = 0;
+       unsigned long flags;
+       void *p = NULL;
+-      void __user *data;
+       struct loongarch_eiointc *s;
+       s = dev->kvm->arch.eiointc;
+       addr = attr->attr;
+       cpu = addr >> 16;
+       addr &= 0xffff;
+-      data = (void __user *)attr->addr;
+       switch (addr) {
+       case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+               offset = (addr - EIOINTC_NODETYPE_START) / 4;
+@@ -902,13 +900,10 @@ static int kvm_eiointc_regs_access(struc
+       }
+       spin_lock_irqsave(&s->lock, flags);
+-      if (is_write) {
+-              if (copy_from_user(p, data, 4))
+-                      ret = -EFAULT;
+-      } else {
+-              if (copy_to_user(data, p, 4))
+-                      ret = -EFAULT;
+-      }
++      if (is_write)
++              memcpy(p, data, 4);
++      else
++              memcpy(data, p, 4);
+       spin_unlock_irqrestore(&s->lock, flags);
+       return ret;
+@@ -965,9 +960,18 @@ static int kvm_eiointc_sw_status_access(
+ static int kvm_eiointc_get_attr(struct kvm_device *dev,
+                               struct kvm_device_attr *attr)
+ {
++      int ret, data;
++
+       switch (attr->group) {
+       case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS:
+-              return kvm_eiointc_regs_access(dev, attr, false);
++              ret = kvm_eiointc_regs_access(dev, attr, false, &data);
++              if (ret)
++                      return ret;
++
++              if (copy_to_user((void __user *)attr->addr, &data, 4))
++                      ret = -EFAULT;
++
++              return ret;
+       case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS:
+               return kvm_eiointc_sw_status_access(dev, attr, false);
+       default:
+@@ -978,11 +982,16 @@ static int kvm_eiointc_get_attr(struct k
+ static int kvm_eiointc_set_attr(struct kvm_device *dev,
+                               struct kvm_device_attr *attr)
+ {
++      int data;
++
+       switch (attr->group) {
+       case KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL:
+               return kvm_eiointc_ctrl_access(dev, attr);
+       case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS:
+-              return kvm_eiointc_regs_access(dev, attr, true);
++              if (copy_from_user(&data, (void __user *)attr->addr, 4))
++                      return -EFAULT;
++
++              return kvm_eiointc_regs_access(dev, attr, true, &data);
+       case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS:
+               return kvm_eiointc_sw_status_access(dev, attr, true);
+       default:
diff --git a/queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_sw_status_access.patch b/queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_sw_status_access.patch
new file mode 100644 (file)
index 0000000..fceca8d
--- /dev/null
@@ -0,0 +1,127 @@
+From 01a8e68396a6d51f5ba92021ad1a4b8eaabdd0e7 Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Thu, 18 Sep 2025 19:44:22 +0800
+Subject: LoongArch: KVM: Avoid copy_*_user() with lock hold in kvm_eiointc_sw_status_access()
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 01a8e68396a6d51f5ba92021ad1a4b8eaabdd0e7 upstream.
+
+Function copy_from_user() and copy_to_user() may sleep because of page
+fault, and they cannot be called in spin_lock hold context. Here move
+function calling of copy_from_user() and copy_to_user() out of function
+kvm_eiointc_sw_status_access().
+
+Otherwise there will be possible warning such as:
+
+BUG: sleeping function called from invalid context at include/linux/uaccess.h:192
+in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 6292, name: qemu-system-loo
+preempt_count: 1, expected: 0
+RCU nest depth: 0, expected: 0
+INFO: lockdep is turned off.
+irq event stamp: 0
+hardirqs last  enabled at (0): [<0000000000000000>] 0x0
+hardirqs last disabled at (0): [<9000000004c4a554>] copy_process+0x90c/0x1d40
+softirqs last  enabled at (0): [<9000000004c4a554>] copy_process+0x90c/0x1d40
+softirqs last disabled at (0): [<0000000000000000>] 0x0
+CPU: 41 UID: 0 PID: 6292 Comm: qemu-system-loo Tainted: G W 6.17.0-rc3+ #31 PREEMPT(full)
+Tainted: [W]=WARN
+Stack : 0000000000000076 0000000000000000 9000000004c28264 9000100092ff4000
+        9000100092ff7b80 9000100092ff7b88 0000000000000000 9000100092ff7cc8
+        9000100092ff7cc0 9000100092ff7cc0 9000100092ff7a00 0000000000000001
+        0000000000000001 9000100092ff7b88 947d2f9216a5e8b9 900010008773d880
+        00000000ffff8b9f fffffffffffffffe 0000000000000ba1 fffffffffffffffe
+        000000000000003e 900000000825a15b 000010007ad38000 9000100092ff7ec0
+        0000000000000000 0000000000000000 9000000006f3ac60 9000000007252000
+        0000000000000000 00007ff746ff2230 0000000000000053 9000200088a021b0
+        0000555556c9d190 0000000000000000 9000000004c2827c 000055556cfb5f40
+        00000000000000b0 0000000000000007 0000000000000007 0000000000071c1d
+Call Trace:
+[<9000000004c2827c>] show_stack+0x5c/0x180
+[<9000000004c20fac>] dump_stack_lvl+0x94/0xe4
+[<9000000004c99c7c>] __might_resched+0x26c/0x290
+[<9000000004f68968>] __might_fault+0x20/0x88
+[<ffff800002311de0>] kvm_eiointc_sw_status_access.isra.0+0x88/0x380 [kvm]
+[<ffff8000022f8514>] kvm_device_ioctl+0x194/0x290 [kvm]
+[<900000000506b0d8>] sys_ioctl+0x388/0x1010
+[<90000000063ed210>] do_syscall+0xb0/0x2d8
+[<9000000004c25ef8>] handle_syscall+0xb8/0x158
+
+Cc: stable@vger.kernel.org
+Fixes: 1ad7efa552fd5 ("LoongArch: KVM: Add EIOINTC user mode read and write functions")
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/intc/eiointc.c |   29 +++++++++++++++++------------
+ 1 file changed, 17 insertions(+), 12 deletions(-)
+
+--- a/arch/loongarch/kvm/intc/eiointc.c
++++ b/arch/loongarch/kvm/intc/eiointc.c
+@@ -911,19 +911,17 @@ static int kvm_eiointc_regs_access(struc
+ static int kvm_eiointc_sw_status_access(struct kvm_device *dev,
+                                       struct kvm_device_attr *attr,
+-                                      bool is_write)
++                                      bool is_write, int *data)
+ {
+       int addr, ret = 0;
+       unsigned long flags;
+       void *p = NULL;
+-      void __user *data;
+       struct loongarch_eiointc *s;
+       s = dev->kvm->arch.eiointc;
+       addr = attr->attr;
+       addr &= 0xffff;
+-      data = (void __user *)attr->addr;
+       switch (addr) {
+       case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU:
+               if (is_write)
+@@ -945,13 +943,10 @@ static int kvm_eiointc_sw_status_access(
+               return -EINVAL;
+       }
+       spin_lock_irqsave(&s->lock, flags);
+-      if (is_write) {
+-              if (copy_from_user(p, data, 4))
+-                      ret = -EFAULT;
+-      } else {
+-              if (copy_to_user(data, p, 4))
+-                      ret = -EFAULT;
+-      }
++      if (is_write)
++              memcpy(p, data, 4);
++      else
++              memcpy(data, p, 4);
+       spin_unlock_irqrestore(&s->lock, flags);
+       return ret;
+@@ -973,7 +968,14 @@ static int kvm_eiointc_get_attr(struct k
+               return ret;
+       case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS:
+-              return kvm_eiointc_sw_status_access(dev, attr, false);
++              ret = kvm_eiointc_sw_status_access(dev, attr, false, &data);
++              if (ret)
++                      return ret;
++
++              if (copy_to_user((void __user *)attr->addr, &data, 4))
++                      ret = -EFAULT;
++
++              return ret;
+       default:
+               return -EINVAL;
+       }
+@@ -993,7 +995,10 @@ static int kvm_eiointc_set_attr(struct k
+               return kvm_eiointc_regs_access(dev, attr, true, &data);
+       case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS:
+-              return kvm_eiointc_sw_status_access(dev, attr, true);
++              if (copy_from_user(&data, (void __user *)attr->addr, 4))
++                      return -EFAULT;
++
++              return kvm_eiointc_sw_status_access(dev, attr, true, &data);
+       default:
+               return -EINVAL;
+       }
diff --git a/queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_pch_pic_regs_access.patch b/queue-6.16/loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_pch_pic_regs_access.patch
new file mode 100644 (file)
index 0000000..48cd25c
--- /dev/null
@@ -0,0 +1,99 @@
+From 8dc5245673cf7f33743e5c0d2a4207c0b8df3067 Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Thu, 18 Sep 2025 19:44:25 +0800
+Subject: LoongArch: KVM: Avoid copy_*_user() with lock hold in kvm_pch_pic_regs_access()
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 8dc5245673cf7f33743e5c0d2a4207c0b8df3067 upstream.
+
+Function copy_from_user() and copy_to_user() may sleep because of page
+fault, and they cannot be called in spin_lock hold context. Here move
+function calling of copy_from_user() and copy_to_user() out of spinlock
+context in function kvm_pch_pic_regs_access().
+
+Otherwise there will be possible warning such as:
+
+BUG: sleeping function called from invalid context at include/linux/uaccess.h:192
+in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 6292, name: qemu-system-loo
+preempt_count: 1, expected: 0
+RCU nest depth: 0, expected: 0
+INFO: lockdep is turned off.
+irq event stamp: 0
+hardirqs last  enabled at (0): [<0000000000000000>] 0x0
+hardirqs last disabled at (0): [<9000000004c4a554>] copy_process+0x90c/0x1d40
+softirqs last  enabled at (0): [<9000000004c4a554>] copy_process+0x90c/0x1d40
+softirqs last disabled at (0): [<0000000000000000>] 0x0
+CPU: 41 UID: 0 PID: 6292 Comm: qemu-system-loo Tainted: G W 6.17.0-rc3+ #31 PREEMPT(full)
+Tainted: [W]=WARN
+Stack : 0000000000000076 0000000000000000 9000000004c28264 9000100092ff4000
+        9000100092ff7b80 9000100092ff7b88 0000000000000000 9000100092ff7cc8
+        9000100092ff7cc0 9000100092ff7cc0 9000100092ff7a00 0000000000000001
+        0000000000000001 9000100092ff7b88 947d2f9216a5e8b9 900010008773d880
+        00000000ffff8b9f fffffffffffffffe 0000000000000ba1 fffffffffffffffe
+        000000000000003e 900000000825a15b 000010007ad38000 9000100092ff7ec0
+        0000000000000000 0000000000000000 9000000006f3ac60 9000000007252000
+        0000000000000000 00007ff746ff2230 0000000000000053 9000200088a021b0
+        0000555556c9d190 0000000000000000 9000000004c2827c 000055556cfb5f40
+        00000000000000b0 0000000000000007 0000000000000007 0000000000071c1d
+Call Trace:
+[<9000000004c2827c>] show_stack+0x5c/0x180
+[<9000000004c20fac>] dump_stack_lvl+0x94/0xe4
+[<9000000004c99c7c>] __might_resched+0x26c/0x290
+[<9000000004f68968>] __might_fault+0x20/0x88
+[<ffff800002311de0>] kvm_pch_pic_regs_access.isra.0+0x88/0x380 [kvm]
+[<ffff8000022f8514>] kvm_device_ioctl+0x194/0x290 [kvm]
+[<900000000506b0d8>] sys_ioctl+0x388/0x1010
+[<90000000063ed210>] do_syscall+0xb0/0x2d8
+[<9000000004c25ef8>] handle_syscall+0xb8/0x158
+
+Cc: stable@vger.kernel.org
+Fixes: d206d95148732 ("LoongArch: KVM: Add PCHPIC user mode read and write functions")
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/intc/pch_pic.c |   21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+--- a/arch/loongarch/kvm/intc/pch_pic.c
++++ b/arch/loongarch/kvm/intc/pch_pic.c
+@@ -348,6 +348,7 @@ static int kvm_pch_pic_regs_access(struc
+                               struct kvm_device_attr *attr,
+                               bool is_write)
+ {
++      char buf[8];
+       int addr, offset, len = 8, ret = 0;
+       void __user *data;
+       void *p = NULL;
+@@ -397,17 +398,23 @@ static int kvm_pch_pic_regs_access(struc
+               return -EINVAL;
+       }
+-      spin_lock(&s->lock);
+-      /* write or read value according to is_write */
+       if (is_write) {
+-              if (copy_from_user(p, data, len))
+-                      ret = -EFAULT;
+-      } else {
+-              if (copy_to_user(data, p, len))
+-                      ret = -EFAULT;
++              if (copy_from_user(buf, data, len))
++                      return -EFAULT;
+       }
++
++      spin_lock(&s->lock);
++      if (is_write)
++              memcpy(p, buf, len);
++      else
++              memcpy(buf, p, len);
+       spin_unlock(&s->lock);
++      if (!is_write) {
++              if (copy_to_user(data, buf, len))
++                      return -EFAULT;
++      }
++
+       return ret;
+ }
diff --git a/queue-6.16/loongarch-kvm-fix-vm-migration-failure-with-ptw-enabled.patch b/queue-6.16/loongarch-kvm-fix-vm-migration-failure-with-ptw-enabled.patch
new file mode 100644 (file)
index 0000000..4a94246
--- /dev/null
@@ -0,0 +1,118 @@
+From f58c9aa1065f73d243904b267c71f6a9d1e9f90e Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Thu, 18 Sep 2025 19:44:22 +0800
+Subject: LoongArch: KVM: Fix VM migration failure with PTW enabled
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit f58c9aa1065f73d243904b267c71f6a9d1e9f90e upstream.
+
+With PTW disabled system, bit _PAGE_DIRTY is a HW bit for page writing.
+However with PTW enabled system, bit _PAGE_WRITE is also a "HW bit" for
+page writing, because hardware synchronizes _PAGE_WRITE to _PAGE_DIRTY
+automatically. Previously, _PAGE_WRITE is treated as a SW bit to record
+the page writeable attribute for the fast page fault handling in the
+secondary MMU, however with PTW enabled machine, this bit is used by HW
+already (so setting it will silence the TLB modify exception).
+
+Here define KVM_PAGE_WRITEABLE with the SW bit _PAGE_MODIFIED, so that
+it can work on both PTW disabled and enabled machines. And for HW write
+bits, both _PAGE_DIRTY and _PAGE_WRITE are set or clear together.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/kvm_mmu.h |   20 ++++++++++++++++----
+ arch/loongarch/kvm/mmu.c             |    8 ++++----
+ 2 files changed, 20 insertions(+), 8 deletions(-)
+
+--- a/arch/loongarch/include/asm/kvm_mmu.h
++++ b/arch/loongarch/include/asm/kvm_mmu.h
+@@ -16,6 +16,13 @@
+  */
+ #define KVM_MMU_CACHE_MIN_PAGES       (CONFIG_PGTABLE_LEVELS - 1)
++/*
++ * _PAGE_MODIFIED is a SW pte bit, it records page ever written on host
++ * kernel, on secondary MMU it records the page writeable attribute, in
++ * order for fast path handling.
++ */
++#define KVM_PAGE_WRITEABLE    _PAGE_MODIFIED
++
+ #define _KVM_FLUSH_PGTABLE    0x1
+ #define _KVM_HAS_PGMASK               0x2
+ #define kvm_pfn_pte(pfn, prot)        (((pfn) << PFN_PTE_SHIFT) | pgprot_val(prot))
+@@ -52,10 +59,10 @@ static inline void kvm_set_pte(kvm_pte_t
+       WRITE_ONCE(*ptep, val);
+ }
+-static inline int kvm_pte_write(kvm_pte_t pte) { return pte & _PAGE_WRITE; }
+-static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & _PAGE_DIRTY; }
+ static inline int kvm_pte_young(kvm_pte_t pte) { return pte & _PAGE_ACCESSED; }
+ static inline int kvm_pte_huge(kvm_pte_t pte) { return pte & _PAGE_HUGE; }
++static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & __WRITEABLE; }
++static inline int kvm_pte_writeable(kvm_pte_t pte) { return pte & KVM_PAGE_WRITEABLE; }
+ static inline kvm_pte_t kvm_pte_mkyoung(kvm_pte_t pte)
+ {
+@@ -69,12 +76,12 @@ static inline kvm_pte_t kvm_pte_mkold(kv
+ static inline kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte)
+ {
+-      return pte | _PAGE_DIRTY;
++      return pte | __WRITEABLE;
+ }
+ static inline kvm_pte_t kvm_pte_mkclean(kvm_pte_t pte)
+ {
+-      return pte & ~_PAGE_DIRTY;
++      return pte & ~__WRITEABLE;
+ }
+ static inline kvm_pte_t kvm_pte_mkhuge(kvm_pte_t pte)
+@@ -87,6 +94,11 @@ static inline kvm_pte_t kvm_pte_mksmall(
+       return pte & ~_PAGE_HUGE;
+ }
++static inline kvm_pte_t kvm_pte_mkwriteable(kvm_pte_t pte)
++{
++      return pte | KVM_PAGE_WRITEABLE;
++}
++
+ static inline int kvm_need_flush(kvm_ptw_ctx *ctx)
+ {
+       return ctx->flag & _KVM_FLUSH_PGTABLE;
+--- a/arch/loongarch/kvm/mmu.c
++++ b/arch/loongarch/kvm/mmu.c
+@@ -569,7 +569,7 @@ static int kvm_map_page_fast(struct kvm_
+       /* Track access to pages marked old */
+       new = kvm_pte_mkyoung(*ptep);
+       if (write && !kvm_pte_dirty(new)) {
+-              if (!kvm_pte_write(new)) {
++              if (!kvm_pte_writeable(new)) {
+                       ret = -EFAULT;
+                       goto out;
+               }
+@@ -856,9 +856,9 @@ retry:
+               prot_bits |= _CACHE_SUC;
+       if (writeable) {
+-              prot_bits |= _PAGE_WRITE;
++              prot_bits = kvm_pte_mkwriteable(prot_bits);
+               if (write)
+-                      prot_bits |= __WRITEABLE;
++                      prot_bits = kvm_pte_mkdirty(prot_bits);
+       }
+       /* Disable dirty logging on HugePages */
+@@ -904,7 +904,7 @@ retry:
+       kvm_release_faultin_page(kvm, page, false, writeable);
+       spin_unlock(&kvm->mmu_lock);
+-      if (prot_bits & _PAGE_DIRTY)
++      if (kvm_pte_dirty(prot_bits))
+               mark_page_dirty_in_slot(kvm, memslot, gfn);
+ out:
diff --git a/queue-6.16/loongarch-make-lto-case-independent-in-makefile.patch b/queue-6.16/loongarch-make-lto-case-independent-in-makefile.patch
new file mode 100644 (file)
index 0000000..bed0be6
--- /dev/null
@@ -0,0 +1,47 @@
+From b15212824a01cb0b62f7b522f4ee334622cf982a Mon Sep 17 00:00:00 2001
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+Date: Thu, 18 Sep 2025 19:43:42 +0800
+Subject: LoongArch: Make LTO case independent in Makefile
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+commit b15212824a01cb0b62f7b522f4ee334622cf982a upstream.
+
+LTO is not only used for Clang, but maybe also used for Rust, make LTO
+case out of CONFIG_CC_HAS_ANNOTATE_TABLEJUMP in Makefile.
+
+This is preparation for later patch, no function changes.
+
+Cc: stable@vger.kernel.org
+Suggested-by: WANG Rui <wangrui@loongson.cn>
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/Makefile |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/arch/loongarch/Makefile
++++ b/arch/loongarch/Makefile
+@@ -102,16 +102,16 @@ KBUILD_CFLAGS                    += $(call cc-option,-mth
+ ifdef CONFIG_OBJTOOL
+ ifdef CONFIG_CC_HAS_ANNOTATE_TABLEJUMP
++KBUILD_CFLAGS                 += -mannotate-tablejump
++else
++KBUILD_CFLAGS                 += -fno-jump-tables # keep compatibility with older compilers
++endif
++ifdef CONFIG_LTO_CLANG
+ # The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled.
+ # Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' also needs to
+ # be passed via '-mllvm' to ld.lld.
+-KBUILD_CFLAGS                 += -mannotate-tablejump
+-ifdef CONFIG_LTO_CLANG
+ KBUILD_LDFLAGS                        += -mllvm --loongarch-annotate-tablejump
+ endif
+-else
+-KBUILD_CFLAGS                 += -fno-jump-tables # keep compatibility with older compilers
+-endif
+ endif
+ KBUILD_RUSTFLAGS              += --target=loongarch64-unknown-none-softfloat -Ccode-model=small
diff --git a/queue-6.16/loongarch-update-help-info-of-arch_strict_align.patch b/queue-6.16/loongarch-update-help-info-of-arch_strict_align.patch
new file mode 100644 (file)
index 0000000..7337b12
--- /dev/null
@@ -0,0 +1,43 @@
+From f5003098e2f337d8e8a87dc636250e3fa978d9ad Mon Sep 17 00:00:00 2001
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+Date: Thu, 18 Sep 2025 19:43:42 +0800
+Subject: LoongArch: Update help info of ARCH_STRICT_ALIGN
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+commit f5003098e2f337d8e8a87dc636250e3fa978d9ad upstream.
+
+Loongson-3A6000 and 3C6000 CPUs also support unaligned memory access, so
+the current description is out of date to some extent.
+
+Actually, all of Loongson-3 series processors based on LoongArch support
+unaligned memory access, this hardware capability is indicated by the bit
+20 (UAL) of CPUCFG1 register, update the help info to reflect the reality.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/Kconfig |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/loongarch/Kconfig
++++ b/arch/loongarch/Kconfig
+@@ -566,10 +566,14 @@ config ARCH_STRICT_ALIGN
+         -mstrict-align build parameter to prevent unaligned accesses.
+         CPUs with h/w unaligned access support:
+-        Loongson-2K2000/2K3000/3A5000/3C5000/3D5000.
++        Loongson-2K2000/2K3000 and all of Loongson-3 series processors
++        based on LoongArch.
+         CPUs without h/w unaligned access support:
+-        Loongson-2K500/2K1000.
++        Loongson-2K0300/2K0500/2K1000.
++
++        If you want to make sure whether to support unaligned memory access
++        on your hardware, please read the bit 20 (UAL) of CPUCFG1 register.
+         This option is enabled by default to make the kernel be able to run
+         on all LoongArch systems. But you can disable it manually if you want
diff --git a/queue-6.16/loongarch-vdso-check-kcalloc-result-in-init_vdso.patch b/queue-6.16/loongarch-vdso-check-kcalloc-result-in-init_vdso.patch
new file mode 100644 (file)
index 0000000..ab0a103
--- /dev/null
@@ -0,0 +1,34 @@
+From ac398f570724c41e5e039d54e4075519f6af7408 Mon Sep 17 00:00:00 2001
+From: Guangshuo Li <202321181@mail.sdu.edu.cn>
+Date: Thu, 18 Sep 2025 19:44:10 +0800
+Subject: LoongArch: vDSO: Check kcalloc() result in init_vdso()
+
+From: Guangshuo Li <202321181@mail.sdu.edu.cn>
+
+commit ac398f570724c41e5e039d54e4075519f6af7408 upstream.
+
+Add a NULL-pointer check after the kcalloc() call in init_vdso(). If
+allocation fails, return -ENOMEM to prevent a possible dereference of
+vdso_info.code_mapping.pages when it is NULL.
+
+Cc: stable@vger.kernel.org
+Fixes: 2ed119aef60d ("LoongArch: Set correct size for vDSO code mapping")
+Signed-off-by: Guangshuo Li <202321181@mail.sdu.edu.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kernel/vdso.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/loongarch/kernel/vdso.c
++++ b/arch/loongarch/kernel/vdso.c
+@@ -54,6 +54,9 @@ static int __init init_vdso(void)
+       vdso_info.code_mapping.pages =
+               kcalloc(vdso_info.size / PAGE_SIZE, sizeof(struct page *), GFP_KERNEL);
++      if (!vdso_info.code_mapping.pages)
++              return -ENOMEM;
++
+       pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso));
+       for (i = 0; i < vdso_info.size / PAGE_SIZE; i++)
+               vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i);
diff --git a/queue-6.16/mm-folio_may_be_lru_cached-unless-folio_test_large.patch b/queue-6.16/mm-folio_may_be_lru_cached-unless-folio_test_large.patch
new file mode 100644 (file)
index 0000000..452e482
--- /dev/null
@@ -0,0 +1,137 @@
+From 2da6de30e60dd9bb14600eff1cc99df2fa2ddae3 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 8 Sep 2025 15:23:15 -0700
+Subject: mm: folio_may_be_lru_cached() unless folio_test_large()
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 2da6de30e60dd9bb14600eff1cc99df2fa2ddae3 upstream.
+
+mm/swap.c and mm/mlock.c agree to drain any per-CPU batch as soon as a
+large folio is added: so collect_longterm_unpinnable_folios() just wastes
+effort when calling lru_add_drain[_all]() on a large folio.
+
+But although there is good reason not to batch up PMD-sized folios, we
+might well benefit from batching a small number of low-order mTHPs (though
+unclear how that "small number" limitation will be implemented).
+
+So ask if folio_may_be_lru_cached() rather than !folio_test_large(), to
+insulate those particular checks from future change.  Name preferred to
+"folio_is_batchable" because large folios can well be put on a batch: it's
+just the per-CPU LRU caches, drained much later, which need care.
+
+Marked for stable, to counter the increase in lru_add_drain_all()s from
+"mm/gup: check ref_count instead of lru before migration".
+
+Link: https://lkml.kernel.org/r/57d2eaf8-3607-f318-e0c5-be02dce61ad0@google.com
+Fixes: 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@kernel.org>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Chris Li <chrisl@kernel.org>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Keir Fraser <keirf@google.com>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Li Zhe <lizhe.67@bytedance.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Shivank Garg <shivankg@amd.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Wei Xu <weixugc@google.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: yangge <yangge1116@126.com>
+Cc: Yuanchu Xie <yuanchu@google.com>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/swap.h |   10 ++++++++++
+ mm/gup.c             |    4 ++--
+ mm/mlock.c           |    6 +++---
+ mm/swap.c            |    2 +-
+ 4 files changed, 16 insertions(+), 6 deletions(-)
+
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -384,6 +384,16 @@ void folio_add_lru_vma(struct folio *, s
+ void mark_page_accessed(struct page *);
+ void folio_mark_accessed(struct folio *);
++static inline bool folio_may_be_lru_cached(struct folio *folio)
++{
++      /*
++       * Holding PMD-sized folios in per-CPU LRU cache unbalances accounting.
++       * Holding small numbers of low-order mTHP folios in per-CPU LRU cache
++       * will be sensible, but nobody has implemented and tested that yet.
++       */
++      return !folio_test_large(folio);
++}
++
+ extern atomic_t lru_disable_count;
+ static inline bool lru_cache_disabled(void)
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -2353,13 +2353,13 @@ static unsigned long collect_longterm_un
+                       continue;
+               }
+-              if (drained == 0 &&
++              if (drained == 0 && folio_may_be_lru_cached(folio) &&
+                               folio_ref_count(folio) !=
+                               folio_expected_ref_count(folio) + 1) {
+                       lru_add_drain();
+                       drained = 1;
+               }
+-              if (drained == 1 &&
++              if (drained == 1 && folio_may_be_lru_cached(folio) &&
+                               folio_ref_count(folio) !=
+                               folio_expected_ref_count(folio) + 1) {
+                       lru_add_drain_all();
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -255,7 +255,7 @@ void mlock_folio(struct folio *folio)
+       folio_get(folio);
+       if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
+-          folio_test_large(folio) || lru_cache_disabled())
++          !folio_may_be_lru_cached(folio) || lru_cache_disabled())
+               mlock_folio_batch(fbatch);
+       local_unlock(&mlock_fbatch.lock);
+ }
+@@ -278,7 +278,7 @@ void mlock_new_folio(struct folio *folio
+       folio_get(folio);
+       if (!folio_batch_add(fbatch, mlock_new(folio)) ||
+-          folio_test_large(folio) || lru_cache_disabled())
++          !folio_may_be_lru_cached(folio) || lru_cache_disabled())
+               mlock_folio_batch(fbatch);
+       local_unlock(&mlock_fbatch.lock);
+ }
+@@ -299,7 +299,7 @@ void munlock_folio(struct folio *folio)
+        */
+       folio_get(folio);
+       if (!folio_batch_add(fbatch, folio) ||
+-          folio_test_large(folio) || lru_cache_disabled())
++          !folio_may_be_lru_cached(folio) || lru_cache_disabled())
+               mlock_folio_batch(fbatch);
+       local_unlock(&mlock_fbatch.lock);
+ }
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -192,7 +192,7 @@ static void __folio_batch_add_and_move(s
+               local_lock(&cpu_fbatches.lock);
+       if (!folio_batch_add(this_cpu_ptr(fbatch), folio) ||
+-                      folio_test_large(folio) || lru_cache_disabled())
++                      !folio_may_be_lru_cached(folio) || lru_cache_disabled())
+               folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn);
+       if (disable_irq)
diff --git a/queue-6.16/mm-gup-check-ref_count-instead-of-lru-before-migration.patch b/queue-6.16/mm-gup-check-ref_count-instead-of-lru-before-migration.patch
new file mode 100644 (file)
index 0000000..45153d9
--- /dev/null
@@ -0,0 +1,137 @@
+From 98c6d259319ecf6e8d027abd3f14b81324b8c0ad Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 8 Sep 2025 15:15:03 -0700
+Subject: mm/gup: check ref_count instead of lru before migration
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 98c6d259319ecf6e8d027abd3f14b81324b8c0ad upstream.
+
+Patch series "mm: better GUP pin lru_add_drain_all()", v2.
+
+Series of lru_add_drain_all()-related patches, arising from recent mm/gup
+migration report from Will Deacon.
+
+
+This patch (of 5):
+
+Will Deacon reports:-
+
+When taking a longterm GUP pin via pin_user_pages(),
+__gup_longterm_locked() tries to migrate target folios that should not be
+longterm pinned, for example because they reside in a CMA region or
+movable zone.  This is done by first pinning all of the target folios
+anyway, collecting all of the longterm-unpinnable target folios into a
+list, dropping the pins that were just taken and finally handing the list
+off to migrate_pages() for the actual migration.
+
+It is critically important that no unexpected references are held on the
+folios being migrated, otherwise the migration will fail and
+pin_user_pages() will return -ENOMEM to its caller.  Unfortunately, it is
+relatively easy to observe migration failures when running pKVM (which
+uses pin_user_pages() on crosvm's virtual address space to resolve stage-2
+page faults from the guest) on a 6.15-based Pixel 6 device and this
+results in the VM terminating prematurely.
+
+In the failure case, 'crosvm' has called mlock(MLOCK_ONFAULT) on its
+mapping of guest memory prior to the pinning.  Subsequently, when
+pin_user_pages() walks the page-table, the relevant 'pte' is not present
+and so the faulting logic allocates a new folio, mlocks it with
+mlock_folio() and maps it in the page-table.
+
+Since commit 2fbb0c10d1e8 ("mm/munlock: mlock_page() munlock_page() batch
+by pagevec"), mlock/munlock operations on a folio (formerly page), are
+deferred.  For example, mlock_folio() takes an additional reference on the
+target folio before placing it into a per-cpu 'folio_batch' for later
+processing by mlock_folio_batch(), which drops the refcount once the
+operation is complete.  Processing of the batches is coupled with the LRU
+batch logic and can be forcefully drained with lru_add_drain_all() but as
+long as a folio remains unprocessed on the batch, its refcount will be
+elevated.
+
+This deferred batching therefore interacts poorly with the pKVM pinning
+scenario as we can find ourselves in a situation where the migration code
+fails to migrate a folio due to the elevated refcount from the pending
+mlock operation.
+
+Hugh Dickins adds:-
+
+!folio_test_lru() has never been a very reliable way to tell if an
+lru_add_drain_all() is worth calling, to remove LRU cache references to
+make the folio migratable: the LRU flag may be set even while the folio is
+held with an extra reference in a per-CPU LRU cache.
+
+5.18 commit 2fbb0c10d1e8 may have made it more unreliable.  Then 6.11
+commit 33dfe9204f29 ("mm/gup: clear the LRU flag of a page before adding
+to LRU batch") tried to make it reliable, by moving LRU flag clearing; but
+missed the mlock/munlock batches, so still unreliable as reported.
+
+And it turns out to be difficult to extend 33dfe9204f29's LRU flag
+clearing to the mlock/munlock batches: if they do benefit from batching,
+mlock/munlock cannot be so effective when easily suppressed while !LRU.
+
+Instead, switch to an expected ref_count check, which was more reliable
+all along: some more false positives (unhelpful drains) than before, and
+never a guarantee that the folio will prove migratable, but better.
+
+Note on PG_private_2: ceph and nfs are still using the deprecated
+PG_private_2 flag, with the aid of netfs and filemap support functions.
+Although it is consistently matched by an increment of folio ref_count,
+folio_expected_ref_count() intentionally does not recognize it, and ceph
+folio migration currently depends on that for PG_private_2 folios to be
+rejected.  New references to the deprecated flag are discouraged, so do
+not add it into the collect_longterm_unpinnable_folios() calculation: but
+longterm pinning of transiently PG_private_2 ceph and nfs folios (an
+uncommon case) may invoke a redundant lru_add_drain_all().  And this makes
+easy the backport to earlier releases: up to and including 6.12, btrfs
+also used PG_private_2, but without a ref_count increment.
+
+Note for stable backports: requires 6.16 commit 86ebd50224c0 ("mm:
+add folio_expected_ref_count() for reference count calculation").
+
+Link: https://lkml.kernel.org/r/41395944-b0e3-c3ac-d648-8ddd70451d28@google.com
+Link: https://lkml.kernel.org/r/bd1f314a-fca1-8f19-cac0-b936c9614557@google.com
+Fixes: 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reported-by: Will Deacon <will@kernel.org>
+Closes: https://lore.kernel.org/linux-mm/20250815101858.24352-1-will@kernel.org/
+Acked-by: Kiryl Shutsemau <kas@kernel.org>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@kernel.org>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Chris Li <chrisl@kernel.org>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Keir Fraser <keirf@google.com>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Li Zhe <lizhe.67@bytedance.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Shivank Garg <shivankg@amd.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Wei Xu <weixugc@google.com>
+Cc: yangge <yangge1116@126.com>
+Cc: Yuanchu Xie <yuanchu@google.com>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -2331,7 +2331,8 @@ static unsigned long collect_longterm_un
+                       continue;
+               }
+-              if (!folio_test_lru(folio) && drain_allow) {
++              if (drain_allow && folio_ref_count(folio) !=
++                                 folio_expected_ref_count(folio) + 1) {
+                       lru_add_drain_all();
+                       drain_allow = false;
+               }
diff --git a/queue-6.16/mm-gup-local-lru_add_drain-to-avoid-lru_add_drain_all.patch b/queue-6.16/mm-gup-local-lru_add_drain-to-avoid-lru_add_drain_all.patch
new file mode 100644 (file)
index 0000000..ca29402
--- /dev/null
@@ -0,0 +1,82 @@
+From a09a8a1fbb374e0053b97306da9dbc05bd384685 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 8 Sep 2025 15:16:53 -0700
+Subject: mm/gup: local lru_add_drain() to avoid lru_add_drain_all()
+
+From: Hugh Dickins <hughd@google.com>
+
+commit a09a8a1fbb374e0053b97306da9dbc05bd384685 upstream.
+
+In many cases, if collect_longterm_unpinnable_folios() does need to drain
+the LRU cache to release a reference, the cache in question is on this
+same CPU, and much more efficiently drained by a preliminary local
+lru_add_drain(), than the later cross-CPU lru_add_drain_all().
+
+Marked for stable, to counter the increase in lru_add_drain_all()s from
+"mm/gup: check ref_count instead of lru before migration".  Note for clean
+backports: can take 6.16 commit a03db236aebf ("gup: optimize longterm
+pin_user_pages() for large folio") first.
+
+Link: https://lkml.kernel.org/r/66f2751f-283e-816d-9530-765db7edc465@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@kernel.org>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Chris Li <chrisl@kernel.org>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Keir Fraser <keirf@google.com>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Li Zhe <lizhe.67@bytedance.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Shivank Garg <shivankg@amd.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Wei Xu <weixugc@google.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: yangge <yangge1116@126.com>
+Cc: Yuanchu Xie <yuanchu@google.com>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c |   15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -2333,8 +2333,8 @@ static unsigned long collect_longterm_un
+               struct pages_or_folios *pofs)
+ {
+       unsigned long collected = 0;
+-      bool drain_allow = true;
+       struct folio *folio;
++      int drained = 0;
+       long i = 0;
+       for (folio = pofs_get_folio(pofs, i); folio;
+@@ -2353,10 +2353,17 @@ static unsigned long collect_longterm_un
+                       continue;
+               }
+-              if (drain_allow && folio_ref_count(folio) !=
+-                                 folio_expected_ref_count(folio) + 1) {
++              if (drained == 0 &&
++                              folio_ref_count(folio) !=
++                              folio_expected_ref_count(folio) + 1) {
++                      lru_add_drain();
++                      drained = 1;
++              }
++              if (drained == 1 &&
++                              folio_ref_count(folio) !=
++                              folio_expected_ref_count(folio) + 1) {
+                       lru_add_drain_all();
+-                      drain_allow = false;
++                      drained = 2;
+               }
+               if (!folio_isolate_lru(folio))
diff --git a/queue-6.16/mm-revert-mm-gup-clear-the-lru-flag-of-a-page-before-adding-to-lru-batch.patch b/queue-6.16/mm-revert-mm-gup-clear-the-lru-flag-of-a-page-before-adding-to-lru-batch.patch
new file mode 100644 (file)
index 0000000..6a97243
--- /dev/null
@@ -0,0 +1,200 @@
+From afb99e9f500485160f34b8cad6d3763ada3e80e8 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 8 Sep 2025 15:19:17 -0700
+Subject: mm: revert "mm/gup: clear the LRU flag of a page before adding to LRU batch"
+
+From: Hugh Dickins <hughd@google.com>
+
+commit afb99e9f500485160f34b8cad6d3763ada3e80e8 upstream.
+
+This reverts commit 33dfe9204f29: now that
+collect_longterm_unpinnable_folios() is checking ref_count instead of lru,
+and mlock/munlock do not participate in the revised LRU flag clearing,
+those changes are misleading, and enlarge the window during which
+mlock/munlock may miss an mlock_count update.
+
+It is possible (I'd hesitate to claim probable) that the greater
+likelihood of missed mlock_count updates would explain the "Realtime
+threads delayed due to kcompactd0" observed on 6.12 in the Link below.  If
+that is the case, this reversion will help; but a complete solution needs
+also a further patch, beyond the scope of this series.
+
+Included some 80-column cleanup around folio_batch_add_and_move().
+
+The role of folio_test_clear_lru() (before taking per-memcg lru_lock) is
+questionable since 6.13 removed mem_cgroup_move_account() etc; but perhaps
+there are still some races which need it - not examined here.
+
+Link: https://lore.kernel.org/linux-mm/DU0PR01MB10385345F7153F334100981888259A@DU0PR01MB10385.eurprd01.prod.exchangelabs.com/
+Link: https://lkml.kernel.org/r/05905d7b-ed14-68b1-79d8-bdec30367eba@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@kernel.org>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Chris Li <chrisl@kernel.org>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Keir Fraser <keirf@google.com>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Li Zhe <lizhe.67@bytedance.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Shivank Garg <shivankg@amd.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Wei Xu <weixugc@google.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: yangge <yangge1116@126.com>
+Cc: Yuanchu Xie <yuanchu@google.com>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/swap.c |   50 ++++++++++++++++++++++++++------------------------
+ 1 file changed, 26 insertions(+), 24 deletions(-)
+
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -164,6 +164,10 @@ static void folio_batch_move_lru(struct
+       for (i = 0; i < folio_batch_count(fbatch); i++) {
+               struct folio *folio = fbatch->folios[i];
++              /* block memcg migration while the folio moves between lru */
++              if (move_fn != lru_add && !folio_test_clear_lru(folio))
++                      continue;
++
+               folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
+               move_fn(lruvec, folio);
+@@ -176,14 +180,10 @@ static void folio_batch_move_lru(struct
+ }
+ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
+-              struct folio *folio, move_fn_t move_fn,
+-              bool on_lru, bool disable_irq)
++              struct folio *folio, move_fn_t move_fn, bool disable_irq)
+ {
+       unsigned long flags;
+-      if (on_lru && !folio_test_clear_lru(folio))
+-              return;
+-
+       folio_get(folio);
+       if (disable_irq)
+@@ -191,8 +191,8 @@ static void __folio_batch_add_and_move(s
+       else
+               local_lock(&cpu_fbatches.lock);
+-      if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || folio_test_large(folio) ||
+-          lru_cache_disabled())
++      if (!folio_batch_add(this_cpu_ptr(fbatch), folio) ||
++                      folio_test_large(folio) || lru_cache_disabled())
+               folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn);
+       if (disable_irq)
+@@ -201,13 +201,13 @@ static void __folio_batch_add_and_move(s
+               local_unlock(&cpu_fbatches.lock);
+ }
+-#define folio_batch_add_and_move(folio, op, on_lru)                                           \
+-      __folio_batch_add_and_move(                                                             \
+-              &cpu_fbatches.op,                                                               \
+-              folio,                                                                          \
+-              op,                                                                             \
+-              on_lru,                                                                         \
+-              offsetof(struct cpu_fbatches, op) >= offsetof(struct cpu_fbatches, lock_irq)    \
++#define folio_batch_add_and_move(folio, op)           \
++      __folio_batch_add_and_move(                     \
++              &cpu_fbatches.op,                       \
++              folio,                                  \
++              op,                                     \
++              offsetof(struct cpu_fbatches, op) >=    \
++              offsetof(struct cpu_fbatches, lock_irq) \
+       )
+ static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
+@@ -231,10 +231,10 @@ static void lru_move_tail(struct lruvec
+ void folio_rotate_reclaimable(struct folio *folio)
+ {
+       if (folio_test_locked(folio) || folio_test_dirty(folio) ||
+-          folio_test_unevictable(folio))
++          folio_test_unevictable(folio) || !folio_test_lru(folio))
+               return;
+-      folio_batch_add_and_move(folio, lru_move_tail, true);
++      folio_batch_add_and_move(folio, lru_move_tail);
+ }
+ void lru_note_cost(struct lruvec *lruvec, bool file,
+@@ -323,10 +323,11 @@ static void folio_activate_drain(int cpu
+ void folio_activate(struct folio *folio)
+ {
+-      if (folio_test_active(folio) || folio_test_unevictable(folio))
++      if (folio_test_active(folio) || folio_test_unevictable(folio) ||
++          !folio_test_lru(folio))
+               return;
+-      folio_batch_add_and_move(folio, lru_activate, true);
++      folio_batch_add_and_move(folio, lru_activate);
+ }
+ #else
+@@ -502,7 +503,7 @@ void folio_add_lru(struct folio *folio)
+           lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
+               folio_set_active(folio);
+-      folio_batch_add_and_move(folio, lru_add, false);
++      folio_batch_add_and_move(folio, lru_add);
+ }
+ EXPORT_SYMBOL(folio_add_lru);
+@@ -680,13 +681,13 @@ void lru_add_drain_cpu(int cpu)
+ void deactivate_file_folio(struct folio *folio)
+ {
+       /* Deactivating an unevictable folio will not accelerate reclaim */
+-      if (folio_test_unevictable(folio))
++      if (folio_test_unevictable(folio) || !folio_test_lru(folio))
+               return;
+       if (lru_gen_enabled() && lru_gen_clear_refs(folio))
+               return;
+-      folio_batch_add_and_move(folio, lru_deactivate_file, true);
++      folio_batch_add_and_move(folio, lru_deactivate_file);
+ }
+ /*
+@@ -699,13 +700,13 @@ void deactivate_file_folio(struct folio
+  */
+ void folio_deactivate(struct folio *folio)
+ {
+-      if (folio_test_unevictable(folio))
++      if (folio_test_unevictable(folio) || !folio_test_lru(folio))
+               return;
+       if (lru_gen_enabled() ? lru_gen_clear_refs(folio) : !folio_test_active(folio))
+               return;
+-      folio_batch_add_and_move(folio, lru_deactivate, true);
++      folio_batch_add_and_move(folio, lru_deactivate);
+ }
+ /**
+@@ -718,10 +719,11 @@ void folio_deactivate(struct folio *foli
+ void folio_mark_lazyfree(struct folio *folio)
+ {
+       if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
++          !folio_test_lru(folio) ||
+           folio_test_swapcache(folio) || folio_test_unevictable(folio))
+               return;
+-      folio_batch_add_and_move(folio, lru_lazyfree, true);
++      folio_batch_add_and_move(folio, lru_lazyfree);
+ }
+ void lru_add_drain(void)
diff --git a/queue-6.16/mm-revert-mm-vmscan.c-fix-oom-on-swap-stress-test.patch b/queue-6.16/mm-revert-mm-vmscan.c-fix-oom-on-swap-stress-test.patch
new file mode 100644 (file)
index 0000000..bcf6613
--- /dev/null
@@ -0,0 +1,53 @@
+From 8d79ed36bfc83d0583ab72216b7980340478cdfb Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 8 Sep 2025 15:21:12 -0700
+Subject: mm: revert "mm: vmscan.c: fix OOM on swap stress test"
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 8d79ed36bfc83d0583ab72216b7980340478cdfb upstream.
+
+This reverts commit 0885ef470560: that was a fix to the reverted
+33dfe9204f29b415bbc0abb1a50642d1ba94f5e9.
+
+Link: https://lkml.kernel.org/r/aa0e9d67-fbcd-9d79-88a1-641dfbe1d9d1@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@kernel.org>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Chris Li <chrisl@kernel.org>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Keir Fraser <keirf@google.com>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Li Zhe <lizhe.67@bytedance.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Shivank Garg <shivankg@amd.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Wei Xu <weixugc@google.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: yangge <yangge1116@126.com>
+Cc: Yuanchu Xie <yuanchu@google.com>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmscan.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -4505,7 +4505,7 @@ static bool sort_folio(struct lruvec *lr
+       }
+       /* ineligible */
+-      if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
++      if (zone > sc->reclaim_idx) {
+               gen = folio_inc_gen(lruvec, folio, false);
+               list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+               return true;
diff --git a/queue-6.16/mmc-mvsdio-fix-dma_unmap_sg-nents-value.patch b/queue-6.16/mmc-mvsdio-fix-dma_unmap_sg-nents-value.patch
new file mode 100644 (file)
index 0000000..d3706bd
--- /dev/null
@@ -0,0 +1,33 @@
+From 8ab2f1c35669bff7d7ed1bb16bf5cc989b3e2e17 Mon Sep 17 00:00:00 2001
+From: Thomas Fourier <fourier.thomas@gmail.com>
+Date: Tue, 26 Aug 2025 09:58:08 +0200
+Subject: mmc: mvsdio: Fix dma_unmap_sg() nents value
+
+From: Thomas Fourier <fourier.thomas@gmail.com>
+
+commit 8ab2f1c35669bff7d7ed1bb16bf5cc989b3e2e17 upstream.
+
+The dma_unmap_sg() functions should be called with the same nents as the
+dma_map_sg(), not the value the map function returned.
+
+Fixes: 236caa7cc351 ("mmc: SDIO driver for Marvell SoCs")
+Signed-off-by: Thomas Fourier <fourier.thomas@gmail.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/mvsdio.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/mvsdio.c
++++ b/drivers/mmc/host/mvsdio.c
+@@ -292,7 +292,7 @@ static u32 mvsd_finish_data(struct mvsd_
+               host->pio_ptr = NULL;
+               host->pio_size = 0;
+       } else {
+-              dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->sg_frags,
++              dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+                            mmc_get_dma_dir(data));
+       }
diff --git a/queue-6.16/mmc-sdhci-move-the-code-related-to-setting-the-clock-from-sdhci_set_ios_common-into-sdhci_set_ios.patch b/queue-6.16/mmc-sdhci-move-the-code-related-to-setting-the-clock-from-sdhci_set_ios_common-into-sdhci_set_ios.patch
new file mode 100644 (file)
index 0000000..a9f216a
--- /dev/null
@@ -0,0 +1,90 @@
+From 7b7e71683b4ccbe0dbd7d434707623327e852f20 Mon Sep 17 00:00:00 2001
+From: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+Date: Thu, 11 Sep 2025 10:40:20 +0800
+Subject: mmc: sdhci: Move the code related to setting the clock from sdhci_set_ios_common() into sdhci_set_ios()
+
+From: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+
+commit 7b7e71683b4ccbe0dbd7d434707623327e852f20 upstream.
+
+The sdhci_set_clock() is called in sdhci_set_ios_common() and
+__sdhci_uhs2_set_ios(). According to Section 3.13.2 "Card Interface
+Detection Sequence" of the SD Host Controller Standard Specification
+Version 7.00, the SD clock is supplied after power is supplied, so we only
+need one in __sdhci_uhs2_set_ios(). Let's move the code related to setting
+the clock from sdhci_set_ios_common() into sdhci_set_ios() and modify
+the parameters passed to sdhci_set_clock() in __sdhci_uhs2_set_ios().
+
+Fixes: 10c8298a052b ("mmc: sdhci-uhs2: add set_ios()")
+Cc: stable@vger.kernel.org # v6.13+
+Signed-off-by: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-uhs2.c |    3 ++-
+ drivers/mmc/host/sdhci.c      |   34 +++++++++++++++++-----------------
+ 2 files changed, 19 insertions(+), 18 deletions(-)
+
+--- a/drivers/mmc/host/sdhci-uhs2.c
++++ b/drivers/mmc/host/sdhci-uhs2.c
+@@ -295,7 +295,8 @@ static void __sdhci_uhs2_set_ios(struct
+       else
+               sdhci_uhs2_set_power(host, ios->power_mode, ios->vdd);
+-      sdhci_set_clock(host, host->clock);
++      sdhci_set_clock(host, ios->clock);
++      host->clock = ios->clock;
+ }
+ static int sdhci_uhs2_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+--- a/drivers/mmc/host/sdhci.c
++++ b/drivers/mmc/host/sdhci.c
+@@ -2367,23 +2367,6 @@ void sdhci_set_ios_common(struct mmc_hos
+               (ios->power_mode == MMC_POWER_UP) &&
+               !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN))
+               sdhci_enable_preset_value(host, false);
+-
+-      if (!ios->clock || ios->clock != host->clock) {
+-              host->ops->set_clock(host, ios->clock);
+-              host->clock = ios->clock;
+-
+-              if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK &&
+-                  host->clock) {
+-                      host->timeout_clk = mmc->actual_clock ?
+-                                              mmc->actual_clock / 1000 :
+-                                              host->clock / 1000;
+-                      mmc->max_busy_timeout =
+-                              host->ops->get_max_timeout_count ?
+-                              host->ops->get_max_timeout_count(host) :
+-                              1 << 27;
+-                      mmc->max_busy_timeout /= host->timeout_clk;
+-              }
+-      }
+ }
+ EXPORT_SYMBOL_GPL(sdhci_set_ios_common);
+@@ -2410,6 +2393,23 @@ void sdhci_set_ios(struct mmc_host *mmc,
+       sdhci_set_ios_common(mmc, ios);
++      if (!ios->clock || ios->clock != host->clock) {
++              host->ops->set_clock(host, ios->clock);
++              host->clock = ios->clock;
++
++              if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK &&
++                  host->clock) {
++                      host->timeout_clk = mmc->actual_clock ?
++                                              mmc->actual_clock / 1000 :
++                                              host->clock / 1000;
++                      mmc->max_busy_timeout =
++                              host->ops->get_max_timeout_count ?
++                              host->ops->get_max_timeout_count(host) :
++                              1 << 27;
++                      mmc->max_busy_timeout /= host->timeout_clk;
++              }
++      }
++
+       if (host->ops->set_power)
+               host->ops->set_power(host, ios->power_mode, ios->vdd);
+       else
diff --git a/queue-6.16/mmc-sdhci-pci-gli-gl9767-fix-initializing-the-uhs-ii-interface-during-a-power-on.patch b/queue-6.16/mmc-sdhci-pci-gli-gl9767-fix-initializing-the-uhs-ii-interface-during-a-power-on.patch
new file mode 100644 (file)
index 0000000..efc4ffe
--- /dev/null
@@ -0,0 +1,121 @@
+From 77a436c93d10d68201bfd4941d1ca3230dfd1f40 Mon Sep 17 00:00:00 2001
+From: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+Date: Thu, 11 Sep 2025 10:42:42 +0800
+Subject: mmc: sdhci-pci-gli: GL9767: Fix initializing the UHS-II interface during a power-on
+
+From: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+
+commit 77a436c93d10d68201bfd4941d1ca3230dfd1f40 upstream.
+
+According to the power structure of IC hardware design for UHS-II
+interface, reset control and timing must be added to the initialization
+process of powering on the UHS-II interface.
+
+Fixes: 27dd3b82557a ("mmc: sdhci-pci-gli: enable UHS-II mode for GL9767")
+Cc: stable@vger.kernel.org # v6.13+
+Signed-off-by: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-pci-gli.c |   68 ++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 67 insertions(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/sdhci-pci-gli.c
++++ b/drivers/mmc/host/sdhci-pci-gli.c
+@@ -283,6 +283,8 @@
+ #define   PCIE_GLI_9767_UHS2_CTL2_ZC_VALUE      0xb
+ #define   PCIE_GLI_9767_UHS2_CTL2_ZC_CTL        BIT(6)
+ #define   PCIE_GLI_9767_UHS2_CTL2_ZC_CTL_VALUE          0x1
++#define   PCIE_GLI_9767_UHS2_CTL2_FORCE_PHY_RESETN    BIT(13)
++#define   PCIE_GLI_9767_UHS2_CTL2_FORCE_RESETN_VALUE  BIT(14)
+ #define GLI_MAX_TUNING_LOOP 40
+@@ -1179,6 +1181,65 @@ static void gl9767_set_low_power_negotia
+       gl9767_vhs_read(pdev);
+ }
++static void sdhci_gl9767_uhs2_phy_reset(struct sdhci_host *host, bool assert)
++{
++      struct sdhci_pci_slot *slot = sdhci_priv(host);
++      struct pci_dev *pdev = slot->chip->pdev;
++      u32 value, set, clr;
++
++      if (assert) {
++              /* Assert reset, set RESETN and clean RESETN_VALUE */
++              set = PCIE_GLI_9767_UHS2_CTL2_FORCE_PHY_RESETN;
++              clr = PCIE_GLI_9767_UHS2_CTL2_FORCE_RESETN_VALUE;
++      } else {
++              /* De-assert reset, clean RESETN and set RESETN_VALUE */
++              set = PCIE_GLI_9767_UHS2_CTL2_FORCE_RESETN_VALUE;
++              clr = PCIE_GLI_9767_UHS2_CTL2_FORCE_PHY_RESETN;
++      }
++
++      gl9767_vhs_write(pdev);
++      pci_read_config_dword(pdev, PCIE_GLI_9767_UHS2_CTL2, &value);
++      value |= set;
++      pci_write_config_dword(pdev, PCIE_GLI_9767_UHS2_CTL2, value);
++      value &= ~clr;
++      pci_write_config_dword(pdev, PCIE_GLI_9767_UHS2_CTL2, value);
++      gl9767_vhs_read(pdev);
++}
++
++static void __gl9767_uhs2_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd)
++{
++      u8 pwr = 0;
++
++      if (mode != MMC_POWER_OFF) {
++              pwr = sdhci_get_vdd_value(vdd);
++              if (!pwr)
++                      WARN(1, "%s: Invalid vdd %#x\n",
++                           mmc_hostname(host->mmc), vdd);
++              pwr |= SDHCI_VDD2_POWER_180;
++      }
++
++      if (host->pwr == pwr)
++              return;
++
++      host->pwr = pwr;
++
++      if (pwr == 0) {
++              sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
++      } else {
++              sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
++
++              pwr |= SDHCI_POWER_ON;
++              sdhci_writeb(host, pwr & 0xf, SDHCI_POWER_CONTROL);
++              usleep_range(5000, 6250);
++
++              /* Assert reset */
++              sdhci_gl9767_uhs2_phy_reset(host, true);
++              pwr |= SDHCI_VDD2_POWER_ON;
++              sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
++              usleep_range(5000, 6250);
++      }
++}
++
+ static void sdhci_gl9767_set_clock(struct sdhci_host *host, unsigned int clock)
+ {
+       struct sdhci_pci_slot *slot = sdhci_priv(host);
+@@ -1205,6 +1266,11 @@ static void sdhci_gl9767_set_clock(struc
+       }
+       sdhci_enable_clk(host, clk);
++
++      if (mmc_card_uhs2(host->mmc))
++              /* De-assert reset */
++              sdhci_gl9767_uhs2_phy_reset(host, false);
++
+       gl9767_set_low_power_negotiation(pdev, true);
+ }
+@@ -1476,7 +1542,7 @@ static void sdhci_gl9767_set_power(struc
+               gl9767_vhs_read(pdev);
+               sdhci_gli_overcurrent_event_enable(host, false);
+-              sdhci_uhs2_set_power(host, mode, vdd);
++              __gl9767_uhs2_set_power(host, mode, vdd);
+               sdhci_gli_overcurrent_event_enable(host, true);
+       } else {
+               gl9767_vhs_write(pdev);
diff --git a/queue-6.16/mmc-sdhci-uhs2-fix-calling-incorrect-sdhci_set_clock-function.patch b/queue-6.16/mmc-sdhci-uhs2-fix-calling-incorrect-sdhci_set_clock-function.patch
new file mode 100644 (file)
index 0000000..f7805ff
--- /dev/null
@@ -0,0 +1,33 @@
+From 09c2b628f6403ad467fc73326a50020590603871 Mon Sep 17 00:00:00 2001
+From: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+Date: Thu, 11 Sep 2025 10:41:01 +0800
+Subject: mmc: sdhci-uhs2: Fix calling incorrect sdhci_set_clock() function
+
+From: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+
+commit 09c2b628f6403ad467fc73326a50020590603871 upstream.
+
+Fix calling incorrect sdhci_set_clock() in __sdhci_uhs2_set_ios() when the
+vendor defines its own sdhci_set_clock().
+
+Fixes: 10c8298a052b ("mmc: sdhci-uhs2: add set_ios()")
+Cc: stable@vger.kernel.org # v6.13+
+Signed-off-by: Ben Chuang <ben.chuang@genesyslogic.com.tw>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-uhs2.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/sdhci-uhs2.c
++++ b/drivers/mmc/host/sdhci-uhs2.c
+@@ -295,7 +295,7 @@ static void __sdhci_uhs2_set_ios(struct
+       else
+               sdhci_uhs2_set_power(host, ios->power_mode, ios->vdd);
+-      sdhci_set_clock(host, ios->clock);
++      host->ops->set_clock(host, ios->clock);
+       host->clock = ios->clock;
+ }
diff --git a/queue-6.16/mptcp-propagate-shutdown-to-subflows-when-possible.patch b/queue-6.16/mptcp-propagate-shutdown-to-subflows-when-possible.patch
new file mode 100644 (file)
index 0000000..de3b88d
--- /dev/null
@@ -0,0 +1,79 @@
+From f755be0b1ff429a2ecf709beeb1bcd7abc111c2b Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Fri, 12 Sep 2025 14:25:50 +0200
+Subject: mptcp: propagate shutdown to subflows when possible
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit f755be0b1ff429a2ecf709beeb1bcd7abc111c2b upstream.
+
+When the MPTCP DATA FIN have been ACKed, there is no more MPTCP related
+metadata to exchange, and all subflows can be safely shutdown.
+
+Before this patch, the subflows were actually terminated at 'close()'
+time. That's certainly fine most of the time, but not when the userspace
+'shutdown()' a connection, without close()ing it. When doing so, the
+subflows were staying in LAST_ACK state on one side -- and consequently
+in FIN_WAIT2 on the other side -- until the 'close()' of the MPTCP
+socket.
+
+Now, when the DATA FIN have been ACKed, all subflows are shutdown. A
+consequence of this is that the TCP 'FIN' flag can be set earlier now,
+but the end result is the same. This affects the packetdrill tests
+looking at the end of the MPTCP connections, but for a good reason.
+
+Note that tcp_shutdown() will check the subflow state, so no need to do
+that again before calling it.
+
+Fixes: 3721b9b64676 ("mptcp: Track received DATA_FIN sequence number and add related helpers")
+Cc: stable@vger.kernel.org
+Fixes: 16a9a9da1723 ("mptcp: Add helper to process acks of DATA_FIN")
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Reviewed-by: Geliang Tang <geliang@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20250912-net-mptcp-fix-sft-connect-v1-1-d40e77cbbf02@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |   16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -350,6 +350,20 @@ static void mptcp_close_wake_up(struct s
+               sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ }
++static void mptcp_shutdown_subflows(struct mptcp_sock *msk)
++{
++      struct mptcp_subflow_context *subflow;
++
++      mptcp_for_each_subflow(msk, subflow) {
++              struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
++              bool slow;
++
++              slow = lock_sock_fast(ssk);
++              tcp_shutdown(ssk, SEND_SHUTDOWN);
++              unlock_sock_fast(ssk, slow);
++      }
++}
++
+ /* called under the msk socket lock */
+ static bool mptcp_pending_data_fin_ack(struct sock *sk)
+ {
+@@ -374,6 +388,7 @@ static void mptcp_check_data_fin_ack(str
+                       break;
+               case TCP_CLOSING:
+               case TCP_LAST_ACK:
++                      mptcp_shutdown_subflows(msk);
+                       mptcp_set_state(sk, TCP_CLOSE);
+                       break;
+               }
+@@ -542,6 +557,7 @@ static bool mptcp_check_data_fin(struct
+                       mptcp_set_state(sk, TCP_CLOSING);
+                       break;
+               case TCP_FIN_WAIT2:
++                      mptcp_shutdown_subflows(msk);
+                       mptcp_set_state(sk, TCP_CLOSE);
+                       break;
+               default:
diff --git a/queue-6.16/net-rfkill-gpio-fix-crash-due-to-dereferencering-uninitialized-pointer.patch b/queue-6.16/net-rfkill-gpio-fix-crash-due-to-dereferencering-uninitialized-pointer.patch
new file mode 100644 (file)
index 0000000..358b68a
--- /dev/null
@@ -0,0 +1,56 @@
+From b6f56a44e4c1014b08859dcf04ed246500e310e5 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hansg@kernel.org>
+Date: Sat, 13 Sep 2025 13:35:15 +0200
+Subject: net: rfkill: gpio: Fix crash due to dereferencering uninitialized pointer
+
+From: Hans de Goede <hansg@kernel.org>
+
+commit b6f56a44e4c1014b08859dcf04ed246500e310e5 upstream.
+
+Since commit 7d5e9737efda ("net: rfkill: gpio: get the name and type from
+device property") rfkill_find_type() gets called with the possibly
+uninitialized "const char *type_name;" local variable.
+
+On x86 systems when rfkill-gpio binds to a "BCM4752" or "LNV4752"
+acpi_device, the rfkill->type is set based on the ACPI acpi_device_id:
+
+        rfkill->type = (unsigned)id->driver_data;
+
+and there is no "type" property so device_property_read_string() will fail
+and leave type_name uninitialized, leading to a potential crash.
+
+rfkill_find_type() does accept a NULL pointer, fix the potential crash
+by initializing type_name to NULL.
+
+Note likely sofar this has not been caught because:
+
+1. Not many x86 machines actually have a "BCM4752"/"LNV4752" acpi_device
+2. The stack happened to contain NULL where type_name is stored
+
+Fixes: 7d5e9737efda ("net: rfkill: gpio: get the name and type from device property")
+Cc: stable@vger.kernel.org
+Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Signed-off-by: Hans de Goede <hansg@kernel.org>
+Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://patch.msgid.link/20250913113515.21698-1-hansg@kernel.org
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rfkill/rfkill-gpio.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/rfkill/rfkill-gpio.c
++++ b/net/rfkill/rfkill-gpio.c
+@@ -94,10 +94,10 @@ static const struct dmi_system_id rfkill
+ static int rfkill_gpio_probe(struct platform_device *pdev)
+ {
+       struct rfkill_gpio_data *rfkill;
+-      struct gpio_desc *gpio;
++      const char *type_name = NULL;
+       const char *name_property;
+       const char *type_property;
+-      const char *type_name;
++      struct gpio_desc *gpio;
+       int ret;
+       if (dmi_check_system(rfkill_gpio_deny_table))
diff --git a/queue-6.16/nilfs2-fix-cfi-failure-when-accessing-sys-fs-nilfs2-features.patch b/queue-6.16/nilfs2-fix-cfi-failure-when-accessing-sys-fs-nilfs2-features.patch
new file mode 100644 (file)
index 0000000..dcbce1f
--- /dev/null
@@ -0,0 +1,95 @@
+From 025e87f8ea2ae3a28bf1fe2b052bfa412c27ed4a Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Sat, 6 Sep 2025 23:43:34 +0900
+Subject: nilfs2: fix CFI failure when accessing /sys/fs/nilfs2/features/*
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit 025e87f8ea2ae3a28bf1fe2b052bfa412c27ed4a upstream.
+
+When accessing one of the files under /sys/fs/nilfs2/features when
+CONFIG_CFI_CLANG is enabled, there is a CFI violation:
+
+  CFI failure at kobj_attr_show+0x59/0x80 (target: nilfs_feature_revision_show+0x0/0x30; expected type: 0xfc392c4d)
+  ...
+  Call Trace:
+   <TASK>
+   sysfs_kf_seq_show+0x2a6/0x390
+   ? __cfi_kobj_attr_show+0x10/0x10
+   kernfs_seq_show+0x104/0x15b
+   seq_read_iter+0x580/0xe2b
+  ...
+
+When the kobject of the kset for /sys/fs/nilfs2 is initialized, its ktype
+is set to kset_ktype, which has a ->sysfs_ops of kobj_sysfs_ops.  When
+nilfs_feature_attr_group is added to that kobject via
+sysfs_create_group(), the kernfs_ops of each files is sysfs_file_kfops_rw,
+which will call sysfs_kf_seq_show() when ->seq_show() is called.
+sysfs_kf_seq_show() in turn calls kobj_attr_show() through
+->sysfs_ops->show().  kobj_attr_show() casts the provided attribute out to
+a 'struct kobj_attribute' via container_of() and calls ->show(), resulting
+in the CFI violation since neither nilfs_feature_revision_show() nor
+nilfs_feature_README_show() match the prototype of ->show() in 'struct
+kobj_attribute'.
+
+Resolve the CFI violation by adjusting the second parameter in
+nilfs_feature_{revision,README}_show() from 'struct attribute' to 'struct
+kobj_attribute' to match the expected prototype.
+
+Link: https://lkml.kernel.org/r/20250906144410.22511-1-konishi.ryusuke@gmail.com
+Fixes: aebe17f68444 ("nilfs2: add /sys/fs/nilfs2/features group")
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202509021646.bc78d9ef-lkp@intel.com/
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/sysfs.c |    4 ++--
+ fs/nilfs2/sysfs.h |    8 ++++----
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/nilfs2/sysfs.c
++++ b/fs/nilfs2/sysfs.c
+@@ -1075,7 +1075,7 @@ void nilfs_sysfs_delete_device_group(str
+  ************************************************************************/
+ static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
+-                                          struct attribute *attr, char *buf)
++                                          struct kobj_attribute *attr, char *buf)
+ {
+       return sysfs_emit(buf, "%d.%d\n",
+                       NILFS_CURRENT_REV, NILFS_MINOR_REV);
+@@ -1087,7 +1087,7 @@ static const char features_readme_str[]
+       "(1) revision\n\tshow current revision of NILFS file system driver.\n";
+ static ssize_t nilfs_feature_README_show(struct kobject *kobj,
+-                                       struct attribute *attr,
++                                       struct kobj_attribute *attr,
+                                        char *buf)
+ {
+       return sysfs_emit(buf, features_readme_str);
+--- a/fs/nilfs2/sysfs.h
++++ b/fs/nilfs2/sysfs.h
+@@ -50,16 +50,16 @@ struct nilfs_sysfs_dev_subgroups {
+       struct completion sg_segments_kobj_unregister;
+ };
+-#define NILFS_COMMON_ATTR_STRUCT(name) \
++#define NILFS_KOBJ_ATTR_STRUCT(name) \
+ struct nilfs_##name##_attr { \
+       struct attribute attr; \
+-      ssize_t (*show)(struct kobject *, struct attribute *, \
++      ssize_t (*show)(struct kobject *, struct kobj_attribute *, \
+                       char *); \
+-      ssize_t (*store)(struct kobject *, struct attribute *, \
++      ssize_t (*store)(struct kobject *, struct kobj_attribute *, \
+                        const char *, size_t); \
+ }
+-NILFS_COMMON_ATTR_STRUCT(feature);
++NILFS_KOBJ_ATTR_STRUCT(feature);
+ #define NILFS_DEV_ATTR_STRUCT(name) \
+ struct nilfs_##name##_attr { \
diff --git a/queue-6.16/objtool-loongarch-mark-special-atomic-instruction-as-insn_bug-type.patch b/queue-6.16/objtool-loongarch-mark-special-atomic-instruction-as-insn_bug-type.patch
new file mode 100644 (file)
index 0000000..74923a2
--- /dev/null
@@ -0,0 +1,105 @@
+From 539d7344d4feaea37e05863e9aa86bd31f28e46f Mon Sep 17 00:00:00 2001
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+Date: Thu, 18 Sep 2025 19:43:36 +0800
+Subject: objtool/LoongArch: Mark special atomic instruction as INSN_BUG type
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+commit 539d7344d4feaea37e05863e9aa86bd31f28e46f upstream.
+
+When compiling with LLVM and CONFIG_RUST is set, there exists the
+following objtool warning:
+
+  rust/compiler_builtins.o: warning: objtool: __rust__unordsf2(): unexpected end of section .text.unlikely.
+
+objdump shows that the end of section .text.unlikely is an atomic
+instruction:
+
+  amswap.w        $zero, $ra, $zero
+
+According to the LoongArch Reference Manual, if the amswap.w atomic
+memory access instruction has the same register number as rd and rj,
+the execution will trigger an Instruction Non-defined Exception, so
+mark the above instruction as INSN_BUG type to fix the warning.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/arch/loongarch/include/asm/inst.h |   12 ++++++++++++
+ tools/objtool/arch/loongarch/decode.c   |   21 +++++++++++++++++++++
+ 2 files changed, 33 insertions(+)
+
+--- a/tools/arch/loongarch/include/asm/inst.h
++++ b/tools/arch/loongarch/include/asm/inst.h
+@@ -51,6 +51,10 @@ enum reg2i16_op {
+       bgeu_op         = 0x1b,
+ };
++enum reg3_op {
++      amswapw_op      = 0x70c0,
++};
++
+ struct reg0i15_format {
+       unsigned int immediate : 15;
+       unsigned int opcode : 17;
+@@ -96,6 +100,13 @@ struct reg2i16_format {
+       unsigned int opcode : 6;
+ };
++struct reg3_format {
++      unsigned int rd : 5;
++      unsigned int rj : 5;
++      unsigned int rk : 5;
++      unsigned int opcode : 17;
++};
++
+ union loongarch_instruction {
+       unsigned int word;
+       struct reg0i15_format   reg0i15_format;
+@@ -105,6 +116,7 @@ union loongarch_instruction {
+       struct reg2i12_format   reg2i12_format;
+       struct reg2i14_format   reg2i14_format;
+       struct reg2i16_format   reg2i16_format;
++      struct reg3_format      reg3_format;
+ };
+ #define LOONGARCH_INSN_SIZE   sizeof(union loongarch_instruction)
+--- a/tools/objtool/arch/loongarch/decode.c
++++ b/tools/objtool/arch/loongarch/decode.c
+@@ -278,6 +278,25 @@ static bool decode_insn_reg2i16_fomat(un
+       return true;
+ }
++static bool decode_insn_reg3_fomat(union loongarch_instruction inst,
++                                 struct instruction *insn)
++{
++      switch (inst.reg3_format.opcode) {
++      case amswapw_op:
++              if (inst.reg3_format.rd == LOONGARCH_GPR_ZERO &&
++                  inst.reg3_format.rk == LOONGARCH_GPR_RA &&
++                  inst.reg3_format.rj == LOONGARCH_GPR_ZERO) {
++                      /* amswap.w $zero, $ra, $zero */
++                      insn->type = INSN_BUG;
++              }
++              break;
++      default:
++              return false;
++      }
++
++      return true;
++}
++
+ int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
+                           unsigned long offset, unsigned int maxlen,
+                           struct instruction *insn)
+@@ -309,6 +328,8 @@ int arch_decode_instruction(struct objto
+               return 0;
+       if (decode_insn_reg2i16_fomat(inst, insn))
+               return 0;
++      if (decode_insn_reg3_fomat(inst, insn))
++              return 0;
+       if (inst.word == 0) {
+               /* andi $zero, $zero, 0x0 */
diff --git a/queue-6.16/objtool-loongarch-mark-types-based-on-break-immediate-code.patch b/queue-6.16/objtool-loongarch-mark-types-based-on-break-immediate-code.patch
new file mode 100644 (file)
index 0000000..9f23a0b
--- /dev/null
@@ -0,0 +1,46 @@
+From baad7830ee9a56756b3857348452fe756cb0a702 Mon Sep 17 00:00:00 2001
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+Date: Thu, 18 Sep 2025 19:43:36 +0800
+Subject: objtool/LoongArch: Mark types based on break immediate code
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+commit baad7830ee9a56756b3857348452fe756cb0a702 upstream.
+
+If the break immediate code is 0, it should mark the type as
+INSN_TRAP. If the break immediate code is 1, it should mark the
+type as INSN_BUG.
+
+While at it, format the code style and add the code comment for nop.
+
+Cc: stable@vger.kernel.org
+Suggested-by: WANG Rui <wangrui@loongson.cn>
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/arch/loongarch/decode.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/tools/objtool/arch/loongarch/decode.c
++++ b/tools/objtool/arch/loongarch/decode.c
+@@ -310,10 +310,16 @@ int arch_decode_instruction(struct objto
+       if (decode_insn_reg2i16_fomat(inst, insn))
+               return 0;
+-      if (inst.word == 0)
++      if (inst.word == 0) {
++              /* andi $zero, $zero, 0x0 */
+               insn->type = INSN_NOP;
+-      else if (inst.reg0i15_format.opcode == break_op) {
+-              /* break */
++      } else if (inst.reg0i15_format.opcode == break_op &&
++                 inst.reg0i15_format.immediate == 0x0) {
++              /* break 0x0 */
++              insn->type = INSN_TRAP;
++      } else if (inst.reg0i15_format.opcode == break_op &&
++                 inst.reg0i15_format.immediate == 0x1) {
++              /* break 0x1 */
+               insn->type = INSN_BUG;
+       } else if (inst.reg2_format.opcode == ertn_op) {
+               /* ertn */
index 366c3587a93160cdfeca4b007baf1f76d8b51a37..77dd55626775f322aadbab8a2a67b4e8dfd32c14 100644 (file)
@@ -105,14 +105,12 @@ Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
 Signed-off-by: Jakub Kicinski <kuba@kernel.org>
 Signed-off-by: Sasha Levin <sashal@kernel.org>
 ---
- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c | 2 +-
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c |    2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
-diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
-index 63130ba37e9df..69b435ed8fbbe 100644
 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
 +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
-@@ -491,7 +491,7 @@ void otx2_ptp_destroy(struct otx2_nic *pfvf)
+@@ -491,7 +491,7 @@ void otx2_ptp_destroy(struct otx2_nic *p
        if (!ptp)
                return;
  
@@ -121,6 +119,3 @@ index 63130ba37e9df..69b435ed8fbbe 100644
  
        ptp_clock_unregister(ptp->ptp_clock);
        kfree(ptp);
--- 
-2.51.0
-
diff --git a/queue-6.16/power-supply-bq27xxx-fix-error-return-in-case-of-no-bq27000-hdq-battery.patch b/queue-6.16/power-supply-bq27xxx-fix-error-return-in-case-of-no-bq27000-hdq-battery.patch
new file mode 100644 (file)
index 0000000..e9a3346
--- /dev/null
@@ -0,0 +1,66 @@
+From 2c334d038466ac509468fbe06905a32d202117db Mon Sep 17 00:00:00 2001
+From: "H. Nikolaus Schaller" <hns@goldelico.com>
+Date: Sat, 23 Aug 2025 12:34:56 +0200
+Subject: power: supply: bq27xxx: fix error return in case of no bq27000 hdq battery
+
+From: H. Nikolaus Schaller <hns@goldelico.com>
+
+commit 2c334d038466ac509468fbe06905a32d202117db upstream.
+
+Since commit
+
+       commit f16d9fb6cf03 ("power: supply: bq27xxx: Retrieve again when busy")
+
+the console log of some devices with hdq enabled but no bq27000 battery
+(like e.g. the Pandaboard) is flooded with messages like:
+
+[   34.247833] power_supply bq27000-battery: driver failed to report 'status' property: -1
+
+as soon as user-space is finding a /sys entry and trying to read the
+"status" property.
+
+It turns out that the offending commit changes the logic to now return the
+value of cache.flags if it is <0. This is likely under the assumption that
+it is an error number. In normal errors from bq27xxx_read() this is indeed
+the case.
+
+But there is special code to detect if no bq27000 is installed or accessible
+through hdq/1wire and wants to report this. In that case, the cache.flags
+are set historically by
+
+       commit 3dd843e1c26a ("bq27000: report missing device better.")
+
+to constant -1 which did make reading properties return -ENODEV. So everything
+appeared to be fine before the return value was passed upwards.
+
+Now the -1 is returned as -EPERM instead of -ENODEV, triggering the error
+condition in power_supply_format_property() which then floods the console log.
+
+So we change the detection of missing bq27000 battery to simply set
+
+       cache.flags = -ENODEV
+
+instead of -1.
+
+Fixes: f16d9fb6cf03 ("power: supply: bq27xxx: Retrieve again when busy")
+Cc: Jerry Lv <Jerry.Lv@axis.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
+Link: https://lore.kernel.org/r/692f79eb6fd541adb397038ea6e750d4de2deddf.1755945297.git.hns@goldelico.com
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/power/supply/bq27xxx_battery.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/power/supply/bq27xxx_battery.c
++++ b/drivers/power/supply/bq27xxx_battery.c
+@@ -1920,7 +1920,7 @@ static void bq27xxx_battery_update_unloc
+       cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag);
+       if ((cache.flags & 0xff) == 0xff)
+-              cache.flags = -1; /* read error */
++              cache.flags = -ENODEV; /* read error */
+       if (cache.flags >= 0) {
+               cache.capacity = bq27xxx_battery_read_soc(di);
diff --git a/queue-6.16/power-supply-bq27xxx-restrict-no-battery-detection-to-bq27000.patch b/queue-6.16/power-supply-bq27xxx-restrict-no-battery-detection-to-bq27000.patch
new file mode 100644 (file)
index 0000000..14f74b5
--- /dev/null
@@ -0,0 +1,48 @@
+From 1e451977e1703b6db072719b37cd1b8e250b9cc9 Mon Sep 17 00:00:00 2001
+From: "H. Nikolaus Schaller" <hns@goldelico.com>
+Date: Sat, 23 Aug 2025 12:34:57 +0200
+Subject: power: supply: bq27xxx: restrict no-battery detection to bq27000
+
+From: H. Nikolaus Schaller <hns@goldelico.com>
+
+commit 1e451977e1703b6db072719b37cd1b8e250b9cc9 upstream.
+
+There are fuel gauges in the bq27xxx series (e.g. bq27z561) which may in some
+cases report 0xff as the value of BQ27XXX_REG_FLAGS that should not be
+interpreted as "no battery" like for a disconnected battery with some built
+in bq27000 chip.
+
+So restrict the no-battery detection originally introduced by
+
+    commit 3dd843e1c26a ("bq27000: report missing device better.")
+
+to the bq27000.
+
+There is no need to backport further because this was hidden before
+
+       commit f16d9fb6cf03 ("power: supply: bq27xxx: Retrieve again when busy")
+
+Fixes: f16d9fb6cf03 ("power: supply: bq27xxx: Retrieve again when busy")
+Suggested-by: Jerry Lv <Jerry.Lv@axis.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
+Link: https://lore.kernel.org/r/dd979fa6855fd051ee5117016c58daaa05966e24.1755945297.git.hns@goldelico.com
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/power/supply/bq27xxx_battery.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/power/supply/bq27xxx_battery.c
++++ b/drivers/power/supply/bq27xxx_battery.c
+@@ -1919,8 +1919,8 @@ static void bq27xxx_battery_update_unloc
+       bool has_singe_flag = di->opts & BQ27XXX_O_ZERO;
+       cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag);
+-      if ((cache.flags & 0xff) == 0xff)
+-              cache.flags = -ENODEV; /* read error */
++      if (di->chip == BQ27000 && (cache.flags & 0xff) == 0xff)
++              cache.flags = -ENODEV; /* bq27000 hdq read error */
+       if (cache.flags >= 0) {
+               cache.capacity = bq27xxx_battery_read_soc(di);
diff --git a/queue-6.16/rds-ib-increment-i_fastreg_wrs-before-bailing-out.patch b/queue-6.16/rds-ib-increment-i_fastreg_wrs-before-bailing-out.patch
new file mode 100644 (file)
index 0000000..dbc9e3d
--- /dev/null
@@ -0,0 +1,82 @@
+From 4351ca3fcb3ffecf12631b4996bf085a2dad0db6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?H=C3=A5kon=20Bugge?= <haakon.bugge@oracle.com>
+Date: Thu, 11 Sep 2025 15:33:34 +0200
+Subject: rds: ib: Increment i_fastreg_wrs before bailing out
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Håkon Bugge <haakon.bugge@oracle.com>
+
+commit 4351ca3fcb3ffecf12631b4996bf085a2dad0db6 upstream.
+
+We need to increment i_fastreg_wrs before we bail out from
+rds_ib_post_reg_frmr().
+
+We have a fixed budget of how many FRWR operations that can be
+outstanding using the dedicated QP used for memory registrations and
+de-registrations. This budget is enforced by the atomic_t
+i_fastreg_wrs. If we bail out early in rds_ib_post_reg_frmr(), we will
+"leak" the possibility of posting an FRWR operation, and if that
+accumulates, no FRWR operation can be carried out.
+
+Fixes: 1659185fb4d0 ("RDS: IB: Support Fastreg MR (FRMR) memory registration mode")
+Fixes: 3a2886cca703 ("net/rds: Keep track of and wait for FRWR segments in use upon shutdown")
+Cc: stable@vger.kernel.org
+Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
+Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
+Link: https://patch.msgid.link/20250911133336.451212-1-haakon.bugge@oracle.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rds/ib_frmr.c |   20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+--- a/net/rds/ib_frmr.c
++++ b/net/rds/ib_frmr.c
+@@ -133,12 +133,15 @@ static int rds_ib_post_reg_frmr(struct r
+       ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len,
+                               &off, PAGE_SIZE);
+-      if (unlikely(ret != ibmr->sg_dma_len))
+-              return ret < 0 ? ret : -EINVAL;
++      if (unlikely(ret != ibmr->sg_dma_len)) {
++              ret = ret < 0 ? ret : -EINVAL;
++              goto out_inc;
++      }
+-      if (cmpxchg(&frmr->fr_state,
+-                  FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
+-              return -EBUSY;
++      if (cmpxchg(&frmr->fr_state, FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE) {
++              ret = -EBUSY;
++              goto out_inc;
++      }
+       atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
+@@ -166,11 +169,10 @@ static int rds_ib_post_reg_frmr(struct r
+               /* Failure here can be because of -ENOMEM as well */
+               rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
+-              atomic_inc(&ibmr->ic->i_fastreg_wrs);
+               if (printk_ratelimit())
+                       pr_warn("RDS/IB: %s returned error(%d)\n",
+                               __func__, ret);
+-              goto out;
++              goto out_inc;
+       }
+       /* Wait for the registration to complete in order to prevent an invalid
+@@ -179,8 +181,10 @@ static int rds_ib_post_reg_frmr(struct r
+        */
+       wait_event(frmr->fr_reg_done, !frmr->fr_reg);
+-out:
++      return ret;
++out_inc:
++      atomic_inc(&ibmr->ic->i_fastreg_wrs);
+       return ret;
+ }
diff --git a/queue-6.16/revert-sched_ext-skip-per-cpu-tasks-in-scx_bpf_reenqueue_local.patch b/queue-6.16/revert-sched_ext-skip-per-cpu-tasks-in-scx_bpf_reenqueue_local.patch
new file mode 100644 (file)
index 0000000..fd8f984
--- /dev/null
@@ -0,0 +1,102 @@
+From 0b47b6c3543efd65f2e620e359b05f4938314fbd Mon Sep 17 00:00:00 2001
+From: Andrea Righi <arighi@nvidia.com>
+Date: Fri, 12 Sep 2025 18:14:38 +0200
+Subject: Revert "sched_ext: Skip per-CPU tasks in scx_bpf_reenqueue_local()"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Andrea Righi <arighi@nvidia.com>
+
+commit 0b47b6c3543efd65f2e620e359b05f4938314fbd upstream.
+
+scx_bpf_reenqueue_local() can be called from ops.cpu_release() when a
+CPU is taken by a higher scheduling class to give tasks queued to the
+CPU's local DSQ a chance to be migrated somewhere else, instead of
+waiting indefinitely for that CPU to become available again.
+
+In doing so, we decided to skip migration-disabled tasks, under the
+assumption that they cannot be migrated anyway.
+
+However, when a higher scheduling class preempts a CPU, the running task
+is always inserted at the head of the local DSQ as a migration-disabled
+task. This means it is always skipped by scx_bpf_reenqueue_local(), and
+ends up being confined to the same CPU even if that CPU is heavily
+contended by other higher scheduling class tasks.
+
+As an example, let's consider the following scenario:
+
+ $ schedtool -a 0,1, -e yes > /dev/null
+ $ sudo schedtool -F -p 99 -a 0, -e \
+   stress-ng -c 1 --cpu-load 99 --cpu-load-slice 1000
+
+The first task (SCHED_EXT) can run on CPU0 or CPU1. The second task
+(SCHED_FIFO) is pinned to CPU0 and consumes ~99% of it. If the SCHED_EXT
+task initially runs on CPU0, it will remain there because it always sees
+CPU0 as "idle" in the short gaps left by the RT task, resulting in ~1%
+utilization while CPU1 stays idle:
+
+    0[||||||||||||||||||||||100.0%]   8[                        0.0%]
+    1[                        0.0%]   9[                        0.0%]
+    2[                        0.0%]  10[                        0.0%]
+    3[                        0.0%]  11[                        0.0%]
+    4[                        0.0%]  12[                        0.0%]
+    5[                        0.0%]  13[                        0.0%]
+    6[                        0.0%]  14[                        0.0%]
+    7[                        0.0%]  15[                        0.0%]
+  PID USER       PRI  NI  S CPU  CPU%▽MEM%   TIME+  Command
+ 1067 root        RT   0  R   0  99.0  0.2  0:31.16 stress-ng-cpu [run]
+  975 arighi      20   0  R   0   1.0  0.0  0:26.32 yes
+
+By allowing scx_bpf_reenqueue_local() to re-enqueue migration-disabled
+tasks, the scheduler can choose to migrate them to other CPUs (CPU1 in
+this case) via ops.enqueue(), leading to better CPU utilization:
+
+    0[||||||||||||||||||||||100.0%]   8[                        0.0%]
+    1[||||||||||||||||||||||100.0%]   9[                        0.0%]
+    2[                        0.0%]  10[                        0.0%]
+    3[                        0.0%]  11[                        0.0%]
+    4[                        0.0%]  12[                        0.0%]
+    5[                        0.0%]  13[                        0.0%]
+    6[                        0.0%]  14[                        0.0%]
+    7[                        0.0%]  15[                        0.0%]
+  PID USER       PRI  NI  S CPU  CPU%▽MEM%   TIME+  Command
+  577 root        RT   0  R   0 100.0  0.2  0:23.17 stress-ng-cpu [run]
+  555 arighi      20   0  R   1 100.0  0.0  0:28.67 yes
+
+It's debatable whether per-CPU tasks should be re-enqueued as well, but
+doing so is probably safer: the scheduler can recognize re-enqueued
+tasks through the %SCX_ENQ_REENQ flag, reassess their placement, and
+either put them back at the head of the local DSQ or let another task
+attempt to take the CPU.
+
+This also prevents giving per-CPU tasks an implicit priority boost,
+which would otherwise make them more likely to reclaim CPUs preempted by
+higher scheduling classes.
+
+Fixes: 97e13ecb02668 ("sched_ext: Skip per-CPU tasks in scx_bpf_reenqueue_local()")
+Cc: stable@vger.kernel.org # v6.15+
+Signed-off-by: Andrea Righi <arighi@nvidia.com>
+Acked-by: Changwoo Min <changwoo@igalia.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/ext.c |    6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -6794,12 +6794,8 @@ __bpf_kfunc u32 scx_bpf_reenqueue_local(
+                * CPUs disagree, they use %ENQUEUE_RESTORE which is bypassed to
+                * the current local DSQ for running tasks and thus are not
+                * visible to the BPF scheduler.
+-               *
+-               * Also skip re-enqueueing tasks that can only run on this
+-               * CPU, as they would just be re-added to the same local
+-               * DSQ without any benefit.
+                */
+-              if (p->migration_pending || is_migration_disabled(p) || p->nr_cpus_allowed == 1)
++              if (p->migration_pending)
+                       continue;
+               dispatch_dequeue(rq, p);
diff --git a/queue-6.16/selftests-mptcp-avoid-spurious-errors-on-tcp-disconnect.patch b/queue-6.16/selftests-mptcp-avoid-spurious-errors-on-tcp-disconnect.patch
new file mode 100644 (file)
index 0000000..3416053
--- /dev/null
@@ -0,0 +1,93 @@
+From 8708c5d8b3fb3f6d5d3b9e6bfe01a505819f519a Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Fri, 12 Sep 2025 14:25:52 +0200
+Subject: selftests: mptcp: avoid spurious errors on TCP disconnect
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 8708c5d8b3fb3f6d5d3b9e6bfe01a505819f519a upstream.
+
+The disconnect test-case, with 'plain' TCP sockets generates spurious
+errors, e.g.
+
+  07 ns1 TCP   -> ns1 (dead:beef:1::1:10006) MPTCP
+  read: Connection reset by peer
+  read: Connection reset by peer
+  (duration   155ms) [FAIL] client exit code 3, server 3
+
+  netns ns1-FloSdv (listener) socket stat for 10006:
+  TcpActiveOpens                  2                  0.0
+  TcpPassiveOpens                 2                  0.0
+  TcpEstabResets                  2                  0.0
+  TcpInSegs                       274                0.0
+  TcpOutSegs                      276                0.0
+  TcpOutRsts                      3                  0.0
+  TcpExtPruneCalled               2                  0.0
+  TcpExtRcvPruned                 1                  0.0
+  TcpExtTCPPureAcks               104                0.0
+  TcpExtTCPRcvCollapsed           2                  0.0
+  TcpExtTCPBacklogCoalesce        42                 0.0
+  TcpExtTCPRcvCoalesce            43                 0.0
+  TcpExtTCPChallengeACK           1                  0.0
+  TcpExtTCPFromZeroWindowAdv      42                 0.0
+  TcpExtTCPToZeroWindowAdv        41                 0.0
+  TcpExtTCPWantZeroWindowAdv      13                 0.0
+  TcpExtTCPOrigDataSent           164                0.0
+  TcpExtTCPDelivered              165                0.0
+  TcpExtTCPRcvQDrop               1                  0.0
+
+In the failing scenarios (TCP -> MPTCP), the involved sockets are
+actually plain TCP ones, as fallbacks for passive sockets at 2WHS time
+cause the MPTCP listeners to actually create 'plain' TCP sockets.
+
+Similar to commit 218cc166321f ("selftests: mptcp: avoid spurious errors
+on disconnect"), the root cause is in the user-space bits: the test
+program tries to disconnect as soon as all the pending data has been
+spooled, generating an RST. If such option reaches the peer before the
+connection has reached the closed status, the TCP socket will report an
+error to the user-space, as per protocol specification, causing the
+above failure. Note that it looks like this issue got more visible since
+the "tcp: receiver changes" series from commit 06baf9bfa6ca ("Merge
+branch 'tcp-receiver-changes'").
+
+Address the issue by explicitly waiting for the TCP sockets (-t) to
+reach a closed status before performing the disconnect. More precisely,
+the test program now waits for plain TCP sockets or TCP subflows in
+addition to the MPTCP sockets that were already monitored.
+
+While at it, use 'ss' with '-n' to avoid resolving service names, which
+is not needed here.
+
+Fixes: 218cc166321f ("selftests: mptcp: avoid spurious errors on disconnect")
+Cc: stable@vger.kernel.org
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Reviewed-by: Geliang Tang <geliang@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20250912-net-mptcp-fix-sft-connect-v1-3-d40e77cbbf02@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_connect.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
+@@ -1248,7 +1248,7 @@ void xdisconnect(int fd)
+       else
+               xerror("bad family");
+-      strcpy(cmd, "ss -M | grep -q ");
++      strcpy(cmd, "ss -Mnt | grep -q ");
+       cmdlen = strlen(cmd);
+       if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen],
+                      sizeof(cmd) - cmdlen))
+@@ -1258,7 +1258,7 @@ void xdisconnect(int fd)
+       /*
+        * wait until the pending data is completely flushed and all
+-       * the MPTCP sockets reached the closed status.
++       * the sockets reached the closed status.
+        * disconnect will bypass/ignore/drop any pending data.
+        */
+       for (i = 0; ; i += msec_sleep) {
diff --git a/queue-6.16/selftests-mptcp-connect-catch-io-errors-on-listen-side.patch b/queue-6.16/selftests-mptcp-connect-catch-io-errors-on-listen-side.patch
new file mode 100644 (file)
index 0000000..57765aa
--- /dev/null
@@ -0,0 +1,71 @@
+From 14e22b43df25dbd4301351b882486ea38892ae4f Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Fri, 12 Sep 2025 14:25:51 +0200
+Subject: selftests: mptcp: connect: catch IO errors on listen side
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 14e22b43df25dbd4301351b882486ea38892ae4f upstream.
+
+IO errors were correctly printed to stderr, and propagated up to the
+main loop for the server side, but the returned value was ignored. As a
+consequence, the program for the listener side was no longer exiting
+with an error code in case of IO issues.
+
+Because of that, some issues might not have been seen. But very likely,
+most issues either had an effect on the client side, or the file
+transfer was not the expected one, e.g. the connection got reset before
+the end. Still, it is better to fix this.
+
+The main consequence of this issue is the error that was reported by the
+selftests: the received and sent files were different, and the MIB
+counters were not printed. Also, when such errors happened during the
+'disconnect' tests, the program tried to continue until the timeout.
+
+Now when an IO error is detected, the program exits directly with an
+error.
+
+Fixes: 05be5e273c84 ("selftests: mptcp: add disconnect tests")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Reviewed-by: Geliang Tang <geliang@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20250912-net-mptcp-fix-sft-connect-v1-2-d40e77cbbf02@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_connect.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
+@@ -1093,6 +1093,7 @@ int main_loop_s(int listensock)
+       struct pollfd polls;
+       socklen_t salen;
+       int remotesock;
++      int err = 0;
+       int fd = 0;
+ again:
+@@ -1125,7 +1126,7 @@ again:
+               SOCK_TEST_TCPULP(remotesock, 0);
+               memset(&winfo, 0, sizeof(winfo));
+-              copyfd_io(fd, remotesock, 1, true, &winfo);
++              err = copyfd_io(fd, remotesock, 1, true, &winfo);
+       } else {
+               perror("accept");
+               return 1;
+@@ -1134,10 +1135,10 @@ again:
+       if (cfg_input)
+               close(fd);
+-      if (--cfg_repeat > 0)
++      if (!err && --cfg_repeat > 0)
+               goto again;
+-      return 0;
++      return err;
+ }
+ static void init_rng(void)
index e146e05a1c77e356531c503e8f93e2f46250305c..ba880eb4f9a06641ca2f41d3efcecb78b9c611c7 100644 (file)
@@ -44,3 +44,64 @@ net-clear-sk-sk_ino-in-sk_set_socket-sk-null.patch
 net-liquidio-fix-overflow-in-octeon_init_instr_queue.patch
 cnic-fix-use-after-free-bugs-in-cnic_delete_task.patch
 octeontx2-pf-fix-use-after-free-bugs-in-otx2_sync_ts.patch
+ksmbd-smbdirect-validate-data_offset-and-data_length-field-of-smb_direct_data_transfer.patch
+ksmbd-smbdirect-verify-remaining_data_length-respects-max_fragmented_recv_size.patch
+zram-fix-slot-write-race-condition.patch
+nilfs2-fix-cfi-failure-when-accessing-sys-fs-nilfs2-features.patch
+crypto-af_alg-disallow-concurrent-writes-in-af_alg_sendmsg.patch
+power-supply-bq27xxx-fix-error-return-in-case-of-no-bq27000-hdq-battery.patch
+power-supply-bq27xxx-restrict-no-battery-detection-to-bq27000.patch
+revert-sched_ext-skip-per-cpu-tasks-in-scx_bpf_reenqueue_local.patch
+btrfs-initialize-inode-file_extent_tree-after-i_mode-has-been-set.patch
+dm-raid-don-t-set-io_min-and-io_opt-for-raid1.patch
+dm-stripe-fix-a-possible-integer-overflow.patch
+mm-gup-check-ref_count-instead-of-lru-before-migration.patch
+mm-revert-mm-gup-clear-the-lru-flag-of-a-page-before-adding-to-lru-batch.patch
+gup-optimize-longterm-pin_user_pages-for-large-folio.patch
+mm-gup-local-lru_add_drain-to-avoid-lru_add_drain_all.patch
+mm-revert-mm-vmscan.c-fix-oom-on-swap-stress-test.patch
+mm-folio_may_be_lru_cached-unless-folio_test_large.patch
+loongarch-update-help-info-of-arch_strict_align.patch
+objtool-loongarch-mark-types-based-on-break-immediate-code.patch
+objtool-loongarch-mark-special-atomic-instruction-as-insn_bug-type.patch
+loongarch-fix-unreliable-stack-for-live-patching.patch
+loongarch-vdso-check-kcalloc-result-in-init_vdso.patch
+loongarch-align-acpi-structures-if-arch_strict_align-enabled.patch
+loongarch-check-the-return-value-when-creating-kobj.patch
+loongarch-make-lto-case-independent-in-makefile.patch
+loongarch-handle-jump-tables-options-for-rust.patch
+loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_ctrl_access.patch
+loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_regs_access.patch
+loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_eiointc_sw_status_access.patch
+loongarch-kvm-avoid-copy_-_user-with-lock-hold-in-kvm_pch_pic_regs_access.patch
+loongarch-kvm-fix-vm-migration-failure-with-ptw-enabled.patch
+iommu-vt-d-fix-__domain_mapping-s-usage-of-switch_to_super_page.patch
+iommu-amd-fix-ivrs_base-memleak-in-early_amd_iommu_init.patch
+iommu-amd-pgtbl-fix-possible-race-while-increase-page-table-level.patch
+iommu-s390-fix-memory-corruption-when-using-identity-domain.patch
+iommu-s390-make-attach-succeed-when-the-device-was-surprise-removed.patch
+btrfs-tree-checker-fix-the-incorrect-inode-ref-size-check.patch
+asoc-sdca-add-quirk-for-incorrect-function-types-for-3-systems.patch
+asoc-qcom-audioreach-fix-lpaif_type-configuration-for-the-i2s-interface.patch
+asoc-qcom-q6apm-lpass-dais-fix-null-pointer-dereference-if-source-graph-failed.patch
+asoc-qcom-q6apm-lpass-dais-fix-missing-set_fmt-dai-op-for-i2s.patch
+mmc-mvsdio-fix-dma_unmap_sg-nents-value.patch
+mmc-sdhci-move-the-code-related-to-setting-the-clock-from-sdhci_set_ios_common-into-sdhci_set_ios.patch
+mmc-sdhci-pci-gli-gl9767-fix-initializing-the-uhs-ii-interface-during-a-power-on.patch
+mmc-sdhci-uhs2-fix-calling-incorrect-sdhci_set_clock-function.patch
+x86-sev-guard-sev_evict_cache-with-config_amd_mem_encrypt.patch
+kvm-svm-sync-tpr-from-lapic-into-vmcb-v_tpr-even-if-avic-is-active.patch
+drm-amdkfd-add-proper-handling-for-s0ix.patch
+drm-amdgpu-suspend-kfd-and-kgd-user-queues-for-s0ix.patch
+drm-amd-display-allow-rx6xxx-rx7700-to-invoke-amdgpu_irq_get-put.patch
+drm-amd-only-restore-cached-manual-clock-settings-in-restore-if-od-enabled.patch
+io_uring-io-wq-fix-max_workers-breakage-and-nr_workers-underflow.patch
+io_uring-include-dying-ring-in-task_work-should-cancel-state.patch
+net-rfkill-gpio-fix-crash-due-to-dereferencering-uninitialized-pointer.patch
+gpiolib-acpi-initialize-acpi_gpio_info-struct.patch
+crypto-ccp-always-pass-in-an-error-pointer-to-__sev_platform_shutdown_locked.patch
+rds-ib-increment-i_fastreg_wrs-before-bailing-out.patch
+mptcp-propagate-shutdown-to-subflows-when-possible.patch
+selftests-mptcp-connect-catch-io-errors-on-listen-side.patch
+selftests-mptcp-avoid-spurious-errors-on-tcp-disconnect.patch
+alsa-hda-realtek-fix-mute-led-for-hp-laptop-15-dw4xx.patch
diff --git a/queue-6.16/x86-sev-guard-sev_evict_cache-with-config_amd_mem_encrypt.patch b/queue-6.16/x86-sev-guard-sev_evict_cache-with-config_amd_mem_encrypt.patch
new file mode 100644 (file)
index 0000000..a961684
--- /dev/null
@@ -0,0 +1,95 @@
+From 7f830e126dc357fc086905ce9730140fd4528d66 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 15 Sep 2025 11:04:12 -0500
+Subject: x86/sev: Guard sev_evict_cache() with CONFIG_AMD_MEM_ENCRYPT
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit 7f830e126dc357fc086905ce9730140fd4528d66 upstream.
+
+The sev_evict_cache() is guest-related code and should be guarded by
+CONFIG_AMD_MEM_ENCRYPT, not CONFIG_KVM_AMD_SEV.
+
+CONFIG_AMD_MEM_ENCRYPT=y is required for a guest to run properly as an SEV-SNP
+guest, but a guest kernel built with CONFIG_KVM_AMD_SEV=n would get the stub
+function of sev_evict_cache() instead of the version that performs the actual
+eviction. Move the function declarations under the appropriate #ifdef.
+
+Fixes: 7b306dfa326f ("x86/sev: Evict cache lines during SNP memory validation")
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: stable@kernel.org # 6.16.x
+Link: https://lore.kernel.org/r/70e38f2c4a549063de54052c9f64929705313526.1757708959.git.thomas.lendacky@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/sev.h |   38 +++++++++++++++++++-------------------
+ 1 file changed, 19 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/include/asm/sev.h
++++ b/arch/x86/include/asm/sev.h
+@@ -564,6 +564,24 @@ enum es_result sev_es_ghcb_hv_call(struc
+ extern struct ghcb *boot_ghcb;
++static inline void sev_evict_cache(void *va, int npages)
++{
++      volatile u8 val __always_unused;
++      u8 *bytes = va;
++      int page_idx;
++
++      /*
++       * For SEV guests, a read from the first/last cache-lines of a 4K page
++       * using the guest key is sufficient to cause a flush of all cache-lines
++       * associated with that 4K page without incurring all the overhead of a
++       * full CLFLUSH sequence.
++       */
++      for (page_idx = 0; page_idx < npages; page_idx++) {
++              val = bytes[page_idx * PAGE_SIZE];
++              val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1];
++      }
++}
++
+ #else /* !CONFIG_AMD_MEM_ENCRYPT */
+ #define snp_vmpl 0
+@@ -607,6 +625,7 @@ static inline int snp_send_guest_request
+ static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; }
+ static inline void __init snp_secure_tsc_prepare(void) { }
+ static inline void __init snp_secure_tsc_init(void) { }
++static inline void sev_evict_cache(void *va, int npages) {}
+ #endif        /* CONFIG_AMD_MEM_ENCRYPT */
+@@ -621,24 +640,6 @@ int rmp_make_shared(u64 pfn, enum pg_lev
+ void snp_leak_pages(u64 pfn, unsigned int npages);
+ void kdump_sev_callback(void);
+ void snp_fixup_e820_tables(void);
+-
+-static inline void sev_evict_cache(void *va, int npages)
+-{
+-      volatile u8 val __always_unused;
+-      u8 *bytes = va;
+-      int page_idx;
+-
+-      /*
+-       * For SEV guests, a read from the first/last cache-lines of a 4K page
+-       * using the guest key is sufficient to cause a flush of all cache-lines
+-       * associated with that 4K page without incurring all the overhead of a
+-       * full CLFLUSH sequence.
+-       */
+-      for (page_idx = 0; page_idx < npages; page_idx++) {
+-              val = bytes[page_idx * PAGE_SIZE];
+-              val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1];
+-      }
+-}
+ #else
+ static inline bool snp_probe_rmptable_info(void) { return false; }
+ static inline int snp_rmptable_init(void) { return -ENOSYS; }
+@@ -654,7 +655,6 @@ static inline int rmp_make_shared(u64 pf
+ static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
+ static inline void kdump_sev_callback(void) { }
+ static inline void snp_fixup_e820_tables(void) {}
+-static inline void sev_evict_cache(void *va, int npages) {}
+ #endif
+ #endif
diff --git a/queue-6.16/zram-fix-slot-write-race-condition.patch b/queue-6.16/zram-fix-slot-write-race-condition.patch
new file mode 100644 (file)
index 0000000..d4cc01f
--- /dev/null
@@ -0,0 +1,86 @@
+From ce4be9e4307c5a60701ff6e0cafa74caffdc54ce Mon Sep 17 00:00:00 2001
+From: Sergey Senozhatsky <senozhatsky@chromium.org>
+Date: Tue, 9 Sep 2025 13:48:35 +0900
+Subject: zram: fix slot write race condition
+
+From: Sergey Senozhatsky <senozhatsky@chromium.org>
+
+commit ce4be9e4307c5a60701ff6e0cafa74caffdc54ce upstream.
+
+Parallel concurrent writes to the same zram index result in leaked
+zsmalloc handles.  Schematically we can have something like this:
+
+CPU0                              CPU1
+zram_slot_lock()
+zs_free(handle)
+zram_slot_lock()
+                               zram_slot_lock()
+                               zs_free(handle)
+                               zram_slot_lock()
+
+compress                       compress
+handle = zs_malloc()           handle = zs_malloc()
+zram_slot_lock
+zram_set_handle(handle)
+zram_slot_lock
+                               zram_slot_lock
+                               zram_set_handle(handle)
+                               zram_slot_lock
+
+Either CPU0 or CPU1 zsmalloc handle will leak because zs_free() is done
+too early.  In fact, we need to reset zram entry right before we set its
+new handle, all under the same slot lock scope.
+
+Link: https://lkml.kernel.org/r/20250909045150.635345-1-senozhatsky@chromium.org
+Fixes: 71268035f5d7 ("zram: free slot memory early during write")
+Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
+Reported-by: Changhui Zhong <czhong@redhat.com>
+Closes: https://lore.kernel.org/all/CAGVVp+UtpGoW5WEdEU7uVTtsSCjPN=ksN6EcvyypAtFDOUf30A@mail.gmail.com/
+Tested-by: Changhui Zhong <czhong@redhat.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/zram/zram_drv.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -1794,6 +1794,7 @@ static int write_same_filled_page(struct
+                                 u32 index)
+ {
+       zram_slot_lock(zram, index);
++      zram_free_page(zram, index);
+       zram_set_flag(zram, index, ZRAM_SAME);
+       zram_set_handle(zram, index, fill);
+       zram_slot_unlock(zram, index);
+@@ -1831,6 +1832,7 @@ static int write_incompressible_page(str
+       kunmap_local(src);
+       zram_slot_lock(zram, index);
++      zram_free_page(zram, index);
+       zram_set_flag(zram, index, ZRAM_HUGE);
+       zram_set_handle(zram, index, handle);
+       zram_set_obj_size(zram, index, PAGE_SIZE);
+@@ -1854,11 +1856,6 @@ static int zram_write_page(struct zram *
+       unsigned long element;
+       bool same_filled;
+-      /* First, free memory allocated to this slot (if any) */
+-      zram_slot_lock(zram, index);
+-      zram_free_page(zram, index);
+-      zram_slot_unlock(zram, index);
+-
+       mem = kmap_local_page(page);
+       same_filled = page_same_filled(mem, &element);
+       kunmap_local(mem);
+@@ -1900,6 +1897,7 @@ static int zram_write_page(struct zram *
+       zcomp_stream_put(zstrm);
+       zram_slot_lock(zram, index);
++      zram_free_page(zram, index);
+       zram_set_handle(zram, index, handle);
+       zram_set_obj_size(zram, index, comp_len);
+       zram_slot_unlock(zram, index);