From: Greg Kroah-Hartman Date: Sat, 30 Mar 2024 09:37:56 +0000 (+0100) Subject: 6.8-stable patches X-Git-Tag: v6.7.12~117 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=25c5d9f8edea339f5f8d7ebb9b0ff99deb4e89cc;p=thirdparty%2Fkernel%2Fstable-queue.git 6.8-stable patches added patches: alsa-hda-tas2781-add-locks-to-kcontrols.patch alsa-hda-tas2781-remove-digital-gain-kcontrol.patch arm-prctl-reject-pr_set_mdwe-on-pre-armv6.patch btrfs-fix-race-in-read_extent_buffer_pages.patch btrfs-validate-device-maj-min-during-open.patch crypto-iaa-fix-nr_cpus-nr_iaa-case.patch drm-amd-display-prevent-crash-when-disable-stream.patch efi-libstub-cast-away-type-warning-in-use-of-max.patch init-open-initrd.image-with-o_largefile.patch mm-zswap-fix-writeback-shinker-gfp_noio-gfp_nofs-recursion.patch prctl-generalize-pr_set_mdwe-support-check-to-be-per-arch.patch tmpfs-fix-race-on-handling-dquot-rbtree.patch x86-efistub-add-missing-boot_params-for-mixed-mode-compat-entry.patch x86-efistub-reinstate-soft-limit-for-initrd-loading.patch --- diff --git a/queue-6.8/alsa-hda-tas2781-add-locks-to-kcontrols.patch b/queue-6.8/alsa-hda-tas2781-add-locks-to-kcontrols.patch new file mode 100644 index 00000000000..74f1a0dfe1c --- /dev/null +++ b/queue-6.8/alsa-hda-tas2781-add-locks-to-kcontrols.patch @@ -0,0 +1,180 @@ +From 15bc3066d2378eef1b45254be9df23b0dd7f1667 Mon Sep 17 00:00:00 2001 +From: Gergo Koteles +Date: Tue, 26 Mar 2024 17:18:46 +0100 +Subject: ALSA: hda/tas2781: add locks to kcontrols + +From: Gergo Koteles + +commit 15bc3066d2378eef1b45254be9df23b0dd7f1667 upstream. + +The rcabin.profile_cfg_id, cur_prog, cur_conf, force_fwload_status +variables are acccessible from multiple threads and therefore require +locking. 
+ +Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") +CC: stable@vger.kernel.org +Signed-off-by: Gergo Koteles +Message-ID: +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/pci/hda/tas2781_hda_i2c.c | 50 ++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 48 insertions(+), 2 deletions(-) + +--- a/sound/pci/hda/tas2781_hda_i2c.c ++++ b/sound/pci/hda/tas2781_hda_i2c.c +@@ -197,8 +197,12 @@ static int tasdevice_get_profile_id(stru + { + struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + ++ mutex_lock(&tas_priv->codec_lock); ++ + ucontrol->value.integer.value[0] = tas_priv->rcabin.profile_cfg_id; + ++ mutex_unlock(&tas_priv->codec_lock); ++ + return 0; + } + +@@ -212,11 +216,15 @@ static int tasdevice_set_profile_id(stru + + val = clamp(nr_profile, 0, max); + ++ mutex_lock(&tas_priv->codec_lock); ++ + if (tas_priv->rcabin.profile_cfg_id != val) { + tas_priv->rcabin.profile_cfg_id = val; + ret = 1; + } + ++ mutex_unlock(&tas_priv->codec_lock); ++ + return ret; + } + +@@ -253,8 +261,12 @@ static int tasdevice_program_get(struct + { + struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + ++ mutex_lock(&tas_priv->codec_lock); ++ + ucontrol->value.integer.value[0] = tas_priv->cur_prog; + ++ mutex_unlock(&tas_priv->codec_lock); ++ + return 0; + } + +@@ -269,11 +281,15 @@ static int tasdevice_program_put(struct + + val = clamp(nr_program, 0, max); + ++ mutex_lock(&tas_priv->codec_lock); ++ + if (tas_priv->cur_prog != val) { + tas_priv->cur_prog = val; + ret = 1; + } + ++ mutex_unlock(&tas_priv->codec_lock); ++ + return ret; + } + +@@ -282,8 +298,12 @@ static int tasdevice_config_get(struct s + { + struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + ++ mutex_lock(&tas_priv->codec_lock); ++ + ucontrol->value.integer.value[0] = tas_priv->cur_conf; + ++ mutex_unlock(&tas_priv->codec_lock); ++ + return 0; + } + +@@ -298,11 +318,15 @@ static int tasdevice_config_put(struct s + + val = 
clamp(nr_config, 0, max); + ++ mutex_lock(&tas_priv->codec_lock); ++ + if (tas_priv->cur_conf != val) { + tas_priv->cur_conf = val; + ret = 1; + } + ++ mutex_unlock(&tas_priv->codec_lock); ++ + return ret; + } + +@@ -312,8 +336,15 @@ static int tas2781_amp_getvol(struct snd + struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; ++ int ret; + +- return tasdevice_amp_getvol(tas_priv, ucontrol, mc); ++ mutex_lock(&tas_priv->codec_lock); ++ ++ ret = tasdevice_amp_getvol(tas_priv, ucontrol, mc); ++ ++ mutex_unlock(&tas_priv->codec_lock); ++ ++ return ret; + } + + static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol, +@@ -322,9 +353,16 @@ static int tas2781_amp_putvol(struct snd + struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; ++ int ret; ++ ++ mutex_lock(&tas_priv->codec_lock); + + /* The check of the given value is in tasdevice_amp_putvol. */ +- return tasdevice_amp_putvol(tas_priv, ucontrol, mc); ++ ret = tasdevice_amp_putvol(tas_priv, ucontrol, mc); ++ ++ mutex_unlock(&tas_priv->codec_lock); ++ ++ return ret; + } + + static int tas2781_force_fwload_get(struct snd_kcontrol *kcontrol, +@@ -332,10 +370,14 @@ static int tas2781_force_fwload_get(stru + { + struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + ++ mutex_lock(&tas_priv->codec_lock); ++ + ucontrol->value.integer.value[0] = (int)tas_priv->force_fwload_status; + dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__, + tas_priv->force_fwload_status ? 
"ON" : "OFF"); + ++ mutex_unlock(&tas_priv->codec_lock); ++ + return 0; + } + +@@ -345,6 +387,8 @@ static int tas2781_force_fwload_put(stru + struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + bool change, val = (bool)ucontrol->value.integer.value[0]; + ++ mutex_lock(&tas_priv->codec_lock); ++ + if (tas_priv->force_fwload_status == val) + change = false; + else { +@@ -354,6 +398,8 @@ static int tas2781_force_fwload_put(stru + dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__, + tas_priv->force_fwload_status ? "ON" : "OFF"); + ++ mutex_unlock(&tas_priv->codec_lock); ++ + return change; + } + diff --git a/queue-6.8/alsa-hda-tas2781-remove-digital-gain-kcontrol.patch b/queue-6.8/alsa-hda-tas2781-remove-digital-gain-kcontrol.patch new file mode 100644 index 00000000000..352fa736d6b --- /dev/null +++ b/queue-6.8/alsa-hda-tas2781-remove-digital-gain-kcontrol.patch @@ -0,0 +1,96 @@ +From ae065d0ce9e36ca4efdfb9b96ce3395bd1c19372 Mon Sep 17 00:00:00 2001 +From: Gergo Koteles +Date: Tue, 26 Mar 2024 17:18:45 +0100 +Subject: ALSA: hda/tas2781: remove digital gain kcontrol + +From: Gergo Koteles + +commit ae065d0ce9e36ca4efdfb9b96ce3395bd1c19372 upstream. + +The "Speaker Digital Gain" kcontrol controls the TAS2781_DVC_LVL (0x1A) +register. Unfortunately the tas2563 does not have DVC_LVL, but has +INT_MASK0 in 0x1A, which has been misused so far. + +Since commit c1947ce61ff4 ("ALSA: hda/realtek: tas2781: enable subwoofer +volume control") the volume of the tas2781 amplifiers can be controlled +by the master volume, so this digital gain kcontrol is not needed. + +Remove it. 
+ +Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") +CC: stable@vger.kernel.org +Signed-off-by: Gergo Koteles +Message-ID: <741fc21db994efd58f83e7aef38931204961e5b2.1711469583.git.soyer@irl.hu> +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/pci/hda/tas2781_hda_i2c.c | 37 +------------------------------------ + 1 file changed, 1 insertion(+), 36 deletions(-) + +--- a/sound/pci/hda/tas2781_hda_i2c.c ++++ b/sound/pci/hda/tas2781_hda_i2c.c +@@ -89,7 +89,7 @@ struct tas2781_hda { + struct snd_kcontrol *dsp_prog_ctl; + struct snd_kcontrol *dsp_conf_ctl; + struct snd_kcontrol *prof_ctl; +- struct snd_kcontrol *snd_ctls[3]; ++ struct snd_kcontrol *snd_ctls[2]; + }; + + static int tas2781_get_i2c_res(struct acpi_resource *ares, void *data) +@@ -306,27 +306,6 @@ static int tasdevice_config_put(struct s + return ret; + } + +-/* +- * tas2781_digital_getvol - get the volum control +- * @kcontrol: control pointer +- * @ucontrol: User data +- * Customer Kcontrol for tas2781 is primarily for regmap booking, paging +- * depends on internal regmap mechanism. +- * tas2781 contains book and page two-level register map, especially +- * book switching will set the register BXXP00R7F, after switching to the +- * correct book, then leverage the mechanism for paging to access the +- * register. 
+- */ +-static int tas2781_digital_getvol(struct snd_kcontrol *kcontrol, +- struct snd_ctl_elem_value *ucontrol) +-{ +- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); +- struct soc_mixer_control *mc = +- (struct soc_mixer_control *)kcontrol->private_value; +- +- return tasdevice_digital_getvol(tas_priv, ucontrol, mc); +-} +- + static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) + { +@@ -337,17 +316,6 @@ static int tas2781_amp_getvol(struct snd + return tasdevice_amp_getvol(tas_priv, ucontrol, mc); + } + +-static int tas2781_digital_putvol(struct snd_kcontrol *kcontrol, +- struct snd_ctl_elem_value *ucontrol) +-{ +- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); +- struct soc_mixer_control *mc = +- (struct soc_mixer_control *)kcontrol->private_value; +- +- /* The check of the given value is in tasdevice_digital_putvol. */ +- return tasdevice_digital_putvol(tas_priv, ucontrol, mc); +-} +- + static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) + { +@@ -393,9 +361,6 @@ static const struct snd_kcontrol_new tas + ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain", TAS2781_AMP_LEVEL, + 1, 0, 20, 0, tas2781_amp_getvol, + tas2781_amp_putvol, amp_vol_tlv), +- ACARD_SINGLE_RANGE_EXT_TLV("Speaker Digital Gain", TAS2781_DVC_LVL, +- 0, 0, 200, 1, tas2781_digital_getvol, +- tas2781_digital_putvol, dvc_tlv), + ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load", 0, + tas2781_force_fwload_get, tas2781_force_fwload_put), + }; diff --git a/queue-6.8/arm-prctl-reject-pr_set_mdwe-on-pre-armv6.patch b/queue-6.8/arm-prctl-reject-pr_set_mdwe-on-pre-armv6.patch new file mode 100644 index 00000000000..8876d45731f --- /dev/null +++ b/queue-6.8/arm-prctl-reject-pr_set_mdwe-on-pre-armv6.patch @@ -0,0 +1,61 @@ +From 166ce846dc5974a266f6c2a2896dbef5425a6f21 Mon Sep 17 00:00:00 2001 +From: Zev Weiss +Date: Mon, 26 Feb 2024 17:35:42 -0800 +Subject: ARM: prctl: reject 
PR_SET_MDWE on pre-ARMv6 + +From: Zev Weiss + +commit 166ce846dc5974a266f6c2a2896dbef5425a6f21 upstream. + +On v5 and lower CPUs we can't provide MDWE protection, so ensure we fail +any attempt to enable it via prctl(PR_SET_MDWE). + +Previously such an attempt would misleadingly succeed, leading to any +subsequent mmap(PROT_READ|PROT_WRITE) or execve() failing unconditionally +(the latter somewhat violently via force_fatal_sig(SIGSEGV) due to +READ_IMPLIES_EXEC). + +Link: https://lkml.kernel.org/r/20240227013546.15769-6-zev@bewilderbeest.net +Signed-off-by: Zev Weiss +Cc: [6.3+] +Cc: Borislav Petkov +Cc: David Hildenbrand +Cc: Florent Revest +Cc: Helge Deller +Cc: "James E.J. Bottomley" +Cc: Josh Triplett +Cc: Kees Cook +Cc: Miguel Ojeda +Cc: Mike Rapoport (IBM) +Cc: Oleg Nesterov +Cc: Ondrej Mosnacek +Cc: Rick Edgecombe +Cc: Russell King (Oracle) +Cc: Sam James +Cc: Stefan Roesch +Cc: Yang Shi +Cc: Yin Fengwei +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/include/asm/mman.h | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + create mode 100644 arch/arm/include/asm/mman.h + +--- /dev/null ++++ b/arch/arm/include/asm/mman.h +@@ -0,0 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __ASM_MMAN_H__ ++#define __ASM_MMAN_H__ ++ ++#include ++#include ++ ++static inline bool arch_memory_deny_write_exec_supported(void) ++{ ++ return cpu_architecture() >= CPU_ARCH_ARMv6; ++} ++#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported ++ ++#endif /* __ASM_MMAN_H__ */ diff --git a/queue-6.8/btrfs-fix-race-in-read_extent_buffer_pages.patch b/queue-6.8/btrfs-fix-race-in-read_extent_buffer_pages.patch new file mode 100644 index 00000000000..b1ecbbd9184 --- /dev/null +++ b/queue-6.8/btrfs-fix-race-in-read_extent_buffer_pages.patch @@ -0,0 +1,98 @@ +From ef1e68236b9153c27cb7cf29ead0c532870d4215 Mon Sep 17 00:00:00 2001 +From: Tavian Barnes +Date: Fri, 15 Mar 2024 21:14:29 -0400 +Subject: btrfs: fix race in 
read_extent_buffer_pages() + +From: Tavian Barnes + +commit ef1e68236b9153c27cb7cf29ead0c532870d4215 upstream. + +There are reports from tree-checker that detects corrupted nodes, +without any obvious pattern so possibly an overwrite in memory. +After some debugging it turns out there's a race when reading an extent +buffer where the uptodate status can be missed. + +To prevent concurrent reads for the same extent buffer, +read_extent_buffer_pages() performs these checks: + + /* (1) */ + if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) + return 0; + + /* (2) */ + if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags)) + goto done; + +At this point, it seems safe to start the actual read operation. Once +that completes, end_bbio_meta_read() does + + /* (3) */ + set_extent_buffer_uptodate(eb); + + /* (4) */ + clear_bit(EXTENT_BUFFER_READING, &eb->bflags); + +Normally, this is enough to ensure only one read happens, and all other +callers wait for it to finish before returning. Unfortunately, there is +a racy interleaving: + + Thread A | Thread B | Thread C + ---------+----------+--------- + (1) | | + | (1) | + (2) | | + (3) | | + (4) | | + | (2) | + | | (1) + +When this happens, thread B kicks off an unnecessary read. Worse, thread +C will see UPTODATE set and return immediately, while the read from +thread B is still in progress. This race could result in tree-checker +errors like this as the extent buffer is concurrently modified: + + BTRFS critical (device dm-0): corrupted node, root=256 + block=8550954455682405139 owner mismatch, have 11858205567642294356 + expect [256, 18446744073709551360] + +Fix it by testing UPTODATE again after setting the READING bit, and if +it's been set, skip the unnecessary read. 
+ +Fixes: d7172f52e993 ("btrfs: use per-buffer locking for extent_buffer reading") +Link: https://lore.kernel.org/linux-btrfs/CAHk-=whNdMaN9ntZ47XRKP6DBes2E5w7fi-0U3H2+PS18p+Pzw@mail.gmail.com/ +Link: https://lore.kernel.org/linux-btrfs/f51a6d5d7432455a6a858d51b49ecac183e0bbc9.1706312914.git.wqu@suse.com/ +Link: https://lore.kernel.org/linux-btrfs/c7241ea4-fcc6-48d2-98c8-b5ea790d6c89@gmx.com/ +CC: stable@vger.kernel.org # 6.5+ +Reviewed-by: Qu Wenruo +Reviewed-by: Christoph Hellwig +Signed-off-by: Tavian Barnes +Reviewed-by: David Sterba +[ minor update of changelog ] +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -4319,6 +4319,19 @@ int read_extent_buffer_pages(struct exte + if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags)) + goto done; + ++ /* ++ * Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above ++ * test_and_set_bit(EXTENT_BUFFER_READING), someone else could have ++ * started and finished reading the same eb. In this case, UPTODATE ++ * will now be set, and we shouldn't read it in again. 
++ */ ++ if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) { ++ clear_bit(EXTENT_BUFFER_READING, &eb->bflags); ++ smp_mb__after_atomic(); ++ wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING); ++ return 0; ++ } ++ + clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); + eb->read_mirror = 0; + check_buffer_tree_ref(eb); diff --git a/queue-6.8/btrfs-validate-device-maj-min-during-open.patch b/queue-6.8/btrfs-validate-device-maj-min-during-open.patch new file mode 100644 index 00000000000..65be691e798 --- /dev/null +++ b/queue-6.8/btrfs-validate-device-maj-min-during-open.patch @@ -0,0 +1,56 @@ +From 9f7eb8405dcbc79c5434821e9e3e92abe187ee8e Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Fri, 1 Mar 2024 08:42:13 +0800 +Subject: btrfs: validate device maj:min during open + +From: Anand Jain + +commit 9f7eb8405dcbc79c5434821e9e3e92abe187ee8e upstream. + +Boris managed to create a device capable of changing its maj:min without +altering its device path. + +Only multi-devices can be scanned. A device that gets scanned and remains +in the btrfs kernel cache might end up with an incorrect maj:min. + +Despite the temp-fsid feature patch did not introduce this bug, it could +lead to issues if the above multi-device is converted to a single device +with a stale maj:min. Subsequently, attempting to mount the same device +with the correct maj:min might mistake it for another device with the same +fsid, potentially resulting in wrongly auto-enabling the temp-fsid feature. + +To address this, this patch validates the device's maj:min at the time of +device open and updates it if it has changed since the last scan. 
+ +CC: stable@vger.kernel.org # 6.7+ +Fixes: a5b8a5f9f835 ("btrfs: support cloned-device mount capability") +Reported-by: Boris Burkov +Co-developed-by: Boris Burkov +Reviewed-by: Boris Burkov # +Signed-off-by: Anand Jain +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -694,6 +694,16 @@ static int btrfs_open_one_device(struct + device->bdev = bdev_handle->bdev; + clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); + ++ if (device->devt != device->bdev->bd_dev) { ++ btrfs_warn(NULL, ++ "device %s maj:min changed from %d:%d to %d:%d", ++ device->name->str, MAJOR(device->devt), ++ MINOR(device->devt), MAJOR(device->bdev->bd_dev), ++ MINOR(device->bdev->bd_dev)); ++ ++ device->devt = device->bdev->bd_dev; ++ } ++ + fs_devices->open_devices++; + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && + device->devid != BTRFS_DEV_REPLACE_DEVID) { diff --git a/queue-6.8/crypto-iaa-fix-nr_cpus-nr_iaa-case.patch b/queue-6.8/crypto-iaa-fix-nr_cpus-nr_iaa-case.patch new file mode 100644 index 00000000000..ae8a7e969ce --- /dev/null +++ b/queue-6.8/crypto-iaa-fix-nr_cpus-nr_iaa-case.patch @@ -0,0 +1,52 @@ +From 5a7e89d3315d1be86aff8a8bf849023cda6547f7 Mon Sep 17 00:00:00 2001 +From: Tom Zanussi +Date: Thu, 21 Mar 2024 16:08:45 -0500 +Subject: crypto: iaa - Fix nr_cpus < nr_iaa case + +From: Tom Zanussi + +commit 5a7e89d3315d1be86aff8a8bf849023cda6547f7 upstream. + +If nr_cpus < nr_iaa, the calculated cpus_per_iaa will be 0, which +causes a divide-by-0 in rebalance_wq_table(). + +Make sure cpus_per_iaa is 1 in that case, and also in the nr_iaa == 0 +case, even though cpus_per_iaa is never used if nr_iaa == 0, for +paranoia. 
+ +Cc: # v6.8+ +Reported-by: Jerry Snitselaar +Signed-off-by: Tom Zanussi +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/intel/iaa/iaa_crypto_main.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c ++++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c +@@ -908,6 +908,8 @@ static int save_iaa_wq(struct idxd_wq *w + return -EINVAL; + + cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; ++ if (!cpus_per_iaa) ++ cpus_per_iaa = 1; + out: + return 0; + } +@@ -923,10 +925,12 @@ static void remove_iaa_wq(struct idxd_wq + } + } + +- if (nr_iaa) ++ if (nr_iaa) { + cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; +- else +- cpus_per_iaa = 0; ++ if (!cpus_per_iaa) ++ cpus_per_iaa = 1; ++ } else ++ cpus_per_iaa = 1; + } + + static int wq_table_add_wqs(int iaa, int cpu) diff --git a/queue-6.8/drm-amd-display-prevent-crash-when-disable-stream.patch b/queue-6.8/drm-amd-display-prevent-crash-when-disable-stream.patch new file mode 100644 index 00000000000..b553ba98c14 --- /dev/null +++ b/queue-6.8/drm-amd-display-prevent-crash-when-disable-stream.patch @@ -0,0 +1,40 @@ +From 72d72e8fddbcd6c98e1b02d32cf6f2b04e10bd1c Mon Sep 17 00:00:00 2001 +From: Chris Park +Date: Tue, 5 Mar 2024 17:41:15 -0500 +Subject: drm/amd/display: Prevent crash when disable stream + +From: Chris Park + +commit 72d72e8fddbcd6c98e1b02d32cf6f2b04e10bd1c upstream. + +[Why] +Disabling stream encoder invokes a function that no longer exists. + +[How] +Check if the function declaration is NULL in disable stream encoder. 
+ +Cc: Mario Limonciello +Cc: Alex Deucher +Cc: stable@vger.kernel.org +Reviewed-by: Charlene Liu +Acked-by: Wayne Lin +Signed-off-by: Chris Park +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +@@ -1185,7 +1185,8 @@ void dce110_disable_stream(struct pipe_c + if (dccg) { + dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); + dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); +- dccg->funcs->set_dtbclk_dto(dccg, &dto_params); ++ if (dccg && dccg->funcs->set_dtbclk_dto) ++ dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + } + } else if (dccg && dccg->funcs->disable_symclk_se) { + dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/queue-6.8/efi-libstub-cast-away-type-warning-in-use-of-max.patch b/queue-6.8/efi-libstub-cast-away-type-warning-in-use-of-max.patch new file mode 100644 index 00000000000..a353607f134 --- /dev/null +++ b/queue-6.8/efi-libstub-cast-away-type-warning-in-use-of-max.patch @@ -0,0 +1,30 @@ +From 61d130f261a3c15ae2c4b6f3ac3517d5d5b78855 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Tue, 26 Mar 2024 11:15:25 +0100 +Subject: efi/libstub: Cast away type warning in use of max() + +From: Ard Biesheuvel + +commit 61d130f261a3c15ae2c4b6f3ac3517d5d5b78855 upstream. + +Avoid a type mismatch warning in max() by switching to max_t() and +providing the type explicitly. 
+ +Fixes: 3cb4a4827596abc82e ("efi/libstub: fix efi_random_alloc() ...") +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/efi/libstub/randomalloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/firmware/efi/libstub/randomalloc.c ++++ b/drivers/firmware/efi/libstub/randomalloc.c +@@ -120,7 +120,7 @@ efi_status_t efi_random_alloc(unsigned l + continue; + } + +- target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align; ++ target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align; + pages = size / EFI_PAGE_SIZE; + + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, diff --git a/queue-6.8/init-open-initrd.image-with-o_largefile.patch b/queue-6.8/init-open-initrd.image-with-o_largefile.patch new file mode 100644 index 00000000000..57d45ca5cc3 --- /dev/null +++ b/queue-6.8/init-open-initrd.image-with-o_largefile.patch @@ -0,0 +1,36 @@ +From 4624b346cf67400ef46a31771011fb798dd2f999 Mon Sep 17 00:00:00 2001 +From: John Sperbeck +Date: Sun, 17 Mar 2024 15:15:22 -0700 +Subject: init: open /initrd.image with O_LARGEFILE + +From: John Sperbeck + +commit 4624b346cf67400ef46a31771011fb798dd2f999 upstream. + +If initrd data is larger than 2Gb, we'll eventually fail to write to the +/initrd.image file when we hit that limit, unless O_LARGEFILE is set. 
+ +Link: https://lkml.kernel.org/r/20240317221522.896040-1-jsperbeck@google.com +Signed-off-by: John Sperbeck +Cc: Jens Axboe +Cc: Nick Desaulniers +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + init/initramfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/init/initramfs.c ++++ b/init/initramfs.c +@@ -683,7 +683,7 @@ static void __init populate_initrd_image + + printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", + err); +- file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); ++ file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700); + if (IS_ERR(file)) + return; + diff --git a/queue-6.8/mm-zswap-fix-writeback-shinker-gfp_noio-gfp_nofs-recursion.patch b/queue-6.8/mm-zswap-fix-writeback-shinker-gfp_noio-gfp_nofs-recursion.patch new file mode 100644 index 00000000000..1d518fc6044 --- /dev/null +++ b/queue-6.8/mm-zswap-fix-writeback-shinker-gfp_noio-gfp_nofs-recursion.patch @@ -0,0 +1,95 @@ +From 30fb6a8d9e3378919f378f9bf561142b4a6d2637 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Thu, 21 Mar 2024 14:25:32 -0400 +Subject: mm: zswap: fix writeback shinker GFP_NOIO/GFP_NOFS recursion +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Johannes Weiner + +commit 30fb6a8d9e3378919f378f9bf561142b4a6d2637 upstream. 
+ +Kent forwards this bug report of zswap re-entering the block layer +from an IO request allocation and locking up: + +[10264.128242] sysrq: Show Blocked State +[10264.128268] task:kworker/20:0H state:D stack:0 pid:143 tgid:143 ppid:2 flags:0x00004000 +[10264.128271] Workqueue: bcachefs_io btree_write_submit [bcachefs] +[10264.128295] Call Trace: +[10264.128295] +[10264.128297] __schedule+0x3e6/0x1520 +[10264.128303] schedule+0x32/0xd0 +[10264.128304] schedule_timeout+0x98/0x160 +[10264.128308] io_schedule_timeout+0x50/0x80 +[10264.128309] wait_for_completion_io_timeout+0x7f/0x180 +[10264.128310] submit_bio_wait+0x78/0xb0 +[10264.128313] swap_writepage_bdev_sync+0xf6/0x150 +[10264.128317] zswap_writeback_entry+0xf2/0x180 +[10264.128319] shrink_memcg_cb+0xe7/0x2f0 +[10264.128322] __list_lru_walk_one+0xb9/0x1d0 +[10264.128325] list_lru_walk_one+0x5d/0x90 +[10264.128326] zswap_shrinker_scan+0xc4/0x130 +[10264.128327] do_shrink_slab+0x13f/0x360 +[10264.128328] shrink_slab+0x28e/0x3c0 +[10264.128329] shrink_one+0x123/0x1b0 +[10264.128331] shrink_node+0x97e/0xbc0 +[10264.128332] do_try_to_free_pages+0xe7/0x5b0 +[10264.128333] try_to_free_pages+0xe1/0x200 +[10264.128334] __alloc_pages_slowpath.constprop.0+0x343/0xde0 +[10264.128337] __alloc_pages+0x32d/0x350 +[10264.128338] allocate_slab+0x400/0x460 +[10264.128339] ___slab_alloc+0x40d/0xa40 +[10264.128345] kmem_cache_alloc+0x2e7/0x330 +[10264.128348] mempool_alloc+0x86/0x1b0 +[10264.128349] bio_alloc_bioset+0x200/0x4f0 +[10264.128352] bio_alloc_clone+0x23/0x60 +[10264.128354] alloc_io+0x26/0xf0 [dm_mod 7e9e6b44df4927f93fb3e4b5c782767396f58382] +[10264.128361] dm_submit_bio+0xb8/0x580 [dm_mod 7e9e6b44df4927f93fb3e4b5c782767396f58382] +[10264.128366] __submit_bio+0xb0/0x170 +[10264.128367] submit_bio_noacct_nocheck+0x159/0x370 +[10264.128368] bch2_submit_wbio_replicas+0x21c/0x3a0 [bcachefs 85f1b9a7a824f272eff794653a06dde1a94439f2] +[10264.128391] btree_write_submit+0x1cf/0x220 [bcachefs 
85f1b9a7a824f272eff794653a06dde1a94439f2] +[10264.128406] process_one_work+0x178/0x350 +[10264.128408] worker_thread+0x30f/0x450 +[10264.128409] kthread+0xe5/0x120 + +The zswap shrinker resumes the swap_writepage()s that were intercepted +by the zswap store. This will enter the block layer, and may even +enter the filesystem depending on the swap backing file. + +Make it respect GFP_NOIO and GFP_NOFS. + +Link: https://lore.kernel.org/linux-mm/rc4pk2r42oyvjo4dc62z6sovquyllq56i5cdgcaqbd7wy3hfzr@n4nbxido3fme/ +Link: https://lkml.kernel.org/r/20240321182532.60000-1-hannes@cmpxchg.org +Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure") +Signed-off-by: Johannes Weiner +Reported-by: Kent Overstreet +Acked-by: Yosry Ahmed +Reported-by: Jérôme Poulin +Reviewed-by: Nhat Pham +Reviewed-by: Chengming Zhou +Cc: stable@vger.kernel.org [v6.8] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/zswap.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/mm/zswap.c ++++ b/mm/zswap.c +@@ -645,6 +645,14 @@ static unsigned long zswap_shrinker_coun + if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg)) + return 0; + ++ /* ++ * The shrinker resumes swap writeback, which will enter block ++ * and may enter fs. XXX: Harmonize with vmscan.c __GFP_FS ++ * rules (may_enter_fs()), which apply on a per-folio basis. 
++ */ ++ if (!gfp_has_io_fs(sc->gfp_mask)) ++ return 0; ++ + #ifdef CONFIG_MEMCG_KMEM + mem_cgroup_flush_stats(memcg); + nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; diff --git a/queue-6.8/prctl-generalize-pr_set_mdwe-support-check-to-be-per-arch.patch b/queue-6.8/prctl-generalize-pr_set_mdwe-support-check-to-be-per-arch.patch new file mode 100644 index 00000000000..7171fca0e9f --- /dev/null +++ b/queue-6.8/prctl-generalize-pr_set_mdwe-support-check-to-be-per-arch.patch @@ -0,0 +1,118 @@ +From d5aad4c2ca057e760a92a9a7d65bd38d72963f27 Mon Sep 17 00:00:00 2001 +From: Zev Weiss +Date: Mon, 26 Feb 2024 17:35:41 -0800 +Subject: prctl: generalize PR_SET_MDWE support check to be per-arch + +From: Zev Weiss + +commit d5aad4c2ca057e760a92a9a7d65bd38d72963f27 upstream. + +Patch series "ARM: prctl: Reject PR_SET_MDWE where not supported". + +I noticed after a recent kernel update that my ARM926 system started +segfaulting on any execve() after calling prctl(PR_SET_MDWE). After some +investigation it appears that ARMv5 is incapable of providing the +appropriate protections for MDWE, since any readable memory is also +implicitly executable. + +The prctl_set_mdwe() function already had some special-case logic added +disabling it on PARISC (commit 793838138c15, "prctl: Disable +prctl(PR_SET_MDWE) on parisc"); this patch series (1) generalizes that +check to use an arch_*() function, and (2) adds a corresponding override +for ARM to disable MDWE on pre-ARMv6 CPUs. + +With the series applied, prctl(PR_SET_MDWE) is rejected on ARMv5 and +subsequent execve() calls (as well as mmap(PROT_READ|PROT_WRITE)) can +succeed instead of unconditionally failing; on ARMv6 the prctl works as it +did previously. 
+ +[0] https://lore.kernel.org/all/2023112456-linked-nape-bf19@gregkh/ + + +This patch (of 2): + +There exist systems other than PARISC where MDWE may not be feasible to +support; rather than cluttering up the generic code with additional +arch-specific logic let's add a generic function for checking MDWE support +and allow each arch to override it as needed. + +Link: https://lkml.kernel.org/r/20240227013546.15769-4-zev@bewilderbeest.net +Link: https://lkml.kernel.org/r/20240227013546.15769-5-zev@bewilderbeest.net +Signed-off-by: Zev Weiss +Acked-by: Helge Deller [parisc] +Cc: Borislav Petkov +Cc: David Hildenbrand +Cc: Florent Revest +Cc: "James E.J. Bottomley" +Cc: Josh Triplett +Cc: Kees Cook +Cc: Miguel Ojeda +Cc: Mike Rapoport (IBM) +Cc: Oleg Nesterov +Cc: Ondrej Mosnacek +Cc: Rick Edgecombe +Cc: Russell King (Oracle) +Cc: Sam James +Cc: Stefan Roesch +Cc: Yang Shi +Cc: Yin Fengwei +Cc: [6.3+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/asm/mman.h | 14 ++++++++++++++ + include/linux/mman.h | 8 ++++++++ + kernel/sys.c | 7 +++++-- + 3 files changed, 27 insertions(+), 2 deletions(-) + create mode 100644 arch/parisc/include/asm/mman.h + +--- /dev/null ++++ b/arch/parisc/include/asm/mman.h +@@ -0,0 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __ASM_MMAN_H__ ++#define __ASM_MMAN_H__ ++ ++#include ++ ++/* PARISC cannot allow mdwe as it needs writable stacks */ ++static inline bool arch_memory_deny_write_exec_supported(void) ++{ ++ return false; ++} ++#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported ++ ++#endif /* __ASM_MMAN_H__ */ +--- a/include/linux/mman.h ++++ b/include/linux/mman.h +@@ -162,6 +162,14 @@ calc_vm_flag_bits(unsigned long flags) + + unsigned long vm_commit_limit(void); + ++#ifndef arch_memory_deny_write_exec_supported ++static inline bool arch_memory_deny_write_exec_supported(void) ++{ ++ return true; ++} ++#define arch_memory_deny_write_exec_supported 
arch_memory_deny_write_exec_supported ++#endif ++ + /* + * Denies creating a writable executable mapping or gaining executable permissions. + * +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -2408,8 +2408,11 @@ static inline int prctl_set_mdwe(unsigne + if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) + return -EINVAL; + +- /* PARISC cannot allow mdwe as it needs writable stacks */ +- if (IS_ENABLED(CONFIG_PARISC)) ++ /* ++ * EOPNOTSUPP might be more appropriate here in principle, but ++ * existing userspace depends on EINVAL specifically. ++ */ ++ if (!arch_memory_deny_write_exec_supported()) + return -EINVAL; + + current_bits = get_current_mdwe(); diff --git a/queue-6.8/series b/queue-6.8/series index 4726e4ba6d7..48a1e4e1b4a 100644 --- a/queue-6.8/series +++ b/queue-6.8/series @@ -279,3 +279,17 @@ drm-i915-replace-a-memset-with-zero-initialization.patch drm-i915-try-to-preserve-the-current-shared_dpll-for-fastset-on-type-c-ports.patch drm-i915-include-the-pll-name-in-the-debug-messages.patch drm-i915-suppress-old-pll-pipe_mask-checks-for-mg-tc-tbt-plls.patch +crypto-iaa-fix-nr_cpus-nr_iaa-case.patch +drm-amd-display-prevent-crash-when-disable-stream.patch +alsa-hda-tas2781-remove-digital-gain-kcontrol.patch +alsa-hda-tas2781-add-locks-to-kcontrols.patch +mm-zswap-fix-writeback-shinker-gfp_noio-gfp_nofs-recursion.patch +init-open-initrd.image-with-o_largefile.patch +x86-efistub-add-missing-boot_params-for-mixed-mode-compat-entry.patch +efi-libstub-cast-away-type-warning-in-use-of-max.patch +x86-efistub-reinstate-soft-limit-for-initrd-loading.patch +prctl-generalize-pr_set_mdwe-support-check-to-be-per-arch.patch +arm-prctl-reject-pr_set_mdwe-on-pre-armv6.patch +tmpfs-fix-race-on-handling-dquot-rbtree.patch +btrfs-validate-device-maj-min-during-open.patch +btrfs-fix-race-in-read_extent_buffer_pages.patch diff --git a/queue-6.8/tmpfs-fix-race-on-handling-dquot-rbtree.patch b/queue-6.8/tmpfs-fix-race-on-handling-dquot-rbtree.patch new file mode 
100644 index 00000000000..178c8890270 --- /dev/null +++ b/queue-6.8/tmpfs-fix-race-on-handling-dquot-rbtree.patch @@ -0,0 +1,100 @@ +From 0a69b6b3a026543bc215ccc866d0aea5579e6ce2 Mon Sep 17 00:00:00 2001 +From: Carlos Maiolino +Date: Wed, 20 Mar 2024 13:39:59 +0100 +Subject: tmpfs: fix race on handling dquot rbtree + +From: Carlos Maiolino + +commit 0a69b6b3a026543bc215ccc866d0aea5579e6ce2 upstream. + +A syzkaller reproducer found a race while attempting to remove dquot +information from the rb tree. + +Fetching the rb_tree root node must also be protected by the +dqopt->dqio_sem, otherwise, given the right timing, shmem_release_dquot() +will trigger a warning because it couldn't find a node in the tree, when +the real reason was the root node changing before the search starts: + +Thread 1 Thread 2 +- shmem_release_dquot() - shmem_{acquire,release}_dquot() + +- fetch ROOT - Fetch ROOT + + - acquire dqio_sem +- wait dqio_sem + + - do something, trigger a tree rebalance + - release dqio_sem + +- acquire dqio_sem +- start searching for the node, but + from the wrong location, missing + the node, and triggering a warning. 
+ +Link: https://lkml.kernel.org/r/20240320124011.398847-1-cem@kernel.org +Fixes: eafc474e2029 ("shmem: prepare shmem quota infrastructure") +Signed-off-by: Carlos Maiolino +Reported-by: Ubisectech Sirius +Reviewed-by: Jan Kara +Cc: Hugh Dickins +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/shmem_quota.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/mm/shmem_quota.c ++++ b/mm/shmem_quota.c +@@ -116,7 +116,7 @@ static int shmem_free_file_info(struct s + static int shmem_get_next_id(struct super_block *sb, struct kqid *qid) + { + struct mem_dqinfo *info = sb_dqinfo(sb, qid->type); +- struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node; ++ struct rb_node *node; + qid_t id = from_kqid(&init_user_ns, *qid); + struct quota_info *dqopt = sb_dqopt(sb); + struct quota_id *entry = NULL; +@@ -126,6 +126,7 @@ static int shmem_get_next_id(struct supe + return -ESRCH; + + down_read(&dqopt->dqio_sem); ++ node = ((struct rb_root *)info->dqi_priv)->rb_node; + while (node) { + entry = rb_entry(node, struct quota_id, node); + +@@ -165,7 +166,7 @@ out_unlock: + static int shmem_acquire_dquot(struct dquot *dquot) + { + struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type); +- struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node; ++ struct rb_node **n; + struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info; + struct rb_node *parent = NULL, *new_node = NULL; + struct quota_id *new_entry, *entry; +@@ -176,6 +177,8 @@ static int shmem_acquire_dquot(struct dq + mutex_lock(&dquot->dq_lock); + + down_write(&dqopt->dqio_sem); ++ n = &((struct rb_root *)info->dqi_priv)->rb_node; ++ + while (*n) { + parent = *n; + entry = rb_entry(parent, struct quota_id, node); +@@ -264,7 +267,7 @@ static bool shmem_is_empty_dquot(struct + static int shmem_release_dquot(struct dquot *dquot) + { + struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type); +- struct rb_node *node = ((struct 
rb_root *)info->dqi_priv)->rb_node; ++ struct rb_node *node; + qid_t id = from_kqid(&init_user_ns, dquot->dq_id); + struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + struct quota_id *entry = NULL; +@@ -275,6 +278,7 @@ static int shmem_release_dquot(struct dq + goto out_dqlock; + + down_write(&dqopt->dqio_sem); ++ node = ((struct rb_root *)info->dqi_priv)->rb_node; + while (node) { + entry = rb_entry(node, struct quota_id, node); + diff --git a/queue-6.8/x86-efistub-add-missing-boot_params-for-mixed-mode-compat-entry.patch b/queue-6.8/x86-efistub-add-missing-boot_params-for-mixed-mode-compat-entry.patch new file mode 100644 index 00000000000..c87bae3f6c7 --- /dev/null +++ b/queue-6.8/x86-efistub-add-missing-boot_params-for-mixed-mode-compat-entry.patch @@ -0,0 +1,142 @@ +From d21f5a59ea773826cc489acb287811d690b703cc Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Sun, 24 Mar 2024 17:10:53 +0100 +Subject: x86/efistub: Add missing boot_params for mixed mode compat entry + +From: Ard Biesheuvel + +commit d21f5a59ea773826cc489acb287811d690b703cc upstream. + +The pure EFI stub entry point does not take a struct boot_params from +the boot loader, but creates it from scratch, and populates only the +fields that still have meaning in this context (command line, initrd +base and size, etc) + +The original mixed mode implementation used the EFI handover protocol +instead, where the boot loader (i.e., GRUB) populates a boot_params +struct and passes it to a special Linux specific EFI entry point that +takes the boot_params pointer as its third argument. + +When the new mixed mode implementation was introduced, using a special +32-bit PE entrypoint in the 64-bit kernel, it adopted the pure approach, +and relied on the EFI stub to create the struct boot_params. This is +preferred because it makes the bootloader side much easier to implement, +as it does not need any x86-specific knowledge on how struct boot_params +and struct setup_header are put together. 
This mixed mode implementation +was adopted by systemd-boot version 252 and later. + +When commit + + e2ab9eab324c ("x86/boot/compressed: Move 32-bit entrypoint code into .text section") + +refactored this code and moved it out of head_64.S, the fact that ESI +was populated with the address of the base of the image was overlooked, +and to simplify the code flow, ESI is now zeroed and stored to memory +unconditionally in shared code, so that the NULL-ness of that variable +can still be used later to determine which mixed mode boot protocol is +in use. + +With ESI pointing to the base of the image, it can serve as a struct +boot_params pointer for startup_32(), which only accesses the init_data +and kernel_alignment fields (and the scratch field as a temporary +stack). Zeroing ESI means that those accesses produce garbage now, even +though things appear to work if the first page of memory happens to be +zeroed, and the region right before LOAD_PHYSICAL_ADDR (== 16 MiB) +happens to be free. + +The solution is to pass a special, temporary struct boot_params to +startup_32() via ESI, one that is sufficient for getting it to create +the page tables correctly and is discarded right after. This involves +setting a minimal alignment of 4k, only to get the statically allocated +page tables line up correctly, and setting init_size to the executable +image size (_end - startup_32). This ensures that the page tables are +covered by the static footprint of the PE image. + +Given that EFI boot no longer calls the decompressor and no longer pads +the image to permit the decompressor to execute in place, the same +temporary struct boot_params should be used in the EFI handover protocol +based mixed mode implementation as well, to prevent the page tables from +being placed outside of allocated memory. 
+ +Fixes: e2ab9eab324c ("x86/boot/compressed: Move 32-bit entrypoint code into .text section") +Cc: # v6.1+ +Closes: https://lore.kernel.org/all/20240321150510.GI8211@craftyguy.net/ +Reported-by: Clayton Craft +Tested-by: Clayton Craft +Tested-by: Hans de Goede +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/boot/compressed/efi_mixed.S | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/arch/x86/boot/compressed/efi_mixed.S ++++ b/arch/x86/boot/compressed/efi_mixed.S +@@ -15,10 +15,12 @@ + */ + + #include ++#include + #include + #include + #include + #include ++#include + + .code64 + .text +@@ -149,6 +151,7 @@ SYM_FUNC_END(__efi64_thunk) + SYM_FUNC_START(efi32_stub_entry) + call 1f + 1: popl %ecx ++ leal (efi32_boot_args - 1b)(%ecx), %ebx + + /* Clear BSS */ + xorl %eax, %eax +@@ -163,6 +166,7 @@ SYM_FUNC_START(efi32_stub_entry) + popl %ecx + popl %edx + popl %esi ++ movl %esi, 8(%ebx) + jmp efi32_entry + SYM_FUNC_END(efi32_stub_entry) + #endif +@@ -239,8 +243,6 @@ SYM_FUNC_END(efi_enter32) + * + * Arguments: %ecx image handle + * %edx EFI system table pointer +- * %esi struct bootparams pointer (or NULL when not using +- * the EFI handover protocol) + * + * Since this is the point of no return for ordinary execution, no registers + * are considered live except for the function parameters. [Note that the EFI +@@ -266,9 +268,18 @@ SYM_FUNC_START_LOCAL(efi32_entry) + leal (efi32_boot_args - 1b)(%ebx), %ebx + movl %ecx, 0(%ebx) + movl %edx, 4(%ebx) +- movl %esi, 8(%ebx) + movb $0x0, 12(%ebx) // efi_is64 + ++ /* ++ * Allocate some memory for a temporary struct boot_params, which only ++ * needs the minimal pieces that startup_32() relies on. 
++ */ ++ subl $PARAM_SIZE, %esp ++ movl %esp, %esi ++ movl $PAGE_SIZE, BP_kernel_alignment(%esi) ++ movl $_end - 1b, BP_init_size(%esi) ++ subl $startup_32 - 1b, BP_init_size(%esi) ++ + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax +@@ -294,8 +305,7 @@ SYM_FUNC_START(efi32_pe_entry) + + movl 8(%ebp), %ecx // image_handle + movl 12(%ebp), %edx // sys_table +- xorl %esi, %esi +- jmp efi32_entry // pass %ecx, %edx, %esi ++ jmp efi32_entry // pass %ecx, %edx + // no other registers remain live + + 2: popl %edi // restore callee-save registers diff --git a/queue-6.8/x86-efistub-reinstate-soft-limit-for-initrd-loading.patch b/queue-6.8/x86-efistub-reinstate-soft-limit-for-initrd-loading.patch new file mode 100644 index 00000000000..34e90a556f6 --- /dev/null +++ b/queue-6.8/x86-efistub-reinstate-soft-limit-for-initrd-loading.patch @@ -0,0 +1,56 @@ +From decd347c2a75d32984beb8807d470b763a53b542 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Thu, 28 Mar 2024 15:49:48 +0100 +Subject: x86/efistub: Reinstate soft limit for initrd loading + +From: Ard Biesheuvel + +commit decd347c2a75d32984beb8807d470b763a53b542 upstream. + +Commit + + 8117961d98fb2 ("x86/efi: Disregard setup header of loaded image") + +dropped the memcopy of the image's setup header into the boot_params +struct provided to the core kernel, on the basis that EFI boot does not +need it and should rely only on a single protocol to interface with the +boot chain. It is also a prerequisite for being able to increase the +section alignment to 4k, which is needed to enable memory protections +when running in the boot services. + +So only the setup_header fields that matter to the core kernel are +populated explicitly, and everything else is ignored. 
One thing was +overlooked, though: the initrd_addr_max field in the setup_header is not +used by the core kernel, but it is used by the EFI stub itself when it +loads the initrd, where its default value of INT_MAX is used as the soft +limit for memory allocation. + +This means that, in the old situation, the initrd was virtually always +loaded in the lower 2G of memory, but now, due to initrd_addr_max being +0x0, the initrd may end up anywhere in memory. This should not be an +issue in principle, as most systems can deal with this fine. However, it +does appear to tickle some problems in older UEFI implementations, where +the memory ends up being corrupted, resulting in errors when unpacking +the initramfs. + +So set the initrd_addr_max field to INT_MAX like it was before. + +Fixes: 8117961d98fb2 ("x86/efi: Disregard setup header of loaded image") +Reported-by: Radek Podgorny +Closes: https://lore.kernel.org/all/a99a831a-8ad5-4cb0-bff9-be637311f771@podgorny.cz +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/efi/libstub/x86-stub.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -487,6 +487,7 @@ efi_status_t __efiapi efi_pe_entry(efi_h + hdr->vid_mode = 0xffff; + + hdr->type_of_loader = 0x21; ++ hdr->initrd_addr_max = INT_MAX; + + /* Convert unicode cmdline to ascii */ + cmdline_ptr = efi_convert_cmdline(image, &options_size);