From b3bc8c0bf23f7b048470b033d939a0c4ffde2d47 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 5 Nov 2024 20:54:04 -0500 Subject: [PATCH] Fixes for 6.11 Signed-off-by: Sasha Levin --- ...-fix-noc-firewall-interrupt-handling.patch | 101 ++++++ ...-fix-headset-mic-on-tuxedo-gemini-17.patch | 36 ++ ...-fix-headset-mic-on-tuxedo-stellaris.patch | 36 ++ ...-limit-internal-mic-boost-on-dell-pl.patch | 97 ++++++ ...anity-checks-in-blk_rq_map_user_bvec.patch | 59 ++++ ...-not-merging-contiguous-extents-due-.patch | 119 +++++++ ...-fix-error-propagation-of-split-bios.patch | 243 +++++++++++++ ...-map-merging-not-happening-for-adjac.patch | 109 ++++++ ...ter-free-of-block-device-file-in-__b.patch | 78 +++++ ...s_orig_bbio_end_io-into-btrfs_bio_en.patch | 152 ++++++++ ...-dedicated-workqueue-for-cgroup-bpf-.patch | 154 ++++++++ ...ports-ready-at-cxl_acpi_probe-return.patch | 52 +++ ...cxl_bus_rescan-vs-bus_rescan_devices.patch | 65 ++++ ...gh-fix-kernel-memory-out-of-bounds-w.patch | 100 ++++++ ...m-amdgpu-smu13-fix-profile-reporting.patch | 62 ++++ ...mi-fix-memory-leak-in-drm_display_mo.patch | 162 +++++++++ ...ix-memory-leaks-in-drm_display_mode_.patch | 89 +++++ ...s-add-helper-for-drm_display_mode_fr.patch | 102 ++++++ ...add-mmio-read-before-ggtt-invalidate.patch | 62 ++++ ...hort-circuit-tdr-on-jobs-not-started.patch | 83 +++++ ...gister-definition-order-in-xe_regs.h.patch | 44 +++ .../drm-xe-kill-regs-xe_sriov_regs.h.patch | 147 ++++++++ ...-invoke-uffd-on-fork-if-error-occurs.patch | 157 +++++++++ ...oke-khugepaged-ksm-hooks-if-no-error.patch | 112 ++++++ ...-fix-debugfs-dangling-chip-separator.patch | 39 +++ ...iolib-fix-debugfs-newline-separators.patch | 47 +++ ...fix-microlux-value-calculation.patch-18046 | 47 +++ ...x06-fix-regmap-leak-when-probe-fails.patch | 82 +++++ ...sion-when-re-registering-input-handl.patch | 253 ++++++++++++++ ...missing-nowait-check-for-o_direct-st.patch | 121 +++++++ ...y_page_from_iter_atomic-if-kmap_loca.patch | 70 ++++ 
.../kasan-remove-vmalloc_percpu-test.patch | 87 +++++ .../mctp-i2c-handle-null-header-address.patch | 44 +++ .../mei-use-kvmalloc-for-read-buffer.patch | 55 +++ ...p-alignment-of-anonymous-mappings-to.patch | 75 ++++ ...-ignore-non-leaf-pmd_young-for-force.patch | 65 ++++ ...-remove-mm_leaf_old-and-mm_nonleaf_t.patch | 153 ++++++++ ...ru-use-ptep-pmdp-_clear_young_notify.patch | 329 ++++++++++++++++++ ...t-gfp_atomic-order-0-allocs-access-h.patch | 88 +++++ ...p-folio-mapped-by-an-exiting-process.patch | 98 ++++++ ...nit-protect-sched-with-rcu_read_lock.patch | 79 +++++ ...r-handling-for-io_uring-nvme-passthr.patch | 48 +++ ...n-dh_key-to-null-after-kfree_sensiti.patch | 41 +++ ...o-ocfs2_truncate_inline-maybe-overfl.patch | 60 ++++ ...x8m-pcie-do-cmn_rst-just-before-phy-.patch | 97 ++++++ ...-clear-tick_dep_bit_posix_timer-on-c.patch | 92 +++++ ...alk_system_ram_res_rev-must-retain-r.patch | 121 +++++++ ...set-nx-compat-flag-in-pe-coff-header.patch | 48 +++ ...t-a-bad-reference-count-on-cpu-nodes.patch | 66 ++++ .../riscv-remove-duplicated-get_rm.patch | 38 ++ ...remove-unused-generating_asm_offsets.patch | 44 +++ ...cv-use-u-to-format-the-output-of-cpu.patch | 43 +++ ...nt-the-compiler-from-inserting-calls.patch | 40 +++ ...he-potential-null-pointer-dereferenc.patch | 90 +++++ ...x-another-deadlock-during-rtc-update.patch | 43 +++ queue-6.11/series | 63 ++++ ...-fix-crash-when-not-using-gpio-chip-.patch | 86 +++++ .../tpm-lazily-flush-the-auth-session.patch | 214 ++++++++++++ ...sessions_init-when-null-key-creation.patch | 52 +++ queue-6.11/tpm-rollback-tpm2_load_null.patch | 85 +++++ ...ix-page-count-imbalance-on-node-stat.patch | 75 ++++ .../x86-traps-enable-ubsan-traps-on-x86.patch | 195 +++++++++++ ...msan-check-after-instrumentation_beg.patch | 78 +++++ ...a-last-resort-ag-in-xfs_filestream_p.patch | 121 +++++++ 64 files changed, 5993 insertions(+) create mode 100644 queue-6.11/accel-ivpu-fix-noc-firewall-interrupt-handling.patch create mode 100644 
queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-gemini-17.patch create mode 100644 queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch create mode 100644 queue-6.11/alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch create mode 100644 queue-6.11/block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch create mode 100644 queue-6.11/btrfs-fix-defrag-not-merging-contiguous-extents-due-.patch create mode 100644 queue-6.11/btrfs-fix-error-propagation-of-split-bios.patch create mode 100644 queue-6.11/btrfs-fix-extent-map-merging-not-happening-for-adjac.patch create mode 100644 queue-6.11/btrfs-fix-use-after-free-of-block-device-file-in-__b.patch create mode 100644 queue-6.11/btrfs-merge-btrfs_orig_bbio_end_io-into-btrfs_bio_en.patch create mode 100644 queue-6.11/cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch create mode 100644 queue-6.11/cxl-acpi-ensure-ports-ready-at-cxl_acpi_probe-return.patch create mode 100644 queue-6.11/cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch create mode 100644 queue-6.11/drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-w.patch create mode 100644 queue-6.11/drm-amdgpu-smu13-fix-profile-reporting.patch create mode 100644 queue-6.11/drm-connector-hdmi-fix-memory-leak-in-drm_display_mo.patch create mode 100644 queue-6.11/drm-tests-hdmi-fix-memory-leaks-in-drm_display_mode_.patch create mode 100644 queue-6.11/drm-tests-helpers-add-helper-for-drm_display_mode_fr.patch create mode 100644 queue-6.11/drm-xe-add-mmio-read-before-ggtt-invalidate.patch create mode 100644 queue-6.11/drm-xe-don-t-short-circuit-tdr-on-jobs-not-started.patch create mode 100644 queue-6.11/drm-xe-fix-register-definition-order-in-xe_regs.h.patch create mode 100644 queue-6.11/drm-xe-kill-regs-xe_sriov_regs.h.patch create mode 100644 queue-6.11/fork-do-not-invoke-uffd-on-fork-if-error-occurs.patch create mode 100644 queue-6.11/fork-only-invoke-khugepaged-ksm-hooks-if-no-error.patch create mode 100644 
queue-6.11/gpiolib-fix-debugfs-dangling-chip-separator.patch create mode 100644 queue-6.11/gpiolib-fix-debugfs-newline-separators.patch create mode 100644 queue-6.11/iio-light-veml6030-fix-microlux-value-calculation.patch-18046 create mode 100644 queue-6.11/input-edt-ft5x06-fix-regmap-leak-when-probe-fails.patch create mode 100644 queue-6.11/input-fix-regression-when-re-registering-input-handl.patch create mode 100644 queue-6.11/io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch create mode 100644 queue-6.11/iov_iter-fix-copy_page_from_iter_atomic-if-kmap_loca.patch create mode 100644 queue-6.11/kasan-remove-vmalloc_percpu-test.patch create mode 100644 queue-6.11/mctp-i2c-handle-null-header-address.patch create mode 100644 queue-6.11/mei-use-kvmalloc-for-read-buffer.patch create mode 100644 queue-6.11/mm-mmap-limit-thp-alignment-of-anonymous-mappings-to.patch create mode 100644 queue-6.11/mm-multi-gen-lru-ignore-non-leaf-pmd_young-for-force.patch create mode 100644 queue-6.11/mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_t.patch create mode 100644 queue-6.11/mm-multi-gen-lru-use-ptep-pmdp-_clear_young_notify.patch create mode 100644 queue-6.11/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch create mode 100644 queue-6.11/mm-shrink-skip-folio-mapped-by-an-exiting-process.patch create mode 100644 queue-6.11/mptcp-init-protect-sched-with-rcu_read_lock.patch create mode 100644 queue-6.11/nvme-re-fix-error-handling-for-io_uring-nvme-passthr.patch create mode 100644 queue-6.11/nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch create mode 100644 queue-6.11/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch create mode 100644 queue-6.11/phy-freescale-imx8m-pcie-do-cmn_rst-just-before-phy-.patch create mode 100644 queue-6.11/posix-cpu-timers-clear-tick_dep_bit_posix_timer-on-c.patch create mode 100644 queue-6.11/resource-kexec-walk_system_ram_res_rev-must-retain-r.patch create mode 100644 
queue-6.11/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch create mode 100644 queue-6.11/riscv-prevent-a-bad-reference-count-on-cpu-nodes.patch create mode 100644 queue-6.11/riscv-remove-duplicated-get_rm.patch create mode 100644 queue-6.11/riscv-remove-unused-generating_asm_offsets.patch create mode 100644 queue-6.11/riscv-use-u-to-format-the-output-of-cpu.patch create mode 100644 queue-6.11/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch create mode 100644 queue-6.11/sched-numa-fix-the-potential-null-pointer-dereferenc.patch create mode 100644 queue-6.11/scsi-ufs-core-fix-another-deadlock-during-rtc-update.patch create mode 100644 queue-6.11/spi-spi-fsl-dspi-fix-crash-when-not-using-gpio-chip-.patch create mode 100644 queue-6.11/tpm-lazily-flush-the-auth-session.patch create mode 100644 queue-6.11/tpm-return-tpm2_sessions_init-when-null-key-creation.patch create mode 100644 queue-6.11/tpm-rollback-tpm2_load_null.patch create mode 100644 queue-6.11/vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch create mode 100644 queue-6.11/x86-traps-enable-ubsan-traps-on-x86.patch create mode 100644 queue-6.11/x86-traps-move-kmsan-check-after-instrumentation_beg.patch create mode 100644 queue-6.11/xfs-fix-finding-a-last-resort-ag-in-xfs_filestream_p.patch diff --git a/queue-6.11/accel-ivpu-fix-noc-firewall-interrupt-handling.patch b/queue-6.11/accel-ivpu-fix-noc-firewall-interrupt-handling.patch new file mode 100644 index 00000000000..a2b3c9b09b7 --- /dev/null +++ b/queue-6.11/accel-ivpu-fix-noc-firewall-interrupt-handling.patch @@ -0,0 +1,101 @@ +From 733b0f88c8d512282b90d5a1903a5e90c0b76cca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Oct 2024 16:49:58 +0200 +Subject: accel/ivpu: Fix NOC firewall interrupt handling + +From: Andrzej Kacprowski + +[ Upstream commit 72f7e16eccddde99386a10eb2d08833e805917c6 ] + +The NOC firewall interrupt means that the HW prevented +unauthorized access to a protected resource, so there +is no need to 
trigger device reset in such case. + +To facilitate security testing add firewall_irq_counter +debugfs file that tracks firewall interrupts. + +Fixes: 8a27ad81f7d3 ("accel/ivpu: Split IP and buttress code") +Cc: stable@vger.kernel.org # v6.11+ +Signed-off-by: Andrzej Kacprowski +Reviewed-by: Jacek Lawrynowicz +Reviewed-by: Jeffrey Hugo +Signed-off-by: Jacek Lawrynowicz +Link: https://patchwork.freedesktop.org/patch/msgid/20241017144958.79327-1-jacek.lawrynowicz@linux.intel.com +Signed-off-by: Sasha Levin +--- + drivers/accel/ivpu/ivpu_debugfs.c | 9 +++++++++ + drivers/accel/ivpu/ivpu_hw.c | 1 + + drivers/accel/ivpu/ivpu_hw.h | 1 + + drivers/accel/ivpu/ivpu_hw_ip.c | 5 ++++- + 4 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c +index 6f86f8df30db0..8d50981594d15 100644 +--- a/drivers/accel/ivpu/ivpu_debugfs.c ++++ b/drivers/accel/ivpu/ivpu_debugfs.c +@@ -108,6 +108,14 @@ static int reset_pending_show(struct seq_file *s, void *v) + return 0; + } + ++static int firewall_irq_counter_show(struct seq_file *s, void *v) ++{ ++ struct ivpu_device *vdev = seq_to_ivpu(s); ++ ++ seq_printf(s, "%d\n", atomic_read(&vdev->hw->firewall_irq_counter)); ++ return 0; ++} ++ + static const struct drm_debugfs_info vdev_debugfs_list[] = { + {"bo_list", bo_list_show, 0}, + {"fw_name", fw_name_show, 0}, +@@ -116,6 +124,7 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = { + {"last_bootmode", last_bootmode_show, 0}, + {"reset_counter", reset_counter_show, 0}, + {"reset_pending", reset_pending_show, 0}, ++ {"firewall_irq_counter", firewall_irq_counter_show, 0}, + }; + + static ssize_t +diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c +index 27f0fe4d54e00..e69c0613513f1 100644 +--- a/drivers/accel/ivpu/ivpu_hw.c ++++ b/drivers/accel/ivpu/ivpu_hw.c +@@ -249,6 +249,7 @@ int ivpu_hw_init(struct ivpu_device *vdev) + platform_init(vdev); + wa_init(vdev); + timeouts_init(vdev); ++ 
atomic_set(&vdev->hw->firewall_irq_counter, 0); + + return 0; + } +diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h +index 1c0c98e3afb88..a96a05b2acda9 100644 +--- a/drivers/accel/ivpu/ivpu_hw.h ++++ b/drivers/accel/ivpu/ivpu_hw.h +@@ -52,6 +52,7 @@ struct ivpu_hw_info { + int dma_bits; + ktime_t d0i3_entry_host_ts; + u64 d0i3_entry_vpu_ts; ++ atomic_t firewall_irq_counter; + }; + + int ivpu_hw_init(struct ivpu_device *vdev); +diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c +index dfd2f4a5b5268..60b33fc59d96e 100644 +--- a/drivers/accel/ivpu/ivpu_hw_ip.c ++++ b/drivers/accel/ivpu/ivpu_hw_ip.c +@@ -1062,7 +1062,10 @@ static void irq_wdt_mss_handler(struct ivpu_device *vdev) + + static void irq_noc_firewall_handler(struct ivpu_device *vdev) + { +- ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ"); ++ atomic_inc(&vdev->hw->firewall_irq_counter); ++ ++ ivpu_dbg(vdev, IRQ, "NOC Firewall interrupt detected, counter %d\n", ++ atomic_read(&vdev->hw->firewall_irq_counter)); + } + + /* Handler for IRQs from NPU core */ +-- +2.43.0 + diff --git a/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-gemini-17.patch b/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-gemini-17.patch new file mode 100644 index 00000000000..874b9396022 --- /dev/null +++ b/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-gemini-17.patch @@ -0,0 +1,36 @@ +From a2816e4957d37d5a472129d2feffc1f586662fdc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 29 Oct 2024 16:16:52 +0100 +Subject: ALSA: hda/realtek: Fix headset mic on TUXEDO Gemini 17 Gen3 + +From: Christoffer Sandberg + +[ Upstream commit 0b04fbe886b4274c8e5855011233aaa69fec6e75 ] + +Quirk is needed to enable headset microphone on missing pin 0x19. 
+ +Signed-off-by: Christoffer Sandberg +Signed-off-by: Werner Sembach +Cc: +Link: https://patch.msgid.link/20241029151653.80726-1-wse@tuxedocomputers.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 8d6f446d507c2..25d4c417d3c07 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -10729,6 +10729,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1558, 0x1404, "Clevo N150CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x2624, "Clevo L240TU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), ++ SND_PCI_QUIRK(0x1558, 0x28c1, "Clevo V370VND", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), +-- +2.43.0 + diff --git a/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch b/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch new file mode 100644 index 00000000000..44f2deb2acc --- /dev/null +++ b/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch @@ -0,0 +1,36 @@ +From 83b52e2959734734c3d15f52435cec1a85988591 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 29 Oct 2024 16:16:53 +0100 +Subject: ALSA: hda/realtek: Fix headset mic on TUXEDO Stellaris 16 Gen6 mb1 + +From: Christoffer Sandberg + +[ Upstream commit e49370d769e71456db3fbd982e95bab8c69f73e8 ] + +Quirk is needed to enable headset microphone on missing pin 0x19. 
+ +Signed-off-by: Christoffer Sandberg +Signed-off-by: Werner Sembach +Cc: +Link: https://patch.msgid.link/20241029151653.80726-2-wse@tuxedocomputers.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 25d4c417d3c07..660fd984a9285 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -10971,6 +10971,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), ++ SND_PCI_QUIRK(0x1d05, 0x1409, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), + SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), +-- +2.43.0 + diff --git a/queue-6.11/alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch b/queue-6.11/alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch new file mode 100644 index 00000000000..905429eefd8 --- /dev/null +++ b/queue-6.11/alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch @@ -0,0 +1,97 @@ +From a9024ea866983de0fa38466135dfca87b37dffc4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Oct 2024 13:53:24 +0800 +Subject: ALSA: hda/realtek: Limit internal Mic boost on Dell platform + +From: Kailang Yang + +[ Upstream commit 78e7be018784934081afec77f96d49a2483f9188 ] + +Dell want to limit internal Mic boost on all Dell platform. 
+ +Signed-off-by: Kailang Yang +Cc: +Link: https://lore.kernel.org/561fc5f5eff04b6cbd79ed173cd1c1db@realtek.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 21 ++++++++++++++++++--- + 1 file changed, 18 insertions(+), 3 deletions(-) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 2583081c0a3a5..8d6f446d507c2 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -7507,6 +7507,7 @@ enum { + ALC286_FIXUP_SONY_MIC_NO_PRESENCE, + ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT, + ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, ++ ALC269_FIXUP_DELL1_LIMIT_INT_MIC_BOOST, + ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, + ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, + ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, +@@ -7541,6 +7542,7 @@ enum { + ALC255_FIXUP_ACER_MIC_NO_PRESENCE, + ALC255_FIXUP_ASUS_MIC_NO_PRESENCE, + ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ++ ALC255_FIXUP_DELL1_LIMIT_INT_MIC_BOOST, + ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, + ALC255_FIXUP_HEADSET_MODE, + ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC, +@@ -8102,6 +8104,12 @@ static const struct hda_fixup alc269_fixups[] = { + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, ++ [ALC269_FIXUP_DELL1_LIMIT_INT_MIC_BOOST] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = alc269_fixup_limit_int_mic_boost, ++ .chained = true, ++ .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE ++ }, + [ALC269_FIXUP_DELL2_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { +@@ -8382,6 +8390,12 @@ static const struct hda_fixup alc269_fixups[] = { + .chained = true, + .chain_id = ALC255_FIXUP_HEADSET_MODE + }, ++ [ALC255_FIXUP_DELL1_LIMIT_INT_MIC_BOOST] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = alc269_fixup_limit_int_mic_boost, ++ .chained = true, ++ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE ++ }, + [ALC255_FIXUP_DELL2_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { +@@ -11050,6 +11064,7 @@ static 
const struct hda_model_fixup alc269_fixup_models[] = { + {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"}, + {.id = ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, .name = "dell-headset3"}, + {.id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, .name = "dell-headset4"}, ++ {.id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET, .name = "dell-headset4-quiet"}, + {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"}, + {.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"}, + {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"}, +@@ -11604,16 +11619,16 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = { + SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, + {0x19, 0x40000000}, + {0x1b, 0x40000000}), +- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, ++ SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET, + {0x19, 0x40000000}, + {0x1b, 0x40000000}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x19, 0x40000000}, + {0x1a, 0x40000000}), +- SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ++ SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_LIMIT_INT_MIC_BOOST, + {0x19, 0x40000000}, + {0x1a, 0x40000000}), +- SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, ++ SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_LIMIT_INT_MIC_BOOST, + {0x19, 0x40000000}, + {0x1a, 0x40000000}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC2XX_FIXUP_HEADSET_MIC, +-- +2.43.0 + diff --git a/queue-6.11/block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch b/queue-6.11/block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch new file mode 100644 index 00000000000..13a6f758893 --- /dev/null +++ b/queue-6.11/block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch @@ -0,0 +1,59 @@ +From 
5335fe64877ec5c011062af1001ab730ee756a17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Oct 2024 15:15:19 -0600 +Subject: block: fix sanity checks in blk_rq_map_user_bvec + +From: Xinyu Zhang + +[ Upstream commit 2ff949441802a8d076d9013c7761f63e8ae5a9bd ] + +blk_rq_map_user_bvec contains a check bytes + bv->bv_len > nr_iter which +causes unnecessary failures in NVMe passthrough I/O, reproducible as +follows: + +- register a 2 page, page-aligned buffer against a ring +- use that buffer to do a 1 page io_uring NVMe passthrough read + +The second (i = 1) iteration of the loop in blk_rq_map_user_bvec will +then have nr_iter == 1 page, bytes == 1 page, bv->bv_len == 1 page, so +the check bytes + bv->bv_len > nr_iter will succeed, causing the I/O to +fail. This failure is unnecessary, as when the check succeeds, it means +we've checked the entire buffer that will be used by the request - i.e. +blk_rq_map_user_bvec should complete successfully. Therefore, terminate +the loop early and return successfully when the check bytes + bv->bv_len +> nr_iter succeeds. + +While we're at it, also remove the check that all segments in the bvec +are single-page. While this seems to be true for all users of the +function, it doesn't appear to be required anywhere downstream. 
+ +CC: stable@vger.kernel.org +Signed-off-by: Xinyu Zhang +Co-developed-by: Uday Shankar +Signed-off-by: Uday Shankar +Fixes: 37987547932c ("block: extend functionality to map bvec iterator") +Link: https://lore.kernel.org/r/20241023211519.4177873-1-ushankar@purestorage.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-map.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/block/blk-map.c b/block/blk-map.c +index 0e1167b239342..6ef2ec1f7d78b 100644 +--- a/block/blk-map.c ++++ b/block/blk-map.c +@@ -600,9 +600,7 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter) + if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len) + goto put_bio; + if (bytes + bv->bv_len > nr_iter) +- goto put_bio; +- if (bv->bv_offset + bv->bv_len > PAGE_SIZE) +- goto put_bio; ++ break; + + nsegs++; + bytes += bv->bv_len; +-- +2.43.0 + diff --git a/queue-6.11/btrfs-fix-defrag-not-merging-contiguous-extents-due-.patch b/queue-6.11/btrfs-fix-defrag-not-merging-contiguous-extents-due-.patch new file mode 100644 index 00000000000..46e97589296 --- /dev/null +++ b/queue-6.11/btrfs-fix-defrag-not-merging-contiguous-extents-due-.patch @@ -0,0 +1,119 @@ +From 60e96501621d271121d40e3002f5fe2317f42273 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 29 Oct 2024 15:18:45 +0000 +Subject: btrfs: fix defrag not merging contiguous extents due to merged extent + maps + +From: Filipe Manana + +[ Upstream commit 77b0d113eec49a7390ff1a08ca1923e89f5f86c6 ] + +When running defrag (manual defrag) against a file that has extents that +are contiguous and we already have the respective extent maps loaded and +merged, we end up not defragging the range covered by those contiguous +extents. This happens when we have an extent map that was the result of +merging multiple extent maps for contiguous extents and the length of the +merged extent map is greater than or equals to the defrag threshold +length. 
+ +The script below reproduces this scenario: + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdi + MNT=/mnt/sdi + + mkfs.btrfs -f $DEV + mount $DEV $MNT + + # Create a 256K file with 4 extents of 64K each. + xfs_io -f -c "falloc 0 64K" \ + -c "pwrite 0 64K" \ + -c "falloc 64K 64K" \ + -c "pwrite 64K 64K" \ + -c "falloc 128K 64K" \ + -c "pwrite 128K 64K" \ + -c "falloc 192K 64K" \ + -c "pwrite 192K 64K" \ + $MNT/foo + + umount $MNT + echo -n "Initial number of file extent items: " + btrfs inspect-internal dump-tree -t 5 $DEV | grep EXTENT_DATA | wc -l + + mount $DEV $MNT + # Read the whole file in order to load and merge extent maps. + cat $MNT/foo > /dev/null + + btrfs filesystem defragment -t 128K $MNT/foo + umount $MNT + echo -n "Number of file extent items after defrag with 128K threshold: " + btrfs inspect-internal dump-tree -t 5 $DEV | grep EXTENT_DATA | wc -l + + mount $DEV $MNT + # Read the whole file in order to load and merge extent maps. + cat $MNT/foo > /dev/null + + btrfs filesystem defragment -t 256K $MNT/foo + umount $MNT + echo -n "Number of file extent items after defrag with 256K threshold: " + btrfs inspect-internal dump-tree -t 5 $DEV | grep EXTENT_DATA | wc -l + +Running it: + + $ ./test.sh + Initial number of file extent items: 4 + Number of file extent items after defrag with 128K threshold: 4 + Number of file extent items after defrag with 256K threshold: 4 + +The 4 extents don't get merged because we have an extent map with a size +of 256K that is the result of merging the individual extent maps for each +of the four 64K extents and at defrag_lookup_extent() we have a value of +zero for the generation threshold ('newer_than' argument) since this is a +manual defrag. As a consequence we don't call defrag_get_extent() to get +an extent map representing a single file extent item in the inode's +subvolume tree, so we end up using the merged extent map at +defrag_collect_targets() and decide not to defrag. 
+ +Fix this by updating defrag_lookup_extent() to always discard extent maps +that were merged and call defrag_get_extent() regardless of the minimum +generation threshold ('newer_than' argument). + +A test case for fstests will be sent along soon. + +CC: stable@vger.kernel.org # 6.1+ +Fixes: 199257a78bb0 ("btrfs: defrag: don't use merged extent map for their generation check") +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/defrag.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c +index f6dbda37a3615..990ef97accec4 100644 +--- a/fs/btrfs/defrag.c ++++ b/fs/btrfs/defrag.c +@@ -772,12 +772,12 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start, + * We can get a merged extent, in that case, we need to re-search + * tree to get the original em for defrag. + * +- * If @newer_than is 0 or em::generation < newer_than, we can trust +- * this em, as either we don't care about the generation, or the +- * merged extent map will be rejected anyway. ++ * This is because even if we have adjacent extents that are contiguous ++ * and compatible (same type and flags), we still want to defrag them ++ * so that we use less metadata (extent items in the extent tree and ++ * file extent items in the inode's subvolume tree). 
+ */ +- if (em && (em->flags & EXTENT_FLAG_MERGED) && +- newer_than && em->generation >= newer_than) { ++ if (em && (em->flags & EXTENT_FLAG_MERGED)) { + free_extent_map(em); + em = NULL; + } +-- +2.43.0 + diff --git a/queue-6.11/btrfs-fix-error-propagation-of-split-bios.patch b/queue-6.11/btrfs-fix-error-propagation-of-split-bios.patch new file mode 100644 index 00000000000..dd1990b7ca8 --- /dev/null +++ b/queue-6.11/btrfs-fix-error-propagation-of-split-bios.patch @@ -0,0 +1,243 @@ +From ba1a42ef6f2a8f4e82e915247d69fb42ed3ec46c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Oct 2024 22:52:06 +0900 +Subject: btrfs: fix error propagation of split bios + +From: Naohiro Aota + +[ Upstream commit d48e1dea3931de64c26717adc2b89743c7ab6594 ] + +The purpose of btrfs_bbio_propagate_error() shall be propagating an error +of split bio to its original btrfs_bio, and tell the error to the upper +layer. However, it's not working well on some cases. + +* Case 1. Immediate (or quick) end_bio with an error + +When btrfs sends btrfs_bio to mirrored devices, btrfs calls +btrfs_bio_end_io() when all the mirroring bios are completed. If that +btrfs_bio was split, it is from btrfs_clone_bioset and its end_io function +is btrfs_orig_write_end_io. For this case, btrfs_bbio_propagate_error() +accesses the orig_bbio's bio context to increase the error count. + +That works well in most cases. However, if the end_io is called enough +fast, orig_bbio's (remaining part after split) bio context may not be +properly set at that time. Since the bio context is set when the orig_bbio +(the last btrfs_bio) is sent to devices, that might be too late for earlier +split btrfs_bio's completion. That will result in NULL pointer +dereference. + +That bug is easily reproducible by running btrfs/146 on zoned devices [1] +and it shows the following trace. + +[1] You need raid-stripe-tree feature as it create "-d raid0 -m raid1" FS. 
+ + BUG: kernel NULL pointer dereference, address: 0000000000000020 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: Oops: 0000 [#1] PREEMPT SMP PTI + CPU: 1 UID: 0 PID: 13 Comm: kworker/u32:1 Not tainted 6.11.0-rc7-BTRFS-ZNS+ #474 + Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 + Workqueue: writeback wb_workfn (flush-btrfs-5) + RIP: 0010:btrfs_bio_end_io+0xae/0xc0 [btrfs] + BTRFS error (device dm-0): bdev /dev/mapper/error-test errs: wr 2, rd 0, flush 0, corrupt 0, gen 0 + RSP: 0018:ffffc9000006f248 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: ffff888005a7f080 RCX: ffffc9000006f1dc + RDX: 0000000000000000 RSI: 000000000000000a RDI: ffff888005a7f080 + RBP: ffff888011dfc540 R08: 0000000000000000 R09: 0000000000000001 + R10: ffffffff82e508e0 R11: 0000000000000005 R12: ffff88800ddfbe58 + R13: ffff888005a7f080 R14: ffff888005a7f158 R15: ffff888005a7f158 + FS: 0000000000000000(0000) GS:ffff88803ea80000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000020 CR3: 0000000002e22006 CR4: 0000000000370ef0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + + ? __die_body.cold+0x19/0x26 + ? page_fault_oops+0x13e/0x2b0 + ? _printk+0x58/0x73 + ? do_user_addr_fault+0x5f/0x750 + ? exc_page_fault+0x76/0x240 + ? asm_exc_page_fault+0x22/0x30 + ? btrfs_bio_end_io+0xae/0xc0 [btrfs] + ? btrfs_log_dev_io_error+0x7f/0x90 [btrfs] + btrfs_orig_write_end_io+0x51/0x90 [btrfs] + dm_submit_bio+0x5c2/0xa50 [dm_mod] + ? find_held_lock+0x2b/0x80 + ? blk_try_enter_queue+0x90/0x1e0 + __submit_bio+0xe0/0x130 + ? ktime_get+0x10a/0x160 + ? lockdep_hardirqs_on+0x74/0x100 + submit_bio_noacct_nocheck+0x199/0x410 + btrfs_submit_bio+0x7d/0x150 [btrfs] + btrfs_submit_chunk+0x1a1/0x6d0 [btrfs] + ? lockdep_hardirqs_on+0x74/0x100 + ? 
__folio_start_writeback+0x10/0x2c0 + btrfs_submit_bbio+0x1c/0x40 [btrfs] + submit_one_bio+0x44/0x60 [btrfs] + submit_extent_folio+0x13f/0x330 [btrfs] + ? btrfs_set_range_writeback+0xa3/0xd0 [btrfs] + extent_writepage_io+0x18b/0x360 [btrfs] + extent_write_locked_range+0x17c/0x340 [btrfs] + ? __pfx_end_bbio_data_write+0x10/0x10 [btrfs] + run_delalloc_cow+0x71/0xd0 [btrfs] + btrfs_run_delalloc_range+0x176/0x500 [btrfs] + ? find_lock_delalloc_range+0x119/0x260 [btrfs] + writepage_delalloc+0x2ab/0x480 [btrfs] + extent_write_cache_pages+0x236/0x7d0 [btrfs] + btrfs_writepages+0x72/0x130 [btrfs] + do_writepages+0xd4/0x240 + ? find_held_lock+0x2b/0x80 + ? wbc_attach_and_unlock_inode+0x12c/0x290 + ? wbc_attach_and_unlock_inode+0x12c/0x290 + __writeback_single_inode+0x5c/0x4c0 + ? do_raw_spin_unlock+0x49/0xb0 + writeback_sb_inodes+0x22c/0x560 + __writeback_inodes_wb+0x4c/0xe0 + wb_writeback+0x1d6/0x3f0 + wb_workfn+0x334/0x520 + process_one_work+0x1ee/0x570 + ? lock_is_held_type+0xc6/0x130 + worker_thread+0x1d1/0x3b0 + ? __pfx_worker_thread+0x10/0x10 + kthread+0xee/0x120 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x30/0x50 + ? __pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1a/0x30 + + Modules linked in: dm_mod btrfs blake2b_generic xor raid6_pq rapl + CR2: 0000000000000020 + +* Case 2. Earlier completion of orig_bbio for mirrored btrfs_bios + +btrfs_bbio_propagate_error() assumes the end_io function for orig_bbio is +called last among split bios. In that case, btrfs_orig_write_end_io() sets +the bio->bi_status to BLK_STS_IOERR by seeing the bioc->error [2]. +Otherwise, the increased orig_bio's bioc->error is not checked by anyone +and return BLK_STS_OK to the upper layer. + +[2] Actually, this is not true. Because we only increases orig_bioc->errors +by max_errors, the condition "atomic_read(&bioc->error) > bioc->max_errors" +is still not met if only one split btrfs_bio fails. + +* Case 3. 
Later completion of orig_bbio for un-mirrored btrfs_bios + +In contrast to the above case, btrfs_bbio_propagate_error() is not working +well if un-mirrored orig_bbio is completed last. It sets +orig_bbio->bio.bi_status to the btrfs_bio's error. But, that is easily +over-written by orig_bbio's completion status. If the status is BLK_STS_OK, +the upper layer would not know the failure. + +* Solution + +Considering the above cases, we can only save the error status in the +orig_bbio (remaining part after split) itself as it is always +available. Also, the saved error status should be propagated when all the +split btrfs_bios are finished (i.e., bbio->pending_ios == 0). + +This commit introduces "status" to btrfs_bio and saves the first error of +split bios to original btrfs_bio's "status" variable. When all the split +bios are finished, the saved status is loaded into original btrfs_bio's +status. + +With this commit, btrfs/146 on zoned devices does not hit the NULL pointer +dereference anymore.
+ +Fixes: 852eee62d31a ("btrfs: allow btrfs_submit_bio to split bios") +CC: stable@vger.kernel.org # 6.6+ +Reviewed-by: Qu Wenruo +Reviewed-by: Christoph Hellwig +Reviewed-by: Johannes Thumshirn +Signed-off-by: Naohiro Aota +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/bio.c | 37 +++++++++++++------------------------ + fs/btrfs/bio.h | 3 +++ + 2 files changed, 16 insertions(+), 24 deletions(-) + +diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c +index e93d376796a28..31e437d94869d 100644 +--- a/fs/btrfs/bio.c ++++ b/fs/btrfs/bio.c +@@ -49,6 +49,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info, + bbio->end_io = end_io; + bbio->private = private; + atomic_set(&bbio->pending_ios, 1); ++ WRITE_ONCE(bbio->status, BLK_STS_OK); + } + + /* +@@ -120,41 +121,29 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio) + } + } + +-static void btrfs_orig_write_end_io(struct bio *bio); +- +-static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio, +- struct btrfs_bio *orig_bbio) +-{ +- /* +- * For writes we tolerate nr_mirrors - 1 write failures, so we can't +- * just blindly propagate a write failure here. Instead increment the +- * error count in the original I/O context so that it is guaranteed to +- * be larger than the error tolerance. 
+- */ +- if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) { +- struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private; +- struct btrfs_io_context *orig_bioc = orig_stripe->bioc; +- +- atomic_add(orig_bioc->max_errors, &orig_bioc->error); +- } else { +- orig_bbio->bio.bi_status = bbio->bio.bi_status; +- } +-} +- + void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status) + { + bbio->bio.bi_status = status; + if (bbio->bio.bi_pool == &btrfs_clone_bioset) { + struct btrfs_bio *orig_bbio = bbio->private; + +- if (bbio->bio.bi_status) +- btrfs_bbio_propagate_error(bbio, orig_bbio); + btrfs_cleanup_bio(bbio); + bbio = orig_bbio; + } + +- if (atomic_dec_and_test(&bbio->pending_ios)) ++ /* ++ * At this point, bbio always points to the original btrfs_bio. Save ++ * the first error in it. ++ */ ++ if (status != BLK_STS_OK) ++ cmpxchg(&bbio->status, BLK_STS_OK, status); ++ ++ if (atomic_dec_and_test(&bbio->pending_ios)) { ++ /* Load split bio's error which might be set above. */ ++ if (status == BLK_STS_OK) ++ bbio->bio.bi_status = READ_ONCE(bbio->status); + __btrfs_bio_end_io(bbio); ++ } + } + + static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror) +diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h +index d9dd5276093df..043f94562166b 100644 +--- a/fs/btrfs/bio.h ++++ b/fs/btrfs/bio.h +@@ -79,6 +79,9 @@ struct btrfs_bio { + /* File system that this I/O operates on. */ + struct btrfs_fs_info *fs_info; + ++ /* Save the first error status of split bio. */ ++ blk_status_t status; ++ + /* + * This member must come last, bio_alloc_bioset will allocate enough + * bytes for entire btrfs_bio but relies on bio being last. 
+-- +2.43.0 + diff --git a/queue-6.11/btrfs-fix-extent-map-merging-not-happening-for-adjac.patch b/queue-6.11/btrfs-fix-extent-map-merging-not-happening-for-adjac.patch new file mode 100644 index 00000000000..4f2521006ca --- /dev/null +++ b/queue-6.11/btrfs-fix-extent-map-merging-not-happening-for-adjac.patch @@ -0,0 +1,109 @@ +From 5fa935d4185f50c9e50ded8a6982ca433892847a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Oct 2024 16:23:00 +0000 +Subject: btrfs: fix extent map merging not happening for adjacent extents + +From: Filipe Manana + +[ Upstream commit a0f0625390858321525c2a8d04e174a546bd19b3 ] + +If we have 3 or more adjacent extents in a file, that is, consecutive file +extent items pointing to adjacent extents, within a contiguous file range +and compatible flags, we end up not merging all the extents into a single +extent map. + +For example: + + $ mkfs.btrfs -f /dev/sdc + $ mount /dev/sdc /mnt/sdc + + $ xfs_io -f -d -c "pwrite -b 64K 0 64K" \ + -c "pwrite -b 64K 64K 64K" \ + -c "pwrite -b 64K 128K 64K" \ + -c "pwrite -b 64K 192K 64K" \ + /mnt/sdc/foo + +After all the ordered extents complete we unpin the extent maps and try +to merge them, but instead of getting a single extent map we get two +because: + +1) When the first ordered extent completes (file range [0, 64K)) we + unpin its extent map and attempt to merge it with the extent map for + the range [64K, 128K), but we can't because that extent map is still + pinned; + +2) When the second ordered extent completes (file range [64K, 128K)), we + unpin its extent map and merge it with the previous extent map, for + file range [0, 64K), but we can't merge with the next extent map, for + the file range [128K, 192K), because this one is still pinned. 
+ + The merged extent map for the file range [0, 128K) gets the flag + EXTENT_MAP_MERGED set; + +3) When the third ordered extent completes (file range [128K, 192K)), we + unpin its extent map and attempt to merge it with the previous extent + map, for file range [0, 128K), but we can't because that extent map + has the flag EXTENT_MAP_MERGED set (mergeable_maps() returns false + due to different flags) while the extent map for the range [128K, 192K) + doesn't have that flag set. + + We also can't merge it with the next extent map, for file range + [192K, 256K), because that one is still pinned. + + At this moment we have 3 extent maps: + + One for file range [0, 128K), with the flag EXTENT_MAP_MERGED set. + One for file range [128K, 192K). + One for file range [192K, 256K) which is still pinned; + +4) When the fourth and final extent completes (file range [192K, 256K)), + we unpin its extent map and attempt to merge it with the previous + extent map, for file range [128K, 192K), which succeeds since none + of these extent maps have the EXTENT_MAP_MERGED flag set. + + So we end up with 2 extent maps: + + One for file range [0, 128K), with the flag EXTENT_MAP_MERGED set. + One for file range [128K, 256K), with the flag EXTENT_MAP_MERGED set. + + Since after merging extent maps we don't attempt to merge again, that + is, merge the resulting extent map with the one that is now preceding + it (and the one following it), we end up with those two extent maps, + when we could have had a single extent map to represent the whole file. + +Fix this by making mergeable_maps() ignore the EXTENT_MAP_MERGED flag. +While this doesn't present any functional issue, it prevents the merging +of extent maps which allows to save memory, and can make defrag not +merging extents too (that will be addressed in the next patch). 
+ +Fixes: 199257a78bb0 ("btrfs: defrag: don't use merged extent map for their generation check") +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent_map.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c +index 72ae8f64482c6..b56ec83bf9528 100644 +--- a/fs/btrfs/extent_map.c ++++ b/fs/btrfs/extent_map.c +@@ -227,7 +227,12 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma + if (extent_map_end(prev) != next->start) + return false; + +- if (prev->flags != next->flags) ++ /* ++ * The merged flag is not an on-disk flag, it just indicates we had the ++ * extent maps of 2 (or more) adjacent extents merged, so factor it out. ++ */ ++ if ((prev->flags & ~EXTENT_FLAG_MERGED) != ++ (next->flags & ~EXTENT_FLAG_MERGED)) + return false; + + if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1) +-- +2.43.0 + diff --git a/queue-6.11/btrfs-fix-use-after-free-of-block-device-file-in-__b.patch b/queue-6.11/btrfs-fix-use-after-free-of-block-device-file-in-__b.patch new file mode 100644 index 00000000000..cd2a20a5013 --- /dev/null +++ b/queue-6.11/btrfs-fix-use-after-free-of-block-device-file-in-__b.patch @@ -0,0 +1,78 @@ +From 51a566b2d28380de4fe53a243104685e9e5fbc55 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Oct 2024 22:02:15 +0800 +Subject: btrfs: fix use-after-free of block device file in + __btrfs_free_extra_devids() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Zhihao Cheng + +[ Upstream commit aec8e6bf839101784f3ef037dcdb9432c3f32343 ] + +Mounting btrfs from two images (which have the same one fsid and two +different dev_uuids) in certain executing order may trigger an UAF for +variable 'device->bdev_file' in __btrfs_free_extra_devids(). And +following are the details: + +1. 
Attach image_1 to loop0, attach image_2 to loop1, and scan btrfs + devices by ioctl(BTRFS_IOC_SCAN_DEV): + + / btrfs_device_1 → loop0 + fs_device + \ btrfs_device_2 → loop1 +2. mount /dev/loop0 /mnt + btrfs_open_devices + btrfs_device_1->bdev_file = btrfs_get_bdev_and_sb(loop0) + btrfs_device_2->bdev_file = btrfs_get_bdev_and_sb(loop1) + btrfs_fill_super + open_ctree + fail: btrfs_close_devices // -ENOMEM + btrfs_close_bdev(btrfs_device_1) + fput(btrfs_device_1->bdev_file) + // btrfs_device_1->bdev_file is freed + btrfs_close_bdev(btrfs_device_2) + fput(btrfs_device_2->bdev_file) + +3. mount /dev/loop1 /mnt + btrfs_open_devices + btrfs_get_bdev_and_sb(&bdev_file) + // EIO, btrfs_device_1->bdev_file is not assigned, + // which points to a freed memory area + btrfs_device_2->bdev_file = btrfs_get_bdev_and_sb(loop1) + btrfs_fill_super + open_ctree + btrfs_free_extra_devids + if (btrfs_device_1->bdev_file) + fput(btrfs_device_1->bdev_file) // UAF ! + +Fix it by setting 'device->bdev_file' as 'NULL' after closing the +btrfs_device in btrfs_close_one_device(). 
+ +Fixes: 142388194191 ("btrfs: do not background blkdev_put()") +CC: stable@vger.kernel.org # 4.19+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=219408 +Signed-off-by: Zhihao Cheng +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/volumes.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index fcedc43ef291a..0485143cd75e0 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -1103,6 +1103,7 @@ static void btrfs_close_one_device(struct btrfs_device *device) + if (device->bdev) { + fs_devices->open_devices--; + device->bdev = NULL; ++ device->bdev_file = NULL; + } + clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); + btrfs_destroy_dev_zone_info(device); +-- +2.43.0 + diff --git a/queue-6.11/btrfs-merge-btrfs_orig_bbio_end_io-into-btrfs_bio_en.patch b/queue-6.11/btrfs-merge-btrfs_orig_bbio_end_io-into-btrfs_bio_en.patch new file mode 100644 index 00000000000..5e4ebf9f358 --- /dev/null +++ b/queue-6.11/btrfs-merge-btrfs_orig_bbio_end_io-into-btrfs_bio_en.patch @@ -0,0 +1,152 @@ +From 7b6c09ddc9114f07ef62e41a6c9e79046df8adf1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 24 Aug 2024 19:36:43 +0930 +Subject: btrfs: merge btrfs_orig_bbio_end_io() into btrfs_bio_end_io() + +From: Qu Wenruo + +[ Upstream commit 9ca0e58cb752b09816f56f7a3147a39773d5e831 ] + +There are only two differences between the two functions: + +- btrfs_orig_bbio_end_io() does extra error propagation + This is mostly to allow tolerance for write errors. + +- btrfs_orig_bbio_end_io() does extra pending_ios check + This check can handle both the original bio, or the cloned one. + (All accounting happens in the original one). + +This makes btrfs_orig_bbio_end_io() a much safer call. +In fact we already had a double freeing error due to usage of +btrfs_bio_end_io() in the error path of btrfs_submit_chunk(). 
+ +So just move the whole content of btrfs_orig_bbio_end_io() into +btrfs_bio_end_io(). + +For normal paths this brings no change, because they are already calling +btrfs_orig_bbio_end_io() in the first place. + +For error paths (not only inside bio.c but also external callers), this +change will introduce extra checks, especially for external callers, as +they will error out without submitting the btrfs bio. + +But considering it's already in the error path, such slower but much +safer checks are still an overall win. + +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: d48e1dea3931 ("btrfs: fix error propagation of split bios") +Signed-off-by: Sasha Levin +--- + fs/btrfs/bio.c | 29 +++++++++++------------------ + 1 file changed, 11 insertions(+), 18 deletions(-) + +diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c +index b4e31ae17cd95..e93d376796a28 100644 +--- a/fs/btrfs/bio.c ++++ b/fs/btrfs/bio.c +@@ -120,12 +120,6 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio) + } + } + +-void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status) +-{ +- bbio->bio.bi_status = status; +- __btrfs_bio_end_io(bbio); +-} +- + static void btrfs_orig_write_end_io(struct bio *bio); + + static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio, +@@ -147,8 +141,9 @@ static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio, + } + } + +-static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio) ++void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status) + { ++ bbio->bio.bi_status = status; + if (bbio->bio.bi_pool == &btrfs_clone_bioset) { + struct btrfs_bio *orig_bbio = bbio->private; + +@@ -179,7 +174,7 @@ static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror) + static void btrfs_repair_done(struct btrfs_failed_bio *fbio) + { + if (atomic_dec_and_test(&fbio->repair_count)) { +- btrfs_orig_bbio_end_io(fbio->bbio); ++ btrfs_bio_end_io(fbio->bbio, fbio->bbio->bio.bi_status); + 
mempool_free(fbio, &btrfs_failed_bio_pool); + } + } +@@ -326,7 +321,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de + if (fbio) + btrfs_repair_done(fbio); + else +- btrfs_orig_bbio_end_io(bbio); ++ btrfs_bio_end_io(bbio, bbio->bio.bi_status); + } + + static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev) +@@ -360,7 +355,7 @@ static void btrfs_end_bio_work(struct work_struct *work) + if (is_data_bbio(bbio)) + btrfs_check_read_bio(bbio, bbio->bio.bi_private); + else +- btrfs_orig_bbio_end_io(bbio); ++ btrfs_bio_end_io(bbio, bbio->bio.bi_status); + } + + static void btrfs_simple_end_io(struct bio *bio) +@@ -380,7 +375,7 @@ static void btrfs_simple_end_io(struct bio *bio) + } else { + if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status) + btrfs_record_physical_zoned(bbio); +- btrfs_orig_bbio_end_io(bbio); ++ btrfs_bio_end_io(bbio, bbio->bio.bi_status); + } + } + +@@ -394,7 +389,7 @@ static void btrfs_raid56_end_io(struct bio *bio) + if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) + btrfs_check_read_bio(bbio, NULL); + else +- btrfs_orig_bbio_end_io(bbio); ++ btrfs_bio_end_io(bbio, bbio->bio.bi_status); + + btrfs_put_bioc(bioc); + } +@@ -424,7 +419,7 @@ static void btrfs_orig_write_end_io(struct bio *bio) + if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status) + stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; + +- btrfs_orig_bbio_end_io(bbio); ++ btrfs_bio_end_io(bbio, bbio->bio.bi_status); + btrfs_put_bioc(bioc); + } + +@@ -593,7 +588,7 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free) + + /* If an error occurred we just want to clean up the bio and move on. 
*/ + if (bio->bi_status) { +- btrfs_orig_bbio_end_io(async->bbio); ++ btrfs_bio_end_io(async->bbio, async->bbio->bio.bi_status); + return; + } + +@@ -765,11 +760,9 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) + ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset); + ASSERT(remaining); + +- remaining->bio.bi_status = ret; +- btrfs_orig_bbio_end_io(remaining); ++ btrfs_bio_end_io(remaining, ret); + } +- bbio->bio.bi_status = ret; +- btrfs_orig_bbio_end_io(bbio); ++ btrfs_bio_end_io(bbio, ret); + /* Do not submit another chunk */ + return true; + } +-- +2.43.0 + diff --git a/queue-6.11/cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch b/queue-6.11/cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch new file mode 100644 index 00000000000..295432e966b --- /dev/null +++ b/queue-6.11/cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch @@ -0,0 +1,154 @@ +From aae339a4d594e3581eb68221a4bff0aa58baf7de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Oct 2024 11:24:56 +0000 +Subject: cgroup/bpf: use a dedicated workqueue for cgroup bpf destruction + +From: Chen Ridong + +[ Upstream commit 117932eea99b729ee5d12783601a4f7f5fd58a23 ] + +A hung_task problem shown below was found: + +INFO: task kworker/0:0:8 blocked for more than 327 seconds. +"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. +Workqueue: events cgroup_bpf_release +Call Trace: + + __schedule+0x5a2/0x2050 + ? find_held_lock+0x33/0x100 + ? wq_worker_sleeping+0x9e/0xe0 + schedule+0x9f/0x180 + schedule_preempt_disabled+0x25/0x50 + __mutex_lock+0x512/0x740 + ? cgroup_bpf_release+0x1e/0x4d0 + ? cgroup_bpf_release+0xcf/0x4d0 + ? process_scheduled_works+0x161/0x8a0 + ? cgroup_bpf_release+0x1e/0x4d0 + ? mutex_lock_nested+0x2b/0x40 + ? __pfx_delay_tsc+0x10/0x10 + mutex_lock_nested+0x2b/0x40 + cgroup_bpf_release+0xcf/0x4d0 + ? process_scheduled_works+0x161/0x8a0 + ? trace_event_raw_event_workqueue_execute_start+0x64/0xd0 + ? 
process_scheduled_works+0x161/0x8a0 + process_scheduled_works+0x23a/0x8a0 + worker_thread+0x231/0x5b0 + ? __pfx_worker_thread+0x10/0x10 + kthread+0x14d/0x1c0 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x59/0x70 + ? __pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1b/0x30 + + +This issue can be reproduced by the following pressure test: +1. A large number of cpuset cgroups are deleted. +2. Set cpu on and off repeatedly. +3. Set watchdog_thresh repeatedly. +The scripts can be obtained at LINK mentioned above the signature. + +The reason for this issue is cgroup_mutex and cpu_hotplug_lock are +acquired in different tasks, which may lead to deadlock. +It can lead to a deadlock through the following steps: +1. A large number of cpusets are deleted asynchronously, which puts a + large number of cgroup_bpf_release works into system_wq. The max_active + of system_wq is WQ_DFL_ACTIVE(256). Consequently, all active works are + cgroup_bpf_release works, and many cgroup_bpf_release works will be put + into inactive queue. As illustrated in the diagram, there are 256 (in + the active queue) + n (in the inactive queue) works. +2. Setting watchdog_thresh will hold cpu_hotplug_lock.read and put + smp_call_on_cpu work into system_wq. However step 1 has already filled + system_wq, 'sscs.work' is put into inactive queue. 'sscs.work' has + to wait until the works that were put into the inactive queue earlier + have executed (n cgroup_bpf_release), so it will be blocked for a while. +3. Cpu offline requires cpu_hotplug_lock.write, which is blocked by step 2. +4. Cpusets that were deleted at step 1 put cgroup_release works into + cgroup_destroy_wq. They are competing to get cgroup_mutex all the time. + When cgroup_mutex is acquired by work at css_killed_work_fn, it will + call cpuset_css_offline, which needs to acquire cpu_hotplug_lock.read. + However, cpuset_css_offline will be blocked for step 3. +5.
At this moment, there are 256 works in active queue that are + cgroup_bpf_release, they are attempting to acquire cgroup_mutex, and as + a result, all of them are blocked. Consequently, sscs.work can not be + executed. Ultimately, this situation leads to four processes being + blocked, forming a deadlock. + +system_wq(step1) WatchDog(step2) cpu offline(step3) cgroup_destroy_wq(step4) +... +2000+ cgroups deleted asyn +256 actives + n inactives + __lockup_detector_reconfigure + P(cpu_hotplug_lock.read) + put sscs.work into system_wq +256 + n + 1(sscs.work) +sscs.work wait to be executed + waiting sscs.work finish + percpu_down_write + P(cpu_hotplug_lock.write) + ...blocking... + css_killed_work_fn + P(cgroup_mutex) + cpuset_css_offline + P(cpu_hotplug_lock.read) + ...blocking... +256 cgroup_bpf_release +mutex_lock(&cgroup_mutex); +..blocking... + +To fix the problem, place cgroup_bpf_release works on a dedicated +workqueue which can break the loop and solve the problem. System wqs are +for misc things which shouldn't create a large number of concurrent work +items. If something is going to generate >WQ_DFL_ACTIVE(256) concurrent +work items, it should use its own dedicated workqueue.
+ +Fixes: 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself") +Cc: stable@vger.kernel.org # v5.3+ +Link: https://lore.kernel.org/cgroups/e90c32d2-2a85-4f28-9154-09c7d320cb60@huawei.com/T/#t +Tested-by: Vishal Chourasia +Signed-off-by: Chen Ridong +Signed-off-by: Tejun Heo +Signed-off-by: Sasha Levin +--- + kernel/bpf/cgroup.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c +index 8ba73042a2395..479a2ea5d9af6 100644 +--- a/kernel/bpf/cgroup.c ++++ b/kernel/bpf/cgroup.c +@@ -24,6 +24,23 @@ + DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE); + EXPORT_SYMBOL(cgroup_bpf_enabled_key); + ++/* ++ * cgroup bpf destruction makes heavy use of work items and there can be a lot ++ * of concurrent destructions. Use a separate workqueue so that cgroup bpf ++ * destruction work items don't end up filling up max_active of system_wq ++ * which may lead to deadlock. ++ */ ++static struct workqueue_struct *cgroup_bpf_destroy_wq; ++ ++static int __init cgroup_bpf_wq_init(void) ++{ ++ cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1); ++ if (!cgroup_bpf_destroy_wq) ++ panic("Failed to alloc workqueue for cgroup bpf destroy.\n"); ++ return 0; ++} ++core_initcall(cgroup_bpf_wq_init); ++ + /* __always_inline is necessary to prevent indirect call through run_prog + * function pointer. 
+ */ +@@ -334,7 +351,7 @@ static void cgroup_bpf_release_fn(struct percpu_ref *ref) + struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt); + + INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release); +- queue_work(system_wq, &cgrp->bpf.release_work); ++ queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work); + } + + /* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through +-- +2.43.0 + diff --git a/queue-6.11/cxl-acpi-ensure-ports-ready-at-cxl_acpi_probe-return.patch b/queue-6.11/cxl-acpi-ensure-ports-ready-at-cxl_acpi_probe-return.patch new file mode 100644 index 00000000000..956db3a088b --- /dev/null +++ b/queue-6.11/cxl-acpi-ensure-ports-ready-at-cxl_acpi_probe-return.patch @@ -0,0 +1,52 @@ +From 766c3af9a1083669e7f2e58be4cbd65e0c861039 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 18:43:40 -0700 +Subject: cxl/acpi: Ensure ports ready at cxl_acpi_probe() return + +From: Dan Williams + +[ Upstream commit 48f62d38a07d464a499fa834638afcfd2b68f852 ] + +In order to ensure root CXL ports are enabled upon cxl_acpi_probe() +when the 'cxl_port' driver is built as a module, arrange for the +module to be pre-loaded or built-in. + +The "Fixes:" but no "Cc: stable" on this patch reflects that the issue +is merely by inspection since the bug that triggered the discovery of +this potential problem [1] is fixed by other means. However, a stable +backport should do no harm. 
+ +Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver") +Link: http://lore.kernel.org/20241004212504.1246-1-gourry@gourry.net [1] +Signed-off-by: Dan Williams +Tested-by: Gregory Price +Reviewed-by: Jonathan Cameron +Reviewed-by: Ira Weiny +Link: https://patch.msgid.link/172964781969.81806.17276352414854540808.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Ira Weiny +Signed-off-by: Sasha Levin +--- + drivers/cxl/acpi.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c +index 82b78e331d8ed..432b7cfd12a8e 100644 +--- a/drivers/cxl/acpi.c ++++ b/drivers/cxl/acpi.c +@@ -924,6 +924,13 @@ static void __exit cxl_acpi_exit(void) + + /* load before dax_hmem sees 'Soft Reserved' CXL ranges */ + subsys_initcall(cxl_acpi_init); ++ ++/* ++ * Arrange for host-bridge ports to be active synchronous with ++ * cxl_acpi_probe() exit. ++ */ ++MODULE_SOFTDEP("pre: cxl_port"); ++ + module_exit(cxl_acpi_exit); + MODULE_DESCRIPTION("CXL ACPI: Platform Support"); + MODULE_LICENSE("GPL v2"); +-- +2.43.0 + diff --git a/queue-6.11/cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch b/queue-6.11/cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch new file mode 100644 index 00000000000..700db3187d9 --- /dev/null +++ b/queue-6.11/cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch @@ -0,0 +1,65 @@ +From 6d020f71c952bb55609ca8d6c8c4d20280696f47 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 18:43:32 -0700 +Subject: cxl/port: Fix cxl_bus_rescan() vs bus_rescan_devices() + +From: Dan Williams + +[ Upstream commit 3d6ebf16438de5d712030fefbb4182b46373d677 ] + +It turns out since its original introduction, pre-2.6.12, +bus_rescan_devices() has skipped devices that might be in the process of +attaching or detaching from their driver. For CXL this behavior is +unwanted and expects that cxl_bus_rescan() is a probe barrier. 
+ +That behavior is simple enough to achieve with bus_for_each_dev() paired +with call to device_attach(), and it is unclear why bus_rescan_devices() +took the position of lockless consumption of dev->driver which is racy. + +The "Fixes:" but no "Cc: stable" on this patch reflects that the issue +is merely by inspection since the bug that triggered the discovery of +this potential problem [1] is fixed by other means. However, a stable +backport should do no harm. + +Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver") +Link: http://lore.kernel.org/20241004212504.1246-1-gourry@gourry.net [1] +Signed-off-by: Dan Williams +Tested-by: Gregory Price +Reviewed-by: Jonathan Cameron +Reviewed-by: Ira Weiny +Link: https://patch.msgid.link/172964781104.81806.4277549800082443769.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Ira Weiny +Signed-off-by: Sasha Levin +--- + drivers/cxl/core/port.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c +index 1d5007e3795a3..d3237346f6877 100644 +--- a/drivers/cxl/core/port.c ++++ b/drivers/cxl/core/port.c +@@ -2088,11 +2088,18 @@ static void cxl_bus_remove(struct device *dev) + + static struct workqueue_struct *cxl_bus_wq; + +-static void cxl_bus_rescan_queue(struct work_struct *w) ++static int cxl_rescan_attach(struct device *dev, void *data) + { +- int rc = bus_rescan_devices(&cxl_bus_type); ++ int rc = device_attach(dev); ++ ++ dev_vdbg(dev, "rescan: %s\n", rc ? 
"attach" : "detached"); + +- pr_debug("CXL bus rescan result: %d\n", rc); ++ return 0; ++} ++ ++static void cxl_bus_rescan_queue(struct work_struct *w) ++{ ++ bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_rescan_attach); + } + + void cxl_bus_rescan(void) +-- +2.43.0 + diff --git a/queue-6.11/drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-w.patch b/queue-6.11/drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-w.patch new file mode 100644 index 00000000000..d168cece0fb --- /dev/null +++ b/queue-6.11/drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-w.patch @@ -0,0 +1,100 @@ +From 9b1bad33fb30daa0772b6492902fe79557767f10 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Oct 2024 15:56:39 +0100 +Subject: drm/amd/pm: Vangogh: Fix kernel memory out of bounds write + +From: Tvrtko Ursulin + +[ Upstream commit 4aa923a6e6406b43566ef6ac35a3d9a3197fa3e8 ] + +KASAN reports that the GPU metrics table allocated in +vangogh_tables_init() is not large enough for the memset done in +smu_cmn_init_soft_gpu_metrics(). Condensed report follows: + +[ 33.861314] BUG: KASAN: slab-out-of-bounds in smu_cmn_init_soft_gpu_metrics+0x73/0x200 [amdgpu] +[ 33.861799] Write of size 168 at addr ffff888129f59500 by task mangoapp/1067 +... 
+[ 33.861808] CPU: 6 UID: 1000 PID: 1067 Comm: mangoapp Tainted: G W 6.12.0-rc4 #356 1a56f59a8b5182eeaf67eb7cb8b13594dd23b544 +[ 33.861816] Tainted: [W]=WARN +[ 33.861818] Hardware name: Valve Galileo/Galileo, BIOS F7G0107 12/01/2023 +[ 33.861822] Call Trace: +[ 33.861826] +[ 33.861829] dump_stack_lvl+0x66/0x90 +[ 33.861838] print_report+0xce/0x620 +[ 33.861853] kasan_report+0xda/0x110 +[ 33.862794] kasan_check_range+0xfd/0x1a0 +[ 33.862799] __asan_memset+0x23/0x40 +[ 33.862803] smu_cmn_init_soft_gpu_metrics+0x73/0x200 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] +[ 33.863306] vangogh_get_gpu_metrics_v2_4+0x123/0xad0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] +[ 33.864257] vangogh_common_get_gpu_metrics+0xb0c/0xbc0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] +[ 33.865682] amdgpu_dpm_get_gpu_metrics+0xcc/0x110 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] +[ 33.866160] amdgpu_get_gpu_metrics+0x154/0x2d0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] +[ 33.867135] dev_attr_show+0x43/0xc0 +[ 33.867147] sysfs_kf_seq_show+0x1f1/0x3b0 +[ 33.867155] seq_read_iter+0x3f8/0x1140 +[ 33.867173] vfs_read+0x76c/0xc50 +[ 33.867198] ksys_read+0xfb/0x1d0 +[ 33.867214] do_syscall_64+0x90/0x160 +... +[ 33.867353] Allocated by task 378 on cpu 7 at 22.794876s: +[ 33.867358] kasan_save_stack+0x33/0x50 +[ 33.867364] kasan_save_track+0x17/0x60 +[ 33.867367] __kasan_kmalloc+0x87/0x90 +[ 33.867371] vangogh_init_smc_tables+0x3f9/0x840 [amdgpu] +[ 33.867835] smu_sw_init+0xa32/0x1850 [amdgpu] +[ 33.868299] amdgpu_device_init+0x467b/0x8d90 [amdgpu] +[ 33.868733] amdgpu_driver_load_kms+0x19/0xf0 [amdgpu] +[ 33.869167] amdgpu_pci_probe+0x2d6/0xcd0 [amdgpu] +[ 33.869608] local_pci_probe+0xda/0x180 +[ 33.869614] pci_device_probe+0x43f/0x6b0 + +Empirically we can confirm that the former allocates 152 bytes for the +table, while the latter memsets the 168 large block. 
+ +Root cause appears that when GPU metrics tables for v2_4 parts were added +it was not considered to enlarge the table to fit. + +The fix in this patch is rather "brute force" and perhaps later should be +done in a smarter way, by extracting and consolidating the part version to +size logic to a common helper, instead of brute forcing the largest +possible allocation. Nevertheless, for now this works and fixes the out of +bounds write. + +v2: + * Drop impossible v3_0 case. (Mario) + +Signed-off-by: Tvrtko Ursulin +Fixes: 41cec40bc9ba ("drm/amd/pm: Vangogh: Add new gpu_metrics_v2_4 to acquire gpu_metrics") +Cc: Mario Limonciello +Cc: Evan Quan +Cc: Wenyou Yang +Cc: Alex Deucher +Reviewed-by: Mario Limonciello +Link: https://lore.kernel.org/r/20241025145639.19124-1-tursulin@igalia.com +Signed-off-by: Mario Limonciello +Signed-off-by: Alex Deucher +(cherry picked from commit 0880f58f9609f0200483a49429af0f050d281703) +Cc: stable@vger.kernel.org # v6.6+ +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +index 22737b11b1bfb..1fe020f1f4dbe 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +@@ -242,7 +242,9 @@ static int vangogh_tables_init(struct smu_context *smu) + goto err0_out; + smu_table->metrics_time = 0; + +- smu_table->gpu_metrics_table_size = max(sizeof(struct gpu_metrics_v2_3), sizeof(struct gpu_metrics_v2_2)); ++ smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2); ++ smu_table->gpu_metrics_table_size = max(smu_table->gpu_metrics_table_size, sizeof(struct gpu_metrics_v2_3)); ++ smu_table->gpu_metrics_table_size = max(smu_table->gpu_metrics_table_size, sizeof(struct gpu_metrics_v2_4)); + smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); 
+ if (!smu_table->gpu_metrics_table) + goto err1_out; +-- +2.43.0 + diff --git a/queue-6.11/drm-amdgpu-smu13-fix-profile-reporting.patch b/queue-6.11/drm-amdgpu-smu13-fix-profile-reporting.patch new file mode 100644 index 00000000000..95b5b269e04 --- /dev/null +++ b/queue-6.11/drm-amdgpu-smu13-fix-profile-reporting.patch @@ -0,0 +1,62 @@ +From a3cfd48d8a538fd8f5bf83fc3e0dea6ddbb80b59 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Oct 2024 09:13:21 -0400 +Subject: drm/amdgpu/smu13: fix profile reporting + +From: Alex Deucher + +[ Upstream commit 935abb86a95def8c20dbb184ce30051db168e541 ] + +The following 3 commits landed in parallel: +commit d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") +commit 7a1613e47e65 ("drm/amdgpu/smu13: always apply the powersave optimization") +commit 7c210ca5a2d7 ("drm/amdgpu: handle default profile on on devices without fullscreen 3D") +While everything is set correctly, this caused the profile to be +reported incorrectly because both the powersave and fullscreen3d bits +were set in the mask and when the driver prints the profile, it looks +for the first bit set. 
+ +Fixes: d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") +Reviewed-by: Kenneth Feng +Signed-off-by: Alex Deucher +(cherry picked from commit ecfe9b237687a55d596fff0650ccc8cc455edd3f) +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +index cb923e33fd6fc..d53e162dcd8de 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +@@ -2485,7 +2485,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + DpmActivityMonitorCoeffInt_t *activity_monitor = + &(activity_monitor_external.DpmActivityMonitorCoeffInt); + int workload_type, ret = 0; +- u32 workload_mask; ++ u32 workload_mask, selected_workload_mask; + + smu->power_profile_mode = input[size]; + +@@ -2552,7 +2552,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + if (workload_type < 0) + return -EINVAL; + +- workload_mask = 1 << workload_type; ++ selected_workload_mask = workload_mask = 1 << workload_type; + + /* Add optimizations for SMU13.0.0/10. 
Reuse the power saving profile */ + if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && +@@ -2572,7 +2572,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + workload_mask, + NULL); + if (!ret) +- smu->workload_mask = workload_mask; ++ smu->workload_mask = selected_workload_mask; + + return ret; + } +-- +2.43.0 + diff --git a/queue-6.11/drm-connector-hdmi-fix-memory-leak-in-drm_display_mo.patch b/queue-6.11/drm-connector-hdmi-fix-memory-leak-in-drm_display_mo.patch new file mode 100644 index 00000000000..0bd8d75ec59 --- /dev/null +++ b/queue-6.11/drm-connector-hdmi-fix-memory-leak-in-drm_display_mo.patch @@ -0,0 +1,162 @@ +From 4d87c86a04ab2ae04c89669954e50908ab48df70 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Oct 2024 10:35:03 +0800 +Subject: drm/connector: hdmi: Fix memory leak in + drm_display_mode_from_cea_vic() + +From: Jinjie Ruan + +[ Upstream commit 926163342a2e7595d950e84c17c693b1272bd491 ] + +modprobe drm_connector_test and then rmmod drm_connector_test, +the following memory leak occurs. + +The `mode` allocated in drm_mode_duplicate() called by +drm_display_mode_from_cea_vic() is not freed, which cause the memory leak: + + unreferenced object 0xffffff80cb0ee400 (size 128): + comm "kunit_try_catch", pid 1948, jiffies 4294950339 + hex dump (first 32 bytes): + 14 44 02 00 80 07 d8 07 04 08 98 08 00 00 38 04 .D............8. + 3c 04 41 04 65 04 00 00 05 00 00 00 00 00 00 00 <.A.e........... + backtrace (crc 90e9585c): + [<00000000ec42e3d7>] kmemleak_alloc+0x34/0x40 + [<00000000d0ef055a>] __kmalloc_cache_noprof+0x26c/0x2f4 + [<00000000c2062161>] drm_mode_duplicate+0x44/0x19c + [<00000000f96c74aa>] drm_display_mode_from_cea_vic+0x88/0x98 + [<00000000d8f2c8b4>] 0xffffffdc982a4868 + [<000000005d164dbc>] kunit_try_run_case+0x13c/0x3ac + [<000000006fb23398>] kunit_generic_run_threadfn_adapter+0x80/0xec + [<000000006ea56ca0>] kthread+0x2e8/0x374 + [<000000000676063f>] ret_from_fork+0x10/0x20 + ...... 
+ +Free `mode` by using drm_kunit_display_mode_from_cea_vic() +to fix it. + +Cc: stable@vger.kernel.org +Fixes: abb6f74973e2 ("drm/tests: Add HDMI TDMS character rate tests") +Acked-by: Maxime Ripard +Signed-off-by: Jinjie Ruan +Link: https://patchwork.freedesktop.org/patch/msgid/20241030023504.530425-3-ruanjinjie@huawei.com +Signed-off-by: Maxime Ripard +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/tests/drm_connector_test.c | 24 +++++++++++----------- + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/drivers/gpu/drm/tests/drm_connector_test.c b/drivers/gpu/drm/tests/drm_connector_test.c +index 15e36a8db6858..6bba97d0be88e 100644 +--- a/drivers/gpu/drm/tests/drm_connector_test.c ++++ b/drivers/gpu/drm/tests/drm_connector_test.c +@@ -996,7 +996,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb(struct kunit *test) + unsigned long long rate; + struct drm_device *drm = &priv->drm; + +- mode = drm_display_mode_from_cea_vic(drm, 16); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +@@ -1017,7 +1017,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb_10bpc(struct kunit *test) + unsigned long long rate; + struct drm_device *drm = &priv->drm; + +- mode = drm_display_mode_from_cea_vic(drm, 16); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +@@ -1038,7 +1038,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb_10bpc_vic_1(struct kunit *t + unsigned long long rate; + struct drm_device *drm = &priv->drm; + +- mode = drm_display_mode_from_cea_vic(drm, 1); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); + KUNIT_ASSERT_NOT_NULL(test, mode); + + rate = drm_hdmi_compute_mode_clock(mode, 10, HDMI_COLORSPACE_RGB); +@@ -1056,7 +1056,7 @@ static void 
drm_test_drm_hdmi_compute_mode_clock_rgb_12bpc(struct kunit *test) + unsigned long long rate; + struct drm_device *drm = &priv->drm; + +- mode = drm_display_mode_from_cea_vic(drm, 16); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +@@ -1077,7 +1077,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb_12bpc_vic_1(struct kunit *t + unsigned long long rate; + struct drm_device *drm = &priv->drm; + +- mode = drm_display_mode_from_cea_vic(drm, 1); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); + KUNIT_ASSERT_NOT_NULL(test, mode); + + rate = drm_hdmi_compute_mode_clock(mode, 12, HDMI_COLORSPACE_RGB); +@@ -1095,7 +1095,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb_double(struct kunit *test) + unsigned long long rate; + struct drm_device *drm = &priv->drm; + +- mode = drm_display_mode_from_cea_vic(drm, 6); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 6); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_TRUE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +@@ -1118,7 +1118,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv420_valid(struct kunit + unsigned long long rate; + unsigned int vic = *(unsigned int *)test->param_value; + +- mode = drm_display_mode_from_cea_vic(drm, vic); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, vic); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +@@ -1155,7 +1155,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv420_10_bpc(struct kuni + drm_hdmi_compute_mode_clock_yuv420_vic_valid_tests[0]; + unsigned long long rate; + +- mode = drm_display_mode_from_cea_vic(drm, vic); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, vic); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +@@ -1180,7 +1180,7 @@ static void 
drm_test_connector_hdmi_compute_mode_clock_yuv420_12_bpc(struct kuni + drm_hdmi_compute_mode_clock_yuv420_vic_valid_tests[0]; + unsigned long long rate; + +- mode = drm_display_mode_from_cea_vic(drm, vic); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, vic); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +@@ -1203,7 +1203,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv422_8_bpc(struct kunit + struct drm_device *drm = &priv->drm; + unsigned long long rate; + +- mode = drm_display_mode_from_cea_vic(drm, 16); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +@@ -1225,7 +1225,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv422_10_bpc(struct kuni + struct drm_device *drm = &priv->drm; + unsigned long long rate; + +- mode = drm_display_mode_from_cea_vic(drm, 16); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +@@ -1247,7 +1247,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv422_12_bpc(struct kuni + struct drm_device *drm = &priv->drm; + unsigned long long rate; + +- mode = drm_display_mode_from_cea_vic(drm, 16); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16); + KUNIT_ASSERT_NOT_NULL(test, mode); + + KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK); +-- +2.43.0 + diff --git a/queue-6.11/drm-tests-hdmi-fix-memory-leaks-in-drm_display_mode_.patch b/queue-6.11/drm-tests-hdmi-fix-memory-leaks-in-drm_display_mode_.patch new file mode 100644 index 00000000000..0e552452345 --- /dev/null +++ b/queue-6.11/drm-tests-hdmi-fix-memory-leaks-in-drm_display_mode_.patch @@ -0,0 +1,89 @@ +From c6d8355bd7745dd50292fd94fe86e4eb519109cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Oct 2024 10:35:04 +0800 
+Subject: drm/tests: hdmi: Fix memory leaks in drm_display_mode_from_cea_vic() + +From: Jinjie Ruan + +[ Upstream commit add4163aca0d4a86e9fe4aa513865e4237db8aef ] + +modprobe drm_hdmi_state_helper_test and then rmmod it, the following +memory leak occurs. + +The `mode` allocated in drm_mode_duplicate() called by +drm_display_mode_from_cea_vic() is not freed, which causes the memory leak: + + unreferenced object 0xffffff80ccd18100 (size 128): + comm "kunit_try_catch", pid 1851, jiffies 4295059695 + hex dump (first 32 bytes): + 57 62 00 00 80 02 90 02 f0 02 20 03 00 00 e0 01 Wb........ ..... + ea 01 ec 01 0d 02 00 00 0a 00 00 00 00 00 00 00 ................ + backtrace (crc c2f1aa95): + [<000000000f10b11b>] kmemleak_alloc+0x34/0x40 + [<000000001cd4cf73>] __kmalloc_cache_noprof+0x26c/0x2f4 + [<00000000f1f3cffa>] drm_mode_duplicate+0x44/0x19c + [<000000008cbeef13>] drm_display_mode_from_cea_vic+0x88/0x98 + [<0000000019daaacf>] 0xffffffedc11ae69c + [<000000000aad0f85>] kunit_try_run_case+0x13c/0x3ac + [<00000000a9210bac>] kunit_generic_run_threadfn_adapter+0x80/0xec + [<000000000a0b2e9e>] kthread+0x2e8/0x374 + [<00000000bd668858>] ret_from_fork+0x10/0x20 + ...... + +Free `mode` by using drm_kunit_display_mode_from_cea_vic() +to fix it. 
+ +Cc: stable@vger.kernel.org +Fixes: 4af70f19e559 ("drm/tests: Add RGB Quantization tests") +Acked-by: Maxime Ripard +Signed-off-by: Jinjie Ruan +Link: https://patchwork.freedesktop.org/patch/msgid/20241030023504.530425-4-ruanjinjie@huawei.com +Signed-off-by: Maxime Ripard +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +index 34ee95d41f296..294773342e710 100644 +--- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c ++++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +@@ -441,7 +441,7 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode_vic_1(struct kunit *test) + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + +- mode = drm_display_mode_from_cea_vic(drm, 1); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); + KUNIT_ASSERT_NOT_NULL(test, mode); + + drm = &priv->drm; +@@ -555,7 +555,7 @@ static void drm_test_check_broadcast_rgb_full_cea_mode_vic_1(struct kunit *test) + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + +- mode = drm_display_mode_from_cea_vic(drm, 1); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); + KUNIT_ASSERT_NOT_NULL(test, mode); + + drm = &priv->drm; +@@ -671,7 +671,7 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode_vic_1(struct kunit *te + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + +- mode = drm_display_mode_from_cea_vic(drm, 1); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); + KUNIT_ASSERT_NOT_NULL(test, mode); + + drm = &priv->drm; +@@ -1263,7 +1263,7 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + +- mode = 
drm_display_mode_from_cea_vic(drm, 1); ++ mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); + KUNIT_ASSERT_NOT_NULL(test, mode); + + /* +-- +2.43.0 + diff --git a/queue-6.11/drm-tests-helpers-add-helper-for-drm_display_mode_fr.patch b/queue-6.11/drm-tests-helpers-add-helper-for-drm_display_mode_fr.patch new file mode 100644 index 00000000000..ea095a3513d --- /dev/null +++ b/queue-6.11/drm-tests-helpers-add-helper-for-drm_display_mode_fr.patch @@ -0,0 +1,102 @@ +From 7d8d06f477ce63209b81a74c217ce2ce4b92c282 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Oct 2024 10:35:02 +0800 +Subject: drm/tests: helpers: Add helper for drm_display_mode_from_cea_vic() + +From: Jinjie Ruan + +[ Upstream commit caa714f86699bcfb01aa2d698db12d91af7d0d81 ] + +As Maxime suggested, add a new helper +drm_kunit_display_mode_from_cea_vic(), it can replace the direct call +of drm_display_mode_from_cea_vic(), and it will help solving +the `mode` memory leaks. + +Acked-by: Maxime Ripard +Suggested-by: Maxime Ripard +Signed-off-by: Jinjie Ruan +Link: https://patchwork.freedesktop.org/patch/msgid/20241030023504.530425-2-ruanjinjie@huawei.com +Signed-off-by: Maxime Ripard +Stable-dep-of: 926163342a2e ("drm/connector: hdmi: Fix memory leak in drm_display_mode_from_cea_vic()") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/tests/drm_kunit_helpers.c | 42 +++++++++++++++++++++++ + include/drm/drm_kunit_helpers.h | 4 +++ + 2 files changed, 46 insertions(+) + +diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c +index aa62719dab0e4..04a6b8cc62ac6 100644 +--- a/drivers/gpu/drm/tests/drm_kunit_helpers.c ++++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c +@@ -3,6 +3,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -311,6 +312,47 @@ drm_kunit_helper_create_crtc(struct kunit *test, + } + EXPORT_SYMBOL_GPL(drm_kunit_helper_create_crtc); + ++static void kunit_action_drm_mode_destroy(void *ptr) ++{ ++ 
struct drm_display_mode *mode = ptr; ++ ++ drm_mode_destroy(NULL, mode); ++} ++ ++/** ++ * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC ++ for a KUnit test ++ * @test: The test context object ++ * @dev: DRM device ++ * @video_code: CEA VIC of the mode ++ * ++ * Creates a new mode matching the specified CEA VIC for a KUnit test. ++ * ++ * Resources will be cleaned up automatically. ++ * ++ * Returns: A new drm_display_mode on success or NULL on failure ++ */ ++struct drm_display_mode * ++drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev, ++ u8 video_code) ++{ ++ struct drm_display_mode *mode; ++ int ret; ++ ++ mode = drm_display_mode_from_cea_vic(dev, video_code); ++ if (!mode) ++ return NULL; ++ ++ ret = kunit_add_action_or_reset(test, ++ kunit_action_drm_mode_destroy, ++ mode); ++ if (ret) ++ return NULL; ++ ++ return mode; ++} ++EXPORT_SYMBOL_GPL(drm_kunit_display_mode_from_cea_vic); ++ + MODULE_AUTHOR("Maxime Ripard "); + MODULE_DESCRIPTION("KUnit test suite helper functions"); + MODULE_LICENSE("GPL"); +diff --git a/include/drm/drm_kunit_helpers.h b/include/drm/drm_kunit_helpers.h +index e7cc17ee4934a..afdd46ef04f70 100644 +--- a/include/drm/drm_kunit_helpers.h ++++ b/include/drm/drm_kunit_helpers.h +@@ -120,4 +120,8 @@ drm_kunit_helper_create_crtc(struct kunit *test, + const struct drm_crtc_funcs *funcs, + const struct drm_crtc_helper_funcs *helper_funcs); + ++struct drm_display_mode * ++drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev, ++ u8 video_code); ++ + #endif // DRM_KUNIT_HELPERS_H_ +-- +2.43.0 + diff --git a/queue-6.11/drm-xe-add-mmio-read-before-ggtt-invalidate.patch b/queue-6.11/drm-xe-add-mmio-read-before-ggtt-invalidate.patch new file mode 100644 index 00000000000..3fea0bb5cfa --- /dev/null +++ b/queue-6.11/drm-xe-add-mmio-read-before-ggtt-invalidate.patch @@ -0,0 +1,62 @@ +From ef77231ea76a679ce2c45b3633fc73d1968865ba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: 
Wed, 23 Oct 2024 15:12:00 -0700 +Subject: drm/xe: Add mmio read before GGTT invalidate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Matthew Brost + +[ Upstream commit 993ca0eccec65a2cacc3cefb15d35ffadc6f00fb ] + +On LNL without a mmio read before a GGTT invalidate the GuC can +incorrectly read the GGTT scratch page upon next access leading to jobs +not getting scheduled. A mmio read before a GGTT invalidate seems to fix +this. Since a GGTT invalidate is not a hot code path, blindly do a mmio +read before each GGTT invalidate. + +Cc: John Harrison +Cc: Daniele Ceraolo Spurio +Cc: Thomas Hellström +Cc: Lucas De Marchi +Cc: stable@vger.kernel.org +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Reported-by: Paulo Zanoni +Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3164 +Signed-off-by: Matthew Brost +Reviewed-by: Lucas De Marchi +Link: https://patchwork.freedesktop.org/patch/msgid/20241023221200.1797832-1-matthew.brost@intel.com +Signed-off-by: Lucas De Marchi +(cherry picked from commit 5a710196883e0ac019ac6df2a6d79c16ad3c32fa) +[ Fix conflict with mmio vs gt argument ] +Signed-off-by: Lucas De Marchi +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_ggtt.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c +index 0cdbc1296e885..226542bb1442e 100644 +--- a/drivers/gpu/drm/xe/xe_ggtt.c ++++ b/drivers/gpu/drm/xe/xe_ggtt.c +@@ -309,6 +309,16 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) + + static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) + { ++ struct xe_device *xe = tile_to_xe(ggtt->tile); ++ ++ /* ++ * XXX: Barrier for GGTT pages. Unsure exactly why this required but ++ * without this LNL is having issues with the GuC reading scratch page ++ * vs. correct GGTT page. 
Not particularly a hot code path so blindly ++ * do a mmio read here which results in GuC reading correct GGTT page. ++ */ ++ xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG); ++ + /* Each GT in a tile has its own TLB to cache GGTT lookups */ + ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt); + ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); +-- +2.43.0 + diff --git a/queue-6.11/drm-xe-don-t-short-circuit-tdr-on-jobs-not-started.patch b/queue-6.11/drm-xe-don-t-short-circuit-tdr-on-jobs-not-started.patch new file mode 100644 index 00000000000..1c4c35a3449 --- /dev/null +++ b/queue-6.11/drm-xe-don-t-short-circuit-tdr-on-jobs-not-started.patch @@ -0,0 +1,83 @@ +From 133a844e2efa74fa38eb3be012c531cbefec816a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Oct 2024 14:43:29 -0700 +Subject: drm/xe: Don't short circuit TDR on jobs not started + +From: Matthew Brost + +[ Upstream commit fe05cee4d9533892210e1ee90147175d87e7c053 ] + +Short circuiting TDR on jobs not started is an optimization which is not +required. On LNL we are facing an issue where jobs do not get scheduled +by the GuC if it misses a GGTT page update. When this occurs let the TDR +fire, toggle the scheduling which may get the job unstuck, and print a +warning message. If the TDR fires twice on job that hasn't started, +timeout the job. 
+ +v2: + - Add warning message (Paulo) + - Add fixes tag (Paulo) + - Timeout job which hasn't started after TDR firing twice +v3: + - Include local change +v4: + - Short circuit check_timeout on job not started + - use warn level rather than notice (Paulo) + +Fixes: 7ddb9403dd74 ("drm/xe: Sample ctx timestamp to determine if jobs have timed out") +Cc: stable@vger.kernel.org +Cc: Paulo Zanoni +Signed-off-by: Matthew Brost +Reviewed-by: Lucas De Marchi +Link: https://patchwork.freedesktop.org/patch/msgid/20241025214330.2010521-2-matthew.brost@intel.com +Signed-off-by: Lucas De Marchi +(cherry picked from commit 35d25a4a0012e690ef0cc4c5440231176db595cc) +Signed-off-by: Lucas De Marchi +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_guc_submit.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c +index dfd809e7bbd25..cbdd44567d107 100644 +--- a/drivers/gpu/drm/xe/xe_guc_submit.c ++++ b/drivers/gpu/drm/xe/xe_guc_submit.c +@@ -989,12 +989,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) + static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) + { + struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); +- u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); +- u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); ++ u32 ctx_timestamp, ctx_job_timestamp; + u32 timeout_ms = q->sched_props.job_timeout_ms; + u32 diff; + u64 running_time_ms; + ++ if (!xe_sched_job_started(job)) { ++ xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started", ++ xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), ++ q->guc->id); ++ ++ return xe_sched_invalidate_job(job, 2); ++ } ++ ++ ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); ++ ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); ++ + /* + * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch + * possible overflows with a high 
timeout. +@@ -1120,10 +1130,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) + exec_queue_killed_or_banned_or_wedged(q) || + exec_queue_destroyed(q); + +- /* Job hasn't started, can't be timed out */ +- if (!skip_timeout_check && !xe_sched_job_started(job)) +- goto rearm; +- + /* + * XXX: Sampling timeout doesn't work in wedged mode as we have to + * modify scheduling state to read timestamp. We could read the +-- +2.43.0 + diff --git a/queue-6.11/drm-xe-fix-register-definition-order-in-xe_regs.h.patch b/queue-6.11/drm-xe-fix-register-definition-order-in-xe_regs.h.patch new file mode 100644 index 00000000000..d5414d6321d --- /dev/null +++ b/queue-6.11/drm-xe-fix-register-definition-order-in-xe_regs.h.patch @@ -0,0 +1,44 @@ +From 6a5c8533fc687e76f07c2c8014ca7f5fa9c9f106 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Jul 2024 20:37:02 +0200 +Subject: drm/xe: Fix register definition order in xe_regs.h + +From: Michal Wajdeczko + +[ Upstream commit 9dae9751c7b0086963f5cbb82424b5e4cf58f123 ] + +Swap XEHP_CLOCK_GATE_DIS(0x101014) with GU_DEBUG(0x101018). 
+ +Signed-off-by: Michal Wajdeczko +Reviewed-by: Matt Roper +Reviewed-by: Himal Prasad Ghimiray +Link: https://patchwork.freedesktop.org/patch/msgid/20240702183704.1022-2-michal.wajdeczko@intel.com +Stable-dep-of: 993ca0eccec6 ("drm/xe: Add mmio read before GGTT invalidate") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/regs/xe_regs.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h +index 23e33ec849022..23ecba38ed419 100644 +--- a/drivers/gpu/drm/xe/regs/xe_regs.h ++++ b/drivers/gpu/drm/xe/regs/xe_regs.h +@@ -24,12 +24,12 @@ + #define LMEM_INIT REG_BIT(7) + #define DRIVERFLR REG_BIT(31) + +-#define GU_DEBUG XE_REG(0x101018) +-#define DRIVERFLR_STATUS REG_BIT(31) +- + #define XEHP_CLOCK_GATE_DIS XE_REG(0x101014) + #define SGSI_SIDECLK_DIS REG_BIT(17) + ++#define GU_DEBUG XE_REG(0x101018) ++#define DRIVERFLR_STATUS REG_BIT(31) ++ + #define XEHP_MTCFG_ADDR XE_REG(0x101800) + #define TILE_COUNT REG_GENMASK(15, 8) + +-- +2.43.0 + diff --git a/queue-6.11/drm-xe-kill-regs-xe_sriov_regs.h.patch b/queue-6.11/drm-xe-kill-regs-xe_sriov_regs.h.patch new file mode 100644 index 00000000000..10a4a89aaa8 --- /dev/null +++ b/queue-6.11/drm-xe-kill-regs-xe_sriov_regs.h.patch @@ -0,0 +1,147 @@ +From c69ebdccd5fb1b3314bd863e30c4404e1e790b04 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Jul 2024 20:37:03 +0200 +Subject: drm/xe: Kill regs/xe_sriov_regs.h + +From: Michal Wajdeczko + +[ Upstream commit 466a6c3855cf00653c14a92a6e9f8ae50077b77d ] + +There is no real benefit to maintain a separate file. The register +definitions related to SR-IOV can be placed in existing headers. 
+ +Signed-off-by: Michal Wajdeczko +Reviewed-by: Matt Roper +Reviewed-by: Himal Prasad Ghimiray +Link: https://patchwork.freedesktop.org/patch/msgid/20240702183704.1022-3-michal.wajdeczko@intel.com +Stable-dep-of: 993ca0eccec6 ("drm/xe: Add mmio read before GGTT invalidate") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/regs/xe_gt_regs.h | 6 ++++++ + drivers/gpu/drm/xe/regs/xe_regs.h | 6 ++++++ + drivers/gpu/drm/xe/regs/xe_sriov_regs.h | 23 ----------------------- + drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 2 +- + drivers/gpu/drm/xe/xe_lmtt.c | 2 +- + drivers/gpu/drm/xe/xe_sriov.c | 2 +- + 6 files changed, 15 insertions(+), 26 deletions(-) + delete mode 100644 drivers/gpu/drm/xe/regs/xe_sriov_regs.h + +diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h +index 3c28650400586..a8c4998384d68 100644 +--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h ++++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h +@@ -91,6 +91,8 @@ + #define VE1_AUX_INV XE_REG(0x42b8) + #define AUX_INV REG_BIT(0) + ++#define XE2_LMEM_CFG XE_REG(0x48b0) ++ + #define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) + #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) + #define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) +@@ -403,6 +405,10 @@ + #define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) + #define GLOBAL_INVALIDATION_MODE REG_BIT(2) + ++#define LMEM_CFG XE_REG(0xcf58) ++#define LMEM_EN REG_BIT(31) ++#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ ++ + #define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED) + #define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0) + +diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h +index 23ecba38ed419..55bf47c990169 100644 +--- a/drivers/gpu/drm/xe/regs/xe_regs.h ++++ b/drivers/gpu/drm/xe/regs/xe_regs.h +@@ -30,6 +30,9 @@ + #define GU_DEBUG XE_REG(0x101018) + #define DRIVERFLR_STATUS REG_BIT(31) + ++#define VIRTUAL_CTRL_REG XE_REG(0x10108c) ++#define GUEST_GTT_UPDATE_EN 
REG_BIT(8) ++ + #define XEHP_MTCFG_ADDR XE_REG(0x101800) + #define TILE_COUNT REG_GENMASK(15, 8) + +@@ -66,6 +69,9 @@ + #define DISPLAY_IRQ REG_BIT(16) + #define GT_DW_IRQ(x) REG_BIT(x) + ++#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) ++#define VF_CAP REG_BIT(0) ++ + #define PVC_RP_STATE_CAP XE_REG(0x281014) + + #endif +diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h +deleted file mode 100644 +index 017b4ddd1ecf4..0000000000000 +--- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h ++++ /dev/null +@@ -1,23 +0,0 @@ +-/* SPDX-License-Identifier: MIT */ +-/* +- * Copyright © 2023 Intel Corporation +- */ +- +-#ifndef _REGS_XE_SRIOV_REGS_H_ +-#define _REGS_XE_SRIOV_REGS_H_ +- +-#include "regs/xe_reg_defs.h" +- +-#define XE2_LMEM_CFG XE_REG(0x48b0) +- +-#define LMEM_CFG XE_REG(0xcf58) +-#define LMEM_EN REG_BIT(31) +-#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ +- +-#define VIRTUAL_CTRL_REG XE_REG(0x10108c) +-#define GUEST_GTT_UPDATE_EN REG_BIT(8) +- +-#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) +-#define VF_CAP REG_BIT(0) +- +-#endif +diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +index 9dbba9ab7a9ab..ef239440963ce 100644 +--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c ++++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +@@ -5,7 +5,7 @@ + + #include + +-#include "regs/xe_sriov_regs.h" ++#include "regs/xe_regs.h" + + #include "xe_gt_sriov_pf.h" + #include "xe_gt_sriov_pf_config.h" +diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c +index 418661a889183..c5fdb36b6d336 100644 +--- a/drivers/gpu/drm/xe/xe_lmtt.c ++++ b/drivers/gpu/drm/xe/xe_lmtt.c +@@ -7,7 +7,7 @@ + + #include + +-#include "regs/xe_sriov_regs.h" ++#include "regs/xe_gt_regs.h" + + #include "xe_assert.h" + #include "xe_bo.h" +diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c +index a274a5fb14018..5a1d65e4f19f2 100644 +--- a/drivers/gpu/drm/xe/xe_sriov.c ++++ 
b/drivers/gpu/drm/xe/xe_sriov.c +@@ -5,7 +5,7 @@ + + #include + +-#include "regs/xe_sriov_regs.h" ++#include "regs/xe_regs.h" + + #include "xe_assert.h" + #include "xe_device.h" +-- +2.43.0 + diff --git a/queue-6.11/fork-do-not-invoke-uffd-on-fork-if-error-occurs.patch b/queue-6.11/fork-do-not-invoke-uffd-on-fork-if-error-occurs.patch new file mode 100644 index 00000000000..e96464dbd93 --- /dev/null +++ b/queue-6.11/fork-do-not-invoke-uffd-on-fork-if-error-occurs.patch @@ -0,0 +1,157 @@ +From 9a138c22afb6bcd6434157db0d6827b4424d6f13 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Oct 2024 18:56:05 +0100 +Subject: fork: do not invoke uffd on fork if error occurs + +From: Lorenzo Stoakes + +[ Upstream commit f64e67e5d3a45a4a04286c47afade4b518acd47b ] + +Patch series "fork: do not expose incomplete mm on fork". + +During fork we may place the virtual memory address space into an +inconsistent state before the fork operation is complete. + +In addition, we may encounter an error during the fork operation that +indicates that the virtual memory address space is invalidated. + +As a result, we should not be exposing it in any way to external machinery +that might interact with the mm or VMAs, machinery that is not designed to +deal with incomplete state. + +We specifically update the fork logic to defer khugepaged and ksm to the +end of the operation and only to be invoked if no error arose, and +disallow uffd from observing fork events should an error have occurred. + +This patch (of 2): + +Currently on fork we expose the virtual address space of a process to +userland unconditionally if uffd is registered in VMAs, regardless of +whether an error arose in the fork. + +This is performed in dup_userfaultfd_complete() which is invoked +unconditionally, and performs two duties - invoking registered handlers +for the UFFD_EVENT_FORK event via dup_fctx(), and clearing down +userfaultfd_fork_ctx objects established in dup_userfaultfd(). 
+ +This is problematic, because the virtual address space may not yet be +correctly initialised if an error arose. + +The change in commit d24062914837 ("fork: use __mt_dup() to duplicate +maple tree in dup_mmap()") makes this more pertinent as we may be in a +state where entries in the maple tree are not yet consistent. + +We address this by, on fork error, ensuring that we roll back state that +we would otherwise expect to clean up through the event being handled by +userland and perform the memory freeing duty otherwise performed by +dup_userfaultfd_complete(). + +We do this by implementing a new function, dup_userfaultfd_fail(), which +performs the same loop, only decrementing reference counts. + +Note that we perform mmgrab() on the parent and child mm's, however +userfaultfd_ctx_put() will mmdrop() this once the reference count drops to +zero, so we will avoid memory leaks correctly here. + +Link: https://lkml.kernel.org/r/cover.1729014377.git.lorenzo.stoakes@oracle.com +Link: https://lkml.kernel.org/r/d3691d58bb58712b6fb3df2be441d175bd3cdf07.1729014377.git.lorenzo.stoakes@oracle.com +Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()") +Signed-off-by: Lorenzo Stoakes +Reported-by: Jann Horn +Reviewed-by: Jann Horn +Reviewed-by: Liam R. 
Howlett +Cc: Alexander Viro +Cc: Christian Brauner +Cc: Jan Kara +Cc: Linus Torvalds +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/userfaultfd.c | 28 ++++++++++++++++++++++++++++ + include/linux/userfaultfd_k.h | 5 +++++ + kernel/fork.c | 5 ++++- + 3 files changed, 37 insertions(+), 1 deletion(-) + +diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c +index 27a3e9285fbf6..2f302da629cb4 100644 +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -731,6 +731,34 @@ void dup_userfaultfd_complete(struct list_head *fcs) + } + } + ++void dup_userfaultfd_fail(struct list_head *fcs) ++{ ++ struct userfaultfd_fork_ctx *fctx, *n; ++ ++ /* ++ * An error has occurred on fork, we will tear memory down, but have ++ * allocated memory for fctx's and raised reference counts for both the ++ * original and child contexts (and on the mm for each as a result). ++ * ++ * These would ordinarily be taken care of by a user handling the event, ++ * but we are no longer doing so, so manually clean up here. ++ * ++ * mm tear down will take care of cleaning up VMA contexts. 
++ */ ++ list_for_each_entry_safe(fctx, n, fcs, list) { ++ struct userfaultfd_ctx *octx = fctx->orig; ++ struct userfaultfd_ctx *ctx = fctx->new; ++ ++ atomic_dec(&octx->mmap_changing); ++ VM_BUG_ON(atomic_read(&octx->mmap_changing) < 0); ++ userfaultfd_ctx_put(octx); ++ userfaultfd_ctx_put(ctx); ++ ++ list_del(&fctx->list); ++ kfree(fctx); ++ } ++} ++ + void mremap_userfaultfd_prep(struct vm_area_struct *vma, + struct vm_userfaultfd_ctx *vm_ctx) + { +diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h +index a12bcf042551e..f4a45a37229ad 100644 +--- a/include/linux/userfaultfd_k.h ++++ b/include/linux/userfaultfd_k.h +@@ -249,6 +249,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, + + extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); + extern void dup_userfaultfd_complete(struct list_head *); ++void dup_userfaultfd_fail(struct list_head *); + + extern void mremap_userfaultfd_prep(struct vm_area_struct *, + struct vm_userfaultfd_ctx *); +@@ -332,6 +333,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l) + { + } + ++static inline void dup_userfaultfd_fail(struct list_head *l) ++{ ++} ++ + static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, + struct vm_userfaultfd_ctx *ctx) + { +diff --git a/kernel/fork.c b/kernel/fork.c +index dbf3c5d81df3b..6423ce60b8f97 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -775,7 +775,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + mmap_write_unlock(mm); + flush_tlb_mm(oldmm); + mmap_write_unlock(oldmm); +- dup_userfaultfd_complete(&uf); ++ if (!retval) ++ dup_userfaultfd_complete(&uf); ++ else ++ dup_userfaultfd_fail(&uf); + fail_uprobe_end: + uprobe_end_dup_mmap(); + return retval; +-- +2.43.0 + diff --git a/queue-6.11/fork-only-invoke-khugepaged-ksm-hooks-if-no-error.patch b/queue-6.11/fork-only-invoke-khugepaged-ksm-hooks-if-no-error.patch new file mode 100644 index 00000000000..c513ab2a01a --- /dev/null 
+++ b/queue-6.11/fork-only-invoke-khugepaged-ksm-hooks-if-no-error.patch @@ -0,0 +1,112 @@ +From 40ae57f840f266659ae573e60968ea68dc27fb90 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Oct 2024 18:56:06 +0100 +Subject: fork: only invoke khugepaged, ksm hooks if no error + +From: Lorenzo Stoakes + +[ Upstream commit 985da552a98e27096444508ce5d853244019111f ] + +There is no reason to invoke these hooks early against an mm that is in an +incomplete state. + +The change in commit d24062914837 ("fork: use __mt_dup() to duplicate +maple tree in dup_mmap()") makes this more pertinent as we may be in a +state where entries in the maple tree are not yet consistent. + +Their placement early in dup_mmap() only appears to have been meaningful +for early error checking, and since functionally it'd require a very small +allocation to fail (in practice 'too small to fail') that'd only occur in +the most dire circumstances, meaning the fork would fail or be OOM'd in +any case. + +Since both khugepaged and KSM tracking are there to provide optimisations +to memory performance rather than critical functionality, it doesn't +really matter all that much if, under such dire memory pressure, we fail +to register an mm with these. + +As a result, we follow the example of commit d2081b2bf819 ("mm: +khugepaged: make khugepaged_enter() void function") and make ksm_fork() a +void function also. + +We only expose the mm to these functions once we are done with them and +only if no error occurred in the fork operation. + +Link: https://lkml.kernel.org/r/e0cb8b840c9d1d5a6e84d4f8eff5f3f2022aa10c.1729014377.git.lorenzo.stoakes@oracle.com +Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()") +Signed-off-by: Lorenzo Stoakes +Reported-by: Jann Horn +Reviewed-by: Liam R. 
Howlett +Reviewed-by: Vlastimil Babka +Reviewed-by: Jann Horn +Cc: Alexander Viro +Cc: Christian Brauner +Cc: Jan Kara +Cc: Linus Torvalds +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + include/linux/ksm.h | 10 ++++------ + kernel/fork.c | 7 ++----- + 2 files changed, 6 insertions(+), 11 deletions(-) + +diff --git a/include/linux/ksm.h b/include/linux/ksm.h +index 11690dacd9868..ec9c05044d4fe 100644 +--- a/include/linux/ksm.h ++++ b/include/linux/ksm.h +@@ -54,12 +54,11 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm) + return atomic_long_read(&mm->ksm_zero_pages); + } + +-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) ++static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) + { ++ /* Adding mm to ksm is best effort on fork. */ + if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) +- return __ksm_enter(mm); +- +- return 0; ++ __ksm_enter(mm); + } + + static inline int ksm_execve(struct mm_struct *mm) +@@ -107,9 +106,8 @@ static inline int ksm_disable(struct mm_struct *mm) + return 0; + } + +-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) ++static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) + { +- return 0; + } + + static inline int ksm_execve(struct mm_struct *mm) +diff --git a/kernel/fork.c b/kernel/fork.c +index 6423ce60b8f97..dc08a23747338 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -653,11 +653,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + mm->exec_vm = oldmm->exec_vm; + mm->stack_vm = oldmm->stack_vm; + +- retval = ksm_fork(mm, oldmm); +- if (retval) +- goto out; +- khugepaged_fork(mm, oldmm); +- + /* Use __mt_dup() to efficiently build an identical maple tree. 
*/ + retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL); + if (unlikely(retval)) +@@ -760,6 +755,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + vma_iter_free(&vmi); + if (!retval) { + mt_set_in_rcu(vmi.mas.tree); ++ ksm_fork(mm, oldmm); ++ khugepaged_fork(mm, oldmm); + } else if (mpnt) { + /* + * The entire maple tree has already been duplicated. If the +-- +2.43.0 + diff --git a/queue-6.11/gpiolib-fix-debugfs-dangling-chip-separator.patch b/queue-6.11/gpiolib-fix-debugfs-dangling-chip-separator.patch new file mode 100644 index 00000000000..cdddb44bf85 --- /dev/null +++ b/queue-6.11/gpiolib-fix-debugfs-dangling-chip-separator.patch @@ -0,0 +1,39 @@ +From d622bb0ff1402d90cf7b5fc220887f64d4a6b2d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Oct 2024 13:49:59 +0100 +Subject: gpiolib: fix debugfs dangling chip separator + +From: Johan Hovold + +[ Upstream commit 604888f8c3d01fddd9366161efc65cb3182831f1 ] + +Add the missing newline after entries for recently removed gpio chips +so that the chip sections are separated by a newline as intended. + +Fixes: e348544f7994 ("gpio: protect the list of GPIO devices with SRCU") +Cc: stable@vger.kernel.org # 6.9 +Cc: Bartosz Golaszewski +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/20241028125000.24051-3-johan+linaro@kernel.org +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpiolib.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c +index 1f522499c6fc5..337971080dfde 100644 +--- a/drivers/gpio/gpiolib.c ++++ b/drivers/gpio/gpiolib.c +@@ -4879,7 +4879,7 @@ static int gpiolib_seq_show(struct seq_file *s, void *v) + + gc = srcu_dereference(gdev->chip, &gdev->srcu); + if (!gc) { +- seq_printf(s, "%s%s: (dangling chip)", ++ seq_printf(s, "%s%s: (dangling chip)\n", + priv->newline ? 
"\n" : "", + dev_name(&gdev->dev)); + return 0; +-- +2.43.0 + diff --git a/queue-6.11/gpiolib-fix-debugfs-newline-separators.patch b/queue-6.11/gpiolib-fix-debugfs-newline-separators.patch new file mode 100644 index 00000000000..c0b2c265f1f --- /dev/null +++ b/queue-6.11/gpiolib-fix-debugfs-newline-separators.patch @@ -0,0 +1,47 @@ +From e36937357da8906fa4217d93f2830a8cbcddd910 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Oct 2024 13:49:58 +0100 +Subject: gpiolib: fix debugfs newline separators + +From: Johan Hovold + +[ Upstream commit 3e8b7238b427e05498034c240451af5f5495afda ] + +The gpiolib debugfs interface exports a list of all gpio chips in a +system and the state of their pins. + +The gpio chip sections are supposed to be separated by a newline +character, but a long-standing bug prevents the separator from +being included when output is generated in multiple sessions, making the +output inconsistent and hard to read. + +Make sure to only suppress the newline separator at the beginning of the +file as intended. 
+ +Fixes: f9c4a31f6150 ("gpiolib: Use seq_file's iterator interface") +Cc: stable@vger.kernel.org # 3.7 +Cc: Thierry Reding +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/20241028125000.24051-2-johan+linaro@kernel.org +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpiolib.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c +index 148bcfbf98e02..1f522499c6fc5 100644 +--- a/drivers/gpio/gpiolib.c ++++ b/drivers/gpio/gpiolib.c +@@ -4834,6 +4834,8 @@ static void *gpiolib_seq_start(struct seq_file *s, loff_t *pos) + return NULL; + + s->private = priv; ++ if (*pos > 0) ++ priv->newline = true; + priv->idx = srcu_read_lock(&gpio_devices_srcu); + + list_for_each_entry_srcu(gdev, &gpio_devices, list, +-- +2.43.0 + diff --git a/queue-6.11/iio-light-veml6030-fix-microlux-value-calculation.patch-18046 b/queue-6.11/iio-light-veml6030-fix-microlux-value-calculation.patch-18046 new file mode 100644 index 00000000000..9e7eef75ccf --- /dev/null +++ b/queue-6.11/iio-light-veml6030-fix-microlux-value-calculation.patch-18046 @@ -0,0 +1,47 @@ +From 617e0a742fa1ec269638d280602c22c4ebd702b2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Oct 2024 19:04:31 +0200 +Subject: iio: light: veml6030: fix microlux value calculation + +From: Javier Carrasco + +[ Upstream commit 63dd163cd61dda6f38343776b42331cc6b7e56e0 ] + +The raw value conversion to obtain a measurement in lux as +INT_PLUS_MICRO does not calculate the decimal part properly to display +it as micro (in this case microlux). It only calculates the modulo to +obtain the decimal part from a resolution that is 10000 times the +one provided in the datasheet (0.5376 lux/cnt for the veml6030). The +resulting value must still be multiplied by 100 to make it micro. + +This bug was introduced with the original implementation of the driver.
+ +Only the illuminance channel is fixed because the scale is nonsensical +for the intensity channels anyway. + +Cc: stable@vger.kernel.org +Fixes: 7b779f573c48 ("iio: light: add driver for veml6030 ambient light sensor") +Signed-off-by: Javier Carrasco +Link: https://patch.msgid.link/20241016-veml6030-fix-processed-micro-v1-1-4a5644796437@gmail.com +Signed-off-by: Jonathan Cameron +Signed-off-by: Sasha Levin +--- + drivers/iio/light/veml6030.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/iio/light/veml6030.c b/drivers/iio/light/veml6030.c +index 621428885455c..8e76c828afddc 100644 +--- a/drivers/iio/light/veml6030.c ++++ b/drivers/iio/light/veml6030.c +@@ -535,7 +535,7 @@ static int veml6030_read_raw(struct iio_dev *indio_dev, + } + if (mask == IIO_CHAN_INFO_PROCESSED) { + *val = (reg * data->cur_resolution) / 10000; +- *val2 = (reg * data->cur_resolution) % 10000; ++ *val2 = (reg * data->cur_resolution) % 10000 * 100; + return IIO_VAL_INT_PLUS_MICRO; + } + *val = reg; +-- +2.43.0 + diff --git a/queue-6.11/input-edt-ft5x06-fix-regmap-leak-when-probe-fails.patch b/queue-6.11/input-edt-ft5x06-fix-regmap-leak-when-probe-fails.patch new file mode 100644 index 00000000000..3794a02f332 --- /dev/null +++ b/queue-6.11/input-edt-ft5x06-fix-regmap-leak-when-probe-fails.patch @@ -0,0 +1,82 @@ +From 598aa32497b9cd1af523303a8bc56899807ebf31 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Oct 2024 17:17:48 -0700 +Subject: Input: edt-ft5x06 - fix regmap leak when probe fails + +From: Dmitry Torokhov + +[ Upstream commit bffdf9d7e51a7be8eeaac2ccf9e54a5fde01ff65 ] + +The driver neglects to free the instance of I2C regmap constructed at +the beginning of the edt_ft5x06_ts_probe() method when probe fails. +Additionally edt_ft5x06_ts_remove() is freeing the regmap too early, +before the rest of the device resources that are managed by devm are +released.
+ +Fix this by installing a custom devm action that will ensure that the +regmap is released at the right time during normal teardown as well as +in case of probe failure. + +Note that devm_regmap_init_i2c() could not be used because the driver +may replace the original regmap with a regmap specific for M06 devices +in the middle of the probe, and using devm_regmap_init_i2c() would +result in releasing the M06 regmap too early. + +Reported-by: Li Zetao +Fixes: 9dfd9708ffba ("Input: edt-ft5x06 - convert to use regmap API") +Cc: stable@vger.kernel.org +Reviewed-by: Oliver Graute +Link: https://lore.kernel.org/r/ZxL6rIlVlgsAu-Jv@google.com +Signed-off-by: Dmitry Torokhov +Signed-off-by: Sasha Levin +--- + drivers/input/touchscreen/edt-ft5x06.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c +index e70415f189a55..126b0ed85aa50 100644 +--- a/drivers/input/touchscreen/edt-ft5x06.c ++++ b/drivers/input/touchscreen/edt-ft5x06.c +@@ -1121,6 +1121,14 @@ static void edt_ft5x06_ts_set_regs(struct edt_ft5x06_ts_data *tsdata) + } + } + ++static void edt_ft5x06_exit_regmap(void *arg) ++{ ++ struct edt_ft5x06_ts_data *data = arg; ++ ++ if (!IS_ERR_OR_NULL(data->regmap)) ++ regmap_exit(data->regmap); ++} ++ + static void edt_ft5x06_disable_regulators(void *arg) + { + struct edt_ft5x06_ts_data *data = arg; +@@ -1154,6 +1162,16 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client) + return PTR_ERR(tsdata->regmap); + } + ++ /* ++ * We are not using devm_regmap_init_i2c() and instead install a ++ * custom action because we may replace regmap with M06-specific one ++ * and we need to make sure that it will not be released too early. 
++ */ ++ error = devm_add_action_or_reset(&client->dev, edt_ft5x06_exit_regmap, ++ tsdata); ++ if (error) ++ return error; ++ + chip_data = device_get_match_data(&client->dev); + if (!chip_data) + chip_data = (const struct edt_i2c_chip_data *)id->driver_data; +@@ -1347,7 +1365,6 @@ static void edt_ft5x06_ts_remove(struct i2c_client *client) + struct edt_ft5x06_ts_data *tsdata = i2c_get_clientdata(client); + + edt_ft5x06_ts_teardown_debugfs(tsdata); +- regmap_exit(tsdata->regmap); + } + + static int edt_ft5x06_ts_suspend(struct device *dev) +-- +2.43.0 + diff --git a/queue-6.11/input-fix-regression-when-re-registering-input-handl.patch b/queue-6.11/input-fix-regression-when-re-registering-input-handl.patch new file mode 100644 index 00000000000..bf56486a6f7 --- /dev/null +++ b/queue-6.11/input-fix-regression-when-re-registering-input-handl.patch @@ -0,0 +1,253 @@ +From 0284dac550ec179dc0ddc39ab67ba9f172876b4f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 27 Oct 2024 22:31:15 -0700 +Subject: Input: fix regression when re-registering input handlers + +From: Dmitry Torokhov + +[ Upstream commit 071b24b54d2d05fbf39ddbb27dee08abd1d713f3 ] + +Commit d469647bafd9 ("Input: simplify event handling logic") introduced +code that would set handler->events() method to either +input_handler_events_filter() or input_handler_events_default() or +input_handler_events_null(), depending on the kind of input handler +(a filter or a regular one) we are dealing with. 
Unfortunately this +breaks cases when we try to re-register the same filter (as is the case +with sysrq handler): after initial registration the handler will have 2 +event handling methods defined, and will run afoul of the check in +input_handler_check_methods(): + + input: input_handler_check_methods: only one event processing method can be defined (sysrq) + sysrq: Failed to register input handler, error -22 + +Fix this by adding handle_events() method to input_handle structure and +setting it up when registering a new input handle according to event +handling methods defined in associated input_handler structure, thus +avoiding modifying the input_handler structure. + +Reported-by: "Ned T. Crigler" +Reported-by: Christian Heusel +Tested-by: "Ned T. Crigler" +Tested-by: Peter Seiderer +Fixes: d469647bafd9 ("Input: simplify event handling logic") +Link: https://lore.kernel.org/r/Zx2iQp6csn42PJA7@xavtug +Cc: stable@vger.kernel.org +Signed-off-by: Dmitry Torokhov +Signed-off-by: Sasha Levin +--- + drivers/input/input.c | 134 +++++++++++++++++++++++------------------- + include/linux/input.h | 10 +++- + 2 files changed, 82 insertions(+), 62 deletions(-) + +diff --git a/drivers/input/input.c b/drivers/input/input.c +index 54c57b267b25f..865d3f8e97a66 100644 +--- a/drivers/input/input.c ++++ b/drivers/input/input.c +@@ -119,12 +119,12 @@ static void input_pass_values(struct input_dev *dev, + + handle = rcu_dereference(dev->grab); + if (handle) { +- count = handle->handler->events(handle, vals, count); ++ count = handle->handle_events(handle, vals, count); + } else { + list_for_each_entry_rcu(handle, &dev->h_list, d_node) + if (handle->open) { +- count = handle->handler->events(handle, vals, +- count); ++ count = handle->handle_events(handle, vals, ++ count); + if (!count) + break; + } +@@ -2537,57 +2537,6 @@ static int input_handler_check_methods(const struct input_handler *handler) + return 0; + } + +-/* +- * An implementation of input_handler's events() method that 
simply +- * invokes handler->event() method for each event one by one. +- */ +-static unsigned int input_handler_events_default(struct input_handle *handle, +- struct input_value *vals, +- unsigned int count) +-{ +- struct input_handler *handler = handle->handler; +- struct input_value *v; +- +- for (v = vals; v != vals + count; v++) +- handler->event(handle, v->type, v->code, v->value); +- +- return count; +-} +- +-/* +- * An implementation of input_handler's events() method that invokes +- * handler->filter() method for each event one by one and removes events +- * that were filtered out from the "vals" array. +- */ +-static unsigned int input_handler_events_filter(struct input_handle *handle, +- struct input_value *vals, +- unsigned int count) +-{ +- struct input_handler *handler = handle->handler; +- struct input_value *end = vals; +- struct input_value *v; +- +- for (v = vals; v != vals + count; v++) { +- if (handler->filter(handle, v->type, v->code, v->value)) +- continue; +- if (end != v) +- *end = *v; +- end++; +- } +- +- return end - vals; +-} +- +-/* +- * An implementation of input_handler's events() method that does nothing. 
+- */ +-static unsigned int input_handler_events_null(struct input_handle *handle, +- struct input_value *vals, +- unsigned int count) +-{ +- return count; +-} +- + /** + * input_register_handler - register a new input handler + * @handler: handler to be registered +@@ -2607,13 +2556,6 @@ int input_register_handler(struct input_handler *handler) + + INIT_LIST_HEAD(&handler->h_list); + +- if (handler->filter) +- handler->events = input_handler_events_filter; +- else if (handler->event) +- handler->events = input_handler_events_default; +- else if (!handler->events) +- handler->events = input_handler_events_null; +- + error = mutex_lock_interruptible(&input_mutex); + if (error) + return error; +@@ -2687,6 +2629,75 @@ int input_handler_for_each_handle(struct input_handler *handler, void *data, + } + EXPORT_SYMBOL(input_handler_for_each_handle); + ++/* ++ * An implementation of input_handle's handle_events() method that simply ++ * invokes handler->event() method for each event one by one. ++ */ ++static unsigned int input_handle_events_default(struct input_handle *handle, ++ struct input_value *vals, ++ unsigned int count) ++{ ++ struct input_handler *handler = handle->handler; ++ struct input_value *v; ++ ++ for (v = vals; v != vals + count; v++) ++ handler->event(handle, v->type, v->code, v->value); ++ ++ return count; ++} ++ ++/* ++ * An implementation of input_handle's handle_events() method that invokes ++ * handler->filter() method for each event one by one and removes events ++ * that were filtered out from the "vals" array. 
++ */ ++static unsigned int input_handle_events_filter(struct input_handle *handle, ++ struct input_value *vals, ++ unsigned int count) ++{ ++ struct input_handler *handler = handle->handler; ++ struct input_value *end = vals; ++ struct input_value *v; ++ ++ for (v = vals; v != vals + count; v++) { ++ if (handler->filter(handle, v->type, v->code, v->value)) ++ continue; ++ if (end != v) ++ *end = *v; ++ end++; ++ } ++ ++ return end - vals; ++} ++ ++/* ++ * An implementation of input_handle's handle_events() method that does nothing. ++ */ ++static unsigned int input_handle_events_null(struct input_handle *handle, ++ struct input_value *vals, ++ unsigned int count) ++{ ++ return count; ++} ++ ++/* ++ * Sets up appropriate handle->event_handler based on the input_handler ++ * associated with the handle. ++ */ ++static void input_handle_setup_event_handler(struct input_handle *handle) ++{ ++ struct input_handler *handler = handle->handler; ++ ++ if (handler->filter) ++ handle->handle_events = input_handle_events_filter; ++ else if (handler->event) ++ handle->handle_events = input_handle_events_default; ++ else if (handler->events) ++ handle->handle_events = handler->events; ++ else ++ handle->handle_events = input_handle_events_null; ++} ++ + /** + * input_register_handle - register a new input handle + * @handle: handle to register +@@ -2704,6 +2715,7 @@ int input_register_handle(struct input_handle *handle) + struct input_dev *dev = handle->dev; + int error; + ++ input_handle_setup_event_handler(handle); + /* + * We take dev->mutex here to prevent race with + * input_release_device(). 
+diff --git a/include/linux/input.h b/include/linux/input.h +index 89a0be6ee0e23..cd866b020a01d 100644 +--- a/include/linux/input.h ++++ b/include/linux/input.h +@@ -339,12 +339,16 @@ struct input_handler { + * @name: name given to the handle by handler that created it + * @dev: input device the handle is attached to + * @handler: handler that works with the device through this handle ++ * @handle_events: event sequence handler. It is set up by the input core ++ * according to event handling method specified in the @handler. See ++ * input_handle_setup_event_handler(). ++ * This method is being called by the input core with interrupts disabled ++ * and dev->event_lock spinlock held and so it may not sleep. + * @d_node: used to put the handle on device's list of attached handles + * @h_node: used to put the handle on handler's list of handles from which + * it gets events + */ + struct input_handle { +- + void *private; + + int open; +@@ -353,6 +357,10 @@ struct input_handle { + struct input_dev *dev; + struct input_handler *handler; + ++ unsigned int (*handle_events)(struct input_handle *handle, ++ struct input_value *vals, ++ unsigned int count); ++ + struct list_head d_node; + struct list_head h_node; + }; +-- +2.43.0 + diff --git a/queue-6.11/io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch b/queue-6.11/io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch new file mode 100644 index 00000000000..8235fb11ac5 --- /dev/null +++ b/queue-6.11/io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch @@ -0,0 +1,121 @@ +From abf6e0b067d865cd89978d9399581fe76c39e449 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 31 Oct 2024 08:05:44 -0600 +Subject: io_uring/rw: fix missing NOWAIT check for O_DIRECT start write + +From: Jens Axboe + +[ Upstream commit 1d60d74e852647255bd8e76f5a22dc42531e4389 ] + +When io_uring starts a write, it'll call kiocb_start_write() to bump the +super block rwsem, preventing any freezes from happening while that +write is 
in-flight. The freeze side will grab that rwsem for writing, +excluding any new writers from happening and waiting for existing writes +to finish. But io_uring unconditionally uses kiocb_start_write(), which +will block if someone is currently attempting to freeze the mount point. +This causes a deadlock where freeze is waiting for previous writes to +complete, but the previous writes cannot complete, as the task that is +supposed to complete them is blocked waiting on starting a new write. +This results in the following stuck trace showing that dependency with +the write blocked starting a new write: + +task:fio state:D stack:0 pid:886 tgid:886 ppid:876 +Call trace: + __switch_to+0x1d8/0x348 + __schedule+0x8e8/0x2248 + schedule+0x110/0x3f0 + percpu_rwsem_wait+0x1e8/0x3f8 + __percpu_down_read+0xe8/0x500 + io_write+0xbb8/0xff8 + io_issue_sqe+0x10c/0x1020 + io_submit_sqes+0x614/0x2110 + __arm64_sys_io_uring_enter+0x524/0x1038 + invoke_syscall+0x74/0x268 + el0_svc_common.constprop.0+0x160/0x238 + do_el0_svc+0x44/0x60 + el0_svc+0x44/0xb0 + el0t_64_sync_handler+0x118/0x128 + el0t_64_sync+0x168/0x170 +INFO: task fsfreeze:7364 blocked for more than 15 seconds. + Not tainted 6.12.0-rc5-00063-g76aaf945701c #7963 + +with the attempting freezer stuck trying to grab the rwsem: + +task:fsfreeze state:D stack:0 pid:7364 tgid:7364 ppid:995 +Call trace: + __switch_to+0x1d8/0x348 + __schedule+0x8e8/0x2248 + schedule+0x110/0x3f0 + percpu_down_write+0x2b0/0x680 + freeze_super+0x248/0x8a8 + do_vfs_ioctl+0x149c/0x1b18 + __arm64_sys_ioctl+0xd0/0x1a0 + invoke_syscall+0x74/0x268 + el0_svc_common.constprop.0+0x160/0x238 + do_el0_svc+0x44/0x60 + el0_svc+0x44/0xb0 + el0t_64_sync_handler+0x118/0x128 + el0t_64_sync+0x168/0x170 + +Fix this by having the io_uring side honor IOCB_NOWAIT, and only attempt a +blocking grab of the super block rwsem if it isn't set. 
For normal issue +where IOCB_NOWAIT would always be set, this returns -EAGAIN which will +have io_uring core issue a blocking attempt of the write. That will in +turn also get completions run, ensuring forward progress. + +Since freezing requires CAP_SYS_ADMIN in the first place, this isn't +something that can be triggered by a regular user. + +Cc: stable@vger.kernel.org # 5.10+ +Reported-by: Peter Mann +Link: https://lore.kernel.org/io-uring/38c94aec-81c9-4f62-b44e-1d87f5597644@sh.cz +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/rw.c | 23 +++++++++++++++++++++-- + 1 file changed, 21 insertions(+), 2 deletions(-) + +diff --git a/io_uring/rw.c b/io_uring/rw.c +index 6b3bc0876f7fe..19e2c1f9c4a21 100644 +--- a/io_uring/rw.c ++++ b/io_uring/rw.c +@@ -1016,6 +1016,25 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) + return IOU_OK; + } + ++static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb) ++{ ++ struct inode *inode; ++ bool ret; ++ ++ if (!(req->flags & REQ_F_ISREG)) ++ return true; ++ if (!(kiocb->ki_flags & IOCB_NOWAIT)) { ++ kiocb_start_write(kiocb); ++ return true; ++ } ++ ++ inode = file_inode(kiocb->ki_filp); ++ ret = sb_start_write_trylock(inode->i_sb); ++ if (ret) ++ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); ++ return ret; ++} ++ + int io_write(struct io_kiocb *req, unsigned int issue_flags) + { + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +@@ -1053,8 +1072,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) + if (unlikely(ret)) + return ret; + +- if (req->flags & REQ_F_ISREG) +- kiocb_start_write(kiocb); ++ if (unlikely(!io_kiocb_start_write(req, kiocb))) ++ return -EAGAIN; + kiocb->ki_flags |= IOCB_WRITE; + + if (likely(req->file->f_op->write_iter)) +-- +2.43.0 + diff --git a/queue-6.11/iov_iter-fix-copy_page_from_iter_atomic-if-kmap_loca.patch b/queue-6.11/iov_iter-fix-copy_page_from_iter_atomic-if-kmap_loca.patch new file mode 100644 index 
00000000000..eae63843ff4 --- /dev/null +++ b/queue-6.11/iov_iter-fix-copy_page_from_iter_atomic-if-kmap_loca.patch @@ -0,0 +1,70 @@ +From 42c16d9696004979bb2213d9208be27c1c867551 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 27 Oct 2024 15:23:23 -0700 +Subject: iov_iter: fix copy_page_from_iter_atomic() if KMAP_LOCAL_FORCE_MAP + +From: Hugh Dickins + +[ Upstream commit c749d9b7ebbc5716af7a95f7768634b30d9446ec ] + +generic/077 on x86_32 CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP=y with highmem, +on huge=always tmpfs, issues a warning and then hangs (interruptibly): + +WARNING: CPU: 5 PID: 3517 at mm/highmem.c:622 kunmap_local_indexed+0x62/0xc9 +CPU: 5 UID: 0 PID: 3517 Comm: cp Not tainted 6.12.0-rc4 #2 +... +copy_page_from_iter_atomic+0xa6/0x5ec +generic_perform_write+0xf6/0x1b4 +shmem_file_write_iter+0x54/0x67 + +Fix copy_page_from_iter_atomic() by limiting it in that case +(include/linux/skbuff.h skb_frag_must_loop() does similar). + +But going forward, perhaps CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP is too +surprising, has outlived its usefulness, and should just be removed? 
+ +Fixes: 908a1ad89466 ("iov_iter: Handle compound highmem pages in copy_page_from_iter_atomic()") +Signed-off-by: Hugh Dickins +Link: https://lore.kernel.org/r/dd5f0c89-186e-18e1-4f43-19a60f5a9774@google.com +Reviewed-by: Christoph Hellwig +Cc: stable@vger.kernel.org +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + lib/iov_iter.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/lib/iov_iter.c b/lib/iov_iter.c +index 4a6a9f419bd7e..b892894228b03 100644 +--- a/lib/iov_iter.c ++++ b/lib/iov_iter.c +@@ -461,6 +461,8 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, + size_t bytes, struct iov_iter *i) + { + size_t n, copied = 0; ++ bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || ++ PageHighMem(page); + + if (!page_copy_sane(page, offset, bytes)) + return 0; +@@ -471,7 +473,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, + char *p; + + n = bytes - copied; +- if (PageHighMem(page)) { ++ if (uses_kmap) { + page += offset / PAGE_SIZE; + offset %= PAGE_SIZE; + n = min_t(size_t, n, PAGE_SIZE - offset); +@@ -482,7 +484,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, + kunmap_atomic(p); + copied += n; + offset += n; +- } while (PageHighMem(page) && copied != bytes && n > 0); ++ } while (uses_kmap && copied != bytes && n > 0); + + return copied; + } +-- +2.43.0 + diff --git a/queue-6.11/kasan-remove-vmalloc_percpu-test.patch b/queue-6.11/kasan-remove-vmalloc_percpu-test.patch new file mode 100644 index 00000000000..b9780e76999 --- /dev/null +++ b/queue-6.11/kasan-remove-vmalloc_percpu-test.patch @@ -0,0 +1,87 @@ +From 649215b22f013e76187a53e7f19ed2f2a4480fd3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 18:07:06 +0200 +Subject: kasan: remove vmalloc_percpu test + +From: Andrey Konovalov + +[ Upstream commit 330d8df81f3673d6fb74550bbc9bb159d81b35f7 ] + +Commit 1a2473f0cbc0 ("kasan: improve vmalloc tests") added the 
+vmalloc_percpu KASAN test with the assumption that __alloc_percpu always +uses vmalloc internally, which is tagged by KASAN. + +However, __alloc_percpu might allocate memory from the first per-CPU +chunk, which is not allocated via vmalloc(). As a result, the test might +fail. + +Remove the test until proper KASAN annotations for per-CPU allocations +are added; tracked in https://bugzilla.kernel.org/show_bug.cgi?id=215019. + +Link: https://lkml.kernel.org/r/20241022160706.38943-1-andrey.konovalov@linux.dev +Fixes: 1a2473f0cbc0 ("kasan: improve vmalloc tests") +Signed-off-by: Andrey Konovalov +Reported-by: Samuel Holland +Link: https://lore.kernel.org/all/4a245fff-cc46-44d1-a5f9-fd2f1c3764ae@sifive.com/ +Reported-by: Sabyrzhan Tasbolatov +Link: https://lore.kernel.org/all/CACzwLxiWzNqPBp4C1VkaXZ2wDwvY3yZeetCi1TLGFipKW77drA@mail.gmail.com/ +Cc: Alexander Potapenko +Cc: Andrey Ryabinin +Cc: Dmitry Vyukov +Cc: Marco Elver +Cc: Sabyrzhan Tasbolatov +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/kasan/kasan_test.c | 27 --------------------------- + 1 file changed, 27 deletions(-) + +diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c +index 7b32be2a3cf0e..9efde47f80698 100644 +--- a/mm/kasan/kasan_test.c ++++ b/mm/kasan/kasan_test.c +@@ -1765,32 +1765,6 @@ static void vm_map_ram_tags(struct kunit *test) + free_pages((unsigned long)p_ptr, 1); + } + +-static void vmalloc_percpu(struct kunit *test) +-{ +- char __percpu *ptr; +- int cpu; +- +- /* +- * This test is specifically crafted for the software tag-based mode, +- * the only tag-based mode that poisons percpu mappings.
+- */ +- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); +- +- ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); +- +- for_each_possible_cpu(cpu) { +- char *c_ptr = per_cpu_ptr(ptr, cpu); +- +- KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN); +- KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL); +- +- /* Make sure that in-bounds accesses don't crash the kernel. */ +- *c_ptr = 0; +- } +- +- free_percpu(ptr); +-} +- + /* + * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN, + * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based +@@ -1967,7 +1941,6 @@ static struct kunit_case kasan_kunit_test_cases[] = { + KUNIT_CASE(vmalloc_oob), + KUNIT_CASE(vmap_tags), + KUNIT_CASE(vm_map_ram_tags), +- KUNIT_CASE(vmalloc_percpu), + KUNIT_CASE(match_all_not_assigned), + KUNIT_CASE(match_all_ptr_tag), + KUNIT_CASE(match_all_mem_tag), +-- +2.43.0 + diff --git a/queue-6.11/mctp-i2c-handle-null-header-address.patch b/queue-6.11/mctp-i2c-handle-null-header-address.patch new file mode 100644 index 00000000000..ca76de8c9c6 --- /dev/null +++ b/queue-6.11/mctp-i2c-handle-null-header-address.patch @@ -0,0 +1,44 @@ +From 3f2fafda72b327eddc56b97f4ef72a706ba040e7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 18:25:14 +0800 +Subject: mctp i2c: handle NULL header address + +From: Matt Johnston + +[ Upstream commit 01e215975fd80af81b5b79f009d49ddd35976c13 ] + +daddr can be NULL if there is no neighbour table entry present, +in that case the tx packet should be dropped. + +saddr will usually be set by MCTP core, but check for NULL in case a +packet is transmitted by a different protocol. 
+ +Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") +Cc: stable@vger.kernel.org +Reported-by: Dung Cao +Signed-off-by: Matt Johnston +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20241022-mctp-i2c-null-dest-v3-1-e929709956c5@codeconstruct.com.au +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/mctp/mctp-i2c.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c +index 4dc057c121f5d..e70fb66879941 100644 +--- a/drivers/net/mctp/mctp-i2c.c ++++ b/drivers/net/mctp/mctp-i2c.c +@@ -588,6 +588,9 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, + if (len > MCTP_I2C_MAXMTU) + return -EMSGSIZE; + ++ if (!daddr || !saddr) ++ return -EINVAL; ++ + lldst = *((u8 *)daddr); + llsrc = *((u8 *)saddr); + +-- +2.43.0 + diff --git a/queue-6.11/mei-use-kvmalloc-for-read-buffer.patch b/queue-6.11/mei-use-kvmalloc-for-read-buffer.patch new file mode 100644 index 00000000000..a257b9ef822 --- /dev/null +++ b/queue-6.11/mei-use-kvmalloc-for-read-buffer.patch @@ -0,0 +1,55 @@ +From 2d23e9505edce6c98d763ca56fa41049743e5a0f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Oct 2024 15:31:57 +0300 +Subject: mei: use kvmalloc for read buffer + +From: Alexander Usyskin + +[ Upstream commit 4adf613e01bf99e1739f6ff3e162ad5b7d578d1a ] + +Read buffer is allocated according to max message size, reported by +the firmware and may reach 64K in systems with pxp client. +Contiguous 64k allocation may fail under memory pressure. +Read buffer is used as in-driver message storage and not required +to be contiguous. +Use kvmalloc to allow kernel to allocate non-contiguous memory. 
+ +Fixes: 3030dc056459 ("mei: add wrapper for queuing control commands.") +Cc: stable +Reported-by: Rohit Agarwal +Closes: https://lore.kernel.org/all/20240813084542.2921300-1-rohiagar@chromium.org/ +Tested-by: Brian Geffon +Signed-off-by: Alexander Usyskin +Acked-by: Tomas Winkler +Link: https://lore.kernel.org/r/20241015123157.2337026-1-alexander.usyskin@intel.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/misc/mei/client.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c +index 9d090fa07516f..be011cef12e5d 100644 +--- a/drivers/misc/mei/client.c ++++ b/drivers/misc/mei/client.c +@@ -321,7 +321,7 @@ void mei_io_cb_free(struct mei_cl_cb *cb) + return; + + list_del(&cb->list); +- kfree(cb->buf.data); ++ kvfree(cb->buf.data); + kfree(cb->ext_hdr); + kfree(cb); + } +@@ -497,7 +497,7 @@ struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length, + if (length == 0) + return cb; + +- cb->buf.data = kmalloc(roundup(length, MEI_SLOT_SIZE), GFP_KERNEL); ++ cb->buf.data = kvmalloc(roundup(length, MEI_SLOT_SIZE), GFP_KERNEL); + if (!cb->buf.data) { + mei_io_cb_free(cb); + return NULL; +-- +2.43.0 + diff --git a/queue-6.11/mm-mmap-limit-thp-alignment-of-anonymous-mappings-to.patch b/queue-6.11/mm-mmap-limit-thp-alignment-of-anonymous-mappings-to.patch new file mode 100644 index 00000000000..9ca54384705 --- /dev/null +++ b/queue-6.11/mm-mmap-limit-thp-alignment-of-anonymous-mappings-to.patch @@ -0,0 +1,75 @@ +From 4947414760b1f675ae0cc4cd81ff2fc2e877fdb4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Oct 2024 17:12:29 +0200 +Subject: mm, mmap: limit THP alignment of anonymous mappings to PMD-aligned + sizes + +From: Vlastimil Babka + +[ Upstream commit d4148aeab412432bf928f311eca8a2ba52bb05df ] + +Since commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP +boundaries") a mmap() of anonymous memory without a specific address hint 
+and of at least PMD_SIZE will be aligned to PMD so that it can benefit +from a THP backing page. + +However this change has been shown to regress some workloads +significantly. [1] reports regressions in various spec benchmarks, with +up to 600% slowdown of the cactusBSSN benchmark on some platforms. The +benchmark seems to create many mappings of 4632kB, which would have merged +to a large THP-backed area before commit efa7df3e3bb5 and now they are +fragmented to multiple areas each aligned to PMD boundary with gaps +between. The regression then seems to be caused mainly due to the +benchmark's memory access pattern suffering from TLB or cache aliasing due +to the aligned boundaries of the individual areas. + +Another known regression bisected to commit efa7df3e3bb5 is darktable [2] +[3] and early testing suggests this patch fixes the regression there as +well. + +To fix the regression but still try to benefit from THP-friendly anonymous +mapping alignment, add a condition that the size of the mapping must be a +multiple of PMD size instead of at least PMD size. In case of many +odd-sized mapping like the cactusBSSN creates, those will stop being +aligned and with gaps between, and instead naturally merge again. + +Link: https://lkml.kernel.org/r/20241024151228.101841-2-vbabka@suse.cz +Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") +Signed-off-by: Vlastimil Babka +Reported-by: Michael Matz +Debugged-by: Gabriel Krisman Bertazi +Closes: https://bugzilla.suse.com/show_bug.cgi?id=1229012 [1] +Reported-by: Matthias Bodenbinder +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219366 [2] +Closes: https://lore.kernel.org/all/2050f0d4-57b0-481d-bab8-05e8d48fed0c@leemhuis.info/ [3] +Reviewed-by: Lorenzo Stoakes +Reviewed-by: Yang Shi +Cc: Rik van Riel +Cc: Jann Horn +Cc: Liam R. 
Howlett +Cc: Petr Tesarik +Cc: Thorsten Leemhuis +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/mmap.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/mm/mmap.c b/mm/mmap.c +index 18fddcce03b85..8a04f29aa4230 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -1952,7 +1952,8 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, + + if (get_area) { + addr = get_area(file, addr, len, pgoff, flags); +- } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { ++ } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ++ && IS_ALIGNED(len, PMD_SIZE)) { + /* Ensures that larger anonymous mappings are THP aligned. */ + addr = thp_get_unmapped_area_vmflags(file, addr, len, + pgoff, flags, vm_flags); +-- +2.43.0 + diff --git a/queue-6.11/mm-multi-gen-lru-ignore-non-leaf-pmd_young-for-force.patch b/queue-6.11/mm-multi-gen-lru-ignore-non-leaf-pmd_young-for-force.patch new file mode 100644 index 00000000000..09db9a5bc47 --- /dev/null +++ b/queue-6.11/mm-multi-gen-lru-ignore-non-leaf-pmd_young-for-force.patch @@ -0,0 +1,65 @@ +From 3d088fce5c0e855d74428f888cb999405935c393 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Aug 2024 09:37:59 -0700 +Subject: mm: multi-gen LRU: ignore non-leaf pmd_young for force_scan=true + +From: Yuanchu Xie + +[ Upstream commit bceeeaed4817ba7ad9013b4116c97220a60fcf7c ] + +When non-leaf pmd accessed bits are available, MGLRU page table walks can +clear the non-leaf pmd accessed bit and ignore the accessed bit on the pte +if it's on a different node, skipping a generation update as well. If +another scan occurs on the same node as said skipped pte. + +The non-leaf pmd accessed bit might remain cleared and the pte accessed +bits won't be checked. While this is sufficient for reclaim-driven aging, +where the goal is to select a reasonably cold page, the access can be +missed when aging proactively for workingset estimation of a node/memcg. 
+ +In more detail, get_pfn_folio returns NULL if the folio's nid != node +under scanning, so the page table walk skips processing of said pte. Now +the pmd_young flag on this pmd is cleared, and if none of the pte's are +accessed before another scan occurs on the folio's node, the pmd_young +check fails and the pte accessed bit is skipped. + +Since force_scan disables various other optimizations, we check force_scan +to ignore the non-leaf pmd accessed bit. + +Link: https://lkml.kernel.org/r/20240813163759.742675-1-yuanchu@google.com +Signed-off-by: Yuanchu Xie +Acked-by: Yu Zhao +Cc: "Huang, Ying" +Cc: Lance Yang +Signed-off-by: Andrew Morton +Stable-dep-of: ddd6d8e975b1 ("mm: multi-gen LRU: remove MM_LEAF_OLD and MM_NONLEAF_TOTAL stats") +Signed-off-by: Sasha Levin +--- + mm/vmscan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 128f307da6eea..b1f88638c5ab4 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -3456,7 +3456,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area + goto next; + + if (!pmd_trans_huge(pmd[i])) { +- if (should_clear_pmd_young()) ++ if (!walk->force_scan && should_clear_pmd_young()) + pmdp_test_and_clear_young(vma, addr, pmd + i); + goto next; + } +@@ -3543,7 +3543,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, + + walk->mm_stats[MM_NONLEAF_TOTAL]++; + +- if (should_clear_pmd_young()) { ++ if (!walk->force_scan && should_clear_pmd_young()) { + if (!pmd_young(val)) + continue; + +-- +2.43.0 + diff --git a/queue-6.11/mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_t.patch b/queue-6.11/mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_t.patch new file mode 100644 index 00000000000..f75c6d69862 --- /dev/null +++ b/queue-6.11/mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_t.patch @@ -0,0 +1,153 @@ +From fd4ecc890b00d30a50f93c6fa9e07fdf95eddb1b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 19 Oct 2024 
01:29:38 +0000 +Subject: mm: multi-gen LRU: remove MM_LEAF_OLD and MM_NONLEAF_TOTAL stats + +From: Yu Zhao + +[ Upstream commit ddd6d8e975b171ea3f63a011a75820883ff0d479 ] + +Patch series "mm: multi-gen LRU: Have secondary MMUs participate in +MM_WALK". + +Today, the MM_WALK capability causes MGLRU to clear the young bit from +PMDs and PTEs during the page table walk before eviction, but MGLRU does +not call the clear_young() MMU notifier in this case. By not calling this +notifier, the MM walk takes less time/CPU, but it causes pages that are +accessed mostly through KVM / secondary MMUs to appear younger than they +should be. + +We do call the clear_young() notifier today, but only when attempting to +evict the page, so we end up clearing young/accessed information less +frequently for secondary MMUs than for mm PTEs, and therefore they appear +younger and are less likely to be evicted. Therefore, memory that is +*not* being accessed mostly by KVM will be evicted *more* frequently, +worsening performance. + +ChromeOS observed a tab-open latency regression when enabling MGLRU with a +setup that involved running a VM: + + Tab-open latency histogram (ms) +Version p50 mean p95 p99 max +base 1315 1198 2347 3454 10319 +mglru 2559 1311 7399 12060 43758 +fix 1119 926 2470 4211 6947 + +This series replaces the final non-selftest patches from this series[1], +which introduced a similar change (and a new MMU notifier) with KVM +optimizations. I'll send a separate series (to Sean and Paolo) for the +KVM optimizations. + +This series also makes proactive reclaim with MGLRU possible for KVM +memory. I have verified that this functions correctly with the selftest +from [1], but given that that test is a KVM selftest, I'll send it with +the rest of the KVM optimizations later. Andrew, let me know if you'd +like to take the test now anyway.
+ +[1]: https://lore.kernel.org/linux-mm/20240926013506.860253-18-jthoughton@google.com/ + +This patch (of 2): + +The removed stats, MM_LEAF_OLD and MM_NONLEAF_TOTAL, are not very helpful +and become more complicated to properly compute when adding +test/clear_young() notifiers in MGLRU's mm walk. + +Link: https://lkml.kernel.org/r/20241019012940.3656292-1-jthoughton@google.com +Link: https://lkml.kernel.org/r/20241019012940.3656292-2-jthoughton@google.com +Fixes: bd74fdaea146 ("mm: multi-gen LRU: support page table walks") +Signed-off-by: Yu Zhao +Signed-off-by: James Houghton +Cc: Axel Rasmussen +Cc: David Matlack +Cc: David Rientjes +Cc: David Stevens +Cc: Oliver Upton +Cc: Paolo Bonzini +Cc: Sean Christopherson +Cc: Wei Xu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + include/linux/mmzone.h | 2 -- + mm/vmscan.c | 14 +++++--------- + 2 files changed, 5 insertions(+), 11 deletions(-) + +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 1dc6248feb832..5f44d24ed9ffe 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -458,9 +458,7 @@ struct lru_gen_folio { + + enum { + MM_LEAF_TOTAL, /* total leaf entries */ +- MM_LEAF_OLD, /* old leaf entries */ + MM_LEAF_YOUNG, /* young leaf entries */ +- MM_NONLEAF_TOTAL, /* total non-leaf entries */ + MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ + MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ + NR_MM_STATS +diff --git a/mm/vmscan.c b/mm/vmscan.c +index b1f88638c5ab4..c6d9f5f4f6002 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -3376,7 +3376,6 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, + continue; + + if (!pte_young(ptent)) { +- walk->mm_stats[MM_LEAF_OLD]++; + continue; + } + +@@ -3529,7 +3528,6 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, + walk->mm_stats[MM_LEAF_TOTAL]++; + + if (!pmd_young(val)) { +- walk->mm_stats[MM_LEAF_OLD]++; + continue; + } + +@@ 
-3541,8 +3539,6 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, + continue; + } + +- walk->mm_stats[MM_NONLEAF_TOTAL]++; +- + if (!walk->force_scan && should_clear_pmd_young()) { + if (!pmd_young(val)) + continue; +@@ -5231,11 +5227,11 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, + for (tier = 0; tier < MAX_NR_TIERS; tier++) { + seq_printf(m, " %10d", tier); + for (type = 0; type < ANON_AND_FILE; type++) { +- const char *s = " "; ++ const char *s = "xxx"; + unsigned long n[3] = {}; + + if (seq == max_seq) { +- s = "RT "; ++ s = "RTx"; + n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]); + n[1] = READ_ONCE(lrugen->avg_total[type][tier]); + } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { +@@ -5257,14 +5253,14 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, + + seq_puts(m, " "); + for (i = 0; i < NR_MM_STATS; i++) { +- const char *s = " "; ++ const char *s = "xxxx"; + unsigned long n = 0; + + if (seq == max_seq && NR_HIST_GENS == 1) { +- s = "LOYNFA"; ++ s = "TYFA"; + n = READ_ONCE(mm_state->stats[hist][i]); + } else if (seq != max_seq && NR_HIST_GENS > 1) { +- s = "loynfa"; ++ s = "tyfa"; + n = READ_ONCE(mm_state->stats[hist][i]); + } + +-- +2.43.0 + diff --git a/queue-6.11/mm-multi-gen-lru-use-ptep-pmdp-_clear_young_notify.patch b/queue-6.11/mm-multi-gen-lru-use-ptep-pmdp-_clear_young_notify.patch new file mode 100644 index 00000000000..67d6071fc49 --- /dev/null +++ b/queue-6.11/mm-multi-gen-lru-use-ptep-pmdp-_clear_young_notify.patch @@ -0,0 +1,329 @@ +From 802dda2dc6f15bd992a84ed1e03135b99cc3d808 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 19 Oct 2024 01:29:39 +0000 +Subject: mm: multi-gen LRU: use {ptep,pmdp}_clear_young_notify() + +From: Yu Zhao + +[ Upstream commit 1d4832becdc2cdb2cffe2a6050c9d9fd8ff1c58c ] + +When the MM_WALK capability is enabled, memory that is mostly accessed by +a VM appears younger than it really is, therefore this 
memory will be less +likely to be evicted. Therefore, the presence of a running VM can +significantly increase swap-outs for non-VM memory, regressing the +performance for the rest of the system. + +Fix this regression by always calling {ptep,pmdp}_clear_young_notify() +whenever we clear the young bits on PMDs/PTEs. + +[jthoughton@google.com: fix link-time error] +Link: https://lkml.kernel.org/r/20241019012940.3656292-3-jthoughton@google.com +Fixes: bd74fdaea146 ("mm: multi-gen LRU: support page table walks") +Signed-off-by: Yu Zhao +Signed-off-by: James Houghton +Reported-by: David Stevens +Cc: Axel Rasmussen +Cc: David Matlack +Cc: David Rientjes +Cc: Oliver Upton +Cc: Paolo Bonzini +Cc: Sean Christopherson +Cc: Wei Xu +Cc: +Cc: kernel test robot +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + include/linux/mmzone.h | 5 ++- + mm/rmap.c | 9 ++--- + mm/vmscan.c | 88 +++++++++++++++++++++++------------------- + 3 files changed, 55 insertions(+), 47 deletions(-) + +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 5f44d24ed9ffe..fd04c8e942250 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -555,7 +555,7 @@ struct lru_gen_memcg { + + void lru_gen_init_pgdat(struct pglist_data *pgdat); + void lru_gen_init_lruvec(struct lruvec *lruvec); +-void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); ++bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw); + + void lru_gen_init_memcg(struct mem_cgroup *memcg); + void lru_gen_exit_memcg(struct mem_cgroup *memcg); +@@ -574,8 +574,9 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec) + { + } + +-static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) ++static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + { ++ return false; + } + + static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) +diff --git a/mm/rmap.c b/mm/rmap.c +index 2630bde38640c..3d89847f01dad 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ 
-885,13 +885,10 @@ static bool folio_referenced_one(struct folio *folio, + return false; + } + +- if (pvmw.pte) { +- if (lru_gen_enabled() && +- pte_young(ptep_get(pvmw.pte))) { +- lru_gen_look_around(&pvmw); ++ if (lru_gen_enabled() && pvmw.pte) { ++ if (lru_gen_look_around(&pvmw)) + referenced++; +- } +- ++ } else if (pvmw.pte) { + if (ptep_clear_flush_young_notify(vma, address, + pvmw.pte)) + referenced++; +diff --git a/mm/vmscan.c b/mm/vmscan.c +index a2ad17092abdf..f5bcd08527ae0 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -3276,7 +3277,8 @@ static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk + return false; + } + +-static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr) ++static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr, ++ struct pglist_data *pgdat) + { + unsigned long pfn = pte_pfn(pte); + +@@ -3288,13 +3290,20 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned + if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte))) + return -1; + ++ if (!pte_young(pte) && !mm_has_notifiers(vma->vm_mm)) ++ return -1; ++ + if (WARN_ON_ONCE(!pfn_valid(pfn))) + return -1; + ++ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) ++ return -1; ++ + return pfn; + } + +-static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr) ++static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr, ++ struct pglist_data *pgdat) + { + unsigned long pfn = pmd_pfn(pmd); + +@@ -3306,9 +3315,15 @@ static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned + if (WARN_ON_ONCE(pmd_devmap(pmd))) + return -1; + ++ if (!pmd_young(pmd) && !mm_has_notifiers(vma->vm_mm)) ++ return -1; ++ + if (WARN_ON_ONCE(!pfn_valid(pfn))) + return -1; + ++ if (pfn < pgdat->node_start_pfn || pfn 
>= pgdat_end_pfn(pgdat)) ++ return -1; ++ + return pfn; + } + +@@ -3317,10 +3332,6 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg, + { + struct folio *folio; + +- /* try to avoid unnecessary memory loads */ +- if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) +- return NULL; +- + folio = pfn_folio(pfn); + if (folio_nid(folio) != pgdat->node_id) + return NULL; +@@ -3376,20 +3387,16 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, + total++; + walk->mm_stats[MM_LEAF_TOTAL]++; + +- pfn = get_pte_pfn(ptent, args->vma, addr); ++ pfn = get_pte_pfn(ptent, args->vma, addr, pgdat); + if (pfn == -1) + continue; + +- if (!pte_young(ptent)) { +- continue; +- } +- + folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap); + if (!folio) + continue; + +- if (!ptep_test_and_clear_young(args->vma, addr, pte + i)) +- VM_WARN_ON_ONCE(true); ++ if (!ptep_clear_young_notify(args->vma, addr, pte + i)) ++ continue; + + young++; + walk->mm_stats[MM_LEAF_YOUNG]++; +@@ -3455,21 +3462,25 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area + /* don't round down the first address */ + addr = i ? 
(*first & PMD_MASK) + i * PMD_SIZE : *first; + +- pfn = get_pmd_pfn(pmd[i], vma, addr); +- if (pfn == -1) ++ if (!pmd_present(pmd[i])) + goto next; + + if (!pmd_trans_huge(pmd[i])) { +- if (!walk->force_scan && should_clear_pmd_young()) ++ if (!walk->force_scan && should_clear_pmd_young() && ++ !mm_has_notifiers(args->mm)) + pmdp_test_and_clear_young(vma, addr, pmd + i); + goto next; + } + ++ pfn = get_pmd_pfn(pmd[i], vma, addr, pgdat); ++ if (pfn == -1) ++ goto next; ++ + folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap); + if (!folio) + goto next; + +- if (!pmdp_test_and_clear_young(vma, addr, pmd + i)) ++ if (!pmdp_clear_young_notify(vma, addr, pmd + i)) + goto next; + + walk->mm_stats[MM_LEAF_YOUNG]++; +@@ -3527,24 +3538,18 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, + } + + if (pmd_trans_huge(val)) { +- unsigned long pfn = pmd_pfn(val); + struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); ++ unsigned long pfn = get_pmd_pfn(val, vma, addr, pgdat); + + walk->mm_stats[MM_LEAF_TOTAL]++; + +- if (!pmd_young(val)) { +- continue; +- } +- +- /* try to avoid unnecessary memory loads */ +- if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) +- continue; +- +- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first); ++ if (pfn != -1) ++ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first); + continue; + } + +- if (!walk->force_scan && should_clear_pmd_young()) { ++ if (!walk->force_scan && should_clear_pmd_young() && ++ !mm_has_notifiers(args->mm)) { + if (!pmd_young(val)) + continue; + +@@ -4018,13 +4023,13 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) + * the PTE table to the Bloom filter. This forms a feedback loop between the + * eviction and the aging. 
+ */ +-void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) ++bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + { + int i; + unsigned long start; + unsigned long end; + struct lru_gen_mm_walk *walk; +- int young = 0; ++ int young = 1; + pte_t *pte = pvmw->pte; + unsigned long addr = pvmw->address; + struct vm_area_struct *vma = pvmw->vma; +@@ -4040,12 +4045,15 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + lockdep_assert_held(pvmw->ptl); + VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio); + ++ if (!ptep_clear_young_notify(vma, addr, pte)) ++ return false; ++ + if (spin_is_contended(pvmw->ptl)) +- return; ++ return true; + + /* exclude special VMAs containing anon pages from COW */ + if (vma->vm_flags & VM_SPECIAL) +- return; ++ return true; + + /* avoid taking the LRU lock under the PTL when possible */ + walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL; +@@ -4053,6 +4061,9 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + start = max(addr & PMD_MASK, vma->vm_start); + end = min(addr | ~PMD_MASK, vma->vm_end - 1) + 1; + ++ if (end - start == PAGE_SIZE) ++ return true; ++ + if (end - start > MIN_LRU_BATCH * PAGE_SIZE) { + if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2) + end = start + MIN_LRU_BATCH * PAGE_SIZE; +@@ -4066,7 +4077,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + + /* folio_update_gen() requires stable folio_memcg() */ + if (!mem_cgroup_trylock_pages(memcg)) +- return; ++ return true; + + arch_enter_lazy_mmu_mode(); + +@@ -4076,19 +4087,16 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + unsigned long pfn; + pte_t ptent = ptep_get(pte + i); + +- pfn = get_pte_pfn(ptent, vma, addr); ++ pfn = get_pte_pfn(ptent, vma, addr, pgdat); + if (pfn == -1) + continue; + +- if (!pte_young(ptent)) +- continue; +- + folio = get_pfn_folio(pfn, memcg, pgdat, can_swap); + if (!folio) + continue; + +- if (!ptep_test_and_clear_young(vma, addr, pte + i)) +- 
VM_WARN_ON_ONCE(true); ++ if (!ptep_clear_young_notify(vma, addr, pte + i)) ++ continue; + + young++; + +@@ -4118,6 +4126,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + /* feedback from rmap walkers to page table walkers */ + if (mm_state && suitable_to_scan(i, young)) + update_bloom_filter(mm_state, max_seq, pvmw->pmd); ++ ++ return true; + } + + /****************************************************************************** +-- +2.43.0 + diff --git a/queue-6.11/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch b/queue-6.11/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch new file mode 100644 index 00000000000..991847d6598 --- /dev/null +++ b/queue-6.11/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch @@ -0,0 +1,88 @@ +From 4006d5c15746fc70b8b7ede4c29128a2be296aae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 11 Oct 2024 13:07:37 +0100 +Subject: mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic + reserves + +From: Matt Fleming + +[ Upstream commit 281dd25c1a018261a04d1b8bf41a0674000bfe38 ] + +Under memory pressure it's possible for GFP_ATOMIC order-0 allocations to +fail even though free pages are available in the highatomic reserves. +GFP_ATOMIC allocations cannot trigger unreserve_highatomic_pageblock() +since it's only run from reclaim. + +Given that such allocations will pass the watermarks in +__zone_watermark_unusable_free(), it makes sense to fallback to highatomic +reserves the same way that ALLOC_OOM can. 
+ +This fixes order-0 page allocation failures observed on Cloudflare's fleet +when handling network packets: + + kswapd1: page allocation failure: order:0, mode:0x820(GFP_ATOMIC), + nodemask=(null),cpuset=/,mems_allowed=0-7 + CPU: 10 PID: 696 Comm: kswapd1 Kdump: loaded Tainted: G O 6.6.43-CUSTOM #1 + Hardware name: MACHINE + Call Trace: + + dump_stack_lvl+0x3c/0x50 + warn_alloc+0x13a/0x1c0 + __alloc_pages_slowpath.constprop.0+0xc9d/0xd10 + __alloc_pages+0x327/0x340 + __napi_alloc_skb+0x16d/0x1f0 + bnxt_rx_page_skb+0x96/0x1b0 [bnxt_en] + bnxt_rx_pkt+0x201/0x15e0 [bnxt_en] + __bnxt_poll_work+0x156/0x2b0 [bnxt_en] + bnxt_poll+0xd9/0x1c0 [bnxt_en] + __napi_poll+0x2b/0x1b0 + bpf_trampoline_6442524138+0x7d/0x1000 + __napi_poll+0x5/0x1b0 + net_rx_action+0x342/0x740 + handle_softirqs+0xcf/0x2b0 + irq_exit_rcu+0x6c/0x90 + sysvec_apic_timer_interrupt+0x72/0x90 + + +[mfleming@cloudflare.com: update comment] + Link: https://lkml.kernel.org/r/20241015125158.3597702-1-matt@readmodwrite.com +Link: https://lkml.kernel.org/r/20241011120737.3300370-1-matt@readmodwrite.com +Link: https://lore.kernel.org/all/CAGis_TWzSu=P7QJmjD58WWiu3zjMTVKSzdOwWE8ORaGytzWJwQ@mail.gmail.com/ +Fixes: 1d91df85f399 ("mm/page_alloc: handle a missing case for memalloc_nocma_{save/restore} APIs") +Signed-off-by: Matt Fleming +Suggested-by: Vlastimil Babka +Reviewed-by: Vlastimil Babka +Cc: Mel Gorman +Cc: Michal Hocko +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/page_alloc.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 91ace8ca97e21..ec459522c2934 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -2874,12 +2874,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, + page = __rmqueue(zone, order, migratetype, alloc_flags); + + /* +- * If the allocation fails, allow OOM handling access +- * to HIGHATOMIC reserves as failing now is worse than +- * failing a high-order 
atomic allocation in the +- * future. ++ * If the allocation fails, allow OOM handling and ++ * order-0 (atomic) allocs access to HIGHATOMIC ++ * reserves as failing now is worse than failing a ++ * high-order atomic allocation in the future. + */ +- if (!page && (alloc_flags & ALLOC_OOM)) ++ if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK))) + page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); + + if (!page) { +-- +2.43.0 + diff --git a/queue-6.11/mm-shrink-skip-folio-mapped-by-an-exiting-process.patch b/queue-6.11/mm-shrink-skip-folio-mapped-by-an-exiting-process.patch new file mode 100644 index 00000000000..a2ed2f425ed --- /dev/null +++ b/queue-6.11/mm-shrink-skip-folio-mapped-by-an-exiting-process.patch @@ -0,0 +1,98 @@ +From 145f20a56a59e3098e1c51e0c1c3341de4018532 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 Jul 2024 16:36:41 +0800 +Subject: mm: shrink skip folio mapped by an exiting process + +From: Zhiguo Jiang + +[ Upstream commit c495b97624d0c059b0403e26dadb166d69918409 ] + +The releasing process of the non-shared anonymous folio mapped solely by +an exiting process may go through two flows: 1) the anonymous folio is +first swapped out into swapspace and transformed into a swp_entry in +shrink_folio_list; 2) then the swp_entry is released in the process +exiting flow. This will result in the high cpu load of releasing a +non-shared anonymous folio mapped solely by an exiting process. + +When the low system memory and the exiting process exist at the same time, +it will be likely to happen, because the non-shared anonymous folio mapped +solely by an exiting process may be reclaimed by shrink_folio_list. + +This patch is that shrink skips the non-shared anonymous folio solely +mapped by an exiting process and this folio is only released directly in +the process exiting flow, which will save swap-out time and alleviate the +load of the process exiting. + +Barry provided some effectiveness testing in [1].
"I observed that +this patch effectively skipped 6114 folios (either 4KB or 64KB mTHP), +potentially reducing the swap-out by up to 92MB (97,300,480 bytes) +during the process exit. The working set size is 256MB." + +Link: https://lkml.kernel.org/r/20240710083641.546-1-justinjiang@vivo.com +Link: https://lore.kernel.org/linux-mm/20240710033212.36497-1-21cnbao@gmail.com/ [1] +Signed-off-by: Zhiguo Jiang +Acked-by: Barry Song +Cc: David Hildenbrand +Cc: Matthew Wilcox +Signed-off-by: Andrew Morton +Stable-dep-of: 1d4832becdc2 ("mm: multi-gen LRU: use {ptep,pmdp}_clear_young_notify()") +Signed-off-by: Sasha Levin +--- + mm/rmap.c | 15 +++++++++++++++ + mm/vmscan.c | 7 ++++++- + 2 files changed, 21 insertions(+), 1 deletion(-) + +diff --git a/mm/rmap.c b/mm/rmap.c +index 2490e727e2dcb..2630bde38640c 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -75,6 +75,7 @@ + #include + #include + #include ++#include + + #include + +@@ -870,6 +871,20 @@ static bool folio_referenced_one(struct folio *folio, + continue; + } + ++ /* ++ * Skip the non-shared swapbacked folio mapped solely by ++ * the exiting or OOM-reaped process. This avoids redundant ++ * swap-out followed by an immediate unmap. ++ */ ++ if ((!atomic_read(&vma->vm_mm->mm_users) || ++ check_stable_address_space(vma->vm_mm)) && ++ folio_test_anon(folio) && folio_test_swapbacked(folio) && ++ !folio_likely_mapped_shared(folio)) { ++ pra->referenced = -1; ++ page_vma_mapped_walk_done(&pvmw); ++ return false; ++ } ++ + if (pvmw.pte) { + if (lru_gen_enabled() && + pte_young(ptep_get(pvmw.pte))) { +diff --git a/mm/vmscan.c b/mm/vmscan.c +index c6d9f5f4f6002..a2ad17092abdf 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -863,7 +863,12 @@ static enum folio_references folio_check_references(struct folio *folio, + if (vm_flags & VM_LOCKED) + return FOLIOREF_ACTIVATE; + +- /* rmap lock contention: rotate */ ++ /* ++ * There are two cases to consider. ++ * 1) Rmap lock contention: rotate. 
++ * 2) Skip the non-shared swapbacked folio mapped solely by ++ * the exiting or OOM-reaped process. ++ */ + if (referenced_ptes == -1) + return FOLIOREF_KEEP; + +-- +2.43.0 + diff --git a/queue-6.11/mptcp-init-protect-sched-with-rcu_read_lock.patch b/queue-6.11/mptcp-init-protect-sched-with-rcu_read_lock.patch new file mode 100644 index 00000000000..c657b5f68fe --- /dev/null +++ b/queue-6.11/mptcp-init-protect-sched-with-rcu_read_lock.patch @@ -0,0 +1,79 @@ +From 5ff8dc7ec233f1cad08d9df82748819a7e9a0b6f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Oct 2024 12:25:26 +0200 +Subject: mptcp: init: protect sched with rcu_read_lock + +From: Matthieu Baerts (NGI0) + +[ Upstream commit 3deb12c788c385e17142ce6ec50f769852fcec65 ] + +Enabling CONFIG_PROVE_RCU_LIST with its dependence CONFIG_RCU_EXPERT +creates this splat when an MPTCP socket is created: + + ============================= + WARNING: suspicious RCU usage + 6.12.0-rc2+ #11 Not tainted + ----------------------------- + net/mptcp/sched.c:44 RCU-list traversed in non-reader section!! + + other info that might help us debug this: + + rcu_scheduler_active = 2, debug_locks = 1 + no locks held by mptcp_connect/176. + + stack backtrace: + CPU: 0 UID: 0 PID: 176 Comm: mptcp_connect Not tainted 6.12.0-rc2+ #11 + Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 + Call Trace: + + dump_stack_lvl (lib/dump_stack.c:123) + lockdep_rcu_suspicious (kernel/locking/lockdep.c:6822) + mptcp_sched_find (net/mptcp/sched.c:44 (discriminator 7)) + mptcp_init_sock (net/mptcp/protocol.c:2867 (discriminator 1)) + ? sock_init_data_uid (arch/x86/include/asm/atomic.h:28) + inet_create.part.0.constprop.0 (net/ipv4/af_inet.c:386) + ? __sock_create (include/linux/rcupdate.h:347 (discriminator 1)) + __sock_create (net/socket.c:1576) + __sys_socket (net/socket.c:1671) + ? __pfx___sys_socket (net/socket.c:1712) + ? 
do_user_addr_fault (arch/x86/mm/fault.c:1419 (discriminator 1)) + __x64_sys_socket (net/socket.c:1728) + do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1)) + entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + +That's because when the socket is initialised, rcu_read_lock() is not +used despite the explicit comment written above the declaration of +mptcp_sched_find() in sched.c. Adding the missing lock/unlock avoids the +warning. + +Fixes: 1730b2b2c5a5 ("mptcp: add sched in mptcp_sock") +Cc: stable@vger.kernel.org +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/523 +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20241021-net-mptcp-sched-lock-v1-1-637759cf061c@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/mptcp/protocol.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index d4b3bc46cdaaf..ec87b36f0d451 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2864,8 +2864,10 @@ static int mptcp_init_sock(struct sock *sk) + if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net)) + return -ENOMEM; + ++ rcu_read_lock(); + ret = mptcp_init_sched(mptcp_sk(sk), + mptcp_sched_find(mptcp_get_scheduler(net))); ++ rcu_read_unlock(); + if (ret) + return ret; + +-- +2.43.0 + diff --git a/queue-6.11/nvme-re-fix-error-handling-for-io_uring-nvme-passthr.patch b/queue-6.11/nvme-re-fix-error-handling-for-io_uring-nvme-passthr.patch new file mode 100644 index 00000000000..a2cc4916259 --- /dev/null +++ b/queue-6.11/nvme-re-fix-error-handling-for-io_uring-nvme-passthr.patch @@ -0,0 +1,48 @@ +From 5ef1d4567243e77f8f9b35e036672034bc7ae1b7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Oct 2024 13:45:46 -0700 +Subject: nvme: re-fix error-handling for io_uring nvme-passthrough + +From: Keith Busch + +[ Upstream commit 
5eed4fb274cd6579f2fb4190b11c4c86c553cd06 ] + +This was previously fixed with commit 1147dd0503564fa0e0348 +("nvme: fix error-handling for io_uring nvme-passthrough"), but the +change was mistakenly undone in a later commit. + +Fixes: d6aacee9255e7f ("nvme: use bio_integrity_map_user") +Cc: stable@vger.kernel.org +Reported-by: Jens Axboe +Reviewed-by: Christoph Hellwig +Reviewed-by: Anuj Gupta +Reviewed-by: Kanchan Joshi +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/ioctl.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c +index 15c93ce07e263..2cb35c4528a93 100644 +--- a/drivers/nvme/host/ioctl.c ++++ b/drivers/nvme/host/ioctl.c +@@ -423,10 +423,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, + struct io_uring_cmd *ioucmd = req->end_io_data; + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); + +- if (nvme_req(req)->flags & NVME_REQ_CANCELLED) ++ if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { + pdu->status = -EINTR; +- else ++ } else { + pdu->status = nvme_req(req)->status; ++ if (!pdu->status) ++ pdu->status = blk_status_to_errno(err); ++ } + pdu->result = le64_to_cpu(nvme_req(req)->result.u64); + + /* +-- +2.43.0 + diff --git a/queue-6.11/nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch b/queue-6.11/nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch new file mode 100644 index 00000000000..9f9262c2859 --- /dev/null +++ b/queue-6.11/nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch @@ -0,0 +1,41 @@ +From 24d4b379165fb7dc6b55d71277da5e190dc9243f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Sep 2024 22:41:37 +0500 +Subject: nvmet-auth: assign dh_key to NULL after kfree_sensitive + +From: Vitaliy Shevtsov + +[ Upstream commit d2f551b1f72b4c508ab9298419f6feadc3b5d791 ] + +ctrl->dh_key might be used across multiple calls to nvmet_setup_dhgroup() +for the same controller. 
So it's better to nullify it after release on +error path in order to avoid double free later in nvmet_destroy_auth(). + +Found by Linux Verification Center (linuxtesting.org) with Svace. + +Fixes: 7a277c37d352 ("nvmet-auth: Diffie-Hellman key exchange support") +Cc: stable@vger.kernel.org +Signed-off-by: Vitaliy Shevtsov +Reviewed-by: Christoph Hellwig +Reviewed-by: Hannes Reinecke +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/target/auth.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c +index 8bc3f431c77f6..8c41a47dfed17 100644 +--- a/drivers/nvme/target/auth.c ++++ b/drivers/nvme/target/auth.c +@@ -103,6 +103,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id) + pr_debug("%s: ctrl %d failed to generate private key, err %d\n", + __func__, ctrl->cntlid, ret); + kfree_sensitive(ctrl->dh_key); ++ ctrl->dh_key = NULL; + return ret; + } + ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm); +-- +2.43.0 + diff --git a/queue-6.11/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch b/queue-6.11/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch new file mode 100644 index 00000000000..79b075a689a --- /dev/null +++ b/queue-6.11/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch @@ -0,0 +1,60 @@ +From dbfb2ead9ec9755ada44cb33ed7a48e82347cd98 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Oct 2024 19:43:47 +0800 +Subject: ocfs2: pass u64 to ocfs2_truncate_inline maybe overflow + +From: Edward Adam Davis + +[ Upstream commit bc0a2f3a73fcdac651fca64df39306d1e5ebe3b0 ] + +Syzbot reported a kernel BUG in ocfs2_truncate_inline. There are two +reasons for this: first, the parameter value passed is greater than +ocfs2_max_inline_data_with_xattr, second, the start and end parameters of +ocfs2_truncate_inline are "unsigned int". 
+ +So, we need to add a sanity check for byte_start and byte_len right before +ocfs2_truncate_inline() in ocfs2_remove_inode_range(), if they are greater +than ocfs2_max_inline_data_with_xattr return -EINVAL. + +Link: https://lkml.kernel.org/r/tencent_D48DB5122ADDAEDDD11918CFB68D93258C07@qq.com +Fixes: 1afc32b95233 ("ocfs2: Write support for inline data") +Signed-off-by: Edward Adam Davis +Reported-by: syzbot+81092778aac03460d6b7@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=81092778aac03460d6b7 +Reviewed-by: Joseph Qi +Cc: Joel Becker +Cc: Joseph Qi +Cc: Mark Fasheh +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Gang He +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/ocfs2/file.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c +index ccc57038a9779..02d2beb7ddb95 100644 +--- a/fs/ocfs2/file.c ++++ b/fs/ocfs2/file.c +@@ -1783,6 +1783,14 @@ int ocfs2_remove_inode_range(struct inode *inode, + return 0; + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { ++ int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di); ++ ++ if (byte_start > id_count || byte_start + byte_len > id_count) { ++ ret = -EINVAL; ++ mlog_errno(ret); ++ goto out; ++ } ++ + ret = ocfs2_truncate_inline(inode, di_bh, byte_start, + byte_start + byte_len, 0); + if (ret) { +-- +2.43.0 + diff --git a/queue-6.11/phy-freescale-imx8m-pcie-do-cmn_rst-just-before-phy-.patch b/queue-6.11/phy-freescale-imx8m-pcie-do-cmn_rst-just-before-phy-.patch new file mode 100644 index 00000000000..8face5dbd7b --- /dev/null +++ b/queue-6.11/phy-freescale-imx8m-pcie-do-cmn_rst-just-before-phy-.patch @@ -0,0 +1,97 @@ +From fe97ed0862384798e6288056ef6aef29d8a3383d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Oct 2024 11:52:41 -0400 +Subject: phy: freescale: imx8m-pcie: Do CMN_RST just before PHY PLL lock check + +From: Richard Zhu + +[ Upstream commit 
f89263b69731e0144d275fff777ee0dd92069200 ] + +When enable initcall_debug together with higher debug level below. +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=9 +CONFIG_CONSOLE_LOGLEVEL_QUIET=9 +CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7 + +The initialization of i.MX8MP PCIe PHY might be timeout failed randomly. +To fix this issue, adjust the sequence of the resets refer to the power +up sequence listed below. + +i.MX8MP PCIe PHY power up sequence: + /--------------------------------------------- +1.8v supply ---------/ + /--------------------------------------------------- +0.8v supply ---/ + + ---\ /-------------------------------------------------- + X REFCLK Valid +Reference Clock ---/ \-------------------------------------------------- + ------------------------------------------- + | +i_init_restn -------------- + ------------------------------------ + | +i_cmn_rstn --------------------- + ------------------------------- + | +o_pll_lock_done -------------------------- + +Logs: +imx6q-pcie 33800000.pcie: host bridge /soc@0/pcie@33800000 ranges: +imx6q-pcie 33800000.pcie: IO 0x001ff80000..0x001ff8ffff -> 0x0000000000 +imx6q-pcie 33800000.pcie: MEM 0x0018000000..0x001fefffff -> 0x0018000000 +probe of clk_imx8mp_audiomix.reset.0 returned 0 after 1052 usecs +probe of 30e20000.clock-controller returned 0 after 32971 usecs +phy phy-32f00000.pcie-phy.4: phy poweron failed --> -110 +probe of 30e10000.dma-controller returned 0 after 10235 usecs +imx6q-pcie 33800000.pcie: waiting for PHY ready timeout! +dwhdmi-imx 32fd8000.hdmi: Detected HDMI TX controller v2.13a with HDCP (samsung_dw_hdmi_phy2) +imx6q-pcie 33800000.pcie: probe with driver imx6q-pcie failed with error -110 + +Fixes: dce9edff16ee ("phy: freescale: imx8m-pcie: Add i.MX8MP PCIe PHY support") +Cc: stable@vger.kernel.org +Signed-off-by: Richard Zhu +Signed-off-by: Frank Li + +v2 changes: +- Rebase to latest fixes branch of linux-phy git repo. +- Richard's environment have problem and can't sent out patch. 
So I help +post this fix patch. + +Link: https://lore.kernel.org/r/20241021155241.943665-1-Frank.Li@nxp.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +--- + drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +index 11fcb1867118c..e98361dcdeadf 100644 +--- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c ++++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +@@ -141,11 +141,6 @@ static int imx8_pcie_phy_power_on(struct phy *phy) + IMX8MM_GPR_PCIE_REF_CLK_PLL); + usleep_range(100, 200); + +- /* Do the PHY common block reset */ +- regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, +- IMX8MM_GPR_PCIE_CMN_RST, +- IMX8MM_GPR_PCIE_CMN_RST); +- + switch (imx8_phy->drvdata->variant) { + case IMX8MP: + reset_control_deassert(imx8_phy->perst); +@@ -156,6 +151,11 @@ static int imx8_pcie_phy_power_on(struct phy *phy) + break; + } + ++ /* Do the PHY common block reset */ ++ regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, ++ IMX8MM_GPR_PCIE_CMN_RST, ++ IMX8MM_GPR_PCIE_CMN_RST); ++ + /* Polling to check the phy is ready or not. 
*/ + ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG075, + val, val == ANA_PLL_DONE, 10, 20000); +-- +2.43.0 + diff --git a/queue-6.11/posix-cpu-timers-clear-tick_dep_bit_posix_timer-on-c.patch b/queue-6.11/posix-cpu-timers-clear-tick_dep_bit_posix_timer-on-c.patch new file mode 100644 index 00000000000..4ab257ad7bb --- /dev/null +++ b/queue-6.11/posix-cpu-timers-clear-tick_dep_bit_posix_timer-on-c.patch @@ -0,0 +1,92 @@ +From c528ea9e359867bcd3f91efc07cb58749992a3fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Oct 2024 18:35:35 -0700 +Subject: posix-cpu-timers: Clear TICK_DEP_BIT_POSIX_TIMER on clone + +From: Benjamin Segall + +[ Upstream commit b5413156bad91dc2995a5c4eab1b05e56914638a ] + +When cloning a new thread, its posix_cputimers are not inherited, and +are cleared by posix_cputimers_init(). However, this does not clear the +tick dependency it creates in tsk->tick_dep_mask, and the handler does +not reach the code to clear the dependency if there were no timers to +begin with. + +Thus if a thread has a cputimer running before clone/fork, all +descendants will prevent nohz_full unless they create a cputimer of +their own. + +Fix this by entirely clearing the tick_dep_mask in copy_process(). +(There is currently no inherited state that needs a tick dependency) + +Process-wide timers do not have this problem because fork does not copy +signal_struct as a baseline, it creates one from scratch. 
+ +Fixes: b78783000d5c ("posix-cpu-timers: Migrate to use new tick dependency mask model") +Signed-off-by: Ben Segall +Signed-off-by: Thomas Gleixner +Reviewed-by: Frederic Weisbecker +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/xm26o737bq8o.fsf@google.com +Signed-off-by: Sasha Levin +--- + include/linux/tick.h | 8 ++++++++ + kernel/fork.c | 2 ++ + 2 files changed, 10 insertions(+) + +diff --git a/include/linux/tick.h b/include/linux/tick.h +index 72744638c5b0f..99c9c5a7252aa 100644 +--- a/include/linux/tick.h ++++ b/include/linux/tick.h +@@ -251,12 +251,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk, + if (tick_nohz_full_enabled()) + tick_nohz_dep_set_task(tsk, bit); + } ++ + static inline void tick_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit) + { + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear_task(tsk, bit); + } ++ ++static inline void tick_dep_init_task(struct task_struct *tsk) ++{ ++ atomic_set(&tsk->tick_dep_mask, 0); ++} ++ + static inline void tick_dep_set_signal(struct task_struct *tsk, + enum tick_dep_bits bit) + { +@@ -290,6 +297,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit) { } + static inline void tick_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit) { } ++static inline void tick_dep_init_task(struct task_struct *tsk) { } + static inline void tick_dep_set_signal(struct task_struct *tsk, + enum tick_dep_bits bit) { } + static inline void tick_dep_clear_signal(struct signal_struct *signal, +diff --git a/kernel/fork.c b/kernel/fork.c +index 6b97fb2ac4af5..dbf3c5d81df3b 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -104,6 +104,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -2290,6 +2291,7 @@ __latent_entropy struct task_struct *copy_process( + acct_clear_integrals(p); + + posix_cputimers_init(&p->posix_cputimers); ++ tick_dep_init_task(p); + + p->io_context = NULL; + audit_set_context(p, 
NULL); +-- +2.43.0 + diff --git a/queue-6.11/resource-kexec-walk_system_ram_res_rev-must-retain-r.patch b/queue-6.11/resource-kexec-walk_system_ram_res_rev-must-retain-r.patch new file mode 100644 index 00000000000..2544ea1b657 --- /dev/null +++ b/queue-6.11/resource-kexec-walk_system_ram_res_rev-must-retain-r.patch @@ -0,0 +1,121 @@ +From 3c8fde3e4e19afe65a67f0b0aefa9a5a627ebc85 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Oct 2024 15:03:47 -0400 +Subject: resource,kexec: walk_system_ram_res_rev must retain resource flags +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Gregory Price + +[ Upstream commit b125a0def25a082ae944c9615208bf359abdb61c ] + +walk_system_ram_res_rev() erroneously discards resource flags when passing +the information to the callback. + +This causes systems with IORESOURCE_SYSRAM_DRIVER_MANAGED memory to have +these resources selected during kexec to store kexec buffers if that +memory happens to be placed above normal system ram. + +This leads to undefined behavior after reboot. If the kexec buffer is +never touched, nothing happens. If the kexec buffer is touched, it could +lead to a crash (like below) or undefined behavior. + +Tested on a system with CXL memory expanders with driver managed memory, +TPM enabled, and CONFIG_IMA_KEXEC=y. Adding printk's showed the flags +were being discarded and as a result the check for +IORESOURCE_SYSRAM_DRIVER_MANAGED passes. + +find_next_iomem_res: name(System RAM (kmem)) + start(10000000000) + end(1034fffffff) + flags(83000200) + +locate_mem_hole_top_down: start(10000000000) end(1034fffffff) flags(0) + +[.] BUG: unable to handle page fault for address: ffff89834ffff000 +[.] #PF: supervisor read access in kernel mode +[.] #PF: error_code(0x0000) - not-present page +[.] PGD c04c8bf067 P4D c04c8bf067 PUD c04c8be067 PMD 0 +[.] Oops: 0000 [#1] SMP +[.] RIP: 0010:ima_restore_measurement_list+0x95/0x4b0 +[.]
RSP: 0018:ffffc900000d3a80 EFLAGS: 00010286 +[.] RAX: 0000000000001000 RBX: 0000000000000000 RCX: ffff89834ffff000 +[.] RDX: 0000000000000018 RSI: ffff89834ffff000 RDI: ffff89834ffff018 +[.] RBP: ffffc900000d3ba0 R08: 0000000000000020 R09: ffff888132b8a900 +[.] R10: 4000000000000000 R11: 000000003a616d69 R12: 0000000000000000 +[.] R13: ffffffff8404ac28 R14: 0000000000000000 R15: ffff89834ffff000 +[.] FS: 0000000000000000(0000) GS:ffff893d44640000(0000) knlGS:0000000000000000 +[.] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[.] ata5: SATA link down (SStatus 0 SControl 300) +[.] CR2: ffff89834ffff000 CR3: 000001034d00f001 CR4: 0000000000770ef0 +[.] PKRU: 55555554 +[.] Call Trace: +[.] +[.] ? __die+0x78/0xc0 +[.] ? page_fault_oops+0x2a8/0x3a0 +[.] ? exc_page_fault+0x84/0x130 +[.] ? asm_exc_page_fault+0x22/0x30 +[.] ? ima_restore_measurement_list+0x95/0x4b0 +[.] ? template_desc_init_fields+0x317/0x410 +[.] ? crypto_alloc_tfm_node+0x9c/0xc0 +[.] ? init_ima_lsm+0x30/0x30 +[.] ima_load_kexec_buffer+0x72/0xa0 +[.] ima_init+0x44/0xa0 +[.] __initstub__kmod_ima__373_1201_init_ima7+0x1e/0xb0 +[.] ? init_ima_lsm+0x30/0x30 +[.] do_one_initcall+0xad/0x200 +[.] ? idr_alloc_cyclic+0xaa/0x110 +[.] ? new_slab+0x12c/0x420 +[.] ? new_slab+0x12c/0x420 +[.] ? number+0x12a/0x430 +[.] ? sysvec_apic_timer_interrupt+0xa/0x80 +[.] ? asm_sysvec_apic_timer_interrupt+0x16/0x20 +[.] ? parse_args+0xd4/0x380 +[.] ? parse_args+0x14b/0x380 +[.] kernel_init_freeable+0x1c1/0x2b0 +[.] ? rest_init+0xb0/0xb0 +[.] kernel_init+0x16/0x1a0 +[.] ret_from_fork+0x2f/0x40 +[.] ? rest_init+0xb0/0xb0 +[.] ret_from_fork_asm+0x11/0x20 +[.] 
+ +Link: https://lore.kernel.org/all/20231114091658.228030-1-bhe@redhat.com/ +Link: https://lkml.kernel.org/r/20241017190347.5578-1-gourry@gourry.net +Fixes: 7acf164b259d ("resource: add walk_system_ram_res_rev()") +Signed-off-by: Gregory Price +Reviewed-by: Dan Williams +Acked-by: Baoquan He +Cc: AKASHI Takahiro +Cc: Andy Shevchenko +Cc: Bjorn Helgaas +Cc: "Huang, Ying" +Cc: Ilpo Järvinen +Cc: Mika Westerberg +Cc: Thomas Gleixner +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + kernel/resource.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/kernel/resource.c b/kernel/resource.c +index 1681ab5012e12..4f3df25176caa 100644 +--- a/kernel/resource.c ++++ b/kernel/resource.c +@@ -460,9 +460,7 @@ int walk_system_ram_res_rev(u64 start, u64 end, void *arg, + rams_size += 16; + } + +- rams[i].start = res.start; +- rams[i++].end = res.end; +- ++ rams[i++] = res; + start = res.end + 1; + } + +-- +2.43.0 + diff --git a/queue-6.11/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch b/queue-6.11/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch new file mode 100644 index 00000000000..580d7d50c36 --- /dev/null +++ b/queue-6.11/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch @@ -0,0 +1,48 @@ +From 5a9b486cb221960cd9a9636cced3d4cae1cb45c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 29 Sep 2024 16:02:33 +0200 +Subject: riscv: efi: Set NX compat flag in PE/COFF header + +From: Heinrich Schuchardt + +[ Upstream commit d41373a4b910961df5a5e3527d7bde6ad45ca438 ] + +The IMAGE_DLLCHARACTERISTICS_NX_COMPAT informs the firmware that the +EFI binary does not rely on pages that are both executable and +writable. + +The flag is used by some distro versions of GRUB to decide if the EFI +binary may be executed. + +As the Linux kernel neither has RWX sections nor needs RWX pages for +relocation we should set the flag. 
+ +Cc: Ard Biesheuvel +Cc: +Signed-off-by: Heinrich Schuchardt +Reviewed-by: Emil Renner Berthing +Fixes: cb7d2dd5612a ("RISC-V: Add PE/COFF header for EFI stub") +Acked-by: Ard Biesheuvel +Link: https://lore.kernel.org/r/20240929140233.211800-1-heinrich.schuchardt@canonical.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/efi-header.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S +index 515b2dfbca75b..c5f17c2710b58 100644 +--- a/arch/riscv/kernel/efi-header.S ++++ b/arch/riscv/kernel/efi-header.S +@@ -64,7 +64,7 @@ extra_header_fields: + .long efi_header_end - _start // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem +- .short 0 // DllCharacteristics ++ .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve +-- +2.43.0 + diff --git a/queue-6.11/riscv-prevent-a-bad-reference-count-on-cpu-nodes.patch b/queue-6.11/riscv-prevent-a-bad-reference-count-on-cpu-nodes.patch new file mode 100644 index 00000000000..7a71274e0d8 --- /dev/null +++ b/queue-6.11/riscv-prevent-a-bad-reference-count-on-cpu-nodes.patch @@ -0,0 +1,66 @@ +From 4431ae6ccacaf738fc8dca807771a7725985ae15 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Sep 2024 10:00:52 +0200 +Subject: riscv: Prevent a bad reference count on CPU nodes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Miquel Sabaté Solà + +[ Upstream commit 37233169a6ea912020c572f870075a63293b786a ] + +When populating cache leaves we previously fetched the CPU device node +at the very beginning. But when ACPI is enabled we go through a +specific branch which returns early and does not call 'of_node_put' for +the node that was acquired. 
+ +Since we are not using a CPU device node for the ACPI code anyway, we +can simply move the initialization of it just past the ACPI block, and +we are guaranteed to have an 'of_node_put' call for the acquired node. +This prevents a bad reference count of the CPU device node. + +Moreover, the previous function did not check for errors when acquiring +the device node, so a return -ENOENT has been added for that case. + +Signed-off-by: Miquel Sabaté Solà +Reviewed-by: Sudeep Holla +Reviewed-by: Sunil V L +Reviewed-by: Alexandre Ghiti +Fixes: 604f32ea6909 ("riscv: cacheinfo: initialize cacheinfo's level and type from ACPI PPTT") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240913080053.36636-1-mikisabate@gmail.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/cacheinfo.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c +index d6c108c50cba9..d32dfdba083e1 100644 +--- a/arch/riscv/kernel/cacheinfo.c ++++ b/arch/riscv/kernel/cacheinfo.c +@@ -75,8 +75,7 @@ int populate_cache_leaves(unsigned int cpu) + { + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; +- struct device_node *np = of_cpu_device_node_get(cpu); +- struct device_node *prev = NULL; ++ struct device_node *np, *prev; + int levels = 1, level = 1; + + if (!acpi_disabled) { +@@ -100,6 +99,10 @@ int populate_cache_leaves(unsigned int cpu) + return 0; + } + ++ np = of_cpu_device_node_get(cpu); ++ if (!np) ++ return -ENOENT; ++ + if (of_property_read_bool(np, "cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + if (of_property_read_bool(np, "i-cache-size")) +-- +2.43.0 + diff --git a/queue-6.11/riscv-remove-duplicated-get_rm.patch b/queue-6.11/riscv-remove-duplicated-get_rm.patch new file mode 100644 index 00000000000..1130c9fe883 --- /dev/null +++
b/queue-6.11/riscv-remove-duplicated-get_rm.patch @@ -0,0 +1,38 @@ +From b8bca16f3281116851f7e9b59e2a8c3eefac5af5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Oct 2024 17:41:39 +0800 +Subject: riscv: Remove duplicated GET_RM + +From: Chunyan Zhang + +[ Upstream commit 164f66de6bb6ef454893f193c898dc8f1da6d18b ] + +The macro GET_RM is defined twice in this file; one definition can be removed. + +Reviewed-by: Alexandre Ghiti +Signed-off-by: Chunyan Zhang +Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20241008094141.549248-3-zhangchunyan@iscas.ac.cn +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/traps_misaligned.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c +index d4fd8af7aaf5a..1b9867136b610 100644 +--- a/arch/riscv/kernel/traps_misaligned.c ++++ b/arch/riscv/kernel/traps_misaligned.c +@@ -136,8 +136,6 @@ + #define REG_PTR(insn, pos, regs) \ + (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) + +-#define GET_RM(insn) (((insn) >> 12) & 7) +- + #define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) + #define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) + #define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) +-- +2.43.0 + diff --git a/queue-6.11/riscv-remove-unused-generating_asm_offsets.patch b/queue-6.11/riscv-remove-unused-generating_asm_offsets.patch new file mode 100644 index 00000000000..7adb676435a --- /dev/null +++ b/queue-6.11/riscv-remove-unused-generating_asm_offsets.patch @@ -0,0 +1,44 @@ +From a7393a0a42f66bb6aca48a9e82d700cdf30aac48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Oct 2024 17:41:38 +0800 +Subject: riscv: Remove unused GENERATING_ASM_OFFSETS + +From: Chunyan Zhang + +[ Upstream commit 46d4e5ac6f2f801f97bcd0ec82365969197dc9b1 ] + +The macro is not used in the current version of the kernel, it looks like it +can be
removed to avoid a build warning: + +../arch/riscv/kernel/asm-offsets.c: At top level: +../arch/riscv/kernel/asm-offsets.c:7: warning: macro "GENERATING_ASM_OFFSETS" is not used [-Wunused-macros] + 7 | #define GENERATING_ASM_OFFSETS + +Fixes: 9639a44394b9 ("RISC-V: Provide a cleaner raw_smp_processor_id()") +Cc: stable@vger.kernel.org +Reviewed-by: Alexandre Ghiti +Tested-by: Alexandre Ghiti +Signed-off-by: Chunyan Zhang +Link: https://lore.kernel.org/r/20241008094141.549248-2-zhangchunyan@iscas.ac.cn +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/asm-offsets.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c +index b09ca5f944f77..cb09f0c4f62c7 100644 +--- a/arch/riscv/kernel/asm-offsets.c ++++ b/arch/riscv/kernel/asm-offsets.c +@@ -4,8 +4,6 @@ + * Copyright (C) 2017 SiFive + */ + +-#define GENERATING_ASM_OFFSETS +- + #include + #include + #include +-- +2.43.0 + diff --git a/queue-6.11/riscv-use-u-to-format-the-output-of-cpu.patch b/queue-6.11/riscv-use-u-to-format-the-output-of-cpu.patch new file mode 100644 index 00000000000..75c298ca674 --- /dev/null +++ b/queue-6.11/riscv-use-u-to-format-the-output-of-cpu.patch @@ -0,0 +1,43 @@ +From 139afa4d70e25bea309c492ae9f4d68364ac9029 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Oct 2024 11:20:10 +0800 +Subject: riscv: Use '%u' to format the output of 'cpu' + +From: WangYuli + +[ Upstream commit e0872ab72630dada3ae055bfa410bf463ff1d1e0 ] + +'cpu' is an unsigned integer, so its conversion specifier should +be %u, not %d. + +Suggested-by: Wentao Guan +Suggested-by: Maciej W. 
Rozycki +Link: https://lore.kernel.org/all/alpine.DEB.2.21.2409122309090.40372@angie.orcam.me.uk/ +Signed-off-by: WangYuli +Reviewed-by: Charlie Jenkins +Tested-by: Charlie Jenkins +Fixes: f1e58583b9c7 ("RISC-V: Support cpu hotplug") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/4C127DEECDA287C8+20241017032010.96772-1-wangyuli@uniontech.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/cpu-hotplug.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c +index 28b58fc5ad199..a1e38ecfc8be2 100644 +--- a/arch/riscv/kernel/cpu-hotplug.c ++++ b/arch/riscv/kernel/cpu-hotplug.c +@@ -58,7 +58,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) + if (cpu_ops->cpu_is_stopped) + ret = cpu_ops->cpu_is_stopped(cpu); + if (ret) +- pr_warn("CPU%d may not have stopped: %d\n", cpu, ret); ++ pr_warn("CPU%u may not have stopped: %d\n", cpu, ret); + } + + /* +-- +2.43.0 + diff --git a/queue-6.11/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch b/queue-6.11/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch new file mode 100644 index 00000000000..1a0b1ef3811 --- /dev/null +++ b/queue-6.11/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch @@ -0,0 +1,40 @@ +From 8db7792ec8fbce816af7a6506cc3fe226f54365b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Oct 2024 10:36:24 +0200 +Subject: riscv: vdso: Prevent the compiler from inserting calls to memset() + +From: Alexandre Ghiti + +[ Upstream commit bf40167d54d55d4b54d0103713d86a8638fb9290 ] + +The compiler is smart enough to insert a call to memset() in +riscv_vdso_get_cpus(), which generates a dynamic relocation. + +So prevent this by using -fno-builtin option. 
+ +Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API") +Cc: stable@vger.kernel.org +Signed-off-by: Alexandre Ghiti +Reviewed-by: Guo Ren +Link: https://lore.kernel.org/r/20241016083625.136311-2-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/vdso/Makefile | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile +index f7ef8ad9b550d..54a7fec25d5f8 100644 +--- a/arch/riscv/kernel/vdso/Makefile ++++ b/arch/riscv/kernel/vdso/Makefile +@@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o + + ccflags-y := -fno-stack-protector + ccflags-y += -DDISABLE_BRANCH_PROFILING ++ccflags-y += -fno-builtin + + ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) +-- +2.43.0 + diff --git a/queue-6.11/sched-numa-fix-the-potential-null-pointer-dereferenc.patch b/queue-6.11/sched-numa-fix-the-potential-null-pointer-dereferenc.patch new file mode 100644 index 00000000000..69145252475 --- /dev/null +++ b/queue-6.11/sched-numa-fix-the-potential-null-pointer-dereferenc.patch @@ -0,0 +1,90 @@ +From cbf6ae0a8b90bb28788732c8ad9bd6784ef1c7c3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Oct 2024 10:22:08 +0800 +Subject: sched/numa: Fix the potential null pointer dereference in + task_numa_work() + +From: Shawn Wang + +[ Upstream commit 9c70b2a33cd2aa6a5a59c5523ef053bd42265209 ] + +When running stress-ng-vm-segv test, we found a null pointer dereference +error in task_numa_work(). Here is the backtrace: + + [323676.066985] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 + ...... + [323676.067108] CPU: 35 PID: 2694524 Comm: stress-ng-vm-se + ...... 
+ [323676.067113] pstate: 23401009 (nzCv daif +PAN -UAO +TCO +DIT +SSBS BTYPE=--) + [323676.067115] pc : vma_migratable+0x1c/0xd0 + [323676.067122] lr : task_numa_work+0x1ec/0x4e0 + [323676.067127] sp : ffff8000ada73d20 + [323676.067128] x29: ffff8000ada73d20 x28: 0000000000000000 x27: 000000003e89f010 + [323676.067130] x26: 0000000000080000 x25: ffff800081b5c0d8 x24: ffff800081b27000 + [323676.067133] x23: 0000000000010000 x22: 0000000104d18cc0 x21: ffff0009f7158000 + [323676.067135] x20: 0000000000000000 x19: 0000000000000000 x18: ffff8000ada73db8 + [323676.067138] x17: 0001400000000000 x16: ffff800080df40b0 x15: 0000000000000035 + [323676.067140] x14: ffff8000ada73cc8 x13: 1fffe0017cc72001 x12: ffff8000ada73cc8 + [323676.067142] x11: ffff80008001160c x10: ffff000be639000c x9 : ffff8000800f4ba4 + [323676.067145] x8 : ffff000810375000 x7 : ffff8000ada73974 x6 : 0000000000000001 + [323676.067147] x5 : 0068000b33e26707 x4 : 0000000000000001 x3 : ffff0009f7158000 + [323676.067149] x2 : 0000000000000041 x1 : 0000000000004400 x0 : 0000000000000000 + [323676.067152] Call trace: + [323676.067153] vma_migratable+0x1c/0xd0 + [323676.067155] task_numa_work+0x1ec/0x4e0 + [323676.067157] task_work_run+0x78/0xd8 + [323676.067161] do_notify_resume+0x1ec/0x290 + [323676.067163] el0_svc+0x150/0x160 + [323676.067167] el0t_64_sync_handler+0xf8/0x128 + [323676.067170] el0t_64_sync+0x17c/0x180 + [323676.067173] Code: d2888001 910003fd f9000bf3 aa0003f3 (f9401000) + [323676.067177] SMP: stopping secondary CPUs + [323676.070184] Starting crashdump kernel... + +stress-ng-vm-segv in stress-ng is used to stress test the SIGSEGV error +handling function of the system, which tries to cause a SIGSEGV error on +return from unmapping the whole address space of the child process. + +Normally this program will not cause kernel crashes. But before the +munmap system call returns to user mode, a potential task_numa_work() +for numa balancing could be added and executed. 
In this scenario, since the +child process has no vma after munmap, the vma_next() in task_numa_work() +will return a null pointer even if the vma iterator restarts from 0. + +Recheck the vma pointer before dereferencing it in task_numa_work(). + +Fixes: 214dbc428137 ("sched: convert to vma iterator") +Signed-off-by: Shawn Wang +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org # v6.2+ +Link: https://lkml.kernel.org/r/20241025022208.125527-1-shawnwang@linux.alibaba.com +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 1d2cbdb162a67..425348b8d9eb3 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -3289,7 +3289,7 @@ static void task_numa_work(struct callback_head *work) + vma = vma_next(&vmi); + } + +- do { ++ for (; vma; vma = vma_next(&vmi)) { + if (!vma_migratable(vma) || !vma_policy_mof(vma) || + is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_UNSUITABLE); +@@ -3411,7 +3411,7 @@ static void task_numa_work(struct callback_head *work) + */ + if (vma_pids_forced) + break; +- } for_each_vma(vmi, vma); ++ } + + /* + * If no VMAs are remaining and VMAs were skipped due to the PID +-- +2.43.0 + diff --git a/queue-6.11/scsi-ufs-core-fix-another-deadlock-during-rtc-update.patch b/queue-6.11/scsi-ufs-core-fix-another-deadlock-during-rtc-update.patch new file mode 100644 index 00000000000..41a2f7e5181 --- /dev/null +++ b/queue-6.11/scsi-ufs-core-fix-another-deadlock-during-rtc-update.patch @@ -0,0 +1,43 @@ +From 571a3552681bc8f34155b1e4b0a905eaf0f43359 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Oct 2024 09:54:53 +0800 +Subject: scsi: ufs: core: Fix another deadlock during RTC update + +From: Peter Wang + +[ Upstream commit cb7e509c4e0197f63717fee54fb41c4990ba8d3a ] + +If ufshcd_rtc_work calls ufshcd_rpm_put_sync() and the pm's usage_count +is 
0, we will enter the runtime suspend callback. However, the runtime +suspend callback will wait to flush ufshcd_rtc_work, causing a deadlock. + +Replace ufshcd_rpm_put_sync() with ufshcd_rpm_put() to avoid the +deadlock. + +Fixes: 6bf999e0eb41 ("scsi: ufs: core: Add UFS RTC support") +Cc: stable@vger.kernel.org #6.11.x +Signed-off-by: Peter Wang +Link: https://lore.kernel.org/r/20241024015453.21684-1-peter.wang@mediatek.com +Reviewed-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/ufs/core/ufshcd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c +index 09408642a6efb..83567388a7b58 100644 +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -8224,7 +8224,7 @@ static void ufshcd_update_rtc(struct ufs_hba *hba) + + err = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, QUERY_ATTR_IDN_SECONDS_PASSED, + 0, 0, &val); +- ufshcd_rpm_put_sync(hba); ++ ufshcd_rpm_put(hba); + + if (err) + dev_err(hba->dev, "%s: Failed to update rtc %d\n", __func__, err); +-- +2.43.0 + diff --git a/queue-6.11/series b/queue-6.11/series index fb277f98db5..ea753d29c01 100644 --- a/queue-6.11/series +++ b/queue-6.11/series @@ -144,3 +144,66 @@ cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch cxl-port-fix-cxl-port-initialization-order-when-the-subsystem-is-built-in.patch mmc-sdhci-pci-gli-gl9767-fix-low-power-mode-on-the-set-clock-function.patch mmc-sdhci-pci-gli-gl9767-fix-low-power-mode-in-the-sd-express-process.patch +block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch +cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch +phy-freescale-imx8m-pcie-do-cmn_rst-just-before-phy-.patch +btrfs-merge-btrfs_orig_bbio_end_io-into-btrfs_bio_en.patch +btrfs-fix-error-propagation-of-split-bios.patch +spi-spi-fsl-dspi-fix-crash-when-not-using-gpio-chip-.patch +iio-light-veml6030-fix-microlux-value-calculation.patch-18046 
+riscv-vdso-prevent-the-compiler-from-inserting-calls.patch +input-edt-ft5x06-fix-regmap-leak-when-probe-fails.patch +alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch +riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch +riscv-prevent-a-bad-reference-count-on-cpu-nodes.patch +riscv-use-u-to-format-the-output-of-cpu.patch +riscv-remove-unused-generating_asm_offsets.patch +riscv-remove-duplicated-get_rm.patch +scsi-ufs-core-fix-another-deadlock-during-rtc-update.patch +cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch +cxl-acpi-ensure-ports-ready-at-cxl_acpi_probe-return.patch +sched-numa-fix-the-potential-null-pointer-dereferenc.patch +posix-cpu-timers-clear-tick_dep_bit_posix_timer-on-c.patch +iov_iter-fix-copy_page_from_iter_atomic-if-kmap_loca.patch +tpm-return-tpm2_sessions_init-when-null-key-creation.patch +tpm-rollback-tpm2_load_null.patch +drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-w.patch +drm-amdgpu-smu13-fix-profile-reporting.patch +tpm-lazily-flush-the-auth-session.patch +mptcp-init-protect-sched-with-rcu_read_lock.patch +mei-use-kvmalloc-for-read-buffer.patch +fork-do-not-invoke-uffd-on-fork-if-error-occurs.patch +fork-only-invoke-khugepaged-ksm-hooks-if-no-error.patch +mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch +x86-traps-enable-ubsan-traps-on-x86.patch +x86-traps-move-kmsan-check-after-instrumentation_beg.patch +ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch +resource-kexec-walk_system_ram_res_rev-must-retain-r.patch +mctp-i2c-handle-null-header-address.patch +btrfs-fix-use-after-free-of-block-device-file-in-__b.patch +accel-ivpu-fix-noc-firewall-interrupt-handling.patch +xfs-fix-finding-a-last-resort-ag-in-xfs_filestream_p.patch +alsa-hda-realtek-fix-headset-mic-on-tuxedo-gemini-17.patch +alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch +nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch +nvme-re-fix-error-handling-for-io_uring-nvme-passthr.patch 
+kasan-remove-vmalloc_percpu-test.patch +drm-tests-helpers-add-helper-for-drm_display_mode_fr.patch +drm-connector-hdmi-fix-memory-leak-in-drm_display_mo.patch +drm-tests-hdmi-fix-memory-leaks-in-drm_display_mode_.patch +drm-xe-fix-register-definition-order-in-xe_regs.h.patch +drm-xe-kill-regs-xe_sriov_regs.h.patch +drm-xe-add-mmio-read-before-ggtt-invalidate.patch +drm-xe-don-t-short-circuit-tdr-on-jobs-not-started.patch +io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch +btrfs-fix-extent-map-merging-not-happening-for-adjac.patch +btrfs-fix-defrag-not-merging-contiguous-extents-due-.patch +gpiolib-fix-debugfs-newline-separators.patch +gpiolib-fix-debugfs-dangling-chip-separator.patch +vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch +mm-mmap-limit-thp-alignment-of-anonymous-mappings-to.patch +input-fix-regression-when-re-registering-input-handl.patch +mm-multi-gen-lru-ignore-non-leaf-pmd_young-for-force.patch +mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_t.patch +mm-shrink-skip-folio-mapped-by-an-exiting-process.patch +mm-multi-gen-lru-use-ptep-pmdp-_clear_young_notify.patch diff --git a/queue-6.11/spi-spi-fsl-dspi-fix-crash-when-not-using-gpio-chip-.patch b/queue-6.11/spi-spi-fsl-dspi-fix-crash-when-not-using-gpio-chip-.patch new file mode 100644 index 00000000000..2f611a00457 --- /dev/null +++ b/queue-6.11/spi-spi-fsl-dspi-fix-crash-when-not-using-gpio-chip-.patch @@ -0,0 +1,86 @@ +From dd51986062472b24ac98f9a7b4a8c313bb640b5d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Oct 2024 16:30:32 -0400 +Subject: spi: spi-fsl-dspi: Fix crash when not using GPIO chip select + +From: Frank Li + +[ Upstream commit 25f00a13dccf8e45441265768de46c8bf58e08f6 ] + +Add check for the return value of spi_get_csgpiod() to avoid passing a NULL +pointer to gpiod_direction_output(), preventing a crash when GPIO chip +select is not used. 
+ +Fix below crash: +[ 4.251960] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 +[ 4.260762] Mem abort info: +[ 4.263556] ESR = 0x0000000096000004 +[ 4.267308] EC = 0x25: DABT (current EL), IL = 32 bits +[ 4.272624] SET = 0, FnV = 0 +[ 4.275681] EA = 0, S1PTW = 0 +[ 4.278822] FSC = 0x04: level 0 translation fault +[ 4.283704] Data abort info: +[ 4.286583] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 +[ 4.292074] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 +[ 4.297130] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 +[ 4.302445] [0000000000000000] user address but active_mm is swapper +[ 4.308805] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP +[ 4.315072] Modules linked in: +[ 4.318124] CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc4-next-20241023-00008-ga20ec42c5fc1 #359 +[ 4.328130] Hardware name: LS1046A QDS Board (DT) +[ 4.332832] pstate: 40000005 (nZcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 4.339794] pc : gpiod_direction_output+0x34/0x5c +[ 4.344505] lr : gpiod_direction_output+0x18/0x5c +[ 4.349208] sp : ffff80008003b8f0 +[ 4.352517] x29: ffff80008003b8f0 x28: 0000000000000000 x27: ffffc96bcc7e9068 +[ 4.359659] x26: ffffc96bcc6e00b0 x25: ffffc96bcc598398 x24: ffff447400132810 +[ 4.366800] x23: 0000000000000000 x22: 0000000011e1a300 x21: 0000000000020002 +[ 4.373940] x20: 0000000000000000 x19: 0000000000000000 x18: ffffffffffffffff +[ 4.381081] x17: ffff44740016e600 x16: 0000000500000003 x15: 0000000000000007 +[ 4.388221] x14: 0000000000989680 x13: 0000000000020000 x12: 000000000000001e +[ 4.395362] x11: 0044b82fa09b5a53 x10: 0000000000000019 x9 : 0000000000000008 +[ 4.402502] x8 : 0000000000000002 x7 : 0000000000000007 x6 : 0000000000000000 +[ 4.409641] x5 : 0000000000000200 x4 : 0000000002000000 x3 : 0000000000000000 +[ 4.416781] x2 : 0000000000022202 x1 : 0000000000000000 x0 : 0000000000000000 +[ 4.423921] Call trace: +[ 4.426362] gpiod_direction_output+0x34/0x5c (P) +[ 4.431067] 
gpiod_direction_output+0x18/0x5c (L) +[ 4.435771] dspi_setup+0x220/0x334 + +Fixes: 9e264f3f85a5 ("spi: Replace all spi->chip_select and spi->cs_gpiod references with function call") +Cc: stable@vger.kernel.org +Signed-off-by: Frank Li +Link: https://patch.msgid.link/20241023203032.1388491-1-Frank.Li@nxp.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-fsl-dspi.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c +index 191de1917f831..3fa990fb59c78 100644 +--- a/drivers/spi/spi-fsl-dspi.c ++++ b/drivers/spi/spi-fsl-dspi.c +@@ -1003,6 +1003,7 @@ static int dspi_setup(struct spi_device *spi) + u32 cs_sck_delay = 0, sck_cs_delay = 0; + struct fsl_dspi_platform_data *pdata; + unsigned char pasc = 0, asc = 0; ++ struct gpio_desc *gpio_cs; + struct chip_data *chip; + unsigned long clkrate; + bool cs = true; +@@ -1077,7 +1078,10 @@ static int dspi_setup(struct spi_device *spi) + chip->ctar_val |= SPI_CTAR_LSBFE; + } + +- gpiod_direction_output(spi_get_csgpiod(spi, 0), false); ++ gpio_cs = spi_get_csgpiod(spi, 0); ++ if (gpio_cs) ++ gpiod_direction_output(gpio_cs, false); ++ + dspi_deassert_cs(spi, &cs); + + spi_set_ctldata(spi, chip); +-- +2.43.0 + diff --git a/queue-6.11/tpm-lazily-flush-the-auth-session.patch b/queue-6.11/tpm-lazily-flush-the-auth-session.patch new file mode 100644 index 00000000000..7f11134dd5b --- /dev/null +++ b/queue-6.11/tpm-lazily-flush-the-auth-session.patch @@ -0,0 +1,214 @@ +From 514d33f63cd15cd23c6cd3d505cf3e8223283770 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Oct 2024 07:50:01 +0200 +Subject: tpm: Lazily flush the auth session + +From: Jarkko Sakkinen + +[ Upstream commit df745e25098dcb2f706399c0d06dd8d1bab6b6ec ] + +Move the allocation of chip->auth to tpm2_start_auth_session() so that this +field can be used as flag to tell whether auth session is active or not. 
+ +Instead of flushing and reloading the auth session for every transaction +separately, keep the session open unless /dev/tpm0 is used. + +Reported-by: Pengyu Ma +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219229 +Cc: stable@vger.kernel.org # v6.10+ +Fixes: 7ca110f2679b ("tpm: Address !chip->auth in tpm_buf_append_hmac_session*()") +Tested-by: Pengyu Ma +Tested-by: Stefan Berger +Reviewed-by: Stefan Berger +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Sasha Levin +--- + drivers/char/tpm/tpm-chip.c | 10 +++++++ + drivers/char/tpm/tpm-dev-common.c | 3 +++ + drivers/char/tpm/tpm-interface.c | 6 +++-- + drivers/char/tpm/tpm2-sessions.c | 45 ++++++++++++++++++------------- + 4 files changed, 44 insertions(+), 20 deletions(-) + +diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c +index 854546000c92b..1ff99a7091bbb 100644 +--- a/drivers/char/tpm/tpm-chip.c ++++ b/drivers/char/tpm/tpm-chip.c +@@ -674,6 +674,16 @@ EXPORT_SYMBOL_GPL(tpm_chip_register); + */ + void tpm_chip_unregister(struct tpm_chip *chip) + { ++#ifdef CONFIG_TCG_TPM2_HMAC ++ int rc; ++ ++ rc = tpm_try_get_ops(chip); ++ if (!rc) { ++ tpm2_end_auth_session(chip); ++ tpm_put_ops(chip); ++ } ++#endif ++ + tpm_del_legacy_sysfs(chip); + if (tpm_is_hwrng_enabled(chip)) + hwrng_unregister(&chip->hwrng); +diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c +index c3fbbf4d3db79..48ff87444f851 100644 +--- a/drivers/char/tpm/tpm-dev-common.c ++++ b/drivers/char/tpm/tpm-dev-common.c +@@ -27,6 +27,9 @@ static ssize_t tpm_dev_transmit(struct tpm_chip *chip, struct tpm_space *space, + struct tpm_header *header = (void *)buf; + ssize_t ret, len; + ++ if (chip->flags & TPM_CHIP_FLAG_TPM2) ++ tpm2_end_auth_session(chip); ++ + ret = tpm2_prepare_space(chip, space, buf, bufsiz); + /* If the command is not implemented by the TPM, synthesize a + * response with a TPM2_RC_COMMAND_CODE return for user-space. 
+diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c +index 5da134f12c9a4..8134f002b121f 100644 +--- a/drivers/char/tpm/tpm-interface.c ++++ b/drivers/char/tpm/tpm-interface.c +@@ -379,10 +379,12 @@ int tpm_pm_suspend(struct device *dev) + + rc = tpm_try_get_ops(chip); + if (!rc) { +- if (chip->flags & TPM_CHIP_FLAG_TPM2) ++ if (chip->flags & TPM_CHIP_FLAG_TPM2) { ++ tpm2_end_auth_session(chip); + tpm2_shutdown(chip, TPM2_SU_STATE); +- else ++ } else { + rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); ++ } + + tpm_put_ops(chip); + } +diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c +index a194535619929..c8fdfe901dfb7 100644 +--- a/drivers/char/tpm/tpm2-sessions.c ++++ b/drivers/char/tpm/tpm2-sessions.c +@@ -333,6 +333,9 @@ void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, + } + + #ifdef CONFIG_TCG_TPM2_HMAC ++ /* The first write to /dev/tpm{rm0} will flush the session. */ ++ attributes |= TPM2_SA_CONTINUE_SESSION; ++ + /* + * The Architecture Guide requires us to strip trailing zeros + * before computing the HMAC +@@ -484,7 +487,8 @@ static void tpm2_KDFe(u8 z[EC_PT_SZ], const char *str, u8 *pt_u, u8 *pt_v, + sha256_final(&sctx, out); + } + +-static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip) ++static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip, ++ struct tpm2_auth *auth) + { + struct crypto_kpp *kpp; + struct kpp_request *req; +@@ -543,7 +547,7 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip) + sg_set_buf(&s[0], chip->null_ec_key_x, EC_PT_SZ); + sg_set_buf(&s[1], chip->null_ec_key_y, EC_PT_SZ); + kpp_request_set_input(req, s, EC_PT_SZ*2); +- sg_init_one(d, chip->auth->salt, EC_PT_SZ); ++ sg_init_one(d, auth->salt, EC_PT_SZ); + kpp_request_set_output(req, d, EC_PT_SZ); + crypto_kpp_compute_shared_secret(req); + kpp_request_free(req); +@@ -554,8 +558,7 @@ static void tpm_buf_append_salt(struct tpm_buf 
*buf, struct tpm_chip *chip) + * This works because KDFe fully consumes the secret before it + * writes the salt + */ +- tpm2_KDFe(chip->auth->salt, "SECRET", x, chip->null_ec_key_x, +- chip->auth->salt); ++ tpm2_KDFe(auth->salt, "SECRET", x, chip->null_ec_key_x, auth->salt); + + out: + crypto_free_kpp(kpp); +@@ -853,7 +856,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, + if (rc) + /* manually close the session if it wasn't consumed */ + tpm2_flush_context(chip, auth->handle); +- memzero_explicit(auth, sizeof(*auth)); ++ ++ kfree_sensitive(auth); ++ chip->auth = NULL; + } else { + /* reset for next use */ + auth->session = TPM_HEADER_SIZE; +@@ -881,7 +886,8 @@ void tpm2_end_auth_session(struct tpm_chip *chip) + return; + + tpm2_flush_context(chip, auth->handle); +- memzero_explicit(auth, sizeof(*auth)); ++ kfree_sensitive(auth); ++ chip->auth = NULL; + } + EXPORT_SYMBOL(tpm2_end_auth_session); + +@@ -962,16 +968,20 @@ static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key) + */ + int tpm2_start_auth_session(struct tpm_chip *chip) + { ++ struct tpm2_auth *auth; + struct tpm_buf buf; +- struct tpm2_auth *auth = chip->auth; +- int rc; + u32 null_key; ++ int rc; + +- if (!auth) { +- dev_warn_once(&chip->dev, "auth session is not active\n"); ++ if (chip->auth) { ++ dev_warn_once(&chip->dev, "auth session is active\n"); + return 0; + } + ++ auth = kzalloc(sizeof(*auth), GFP_KERNEL); ++ if (!auth) ++ return -ENOMEM; ++ + rc = tpm2_load_null(chip, &null_key); + if (rc) + goto out; +@@ -992,7 +1002,7 @@ int tpm2_start_auth_session(struct tpm_chip *chip) + tpm_buf_append(&buf, auth->our_nonce, sizeof(auth->our_nonce)); + + /* append encrypted salt and squirrel away unencrypted in auth */ +- tpm_buf_append_salt(&buf, chip); ++ tpm_buf_append_salt(&buf, chip, auth); + /* session type (HMAC, audit or policy) */ + tpm_buf_append_u8(&buf, TPM2_SE_HMAC); + +@@ -1014,10 +1024,13 @@ int tpm2_start_auth_session(struct tpm_chip *chip) + + 
tpm_buf_destroy(&buf); + +- if (rc) +- goto out; ++ if (rc == TPM2_RC_SUCCESS) { ++ chip->auth = auth; ++ return 0; ++ } + +- out: ++out: ++ kfree_sensitive(auth); + return rc; + } + EXPORT_SYMBOL(tpm2_start_auth_session); +@@ -1367,10 +1380,6 @@ int tpm2_sessions_init(struct tpm_chip *chip) + return rc; + } + +- chip->auth = kmalloc(sizeof(*chip->auth), GFP_KERNEL); +- if (!chip->auth) +- return -ENOMEM; +- + return rc; + } + EXPORT_SYMBOL(tpm2_sessions_init); +-- +2.43.0 + diff --git a/queue-6.11/tpm-return-tpm2_sessions_init-when-null-key-creation.patch b/queue-6.11/tpm-return-tpm2_sessions_init-when-null-key-creation.patch new file mode 100644 index 00000000000..727c124642c --- /dev/null +++ b/queue-6.11/tpm-return-tpm2_sessions_init-when-null-key-creation.patch @@ -0,0 +1,52 @@ +From 96f0ec2253f3ed749a64b9681a5a7564cb0a35f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Oct 2024 07:49:59 +0200 +Subject: tpm: Return tpm2_sessions_init() when null key creation fails + +From: Jarkko Sakkinen + +[ Upstream commit d658d59471ed80c4a8aaf082ccc3e83cdf5ae4c1 ] + +Do not continue tpm2_sessions_init() further if the null key pair creation +fails. + +Cc: stable@vger.kernel.org # v6.10+ +Fixes: d2add27cf2b8 ("tpm: Add NULL primary creation") +Reviewed-by: Stefan Berger +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Sasha Levin +--- + drivers/char/tpm/tpm2-sessions.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c +index 44f60730cff44..9551eeca6d691 100644 +--- a/drivers/char/tpm/tpm2-sessions.c ++++ b/drivers/char/tpm/tpm2-sessions.c +@@ -1347,14 +1347,21 @@ static int tpm2_create_null_primary(struct tpm_chip *chip) + * + * Derive and context save the null primary and allocate memory in the + * struct tpm_chip for the authorizations. 
++ * ++ * Return: ++ * * 0 - OK ++ * * -errno - A system error ++ * * TPM_RC - A TPM error + */ + int tpm2_sessions_init(struct tpm_chip *chip) + { + int rc; + + rc = tpm2_create_null_primary(chip); +- if (rc) +- dev_err(&chip->dev, "TPM: security failed (NULL seed derivation): %d\n", rc); ++ if (rc) { ++ dev_err(&chip->dev, "null key creation failed with %d\n", rc); ++ return rc; ++ } + + chip->auth = kmalloc(sizeof(*chip->auth), GFP_KERNEL); + if (!chip->auth) +-- +2.43.0 + diff --git a/queue-6.11/tpm-rollback-tpm2_load_null.patch b/queue-6.11/tpm-rollback-tpm2_load_null.patch new file mode 100644 index 00000000000..52924b25eb3 --- /dev/null +++ b/queue-6.11/tpm-rollback-tpm2_load_null.patch @@ -0,0 +1,85 @@ +From 28f4491938d4191efbed1eb498343d7c602362e8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Oct 2024 07:50:00 +0200 +Subject: tpm: Rollback tpm2_load_null() + +From: Jarkko Sakkinen + +[ Upstream commit cc7d8594342a25693d40fe96f97e5c6c29ee609c ] + +Do not continue on tpm2_create_primary() failure in tpm2_load_null(). 
+ +Cc: stable@vger.kernel.org # v6.10+ +Fixes: eb24c9788cd9 ("tpm: disable the TPM if NULL name changes") +Reviewed-by: Stefan Berger +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Sasha Levin +--- + drivers/char/tpm/tpm2-sessions.c | 44 +++++++++++++++++--------------- + 1 file changed, 24 insertions(+), 20 deletions(-) + +diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c +index 9551eeca6d691..a194535619929 100644 +--- a/drivers/char/tpm/tpm2-sessions.c ++++ b/drivers/char/tpm/tpm2-sessions.c +@@ -915,33 +915,37 @@ static int tpm2_parse_start_auth_session(struct tpm2_auth *auth, + + static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key) + { +- int rc; + unsigned int offset = 0; /* dummy offset for null seed context */ + u8 name[SHA256_DIGEST_SIZE + 2]; ++ u32 tmp_null_key; ++ int rc; + + rc = tpm2_load_context(chip, chip->null_key_context, &offset, +- null_key); +- if (rc != -EINVAL) +- return rc; ++ &tmp_null_key); ++ if (rc != -EINVAL) { ++ if (!rc) ++ *null_key = tmp_null_key; ++ goto err; ++ } + +- /* an integrity failure may mean the TPM has been reset */ +- dev_err(&chip->dev, "NULL key integrity failure!\n"); +- /* check the null name against what we know */ +- tpm2_create_primary(chip, TPM2_RH_NULL, NULL, name); +- if (memcmp(name, chip->null_key_name, sizeof(name)) == 0) +- /* name unchanged, assume transient integrity failure */ +- return rc; +- /* +- * Fatal TPM failure: the NULL seed has actually changed, so +- * the TPM must have been illegally reset. All in-kernel TPM +- * operations will fail because the NULL primary can't be +- * loaded to salt the sessions, but disable the TPM anyway so +- * userspace programmes can't be compromised by it. 
+- */ +- dev_err(&chip->dev, "NULL name has changed, disabling TPM due to interference\n"); ++ /* Try to re-create null key, given the integrity failure: */ ++ rc = tpm2_create_primary(chip, TPM2_RH_NULL, &tmp_null_key, name); ++ if (rc) ++ goto err; ++ ++ /* Return null key if the name has not been changed: */ ++ if (!memcmp(name, chip->null_key_name, sizeof(name))) { ++ *null_key = tmp_null_key; ++ return 0; ++ } ++ ++ /* Deduce from the name change TPM interference: */ ++ dev_err(&chip->dev, "null key integrity check failed\n"); ++ tpm2_flush_context(chip, tmp_null_key); + chip->flags |= TPM_CHIP_FLAG_DISABLE; + +- return rc; ++err: ++ return rc ? -ENODEV : 0; + } + + /** +-- +2.43.0 + diff --git a/queue-6.11/vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch b/queue-6.11/vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch new file mode 100644 index 00000000000..a7effaddef8 --- /dev/null +++ b/queue-6.11/vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch @@ -0,0 +1,75 @@ +From f529757c93ed2d2ae478c3405bfd882a15c72cec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Oct 2024 10:17:24 -0400 +Subject: vmscan,migrate: fix page count imbalance on node stats when demoting + pages + +From: Gregory Price + +[ Upstream commit 35e41024c4c2b02ef8207f61b9004f6956cf037b ] + +When numa balancing is enabled with demotion, vmscan will call +migrate_pages when shrinking LRUs. migrate_pages will decrement the +node's isolated page count, leading to an imbalanced count when +invoked from (MG)LRU code. + +The result is dmesg output like such: + +$ cat /proc/sys/vm/stat_refresh + +[77383.088417] vmstat_refresh: nr_isolated_anon -103212 +[77383.088417] vmstat_refresh: nr_isolated_file -899642 + +This negative value may impact compaction and reclaim throttling.
+ +The following path produces the decrement: + +shrink_folio_list + demote_folio_list + migrate_pages + migrate_pages_batch + migrate_folio_move + migrate_folio_done + mod_node_page_state(-ve) <- decrement + +This path happens for SUCCESSFUL migrations, not failures. Typically +callers to migrate_pages are required to handle putback/accounting for +failures, but this is already handled in the shrink code. + +When accounting for migrations, instead do not decrement the count when +the migration reason is MR_DEMOTION. As of v6.11, this demotion logic +is the only source of MR_DEMOTION. + +Link: https://lkml.kernel.org/r/20241025141724.17927-1-gourry@gourry.net +Fixes: 26aa2d199d6f ("mm/migrate: demote pages during reclaim") +Signed-off-by: Gregory Price +Reviewed-by: Yang Shi +Reviewed-by: Davidlohr Bueso +Reviewed-by: Shakeel Butt +Reviewed-by: "Huang, Ying" +Reviewed-by: Oscar Salvador +Cc: Dave Hansen +Cc: Wei Xu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/migrate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mm/migrate.c b/mm/migrate.c +index 368ab3878fa6e..75b858bd6aa58 100644 +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -1099,7 +1099,7 @@ static void migrate_folio_done(struct folio *src, + * not accounted to NR_ISOLATED_*. 
They can be recognized + * as __folio_test_movable + */ +- if (likely(!__folio_test_movable(src))) ++ if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION) + mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + + folio_is_file_lru(src), -folio_nr_pages(src)); + +-- +2.43.0 + diff --git a/queue-6.11/x86-traps-enable-ubsan-traps-on-x86.patch b/queue-6.11/x86-traps-enable-ubsan-traps-on-x86.patch new file mode 100644 index 00000000000..d22fa7956de --- /dev/null +++ b/queue-6.11/x86-traps-enable-ubsan-traps-on-x86.patch @@ -0,0 +1,195 @@ +From 7ac38556c9a20e6ee1d3397f728262ec03fe2368 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Jul 2024 00:01:55 +0000 +Subject: x86/traps: Enable UBSAN traps on x86 + +From: Gatlin Newhouse + +[ Upstream commit 7424fc6b86c8980a87169e005f5cd4438d18efe6 ] + +Currently ARM64 extracts which specific sanitizer has caused a trap via +encoded data in the trap instruction. Clang on x86 currently encodes the +same data in the UD1 instruction but x86 handle_bug() and +is_valid_bugaddr() currently only look at UD2. + +Bring x86 to parity with ARM64, similar to commit 25b84002afb9 ("arm64: +Support Clang UBSAN trap codes for better reporting"). See the llvm +links for information about the code generation. + +Enable the reporting of UBSAN sanitizer details on x86 compiled with clang +when CONFIG_UBSAN_TRAP=y by analysing UD1 and retrieving the type immediate +which is encoded by the compiler after the UD1. 
+ +[ tglx: Simplified it by moving the printk() into handle_bug() ] + +Signed-off-by: Gatlin Newhouse +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra (Intel) +Cc: Kees Cook +Link: https://lore.kernel.org/all/20240724000206.451425-1-gatlin.newhouse@gmail.com +Link: https://github.com/llvm/llvm-project/commit/c5978f42ec8e9#diff-bb68d7cd885f41cfc35843998b0f9f534adb60b415f647109e597ce448e92d9f +Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/X86/X86InstrSystem.td#L27 +Stable-dep-of: 1db272864ff2 ("x86/traps: move kmsan check after instrumentation_begin") +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/bug.h | 12 ++++++++ + arch/x86/kernel/traps.c | 59 ++++++++++++++++++++++++++++++++++---- + include/linux/ubsan.h | 5 ++++ + lib/Kconfig.ubsan | 4 +-- + 4 files changed, 73 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h +index a3ec87d198ac8..806649c7f23dc 100644 +--- a/arch/x86/include/asm/bug.h ++++ b/arch/x86/include/asm/bug.h +@@ -13,6 +13,18 @@ + #define INSN_UD2 0x0b0f + #define LEN_UD2 2 + ++/* ++ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit. ++ */ ++#define INSN_ASOP 0x67 ++#define OPCODE_ESCAPE 0x0f ++#define SECOND_BYTE_OPCODE_UD1 0xb9 ++#define SECOND_BYTE_OPCODE_UD2 0x0b ++ ++#define BUG_NONE 0xffff ++#define BUG_UD1 0xfffe ++#define BUG_UD2 0xfffd ++ + #ifdef CONFIG_GENERIC_BUG + + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c +index 4fa0b17e5043a..415881607c5df 100644 +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -91,6 +92,47 @@ __always_inline int is_valid_bugaddr(unsigned long addr) + return *(unsigned short *)addr == INSN_UD2; + } + ++/* ++ * Check for UD1 or UD2, accounting for Address Size Override Prefixes. ++ * If it's a UD1, get the ModRM byte to pass along to UBSan. 
++ */ ++__always_inline int decode_bug(unsigned long addr, u32 *imm) ++{ ++ u8 v; ++ ++ if (addr < TASK_SIZE_MAX) ++ return BUG_NONE; ++ ++ v = *(u8 *)(addr++); ++ if (v == INSN_ASOP) ++ v = *(u8 *)(addr++); ++ if (v != OPCODE_ESCAPE) ++ return BUG_NONE; ++ ++ v = *(u8 *)(addr++); ++ if (v == SECOND_BYTE_OPCODE_UD2) ++ return BUG_UD2; ++ ++ if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1) ++ return BUG_NONE; ++ ++ /* Retrieve the immediate (type value) for the UBSAN UD1 */ ++ v = *(u8 *)(addr++); ++ if (X86_MODRM_RM(v) == 4) ++ addr++; ++ ++ *imm = 0; ++ if (X86_MODRM_MOD(v) == 1) ++ *imm = *(u8 *)addr; ++ else if (X86_MODRM_MOD(v) == 2) ++ *imm = *(u32 *)addr; ++ else ++ WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v)); ++ ++ return BUG_UD1; ++} ++ ++ + static nokprobe_inline int + do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, + struct pt_regs *regs, long error_code) +@@ -216,6 +258,8 @@ static inline void handle_invalid_op(struct pt_regs *regs) + static noinstr bool handle_bug(struct pt_regs *regs) + { + bool handled = false; ++ int ud_type; ++ u32 imm; + + /* + * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() +@@ -223,7 +267,8 @@ static noinstr bool handle_bug(struct pt_regs *regs) + * irqentry_enter(). 
+ */ + kmsan_unpoison_entry_regs(regs); +- if (!is_valid_bugaddr(regs->ip)) ++ ud_type = decode_bug(regs->ip, &imm); ++ if (ud_type == BUG_NONE) + return handled; + + /* +@@ -236,10 +281,14 @@ static noinstr bool handle_bug(struct pt_regs *regs) + */ + if (regs->flags & X86_EFLAGS_IF) + raw_local_irq_enable(); +- if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN || +- handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { +- regs->ip += LEN_UD2; +- handled = true; ++ if (ud_type == BUG_UD2) { ++ if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN || ++ handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { ++ regs->ip += LEN_UD2; ++ handled = true; ++ } ++ } else if (IS_ENABLED(CONFIG_UBSAN_TRAP)) { ++ pr_crit("%s at %pS\n", report_ubsan_failure(regs, imm), (void *)regs->ip); + } + if (regs->flags & X86_EFLAGS_IF) + raw_local_irq_disable(); +diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h +index bff7445498ded..d8219cbe09ff8 100644 +--- a/include/linux/ubsan.h ++++ b/include/linux/ubsan.h +@@ -4,6 +4,11 @@ + + #ifdef CONFIG_UBSAN_TRAP + const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type); ++#else ++static inline const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) ++{ ++ return NULL; ++} + #endif + + #endif +diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan +index bdda600f8dfbe..1d4aa7a83b3a5 100644 +--- a/lib/Kconfig.ubsan ++++ b/lib/Kconfig.ubsan +@@ -29,8 +29,8 @@ config UBSAN_TRAP + + Also note that selecting Y will cause your kernel to Oops + with an "illegal instruction" error with no further details +- when a UBSAN violation occurs. (Except on arm64, which will +- report which Sanitizer failed.) This may make it hard to ++ when a UBSAN violation occurs. (Except on arm64 and x86, which ++ will report which Sanitizer failed.) This may make it hard to + determine whether an Oops was caused by UBSAN or to figure + out the details of a UBSAN violation. It makes the kernel log + output less useful for bug reports. 
+-- +2.43.0 + diff --git a/queue-6.11/x86-traps-move-kmsan-check-after-instrumentation_beg.patch b/queue-6.11/x86-traps-move-kmsan-check-after-instrumentation_beg.patch new file mode 100644 index 00000000000..0dc9fed3120 --- /dev/null +++ b/queue-6.11/x86-traps-move-kmsan-check-after-instrumentation_beg.patch @@ -0,0 +1,78 @@ +From 2831a9462bb651fdd49c33fc8be9e64c2dc7212a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Oct 2024 20:24:07 +0500 +Subject: x86/traps: move kmsan check after instrumentation_begin + +From: Sabyrzhan Tasbolatov + +[ Upstream commit 1db272864ff250b5e607283eaec819e1186c8e26 ] + +During x86_64 kernel build with CONFIG_KMSAN, the objtool warns following: + + AR built-in.a + AR vmlinux.a + LD vmlinux.o +vmlinux.o: warning: objtool: handle_bug+0x4: call to + kmsan_unpoison_entry_regs() leaves .noinstr.text section + OBJCOPY modules.builtin.modinfo + GEN modules.builtin + MODPOST Module.symvers + CC .vmlinux.export.o + +Moving kmsan_unpoison_entry_regs() _after_ instrumentation_begin() fixes +the warning. + +There is decode_bug(regs->ip, &imm) is left before KMSAN unpoisoining, but +it has the return condition and if we include it after +instrumentation_begin() it results the warning "return with +instrumentation enabled", hence, I'm concerned that regs will not be KMSAN +unpoisoned if `ud_type == BUG_NONE` is true. 
+ +Link: https://lkml.kernel.org/r/20241016152407.3149001-1-snovitoll@gmail.com +Fixes: ba54d194f8da ("x86/traps: avoid KMSAN bugs originating from handle_bug()") +Signed-off-by: Sabyrzhan Tasbolatov +Reviewed-by: Alexander Potapenko +Cc: Borislav Petkov (AMD) +Cc: Dave Hansen +Cc: Ingo Molnar +Cc: Thomas Gleixner +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/traps.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c +index 415881607c5df..29ec49209ae01 100644 +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -261,12 +261,6 @@ static noinstr bool handle_bug(struct pt_regs *regs) + int ud_type; + u32 imm; + +- /* +- * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() +- * is a rare case that uses @regs without passing them to +- * irqentry_enter(). +- */ +- kmsan_unpoison_entry_regs(regs); + ud_type = decode_bug(regs->ip, &imm); + if (ud_type == BUG_NONE) + return handled; +@@ -275,6 +269,12 @@ static noinstr bool handle_bug(struct pt_regs *regs) + * All lies, just get the WARN/BUG out. + */ + instrumentation_begin(); ++ /* ++ * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() ++ * is a rare case that uses @regs without passing them to ++ * irqentry_enter(). ++ */ ++ kmsan_unpoison_entry_regs(regs); + /* + * Since we're emulating a CALL with exceptions, restore the interrupt + * state to what it was at the exception site. 
+-- +2.43.0 + diff --git a/queue-6.11/xfs-fix-finding-a-last-resort-ag-in-xfs_filestream_p.patch b/queue-6.11/xfs-fix-finding-a-last-resort-ag-in-xfs_filestream_p.patch new file mode 100644 index 00000000000..c26048c8d60 --- /dev/null +++ b/queue-6.11/xfs-fix-finding-a-last-resort-ag-in-xfs_filestream_p.patch @@ -0,0 +1,121 @@ +From 2d7f5f36f1ba0597997cee4a9832384ed862b05e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Oct 2024 15:37:22 +0200 +Subject: xfs: fix finding a last resort AG in xfs_filestream_pick_ag + +From: Christoph Hellwig + +[ Upstream commit dc60992ce76fbc2f71c2674f435ff6bde2108028 ] + +When the main loop in xfs_filestream_pick_ag fails to find a suitable +AG it tries to just pick the online AG. But the loop for that uses +args->pag as loop iterator while the later code expects pag to be +set. Fix this by reusing the max_pag case for this last resort, and +also add a check for impossible case of no AG just to make sure that +the uninitialized pag doesn't even escape in theory. + +Reported-by: syzbot+4125a3c514e3436a02e6@syzkaller.appspotmail.com +Signed-off-by: Christoph Hellwig +Tested-by: syzbot+4125a3c514e3436a02e6@syzkaller.appspotmail.com +Fixes: f8f1ed1ab3baba ("xfs: return a referenced perag from filestreams allocator") +Cc: # v6.3 +Reviewed-by: Darrick J. 
Wong +Signed-off-by: Carlos Maiolino +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_filestream.c | 23 ++++++++++++----------- + fs/xfs/xfs_trace.h | 15 +++++---------- + 2 files changed, 17 insertions(+), 21 deletions(-) + +diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c +index e3aaa05555978..88bd23ce74cde 100644 +--- a/fs/xfs/xfs_filestream.c ++++ b/fs/xfs/xfs_filestream.c +@@ -64,7 +64,7 @@ xfs_filestream_pick_ag( + struct xfs_perag *pag; + struct xfs_perag *max_pag = NULL; + xfs_extlen_t minlen = *longest; +- xfs_extlen_t free = 0, minfree, maxfree = 0; ++ xfs_extlen_t minfree, maxfree = 0; + xfs_agnumber_t agno; + bool first_pass = true; + int err; +@@ -107,7 +107,6 @@ xfs_filestream_pick_ag( + !(flags & XFS_PICK_USERDATA) || + (flags & XFS_PICK_LOWSPACE))) { + /* Break out, retaining the reference on the AG. */ +- free = pag->pagf_freeblks; + break; + } + } +@@ -150,23 +149,25 @@ xfs_filestream_pick_ag( + * grab. + */ + if (!max_pag) { +- for_each_perag_wrap(args->mp, 0, start_agno, args->pag) ++ for_each_perag_wrap(args->mp, 0, start_agno, pag) { ++ max_pag = pag; + break; +- atomic_inc(&args->pag->pagf_fstrms); +- *longest = 0; +- } else { +- pag = max_pag; +- free = maxfree; +- atomic_inc(&pag->pagf_fstrms); ++ } ++ ++ /* Bail if there are no AGs at all to select from. 
*/ ++ if (!max_pag) ++ return -ENOSPC; + } ++ ++ pag = max_pag; ++ atomic_inc(&pag->pagf_fstrms); + } else if (max_pag) { + xfs_perag_rele(max_pag); + } + +- trace_xfs_filestream_pick(pag, pino, free); ++ trace_xfs_filestream_pick(pag, pino); + args->pag = pag; + return 0; +- + } + + static struct xfs_inode * +diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h +index 180ce697305a9..f681a195a7441 100644 +--- a/fs/xfs/xfs_trace.h ++++ b/fs/xfs/xfs_trace.h +@@ -691,8 +691,8 @@ DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); + DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); + + TRACE_EVENT(xfs_filestream_pick, +- TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free), +- TP_ARGS(pag, ino, free), ++ TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), ++ TP_ARGS(pag, ino), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) +@@ -703,14 +703,9 @@ TRACE_EVENT(xfs_filestream_pick, + TP_fast_assign( + __entry->dev = pag->pag_mount->m_super->s_dev; + __entry->ino = ino; +- if (pag) { +- __entry->agno = pag->pag_agno; +- __entry->streams = atomic_read(&pag->pagf_fstrms); +- } else { +- __entry->agno = NULLAGNUMBER; +- __entry->streams = 0; +- } +- __entry->free = free; ++ __entry->agno = pag->pag_agno; ++ __entry->streams = atomic_read(&pag->pagf_fstrms); ++ __entry->free = pag->pagf_freeblks; + ), + TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d", + MAJOR(__entry->dev), MINOR(__entry->dev), +-- +2.43.0 + -- 2.47.3