From: Greg Kroah-Hartman Date: Mon, 30 Jun 2025 10:06:51 +0000 (+0200) Subject: 6.15-stable patches X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3a4af1405a21206372204cc75c1c59ff5c159d0e;p=thirdparty%2Fkernel%2Fstable-queue.git 6.15-stable patches added patches: bcache-remove-unnecessary-select-min_heap.patch bluetooth-l2cap-fix-l2cap-mtu-negotiation.patch btrfs-fix-a-race-between-renames-and-directory-logging.patch btrfs-fix-invalid-inode-pointer-dereferences-during-log-replay.patch btrfs-update-superblock-s-device-bytes_used-when-dropping-chunk.patch dm-raid-fix-variable-in-journal-device-check.patch drm-amdkfd-fix-race-in-gws-queue-scheduling.patch drm-ast-fix-comment-on-modeset-lock.patch drm-cirrus-qemu-fix-pitch-programming.patch drm-etnaviv-protect-the-scheduler-s-pending-list-with-its-lock.patch drm-msm-gpu-fix-crash-when-throttling-gpu-immediately-during-boot.patch drm-panel-simple-tianma-tm070jdhg34-00-add-delays.patch drm-simpledrm-do-not-upcast-in-release-helpers.patch drm-tegra-assign-plane-type-before-registration.patch drm-tegra-fix-a-possible-null-pointer-dereference.patch drm-udl-unregister-device-before-cleaning-up-on-disconnect.patch dt-bindings-serial-8250-make-clocks-and-clock-frequency-exclusive.patch edac-amd64-fix-size-calculation-for-non-power-of-two-dimms.patch f2fs-fix-to-zero-post-eof-page.patch hid-appletb-kbd-fix-appletb_backlight-backlight-device-reference-counting.patch hid-lenovo-restrict-f7-9-11-mode-to-compact-keyboards-only.patch hid-wacom-fix-kobject-reference-count-leak.patch hid-wacom-fix-memory-leak-on-kobject-creation-failure.patch hid-wacom-fix-memory-leak-on-sysfs-attribute-creation-failure.patch maple_tree-fix-ma_state_prealloc-flag-in-mas_preallocate.patch mm-gup-revert-mm-gup-fix-infinite-loop-within-__get_longterm_locked.patch mm-shmem-swap-fix-softlockup-with-mthp-swapin.patch mm-userfaultfd-fix-race-of-userfaultfd_move-and-swap-cache.patch net-libwx-fix-the-creation-of-page_pool.patch revert-bcache-remove-heap-related-macros-and-switch-to-generic-min_heap.patch revert-bcache-update-min_heap_callbacks-to-use-default-builtin-swap.patch scsi-fnic-fix-crash-in-fnic_wq_cmpl_handler-when-fdmi-times-out.patch scsi-fnic-turn-off-fdmi-active-flags-on-link-down.patch scsi-megaraid_sas-fix-invalid-node-index.patch scsi-ufs-core-fix-clk-scaling-to-be-conditional-in-reset-and-restore.patch selinux-change-security_compute_sid-to-return-the-ssid-or-tsid-on-match.patch serial-core-restore-of_node-information-in-sysfs.patch serial-imx-restore-original-rxtl-for-console-to-fix-data-loss.patch spi-spi-cadence-quadspi-fix-pm-runtime-unbalance.patch staging-rtl8723bs-avoid-memset-in-aes_cipher-and-aes_decipher.patch x86-traps-initialize-dr6-by-writing-its-architectural-reset-value.patch --- diff --git a/queue-6.15/bcache-remove-unnecessary-select-min_heap.patch b/queue-6.15/bcache-remove-unnecessary-select-min_heap.patch new file mode 100644 index 0000000000..c0466b008b --- /dev/null +++ b/queue-6.15/bcache-remove-unnecessary-select-min_heap.patch @@ -0,0 +1,53 @@ +From 95b2e31e1752494d477c5da89d6789f769b0d67b Mon Sep 17 00:00:00 2001 +From: Kuan-Wei Chiu +Date: Sun, 15 Jun 2025 04:23:53 +0800 +Subject: bcache: remove unnecessary select MIN_HEAP + +From: Kuan-Wei Chiu + +commit 95b2e31e1752494d477c5da89d6789f769b0d67b upstream. + +After reverting the transition to the generic min heap library, bcache no +longer depends on MIN_HEAP. The select entry can be removed to reduce +code size and shrink the kernel's attack surface. 
+ +This change effectively reverts the bcache-related part of commit +92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap API +functions"). + +This is part of a series of changes to address a performance regression +caused by the use of the generic min_heap implementation. + +As reported by Robert, bcache now suffers from latency spikes, with P100 +(max) latency increasing from 600 ms to 2.4 seconds every 5 minutes. +These regressions degrade bcache's effectiveness as a low-latency cache +layer and lead to frequent timeouts and application stalls in production +environments. + +Link: https://lore.kernel.org/lkml/CAJhEC05+0S69z+3+FB2Cd0hD+pCRyWTKLEOsc8BOmH73p1m+KQ@mail.gmail.com +Link: https://lkml.kernel.org/r/20250614202353.1632957-4-visitorckw@gmail.com +Fixes: 866898efbb25 ("bcache: remove heap-related macros and switch to generic min_heap") +Fixes: 92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap API functions") +Signed-off-by: Kuan-Wei Chiu +Reported-by: Robert Pang +Closes: https://lore.kernel.org/linux-bcache/CAJhEC06F_AtrPgw2-7CvCqZgeStgCtitbD-ryuPpXQA-JG5XXw@mail.gmail.com +Acked-by: Coly Li +Cc: Ching-Chun (Jim) Huang +Cc: Kent Overstreet +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/bcache/Kconfig | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/md/bcache/Kconfig ++++ b/drivers/md/bcache/Kconfig +@@ -5,7 +5,6 @@ config BCACHE + select BLOCK_HOLDER_DEPRECATED if SYSFS + select CRC64 + select CLOSURES +- select MIN_HEAP + help + Allows a block device to be used as cache for other devices; uses + a btree for indexing and the layout is optimized for SSDs. diff --git a/queue-6.15/bluetooth-l2cap-fix-l2cap-mtu-negotiation.patch b/queue-6.15/bluetooth-l2cap-fix-l2cap-mtu-negotiation.patch new file mode 100644 index 0000000000..0cfe298079 --- /dev/null +++ b/queue-6.15/bluetooth-l2cap-fix-l2cap-mtu-negotiation.patch @@ -0,0 +1,226 @@ +From 042bb9603c44620dce98717a2d23235ca57a00d7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= +Date: Thu, 12 Jun 2025 09:50:34 +0200 +Subject: Bluetooth: L2CAP: Fix L2CAP MTU negotiation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Frédéric Danis + +commit 042bb9603c44620dce98717a2d23235ca57a00d7 upstream. + +OBEX download from iPhone is currently slow due to small packet size +used to transfer data which doesn't follow the MTU negotiated during +L2CAP connection, i.e. 
672 bytes instead of 32767: + + < ACL Data TX: Handle 11 flags 0x00 dlen 12 + L2CAP: Connection Request (0x02) ident 18 len 4 + PSM: 4103 (0x1007) + Source CID: 72 + > ACL Data RX: Handle 11 flags 0x02 dlen 16 + L2CAP: Connection Response (0x03) ident 18 len 8 + Destination CID: 14608 + Source CID: 72 + Result: Connection successful (0x0000) + Status: No further information available (0x0000) + < ACL Data TX: Handle 11 flags 0x00 dlen 27 + L2CAP: Configure Request (0x04) ident 20 len 19 + Destination CID: 14608 + Flags: 0x0000 + Option: Maximum Transmission Unit (0x01) [mandatory] + MTU: 32767 + Option: Retransmission and Flow Control (0x04) [mandatory] + Mode: Enhanced Retransmission (0x03) + TX window size: 63 + Max transmit: 3 + Retransmission timeout: 2000 + Monitor timeout: 12000 + Maximum PDU size: 1009 + > ACL Data RX: Handle 11 flags 0x02 dlen 26 + L2CAP: Configure Request (0x04) ident 72 len 18 + Destination CID: 72 + Flags: 0x0000 + Option: Retransmission and Flow Control (0x04) [mandatory] + Mode: Enhanced Retransmission (0x03) + TX window size: 32 + Max transmit: 255 + Retransmission timeout: 0 + Monitor timeout: 0 + Maximum PDU size: 65527 + Option: Frame Check Sequence (0x05) [mandatory] + FCS: 16-bit FCS (0x01) + < ACL Data TX: Handle 11 flags 0x00 dlen 29 + L2CAP: Configure Response (0x05) ident 72 len 21 + Source CID: 14608 + Flags: 0x0000 + Result: Success (0x0000) + Option: Maximum Transmission Unit (0x01) [mandatory] + MTU: 672 + Option: Retransmission and Flow Control (0x04) [mandatory] + Mode: Enhanced Retransmission (0x03) + TX window size: 32 + Max transmit: 255 + Retransmission timeout: 2000 + Monitor timeout: 12000 + Maximum PDU size: 1009 + > ACL Data RX: Handle 11 flags 0x02 dlen 32 + L2CAP: Configure Response (0x05) ident 20 len 24 + Source CID: 72 + Flags: 0x0000 + Result: Success (0x0000) + Option: Maximum Transmission Unit (0x01) [mandatory] + MTU: 32767 + Option: Retransmission and Flow Control (0x04) [mandatory] + Mode: Enhanced Retransmission (0x03) + TX window size: 63 + Max transmit: 3 + Retransmission timeout: 2000 + Monitor timeout: 12000 + Maximum PDU size: 1009 + Option: Frame Check Sequence (0x05) [mandatory] + FCS: 16-bit FCS (0x01) + ... + > ACL Data RX: Handle 11 flags 0x02 dlen 680 + Channel: 72 len 676 ctrl 0x0202 [PSM 4103 mode Enhanced Retransmission (0x03)] {chan 8} + I-frame: Unsegmented TxSeq 1 ReqSeq 2 + < ACL Data TX: Handle 11 flags 0x00 dlen 13 + Channel: 14608 len 9 ctrl 0x0204 [PSM 4103 mode Enhanced Retransmission (0x03)] {chan 8} + I-frame: Unsegmented TxSeq 2 ReqSeq 2 + > ACL Data RX: Handle 11 flags 0x02 dlen 680 + Channel: 72 len 676 ctrl 0x0304 [PSM 4103 mode Enhanced Retransmission (0x03)] {chan 8} + I-frame: Unsegmented TxSeq 2 ReqSeq 3 + +The MTUs are negotiated for each direction. In this traces 32767 for +iPhone->localhost and no MTU for localhost->iPhone, which based on +'4.4 L2CAP_CONFIGURATION_REQ' (Core specification v5.4, Vol. 3, Part +A): + + The only parameters that should be included in the + L2CAP_CONFIGURATION_REQ packet are those that require different + values than the default or previously agreed values. + ... + Any missing configuration parameters are assumed to have their + most recently explicitly or implicitly accepted values. + +and '5.1 Maximum transmission unit (MTU)': + + If the remote device sends a positive L2CAP_CONFIGURATION_RSP + packet it should include the actual MTU to be used on this channel + for traffic flowing into the local device. + ... + The default value is 672 octets. 
+ +is set by BlueZ to 672 bytes. + +It seems that the iPhone used the lowest negotiated value to transfer +data to the localhost instead of the negotiated one for the incoming +direction. + +This could be fixed by using the MTU negotiated for the other +direction, if exists, in the L2CAP_CONFIGURATION_RSP. +This allows to use segmented packets as in the following traces: + + < ACL Data TX: Handle 11 flags 0x00 dlen 12 + L2CAP: Connection Request (0x02) ident 22 len 4 + PSM: 4103 (0x1007) + Source CID: 72 + < ACL Data TX: Handle 11 flags 0x00 dlen 27 + L2CAP: Configure Request (0x04) ident 24 len 19 + Destination CID: 2832 + Flags: 0x0000 + Option: Maximum Transmission Unit (0x01) [mandatory] + MTU: 32767 + Option: Retransmission and Flow Control (0x04) [mandatory] + Mode: Enhanced Retransmission (0x03) + TX window size: 63 + Max transmit: 3 + Retransmission timeout: 2000 + Monitor timeout: 12000 + Maximum PDU size: 1009 + > ACL Data RX: Handle 11 flags 0x02 dlen 26 + L2CAP: Configure Request (0x04) ident 15 len 18 + Destination CID: 72 + Flags: 0x0000 + Option: Retransmission and Flow Control (0x04) [mandatory] + Mode: Enhanced Retransmission (0x03) + TX window size: 32 + Max transmit: 255 + Retransmission timeout: 0 + Monitor timeout: 0 + Maximum PDU size: 65527 + Option: Frame Check Sequence (0x05) [mandatory] + FCS: 16-bit FCS (0x01) + < ACL Data TX: Handle 11 flags 0x00 dlen 29 + L2CAP: Configure Response (0x05) ident 15 len 21 + Source CID: 2832 + Flags: 0x0000 + Result: Success (0x0000) + Option: Maximum Transmission Unit (0x01) [mandatory] + MTU: 32767 + Option: Retransmission and Flow Control (0x04) [mandatory] + Mode: Enhanced Retransmission (0x03) + TX window size: 32 + Max transmit: 255 + Retransmission timeout: 2000 + Monitor timeout: 12000 + Maximum PDU size: 1009 + > ACL Data RX: Handle 11 flags 0x02 dlen 32 + L2CAP: Configure Response (0x05) ident 24 len 24 + Source CID: 72 + Flags: 0x0000 + Result: Success (0x0000) + Option: Maximum Transmission Unit (0x01) [mandatory] + MTU: 32767 + Option: Retransmission and Flow Control (0x04) [mandatory] + Mode: Enhanced Retransmission (0x03) + TX window size: 63 + Max transmit: 3 + Retransmission timeout: 2000 + Monitor timeout: 12000 + Maximum PDU size: 1009 + Option: Frame Check Sequence (0x05) [mandatory] + FCS: 16-bit FCS (0x01) + ... + > ACL Data RX: Handle 11 flags 0x02 dlen 1009 + Channel: 72 len 1005 ctrl 0x4202 [PSM 4103 mode Enhanced Retransmission (0x03)] {chan 8} + I-frame: Start (len 21884) TxSeq 1 ReqSeq 2 + > ACL Data RX: Handle 11 flags 0x02 dlen 1009 + Channel: 72 len 1005 ctrl 0xc204 [PSM 4103 mode Enhanced Retransmission (0x03)] {chan 8} + I-frame: Continuation TxSeq 2 ReqSeq 2 + +This has been tested with kernel 5.4 and BlueZ 5.77. + +Cc: stable@vger.kernel.org +Signed-off-by: Frédéric Danis +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/l2cap_core.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -3415,7 +3415,7 @@ static int l2cap_parse_conf_req(struct l + struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; + struct l2cap_conf_efs efs; + u8 remote_efs = 0; +- u16 mtu = L2CAP_DEFAULT_MTU; ++ u16 mtu = 0; + u16 result = L2CAP_CONF_SUCCESS; + u16 size; + +@@ -3520,6 +3520,13 @@ done: + /* Configure output options and let the other side know + * which ones we don't like. 
*/
+
++	/* If MTU is not provided in configure request, use the most recently
++	 * explicitly or implicitly accepted value for the other direction,
++	 * or the default value.
++	 */
++	if (mtu == 0)
++		mtu = chan->imtu ? chan->imtu : L2CAP_DEFAULT_MTU;
++
+ 	if (mtu < L2CAP_DEFAULT_MIN_MTU)
+ 		result = L2CAP_CONF_UNACCEPT;
+ 	else {
diff --git a/queue-6.15/btrfs-fix-a-race-between-renames-and-directory-logging.patch b/queue-6.15/btrfs-fix-a-race-between-renames-and-directory-logging.patch
new file mode 100644
index 0000000000..7e890a1b3a
--- /dev/null
+++ b/queue-6.15/btrfs-fix-a-race-between-renames-and-directory-logging.patch
@@ -0,0 +1,246 @@
+From 3ca864de852bc91007b32d2a0d48993724f4abad Mon Sep 17 00:00:00 2001
+From: Filipe Manana
+Date: Wed, 28 May 2025 12:28:27 +0100
+Subject: btrfs: fix a race between renames and directory logging
+
+From: Filipe Manana
+
+commit 3ca864de852bc91007b32d2a0d48993724f4abad upstream.
+
+We have a race between a rename and directory inode logging: if the race
+happens and we crash/power fail before the rename completes, the next time
+the filesystem is mounted, the log replay code will end up deleting the
+file that was being renamed.
+
+This is best explained by a step-by-step analysis of an interleaving of
+steps that leads into this situation.
+
+Consider the initial conditions:
+
+1) We are at transaction N;
+
+2) We have directories A and B created in a past transaction (< N);
+
+3) We have inode X corresponding to a file that has 2 hardlinks, one in
+   directory A and the other in directory B, so we'll name them as
+   "A/foo_link1" and "B/foo_link2". Both hard links were persisted in a
+   past transaction (< N);
+
+4) We have inode Y corresponding to a file that has a single hard link and
+   is located in directory A, we'll name it as "A/bar". This file was also
+   persisted in a past transaction (< N).
+
+The steps leading to a file loss are the following and for all of them we
+are under transaction N:
+
+ 1) Link "A/foo_link1" is removed, so inode X's last_unlink_trans field
+    is updated to N, through btrfs_unlink() -> btrfs_record_unlink_dir();
+
+ 2) Task A starts a rename for inode Y, with the goal of renaming from
+    "A/bar" to "A/baz", so we enter btrfs_rename();
+
+ 3) Task A inserts the new BTRFS_INODE_REF_KEY for inode Y by calling
+    btrfs_insert_inode_ref();
+
+ 4) Because the rename happens in the same directory, we don't set the
+    last_unlink_trans field of directory A's inode to the current
+    transaction id, that is, we don't call btrfs_record_unlink_dir();
+
+ 5) Task A then removes the entries from directory A (BTRFS_DIR_ITEM_KEY
+    and BTRFS_DIR_INDEX_KEY items) when calling __btrfs_unlink_inode()
+    (actually the dir index item is added as a delayed item, but the
+    effect is the same);
+
+ 6) Now before task A adds the new entry "A/baz" to directory A by
+    calling btrfs_add_link(), another task, task B, is logging inode X;
+
+ 7) Task B starts a fsync of inode X and after logging inode X, at
+    btrfs_log_inode_parent() it calls btrfs_log_all_parents(), since
+    inode X has a last_unlink_trans value of N, set in step 1;
+
+ 8) At btrfs_log_all_parents() we search for all parent directories of
+    inode X using the commit root, so we find directories A and B and log
+    them.
+    But when logging directory A, we don't have a dir index item for
+    inode Y anymore, neither for the old name "A/bar" nor for the new name
+    "A/baz", since the rename has deleted the old name but has not yet
+    inserted the new name - task A hasn't yet called btrfs_add_link() to
+    do that.
+
+    Note that logging directory A doesn't fall back to a transaction
+    commit because its last_unlink_trans has a lower value than the
+    current transaction's id (see step 4);
+
+ 9) Task B finishes logging directories A and B and gets back to
+    btrfs_sync_file() where it calls btrfs_sync_log() to persist the log
+    tree;
+
+10) Task B successfully persisted the log tree, btrfs_sync_log() completed
+    with success, and a power failure happened.
+
+    We have a log tree without any directory entry for inode Y, so the
+    log replay code deletes the entry for inode Y, name "A/bar", from the
+    subvolume tree since it doesn't exist in the log tree and the log
+    tree is authoritative for its index (we logged a BTRFS_DIR_LOG_INDEX_KEY
+    item that covers the index range for the dentry that corresponds to
+    "A/bar").
+
+    Since there's no other hard link for inode Y and the log replay code
+    deletes the name "A/bar", the file is lost.
+
+The issue wouldn't happen if task B synced the log only after task A
+called btrfs_log_new_name(), which would update the log with the new name
+for inode Y ("A/baz").
+
+Fix this by pinning the log root during renames before removing the old
+directory entry, and unpinning after btrfs_log_new_name() is called.
+
+Fixes: 259c4b96d78d ("btrfs: stop doing unnecessary log updates during a rename")
+CC: stable@vger.kernel.org # 5.18+
+Reviewed-by: Boris Burkov
+Signed-off-by: Filipe Manana
+Signed-off-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/btrfs/inode.c | 81 +++++++++++++++++++++++++++++++++++++++++++------------
+ 1 file changed, 64 insertions(+), 17 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -7979,6 +7979,7 @@ static int btrfs_rename_exchange(struct
+ 	int ret;
+ 	int ret2;
+ 	bool need_abort = false;
++	bool logs_pinned = false;
+ 	struct fscrypt_name old_fname, new_fname;
+ 	struct fscrypt_str *old_name, *new_name;
+
+@@ -8102,6 +8103,31 @@ static int btrfs_rename_exchange(struct
+ 	inode_inc_iversion(new_inode);
+ 	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+
++	if (old_ino != BTRFS_FIRST_FREE_OBJECTID &&
++	    new_ino != BTRFS_FIRST_FREE_OBJECTID) {
++		/*
++		 * If we are renaming in the same directory (and it's not for
++		 * root entries) pin the log early to prevent any concurrent
++		 * task from logging the directory after we removed the old
++		 * entries and before we add the new entries, otherwise that
++		 * task can sync a log without any entry for the inodes we are
++		 * renaming and therefore replaying that log, if a power failure
++		 * happens after syncing the log, would result in deleting the
++		 * inodes.
++		 *
++		 * If the rename affects two different directories, we want to
++		 * make sure the that there's no log commit that contains
++		 * updates for only one of the directories but not for the
++		 * other.
++		 *
++		 * If we are renaming an entry for a root, we don't care about
++		 * log updates since we called btrfs_set_log_full_commit().
++ */ ++ btrfs_pin_log_trans(root); ++ btrfs_pin_log_trans(dest); ++ logs_pinned = true; ++ } ++ + if (old_dentry->d_parent != new_dentry->d_parent) { + btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), + BTRFS_I(old_inode), true); +@@ -8173,30 +8199,23 @@ static int btrfs_rename_exchange(struct + BTRFS_I(new_inode)->dir_index = new_idx; + + /* +- * Now pin the logs of the roots. We do it to ensure that no other task +- * can sync the logs while we are in progress with the rename, because +- * that could result in an inconsistency in case any of the inodes that +- * are part of this rename operation were logged before. ++ * Do the log updates for all inodes. ++ * ++ * If either entry is for a root we don't need to update the logs since ++ * we've called btrfs_set_log_full_commit() before. + */ +- if (old_ino != BTRFS_FIRST_FREE_OBJECTID) +- btrfs_pin_log_trans(root); +- if (new_ino != BTRFS_FIRST_FREE_OBJECTID) +- btrfs_pin_log_trans(dest); +- +- /* Do the log updates for all inodes. */ +- if (old_ino != BTRFS_FIRST_FREE_OBJECTID) ++ if (logs_pinned) { + btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), + old_rename_ctx.index, new_dentry->d_parent); +- if (new_ino != BTRFS_FIRST_FREE_OBJECTID) + btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir), + new_rename_ctx.index, old_dentry->d_parent); ++ } + +- /* Now unpin the logs. */ +- if (old_ino != BTRFS_FIRST_FREE_OBJECTID) ++out_fail: ++ if (logs_pinned) { + btrfs_end_log_trans(root); +- if (new_ino != BTRFS_FIRST_FREE_OBJECTID) + btrfs_end_log_trans(dest); +-out_fail: ++ } + ret2 = btrfs_end_transaction(trans); + ret = ret ? ret : ret2; + out_notrans: +@@ -8246,6 +8265,7 @@ static int btrfs_rename(struct mnt_idmap + int ret2; + u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); + struct fscrypt_name old_fname, new_fname; ++ bool logs_pinned = false; + + if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + return -EPERM; +@@ -8380,6 +8400,29 @@ static int btrfs_rename(struct mnt_idmap + inode_inc_iversion(old_inode); + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + ++ if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { ++ /* ++ * If we are renaming in the same directory (and it's not a ++ * root entry) pin the log to prevent any concurrent task from ++ * logging the directory after we removed the old entry and ++ * before we add the new entry, otherwise that task can sync ++ * a log without any entry for the inode we are renaming and ++ * therefore replaying that log, if a power failure happens ++ * after syncing the log, would result in deleting the inode. ++ * ++ * If the rename affects two different directories, we want to ++ * make sure the that there's no log commit that contains ++ * updates for only one of the directories but not for the ++ * other. ++ * ++ * If we are renaming an entry for a root, we don't care about ++ * log updates since we called btrfs_set_log_full_commit(). 
++ */ ++ btrfs_pin_log_trans(root); ++ btrfs_pin_log_trans(dest); ++ logs_pinned = true; ++ } ++ + if (old_dentry->d_parent != new_dentry->d_parent) + btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), + BTRFS_I(old_inode), true); +@@ -8444,7 +8487,7 @@ static int btrfs_rename(struct mnt_idmap + if (old_inode->i_nlink == 1) + BTRFS_I(old_inode)->dir_index = index; + +- if (old_ino != BTRFS_FIRST_FREE_OBJECTID) ++ if (logs_pinned) + btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), + rename_ctx.index, new_dentry->d_parent); + +@@ -8460,6 +8503,10 @@ static int btrfs_rename(struct mnt_idmap + } + } + out_fail: ++ if (logs_pinned) { ++ btrfs_end_log_trans(root); ++ btrfs_end_log_trans(dest); ++ } + ret2 = btrfs_end_transaction(trans); + ret = ret ? ret : ret2; + out_notrans: diff --git a/queue-6.15/btrfs-fix-invalid-inode-pointer-dereferences-during-log-replay.patch b/queue-6.15/btrfs-fix-invalid-inode-pointer-dereferences-during-log-replay.patch new file mode 100644 index 0000000000..4528ae6fed --- /dev/null +++ b/queue-6.15/btrfs-fix-invalid-inode-pointer-dereferences-during-log-replay.patch @@ -0,0 +1,76 @@ +From 2dcf838cf5c2f0f4501edaa1680fcad03618d760 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 3 Jun 2025 19:29:01 +0100 +Subject: btrfs: fix invalid inode pointer dereferences during log replay + +From: Filipe Manana + +commit 2dcf838cf5c2f0f4501edaa1680fcad03618d760 upstream. + +In a few places where we call read_one_inode(), if we get a NULL pointer +we end up jumping into an error path, or fallthrough in case of +__add_inode_ref(), where we then do something like this: + + iput(&inode->vfs_inode); + +which results in an invalid inode pointer that triggers an invalid memory +access, resulting in a crash. + +Fix this by making sure we don't do such dereferences. 
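+
+As a stand-alone sketch of why the guard must test the wrapper pointer
+itself (the struct names below are simplified stand-ins for the
+btrfs_inode/vfs_inode embedding, not the kernel code): &inode->vfs_inode
+on a NULL inode is plain pointer arithmetic, so it yields a small, bogus,
+non-NULL address, and a NULL check inside the callee can never catch it:
+
+  #include <stddef.h>
+  #include <stdio.h>
+
+  struct vfs_inode { long fields[8]; };
+  struct btrfs_inode { long private_data[4]; struct vfs_inode vfs_inode; };
+
+  int main(void)
+  {
+  	struct btrfs_inode *inode = NULL;
+
+  	/* Guard on the wrapper pointer, as the fix does. Taking
+  	 * &inode->vfs_inode first would compute NULL plus the member
+  	 * offset, a non-NULL garbage pointer that defeats any NULL
+  	 * check inside the callee. */
+  	if (inode)
+  		printf("would iput %p\n", (void *)&inode->vfs_inode);
+
+  	printf("vfs_inode lives at offset %zu\n",
+  	       offsetof(struct btrfs_inode, vfs_inode));
+  	return 0;
+  }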
+ +Fixes: b4c50cbb01a1 ("btrfs: return a btrfs_inode from read_one_inode()") +CC: stable@vger.kernel.org # 6.15+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-log.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index 97e933113b82..21d2f3dded51 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -668,15 +668,12 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, + extent_end = ALIGN(start + size, + fs_info->sectorsize); + } else { +- ret = 0; +- goto out; ++ return 0; + } + + inode = read_one_inode(root, key->objectid); +- if (!inode) { +- ret = -EIO; +- goto out; +- } ++ if (!inode) ++ return -EIO; + + /* + * first check to see if we already have this extent in the +@@ -961,7 +958,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + ret = unlink_inode_for_log_replay(trans, dir, inode, &name); + out: + kfree(name.name); +- iput(&inode->vfs_inode); ++ if (inode) ++ iput(&inode->vfs_inode); + return ret; + } + +@@ -1176,8 +1174,8 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, + ret = unlink_inode_for_log_replay(trans, + victim_parent, + inode, &victim_name); ++ iput(&victim_parent->vfs_inode); + } +- iput(&victim_parent->vfs_inode); + kfree(victim_name.name); + if (ret) + return ret; +-- +2.50.0 + diff --git a/queue-6.15/btrfs-update-superblock-s-device-bytes_used-when-dropping-chunk.patch b/queue-6.15/btrfs-update-superblock-s-device-bytes_used-when-dropping-chunk.patch new file mode 100644 index 0000000000..bfacd4efcd --- /dev/null +++ b/queue-6.15/btrfs-update-superblock-s-device-bytes_used-when-dropping-chunk.patch @@ -0,0 +1,66 @@ +From ae4477f937569d097ca5dbce92a89ba384b49bc6 Mon Sep 17 00:00:00 2001 +From: Mark Harmstone +Date: Thu, 29 May 2025 10:37:44 +0100 +Subject: btrfs: update superblock's device bytes_used when dropping chunk + +From: Mark Harmstone + +commit ae4477f937569d097ca5dbce92a89ba384b49bc6 upstream. + +Each superblock contains a copy of the device item for that device. In a +transaction which drops a chunk but doesn't create any new ones, we were +correctly updating the device item in the chunk tree but not copying +over the new bytes_used value to the superblock. + +This can be seen by doing the following: + + # dd if=/dev/zero of=test bs=4096 count=2621440 + # mkfs.btrfs test + # mount test /root/temp + + # cd /root/temp + # for i in {00..10}; do dd if=/dev/zero of=$i bs=4096 count=32768; done + # sync + # rm * + # sync + # btrfs balance start -dusage=0 . + # sync + + # cd + # umount /root/temp + # btrfs check test + +For btrfs-check to detect this, you will also need my patch at +https://github.com/kdave/btrfs-progs/pull/991. + +Change btrfs_remove_dev_extents() so that it adds the devices to the +fs_info->post_commit_list if they're not there already. This causes +btrfs_commit_device_sizes() to be called, which updates the bytes_used +value in the superblock. 
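+
+As a stand-alone sketch (with simplified stand-ins for the kernel's
+struct list_head, not the btrfs code itself), the idiom the fix relies on
+is that a list_head initialized to point at itself is "not on any list",
+so list_empty() on the member doubles as an "already queued?" test and
+the device is never added twice:
+
+  #include <stdio.h>
+
+  struct list_head { struct list_head *next, *prev; };
+
+  static void init_list_head(struct list_head *h) { h->next = h->prev = h; }
+  static int list_empty(const struct list_head *h) { return h->next == h; }
+
+  static void list_add_tail(struct list_head *n, struct list_head *head)
+  {
+  	n->prev = head->prev;
+  	n->next = head;
+  	head->prev->next = n;
+  	head->prev = n;
+  }
+
+  struct device { struct list_head post_commit_list; };
+
+  int main(void)
+  {
+  	struct list_head dev_update_list;
+  	struct device dev;
+  	int pass;
+
+  	init_list_head(&dev_update_list);
+  	init_list_head(&dev.post_commit_list);
+
+  	/* Queue the device for the commit-time size update exactly
+  	 * once, mirroring the guarded list_add_tail() in the patch. */
+  	for (pass = 0; pass < 2; pass++)
+  		if (list_empty(&dev.post_commit_list))
+  			list_add_tail(&dev.post_commit_list, &dev_update_list);
+
+  	printf("queued: %s\n", list_empty(&dev_update_list) ? "no" : "yes");
+  	return 0;
+  }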
+ +Fixes: bbbf7243d62d ("btrfs: combine device update operations during transaction commit") +CC: stable@vger.kernel.org # 5.10+ +Reviewed-by: Qu Wenruo +Signed-off-by: Mark Harmstone +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -3281,6 +3281,12 @@ int btrfs_remove_chunk(struct btrfs_tran + device->bytes_used - dev_extent_len); + atomic64_add(dev_extent_len, &fs_info->free_chunk_space); + btrfs_clear_space_info_full(fs_info); ++ ++ if (list_empty(&device->post_commit_list)) { ++ list_add_tail(&device->post_commit_list, ++ &trans->transaction->dev_update_list); ++ } ++ + mutex_unlock(&fs_info->chunk_mutex); + } + } diff --git a/queue-6.15/dm-raid-fix-variable-in-journal-device-check.patch b/queue-6.15/dm-raid-fix-variable-in-journal-device-check.patch new file mode 100644 index 0000000000..5315de61b8 --- /dev/null +++ b/queue-6.15/dm-raid-fix-variable-in-journal-device-check.patch @@ -0,0 +1,31 @@ +From db53805156f1e0aa6d059c0d3f9ac660d4ef3eb4 Mon Sep 17 00:00:00 2001 +From: Heinz Mauelshagen +Date: Tue, 10 Jun 2025 20:53:30 +0200 +Subject: dm-raid: fix variable in journal device check + +From: Heinz Mauelshagen + +commit db53805156f1e0aa6d059c0d3f9ac660d4ef3eb4 upstream. + +Replace "rdev" with correct loop variable name "r". + +Signed-off-by: Heinz Mauelshagen +Cc: stable@vger.kernel.org +Fixes: 63c32ed4afc2 ("dm raid: add raid4/5/6 journaling support") +Signed-off-by: Mikulas Patocka +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-raid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -2410,7 +2410,7 @@ static int super_init_validation(struct + */ + sb_retrieve_failed_devices(sb, failed_devices); + rdev_for_each(r, mddev) { +- if (test_bit(Journal, &rdev->flags) || ++ if (test_bit(Journal, &r->flags) || + !r->sb_page) + continue; + sb2 = page_address(r->sb_page); diff --git a/queue-6.15/drm-amdkfd-fix-race-in-gws-queue-scheduling.patch b/queue-6.15/drm-amdkfd-fix-race-in-gws-queue-scheduling.patch new file mode 100644 index 0000000000..dc210de624 --- /dev/null +++ b/queue-6.15/drm-amdkfd-fix-race-in-gws-queue-scheduling.patch @@ -0,0 +1,37 @@ +From cfb05257ae168a0496c7637e1d9e3ab8a25cbffe Mon Sep 17 00:00:00 2001 +From: Jay Cornwall +Date: Wed, 11 Jun 2025 09:52:14 -0500 +Subject: drm/amdkfd: Fix race in GWS queue scheduling + +From: Jay Cornwall + +commit cfb05257ae168a0496c7637e1d9e3ab8a25cbffe upstream. + +q->gws is not updated atomically with qpd->mapped_gws_queue. If a +runlist is created between pqm_set_gws and update_queue it will +contain a queue which uses GWS in a process with no GWS allocated. +This will result in a scheduler hang. + +Use q->properties.is_gws which is changed while holding the DQM lock. 
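+
+A minimal user-space sketch of the invariant behind this fix (the names
+are illustrative stand-ins, not the amdkfd structures, and a pthread
+mutex stands in for the DQM lock): code that builds a snapshot must only
+read fields that are updated under the same lock it holds, otherwise it
+can observe a half-applied state:
+
+  #include <pthread.h>
+  #include <stdbool.h>
+
+  struct queue {
+  	void *gws;     /* updated outside the snapshot builder's lock */
+  	bool is_gws;   /* only flipped while holding dqm_lock */
+  };
+
+  static pthread_mutex_t dqm_lock = PTHREAD_MUTEX_INITIALIZER;
+
+  /* Called while assembling the runlist: relying on is_gws keeps the
+   * answer consistent with the rest of the locked snapshot, whereas
+   * peeking at gws could race with a concurrent allocation. */
+  static bool queue_uses_gws(struct queue *q)
+  {
+  	bool uses;
+
+  	pthread_mutex_lock(&dqm_lock);
+  	uses = q->is_gws;
+  	pthread_mutex_unlock(&dqm_lock);
+  	return uses;
+  }
+
+  int main(void)
+  {
+  	struct queue q = { .gws = 0, .is_gws = true };
+
+  	return queue_uses_gws(&q) ? 0 : 1;
+  }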
+
+Signed-off-by: Jay Cornwall
+Reviewed-by: Harish Kasiviswanathan
+Signed-off-by: Alex Deucher
+(cherry picked from commit b98370220eb3110e82248e3354e16a489a492cfb)
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+@@ -237,7 +237,7 @@ static int pm_map_queues_v9(struct packe
+
+ 	packet->bitfields2.engine_sel =
+ 		engine_sel__mes_map_queues__compute_vi;
+-	packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
++	packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0;
+ 	packet->bitfields2.extended_engine_sel =
+ 		extended_engine_sel__mes_map_queues__legacy_engine_sel;
+ 	packet->bitfields2.queue_type =
diff --git a/queue-6.15/drm-ast-fix-comment-on-modeset-lock.patch b/queue-6.15/drm-ast-fix-comment-on-modeset-lock.patch
new file mode 100644
index 0000000000..5281da59d4
--- /dev/null
+++ b/queue-6.15/drm-ast-fix-comment-on-modeset-lock.patch
@@ -0,0 +1,43 @@
+From 7cce65f3789e04c0f7668a66563e680d81d54493 Mon Sep 17 00:00:00 2001
+From: Thomas Zimmermann
+Date: Mon, 24 Mar 2025 10:44:09 +0100
+Subject: drm/ast: Fix comment on modeset lock
+
+From: Thomas Zimmermann
+
+commit 7cce65f3789e04c0f7668a66563e680d81d54493 upstream.
+
+The ast driver protects the commit tail against concurrent reads
+of the display modes by acquiring a lock. The comment is misleading
+as the lock is not released in atomic_flush, but at the end of the
+commit-tail helper. Rewrite the comment.
+
+Signed-off-by: Thomas Zimmermann
+Fixes: 1fe182154984 ("drm/ast: Acquire I/O-register lock in atomic_commit_tail function")
+Cc: Thomas Zimmermann
+Cc: Jocelyn Falempe
+Cc: Dave Airlie
+Cc: dri-devel@lists.freedesktop.org
+Cc: # v6.2+
+Reviewed-by: Jocelyn Falempe
+Link: https://lore.kernel.org/r/20250324094520.192974-2-tzimmermann@suse.de
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpu/drm/ast/ast_mode.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/ast/ast_mode.c
++++ b/drivers/gpu/drm/ast/ast_mode.c
+@@ -922,9 +922,9 @@ static void ast_mode_config_helper_atomi
+
+ 	/*
+ 	 * Concurrent operations could possibly trigger a call to
+-	 * drm_connector_helper_funcs.get_modes by trying to read the
+-	 * display modes. Protect access to I/O registers by acquiring
+-	 * the I/O-register lock. Released in atomic_flush().
++	 * drm_connector_helper_funcs.get_modes by reading the display
++	 * modes. Protect access to registers by acquiring the modeset
++	 * lock.
+ 	 */
+ 	mutex_lock(&ast->modeset_lock);
+ 	drm_atomic_helper_commit_tail(state);
diff --git a/queue-6.15/drm-cirrus-qemu-fix-pitch-programming.patch b/queue-6.15/drm-cirrus-qemu-fix-pitch-programming.patch
new file mode 100644
index 0000000000..7ff91d0ca8
--- /dev/null
+++ b/queue-6.15/drm-cirrus-qemu-fix-pitch-programming.patch
@@ -0,0 +1,44 @@
+From 4bfb389a0136a13f0802eeb5e97a0e76d88f77ae Mon Sep 17 00:00:00 2001
+From: Thomas Zimmermann
+Date: Fri, 28 Mar 2025 10:17:05 +0100
+Subject: drm/cirrus-qemu: Fix pitch programming
+
+From: Thomas Zimmermann
+
+commit 4bfb389a0136a13f0802eeb5e97a0e76d88f77ae upstream.
+
+Do not set CR1B[6] when programming the pitch. The bit affects VGA
+text mode and is not interpreted by qemu. [1] It has no effect on
+the scanline pitch.
+
+The scanline bit that is set into CR1B[6] belongs in CR13[7], which
+the driver sets up correctly.
+ +This bug goes back to the driver's initial commit. + +Signed-off-by: Thomas Zimmermann +Acked-by: Gerd Hoffmann +Link: https://gitlab.com/qemu-project/qemu/-/blob/stable-9.2/hw/display/cirrus_vga.c?ref_type=heads#L1112 # 1 +Fixes: f9aa76a85248 ("drm/kms: driver for virtual cirrus under qemu") +Cc: Adam Jackson +Cc: Dave Airlie +Cc: Maarten Lankhorst +Cc: Maxime Ripard +Cc: Thomas Zimmermann +Cc: # v3.5+ +Link: https://lore.kernel.org/r/20250328091821.195061-2-tzimmermann@suse.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/tiny/cirrus-qemu.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/gpu/drm/tiny/cirrus-qemu.c ++++ b/drivers/gpu/drm/tiny/cirrus-qemu.c +@@ -318,7 +318,6 @@ static void cirrus_pitch_set(struct cirr + /* Enable extended blanking and pitch bits, and enable full memory */ + cr1b = 0x22; + cr1b |= (pitch >> 7) & 0x10; +- cr1b |= (pitch >> 6) & 0x40; + wreg_crt(cirrus, 0x1b, cr1b); + + cirrus_set_start_address(cirrus, 0); diff --git a/queue-6.15/drm-etnaviv-protect-the-scheduler-s-pending-list-with-its-lock.patch b/queue-6.15/drm-etnaviv-protect-the-scheduler-s-pending-list-with-its-lock.patch new file mode 100644 index 0000000000..ef946a03fe --- /dev/null +++ b/queue-6.15/drm-etnaviv-protect-the-scheduler-s-pending-list-with-its-lock.patch @@ -0,0 +1,55 @@ +From 61ee19dedb8d753249e20308782bf4e9e2fb7344 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ma=C3=ADra=20Canal?= +Date: Mon, 2 Jun 2025 10:22:16 -0300 +Subject: drm/etnaviv: Protect the scheduler's pending list with its lock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Maíra Canal + +commit 61ee19dedb8d753249e20308782bf4e9e2fb7344 upstream. + +Commit 704d3d60fec4 ("drm/etnaviv: don't block scheduler when GPU is still +active") ensured that active jobs are returned to the pending list when +extending the timeout. However, it didn't use the pending list's lock to +manipulate the list, which causes a race condition as the scheduler's +workqueues are running. + +Hold the lock while manipulating the scheduler's pending list to prevent +a race. 
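+
+A stand-alone sketch of the rule the fix enforces (a pthread mutex
+stands in for the scheduler's job_list_lock spinlock, and the list is
+simplified): every writer of the pending list must take the list's own
+lock, because the scheduler's workqueues may be traversing it at the
+same time:
+
+  #include <pthread.h>
+  #include <stdio.h>
+
+  struct job { struct job *next; };
+
+  static struct job *pending_list;   /* singly linked for brevity */
+  static pthread_mutex_t job_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+  /* Equivalent of putting a still-running job back on the pending
+   * list when the timeout is extended: done under job_list_lock so
+   * no concurrent walker sees a half-linked node. */
+  static void requeue_job(struct job *j)
+  {
+  	pthread_mutex_lock(&job_list_lock);
+  	j->next = pending_list;
+  	pending_list = j;
+  	pthread_mutex_unlock(&job_list_lock);
+  }
+
+  int main(void)
+  {
+  	struct job j = { 0 };
+
+  	requeue_job(&j);
+  	printf("pending list %s\n", pending_list ? "non-empty" : "empty");
+  	return 0;
+  }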
+ +Cc: stable@vger.kernel.org +Fixes: 704d3d60fec4 ("drm/etnaviv: don't block scheduler when GPU is still active") +Reported-by: Philipp Stanner +Closes: https://lore.kernel.org/dri-devel/964e59ba1539083ef29b06d3c78f5e2e9b138ab8.camel@mailbox.org/ +Reviewed-by: Lucas Stach +Reviewed-by: Philipp Stanner +Link: https://lore.kernel.org/r/20250602132240.93314-1-mcanal@igalia.com +Signed-off-by: Maíra Canal +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/etnaviv/etnaviv_sched.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c +@@ -35,6 +35,7 @@ static enum drm_gpu_sched_stat etnaviv_s + *sched_job) + { + struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job); ++ struct drm_gpu_scheduler *sched = sched_job->sched; + struct etnaviv_gpu *gpu = submit->gpu; + u32 dma_addr, primid = 0; + int change; +@@ -89,7 +90,9 @@ static enum drm_gpu_sched_stat etnaviv_s + return DRM_GPU_SCHED_STAT_NOMINAL; + + out_no_timeout: +- list_add(&sched_job->list, &sched_job->sched->pending_list); ++ spin_lock(&sched->job_list_lock); ++ list_add(&sched_job->list, &sched->pending_list); ++ spin_unlock(&sched->job_list_lock); + return DRM_GPU_SCHED_STAT_NOMINAL; + } + diff --git a/queue-6.15/drm-msm-gpu-fix-crash-when-throttling-gpu-immediately-during-boot.patch b/queue-6.15/drm-msm-gpu-fix-crash-when-throttling-gpu-immediately-during-boot.patch new file mode 100644 index 0000000000..5eabff010a --- /dev/null +++ b/queue-6.15/drm-msm-gpu-fix-crash-when-throttling-gpu-immediately-during-boot.patch @@ -0,0 +1,76 @@ +From b71717735be48d7743a34897e9e44a0b53e30c0e Mon Sep 17 00:00:00 2001 +From: Stephan Gerhold +Date: Tue, 29 Apr 2025 10:33:56 +0200 +Subject: drm/msm/gpu: Fix crash when throttling GPU immediately during boot + +From: Stephan Gerhold + +commit b71717735be48d7743a34897e9e44a0b53e30c0e upstream. + +There is a small chance that the GPU is already hot during boot. In that +case, the call to of_devfreq_cooling_register() will immediately try to +apply devfreq cooling, as seen in the following crash: + + Unable to handle kernel paging request at virtual address 0000000000014110 + pc : a6xx_gpu_busy+0x1c/0x58 [msm] + lr : msm_devfreq_get_dev_status+0xbc/0x140 [msm] + Call trace: + a6xx_gpu_busy+0x1c/0x58 [msm] (P) + devfreq_simple_ondemand_func+0x3c/0x150 + devfreq_update_target+0x44/0xd8 + qos_max_notifier_call+0x30/0x84 + blocking_notifier_call_chain+0x6c/0xa0 + pm_qos_update_target+0xd0/0x110 + freq_qos_apply+0x3c/0x74 + apply_constraint+0x88/0x148 + __dev_pm_qos_update_request+0x7c/0xcc + dev_pm_qos_update_request+0x38/0x5c + devfreq_cooling_set_cur_state+0x98/0xf0 + __thermal_cdev_update+0x64/0xb4 + thermal_cdev_update+0x4c/0x58 + step_wise_manage+0x1f0/0x318 + __thermal_zone_device_update+0x278/0x424 + __thermal_cooling_device_register+0x2bc/0x308 + thermal_of_cooling_device_register+0x10/0x1c + of_devfreq_cooling_register_power+0x240/0x2bc + of_devfreq_cooling_register+0x14/0x20 + msm_devfreq_init+0xc4/0x1a0 [msm] + msm_gpu_init+0x304/0x574 [msm] + adreno_gpu_init+0x1c4/0x2e0 [msm] + a6xx_gpu_init+0x5c8/0x9c8 [msm] + adreno_bind+0x2a8/0x33c [msm] + ... + +At this point we haven't initialized the GMU at all yet, so we cannot read +the GMU registers inside a6xx_gpu_busy(). 
A similar issue was fixed before +in commit 6694482a70e9 ("drm/msm: Avoid unclocked GMU register access in +6xx gpu_busy"): msm_devfreq_init() does call devfreq_suspend_device(), but +unlike msm_devfreq_suspend(), it doesn't set the df->suspended flag +accordingly. This means the df->suspended flag does not match the actual +devfreq state after initialization and msm_devfreq_get_dev_status() will +end up accessing GMU registers, causing the crash. + +Fix this by setting df->suspended correctly during initialization. + +Cc: stable@vger.kernel.org +Fixes: 6694482a70e9 ("drm/msm: Avoid unclocked GMU register access in 6xx gpu_busy") +Signed-off-by: Stephan Gerhold +Reviewed-by: Douglas Anderson +Reviewed-by: Konrad Dybcio +Patchwork: https://patchwork.freedesktop.org/patch/650772/ +Signed-off-by: Rob Clark +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/msm/msm_gpu_devfreq.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c ++++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c +@@ -156,6 +156,7 @@ void msm_devfreq_init(struct msm_gpu *gp + priv->gpu_devfreq_config.downdifferential = 10; + + mutex_init(&df->lock); ++ df->suspended = true; + + ret = dev_pm_qos_add_request(&gpu->pdev->dev, &df->boost_freq, + DEV_PM_QOS_MIN_FREQUENCY, 0); diff --git a/queue-6.15/drm-panel-simple-tianma-tm070jdhg34-00-add-delays.patch b/queue-6.15/drm-panel-simple-tianma-tm070jdhg34-00-add-delays.patch new file mode 100644 index 0000000000..8254f365a3 --- /dev/null +++ b/queue-6.15/drm-panel-simple-tianma-tm070jdhg34-00-add-delays.patch @@ -0,0 +1,38 @@ +From 716c75afd83c837f14042309126e838de040658b Mon Sep 17 00:00:00 2001 +From: Luca Ceresoli +Date: Fri, 11 Apr 2025 21:19:45 +0200 +Subject: drm/panel: simple: Tianma TM070JDHG34-00: add delays + +From: Luca Ceresoli + +commit 716c75afd83c837f14042309126e838de040658b upstream. + +Add power on/off delays for the Tianma TM070JDHG34-00. + +Fixes: bf6daaa281f7 ("drm/panel: simple: Add Tianma TM070JDHG34-00 panel support") +Cc: stable@vger.kernel.org +Signed-off-by: Luca Ceresoli +Reviewed-by: Neil Armstrong +Link: https://lore.kernel.org/r/20250411-tianma-p0700wxf1mbaa-v3-2-acbefe9ea669@bootlin.com +Signed-off-by: Neil Armstrong +Link: https://lore.kernel.org/r/20250411-tianma-p0700wxf1mbaa-v3-2-acbefe9ea669@bootlin.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/panel/panel-simple.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/gpu/drm/panel/panel-simple.c ++++ b/drivers/gpu/drm/panel/panel-simple.c +@@ -4455,6 +4455,12 @@ static const struct panel_desc tianma_tm + .width = 150, /* 149.76 */ + .height = 94, /* 93.60 */ + }, ++ .delay = { ++ .prepare = 15, /* Tp1 */ ++ .enable = 150, /* Tp2 */ ++ .disable = 150, /* Tp4 */ ++ .unprepare = 120, /* Tp3 */ ++ }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .connector_type = DRM_MODE_CONNECTOR_LVDS, + }; diff --git a/queue-6.15/drm-simpledrm-do-not-upcast-in-release-helpers.patch b/queue-6.15/drm-simpledrm-do-not-upcast-in-release-helpers.patch new file mode 100644 index 0000000000..ef7ef5755e --- /dev/null +++ b/queue-6.15/drm-simpledrm-do-not-upcast-in-release-helpers.patch @@ -0,0 +1,46 @@ +From d231cde7c84359fb18fb268cf6cff03b5bce48ff Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Mon, 7 Apr 2025 15:47:24 +0200 +Subject: drm/simpledrm: Do not upcast in release helpers + +From: Thomas Zimmermann + +commit d231cde7c84359fb18fb268cf6cff03b5bce48ff upstream. 
+ +The res pointer passed to simpledrm_device_release_clocks() and +simpledrm_device_release_regulators() points to an instance of +struct simpledrm_device. No need to upcast from struct drm_device. +The upcast is harmless, as DRM device is the first field in struct +simpledrm_device. + +Signed-off-by: Thomas Zimmermann +Fixes: 11e8f5fd223b ("drm: Add simpledrm driver") +Cc: # v5.14+ +Reviewed-by: Javier Martinez Canillas +Reviewed-by: Jocelyn Falempe +Link: https://lore.kernel.org/r/20250407134753.985925-2-tzimmermann@suse.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/tiny/simpledrm.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/tiny/simpledrm.c ++++ b/drivers/gpu/drm/tiny/simpledrm.c +@@ -284,7 +284,7 @@ static struct simpledrm_device *simpledr + + static void simpledrm_device_release_clocks(void *res) + { +- struct simpledrm_device *sdev = simpledrm_device_of_dev(res); ++ struct simpledrm_device *sdev = res; + unsigned int i; + + for (i = 0; i < sdev->clk_count; ++i) { +@@ -382,7 +382,7 @@ static int simpledrm_device_init_clocks( + + static void simpledrm_device_release_regulators(void *res) + { +- struct simpledrm_device *sdev = simpledrm_device_of_dev(res); ++ struct simpledrm_device *sdev = res; + unsigned int i; + + for (i = 0; i < sdev->regulator_count; ++i) { diff --git a/queue-6.15/drm-tegra-assign-plane-type-before-registration.patch b/queue-6.15/drm-tegra-assign-plane-type-before-registration.patch new file mode 100644 index 0000000000..03d27c46e6 --- /dev/null +++ b/queue-6.15/drm-tegra-assign-plane-type-before-registration.patch @@ -0,0 +1,87 @@ +From 9ff4fdf4f44b69237c0afc1d3a8dac916ce66f3e Mon Sep 17 00:00:00 2001 +From: Thierry Reding +Date: Mon, 21 Apr 2025 11:13:05 -0500 +Subject: drm/tegra: Assign plane type before registration + +From: Thierry Reding + +commit 9ff4fdf4f44b69237c0afc1d3a8dac916ce66f3e upstream. + +Changes to a plane's type after it has been registered aren't propagated +to userspace automatically. This could possibly be achieved by updating +the property, but since we can already determine which type this should +be before the registration, passing in the right type from the start is +a much better solution. + +Suggested-by: Aaron Kling +Signed-off-by: Thierry Reding +Cc: stable@vger.kernel.org +Fixes: 473079549f27 ("drm/tegra: dc: Add Tegra186 support") +Signed-off-by: Aaron Kling +Signed-off-by: Thierry Reding +Link: https://lore.kernel.org/r/20250421-tegra-drm-primary-v2-1-7f740c4c2121@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/tegra/dc.c | 12 ++++++++---- + drivers/gpu/drm/tegra/hub.c | 4 ++-- + drivers/gpu/drm/tegra/hub.h | 3 ++- + 3 files changed, 12 insertions(+), 7 deletions(-) + +--- a/drivers/gpu/drm/tegra/dc.c ++++ b/drivers/gpu/drm/tegra/dc.c +@@ -1321,10 +1321,16 @@ static struct drm_plane *tegra_dc_add_sh + if (wgrp->dc == dc->pipe) { + for (j = 0; j < wgrp->num_windows; j++) { + unsigned int index = wgrp->windows[j]; ++ enum drm_plane_type type; ++ ++ if (primary) ++ type = DRM_PLANE_TYPE_OVERLAY; ++ else ++ type = DRM_PLANE_TYPE_PRIMARY; + + plane = tegra_shared_plane_create(drm, dc, + wgrp->index, +- index); ++ index, type); + if (IS_ERR(plane)) + return plane; + +@@ -1332,10 +1338,8 @@ static struct drm_plane *tegra_dc_add_sh + * Choose the first shared plane owned by this + * head as the primary plane. 
+ */ +- if (!primary) { +- plane->type = DRM_PLANE_TYPE_PRIMARY; ++ if (!primary) + primary = plane; +- } + } + } + } +--- a/drivers/gpu/drm/tegra/hub.c ++++ b/drivers/gpu/drm/tegra/hub.c +@@ -755,9 +755,9 @@ static const struct drm_plane_helper_fun + struct drm_plane *tegra_shared_plane_create(struct drm_device *drm, + struct tegra_dc *dc, + unsigned int wgrp, +- unsigned int index) ++ unsigned int index, ++ enum drm_plane_type type) + { +- enum drm_plane_type type = DRM_PLANE_TYPE_OVERLAY; + struct tegra_drm *tegra = drm->dev_private; + struct tegra_display_hub *hub = tegra->hub; + struct tegra_shared_plane *plane; +--- a/drivers/gpu/drm/tegra/hub.h ++++ b/drivers/gpu/drm/tegra/hub.h +@@ -80,7 +80,8 @@ void tegra_display_hub_cleanup(struct te + struct drm_plane *tegra_shared_plane_create(struct drm_device *drm, + struct tegra_dc *dc, + unsigned int wgrp, +- unsigned int index); ++ unsigned int index, ++ enum drm_plane_type type); + + int tegra_display_hub_atomic_check(struct drm_device *drm, + struct drm_atomic_state *state); diff --git a/queue-6.15/drm-tegra-fix-a-possible-null-pointer-dereference.patch b/queue-6.15/drm-tegra-fix-a-possible-null-pointer-dereference.patch new file mode 100644 index 0000000000..6324383722 --- /dev/null +++ b/queue-6.15/drm-tegra-fix-a-possible-null-pointer-dereference.patch @@ -0,0 +1,37 @@ +From 780351a5f61416ed2ba1199cc57e4a076fca644d Mon Sep 17 00:00:00 2001 +From: Qiu-ji Chen +Date: Wed, 6 Nov 2024 17:59:06 +0800 +Subject: drm/tegra: Fix a possible null pointer dereference + +From: Qiu-ji Chen + +commit 780351a5f61416ed2ba1199cc57e4a076fca644d upstream. + +In tegra_crtc_reset(), new memory is allocated with kzalloc(), but +no check is performed. Before calling __drm_atomic_helper_crtc_reset, +state should be checked to prevent possible null pointer dereference. + +Fixes: b7e0b04ae450 ("drm/tegra: Convert to using __drm_atomic_helper_crtc_reset() for reset.") +Cc: stable@vger.kernel.org +Signed-off-by: Qiu-ji Chen +Signed-off-by: Thierry Reding +Link: https://lore.kernel.org/r/20241106095906.15247-1-chenqiuji666@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/tegra/dc.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/tegra/dc.c ++++ b/drivers/gpu/drm/tegra/dc.c +@@ -1393,7 +1393,10 @@ static void tegra_crtc_reset(struct drm_ + if (crtc->state) + tegra_crtc_atomic_destroy_state(crtc, crtc->state); + +- __drm_atomic_helper_crtc_reset(crtc, &state->base); ++ if (state) ++ __drm_atomic_helper_crtc_reset(crtc, &state->base); ++ else ++ __drm_atomic_helper_crtc_reset(crtc, NULL); + } + + static struct drm_crtc_state * diff --git a/queue-6.15/drm-udl-unregister-device-before-cleaning-up-on-disconnect.patch b/queue-6.15/drm-udl-unregister-device-before-cleaning-up-on-disconnect.patch new file mode 100644 index 0000000000..9451e229bf --- /dev/null +++ b/queue-6.15/drm-udl-unregister-device-before-cleaning-up-on-disconnect.patch @@ -0,0 +1,48 @@ +From ff9cb6d2035c586ea7c8f1754d4409eec7a2d26d Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Mon, 3 Mar 2025 15:52:56 +0100 +Subject: drm/udl: Unregister device before cleaning up on disconnect + +From: Thomas Zimmermann + +commit ff9cb6d2035c586ea7c8f1754d4409eec7a2d26d upstream. 
+
+Disconnecting a DisplayLink device results in the following kernel
+error messages
+
+[ 93.041748] [drm:udl_urb_completion [udl]] *ERROR* udl_urb_completion - nonzero write bulk status received: -115
+[ 93.055299] [drm:udl_submit_urb [udl]] *ERROR* usb_submit_urb error fffffffe
+[ 93.065363] [drm:udl_urb_completion [udl]] *ERROR* udl_urb_completion - nonzero write bulk status received: -115
+[ 93.078207] [drm:udl_submit_urb [udl]] *ERROR* usb_submit_urb error fffffffe
+
+coming from KMS poll helpers. Shutting down poll helpers runs them
+one final time when the USB device is already gone.
+
+Run drm_dev_unplug() first in udl's USB disconnect handler. Udl's
+polling code already handles disconnects gracefully if the device has
+been marked as unplugged.
+
+Signed-off-by: Thomas Zimmermann
+Fixes: b1a981bd5576 ("drm/udl: drop drm_driver.release hook")
+Cc: dri-devel@lists.freedesktop.org
+Cc: # v5.8+
+Reviewed-by: Patrik Jakobsson
+Link: https://patchwork.freedesktop.org/patch/msgid/20250303145604.62962-2-tzimmermann@suse.de
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpu/drm/udl/udl_drv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/udl/udl_drv.c
++++ b/drivers/gpu/drm/udl/udl_drv.c
+@@ -127,9 +127,9 @@ static void udl_usb_disconnect(struct us
+ {
+ 	struct drm_device *dev = usb_get_intfdata(interface);
+
++	drm_dev_unplug(dev);
+ 	drm_kms_helper_poll_fini(dev);
+ 	udl_drop_usb(dev);
+-	drm_dev_unplug(dev);
+ }
+
+ /*
diff --git a/queue-6.15/dt-bindings-serial-8250-make-clocks-and-clock-frequency-exclusive.patch b/queue-6.15/dt-bindings-serial-8250-make-clocks-and-clock-frequency-exclusive.patch
new file mode 100644
index 0000000000..626810d655
--- /dev/null
+++ b/queue-6.15/dt-bindings-serial-8250-make-clocks-and-clock-frequency-exclusive.patch
@@ -0,0 +1,51 @@
+From 09812134071b3941fb81def30b61ed36d3a5dfb5 Mon Sep 17 00:00:00 2001
+From: Yao Zi
+Date: Mon, 23 Jun 2025 09:34:45 +0000
+Subject: dt-bindings: serial: 8250: Make clocks and clock-frequency exclusive
+
+From: Yao Zi
+
+commit 09812134071b3941fb81def30b61ed36d3a5dfb5 upstream.
+
+The 8250 binding before being converted to json-schema states,
+
+  - clock-frequency : the input clock frequency for the UART
+	or
+  - clocks phandle to refer to the clk used as per Documentation/devicetree
+
+for clock-related properties, where "or" indicates these properties
+shouldn't exist at the same time.
+
+Additionally, the behavior of Linux's driver is strange when both clocks
+and clock-frequency are specified: it ignores clocks and obtains the
+frequency from clock-frequency, leaving the specified clocks unclaimed.
+They may even be disabled, which is undesired most of the time.
+
+But "anyOf" doesn't prevent these two properties from coexisting, as it
+considers the object valid as long as there's at LEAST one match.
+
+Let's switch to "oneOf" and disallow the other property if one exists,
+precisely matching the original binding and avoiding future confusion on
+the driver's behavior.
+ +Fixes: e69f5dc623f9 ("dt-bindings: serial: Convert 8250 to json-schema") +Cc: stable +Signed-off-by: Yao Zi +Reviewed-by: Conor Dooley +Link: https://lore.kernel.org/r/20250623093445.62327-1-ziyao@disroot.org +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/devicetree/bindings/serial/8250.yaml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/Documentation/devicetree/bindings/serial/8250.yaml ++++ b/Documentation/devicetree/bindings/serial/8250.yaml +@@ -45,7 +45,7 @@ allOf: + - ns16550 + - ns16550a + then: +- anyOf: ++ oneOf: + - required: [ clock-frequency ] + - required: [ clocks ] + diff --git a/queue-6.15/edac-amd64-fix-size-calculation-for-non-power-of-two-dimms.patch b/queue-6.15/edac-amd64-fix-size-calculation-for-non-power-of-two-dimms.patch new file mode 100644 index 0000000000..226e9a51c4 --- /dev/null +++ b/queue-6.15/edac-amd64-fix-size-calculation-for-non-power-of-two-dimms.patch @@ -0,0 +1,161 @@ +From a3f3040657417aeadb9622c629d4a0c2693a0f93 Mon Sep 17 00:00:00 2001 +From: Avadhut Naik +Date: Thu, 29 May 2025 20:50:04 +0000 +Subject: EDAC/amd64: Fix size calculation for Non-Power-of-Two DIMMs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Avadhut Naik + +commit a3f3040657417aeadb9622c629d4a0c2693a0f93 upstream. + +Each Chip-Select (CS) of a Unified Memory Controller (UMC) on AMD Zen-based +SOCs has an Address Mask and a Secondary Address Mask register associated with +it. The amd64_edac module logs DIMM sizes on a per-UMC per-CS granularity +during init using these two registers. + +Currently, the module primarily considers only the Address Mask register for +computing DIMM sizes. The Secondary Address Mask register is only considered +for odd CS. Additionally, if it has been considered, the Address Mask register +is ignored altogether for that CS. For power-of-two DIMMs i.e. DIMMs whose +total capacity is a power of two (32GB, 64GB, etc), this is not an issue +since only the Address Mask register is used. + +For non-power-of-two DIMMs i.e., DIMMs whose total capacity is not a power of +two (48GB, 96GB, etc), however, the Secondary Address Mask register is used +in conjunction with the Address Mask register. However, since the module only +considers either of the two registers for a CS, the size computed by the +module is incorrect. The Secondary Address Mask register is not considered for +even CS, and the Address Mask register is not considered for odd CS. + +Introduce a new helper function so that both Address Mask and Secondary +Address Mask registers are considered, when valid, for computing DIMM sizes. +Furthermore, also rename some variables for greater clarity. + +Fixes: 81f5090db843 ("EDAC/amd64: Support asymmetric dual-rank DIMMs") +Closes: https://lore.kernel.org/dbec22b6-00f2-498b-b70d-ab6f8a5ec87e@natrix.lt +Reported-by: Žilvinas Žaltiena +Signed-off-by: Avadhut Naik +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Yazen Ghannam +Tested-by: Žilvinas Žaltiena +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/20250529205013.403450-1-avadhut.naik@amd.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/edac/amd64_edac.c | 57 +++++++++++++++++++++++++++++----------------- + 1 file changed, 36 insertions(+), 21 deletions(-) + +--- a/drivers/edac/amd64_edac.c ++++ b/drivers/edac/amd64_edac.c +@@ -1209,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 + if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_PRIMARY; + +- /* Asymmetric dual-rank DIMM support. 
*/ ++ if (csrow_sec_enabled(2 * dimm, ctrl, pvt)) ++ cs_mode |= CS_EVEN_SECONDARY; ++ + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_SECONDARY; + +@@ -1230,12 +1232,13 @@ static int umc_get_cs_mode(int dimm, u8 + return cs_mode; + } + +-static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, +- int csrow_nr, int dimm) ++static int calculate_cs_size(u32 mask, unsigned int cs_mode) + { +- u32 msb, weight, num_zero_bits; +- u32 addr_mask_deinterleaved; +- int size = 0; ++ int msb, weight, num_zero_bits; ++ u32 deinterleaved_mask; ++ ++ if (!mask) ++ return 0; + + /* + * The number of zero bits in the mask is equal to the number of bits +@@ -1248,19 +1251,30 @@ static int __addr_mask_to_cs_size(u32 ad + * without swapping with the most significant bit. This can be handled + * by keeping the MSB where it is and ignoring the single zero bit. + */ +- msb = fls(addr_mask_orig) - 1; +- weight = hweight_long(addr_mask_orig); ++ msb = fls(mask) - 1; ++ weight = hweight_long(mask); + num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); + + /* Take the number of zero bits off from the top of the mask. */ +- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); ++ deinterleaved_mask = GENMASK(msb - num_zero_bits, 1); ++ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask); ++ ++ return (deinterleaved_mask >> 2) + 1; ++} ++ ++static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec, ++ unsigned int cs_mode, int csrow_nr, int dimm) ++{ ++ int size; + + edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); +- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); +- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); ++ edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask); + + /* Register [31:1] = Address [39:9]. Size is in kBs here. */ +- size = (addr_mask_deinterleaved >> 2) + 1; ++ size = calculate_cs_size(addr_mask, cs_mode); ++ ++ edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec); ++ size += calculate_cs_size(addr_mask_sec, cs_mode); + + /* Return size in MBs. */ + return size >> 10; +@@ -1269,8 +1283,8 @@ static int __addr_mask_to_cs_size(u32 ad + static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, + unsigned int cs_mode, int csrow_nr) + { ++ u32 addr_mask = 0, addr_mask_sec = 0; + int cs_mask_nr = csrow_nr; +- u32 addr_mask_orig; + int dimm, size = 0; + + /* No Chip Selects are enabled. */ +@@ -1308,13 +1322,13 @@ static int umc_addr_mask_to_cs_size(stru + if (!pvt->flags.zn_regs_v2) + cs_mask_nr >>= 1; + +- /* Asymmetric dual-rank DIMM support. 
*/ +- if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) +- addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; +- else +- addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; ++ if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY)) ++ addr_mask = pvt->csels[umc].csmasks[cs_mask_nr]; ++ ++ if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY)) ++ addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr]; + +- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm); ++ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm); + } + + static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) +@@ -3512,9 +3526,10 @@ static void gpu_get_err_info(struct mce + static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, + unsigned int cs_mode, int csrow_nr) + { +- u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr]; ++ u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr]; ++ u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr]; + +- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1); ++ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1); + } + + static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) diff --git a/queue-6.15/f2fs-fix-to-zero-post-eof-page.patch b/queue-6.15/f2fs-fix-to-zero-post-eof-page.patch new file mode 100644 index 0000000000..6e0d62655d --- /dev/null +++ b/queue-6.15/f2fs-fix-to-zero-post-eof-page.patch @@ -0,0 +1,165 @@ +From ba8dac350faf16afc129ce6303ca4feaf083ccb1 Mon Sep 17 00:00:00 2001 +From: Chao Yu +Date: Thu, 5 Jun 2025 11:26:33 +0800 +Subject: f2fs: fix to zero post-eof page +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chao Yu + +commit ba8dac350faf16afc129ce6303ca4feaf083ccb1 upstream. + +fstest reports a f2fs bug: + +#generic/363 42s ... [failed, exit status 1]- output mismatch (see /share/git/fstests/results//generic/363.out.bad) +# --- tests/generic/363.out 2025-01-12 21:57:40.271440542 +0800 +# +++ /share/git/fstests/results//generic/363.out.bad 2025-05-19 19:55:58.000000000 +0800 +# @@ -1,2 +1,78 @@ +# QA output created by 363 +# fsx -q -S 0 -e 1 -N 100000 +# +READ BAD DATA: offset = 0xd6fb, size = 0xf044, fname = /mnt/f2fs/junk +# +OFFSET GOOD BAD RANGE +# +0x1540d 0x0000 0x2a25 0x0 +# +operation# (mod 256) for the bad data may be 37 +# +0x1540e 0x0000 0x2527 0x1 +# ... +# (Run 'diff -u /share/git/fstests/tests/generic/363.out /share/git/fstests/results//generic/363.out.bad' to see the entire diff) +Ran: generic/363 +Failures: generic/363 +Failed 1 of 1 tests + +The root cause is user can update post-eof page via mmap [1], however, f2fs +missed to zero post-eof page in below operations, so, once it expands i_size, +then it will include dummy data locates previous post-eof page, so during +below operations, we need to zero post-eof page. 
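The bug is easy to picture from userspace. A minimal, hypothetical reproducer sketch (not taken from the report; the mount point, the 4 KiB page size, and the sizes and offsets are all assumptions): dirty bytes beyond EOF through a shared mapping, expand i_size, then check whether the formerly post-EOF bytes read back as zero:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	char buf[1] = { 0 };
	char *map;
	int fd = open("/mnt/f2fs/probe", O_CREAT | O_RDWR | O_TRUNC, 0600);

	if (fd < 0)
		return 1;
	if (ftruncate(fd, 100))		/* i_size = 100; rest of the page is post-EOF */
		return 1;
	map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;
	map[200] = 0x5a;		/* store past EOF, within the mapped page */
	if (ftruncate(fd, 4096))	/* expand i_size over the dirtied bytes */
		return 1;
	if (pread(fd, buf, 1, 200) != 1)
		return 1;
	printf("byte at 200: %#x (should be 0)\n", buf[0]);
	munmap(map, 4096);
	close(fd);
	return 0;
}

On a kernel without this fix, f2fs can hand back the 0x5a byte instead of zero once i_size grows past it, which is exactly the READ BAD DATA pattern fsx reports above.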
+ +Operations which can include dummy data after previous i_size after expanding +i_size: +- write +- mapwrite [1] +- truncate +- fallocate + * preallocate + * zero_range + * insert_range + * collapse_range +- clone_range (doesn’t support in f2fs) +- copy_range (doesn’t support in f2fs) + +[1] https://man7.org/linux/man-pages/man2/mmap.2.html 'BUG section' + +Cc: stable@kernel.org +Signed-off-by: Chao Yu +Reviewed-by: Zhiguo Niu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/file.c | 38 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -35,6 +35,17 @@ + #include + #include + ++static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size) ++{ ++ loff_t old_size = i_size_read(inode); ++ ++ if (old_size >= new_size) ++ return; ++ ++ /* zero or drop pages only in range of [old_size, new_size] */ ++ truncate_pagecache(inode, old_size); ++} ++ + static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) + { + struct inode *inode = file_inode(vmf->vma->vm_file); +@@ -103,8 +114,13 @@ static vm_fault_t f2fs_vm_page_mkwrite(s + + f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); + ++ filemap_invalidate_lock(inode->i_mapping); ++ f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT); ++ filemap_invalidate_unlock(inode->i_mapping); ++ + file_update_time(vmf->vma->vm_file); + filemap_invalidate_lock_shared(inode->i_mapping); ++ + folio_lock(folio); + if (unlikely(folio->mapping != inode->i_mapping || + folio_pos(folio) > i_size_read(inode) || +@@ -1106,6 +1122,8 @@ int f2fs_setattr(struct mnt_idmap *idmap + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); + filemap_invalidate_lock(inode->i_mapping); + ++ if (attr->ia_size > old_size) ++ f2fs_zero_post_eof_page(inode, attr->ia_size); + truncate_setsize(inode, attr->ia_size); + + if (attr->ia_size <= old_size) +@@ -1224,6 +1242,10 @@ static int f2fs_punch_hole(struct inode + if (ret) + return ret; + ++ filemap_invalidate_lock(inode->i_mapping); ++ f2fs_zero_post_eof_page(inode, offset + len); ++ filemap_invalidate_unlock(inode->i_mapping); ++ + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; + +@@ -1507,6 +1529,8 @@ static int f2fs_do_collapse(struct inode + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + filemap_invalidate_lock(inode->i_mapping); + ++ f2fs_zero_post_eof_page(inode, offset + len); ++ + f2fs_lock_op(sbi); + f2fs_drop_extent_tree(inode); + truncate_pagecache(inode, offset); +@@ -1628,6 +1652,10 @@ static int f2fs_zero_range(struct inode + if (ret) + return ret; + ++ filemap_invalidate_lock(mapping); ++ f2fs_zero_post_eof_page(inode, offset + len); ++ filemap_invalidate_unlock(mapping); ++ + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; + +@@ -1759,6 +1787,8 @@ static int f2fs_insert_range(struct inod + /* avoid gc operation during block exchange */ + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + filemap_invalidate_lock(mapping); ++ ++ f2fs_zero_post_eof_page(inode, offset + len); + truncate_pagecache(inode, offset); + + while (!ret && idx > pg_start) { +@@ -1816,6 +1846,10 @@ static int f2fs_expand_inode_data(struct + if (err) + return err; + ++ filemap_invalidate_lock(inode->i_mapping); ++ f2fs_zero_post_eof_page(inode, offset + len); ++ filemap_invalidate_unlock(inode->i_mapping); ++ + f2fs_balance_fs(sbi, true); + + pg_start = ((unsigned long long)offset) >> 
PAGE_SHIFT; +@@ -4846,6 +4880,10 @@ static ssize_t f2fs_write_checks(struct + err = file_modified(file); + if (err) + return err; ++ ++ filemap_invalidate_lock(inode->i_mapping); ++ f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from)); ++ filemap_invalidate_unlock(inode->i_mapping); + return count; + } + diff --git a/queue-6.15/hid-appletb-kbd-fix-appletb_backlight-backlight-device-reference-counting.patch b/queue-6.15/hid-appletb-kbd-fix-appletb_backlight-backlight-device-reference-counting.patch new file mode 100644 index 0000000000..3f5b89cf62 --- /dev/null +++ b/queue-6.15/hid-appletb-kbd-fix-appletb_backlight-backlight-device-reference-counting.patch @@ -0,0 +1,50 @@ +From 4540e41e753a7d69ecd3f5bad51fe620205c3a18 Mon Sep 17 00:00:00 2001 +From: Qasim Ijaz +Date: Sun, 15 Jun 2025 23:59:41 +0100 +Subject: HID: appletb-kbd: fix "appletb_backlight" backlight device reference counting + +From: Qasim Ijaz + +commit 4540e41e753a7d69ecd3f5bad51fe620205c3a18 upstream. + +During appletb_kbd_probe, probe attempts to get the backlight device +by name. When this happens backlight_device_get_by_name looks for a +device in the backlight class which has name "appletb_backlight" and +upon finding a match it increments the reference count for the device +and returns it to the caller. However this reference is never released +leading to a reference leak. + +Fix this by decrementing the backlight device reference count on removal +via put_device and on probe failure. + +Fixes: 93a0fc489481 ("HID: hid-appletb-kbd: add support for automatic brightness control while using the touchbar") +Cc: stable@vger.kernel.org +Signed-off-by: Qasim Ijaz +Reviewed-by: Aditya Garg +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hid/hid-appletb-kbd.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/hid/hid-appletb-kbd.c ++++ b/drivers/hid/hid-appletb-kbd.c +@@ -435,6 +435,8 @@ static int appletb_kbd_probe(struct hid_ + return 0; + + close_hw: ++ if (kbd->backlight_dev) ++ put_device(&kbd->backlight_dev->dev); + hid_hw_close(hdev); + stop_hw: + hid_hw_stop(hdev); +@@ -450,6 +452,9 @@ static void appletb_kbd_remove(struct hi + input_unregister_handler(&kbd->inp_handler); + timer_delete_sync(&kbd->inactivity_timer); + ++ if (kbd->backlight_dev) ++ put_device(&kbd->backlight_dev->dev); ++ + hid_hw_close(hdev); + hid_hw_stop(hdev); + } diff --git a/queue-6.15/hid-lenovo-restrict-f7-9-11-mode-to-compact-keyboards-only.patch b/queue-6.15/hid-lenovo-restrict-f7-9-11-mode-to-compact-keyboards-only.patch new file mode 100644 index 0000000000..8ec659fe38 --- /dev/null +++ b/queue-6.15/hid-lenovo-restrict-f7-9-11-mode-to-compact-keyboards-only.patch @@ -0,0 +1,49 @@ +From 9327e3ee5b077c4ab4495a09b67624f670ed88b6 Mon Sep 17 00:00:00 2001 +From: Iusico Maxim +Date: Thu, 5 Jun 2025 19:55:50 +0200 +Subject: HID: lenovo: Restrict F7/9/11 mode to compact keyboards only + +From: Iusico Maxim + +commit 9327e3ee5b077c4ab4495a09b67624f670ed88b6 upstream. + +Commit 2f2bd7cbd1d1 ("hid: lenovo: Resend all settings on reset_resume +for compact keyboards") introduced a regression for ThinkPad TrackPoint +Keyboard II by removing the conditional check for enabling F7/9/11 mode +needed for compact keyboards only. As a result, the non-compact +keyboards can no longer toggle Fn-lock via Fn+Esc, although it can be +controlled via sysfs knob that directly sends raw commands. + +This patch restores the previous conditional check without any +additions. 
+ +Cc: stable@vger.kernel.org +Fixes: 2f2bd7cbd1d1 ("hid: lenovo: Resend all settings on reset_resume for compact keyboards") +Signed-off-by: Iusico Maxim +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hid/hid-lenovo.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/drivers/hid/hid-lenovo.c ++++ b/drivers/hid/hid-lenovo.c +@@ -548,11 +548,14 @@ static void lenovo_features_set_cptkbd(s + + /* + * Tell the keyboard a driver understands it, and turn F7, F9, F11 into +- * regular keys ++ * regular keys (Compact only) + */ +- ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); +- if (ret) +- hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); ++ if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD || ++ hdev->product == USB_DEVICE_ID_LENOVO_CBTKBD) { ++ ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); ++ if (ret) ++ hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); ++ } + + /* Switch middle button to native mode */ + ret = lenovo_send_cmd_cptkbd(hdev, 0x09, 0x01); diff --git a/queue-6.15/hid-wacom-fix-kobject-reference-count-leak.patch b/queue-6.15/hid-wacom-fix-kobject-reference-count-leak.patch new file mode 100644 index 0000000000..d18a5e6b55 --- /dev/null +++ b/queue-6.15/hid-wacom-fix-kobject-reference-count-leak.patch @@ -0,0 +1,37 @@ +From 85a720f4337f0ddf1603c8b75a8f1ffbbe022ef9 Mon Sep 17 00:00:00 2001 +From: Qasim Ijaz +Date: Fri, 6 Jun 2025 19:49:59 +0100 +Subject: HID: wacom: fix kobject reference count leak + +From: Qasim Ijaz + +commit 85a720f4337f0ddf1603c8b75a8f1ffbbe022ef9 upstream. + +When sysfs_create_files() fails in wacom_initialize_remotes() the error +is returned and the cleanup action will not have been registered yet. + +As a result the kobject???s refcount is never dropped, so the +kobject can never be freed leading to a reference leak. + +Fix this by calling kobject_put() before returning. + +Fixes: 83e6b40e2de6 ("HID: wacom: EKR: have the wacom resources dynamically allocated") +Acked-by: Ping Cheng +Cc: stable@vger.kernel.org +Signed-off-by: Qasim Ijaz +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hid/wacom_sys.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/hid/wacom_sys.c ++++ b/drivers/hid/wacom_sys.c +@@ -2059,6 +2059,7 @@ static int wacom_initialize_remotes(stru + hid_err(wacom->hdev, + "cannot create sysfs group err: %d\n", error); + kfifo_free(&remote->remote_fifo); ++ kobject_put(remote->remote_dir); + return error; + } + diff --git a/queue-6.15/hid-wacom-fix-memory-leak-on-kobject-creation-failure.patch b/queue-6.15/hid-wacom-fix-memory-leak-on-kobject-creation-failure.patch new file mode 100644 index 0000000000..9e24de4ac0 --- /dev/null +++ b/queue-6.15/hid-wacom-fix-memory-leak-on-kobject-creation-failure.patch @@ -0,0 +1,44 @@ +From 5ae416c5b1e2e816aee7b3fc8347adf70afabb4c Mon Sep 17 00:00:00 2001 +From: Qasim Ijaz +Date: Fri, 6 Jun 2025 19:49:57 +0100 +Subject: HID: wacom: fix memory leak on kobject creation failure + +From: Qasim Ijaz + +commit 5ae416c5b1e2e816aee7b3fc8347adf70afabb4c upstream. + +During wacom_initialize_remotes() a fifo buffer is allocated +with kfifo_alloc() and later a cleanup action is registered +during devm_add_action_or_reset() to clean it up. + +However if the code fails to create a kobject and register it +with sysfs the code simply returns -ENOMEM before the cleanup +action is registered leading to a memory leak. + +Fix this by ensuring the fifo is freed when the kobject creation +and registration process fails. 
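Seen together with the other two wacom fixes queued alongside this one, the error paths in wacom_initialize_remotes() now unwind in reverse order of acquisition. A generic sketch of that pattern with illustrative names (demo_remote_init and its arguments are hypothetical, not the driver's code):

#include <linux/kfifo.h>
#include <linux/kobject.h>
#include <linux/slab.h>
#include <linux/sysfs.h>

/* Hypothetical init helper: acquire in order, unwind in reverse.
 * A real driver would keep 'fifo' and 'dir' in its state struct. */
static int demo_remote_init(struct kobject *parent,
			    const struct attribute * const *attrs)
{
	struct kfifo fifo;
	struct kobject *dir;
	int error;

	error = kfifo_alloc(&fifo, 64, GFP_KERNEL);	/* step 1 */
	if (error)
		return error;

	dir = kobject_create_and_add("demo_remote", parent); /* step 2 */
	if (!dir) {
		kfifo_free(&fifo);		/* undo step 1 */
		return -ENOMEM;
	}

	error = sysfs_create_files(dir, attrs);		/* step 3 */
	if (error) {
		kfifo_free(&fifo);		/* undo step 1 */
		kobject_put(dir);		/* undo step 2: drop the ref */
		return error;
	}

	return 0;
}

The reference taken by kobject_create_and_add() is only released by kobject_put(); freeing the surrounding structure without it leaks the kobject, which is the leak the first of these three patches plugs.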
+ +Fixes: 83e6b40e2de6 ("HID: wacom: EKR: have the wacom resources dynamically allocated") +Reviewed-by: Ping Cheng +Cc: stable@vger.kernel.org +Signed-off-by: Qasim Ijaz +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hid/wacom_sys.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/hid/wacom_sys.c ++++ b/drivers/hid/wacom_sys.c +@@ -2048,8 +2048,10 @@ static int wacom_initialize_remotes(stru + + remote->remote_dir = kobject_create_and_add("wacom_remote", + &wacom->hdev->dev.kobj); +- if (!remote->remote_dir) ++ if (!remote->remote_dir) { ++ kfifo_free(&remote->remote_fifo); + return -ENOMEM; ++ } + + error = sysfs_create_files(remote->remote_dir, remote_unpair_attrs); + diff --git a/queue-6.15/hid-wacom-fix-memory-leak-on-sysfs-attribute-creation-failure.patch b/queue-6.15/hid-wacom-fix-memory-leak-on-sysfs-attribute-creation-failure.patch new file mode 100644 index 0000000000..31bdfab6ed --- /dev/null +++ b/queue-6.15/hid-wacom-fix-memory-leak-on-sysfs-attribute-creation-failure.patch @@ -0,0 +1,34 @@ +From 1a19ae437ca5d5c7d9ec2678946fb339b1c706bf Mon Sep 17 00:00:00 2001 +From: Qasim Ijaz +Date: Fri, 6 Jun 2025 19:49:58 +0100 +Subject: HID: wacom: fix memory leak on sysfs attribute creation failure + +From: Qasim Ijaz + +commit 1a19ae437ca5d5c7d9ec2678946fb339b1c706bf upstream. + +When sysfs_create_files() fails during wacom_initialize_remotes() the +fifo buffer is not freed leading to a memory leak. + +Fix this by calling kfifo_free() before returning. + +Fixes: 83e6b40e2de6 ("HID: wacom: EKR: have the wacom resources dynamically allocated") +Reviewed-by: Ping Cheng +Cc: stable@vger.kernel.org +Signed-off-by: Qasim Ijaz +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hid/wacom_sys.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/hid/wacom_sys.c ++++ b/drivers/hid/wacom_sys.c +@@ -2058,6 +2058,7 @@ static int wacom_initialize_remotes(stru + if (error) { + hid_err(wacom->hdev, + "cannot create sysfs group err: %d\n", error); ++ kfifo_free(&remote->remote_fifo); + return error; + } + diff --git a/queue-6.15/maple_tree-fix-ma_state_prealloc-flag-in-mas_preallocate.patch b/queue-6.15/maple_tree-fix-ma_state_prealloc-flag-in-mas_preallocate.patch new file mode 100644 index 0000000000..cc65833e43 --- /dev/null +++ b/queue-6.15/maple_tree-fix-ma_state_prealloc-flag-in-mas_preallocate.patch @@ -0,0 +1,68 @@ +From fba46a5d83ca8decb338722fb4899026d8d9ead2 Mon Sep 17 00:00:00 2001 +From: "Liam R. Howlett" +Date: Mon, 16 Jun 2025 14:45:20 -0400 +Subject: maple_tree: fix MA_STATE_PREALLOC flag in mas_preallocate() + +From: Liam R. Howlett + +commit fba46a5d83ca8decb338722fb4899026d8d9ead2 upstream. + +Temporarily clear the preallocation flag when explicitly requesting +allocations. Pre-existing allocations are already counted against the +request through mas_node_count_gfp(), but the allocations will not happen +if the MA_STATE_PREALLOC flag is set. This flag is meant to avoid +re-allocating in bulk allocation mode, and to detect issues with +preallocation calculations. + +The MA_STATE_PREALLOC flag should also always be set on zero allocations +so that detection of underflow allocations will print a WARN_ON() during +consumption. 
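Read together, the two hunks in the diff below give mas_preallocate() the following overall shape (a paraphrased sketch, not the literal lib/maple_tree.c source):

	request = mas_prealloc_calc(mas, entry);
	if (!request)
		/* Zero allocations: still set the flag below, so an
		 * underflowing consumer trips the WARN_ON() detection. */
		goto set_flag;

	/* Temporarily clear the flag so mas_node_count_gfp() really
	 * allocates; pre-existing nodes were already counted against
	 * 'request'. */
	mas->mas_flags &= ~MA_STATE_PREALLOC;
	mas_node_count_gfp(mas, request, gfp);
	if (mas_is_err(mas)) {
		/* ... unchanged error path: reset the request, return ... */
	}

set_flag:
	mas->mas_flags |= MA_STATE_PREALLOC;
	return ret;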
+ +User visible effect of this flaw is a WARN_ON() followed by a null pointer +dereference when subsequent requests for larger number of nodes is +ignored, such as the vma merge retry in mmap_region() caused by drivers +altering the vma flags (which happens in v6.6, at least) + +Link: https://lkml.kernel.org/r/20250616184521.3382795-3-Liam.Howlett@oracle.com +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Liam R. Howlett +Reported-by: Zhaoyang Huang +Reported-by: Hailong Liu +Link: https://lore.kernel.org/all/1652f7eb-a51b-4fee-8058-c73af63bacd1@oppo.com/ +Link: https://lore.kernel.org/all/20250428184058.1416274-1-Liam.Howlett@oracle.com/ +Link: https://lore.kernel.org/all/20250429014754.1479118-1-Liam.Howlett@oracle.com/ +Cc: Lorenzo Stoakes +Cc: Suren Baghdasaryan +Cc: Hailong Liu +Cc: zhangpeng.00@bytedance.com +Cc: Steve Kang +Cc: Matthew Wilcox +Cc: Sidhartha Kumar +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -5496,8 +5496,9 @@ int mas_preallocate(struct ma_state *mas + mas->store_type = mas_wr_store_type(&wr_mas); + request = mas_prealloc_calc(mas, entry); + if (!request) +- return ret; ++ goto set_flag; + ++ mas->mas_flags &= ~MA_STATE_PREALLOC; + mas_node_count_gfp(mas, request, gfp); + if (mas_is_err(mas)) { + mas_set_alloc_req(mas, 0); +@@ -5507,6 +5508,7 @@ int mas_preallocate(struct ma_state *mas + return ret; + } + ++set_flag: + mas->mas_flags |= MA_STATE_PREALLOC; + return ret; + } diff --git a/queue-6.15/mm-gup-revert-mm-gup-fix-infinite-loop-within-__get_longterm_locked.patch b/queue-6.15/mm-gup-revert-mm-gup-fix-infinite-loop-within-__get_longterm_locked.patch new file mode 100644 index 0000000000..e53aabdd8c --- /dev/null +++ b/queue-6.15/mm-gup-revert-mm-gup-fix-infinite-loop-within-__get_longterm_locked.patch @@ -0,0 +1,104 @@ +From 517f496e1e61bd169d585dab4dd77e7147506322 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 11 Jun 2025 15:13:14 +0200 +Subject: mm/gup: revert "mm: gup: fix infinite loop within __get_longterm_locked" + +From: David Hildenbrand + +commit 517f496e1e61bd169d585dab4dd77e7147506322 upstream. + +After commit 1aaf8c122918 ("mm: gup: fix infinite loop within +__get_longterm_locked") we are able to longterm pin folios that are not +supposed to get longterm pinned, simply because they temporarily have the +LRU flag cleared (esp. temporarily isolated). + +For example, two __get_longterm_locked() callers can race, or +__get_longterm_locked() can race with anything else that temporarily +isolates folios. + +The introducing commit mentions the use case of a driver that uses +vm_ops->fault to insert pages allocated through cma_alloc() into the page +tables, assuming they can later get longterm pinned. These pages/ folios +would never have the LRU flag set and consequently cannot get isolated. +There is no known in-tree user making use of that so far, fortunately. + +To handle that in the future -- and avoid retrying forever to +isolate/migrate them -- we will need a different mechanism for the CMA +area *owner* to indicate that it actually already allocated the page and +is fine with longterm pinning it. The LRU flag is not suitable for that. + +Probably we can lookup the relevant CMA area and query the bitmap; we only +have have to care about some races, probably. 
If already allocated, we +could just allow longterm pinning) + +Anyhow, let's fix the "must not be longterm pinned" problem first by +reverting the original commit. + +Link: https://lkml.kernel.org/r/20250611131314.594529-1-david@redhat.com +Fixes: 1aaf8c122918 ("mm: gup: fix infinite loop within __get_longterm_locked") +Signed-off-by: David Hildenbrand +Closes: https://lore.kernel.org/all/20250522092755.GA3277597@tiffany/ +Reported-by: Hyesoo Yu +Reviewed-by: John Hubbard +Cc: Jason Gunthorpe +Cc: Peter Xu +Cc: Zhaoyang Huang +Cc: Aijun Sun +Cc: Alistair Popple +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/gup.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -2320,13 +2320,13 @@ static void pofs_unpin(struct pages_or_f + /* + * Returns the number of collected folios. Return value is always >= 0. + */ +-static void collect_longterm_unpinnable_folios( ++static unsigned long collect_longterm_unpinnable_folios( + struct list_head *movable_folio_list, + struct pages_or_folios *pofs) + { ++ unsigned long i, collected = 0; + struct folio *prev_folio = NULL; + bool drain_allow = true; +- unsigned long i; + + for (i = 0; i < pofs->nr_entries; i++) { + struct folio *folio = pofs_get_folio(pofs, i); +@@ -2338,6 +2338,8 @@ static void collect_longterm_unpinnable_ + if (folio_is_longterm_pinnable(folio)) + continue; + ++ collected++; ++ + if (folio_is_device_coherent(folio)) + continue; + +@@ -2359,6 +2361,8 @@ static void collect_longterm_unpinnable_ + NR_ISOLATED_ANON + folio_is_file_lru(folio), + folio_nr_pages(folio)); + } ++ ++ return collected; + } + + /* +@@ -2435,9 +2439,11 @@ static long + check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs) + { + LIST_HEAD(movable_folio_list); ++ unsigned long collected; + +- collect_longterm_unpinnable_folios(&movable_folio_list, pofs); +- if (list_empty(&movable_folio_list)) ++ collected = collect_longterm_unpinnable_folios(&movable_folio_list, ++ pofs); ++ if (!collected) + return 0; + + return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs); diff --git a/queue-6.15/mm-shmem-swap-fix-softlockup-with-mthp-swapin.patch b/queue-6.15/mm-shmem-swap-fix-softlockup-with-mthp-swapin.patch new file mode 100644 index 0000000000..a98921c19b --- /dev/null +++ b/queue-6.15/mm-shmem-swap-fix-softlockup-with-mthp-swapin.patch @@ -0,0 +1,195 @@ +From a05dd8ae5cbb1cb45f349922cfea4f548a5e5d6f Mon Sep 17 00:00:00 2001 +From: Kairui Song +Date: Tue, 10 Jun 2025 01:17:51 +0800 +Subject: mm/shmem, swap: fix softlockup with mTHP swapin +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Kairui Song + +commit a05dd8ae5cbb1cb45f349922cfea4f548a5e5d6f upstream. + +Following softlockup can be easily reproduced on my test machine with: + +echo always > /sys/kernel/mm/transparent_hugepage/hugepages-64kB/enabled +swapon /dev/zram0 # zram0 is a 48G swap device +mkdir -p /sys/fs/cgroup/memory/test +echo 1G > /sys/fs/cgroup/test/memory.max +echo $BASHPID > /sys/fs/cgroup/test/cgroup.procs +while true; do + dd if=/dev/zero of=/tmp/test.img bs=1M count=5120 + cat /tmp/test.img > /dev/null + rm /tmp/test.img +done + +Then after a while: +watchdog: BUG: soft lockup - CPU#0 stuck for 763s! 
[cat:5787] +Modules linked in: zram virtiofs +CPU: 0 UID: 0 PID: 5787 Comm: cat Kdump: loaded Tainted: G L 6.15.0.orig-gf3021d9246bc-dirty #118 PREEMPT(voluntary)· +Tainted: [L]=SOFTLOCKUP +Hardware name: Red Hat KVM/RHEL-AV, BIOS 0.0.0 02/06/2015 +RIP: 0010:mpol_shared_policy_lookup+0xd/0x70 +Code: e9 b8 b4 ff ff 31 c0 c3 cc cc cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 0f 1f 44 00 00 41 54 55 53 <48> 8b 1f 48 85 db 74 41 4c 8d 67 08 48 89 fb 48 89 f5 4c 89 e7 e8 +RSP: 0018:ffffc90002b1fc28 EFLAGS: 00000202 +RAX: 00000000001c20ca RBX: 0000000000724e1e RCX: 0000000000000001 +RDX: ffff888118e214c8 RSI: 0000000000057d42 RDI: ffff888118e21518 +RBP: 000000000002bec8 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000bf4 R11: 0000000000000000 R12: 0000000000000001 +R13: 00000000001c20ca R14: 00000000001c20ca R15: 0000000000000000 +FS: 00007f03f995c740(0000) GS:ffff88a07ad9a000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f03f98f1000 CR3: 0000000144626004 CR4: 0000000000770eb0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +PKRU: 55555554 +Call Trace: + + shmem_alloc_folio+0x31/0xc0 + shmem_swapin_folio+0x309/0xcf0 + ? filemap_get_entry+0x117/0x1e0 + ? xas_load+0xd/0xb0 + ? filemap_get_entry+0x101/0x1e0 + shmem_get_folio_gfp+0x2ed/0x5b0 + shmem_file_read_iter+0x7f/0x2e0 + vfs_read+0x252/0x330 + ksys_read+0x68/0xf0 + do_syscall_64+0x4c/0x1c0 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +RIP: 0033:0x7f03f9a46991 +Code: 00 48 8b 15 81 14 10 00 f7 d8 64 89 02 b8 ff ff ff ff eb bd e8 20 ad 01 00 f3 0f 1e fa 80 3d 35 97 10 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 4f c3 66 0f 1f 44 00 00 55 48 89 e5 48 83 ec +RSP: 002b:00007fff3c52bd28 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 +RAX: ffffffffffffffda RBX: 0000000000040000 RCX: 00007f03f9a46991 +RDX: 0000000000040000 RSI: 00007f03f98ba000 RDI: 0000000000000003 +RBP: 00007fff3c52bd50 R08: 0000000000000000 R09: 00007f03f9b9a380 +R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000040000 +R13: 00007f03f98ba000 R14: 0000000000000003 R15: 0000000000000000 + + +The reason is simple, readahead brought some order 0 folio in swap cache, +and the swapin mTHP folio being allocated is in conflict with it, so +swapcache_prepare fails and causes shmem_swap_alloc_folio to return +-EEXIST, and shmem simply retries again and again causing this loop. + +Fix it by applying a similar fix for anon mTHP swapin. 
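Distilled, the fix makes shmem fall back to an order-0 swapin whenever any entry in the mTHP range already has a swap cache folio. A paraphrased sketch of the shmem_swapin_folio() hunk that follows:

	int nr_pages = 1 << order;

	/*
	 * non_swapcache_batch() returns how many consecutive entries,
	 * starting at 'swap', have no swap cache folio. Anything short
	 * of nr_pages means a conflicting folio (e.g. from readahead)
	 * exists somewhere in the range, so don't attempt mTHP swapin
	 * at all instead of retrying -EEXIST forever.
	 */
	if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) ||
			  !zswap_never_enabled() ||
			  non_swapcache_batch(swap, nr_pages) != nr_pages))
		fallback_order0 = true;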
+ +The performance change is very slight, time of swapin 10g zero folios +with shmem (test for 12 times): +Before: 2.47s +After: 2.48s + +[kasong@tencent.com: add comment] + Link: https://lkml.kernel.org/r/20250610181645.45922-1-ryncsn@gmail.com +Link: https://lkml.kernel.org/r/20250610181645.45922-1-ryncsn@gmail.com +Link: https://lkml.kernel.org/r/20250609171751.36305-1-ryncsn@gmail.com +Fixes: 1dd44c0af4fa ("mm: shmem: skip swapcache for swapin of synchronous swap device") +Signed-off-by: Kairui Song +Reviewed-by: Barry Song +Acked-by: Nhat Pham +Reviewed-by: Baolin Wang +Cc: Baoquan He +Cc: Chris Li +Cc: Hugh Dickins +Cc: Kemeng Shi +Cc: Usama Arif +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory.c | 20 -------------------- + mm/shmem.c | 6 +++++- + mm/swap.h | 23 +++++++++++++++++++++++ + 3 files changed, 28 insertions(+), 21 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -4224,26 +4224,6 @@ static struct folio *__alloc_swap_folio( + } + + #ifdef CONFIG_TRANSPARENT_HUGEPAGE +-static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +-{ +- struct swap_info_struct *si = swp_swap_info(entry); +- pgoff_t offset = swp_offset(entry); +- int i; +- +- /* +- * While allocating a large folio and doing swap_read_folio, which is +- * the case the being faulted pte doesn't have swapcache. We need to +- * ensure all PTEs have no cache as well, otherwise, we might go to +- * swap devices while the content is in swapcache. +- */ +- for (i = 0; i < max_nr; i++) { +- if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) +- return i; +- } +- +- return i; +-} +- + /* + * Check if the PTEs within a range are contiguous swap entries + * and have consistent swapcache, zeromap. +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -2262,6 +2262,7 @@ static int shmem_swapin_folio(struct ino + folio = swap_cache_get_folio(swap, NULL, 0); + order = xa_get_order(&mapping->i_pages, index); + if (!folio) { ++ int nr_pages = 1 << order; + bool fallback_order0 = false; + + /* Or update major stats only when swapin succeeds?? */ +@@ -2275,9 +2276,12 @@ static int shmem_swapin_folio(struct ino + * If uffd is active for the vma, we need per-page fault + * fidelity to maintain the uffd semantics, then fallback + * to swapin order-0 folio, as well as for zswap case. ++ * Any existing sub folio in the swap cache also blocks ++ * mTHP swapin. + */ + if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) || +- !zswap_never_enabled())) ++ !zswap_never_enabled() || ++ non_swapcache_batch(swap, nr_pages) != nr_pages)) + fallback_order0 = true; + + /* Skip swapcache for synchronous device. */ +--- a/mm/swap.h ++++ b/mm/swap.h +@@ -106,6 +106,25 @@ static inline int swap_zeromap_batch(swp + return find_next_bit(sis->zeromap, end, start) - start; + } + ++static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) ++{ ++ struct swap_info_struct *si = swp_swap_info(entry); ++ pgoff_t offset = swp_offset(entry); ++ int i; ++ ++ /* ++ * While allocating a large folio and doing mTHP swapin, we need to ++ * ensure all entries are not cached, otherwise, the mTHP folio will ++ * be in conflict with the folio in swap cache. 
++ */ ++ for (i = 0; i < max_nr; i++) { ++ if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) ++ return i; ++ } ++ ++ return i; ++} ++ + #else /* CONFIG_SWAP */ + struct swap_iocb; + static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug) +@@ -199,6 +218,10 @@ static inline int swap_zeromap_batch(swp + return 0; + } + ++static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) ++{ ++ return 0; ++} + #endif /* CONFIG_SWAP */ + + #endif /* _MM_SWAP_H */ diff --git a/queue-6.15/mm-userfaultfd-fix-race-of-userfaultfd_move-and-swap-cache.patch b/queue-6.15/mm-userfaultfd-fix-race-of-userfaultfd_move-and-swap-cache.patch new file mode 100644 index 0000000000..69c8847598 --- /dev/null +++ b/queue-6.15/mm-userfaultfd-fix-race-of-userfaultfd_move-and-swap-cache.patch @@ -0,0 +1,196 @@ +From 0ea148a799198518d8ebab63ddd0bb6114a103bc Mon Sep 17 00:00:00 2001 +From: Kairui Song +Date: Wed, 4 Jun 2025 23:10:38 +0800 +Subject: mm: userfaultfd: fix race of userfaultfd_move and swap cache + +From: Kairui Song + +commit 0ea148a799198518d8ebab63ddd0bb6114a103bc upstream. + +This commit fixes two kinds of races, they may have different results: + +Barry reported a BUG_ON in commit c50f8e6053b0, we may see the same +BUG_ON if the filemap lookup returned NULL and folio is added to swap +cache after that. + +If another kind of race is triggered (folio changed after lookup) we +may see RSS counter is corrupted: + +[ 406.893936] BUG: Bad rss-counter state mm:ffff0000c5a9ddc0 +type:MM_ANONPAGES val:-1 +[ 406.894071] BUG: Bad rss-counter state mm:ffff0000c5a9ddc0 +type:MM_SHMEMPAGES val:1 + +Because the folio is being accounted to the wrong VMA. + +I'm not sure if there will be any data corruption though, seems no. +The issues above are critical already. + + +On seeing a swap entry PTE, userfaultfd_move does a lockless swap cache +lookup, and tries to move the found folio to the faulting vma. Currently, +it relies on checking the PTE value to ensure that the moved folio still +belongs to the src swap entry and that no new folio has been added to the +swap cache, which turns out to be unreliable. + +While working and reviewing the swap table series with Barry, following +existing races are observed and reproduced [1]: + +In the example below, move_pages_pte is moving src_pte to dst_pte, where +src_pte is a swap entry PTE holding swap entry S1, and S1 is not in the +swap cache: + +CPU1 CPU2 +userfaultfd_move + move_pages_pte() + entry = pte_to_swp_entry(orig_src_pte); + // Here it got entry = S1 + ... < interrupted> ... + + // folio A is a new allocated folio + // and get installed into src_pte + + // src_pte now points to folio A, S1 + // has swap count == 0, it can be freed + // by folio_swap_swap or swap + // allocator's reclaim. + + // folio B is a folio in another VMA. + + // S1 is freed, folio B can use it + // for swap out with no problem. + ... + folio = filemap_get_folio(S1) + // Got folio B here !!! + ... < interrupted again> ... + + // Now S1 is free to be used again. + + // Now src_pte is a swap entry PTE + // holding S1 again. + folio_trylock(folio) + move_swap_pte + double_pt_lock + is_pte_pages_stable + // Check passed because src_pte == S1 + folio_move_anon_rmap(...) + // Moved invalid folio B here !!! + +The race window is very short and requires multiple collisions of multiple +rare events, so it's very unlikely to happen, but with a deliberately +constructed reproducer and increased time window, it can be reproduced +easily. 
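In short, the fix described below revalidates the lockless lookup twice: once under the folio lock and once under the PTL. A paraphrased sketch of the shape (not the exact move_swap_pte() code):

static int move_swap_pte_sketch(struct folio *src_folio, swp_entry_t entry,
				struct swap_info_struct *si,
				spinlock_t *dst_ptl, spinlock_t *src_ptl)
{
	/*
	 * 1) A folio found by the earlier lockless lookup may have been
	 *    freed and reused since; recheck it now that it is locked.
	 */
	if (src_folio && (!folio_test_swapcache(src_folio) ||
			  src_folio->swap.val != entry.val))
		return -EAGAIN;

	double_pt_lock(dst_ptl, src_ptl);
	/* ... is_pte_pages_stable() and the actual move, as before ... */

	/*
	 * 2) A NULL lookup may have raced with a swapin + swapout that
	 *    left a folio in the swap cache; recheck swap_map under the
	 *    PTL, where SWAP_HAS_CACHE updates are visible.
	 */
	if (!src_folio &&
	    (READ_ONCE(si->swap_map[swp_offset(entry)]) & SWAP_HAS_CACHE)) {
		double_pt_unlock(dst_ptl, src_ptl);
		return -EAGAIN;
	}

	return 0;
}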
+ +This can be fixed by checking if the folio returned by filemap is the +valid swap cache folio after acquiring the folio lock. + +Another similar race is possible: filemap_get_folio may return NULL, but +folio (A) could be swapped in and then swapped out again using the same +swap entry after the lookup. In such a case, folio (A) may remain in the +swap cache, so it must be moved too: + +CPU1 CPU2 +userfaultfd_move + move_pages_pte() + entry = pte_to_swp_entry(orig_src_pte); + // Here it got entry = S1, and S1 is not in swap cache + folio = filemap_get_folio(S1) + // Got NULL + ... < interrupted again> ... + + + move_swap_pte + double_pt_lock + is_pte_pages_stable + // Check passed because src_pte == S1 + folio_move_anon_rmap(...) + // folio A is ignored !!! + +Fix this by checking the swap cache again after acquiring the src_pte +lock. And to avoid the filemap overhead, we check swap_map directly [2]. + +The SWP_SYNCHRONOUS_IO path does make the problem more complex, but so far +we don't need to worry about that, since folios can only be exposed to the +swap cache in the swap out path, and this is covered in this patch by +checking the swap cache again after acquiring the src_pte lock. + +Testing with a simple C program that allocates and moves several GB of +memory did not show any observable performance change. + +Link: https://lkml.kernel.org/r/20250604151038.21968-1-ryncsn@gmail.com +Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") +Signed-off-by: Kairui Song +Closes: https://lore.kernel.org/linux-mm/CAMgjq7B1K=6OOrK2OUZ0-tqCzi+EJt+2_K97TPGoSt=9+JwP7Q@mail.gmail.com/ [1] +Link: https://lore.kernel.org/all/CAGsJ_4yJhJBo16XhiC-nUzSheyX-V3-nFE+tAi=8Y560K8eT=A@mail.gmail.com/ [2] +Reviewed-by: Lokesh Gidra +Acked-by: Peter Xu +Reviewed-by: Suren Baghdasaryan +Reviewed-by: Barry Song +Reviewed-by: Chris Li +Cc: Andrea Arcangeli +Cc: David Hildenbrand +Cc: Kairui Song +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/userfaultfd.c | 33 +++++++++++++++++++++++++++++++-- + 1 file changed, 31 insertions(+), 2 deletions(-) + +--- a/mm/userfaultfd.c ++++ b/mm/userfaultfd.c +@@ -1084,8 +1084,18 @@ static int move_swap_pte(struct mm_struc + pte_t orig_dst_pte, pte_t orig_src_pte, + pmd_t *dst_pmd, pmd_t dst_pmdval, + spinlock_t *dst_ptl, spinlock_t *src_ptl, +- struct folio *src_folio) ++ struct folio *src_folio, ++ struct swap_info_struct *si, swp_entry_t entry) + { ++ /* ++ * Check if the folio still belongs to the target swap entry after ++ * acquiring the lock. Folio can be freed in the swap cache while ++ * not locked. ++ */ ++ if (src_folio && unlikely(!folio_test_swapcache(src_folio) || ++ entry.val != src_folio->swap.val)) ++ return -EAGAIN; ++ + double_pt_lock(dst_ptl, src_ptl); + + if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte, +@@ -1102,6 +1112,25 @@ static int move_swap_pte(struct mm_struc + if (src_folio) { + folio_move_anon_rmap(src_folio, dst_vma); + src_folio->index = linear_page_index(dst_vma, dst_addr); ++ } else { ++ /* ++ * Check if the swap entry is cached after acquiring the src_pte ++ * lock. Otherwise, we might miss a newly loaded swap cache folio. ++ * ++ * Check swap_map directly to minimize overhead, READ_ONCE is sufficient. ++ * We are trying to catch newly added swap cache, the only possible case is ++ * when a folio is swapped in and out again staying in swap cache, using the ++ * same entry before the PTE check above. The PTL is acquired and released ++ * twice, each time after updating the swap_map's flag. 
So holding ++ * the PTL here ensures we see the updated value. False positive is possible, ++ * e.g. SWP_SYNCHRONOUS_IO swapin may set the flag without touching the ++ * cache, or during the tiny synchronization window between swap cache and ++ * swap_map, but it will be gone very quickly, worst result is retry jitters. ++ */ ++ if (READ_ONCE(si->swap_map[swp_offset(entry)]) & SWAP_HAS_CACHE) { ++ double_pt_unlock(dst_ptl, src_ptl); ++ return -EAGAIN; ++ } + } + + orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); +@@ -1412,7 +1441,7 @@ retry: + } + err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte, + orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, +- dst_ptl, src_ptl, src_folio); ++ dst_ptl, src_ptl, src_folio, si, entry); + } + + out: diff --git a/queue-6.15/net-libwx-fix-the-creation-of-page_pool.patch b/queue-6.15/net-libwx-fix-the-creation-of-page_pool.patch new file mode 100644 index 0000000000..e0ff58f204 --- /dev/null +++ b/queue-6.15/net-libwx-fix-the-creation-of-page_pool.patch @@ -0,0 +1,41 @@ +From 85720e04d9af0b77f8092b12a06661a8d459d4a0 Mon Sep 17 00:00:00 2001 +From: Jiawen Wu +Date: Wed, 25 Jun 2025 10:39:24 +0800 +Subject: net: libwx: fix the creation of page_pool + +From: Jiawen Wu + +commit 85720e04d9af0b77f8092b12a06661a8d459d4a0 upstream. + +'rx_ring->size' means the count of ring descriptors multiplied by the +size of one descriptor. When increasing the count of ring descriptors, +it may exceed the limit of pool size. + +[ 864.209610] page_pool_create_percpu() gave up with errno -7 +[ 864.209613] txgbe 0000:11:00.0: Page pool creation failed: -7 + +Fix to set the pool_size to the count of ring descriptors. + +Fixes: 850b971110b2 ("net: libwx: Allocate Rx and Tx resources") +Cc: stable@vger.kernel.org +Signed-off-by: Jiawen Wu +Reviewed-by: Simon Horman +Reviewed-by: Mina Almasry +Link: https://patch.msgid.link/434C72BFB40E350A+20250625023924.21821-1-jiawenwu@trustnetic.com +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/wangxun/libwx/wx_lib.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c ++++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c +@@ -2496,7 +2496,7 @@ static int wx_alloc_page_pool(struct wx_ + struct page_pool_params pp_params = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, +- .pool_size = rx_ring->size, ++ .pool_size = rx_ring->count, + .nid = dev_to_node(rx_ring->dev), + .dev = rx_ring->dev, + .dma_dir = DMA_FROM_DEVICE, diff --git a/queue-6.15/revert-bcache-remove-heap-related-macros-and-switch-to-generic-min_heap.patch b/queue-6.15/revert-bcache-remove-heap-related-macros-and-switch-to-generic-min_heap.patch new file mode 100644 index 0000000000..71f93e1a97 --- /dev/null +++ b/queue-6.15/revert-bcache-remove-heap-related-macros-and-switch-to-generic-min_heap.patch @@ -0,0 +1,1088 @@ +From 48fd7ebe00c1cdc782b42576548b25185902f64c Mon Sep 17 00:00:00 2001 +From: Kuan-Wei Chiu +Date: Sun, 15 Jun 2025 04:23:52 +0800 +Subject: Revert "bcache: remove heap-related macros and switch to generic min_heap" + +From: Kuan-Wei Chiu + +commit 48fd7ebe00c1cdc782b42576548b25185902f64c upstream. + +This reverts commit 866898efbb25bb44fd42848318e46db9e785973a. + +The generic bottom-up min_heap implementation causes performance +regression in invalidate_buckets_lru(), a hot path in bcache. Before the +cache is fully populated, new_bucket_prio() often returns zero, leading to +many equal comparisons. 
In such cases, bottom-up sift_down performs up to
+2 * log2(n) comparisons, while the original top-down approach completes
+with just O(1) comparisons, resulting in a measurable performance gap.
+
+The performance degradation is further worsened by the non-inlined
+min_heap API functions introduced in commit 92a8b224b833 ("lib/min_heap:
+introduce non-inline versions of min heap API functions"), adding function
+call overhead to this critical path.
+
+As reported by Robert, bcache now suffers from latency spikes, with P100
+(max) latency increasing from 600 ms to 2.4 seconds every 5 minutes.
+These regressions degrade bcache's effectiveness as a low-latency cache
+layer and lead to frequent timeouts and application stalls in production
+environments.
+
+This revert aims to restore bcache's original low-latency behavior.
+
+Link: https://lore.kernel.org/lkml/CAJhEC05+0S69z+3+FB2Cd0hD+pCRyWTKLEOsc8BOmH73p1m+KQ@mail.gmail.com
+Link: https://lkml.kernel.org/r/20250614202353.1632957-3-visitorckw@gmail.com
+Fixes: 866898efbb25 ("bcache: remove heap-related macros and switch to generic min_heap")
+Fixes: 92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap API functions")
+Signed-off-by: Kuan-Wei Chiu
+Reported-by: Robert Pang
+Closes: https://lore.kernel.org/linux-bcache/CAJhEC06F_AtrPgw2-7CvCqZgeStgCtitbD-ryuPpXQA-JG5XXw@mail.gmail.com
+Acked-by: Coly Li
+Cc: Ching-Chun (Jim) Huang
+Cc: Kent Overstreet
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/md/bcache/alloc.c | 64 +++++----------------
+ drivers/md/bcache/bcache.h | 2
+ drivers/md/bcache/bset.c | 124 +++++++++++++++---------------------
+ drivers/md/bcache/bset.h | 42 ++++++++------
+ drivers/md/bcache/btree.c | 69 +++++++++--------
+ drivers/md/bcache/extents.c | 51 ++++++-----------
+ drivers/md/bcache/movinggc.c | 41 +++----------
+ drivers/md/bcache/super.c | 3 -
+ drivers/md/bcache/sysfs.c | 4 -
+ drivers/md/bcache/util.h | 67 ++++++++++++++++++++++
+ drivers/md/bcache/writeback.c | 13 +---
+ 11 files changed, 217 insertions(+), 263 deletions(-)
+
+--- a/drivers/md/bcache/alloc.c
++++ b/drivers/md/bcache/alloc.c
+@@ -164,68 +164,40 @@ static void bch_invalidate_one_bucket(st
+ * prio is worth 1/8th of what INITIAL_PRIO is worth.
+ */ + +-static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b) +-{ +- unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; +- +- return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); +-} +- +-static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args) +-{ +- struct bucket **lhs = (struct bucket **)l; +- struct bucket **rhs = (struct bucket **)r; +- struct cache *ca = args; +- +- return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs); +-} +- +-static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args) +-{ +- struct bucket **lhs = (struct bucket **)l; +- struct bucket **rhs = (struct bucket **)r; +- struct cache *ca = args; +- +- return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs); +-} +- +-static inline void new_bucket_swap(void *l, void *r, void __always_unused *args) +-{ +- struct bucket **lhs = l, **rhs = r; ++#define bucket_prio(b) \ ++({ \ ++ unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \ ++ \ ++ (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \ ++}) + +- swap(*lhs, *rhs); +-} ++#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) ++#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) + + static void invalidate_buckets_lru(struct cache *ca) + { + struct bucket *b; +- const struct min_heap_callbacks bucket_max_cmp_callback = { +- .less = new_bucket_max_cmp, +- .swp = new_bucket_swap, +- }; +- const struct min_heap_callbacks bucket_min_cmp_callback = { +- .less = new_bucket_min_cmp, +- .swp = new_bucket_swap, +- }; ++ ssize_t i; + +- ca->heap.nr = 0; ++ ca->heap.used = 0; + + for_each_bucket(b, ca) { + if (!bch_can_invalidate_bucket(ca, b)) + continue; + +- if (!min_heap_full(&ca->heap)) +- min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); +- else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { ++ if (!heap_full(&ca->heap)) ++ heap_add(&ca->heap, b, bucket_max_cmp); ++ else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { + ca->heap.data[0] = b; +- min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); ++ heap_sift(&ca->heap, 0, bucket_max_cmp); + } + } + +- min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); ++ for (i = ca->heap.used / 2 - 1; i >= 0; --i) ++ heap_sift(&ca->heap, i, bucket_min_cmp); + + while (!fifo_full(&ca->free_inc)) { +- if (!ca->heap.nr) { ++ if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { + /* + * We don't want to be calling invalidate_buckets() + * multiple times when it can't do anything +@@ -234,8 +206,6 @@ static void invalidate_buckets_lru(struc + wake_up_gc(ca->set); + return; + } +- b = min_heap_peek(&ca->heap)[0]; +- min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); + + bch_invalidate_one_bucket(ca, b); + } +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -458,7 +458,7 @@ struct cache { + /* Allocation stuff: */ + struct bucket *buckets; + +- DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap; ++ DECLARE_HEAP(struct bucket *, heap); + + /* + * If nonzero, we know we aren't going to find any buckets to invalidate +--- a/drivers/md/bcache/bset.c ++++ b/drivers/md/bcache/bset.c +@@ -54,11 +54,9 @@ void bch_dump_bucket(struct btree_keys * + int __bch_count_data(struct btree_keys *b) + { + unsigned int ret = 0; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + struct bkey *k; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- + if (b->ops->is_extents) + for_each_key(b, k, &iter) + ret += KEY_SIZE(k); +@@ -69,11 +67,9 @@ void 
__bch_check_keys(struct btree_keys + { + va_list args; + struct bkey *k, *p = NULL; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + const char *err; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- + for_each_key(b, k, &iter) { + if (b->ops->is_extents) { + err = "Keys out of order"; +@@ -114,9 +110,9 @@ bug: + + static void bch_btree_iter_next_check(struct btree_iter *iter) + { +- struct bkey *k = iter->heap.data->k, *next = bkey_next(k); ++ struct bkey *k = iter->data->k, *next = bkey_next(k); + +- if (next < iter->heap.data->end && ++ if (next < iter->data->end && + bkey_cmp(k, iter->b->ops->is_extents ? + &START_KEY(next) : next) > 0) { + bch_dump_bucket(iter->b); +@@ -883,14 +879,12 @@ unsigned int bch_btree_insert_key(struct + unsigned int status = BTREE_INSERT_STATUS_NO_INSERT; + struct bset *i = bset_tree_last(b)->data; + struct bkey *m, *prev = NULL; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + struct bkey preceding_key_on_stack = ZERO_KEY; + struct bkey *preceding_key_p = &preceding_key_on_stack; + + BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- + /* + * If k has preceding key, preceding_key_p will be set to address + * of k's preceding key; otherwise preceding_key_p will be set +@@ -901,9 +895,9 @@ unsigned int bch_btree_insert_key(struct + else + preceding_key(k, &preceding_key_p); + +- m = bch_btree_iter_init(b, &iter, preceding_key_p); ++ m = bch_btree_iter_stack_init(b, &iter, preceding_key_p); + +- if (b->ops->insert_fixup(b, k, &iter, replace_key)) ++ if (b->ops->insert_fixup(b, k, &iter.iter, replace_key)) + return status; + + status = BTREE_INSERT_STATUS_INSERT; +@@ -1083,102 +1077,79 @@ struct bkey *__bch_bset_search(struct bt + + /* Btree iterator */ + +-typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *); +- +-static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args) +-{ +- const struct btree_iter_set *_l = l; +- const struct btree_iter_set *_r = r; +- +- return bkey_cmp(_l->k, _r->k) <= 0; +-} ++typedef bool (btree_iter_cmp_fn)(struct btree_iter_set, ++ struct btree_iter_set); + +-static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args) ++static inline bool btree_iter_cmp(struct btree_iter_set l, ++ struct btree_iter_set r) + { +- struct btree_iter_set *_iter1 = iter1; +- struct btree_iter_set *_iter2 = iter2; +- +- swap(*_iter1, *_iter2); ++ return bkey_cmp(l.k, r.k) > 0; + } + + static inline bool btree_iter_end(struct btree_iter *iter) + { +- return !iter->heap.nr; ++ return !iter->used; + } + + void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, + struct bkey *end) + { +- const struct min_heap_callbacks callbacks = { +- .less = new_btree_iter_cmp, +- .swp = new_btree_iter_swap, +- }; +- + if (k != end) +- BUG_ON(!min_heap_push(&iter->heap, +- &((struct btree_iter_set) { k, end }), +- &callbacks, +- NULL)); ++ BUG_ON(!heap_add(iter, ++ ((struct btree_iter_set) { k, end }), ++ btree_iter_cmp)); + } + +-static struct bkey *__bch_btree_iter_init(struct btree_keys *b, +- struct btree_iter *iter, +- struct bkey *search, +- struct bset_tree *start) ++static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b, ++ struct btree_iter_stack *iter, ++ struct bkey *search, ++ struct bset_tree *start) + { + struct bkey *ret = NULL; + +- iter->heap.size = ARRAY_SIZE(iter->heap.preallocated); +- iter->heap.nr = 0; ++ iter->iter.size = ARRAY_SIZE(iter->stack_data); ++ 
iter->iter.used = 0; + + #ifdef CONFIG_BCACHE_DEBUG +- iter->b = b; ++ iter->iter.b = b; + #endif + + for (; start <= bset_tree_last(b); start++) { + ret = bch_bset_search(b, start, search); +- bch_btree_iter_push(iter, ret, bset_bkey_last(start->data)); ++ bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data)); + } + + return ret; + } + +-struct bkey *bch_btree_iter_init(struct btree_keys *b, +- struct btree_iter *iter, ++struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, ++ struct btree_iter_stack *iter, + struct bkey *search) + { +- return __bch_btree_iter_init(b, iter, search, b->set); ++ return __bch_btree_iter_stack_init(b, iter, search, b->set); + } + + static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, +- new_btree_iter_cmp_fn *cmp) ++ btree_iter_cmp_fn *cmp) + { + struct btree_iter_set b __maybe_unused; + struct bkey *ret = NULL; +- const struct min_heap_callbacks callbacks = { +- .less = cmp, +- .swp = new_btree_iter_swap, +- }; + + if (!btree_iter_end(iter)) { + bch_btree_iter_next_check(iter); + +- ret = iter->heap.data->k; +- iter->heap.data->k = bkey_next(iter->heap.data->k); ++ ret = iter->data->k; ++ iter->data->k = bkey_next(iter->data->k); + +- if (iter->heap.data->k > iter->heap.data->end) { ++ if (iter->data->k > iter->data->end) { + WARN_ONCE(1, "bset was corrupt!\n"); +- iter->heap.data->k = iter->heap.data->end; ++ iter->data->k = iter->data->end; + } + +- if (iter->heap.data->k == iter->heap.data->end) { +- if (iter->heap.nr) { +- b = min_heap_peek(&iter->heap)[0]; +- min_heap_pop(&iter->heap, &callbacks, NULL); +- } +- } ++ if (iter->data->k == iter->data->end) ++ heap_pop(iter, b, cmp); + else +- min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); ++ heap_sift(iter, 0, cmp); + } + + return ret; +@@ -1186,7 +1157,7 @@ static inline struct bkey *__bch_btree_i + + struct bkey *bch_btree_iter_next(struct btree_iter *iter) + { +- return __bch_btree_iter_next(iter, new_btree_iter_cmp); ++ return __bch_btree_iter_next(iter, btree_iter_cmp); + + } + +@@ -1224,18 +1195,16 @@ static void btree_mergesort(struct btree + struct btree_iter *iter, + bool fixup, bool remove_stale) + { ++ int i; + struct bkey *k, *last = NULL; + BKEY_PADDED(k) tmp; + bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale + ? 
bch_ptr_bad + : bch_ptr_invalid; +- const struct min_heap_callbacks callbacks = { +- .less = b->ops->sort_cmp, +- .swp = new_btree_iter_swap, +- }; + + /* Heapify the iterator, using our comparison function */ +- min_heapify_all(&iter->heap, &callbacks, NULL); ++ for (i = iter->used / 2 - 1; i >= 0; --i) ++ heap_sift(iter, i, b->ops->sort_cmp); + + while (!btree_iter_end(iter)) { + if (b->ops->sort_fixup && fixup) +@@ -1324,11 +1293,10 @@ void bch_btree_sort_partial(struct btree + struct bset_sort_state *state) + { + size_t order = b->page_order, keys = 0; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + int oldsize = bch_count_data(b); + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- __bch_btree_iter_init(b, &iter, NULL, &b->set[start]); ++ __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]); + + if (start) { + unsigned int i; +@@ -1339,7 +1307,7 @@ void bch_btree_sort_partial(struct btree + order = get_order(__set_bytes(b->set->data, keys)); + } + +- __btree_sort(b, &iter, start, order, false, state); ++ __btree_sort(b, &iter.iter, start, order, false, state); + + EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize); + } +@@ -1355,13 +1323,11 @@ void bch_btree_sort_into(struct btree_ke + struct bset_sort_state *state) + { + uint64_t start_time = local_clock(); +- struct btree_iter iter; +- +- min_heap_init(&iter.heap, NULL, MAX_BSETS); ++ struct btree_iter_stack iter; + +- bch_btree_iter_init(b, &iter, NULL); ++ bch_btree_iter_stack_init(b, &iter, NULL); + +- btree_mergesort(b, new->set->data, &iter, false, true); ++ btree_mergesort(b, new->set->data, &iter.iter, false, true); + + bch_time_stats_update(&state->time, start_time); + +--- a/drivers/md/bcache/bset.h ++++ b/drivers/md/bcache/bset.h +@@ -187,9 +187,8 @@ struct bset_tree { + }; + + struct btree_keys_ops { +- bool (*sort_cmp)(const void *l, +- const void *r, +- void *args); ++ bool (*sort_cmp)(struct btree_iter_set l, ++ struct btree_iter_set r); + struct bkey *(*sort_fixup)(struct btree_iter *iter, + struct bkey *tmp); + bool (*insert_fixup)(struct btree_keys *b, +@@ -313,17 +312,23 @@ enum { + BTREE_INSERT_STATUS_FRONT_MERGE, + }; + +-struct btree_iter_set { +- struct bkey *k, *end; +-}; +- + /* Btree key iteration */ + + struct btree_iter { ++ size_t size, used; + #ifdef CONFIG_BCACHE_DEBUG + struct btree_keys *b; + #endif +- MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap; ++ struct btree_iter_set { ++ struct bkey *k, *end; ++ } data[]; ++}; ++ ++/* Fixed-size btree_iter that can be allocated on the stack */ ++ ++struct btree_iter_stack { ++ struct btree_iter iter; ++ struct btree_iter_set stack_data[MAX_BSETS]; + }; + + typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k); +@@ -335,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter( + + void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, + struct bkey *end); +-struct bkey *bch_btree_iter_init(struct btree_keys *b, +- struct btree_iter *iter, +- struct bkey *search); ++struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, ++ struct btree_iter_stack *iter, ++ struct bkey *search); + + struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, + const struct bkey *search); +@@ -352,13 +357,14 @@ static inline struct bkey *bch_bset_sear + return search ? 
__bch_bset_search(b, t, search) : t->data->start; + } + +-#define for_each_key_filter(b, k, iter, filter) \ +- for (bch_btree_iter_init((b), (iter), NULL); \ +- ((k) = bch_btree_iter_next_filter((iter), (b), filter));) +- +-#define for_each_key(b, k, iter) \ +- for (bch_btree_iter_init((b), (iter), NULL); \ +- ((k) = bch_btree_iter_next(iter));) ++#define for_each_key_filter(b, k, stack_iter, filter) \ ++ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ ++ ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \ ++ filter));) ++ ++#define for_each_key(b, k, stack_iter) \ ++ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ ++ ((k) = bch_btree_iter_next(&((stack_iter)->iter)));) + + /* Sorting */ + +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -149,19 +149,19 @@ void bch_btree_node_read_done(struct btr + { + const char *err = "bad btree header"; + struct bset *i = btree_bset_first(b); +- struct btree_iter iter; ++ struct btree_iter *iter; + + /* + * c->fill_iter can allocate an iterator with more memory space + * than static MAX_BSETS. + * See the comment arount cache_set->fill_iter. + */ +- iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO); +- iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; +- iter.heap.nr = 0; ++ iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); ++ iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; ++ iter->used = 0; + + #ifdef CONFIG_BCACHE_DEBUG +- iter.b = &b->keys; ++ iter->b = &b->keys; + #endif + + if (!i->seq) +@@ -199,7 +199,7 @@ void bch_btree_node_read_done(struct btr + if (i != b->keys.set[0].data && !i->keys) + goto err; + +- bch_btree_iter_push(&iter, i->start, bset_bkey_last(i)); ++ bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); + + b->written += set_blocks(i, block_bytes(b->c->cache)); + } +@@ -211,7 +211,7 @@ void bch_btree_node_read_done(struct btr + if (i->seq == b->keys.set[0].data->seq) + goto err; + +- bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort); ++ bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort); + + i = b->keys.set[0].data; + err = "short btree key"; +@@ -223,7 +223,7 @@ void bch_btree_node_read_done(struct btr + bch_bset_init_next(&b->keys, write_block(b), + bset_magic(&b->c->cache->sb)); + out: +- mempool_free(iter.heap.data, &b->c->fill_iter); ++ mempool_free(iter, &b->c->fill_iter); + return; + err: + set_btree_node_io_error(b); +@@ -1309,11 +1309,9 @@ static bool btree_gc_mark_node(struct bt + uint8_t stale = 0; + unsigned int keys = 0, good_keys = 0; + struct bkey *k; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + struct bset_tree *t; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- + gc->nodes++; + + for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { +@@ -1572,11 +1570,9 @@ static int btree_gc_rewrite_node(struct + static unsigned int btree_gc_count_keys(struct btree *b) + { + struct bkey *k; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + unsigned int ret = 0; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- + for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) + ret += bkey_u64s(k); + +@@ -1615,18 +1611,18 @@ static int btree_gc_recurse(struct btree + int ret = 0; + bool should_rewrite; + struct bkey *k; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + struct gc_merge_info r[GC_MERGE_NODES]; + struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- bch_btree_iter_init(&b->keys, &iter, 
&b->c->gc_done); ++ bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done); + + for (i = r; i < r + ARRAY_SIZE(r); i++) + i->b = ERR_PTR(-EINTR); + + while (1) { +- k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); ++ k = bch_btree_iter_next_filter(&iter.iter, &b->keys, ++ bch_ptr_bad); + if (k) { + r->b = bch_btree_node_get(b->c, op, k, b->level - 1, + true, b); +@@ -1921,9 +1917,7 @@ static int bch_btree_check_recurse(struc + { + int ret = 0; + struct bkey *k, *p = NULL; +- struct btree_iter iter; +- +- min_heap_init(&iter.heap, NULL, MAX_BSETS); ++ struct btree_iter_stack iter; + + for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) + bch_initial_mark_key(b->c, b->level, k); +@@ -1931,10 +1925,10 @@ static int bch_btree_check_recurse(struc + bch_initial_mark_key(b->c, b->level + 1, &b->key); + + if (b->level) { +- bch_btree_iter_init(&b->keys, &iter, NULL); ++ bch_btree_iter_stack_init(&b->keys, &iter, NULL); + + do { +- k = bch_btree_iter_next_filter(&iter, &b->keys, ++ k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + bch_ptr_bad); + if (k) { + btree_node_prefetch(b, k); +@@ -1962,7 +1956,7 @@ static int bch_btree_check_thread(void * + struct btree_check_info *info = arg; + struct btree_check_state *check_state = info->state; + struct cache_set *c = check_state->c; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + struct bkey *k, *p; + int cur_idx, prev_idx, skip_nr; + +@@ -1970,11 +1964,9 @@ static int bch_btree_check_thread(void * + cur_idx = prev_idx = 0; + ret = 0; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- + /* root node keys are checked before thread created */ +- bch_btree_iter_init(&c->root->keys, &iter, NULL); +- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); ++ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); ++ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); + BUG_ON(!k); + + p = k; +@@ -1992,7 +1984,7 @@ static int bch_btree_check_thread(void * + skip_nr = cur_idx - prev_idx; + + while (skip_nr) { +- k = bch_btree_iter_next_filter(&iter, ++ k = bch_btree_iter_next_filter(&iter.iter, + &c->root->keys, + bch_ptr_bad); + if (k) +@@ -2065,11 +2057,9 @@ int bch_btree_check(struct cache_set *c) + int ret = 0; + int i; + struct bkey *k = NULL; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + struct btree_check_state check_state; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- + /* check and mark root node keys */ + for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) + bch_initial_mark_key(c, c->root->level, k); +@@ -2563,12 +2553,11 @@ static int bch_btree_map_nodes_recurse(s + + if (b->level) { + struct bkey *k; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- bch_btree_iter_init(&b->keys, &iter, from); ++ bch_btree_iter_stack_init(&b->keys, &iter, from); + +- while ((k = bch_btree_iter_next_filter(&iter, &b->keys, ++ while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + bch_ptr_bad))) { + ret = bcache_btree(map_nodes_recurse, k, b, + op, from, fn, flags); +@@ -2597,12 +2586,12 @@ int bch_btree_map_keys_recurse(struct bt + { + int ret = MAP_CONTINUE; + struct bkey *k; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- bch_btree_iter_init(&b->keys, &iter, from); ++ bch_btree_iter_stack_init(&b->keys, &iter, from); + +- while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ++ while ((k = 
bch_btree_iter_next_filter(&iter.iter, &b->keys, ++ bch_ptr_bad))) { + ret = !b->level + ? fn(op, b, k) + : bcache_btree(map_keys_recurse, k, +--- a/drivers/md/bcache/extents.c ++++ b/drivers/md/bcache/extents.c +@@ -33,16 +33,15 @@ static void sort_key_next(struct btree_i + i->k = bkey_next(i->k); + + if (i->k == i->end) +- *i = iter->heap.data[--iter->heap.nr]; ++ *i = iter->data[--iter->used]; + } + +-static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args) ++static bool bch_key_sort_cmp(struct btree_iter_set l, ++ struct btree_iter_set r) + { +- struct btree_iter_set *_l = (struct btree_iter_set *)l; +- struct btree_iter_set *_r = (struct btree_iter_set *)r; +- int64_t c = bkey_cmp(_l->k, _r->k); ++ int64_t c = bkey_cmp(l.k, r.k); + +- return !(c ? c > 0 : _l->k < _r->k); ++ return c ? c > 0 : l.k < r.k; + } + + static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) +@@ -239,7 +238,7 @@ static bool bch_btree_ptr_insert_fixup(s + } + + const struct btree_keys_ops bch_btree_keys_ops = { +- .sort_cmp = new_bch_key_sort_cmp, ++ .sort_cmp = bch_key_sort_cmp, + .insert_fixup = bch_btree_ptr_insert_fixup, + .key_invalid = bch_btree_ptr_invalid, + .key_bad = bch_btree_ptr_bad, +@@ -256,36 +255,22 @@ const struct btree_keys_ops bch_btree_ke + * Necessary for btree_sort_fixup() - if there are multiple keys that compare + * equal in different sets, we have to process them newest to oldest. + */ +- +-static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args) +-{ +- struct btree_iter_set *_l = (struct btree_iter_set *)l; +- struct btree_iter_set *_r = (struct btree_iter_set *)r; +- int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k)); +- +- return !(c ? c > 0 : _l->k < _r->k); +-} +- +-static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args) ++static bool bch_extent_sort_cmp(struct btree_iter_set l, ++ struct btree_iter_set r) + { +- struct btree_iter_set *_iter1 = iter1; +- struct btree_iter_set *_iter2 = iter2; ++ int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); + +- swap(*_iter1, *_iter2); ++ return c ? 
c > 0 : l.k < r.k; + } + + static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, + struct bkey *tmp) + { +- const struct min_heap_callbacks callbacks = { +- .less = new_bch_extent_sort_cmp, +- .swp = new_btree_iter_swap, +- }; +- while (iter->heap.nr > 1) { +- struct btree_iter_set *top = iter->heap.data, *i = top + 1; ++ while (iter->used > 1) { ++ struct btree_iter_set *top = iter->data, *i = top + 1; + +- if (iter->heap.nr > 2 && +- !new_bch_extent_sort_cmp(&i[0], &i[1], NULL)) ++ if (iter->used > 2 && ++ bch_extent_sort_cmp(i[0], i[1])) + i++; + + if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) +@@ -293,7 +278,7 @@ static struct bkey *bch_extent_sort_fixu + + if (!KEY_SIZE(i->k)) { + sort_key_next(iter, i); +- min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); ++ heap_sift(iter, i - top, bch_extent_sort_cmp); + continue; + } + +@@ -303,7 +288,7 @@ static struct bkey *bch_extent_sort_fixu + else + bch_cut_front(top->k, i->k); + +- min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); ++ heap_sift(iter, i - top, bch_extent_sort_cmp); + } else { + /* can't happen because of comparison func */ + BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); +@@ -313,7 +298,7 @@ static struct bkey *bch_extent_sort_fixu + + bch_cut_back(&START_KEY(i->k), tmp); + bch_cut_front(i->k, top->k); +- min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); ++ heap_sift(iter, 0, bch_extent_sort_cmp); + + return tmp; + } else { +@@ -633,7 +618,7 @@ static bool bch_extent_merge(struct btre + } + + const struct btree_keys_ops bch_extent_keys_ops = { +- .sort_cmp = new_bch_extent_sort_cmp, ++ .sort_cmp = bch_extent_sort_cmp, + .sort_fixup = bch_extent_sort_fixup, + .insert_fixup = bch_extent_insert_fixup, + .key_invalid = bch_extent_invalid, +--- a/drivers/md/bcache/movinggc.c ++++ b/drivers/md/bcache/movinggc.c +@@ -182,27 +182,16 @@ err: if (!IS_ERR_OR_NULL(w->private)) + closure_sync(&cl); + } + +-static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args) ++static bool bucket_cmp(struct bucket *l, struct bucket *r) + { +- struct bucket **_l = (struct bucket **)l; +- struct bucket **_r = (struct bucket **)r; +- +- return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r); +-} +- +-static void new_bucket_swap(void *l, void *r, void __always_unused *args) +-{ +- struct bucket **_l = l; +- struct bucket **_r = r; +- +- swap(*_l, *_r); ++ return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); + } + + static unsigned int bucket_heap_top(struct cache *ca) + { + struct bucket *b; + +- return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0; ++ return (b = heap_peek(&ca->heap)) ? 
GC_SECTORS_USED(b) : 0; + } + + void bch_moving_gc(struct cache_set *c) +@@ -210,10 +199,6 @@ void bch_moving_gc(struct cache_set *c) + struct cache *ca = c->cache; + struct bucket *b; + unsigned long sectors_to_move, reserve_sectors; +- const struct min_heap_callbacks callbacks = { +- .less = new_bucket_cmp, +- .swp = new_bucket_swap, +- }; + + if (!c->copy_gc_enabled) + return; +@@ -224,7 +209,7 @@ void bch_moving_gc(struct cache_set *c) + reserve_sectors = ca->sb.bucket_size * + fifo_used(&ca->free[RESERVE_MOVINGGC]); + +- ca->heap.nr = 0; ++ ca->heap.used = 0; + + for_each_bucket(b, ca) { + if (GC_MARK(b) == GC_MARK_METADATA || +@@ -233,31 +218,25 @@ void bch_moving_gc(struct cache_set *c) + atomic_read(&b->pin)) + continue; + +- if (!min_heap_full(&ca->heap)) { ++ if (!heap_full(&ca->heap)) { + sectors_to_move += GC_SECTORS_USED(b); +- min_heap_push(&ca->heap, &b, &callbacks, NULL); +- } else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) { ++ heap_add(&ca->heap, b, bucket_cmp); ++ } else if (bucket_cmp(b, heap_peek(&ca->heap))) { + sectors_to_move -= bucket_heap_top(ca); + sectors_to_move += GC_SECTORS_USED(b); + + ca->heap.data[0] = b; +- min_heap_sift_down(&ca->heap, 0, &callbacks, NULL); ++ heap_sift(&ca->heap, 0, bucket_cmp); + } + } + + while (sectors_to_move > reserve_sectors) { +- if (ca->heap.nr) { +- b = min_heap_peek(&ca->heap)[0]; +- min_heap_pop(&ca->heap, &callbacks, NULL); +- } ++ heap_pop(&ca->heap, b, bucket_cmp); + sectors_to_move -= GC_SECTORS_USED(b); + } + +- while (ca->heap.nr) { +- b = min_heap_peek(&ca->heap)[0]; +- min_heap_pop(&ca->heap, &callbacks, NULL); ++ while (heap_pop(&ca->heap, b, bucket_cmp)) + SET_GC_MOVE(b, 1); +- } + + mutex_unlock(&c->bucket_lock); + +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1912,7 +1912,8 @@ struct cache_set *bch_cache_set_alloc(st + INIT_LIST_HEAD(&c->btree_cache_freed); + INIT_LIST_HEAD(&c->data_buckets); + +- iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * ++ iter_size = sizeof(struct btree_iter) + ++ ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * + sizeof(struct btree_iter_set); + + c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -660,9 +660,7 @@ static unsigned int bch_root_usage(struc + unsigned int bytes = 0; + struct bkey *k; + struct btree *b; +- struct btree_iter iter; +- +- min_heap_init(&iter.heap, NULL, MAX_BSETS); ++ struct btree_iter_stack iter; + + goto lock_root; + +--- a/drivers/md/bcache/util.h ++++ b/drivers/md/bcache/util.h +@@ -9,7 +9,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -31,10 +30,16 @@ struct closure; + + #endif + ++#define DECLARE_HEAP(type, name) \ ++ struct { \ ++ size_t size, used; \ ++ type *data; \ ++ } name ++ + #define init_heap(heap, _size, gfp) \ + ({ \ + size_t _bytes; \ +- (heap)->nr = 0; \ ++ (heap)->used = 0; \ + (heap)->size = (_size); \ + _bytes = (heap)->size * sizeof(*(heap)->data); \ + (heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \ +@@ -47,6 +52,64 @@ do { \ + (heap)->data = NULL; \ + } while (0) + ++#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j]) ++ ++#define heap_sift(h, i, cmp) \ ++do { \ ++ size_t _r, _j = i; \ ++ \ ++ for (; _j * 2 + 1 < (h)->used; _j = _r) { \ ++ _r = _j * 2 + 1; \ ++ if (_r + 1 < (h)->used && \ ++ cmp((h)->data[_r], (h)->data[_r + 1])) \ ++ _r++; \ ++ \ ++ if (cmp((h)->data[_r], (h)->data[_j])) \ ++ break; \ ++ heap_swap(h, _r, _j); \ ++ } 
\ ++} while (0) ++ ++#define heap_sift_down(h, i, cmp) \ ++do { \ ++ while (i) { \ ++ size_t p = (i - 1) / 2; \ ++ if (cmp((h)->data[i], (h)->data[p])) \ ++ break; \ ++ heap_swap(h, i, p); \ ++ i = p; \ ++ } \ ++} while (0) ++ ++#define heap_add(h, d, cmp) \ ++({ \ ++ bool _r = !heap_full(h); \ ++ if (_r) { \ ++ size_t _i = (h)->used++; \ ++ (h)->data[_i] = d; \ ++ \ ++ heap_sift_down(h, _i, cmp); \ ++ heap_sift(h, _i, cmp); \ ++ } \ ++ _r; \ ++}) ++ ++#define heap_pop(h, d, cmp) \ ++({ \ ++ bool _r = (h)->used; \ ++ if (_r) { \ ++ (d) = (h)->data[0]; \ ++ (h)->used--; \ ++ heap_swap(h, 0, (h)->used); \ ++ heap_sift(h, 0, cmp); \ ++ } \ ++ _r; \ ++}) ++ ++#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL) ++ ++#define heap_full(h) ((h)->used == (h)->size) ++ + #define DECLARE_FIFO(type, name) \ + struct { \ + size_t front, back, size, mask; \ +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -908,16 +908,15 @@ static int bch_dirty_init_thread(void *a + struct dirty_init_thrd_info *info = arg; + struct bch_dirty_init_state *state = info->state; + struct cache_set *c = state->c; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + struct bkey *k, *p; + int cur_idx, prev_idx, skip_nr; + + k = p = NULL; + prev_idx = 0; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- bch_btree_iter_init(&c->root->keys, &iter, NULL); +- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); ++ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); ++ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); + BUG_ON(!k); + + p = k; +@@ -931,7 +930,7 @@ static int bch_dirty_init_thread(void *a + skip_nr = cur_idx - prev_idx; + + while (skip_nr) { +- k = bch_btree_iter_next_filter(&iter, ++ k = bch_btree_iter_next_filter(&iter.iter, + &c->root->keys, + bch_ptr_bad); + if (k) +@@ -980,13 +979,11 @@ void bch_sectors_dirty_init(struct bcach + int i; + struct btree *b = NULL; + struct bkey *k = NULL; +- struct btree_iter iter; ++ struct btree_iter_stack iter; + struct sectors_dirty_init op; + struct cache_set *c = d->c; + struct bch_dirty_init_state state; + +- min_heap_init(&iter.heap, NULL, MAX_BSETS); +- + retry_lock: + b = c->root; + rw_lock(0, b, b->level); diff --git a/queue-6.15/revert-bcache-update-min_heap_callbacks-to-use-default-builtin-swap.patch b/queue-6.15/revert-bcache-update-min_heap_callbacks-to-use-default-builtin-swap.patch new file mode 100644 index 0000000000..7eb5780294 --- /dev/null +++ b/queue-6.15/revert-bcache-update-min_heap_callbacks-to-use-default-builtin-swap.patch @@ -0,0 +1,204 @@ +From 845f1f2d69f3f49b3d8c142265952c8257e3368c Mon Sep 17 00:00:00 2001 +From: Kuan-Wei Chiu +Date: Sun, 15 Jun 2025 04:23:51 +0800 +Subject: Revert "bcache: update min_heap_callbacks to use default builtin swap" + +From: Kuan-Wei Chiu + +commit 845f1f2d69f3f49b3d8c142265952c8257e3368c upstream. + +Patch series "bcache: Revert min_heap migration due to performance +regression". + +This patch series reverts the migration of bcache from its original heap +implementation to the generic min_heap library. While the original change +aimed to simplify the code and improve maintainability, it introduced a +severe performance regression in real-world scenarios. + +As reported by Robert, systems using bcache now suffer from periodic +latency spikes, with P100 (max) latency increasing from 600 ms to 2.4 +seconds every 5 minutes. 
This degrades bcache's value as a low-latency +caching layer, and leads to frequent timeouts and application stalls in +production environments. + +The primary cause of this regression is the behavior of the generic +min_heap implementation's bottom-up sift_down, which performs up to 2 * +log2(n) comparisons when many elements are equal. The original top-down +variant used by bcache only required O(1) comparisons in such cases. The +issue was further exacerbated by commit 92a8b224b833 ("lib/min_heap: +introduce non-inline versions of min heap API functions"), which +introduced non-inlined versions of the min_heap API, adding function call +overhead to a performance-critical hot path. + + +This patch (of 3): + +This reverts commit 3d8a9a1c35227c3f1b0bd132c9f0a80dbda07b65. + +Although removing the custom swap function simplified the code, this +change is part of a broader migration to the generic min_heap API that +introduced significant performance regressions in bcache. + +As reported by Robert, bcache now suffers from latency spikes, with P100 +(max) latency increasing from 600 ms to 2.4 seconds every 5 minutes. +These regressions degrade bcache's effectiveness as a low-latency cache +layer and lead to frequent timeouts and application stalls in production +environments. + +This revert is part of a series of changes to restore previous performance +by undoing the min_heap transition. + +Link: https://lkml.kernel.org/r/20250614202353.1632957-1-visitorckw@gmail.com +Link: https://lore.kernel.org/lkml/CAJhEC05+0S69z+3+FB2Cd0hD+pCRyWTKLEOsc8BOmH73p1m+KQ@mail.gmail.com +Link: https://lkml.kernel.org/r/20250614202353.1632957-2-visitorckw@gmail.com +Fixes: 866898efbb25 ("bcache: remove heap-related macros and switch to generic min_heap") +Fixes: 92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap API functions") +Signed-off-by: Kuan-Wei Chiu +Reported-by: Robert Pang +Closes: https://lore.kernel.org/linux-bcache/CAJhEC06F_AtrPgw2-7CvCqZgeStgCtitbD-ryuPpXQA-JG5XXw@mail.gmail.com +Acked-by: Coly Li +Cc: Ching-Chun (Jim) Huang +Cc: Kent Overstreet +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/bcache/alloc.c | 11 +++++++++-- + drivers/md/bcache/bset.c | 14 +++++++++++--- + drivers/md/bcache/extents.c | 10 +++++++++- + drivers/md/bcache/movinggc.c | 10 +++++++++- + 4 files changed, 38 insertions(+), 7 deletions(-) + +diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c +index 8998e61efa40..da50f6661bae 100644 +--- a/drivers/md/bcache/alloc.c ++++ b/drivers/md/bcache/alloc.c +@@ -189,16 +189,23 @@ static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args) + return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs); + } + ++static inline void new_bucket_swap(void *l, void *r, void __always_unused *args) ++{ ++ struct bucket **lhs = l, **rhs = r; ++ ++ swap(*lhs, *rhs); ++} ++ + static void invalidate_buckets_lru(struct cache *ca) + { + struct bucket *b; + const struct min_heap_callbacks bucket_max_cmp_callback = { + .less = new_bucket_max_cmp, +- .swp = NULL, ++ .swp = new_bucket_swap, + }; + const struct min_heap_callbacks bucket_min_cmp_callback = { + .less = new_bucket_min_cmp, +- .swp = NULL, ++ .swp = new_bucket_swap, + }; + + ca->heap.nr = 0; +diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c +index 68258a16e125..bd97d8626887 100644 +--- a/drivers/md/bcache/bset.c ++++ b/drivers/md/bcache/bset.c +@@ -1093,6 +1093,14 @@ static inline bool new_btree_iter_cmp(const void *l, const void 
*r, void __alway + return bkey_cmp(_l->k, _r->k) <= 0; + } + ++static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args) ++{ ++ struct btree_iter_set *_iter1 = iter1; ++ struct btree_iter_set *_iter2 = iter2; ++ ++ swap(*_iter1, *_iter2); ++} ++ + static inline bool btree_iter_end(struct btree_iter *iter) + { + return !iter->heap.nr; +@@ -1103,7 +1111,7 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, + { + const struct min_heap_callbacks callbacks = { + .less = new_btree_iter_cmp, +- .swp = NULL, ++ .swp = new_btree_iter_swap, + }; + + if (k != end) +@@ -1149,7 +1157,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, + struct bkey *ret = NULL; + const struct min_heap_callbacks callbacks = { + .less = cmp, +- .swp = NULL, ++ .swp = new_btree_iter_swap, + }; + + if (!btree_iter_end(iter)) { +@@ -1223,7 +1231,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, + : bch_ptr_invalid; + const struct min_heap_callbacks callbacks = { + .less = b->ops->sort_cmp, +- .swp = NULL, ++ .swp = new_btree_iter_swap, + }; + + /* Heapify the iterator, using our comparison function */ +diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c +index 4b84fda1530a..a7221e5dbe81 100644 +--- a/drivers/md/bcache/extents.c ++++ b/drivers/md/bcache/extents.c +@@ -266,12 +266,20 @@ static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_ + return !(c ? c > 0 : _l->k < _r->k); + } + ++static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args) ++{ ++ struct btree_iter_set *_iter1 = iter1; ++ struct btree_iter_set *_iter2 = iter2; ++ ++ swap(*_iter1, *_iter2); ++} ++ + static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, + struct bkey *tmp) + { + const struct min_heap_callbacks callbacks = { + .less = new_bch_extent_sort_cmp, +- .swp = NULL, ++ .swp = new_btree_iter_swap, + }; + while (iter->heap.nr > 1) { + struct btree_iter_set *top = iter->heap.data, *i = top + 1; +diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c +index 45ca134cbf02..d6c73dd8eb2b 100644 +--- a/drivers/md/bcache/movinggc.c ++++ b/drivers/md/bcache/movinggc.c +@@ -190,6 +190,14 @@ static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *a + return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r); + } + ++static void new_bucket_swap(void *l, void *r, void __always_unused *args) ++{ ++ struct bucket **_l = l; ++ struct bucket **_r = r; ++ ++ swap(*_l, *_r); ++} ++ + static unsigned int bucket_heap_top(struct cache *ca) + { + struct bucket *b; +@@ -204,7 +212,7 @@ void bch_moving_gc(struct cache_set *c) + unsigned long sectors_to_move, reserve_sectors; + const struct min_heap_callbacks callbacks = { + .less = new_bucket_cmp, +- .swp = NULL, ++ .swp = new_bucket_swap, + }; + + if (!c->copy_gc_enabled) +-- +2.50.0 + diff --git a/queue-6.15/scsi-fnic-fix-crash-in-fnic_wq_cmpl_handler-when-fdmi-times-out.patch b/queue-6.15/scsi-fnic-fix-crash-in-fnic_wq_cmpl_handler-when-fdmi-times-out.patch new file mode 100644 index 0000000000..8b663db6e0 --- /dev/null +++ b/queue-6.15/scsi-fnic-fix-crash-in-fnic_wq_cmpl_handler-when-fdmi-times-out.patch @@ -0,0 +1,250 @@ +From a35b29bdedb4d2ae3160d4d6684a6f1ecd9ca7c2 Mon Sep 17 00:00:00 2001 +From: Karan Tilak Kumar +Date: Tue, 17 Jun 2025 17:34:28 -0700 +Subject: scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out + +From: Karan Tilak Kumar + +commit 
a35b29bdedb4d2ae3160d4d6684a6f1ecd9ca7c2 upstream. + +When both the RHBA and RPA FDMI requests time out, fnic reuses a frame to +send ABTS for each of them. On send completion, this causes an attempt to +free the same frame twice that leads to a crash. + +Fix crash by allocating separate frames for RHBA and RPA, and modify ABTS +logic accordingly. + +Tested by checking MDS for FDMI information. + +Tested by using instrumented driver to: + + - Drop PLOGI response + - Drop RHBA response + - Drop RPA response + - Drop RHBA and RPA response + - Drop PLOGI response + ABTS response + - Drop RHBA response + ABTS response + - Drop RPA response + ABTS response + - Drop RHBA and RPA response + ABTS response for both of them + +Fixes: 09c1e6ab4ab2 ("scsi: fnic: Add and integrate support for FDMI") +Reviewed-by: Sesidhar Baddela +Reviewed-by: Arulprabhu Ponnusamy +Reviewed-by: Gian Carlo Boffa +Tested-by: Arun Easi +Co-developed-by: Arun Easi +Signed-off-by: Arun Easi +Tested-by: Karan Tilak Kumar +Cc: stable@vger.kernel.org +Signed-off-by: Karan Tilak Kumar +Link: https://lore.kernel.org/r/20250618003431.6314-1-kartilak@cisco.com +Reviewed-by: John Meneghini +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/fnic/fdls_disc.c | 113 +++++++++++++++++++++++++++++++----------- + drivers/scsi/fnic/fnic.h | 2 + drivers/scsi/fnic/fnic_fdls.h | 1 + 3 files changed, 87 insertions(+), 29 deletions(-) + +--- a/drivers/scsi/fnic/fdls_disc.c ++++ b/drivers/scsi/fnic/fdls_disc.c +@@ -763,47 +763,69 @@ static void fdls_send_fabric_abts(struct + iport->fabric.timer_pending = 1; + } + +-static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) ++static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport, ++ uint16_t oxid) + { +- uint8_t *frame; ++ struct fc_frame_header *pfdmi_abts; + uint8_t d_id[3]; ++ uint8_t *frame; + struct fnic *fnic = iport->fnic; +- struct fc_frame_header *pfabric_abts; +- unsigned long fdmi_tov; +- uint16_t oxid; +- uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET + +- sizeof(struct fc_frame_header); + + frame = fdls_alloc_frame(iport); + if (frame == NULL) { + FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num, + "Failed to allocate frame to send FDMI ABTS"); +- return; ++ return NULL; + } + +- pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET); ++ pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET); + fdls_init_fabric_abts_frame(frame, iport); + + hton24(d_id, FC_FID_MGMT_SERV); +- FNIC_STD_SET_D_ID(*pfabric_abts, d_id); ++ FNIC_STD_SET_D_ID(*pfdmi_abts, d_id); ++ FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid); ++ ++ return frame; ++} ++ ++static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) ++{ ++ uint8_t *frame; ++ unsigned long fdmi_tov; ++ uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET + ++ sizeof(struct fc_frame_header); + + if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) { +- oxid = iport->active_oxid_fdmi_plogi; +- FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); ++ frame = fdls_alloc_init_fdmi_abts_frame(iport, ++ iport->active_oxid_fdmi_plogi); ++ if (frame == NULL) ++ return; ++ + fnic_send_fcoe_frame(iport, frame, frame_size); + } else { + if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) { +- oxid = iport->active_oxid_fdmi_rhba; +- FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); ++ frame = fdls_alloc_init_fdmi_abts_frame(iport, ++ iport->active_oxid_fdmi_rhba); ++ if (frame == NULL) ++ return; ++ + fnic_send_fcoe_frame(iport, frame, frame_size); + } + if 
(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) { +- oxid = iport->active_oxid_fdmi_rpa; +- FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); ++ frame = fdls_alloc_init_fdmi_abts_frame(iport, ++ iport->active_oxid_fdmi_rpa); ++ if (frame == NULL) { ++ if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) ++ goto arm_timer; ++ else ++ return; ++ } ++ + fnic_send_fcoe_frame(iport, frame, frame_size); + } + } + ++arm_timer: + fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov); + mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov)); + iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING; +@@ -2244,6 +2266,21 @@ void fdls_fabric_timer_callback(struct t + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + } + ++void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport) ++{ ++ struct fnic *fnic = iport->fnic; ++ ++ iport->fabric.fdmi_pending = 0; ++ /* If max retries not exhausted, start over from fdmi plogi */ ++ if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) { ++ iport->fabric.fdmi_retry++; ++ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, ++ "Retry FDMI PLOGI. FDMI retry: %d", ++ iport->fabric.fdmi_retry); ++ fdls_send_fdmi_plogi(iport); ++ } ++} ++ + void fdls_fdmi_timer_callback(struct timer_list *t) + { + struct fnic_fdls_fabric_s *fabric = from_timer(fabric, t, fdmi_timer); +@@ -2289,14 +2326,7 @@ void fdls_fdmi_timer_callback(struct tim + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + +- iport->fabric.fdmi_pending = 0; +- /* If max retries not exhaused, start over from fdmi plogi */ +- if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) { +- iport->fabric.fdmi_retry++; +- FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, +- "retry fdmi timer %d", iport->fabric.fdmi_retry); +- fdls_send_fdmi_plogi(iport); +- } ++ fdls_fdmi_retry_plogi(iport); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); +@@ -3714,11 +3744,32 @@ static void fdls_process_fdmi_abts_rsp(s + switch (FNIC_FRAME_TYPE(oxid)) { + case FNIC_FRAME_TYPE_FDMI_PLOGI: + fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi); ++ ++ iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING; ++ iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + break; + case FNIC_FRAME_TYPE_FDMI_RHBA: ++ iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING; ++ ++ /* If RPA is still pending, don't turn off ABORT PENDING. ++ * We count on the timer to detect the ABTS timeout and take ++ * corrective action. ++ */ ++ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING)) ++ iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; ++ + fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba); + break; + case FNIC_FRAME_TYPE_FDMI_RPA: ++ iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING; ++ ++ /* If RHBA is still pending, don't turn off ABORT PENDING. ++ * We count on the timer to detect the ABTS timeout and take ++ * corrective action. 
++ */ ++ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)) ++ iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; ++ + fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa); + break; + default: +@@ -3728,10 +3779,16 @@ static void fdls_process_fdmi_abts_rsp(s + break; + } + +- timer_delete_sync(&iport->fabric.fdmi_timer); +- iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; +- +- fdls_send_fdmi_plogi(iport); ++ /* ++ * Only if ABORT PENDING is off, delete the timer, and if no other ++ * operations are pending, retry FDMI. ++ * Otherwise, let the timer pop and take the appropriate action. ++ */ ++ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) { ++ timer_delete_sync(&iport->fabric.fdmi_timer); ++ if (!iport->fabric.fdmi_pending) ++ fdls_fdmi_retry_plogi(iport); ++ } + } + + static void +--- a/drivers/scsi/fnic/fnic.h ++++ b/drivers/scsi/fnic/fnic.h +@@ -30,7 +30,7 @@ + + #define DRV_NAME "fnic" + #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" +-#define DRV_VERSION "1.8.0.0" ++#define DRV_VERSION "1.8.0.1" + #define PFX DRV_NAME ": " + #define DFX DRV_NAME "%d: " + +--- a/drivers/scsi/fnic/fnic_fdls.h ++++ b/drivers/scsi/fnic/fnic_fdls.h +@@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_ip + bool fdls_delete_tport(struct fnic_iport_s *iport, + struct fnic_tport_s *tport); + void fdls_fdmi_timer_callback(struct timer_list *t); ++void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport); + + /* fnic_fcs.c */ + void fnic_fdls_init(struct fnic *fnic, int usefip); diff --git a/queue-6.15/scsi-fnic-turn-off-fdmi-active-flags-on-link-down.patch b/queue-6.15/scsi-fnic-turn-off-fdmi-active-flags-on-link-down.patch new file mode 100644 index 0000000000..a214b206b8 --- /dev/null +++ b/queue-6.15/scsi-fnic-turn-off-fdmi-active-flags-on-link-down.patch @@ -0,0 +1,60 @@ +From 74f46a0524f8d2f01dc7ca95bb5fc463a8603e72 Mon Sep 17 00:00:00 2001 +From: Karan Tilak Kumar +Date: Tue, 17 Jun 2025 17:34:29 -0700 +Subject: scsi: fnic: Turn off FDMI ACTIVE flags on link down + +From: Karan Tilak Kumar + +commit 74f46a0524f8d2f01dc7ca95bb5fc463a8603e72 upstream. + +When the link goes down and comes up, FDMI requests are not sent out +anymore. + +Fix bug by turning off FNIC_FDMI_ACTIVE when the link goes down. + +Fixes: 09c1e6ab4ab2 ("scsi: fnic: Add and integrate support for FDMI") +Reviewed-by: Sesidhar Baddela +Reviewed-by: Arulprabhu Ponnusamy +Reviewed-by: Gian Carlo Boffa +Reviewed-by: Arun Easi +Tested-by: Karan Tilak Kumar +Cc: stable@vger.kernel.org +Signed-off-by: Karan Tilak Kumar +Link: https://lore.kernel.org/r/20250618003431.6314-2-kartilak@cisco.com +Reviewed-by: John Meneghini +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/fnic/fdls_disc.c | 9 ++++++--- + drivers/scsi/fnic/fnic.h | 2 +- + 2 files changed, 7 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/fnic/fdls_disc.c ++++ b/drivers/scsi/fnic/fdls_disc.c +@@ -5027,9 +5027,12 @@ void fnic_fdls_link_down(struct fnic_ipo + fdls_delete_tport(iport, tport); + } + +- if ((fnic_fdmi_support == 1) && (iport->fabric.fdmi_pending > 0)) { +- timer_delete_sync(&iport->fabric.fdmi_timer); +- iport->fabric.fdmi_pending = 0; ++ if (fnic_fdmi_support == 1) { ++ if (iport->fabric.fdmi_pending > 0) { ++ timer_delete_sync(&iport->fabric.fdmi_timer); ++ iport->fabric.fdmi_pending = 0; ++ } ++ iport->flags &= ~FNIC_FDMI_ACTIVE; + } + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, +--- a/drivers/scsi/fnic/fnic.h ++++ b/drivers/scsi/fnic/fnic.h +@@ -30,7 +30,7 @@ + + #define DRV_NAME "fnic" + #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" +-#define DRV_VERSION "1.8.0.1" ++#define DRV_VERSION "1.8.0.2" + #define PFX DRV_NAME ": " + #define DFX DRV_NAME "%d: " + diff --git a/queue-6.15/scsi-megaraid_sas-fix-invalid-node-index.patch b/queue-6.15/scsi-megaraid_sas-fix-invalid-node-index.patch new file mode 100644 index 0000000000..eec1078574 --- /dev/null +++ b/queue-6.15/scsi-megaraid_sas-fix-invalid-node-index.patch @@ -0,0 +1,66 @@ +From 752eb816b55adb0673727ba0ed96609a17895654 Mon Sep 17 00:00:00 2001 +From: Chen Yu +Date: Wed, 4 Jun 2025 12:25:56 +0800 +Subject: scsi: megaraid_sas: Fix invalid node index + +From: Chen Yu + +commit 752eb816b55adb0673727ba0ed96609a17895654 upstream. + +On a system with DRAM interleave enabled, out-of-bound access is +detected: + +megaraid_sas 0000:3f:00.0: requested/available msix 128/128 poll_queue 0 +------------[ cut here ]------------ +UBSAN: array-index-out-of-bounds in ./arch/x86/include/asm/topology.h:72:28 +index -1 is out of range for type 'cpumask *[1024]' +dump_stack_lvl+0x5d/0x80 +ubsan_epilogue+0x5/0x2b +__ubsan_handle_out_of_bounds.cold+0x46/0x4b +megasas_alloc_irq_vectors+0x149/0x190 [megaraid_sas] +megasas_probe_one.cold+0xa4d/0x189c [megaraid_sas] +local_pci_probe+0x42/0x90 +pci_device_probe+0xdc/0x290 +really_probe+0xdb/0x340 +__driver_probe_device+0x78/0x110 +driver_probe_device+0x1f/0xa0 +__driver_attach+0xba/0x1c0 +bus_for_each_dev+0x8b/0xe0 +bus_add_driver+0x142/0x220 +driver_register+0x72/0xd0 +megasas_init+0xdf/0xff0 [megaraid_sas] +do_one_initcall+0x57/0x310 +do_init_module+0x90/0x250 +init_module_from_file+0x85/0xc0 +idempotent_init_module+0x114/0x310 +__x64_sys_finit_module+0x65/0xc0 +do_syscall_64+0x82/0x170 +entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Fix it accordingly. + +Signed-off-by: Chen Yu +Link: https://lore.kernel.org/r/20250604042556.3731059-1-yu.c.chen@intel.com +Fixes: 8049da6f3943 ("scsi: megaraid_sas: Use irq_set_affinity_and_hint()") +Cc: stable@vger.kernel.org +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -5910,7 +5910,11 @@ megasas_set_high_iops_queue_affinity_and + const struct cpumask *mask; + + if (instance->perf_mode == MR_BALANCED_PERF_MODE) { +- mask = cpumask_of_node(dev_to_node(&instance->pdev->dev)); ++ int nid = dev_to_node(&instance->pdev->dev); ++ ++ if (nid == NUMA_NO_NODE) ++ nid = 0; ++ mask = cpumask_of_node(nid); + + for (i = 0; i < instance->low_latency_index_start; i++) { + irq = pci_irq_vector(instance->pdev, i); diff --git a/queue-6.15/scsi-ufs-core-fix-clk-scaling-to-be-conditional-in-reset-and-restore.patch b/queue-6.15/scsi-ufs-core-fix-clk-scaling-to-be-conditional-in-reset-and-restore.patch new file mode 100644 index 0000000000..f27623d058 --- /dev/null +++ b/queue-6.15/scsi-ufs-core-fix-clk-scaling-to-be-conditional-in-reset-and-restore.patch @@ -0,0 +1,36 @@ +From 2e083cd802294693a5414e4557a183dd7e442e71 Mon Sep 17 00:00:00 2001 +From: anvithdosapati +Date: Mon, 16 Jun 2025 08:57:34 +0000 +Subject: scsi: ufs: core: Fix clk scaling to be conditional in reset and restore + +From: anvithdosapati + +commit 2e083cd802294693a5414e4557a183dd7e442e71 upstream. + +In ufshcd_host_reset_and_restore(), scale up clocks only when clock +scaling is supported. Without this change CPU latency is voted for 0 +(ufshcd_pm_qos_update) during resume unconditionally. + +Signed-off-by: anvithdosapati +Link: https://lore.kernel.org/r/20250616085734.2133581-1-anvithdosapati@google.com +Fixes: a3cd5ec55f6c ("scsi: ufs: add load based scaling of UFS gear") +Cc: stable@vger.kernel.org +Reviewed-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ufs/core/ufshcd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -7753,7 +7753,8 @@ static int ufshcd_host_reset_and_restore + hba->silence_err_logs = false; + + /* scale up clocks to max frequency before full reinitialization */ +- ufshcd_scale_clks(hba, ULONG_MAX, true); ++ if (ufshcd_is_clkscaling_supported(hba)) ++ ufshcd_scale_clks(hba, ULONG_MAX, true); + + err = ufshcd_hba_enable(hba); + diff --git a/queue-6.15/selinux-change-security_compute_sid-to-return-the-ssid-or-tsid-on-match.patch b/queue-6.15/selinux-change-security_compute_sid-to-return-the-ssid-or-tsid-on-match.patch new file mode 100644 index 0000000000..c770b4847b --- /dev/null +++ b/queue-6.15/selinux-change-security_compute_sid-to-return-the-ssid-or-tsid-on-match.patch @@ -0,0 +1,50 @@ +From fde46f60f6c5138ee422087addbc5bf5b4968bf1 Mon Sep 17 00:00:00 2001 +From: Stephen Smalley +Date: Tue, 10 Jun 2025 15:48:27 -0400 +Subject: selinux: change security_compute_sid to return the ssid or tsid on match + +From: Stephen Smalley + +commit fde46f60f6c5138ee422087addbc5bf5b4968bf1 upstream. + +If the end result of a security_compute_sid() computation matches the +ssid or tsid, return that SID rather than looking it up again. This +avoids the problem of multiple initial SIDs that map to the same +context. 
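+
+As a toy user-space illustration (invented names and table, not the
+kernel code): when two SIDs alias the same context, a reverse lookup
+can only ever return one of them, so preferring the caller's SID on a
+match keeps it stable:
+
+  #include <stdint.h>
+  #include <string.h>
+
+  /* SIDs 1 and 2 both map to "ctx_A", like aliased initial SIDs. */
+  static const char *sid_table[] = { "", "ctx_A", "ctx_A" };
+
+  static uint32_t context_to_sid(const char *ctx)
+  {
+          for (uint32_t sid = 1; sid <= 2; sid++) /* finds SID 1 first */
+                  if (strcmp(sid_table[sid], ctx) == 0)
+                          return sid;
+          return 0;
+  }
+
+  static uint32_t compute_sid(uint32_t ssid, const char *newctx)
+  {
+          if (strcmp(sid_table[ssid], newctx) == 0)
+                  return ssid;               /* SID 2 stays SID 2 */
+          return context_to_sid(newctx);     /* old path: 2 became 1 */
+  }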
+ +Cc: stable@vger.kernel.org +Reported-by: Guido Trentalancia +Fixes: ae254858ce07 ("selinux: introduce an initial SID for early boot processes") +Signed-off-by: Stephen Smalley +Tested-by: Guido Trentalancia +Signed-off-by: Paul Moore +Signed-off-by: Greg Kroah-Hartman +--- + security/selinux/ss/services.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/security/selinux/ss/services.c ++++ b/security/selinux/ss/services.c +@@ -1909,11 +1909,17 @@ retry: + goto out_unlock; + } + /* Obtain the sid for the context. */ +- rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); +- if (rc == -ESTALE) { +- rcu_read_unlock(); +- context_destroy(&newcontext); +- goto retry; ++ if (context_equal(scontext, &newcontext)) ++ *out_sid = ssid; ++ else if (context_equal(tcontext, &newcontext)) ++ *out_sid = tsid; ++ else { ++ rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); ++ if (rc == -ESTALE) { ++ rcu_read_unlock(); ++ context_destroy(&newcontext); ++ goto retry; ++ } + } + out_unlock: + rcu_read_unlock(); diff --git a/queue-6.15/serial-core-restore-of_node-information-in-sysfs.patch b/queue-6.15/serial-core-restore-of_node-information-in-sysfs.patch new file mode 100644 index 0000000000..d4cdca21c6 --- /dev/null +++ b/queue-6.15/serial-core-restore-of_node-information-in-sysfs.patch @@ -0,0 +1,35 @@ +From d36f0e9a0002f04f4d6dd9be908d58fe5bd3a279 Mon Sep 17 00:00:00 2001 +From: Aidan Stewart +Date: Tue, 17 Jun 2025 10:48:19 -0600 +Subject: serial: core: restore of_node information in sysfs + +From: Aidan Stewart + +commit d36f0e9a0002f04f4d6dd9be908d58fe5bd3a279 upstream. + +Since in v6.8-rc1, the of_node symlink under tty devices is +missing. This breaks any udev rules relying on this information. + +Link the of_node information in the serial controller device with the +parent defined in the device tree. This will also apply to the serial +device which takes the serial controller as a parent device. + +Fixes: b286f4e87e32 ("serial: core: Move tty and serdev to be children of serial core port device") +Cc: stable@vger.kernel.org +Signed-off-by: Aidan Stewart +Link: https://lore.kernel.org/r/20250617164819.13912-1-astewart@tektelic.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/serial_base_bus.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/tty/serial/serial_base_bus.c ++++ b/drivers/tty/serial/serial_base_bus.c +@@ -72,6 +72,7 @@ static int serial_base_device_init(struc + dev->parent = parent_dev; + dev->bus = &serial_base_bus_type; + dev->release = release; ++ device_set_of_node_from_dev(dev, parent_dev); + + if (!serial_base_initialized) { + dev_dbg(port->dev, "uart_add_one_port() called before arch_initcall()?\n"); diff --git a/queue-6.15/serial-imx-restore-original-rxtl-for-console-to-fix-data-loss.patch b/queue-6.15/serial-imx-restore-original-rxtl-for-console-to-fix-data-loss.patch new file mode 100644 index 0000000000..0690e76d24 --- /dev/null +++ b/queue-6.15/serial-imx-restore-original-rxtl-for-console-to-fix-data-loss.patch @@ -0,0 +1,107 @@ +From f23c52aafb1675ab1d1f46914556d8e29cbbf7b3 Mon Sep 17 00:00:00 2001 +From: Fabio Estevam +Date: Thu, 19 Jun 2025 08:46:17 -0300 +Subject: serial: imx: Restore original RXTL for console to fix data loss + +From: Fabio Estevam + +commit f23c52aafb1675ab1d1f46914556d8e29cbbf7b3 upstream. + +Commit 7a637784d517 ("serial: imx: reduce RX interrupt frequency") +introduced a regression on the i.MX6UL EVK board. 
The issue can be +reproduced with the following steps: + +- Open vi on the board. +- Paste a text file (~150 characters). +- Save the file, then repeat the process. +- Compare the sha256sum of the saved files. + +The checksums do not match due to missing characters or entire lines. + +Fix this by restoring the RXTL value to 1 when the UART is used as a +console. + +This ensures timely RX interrupts and reliable data reception in console +mode. + +With this change, pasted content is saved correctly, and checksums are +always consistent. + +Cc: stable +Fixes: 7a637784d517 ("serial: imx: reduce RX interrupt frequency") +Signed-off-by: Fabio Estevam +Reviewed-by: Stefan Wahren +Link: https://lore.kernel.org/r/20250619114617.2791939-1-festevam@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/imx.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/drivers/tty/serial/imx.c ++++ b/drivers/tty/serial/imx.c +@@ -235,6 +235,7 @@ struct imx_port { + enum imx_tx_state tx_state; + struct hrtimer trigger_start_tx; + struct hrtimer trigger_stop_tx; ++ unsigned int rxtl; + }; + + struct imx_port_ucrs { +@@ -1339,6 +1340,7 @@ static void imx_uart_clear_rx_errors(str + + #define TXTL_DEFAULT 8 + #define RXTL_DEFAULT 8 /* 8 characters or aging timer */ ++#define RXTL_CONSOLE_DEFAULT 1 + #define TXTL_DMA 8 /* DMA burst setting */ + #define RXTL_DMA 9 /* DMA burst setting */ + +@@ -1457,7 +1459,7 @@ static void imx_uart_disable_dma(struct + ucr1 &= ~(UCR1_RXDMAEN | UCR1_TXDMAEN | UCR1_ATDMAEN); + imx_uart_writel(sport, ucr1, UCR1); + +- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); ++ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); + + sport->dma_is_enabled = 0; + } +@@ -1482,7 +1484,12 @@ static int imx_uart_startup(struct uart_ + return retval; + } + +- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); ++ if (uart_console(&sport->port)) ++ sport->rxtl = RXTL_CONSOLE_DEFAULT; ++ else ++ sport->rxtl = RXTL_DEFAULT; ++ ++ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); + + /* disable the DREN bit (Data Ready interrupt enable) before + * requesting IRQs +@@ -1948,7 +1955,7 @@ static int imx_uart_poll_init(struct uar + if (retval) + clk_disable_unprepare(sport->clk_ipg); + +- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); ++ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); + + uart_port_lock_irqsave(&sport->port, &flags); + +@@ -2040,7 +2047,7 @@ static int imx_uart_rs485_config(struct + /* If the receiver trigger is 0, set it to a default value */ + ufcr = imx_uart_readl(sport, UFCR); + if ((ufcr & UFCR_RXTL_MASK) == 0) +- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); ++ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); + imx_uart_start_rx(port); + } + +@@ -2302,7 +2309,7 @@ imx_uart_console_setup(struct console *c + else + imx_uart_console_get_options(sport, &baud, &parity, &bits); + +- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); ++ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); + + retval = uart_set_options(&sport->port, co, baud, parity, bits, flow); + diff --git a/queue-6.15/series b/queue-6.15/series index 5210a0e800..987b4d677f 100644 --- a/queue-6.15/series +++ b/queue-6.15/series @@ -168,3 +168,44 @@ drm-bridge-ti-sn65dsi86-add-hpd-for-displayport-conn.patch drm-xe-guc-explicitly-exit-ct-safe-mode-on-unwind.patch drm-xe-process-deferred-ggtt-node-removals-on-device.patch smb-client-fix-potential-deadlock-when-reconnecting-.patch 
+edac-amd64-fix-size-calculation-for-non-power-of-two-dimms.patch +x86-traps-initialize-dr6-by-writing-its-architectural-reset-value.patch +staging-rtl8723bs-avoid-memset-in-aes_cipher-and-aes_decipher.patch +dt-bindings-serial-8250-make-clocks-and-clock-frequency-exclusive.patch +serial-core-restore-of_node-information-in-sysfs.patch +serial-imx-restore-original-rxtl-for-console-to-fix-data-loss.patch +bluetooth-l2cap-fix-l2cap-mtu-negotiation.patch +dm-raid-fix-variable-in-journal-device-check.patch +bcache-remove-unnecessary-select-min_heap.patch +btrfs-fix-a-race-between-renames-and-directory-logging.patch +btrfs-update-superblock-s-device-bytes_used-when-dropping-chunk.patch +btrfs-fix-invalid-inode-pointer-dereferences-during-log-replay.patch +revert-bcache-update-min_heap_callbacks-to-use-default-builtin-swap.patch +revert-bcache-remove-heap-related-macros-and-switch-to-generic-min_heap.patch +selinux-change-security_compute_sid-to-return-the-ssid-or-tsid-on-match.patch +spi-spi-cadence-quadspi-fix-pm-runtime-unbalance.patch +net-libwx-fix-the-creation-of-page_pool.patch +maple_tree-fix-ma_state_prealloc-flag-in-mas_preallocate.patch +mm-userfaultfd-fix-race-of-userfaultfd_move-and-swap-cache.patch +mm-shmem-swap-fix-softlockup-with-mthp-swapin.patch +mm-gup-revert-mm-gup-fix-infinite-loop-within-__get_longterm_locked.patch +f2fs-fix-to-zero-post-eof-page.patch +hid-appletb-kbd-fix-appletb_backlight-backlight-device-reference-counting.patch +hid-lenovo-restrict-f7-9-11-mode-to-compact-keyboards-only.patch +hid-wacom-fix-memory-leak-on-kobject-creation-failure.patch +hid-wacom-fix-memory-leak-on-sysfs-attribute-creation-failure.patch +hid-wacom-fix-kobject-reference-count-leak.patch +scsi-megaraid_sas-fix-invalid-node-index.patch +scsi-ufs-core-fix-clk-scaling-to-be-conditional-in-reset-and-restore.patch +scsi-fnic-fix-crash-in-fnic_wq_cmpl_handler-when-fdmi-times-out.patch +scsi-fnic-turn-off-fdmi-active-flags-on-link-down.patch +drm-ast-fix-comment-on-modeset-lock.patch +drm-cirrus-qemu-fix-pitch-programming.patch +drm-etnaviv-protect-the-scheduler-s-pending-list-with-its-lock.patch +drm-panel-simple-tianma-tm070jdhg34-00-add-delays.patch +drm-simpledrm-do-not-upcast-in-release-helpers.patch +drm-tegra-assign-plane-type-before-registration.patch +drm-tegra-fix-a-possible-null-pointer-dereference.patch +drm-udl-unregister-device-before-cleaning-up-on-disconnect.patch +drm-msm-gpu-fix-crash-when-throttling-gpu-immediately-during-boot.patch +drm-amdkfd-fix-race-in-gws-queue-scheduling.patch diff --git a/queue-6.15/spi-spi-cadence-quadspi-fix-pm-runtime-unbalance.patch b/queue-6.15/spi-spi-cadence-quadspi-fix-pm-runtime-unbalance.patch new file mode 100644 index 0000000000..7f9de61345 --- /dev/null +++ b/queue-6.15/spi-spi-cadence-quadspi-fix-pm-runtime-unbalance.patch @@ -0,0 +1,85 @@ +From b07f349d1864abe29436f45e3047da2bdd476462 Mon Sep 17 00:00:00 2001 +From: Khairul Anuar Romli +Date: Mon, 16 Jun 2025 09:13:53 +0800 +Subject: spi: spi-cadence-quadspi: Fix pm runtime unbalance + +From: Khairul Anuar Romli + +commit b07f349d1864abe29436f45e3047da2bdd476462 upstream. + +Having PM put sync in remove function is causing PM underflow during +remove operation. This is caused by the function, runtime_pm_get_sync, +not being called anywhere during the op. Ensure that calls to +pm_runtime_enable()/pm_runtime_disable() and +pm_runtime_get_sync()/pm_runtime_put_sync() match. 
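+
+As a rough sketch of that pairing rule (a placeholder "foo" driver,
+not this code); the unbind log below shows the underflow when the
+calls do not match:
+
+  #include <linux/platform_device.h>
+  #include <linux/pm_runtime.h>
+
+  static int foo_probe(struct platform_device *pdev)
+  {
+          pm_runtime_enable(&pdev->dev);  /* balanced by _disable() */
+          return 0;
+  }
+
+  static void foo_remove(struct platform_device *pdev)
+  {
+          int ret = pm_runtime_get_sync(&pdev->dev);
+
+          if (ret < 0)
+                  /* get_sync() bumps the count even on failure */
+                  pm_runtime_put_noidle(&pdev->dev);
+          else
+                  pm_runtime_put_sync(&pdev->dev);
+
+          pm_runtime_disable(&pdev->dev); /* balances enable() */
+  }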
+ +echo 108d2000.spi > /sys/bus/platform/drivers/cadence-qspi/unbind +[ 49.644256] Deleting MTD partitions on "108d2000.spi.0": +[ 49.649575] Deleting u-boot MTD partition +[ 49.684087] Deleting root MTD partition +[ 49.724188] cadence-qspi 108d2000.spi: Runtime PM usage count underflow! + +Continuous bind/unbind will result in an "Unbalanced pm_runtime_enable" error. +Subsequent unbind attempts will return a "No such device" error, while bind +attempts will return a "Resource temporarily unavailable" error. + +[ 47.592434] cadence-qspi 108d2000.spi: Runtime PM usage count underflow! +[ 49.592233] cadence-qspi 108d2000.spi: detected FIFO depth (1024) different from config (128) +[ 53.232309] cadence-qspi 108d2000.spi: Runtime PM usage count underflow! +[ 55.828550] cadence-qspi 108d2000.spi: detected FIFO depth (1024) different from config (128) +[ 57.940627] cadence-qspi 108d2000.spi: Runtime PM usage count underflow! +[ 59.912490] cadence-qspi 108d2000.spi: detected FIFO depth (1024) different from config (128) +[ 61.876243] cadence-qspi 108d2000.spi: Runtime PM usage count underflow! +[ 61.883000] platform 108d2000.spi: Unbalanced pm_runtime_enable! +[ 532.012270] cadence-qspi 108d2000.spi: probe with driver cadence-qspi failed1 + +Also, change clk_disable_unprepare() to clk_disable() since continuous +bind and unbind operations will trigger a warning indicating that the clock is +already unprepared. + +Fixes: 4892b374c9b7 ("mtd: spi-nor: cadence-quadspi: Add runtime PM support") +cc: stable@vger.kernel.org # 6.6+ +Signed-off-by: Khairul Anuar Romli +Reviewed-by: Matthew Gerlach +Link: https://patch.msgid.link/4e7a4b8aba300e629b45a04f90bddf665fbdb335.1749601877.git.khairul.anuar.romli@altera.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + drivers/spi/spi-cadence-quadspi.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/drivers/spi/spi-cadence-quadspi.c ++++ b/drivers/spi/spi-cadence-quadspi.c +@@ -1958,10 +1958,10 @@ static int cqspi_probe(struct platform_d + goto probe_setup_failed; + } + +- ret = devm_pm_runtime_enable(dev); +- if (ret) { +- if (cqspi->rx_chan) +- dma_release_channel(cqspi->rx_chan); ++ pm_runtime_enable(dev); ++ ++ if (cqspi->rx_chan) { ++ dma_release_channel(cqspi->rx_chan); + goto probe_setup_failed; + } + +@@ -1981,6 +1981,7 @@ static int cqspi_probe(struct platform_d + return 0; + probe_setup_failed: + cqspi_controller_enable(cqspi, 0); ++ pm_runtime_disable(dev); + probe_reset_failed: + if (cqspi->is_jh7110) + cqspi_jh7110_disable_clk(pdev, cqspi); +@@ -1999,7 +2000,8 @@ static void cqspi_remove(struct platform + if (cqspi->rx_chan) + dma_release_channel(cqspi->rx_chan); + +- clk_disable_unprepare(cqspi->clk); ++ if (pm_runtime_get_sync(&pdev->dev) >= 0) ++ clk_disable(cqspi->clk); + + if (cqspi->is_jh7110) + cqspi_jh7110_disable_clk(pdev, cqspi); diff --git a/queue-6.15/staging-rtl8723bs-avoid-memset-in-aes_cipher-and-aes_decipher.patch b/queue-6.15/staging-rtl8723bs-avoid-memset-in-aes_cipher-and-aes_decipher.patch new file mode 100644 index 0000000000..038aa5c1b4 --- /dev/null +++ b/queue-6.15/staging-rtl8723bs-avoid-memset-in-aes_cipher-and-aes_decipher.patch @@ -0,0 +1,137 @@ +From a55bc4ffc06d8c965a7d6f0a01ed0ed41380df28 Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Mon, 9 Jun 2025 14:13:14 -0700 +Subject: staging: rtl8723bs: Avoid memset() in aes_cipher() and aes_decipher() + +From: Nathan Chancellor + +commit a55bc4ffc06d8c965a7d6f0a01ed0ed41380df28 upstream. 
+
+After commit 6f110a5e4f99 ("Disable SLUB_TINY for build testing"), which
+causes CONFIG_KASAN to be enabled in allmodconfig again, arm64
+allmodconfig builds with older versions of clang (15 through 17) show an
+instance of -Wframe-larger-than (which breaks the build with
+CONFIG_WERROR=y):
+
+  drivers/staging/rtl8723bs/core/rtw_security.c:1287:5: error: stack frame size (2208) exceeds limit (2048) in 'rtw_aes_decrypt' [-Werror,-Wframe-larger-than]
+   1287 | u32 rtw_aes_decrypt(struct adapter *padapter, u8 *precvframe)
+        |     ^
+
+This comes from aes_decipher() being inlined in rtw_aes_decrypt().
+Running the same build with CONFIG_FRAME_WARN=128 shows aes_cipher()
+also uses a decent amount of stack, just under the limit of 2048:
+
+  drivers/staging/rtl8723bs/core/rtw_security.c:864:19: warning: stack frame size (1952) exceeds limit (128) in 'aes_cipher' [-Wframe-larger-than]
+    864 | static signed int aes_cipher(u8 *key, uint hdrlen,
+        |                   ^
+
+-Rpass-analysis=stack-frame-layout only shows one large structure on the
+stack, which is the ctx variable inlined from aes128k128d(). A good
+number of the other variables come from the additional checks of
+fortified string routines, which are present in memset(), which both
+aes_cipher() and aes_decipher() use to initialize some temporary
+buffers. In this case, since the size is known at compile time, these
+additional checks should not result in any code generation changes but
+allmodconfig has several sanitizers enabled, which may make it harder
+for the compiler to eliminate the compile time checks and the variables
+that come about from them.
+
+The memset() calls are just initializing these buffers to zero, so use
+'= {}' instead, which is used all over the kernel and does the exact
+same thing as memset() without the fortify checks, which drops the stack
+usage of these functions by a few hundred bytes.
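+
+For reference, a standalone C sketch of the two initialization forms
+(illustrative only, not the driver code); the reduced frame sizes
+reported by clang follow:
+
+  #include <string.h>
+
+  void zero_with_memset(unsigned char *out)
+  {
+          unsigned char buf[16];
+
+          memset(buf, 0, sizeof(buf));    /* fortified in the kernel */
+          memcpy(out, buf, sizeof(buf));
+  }
+
+  void zero_with_initializer(unsigned char *out)
+  {
+          unsigned char buf[16] = {};     /* GNU C / C23 empty init */
+
+          memcpy(out, buf, sizeof(buf));
+  }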
+ + drivers/staging/rtl8723bs/core/rtw_security.c:864:19: warning: stack frame size (1584) exceeds limit (128) in 'aes_cipher' [-Wframe-larger-than] + 864 | static signed int aes_cipher(u8 *key, uint hdrlen, + | ^ + drivers/staging/rtl8723bs/core/rtw_security.c:1271:5: warning: stack frame size (1456) exceeds limit (128) in 'rtw_aes_decrypt' [-Wframe-larger-than] + 1271 | u32 rtw_aes_decrypt(struct adapter *padapter, u8 *precvframe) + | ^ + +Cc: stable@vger.kernel.org +Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver") +Signed-off-by: Nathan Chancellor +Reviewed-by: Dan Carpenter +Link: https://lore.kernel.org/r/20250609-rtl8723bs-fix-clang-arm64-wflt-v1-1-e2accba43def@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/staging/rtl8723bs/core/rtw_security.c | 44 ++++++++------------------ + 1 file changed, 14 insertions(+), 30 deletions(-) + +--- a/drivers/staging/rtl8723bs/core/rtw_security.c ++++ b/drivers/staging/rtl8723bs/core/rtw_security.c +@@ -868,29 +868,21 @@ static signed int aes_cipher(u8 *key, ui + num_blocks, payload_index; + + u8 pn_vector[6]; +- u8 mic_iv[16]; +- u8 mic_header1[16]; +- u8 mic_header2[16]; +- u8 ctr_preload[16]; ++ u8 mic_iv[16] = {}; ++ u8 mic_header1[16] = {}; ++ u8 mic_header2[16] = {}; ++ u8 ctr_preload[16] = {}; + + /* Intermediate Buffers */ +- u8 chain_buffer[16]; +- u8 aes_out[16]; +- u8 padded_buffer[16]; ++ u8 chain_buffer[16] = {}; ++ u8 aes_out[16] = {}; ++ u8 padded_buffer[16] = {}; + u8 mic[8]; + uint frtype = GetFrameType(pframe); + uint frsubtype = GetFrameSubType(pframe); + + frsubtype = frsubtype>>4; + +- memset((void *)mic_iv, 0, 16); +- memset((void *)mic_header1, 0, 16); +- memset((void *)mic_header2, 0, 16); +- memset((void *)ctr_preload, 0, 16); +- memset((void *)chain_buffer, 0, 16); +- memset((void *)aes_out, 0, 16); +- memset((void *)padded_buffer, 0, 16); +- + if ((hdrlen == WLAN_HDR_A3_LEN) || (hdrlen == WLAN_HDR_A3_QOS_LEN)) + a4_exists = 0; + else +@@ -1080,15 +1072,15 @@ static signed int aes_decipher(u8 *key, + num_blocks, payload_index; + signed int res = _SUCCESS; + u8 pn_vector[6]; +- u8 mic_iv[16]; +- u8 mic_header1[16]; +- u8 mic_header2[16]; +- u8 ctr_preload[16]; ++ u8 mic_iv[16] = {}; ++ u8 mic_header1[16] = {}; ++ u8 mic_header2[16] = {}; ++ u8 ctr_preload[16] = {}; + + /* Intermediate Buffers */ +- u8 chain_buffer[16]; +- u8 aes_out[16]; +- u8 padded_buffer[16]; ++ u8 chain_buffer[16] = {}; ++ u8 aes_out[16] = {}; ++ u8 padded_buffer[16] = {}; + u8 mic[8]; + + uint frtype = GetFrameType(pframe); +@@ -1096,14 +1088,6 @@ static signed int aes_decipher(u8 *key, + + frsubtype = frsubtype>>4; + +- memset((void *)mic_iv, 0, 16); +- memset((void *)mic_header1, 0, 16); +- memset((void *)mic_header2, 0, 16); +- memset((void *)ctr_preload, 0, 16); +- memset((void *)chain_buffer, 0, 16); +- memset((void *)aes_out, 0, 16); +- memset((void *)padded_buffer, 0, 16); +- + /* start to decrypt the payload */ + + num_blocks = (plen-8) / 16; /* plen including LLC, payload_length and mic) */ diff --git a/queue-6.15/x86-traps-initialize-dr6-by-writing-its-architectural-reset-value.patch b/queue-6.15/x86-traps-initialize-dr6-by-writing-its-architectural-reset-value.patch new file mode 100644 index 0000000000..f840f88644 --- /dev/null +++ b/queue-6.15/x86-traps-initialize-dr6-by-writing-its-architectural-reset-value.patch @@ -0,0 +1,206 @@ +From 5f465c148c61e876b6d6eacd8e8e365f2d47758f Mon Sep 17 00:00:00 2001 +From: "Xin Li (Intel)" +Date: Fri, 20 Jun 2025 16:15:03 -0700 +Subject: x86/traps: Initialize DR6 by writing 
its architectural reset value
+
+From: Xin Li (Intel)
+
+commit 5f465c148c61e876b6d6eacd8e8e365f2d47758f upstream.
+
+Initialize DR6 by writing its architectural reset value to avoid
+incorrectly zeroing DR6 to clear DR6.BLD at boot time, which leads
+to a false bus lock detected warning.
+
+The Intel SDM says:
+
+ 1) Certain debug exceptions may clear bits 0-3 of DR6.
+
+ 2) BLD induced #DB clears DR6.BLD and any other debug exception
+ doesn't modify DR6.BLD.
+
+ 3) RTM induced #DB clears DR6.RTM and any other debug exception
+ sets DR6.RTM.
+
+ To avoid confusion in identifying debug exceptions, debug handlers
+ should set DR6.BLD and DR6.RTM, and clear other DR6 bits before
+ returning.
+
+The DR6 architectural reset value 0xFFFF0FF0, already defined as
+macro DR6_RESERVED, satisfies these requirements, so just use it to
+reinitialize DR6 whenever needed.
+
+Since clear_all_debug_regs() no longer zeros all debug registers,
+rename it to initialize_debug_regs() to better reflect its current
+behavior.
+
+Since debug_read_clear_dr6() no longer clears DR6, rename it to
+debug_read_reset_dr6() to better reflect its current behavior.
+
+Fixes: ebb1064e7c2e9 ("x86/traps: Handle #DB for bus lock")
+Reported-by: Sohil Mehta
+Suggested-by: H. Peter Anvin (Intel)
+Signed-off-by: Xin Li (Intel)
+Signed-off-by: Dave Hansen
+Reviewed-by: H. Peter Anvin (Intel)
+Reviewed-by: Sohil Mehta
+Acked-by: Peter Zijlstra (Intel)
+Tested-by: Sohil Mehta
+Link: https://lore.kernel.org/lkml/06e68373-a92b-472e-8fd9-ba548119770c@intel.com/
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20250620231504.2676902-2-xin%40zytor.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/include/uapi/asm/debugreg.h | 21 ++++++++++++++++++++-
+ arch/x86/kernel/cpu/common.c | 24 ++++++++++--------------
+ arch/x86/kernel/traps.c | 34 +++++++++++++++++++++-------------
+ 3 files changed, 51 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/include/uapi/asm/debugreg.h
++++ b/arch/x86/include/uapi/asm/debugreg.h
+@@ -15,7 +15,26 @@
+ which debugging register was responsible for the trap. The other bits
+ are either reserved or not of interest to us. */
+
+-/* Define reserved bits in DR6 which are always set to 1 */
++/*
++ * Define bits in DR6 which are set to 1 by default.
++ *
++ * This is also the DR6 architectural value following Power-up, Reset or INIT.
++ *
++ * Note, with the introduction of Bus Lock Detection (BLD) and Restricted
++ * Transactional Memory (RTM), the DR6 register has been modified:
++ *
++ * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports
++ * Bus Lock Detection. The assertion of a bus lock could clear it.
++ *
++ * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports
++ * restricted transactional memory. A #DB that occurred inside an RTM
++ * region could clear it.
++ *
++ * Apparently, DR6.BLD and DR6.RTM are active low bits.
++ *
++ * As a result, DR6_RESERVED is an incorrect name now, but it is kept for
++ * compatibility.
++ */ + #define DR6_RESERVED (0xFFFF0FF0) + + #define DR_TRAP0 (0x1) /* db0 */ +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -2205,20 +2205,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard) + #endif + #endif + +-/* +- * Clear all 6 debug registers: +- */ +-static void clear_all_debug_regs(void) ++static void initialize_debug_regs(void) + { +- int i; +- +- for (i = 0; i < 8; i++) { +- /* Ignore db4, db5 */ +- if ((i == 4) || (i == 5)) +- continue; +- +- set_debugreg(0, i); +- } ++ /* Control register first -- to make sure everything is disabled. */ ++ set_debugreg(0, 7); ++ set_debugreg(DR6_RESERVED, 6); ++ /* dr5 and dr4 don't exist */ ++ set_debugreg(0, 3); ++ set_debugreg(0, 2); ++ set_debugreg(0, 1); ++ set_debugreg(0, 0); + } + + #ifdef CONFIG_KGDB +@@ -2379,7 +2375,7 @@ void cpu_init(void) + + load_mm_ldt(&init_mm); + +- clear_all_debug_regs(); ++ initialize_debug_regs(); + dbg_restore_debug_regs(); + + doublefault_init_cpu_tss(); +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -1021,24 +1021,32 @@ static bool is_sysenter_singlestep(struc + #endif + } + +-static __always_inline unsigned long debug_read_clear_dr6(void) ++static __always_inline unsigned long debug_read_reset_dr6(void) + { + unsigned long dr6; + ++ get_debugreg(dr6, 6); ++ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ ++ + /* + * The Intel SDM says: + * +- * Certain debug exceptions may clear bits 0-3. The remaining +- * contents of the DR6 register are never cleared by the +- * processor. To avoid confusion in identifying debug +- * exceptions, debug handlers should clear the register before +- * returning to the interrupted task. ++ * Certain debug exceptions may clear bits 0-3 of DR6. ++ * ++ * BLD induced #DB clears DR6.BLD and any other debug ++ * exception doesn't modify DR6.BLD. + * +- * Keep it simple: clear DR6 immediately. ++ * RTM induced #DB clears DR6.RTM and any other debug ++ * exception sets DR6.RTM. ++ * ++ * To avoid confusion in identifying debug exceptions, ++ * debug handlers should set DR6.BLD and DR6.RTM, and ++ * clear other DR6 bits before returning. ++ * ++ * Keep it simple: write DR6 with its architectural reset ++ * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately. + */ +- get_debugreg(dr6, 6); + set_debugreg(DR6_RESERVED, 6); +- dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ + + return dr6; + } +@@ -1238,13 +1246,13 @@ out: + /* IST stack entry */ + DEFINE_IDTENTRY_DEBUG(exc_debug) + { +- exc_debug_kernel(regs, debug_read_clear_dr6()); ++ exc_debug_kernel(regs, debug_read_reset_dr6()); + } + + /* User entry, runs on regular task stack */ + DEFINE_IDTENTRY_DEBUG_USER(exc_debug) + { +- exc_debug_user(regs, debug_read_clear_dr6()); ++ exc_debug_user(regs, debug_read_reset_dr6()); + } + + #ifdef CONFIG_X86_FRED +@@ -1263,7 +1271,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) + { + /* + * FRED #DB stores DR6 on the stack in the format which +- * debug_read_clear_dr6() returns for the IDT entry points. ++ * debug_read_reset_dr6() returns for the IDT entry points. + */ + unsigned long dr6 = fred_event_data(regs); + +@@ -1278,7 +1286,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) + /* 32 bit does not have separate entry points. */ + DEFINE_IDTENTRY_RAW(exc_debug) + { +- unsigned long dr6 = debug_read_clear_dr6(); ++ unsigned long dr6 = debug_read_reset_dr6(); + + if (user_mode(regs)) + exc_debug_user(regs, dr6);