From: Greg Kroah-Hartman Date: Tue, 12 May 2026 14:28:32 +0000 (+0200) Subject: 7.0-stable patches X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=dfb22b9157685df271c4e0285c34d48fb62bbb3d;p=thirdparty%2Fkernel%2Fstable-queue.git 7.0-stable patches added patches: f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch f2fs-refactor-f2fs_move_node_folio-function.patch kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch mptcp-fastclose-msk-when-linger-time-is-0.patch mptcp-fix-rx-timestamp-corruption-on-fastopen.patch 
mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch mptcp-pm-add_addr-rtx-allow-id-0.patch mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch mptcp-pm-add_addr-rtx-fix-potential-data-race.patch mptcp-pm-add_addr-rtx-free-sk-if-last.patch mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch mptcp-pm-prio-skip-closed-subflows.patch mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch pci-aer-clear-only-error-bits-in-pcie-device-status.patch pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch perf-x86-intel-enable-auto-counter-reload-for-dmr.patch power-supply-max17042-avoid-overflow-when-determining-health.patch powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch rdma-ionic-fix-typo-in-format-string.patch rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch rdma-mana-validate-rx_hash_key_len.patch rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch 
rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch selftests-mptcp-check-output-catch-cmd-errors.patch selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch --- diff --git a/queue-7.0/f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch b/queue-7.0/f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch new file mode 100644 index 0000000000..1e12a8c4ad --- /dev/null +++ b/queue-7.0/f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch @@ -0,0 +1,38 @@ +From 5471834a96fb697874be2ca0b052e74bcf3c23d1 Mon Sep 17 00:00:00 2001 +From: Cen Zhang +Date: Wed, 18 Mar 2026 15:32:53 +0800 +Subject: f2fs: add READ_ONCE() for i_blocks in f2fs_update_inode() + +From: Cen Zhang + +commit 5471834a96fb697874be2ca0b052e74bcf3c23d1 upstream. + +f2fs_update_inode() reads inode->i_blocks without holding i_lock to +serialize it to the on-disk inode, while concurrent truncate or +allocation paths may modify i_blocks under i_lock. Since blkcnt_t is +u64, this risks torn reads on 32-bit architectures. + +Following the approach in ext4_inode_blocks_set(), add READ_ONCE() to prevent +potential compiler-induced tearing. 
+ +Fixes: 19f99cee206c ("f2fs: add core inode operations") +Cc: stable@vger.kernel.org +Signed-off-by: Cen Zhang +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -687,7 +687,7 @@ void f2fs_update_inode(struct inode *ino + ri->i_uid = cpu_to_le32(i_uid_read(inode)); + ri->i_gid = cpu_to_le32(i_gid_read(inode)); + ri->i_links = cpu_to_le32(inode->i_nlink); +- ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1); ++ ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(READ_ONCE(inode->i_blocks)) + 1); + + if (!f2fs_is_atomic_file(inode) || + is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) diff --git a/queue-7.0/f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch b/queue-7.0/f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch new file mode 100644 index 0000000000..49f2c9e8d1 --- /dev/null +++ b/queue-7.0/f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch @@ -0,0 +1,97 @@ +From 6a5e3de9c2bb0b691d16789a5d19e9276a09b308 Mon Sep 17 00:00:00 2001 +From: Chao Yu +Date: Fri, 6 Mar 2026 12:24:20 +0000 +Subject: f2fs: fix false alarm of lockdep on cp_global_sem lock + +From: Chao Yu + +commit 6a5e3de9c2bb0b691d16789a5d19e9276a09b308 upstream. 
+ +lockdep reported a potential deadlock: + +a) TCMU device removal context: + - call del_gendisk() to get q->q_usage_counter + - call start_flush_work() to get work_completion of wb->dwork +b) f2fs writeback context: + - in wb_workfn(), which holds work_completion of wb->dwork + - call f2fs_balance_fs() to get sbi->gc_lock +c) f2fs vfs_write context: + - call f2fs_gc() to get sbi->gc_lock + - call f2fs_write_checkpoint() to get sbi->cp_global_sem +d) f2fs mount context: + - call recover_fsync_data() to get sbi->cp_global_sem + - call f2fs_check_and_fix_write_pointer() to call blkdev_report_zones() + that goes down to blk_mq_alloc_request and gets q->q_usage_counter + +The original callstack is in the Closes tag. + +However, I think this is a false alarm, because before mount returns +successfully (context d), we cannot access any file therein via vfs_write +(context c). + +Let's introduce per-sb cp_global_sem_key, and assign the key for +cp_global_sem, so that lockdep can recognize cp_global_sem from +different super blocks correctly. + +A lot of the work was done by Shin'ichiro Kawasaki, thanks a lot for +the work. 
+ +Fixes: c426d99127b1 ("f2fs: Check write pointer consistency of open zones") +Cc: stable@kernel.org +Reported-and-tested-by: Shin'ichiro Kawasaki +Closes: https://lore.kernel.org/linux-f2fs-devel/20260218125237.3340441-1-shinichiro.kawasaki@wdc.com +Signed-off-by: Shin'ichiro Kawasaki +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/f2fs.h | 3 +++ + fs/f2fs/super.c | 11 +++++++++++ + 2 files changed, 14 insertions(+) + +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -2042,6 +2042,9 @@ struct f2fs_sb_info { + spinlock_t iostat_lat_lock; + struct iostat_lat_info *iostat_io_lat; + #endif ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lock_class_key cp_global_sem_key; ++#endif + }; + + /* Definitions to access f2fs_sb_info */ +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -4953,6 +4953,11 @@ try_onemore: + init_f2fs_rwsem_trace(&sbi->gc_lock, sbi, LOCK_NAME_GC_LOCK); + mutex_init(&sbi->writepages); + init_f2fs_rwsem_trace(&sbi->cp_global_sem, sbi, LOCK_NAME_CP_GLOBAL); ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ lockdep_register_key(&sbi->cp_global_sem_key); ++ lockdep_set_class(&sbi->cp_global_sem.internal_rwsem, ++ &sbi->cp_global_sem_key); ++#endif + init_f2fs_rwsem_trace(&sbi->node_write, sbi, LOCK_NAME_NODE_WRITE); + init_f2fs_rwsem_trace(&sbi->node_change, sbi, LOCK_NAME_NODE_CHANGE); + spin_lock_init(&sbi->stat_lock); +@@ -5424,6 +5429,9 @@ free_options: + free_sb_buf: + kfree(raw_super); + free_sbi: ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ lockdep_unregister_key(&sbi->cp_global_sem_key); ++#endif + kfree(sbi); + sb->s_fs_info = NULL; + +@@ -5505,6 +5513,9 @@ static void kill_f2fs_super(struct super + /* Release block devices last, after fscrypt_destroy_keyring(). 
*/ + if (sbi) { + destroy_device_list(sbi); ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ lockdep_unregister_key(&sbi->cp_global_sem_key); ++#endif + kfree(sbi); + sb->s_fs_info = NULL; + } diff --git a/queue-7.0/f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch b/queue-7.0/f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch new file mode 100644 index 0000000000..d11af427bb --- /dev/null +++ b/queue-7.0/f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch @@ -0,0 +1,99 @@ +From 95e159ad3e52f7478cfd22e44ec37c9f334f8993 Mon Sep 17 00:00:00 2001 +From: Yongpeng Yang +Date: Mon, 23 Mar 2026 20:06:24 +0800 +Subject: f2fs: fix fiemap boundary handling when read extent cache is incomplete +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Yongpeng Yang + +commit 95e159ad3e52f7478cfd22e44ec37c9f334f8993 upstream. + +f2fs_fiemap() calls f2fs_map_blocks() to obtain the block mapping a +file, and then merges contiguous mappings into extents. If the mapping +is found in the read extent cache, node blocks do not need to be read. +However, in the following scenario, a contiguous extent can be split +into two extents: + +$ dd if=/dev/zero of=data.128M bs=1M count=128 +$ losetup -f data.128M +$ mkfs.f2fs /dev/loop0 -f +$ mount -o mode=lfs /dev/loop0 /mnt/f2fs/ +$ cd /mnt/f2fs/ +$ dd if=/dev/zero of=data.72M bs=1M count=72 && sync +$ dd if=/dev/zero of=data.4M bs=1M count=4 && sync +$ dd if=/dev/zero of=data.4M bs=1M count=2 seek=2 conv=notrunc && sync +$ echo 3 > /proc/sys/vm/drop_caches +$ dd if=/dev/zero of=data.4M bs=1M count=2 seek=0 conv=notrunc && sync +$ dd if=/dev/zero of=data.4M bs=1M count=2 seek=0 conv=notrunc && sync +$ f2fs_io fiemap 0 1024 data.4M +Fiemap: offset = 0 len = 1024 +logical addr. physical addr. 
length flags +0 0000000000000000 0000000006400000 0000000000200000 00001000 +1 0000000000200000 0000000006600000 0000000000200000 00001001 + +Although the physical addresses of the ranges 0~2MB and 2M~4MB are +contiguous, the mapping for the 2M~4MB range is not present in memory. +When the physical addresses for the 0~2MB range are updated, no merge +happens because the adjacent mapping is missing from the in-memory +cache. As a result, fiemap reports two separate extents instead of a +single contiguous one. + +The root cause is that the read extent cache does not guarantee that all +blocks of an extent are present in memory. Therefore, when the extent +length returned by f2fs_map_blocks_cached() is smaller than maxblocks, +the remaining mappings are retrieved via f2fs_get_dnode_of_data() to +ensure correct fiemap extent boundary handling. + +Cc: stable@kernel.org +Fixes: cd8fc5226bef ("f2fs: remove the create argument to f2fs_map_blocks") +Signed-off-by: Yongpeng Yang +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/data.c | 25 ++++++++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -1636,8 +1636,26 @@ int f2fs_map_blocks(struct inode *inode, + lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create); + +- if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) +- goto out; ++ if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) { ++ struct extent_info ei; ++ ++ /* ++ * 1. If map->m_multidev_dio is true, map->m_pblk cannot be ++ * waitted by f2fs_wait_on_block_writeback_range() and are not ++ * mergeable. ++ * 2. If pgofs hits the read extent cache, it means the mapping ++ * is already cached in the extent cache, but it is not ++ * mergeable, and there is no need to query the mapping again ++ * via f2fs_get_dnode_of_data(). 
++ */ ++ pgofs = (pgoff_t)map->m_lblk + map->m_len; ++ if (map->m_len == maxblocks || ++ map->m_multidev_dio || ++ f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) ++ goto out; ++ ofs = map->m_len; ++ goto map_more; ++ } + + map->m_bdev = inode->i_sb->s_bdev; + map->m_multidev_dio = +@@ -1648,7 +1666,8 @@ int f2fs_map_blocks(struct inode *inode, + + /* it only supports block size == page size */ + pgofs = (pgoff_t)map->m_lblk; +- end = pgofs + maxblocks; ++map_more: ++ end = (pgoff_t)map->m_lblk + maxblocks; + + if (flag == F2FS_GET_BLOCK_PRECACHE) + mode = LOOKUP_NODE_RA; diff --git a/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch b/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch new file mode 100644 index 0000000000..69164b135d --- /dev/null +++ b/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch @@ -0,0 +1,120 @@ +From c3e238bd1f56993f205ef83889d406dfeaf717a8 Mon Sep 17 00:00:00 2001 +From: Yongpeng Yang +Date: Wed, 18 Mar 2026 16:45:34 +0800 +Subject: f2fs: fix fsck inconsistency caused by FGGC of node block + +From: Yongpeng Yang + +commit c3e238bd1f56993f205ef83889d406dfeaf717a8 upstream. + +During FGGC node block migration, fsck may incorrectly treat the +migrated node block as fsync-written data. + +The reproduction scenario: +root@vm:/mnt/f2fs# seq 1 2048 | xargs -n 1 ./test_sync // write inline inode and sync +root@vm:/mnt/f2fs# rm -f 1 +root@vm:/mnt/f2fs# sync +root@vm:/mnt/f2fs# f2fs_io gc_range // move data block in sync mode and not write CP + SPO, "fsck --dry-run" find inode has already checkpointed but still + with DENT_BIT_SHIFT set + +The root cause is that GC does not clear the dentry mark and fsync mark +during node block migration, leading fsck to misinterpret them as +user-issued fsync writes. + +In BGGC mode, node block migration is handled by f2fs_sync_node_pages(), +which guarantees the dentry and fsync marks are cleared before writing. 
+ +This patch move the set/clear of the fsync|dentry marks into +__write_node_folio to make the logic clearer, and ensures the +fsync|dentry mark is cleared in FGGC. + +Cc: stable@kernel.org +Fixes: da011cc0da8c ("f2fs: move node pages only in victim section during GC") +Signed-off-by: Yongpeng Yang +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/node.c | 27 +++++++++++++-------------- + 1 file changed, 13 insertions(+), 14 deletions(-) + +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -1729,9 +1729,10 @@ continue_unlock: + return last_folio; + } + +-static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted, +- struct writeback_control *wbc, bool do_balance, +- enum iostat_type io_type, unsigned int *seq_id) ++static bool __write_node_folio(struct folio *folio, bool atomic, bool do_fsync, ++ bool *submitted, struct writeback_control *wbc, ++ bool do_balance, enum iostat_type io_type, ++ unsigned int *seq_id) + { + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); + nid_t nid; +@@ -1804,6 +1805,8 @@ static bool __write_node_folio(struct fo + if (atomic && !test_opt(sbi, NOBARRIER)) + fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + ++ set_dentry_mark(folio, false); ++ set_fsync_mark(folio, do_fsync); + if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio))) + set_dentry_mark(folio, + f2fs_need_dentry_mark(sbi, ino_of_node(folio))); +@@ -1870,7 +1873,7 @@ int f2fs_write_single_node_folio(struct + goto out_folio; + } + +- if (!__write_node_folio(node_folio, false, NULL, ++ if (!__write_node_folio(node_folio, false, false, NULL, + &wbc, false, FS_GC_NODE_IO, NULL)) + err = -EAGAIN; + goto release_folio; +@@ -1917,6 +1920,7 @@ retry: + for (i = 0; i < nr_folios; i++) { + struct folio *folio = fbatch.folios[i]; + bool submitted = false; ++ bool do_fsync = false; + + if (unlikely(f2fs_cp_error(sbi))) { + f2fs_folio_put(last_folio, false); +@@ -1947,11 +1951,8 @@ continue_unlock: + + 
f2fs_folio_wait_writeback(folio, NODE, true, true); + +- set_fsync_mark(folio, 0); +- set_dentry_mark(folio, 0); +- + if (!atomic || folio == last_folio) { +- set_fsync_mark(folio, 1); ++ do_fsync = true; + percpu_counter_inc(&sbi->rf_node_block_count); + if (IS_INODE(folio)) { + if (is_inode_flag_set(inode, +@@ -1968,8 +1969,9 @@ continue_unlock: + + if (!__write_node_folio(folio, atomic && + folio == last_folio, +- &submitted, wbc, true, +- FS_NODE_IO, seq_id)) { ++ do_fsync, &submitted, ++ wbc, true, FS_NODE_IO, ++ seq_id)) { + f2fs_folio_put(last_folio, false); + folio_batch_release(&fbatch); + ret = -EIO; +@@ -2169,10 +2171,7 @@ write_node: + if (!folio_clear_dirty_for_io(folio)) + goto continue_unlock; + +- set_fsync_mark(folio, 0); +- set_dentry_mark(folio, 0); +- +- if (!__write_node_folio(folio, false, &submitted, ++ if (!__write_node_folio(folio, false, false, &submitted, + wbc, do_balance, io_type, NULL)) { + folio_batch_release(&fbatch); + ret = -EIO; diff --git a/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch b/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch new file mode 100644 index 0000000000..039b6fdd15 --- /dev/null +++ b/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch @@ -0,0 +1,79 @@ +From 019f9dda7f66e55eb94cd32e1d3fff5835f73fbc Mon Sep 17 00:00:00 2001 +From: Yongpeng Yang +Date: Tue, 10 Mar 2026 17:36:12 +0800 +Subject: f2fs: fix fsck inconsistency caused by incorrect nat_entry flag usage + +From: Yongpeng Yang + +commit 019f9dda7f66e55eb94cd32e1d3fff5835f73fbc upstream. + +f2fs_need_dentry_mark() reads nat_entry flags without mutual exclusion +with the checkpoint path, which can result in an incorrect inode block +marking state. 
The scenario is as follows: + +create & write & fsync 'file A' write checkpoint +- f2fs_do_sync_file // inline inode + - f2fs_write_inode // inode folio is dirty + - f2fs_write_checkpoint + - f2fs_flush_merged_writes + - f2fs_sync_node_pages + - f2fs_fsync_node_pages // no dirty node + - f2fs_need_inode_block_update // return true + - f2fs_fsync_node_pages // inode dirtied + - f2fs_need_dentry_mark //return true + - f2fs_flush_nat_entries + - f2fs_write_checkpoint end + - __write_node_folio // inode with DENT_BIT_SHIFT set + SPO, "fsck --dry-run" find inode has already checkpointed but still + with DENT_BIT_SHIFT set + +The state observed by f2fs_need_dentry_mark() can differ from the state +observed in __write_node_folio() after acquiring sbi->node_write. The +root cause is that the semantics of IS_CHECKPOINTED and +HAS_FSYNCED_INODE are only guaranteed after the checkpoint write has +fully completed. + +This patch moves set_dentry_mark() into __write_node_folio() and +protects it with the sbi->node_write lock. 
+ +Cc: stable@kernel.org +Fixes: 88bd02c9472a ("f2fs: fix conditions to remain recovery information in f2fs_sync_file") +Signed-off-by: Yongpeng Yang +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/node.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -1801,13 +1801,12 @@ static bool __write_node_folio(struct fo + goto redirty_out; + } + +- if (atomic) { +- if (!test_opt(sbi, NOBARRIER)) +- fio.op_flags |= REQ_PREFLUSH | REQ_FUA; +- if (IS_INODE(folio)) +- set_dentry_mark(folio, ++ if (atomic && !test_opt(sbi, NOBARRIER)) ++ fio.op_flags |= REQ_PREFLUSH | REQ_FUA; ++ ++ if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio))) ++ set_dentry_mark(folio, + f2fs_need_dentry_mark(sbi, ino_of_node(folio))); +- } + + /* should add to global list before clearing PAGECACHE status */ + if (f2fs_in_warm_node_list(sbi, folio)) { +@@ -1948,9 +1947,6 @@ continue_unlock: + if (is_inode_flag_set(inode, + FI_DIRTY_INODE)) + f2fs_update_inode(inode, folio); +- if (!atomic) +- set_dentry_mark(folio, +- f2fs_need_dentry_mark(sbi, ino)); + } + /* may be written by other thread */ + if (!folio_test_dirty(folio)) diff --git a/queue-7.0/f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch b/queue-7.0/f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch new file mode 100644 index 0000000000..7c43b54995 --- /dev/null +++ b/queue-7.0/f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch @@ -0,0 +1,63 @@ +From 68a0178981a0f493295afa29f8880246e561494c Mon Sep 17 00:00:00 2001 +From: Yongpeng Yang +Date: Tue, 3 Feb 2026 21:36:35 +0800 +Subject: f2fs: fix incorrect file address mapping when inline inode is unwritten + +From: Yongpeng Yang + +commit 68a0178981a0f493295afa29f8880246e561494c upstream. 
+ +When `fileinfo->fi_flags` does not have the `FIEMAP_FLAG_SYNC` bit set +and inline data has not been persisted yet, the physical address of the +extent is calculated incorrectly for unwritten inline inodes. + +root@vm:/mnt/f2fs# dd if=/dev/zero of=data.3k bs=3k count=1 +root@vm:/mnt/f2fs# f2fs_io fiemap 0 100 data.3k +Fiemap: offset = 0 len = 100 + logical addr. physical addr. length flags +0 0000000000000000 00000ffffffff16c 0000000000000c00 00000301 + +This patch fixes the issue by checking if the inode's address is valid. +If the inline inode is unwritten, set the physical address to 0 and +mark the extent with `FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DELALLOC` +flags. + +Cc: stable@kernel.org +Fixes: 67f8cf3cee6f ("f2fs: support fiemap for inline_data") +Signed-off-by: Yongpeng Yang +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/inline.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/fs/f2fs/inline.c ++++ b/fs/f2fs/inline.c +@@ -792,7 +792,7 @@ int f2fs_read_inline_dir(struct file *fi + int f2fs_inline_data_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) + { +- __u64 byteaddr, ilen; ++ __u64 byteaddr = 0, ilen; + __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | + FIEMAP_EXTENT_LAST; + struct node_info ni; +@@ -825,9 +825,14 @@ int f2fs_inline_data_fiemap(struct inode + if (err) + goto out; + +- byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; +- byteaddr += (char *)inline_data_addr(inode, ifolio) - +- (char *)F2FS_INODE(ifolio); ++ if (__is_valid_data_blkaddr(ni.blk_addr)) { ++ byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; ++ byteaddr += (char *)inline_data_addr(inode, ifolio) - ++ (char *)F2FS_INODE(ifolio); ++ } else { ++ f2fs_bug_on(F2FS_I_SB(inode), ni.blk_addr != NEW_ADDR); ++ flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN; ++ } + err = fiemap_fill_next_extent(fieinfo, 
start, byteaddr, ilen, flags); + trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); + out: diff --git a/queue-7.0/f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch b/queue-7.0/f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch new file mode 100644 index 0000000000..86b95bb4fd --- /dev/null +++ b/queue-7.0/f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch @@ -0,0 +1,38 @@ +From eb2ca3ca983551a80e16a4a25df5a4ce59df8484 Mon Sep 17 00:00:00 2001 +From: Yongpeng Yang +Date: Mon, 23 Mar 2026 20:06:22 +0800 +Subject: f2fs: fix incorrect multidevice info in trace_f2fs_map_blocks() + +From: Yongpeng Yang + +commit eb2ca3ca983551a80e16a4a25df5a4ce59df8484 upstream. + +When f2fs_map_blocks()->f2fs_map_blocks_cached() hits the read extent +cache, map->m_multidev_dio is not updated, which leads to incorrect +multidevice information being reported by trace_f2fs_map_blocks(). + +This patch updates map->m_multidev_dio in f2fs_map_blocks_cached() when +the read extent cache is hit. 
+ +Cc: stable@kernel.org +Fixes: 0094e98bd147 ("f2fs: factor a f2fs_map_blocks_cached helper") +Signed-off-by: Yongpeng Yang +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/data.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -1576,7 +1576,8 @@ static bool f2fs_map_blocks_cached(struc + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); + +- if (f2fs_allow_multi_device_dio(sbi, flag)) { ++ map->m_multidev_dio = f2fs_allow_multi_device_dio(sbi, flag); ++ if (map->m_multidev_dio) { + int bidx = f2fs_target_device_index(sbi, map->m_pblk); + struct f2fs_dev_info *dev = &sbi->devs[bidx]; + diff --git a/queue-7.0/f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch b/queue-7.0/f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch new file mode 100644 index 0000000000..5d411021b7 --- /dev/null +++ b/queue-7.0/f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch @@ -0,0 +1,87 @@ +From fe9b8b30b97102859a9102be7bd2a09803bd90bd Mon Sep 17 00:00:00 2001 +From: Yongpeng Yang +Date: Wed, 18 Mar 2026 16:46:35 +0800 +Subject: f2fs: fix inline data not being written to disk in writeback path + +From: Yongpeng Yang + +commit fe9b8b30b97102859a9102be7bd2a09803bd90bd upstream. + +When f2fs_fiemap() is called with `fileinfo->fi_flags` containing the +FIEMAP_FLAG_SYNC flag, it attempts to write data to disk before +retrieving file mappings via filemap_write_and_wait(). However, there is +an issue where the file does not get mapped as expected. 
The following +scenario can occur: + +root@vm:/mnt/f2fs# dd if=/dev/zero of=data.3k bs=3k count=1 +root@vm:/mnt/f2fs# xfs_io data.3k -c "fiemap -v 0 4096" +data.3k: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..5]: 0..5 6 0x307 + +The root cause of this issue is that f2fs_write_single_data_page() only +calls f2fs_write_inline_data() to copy data from the data folio to the +inode folio, and it clears the dirty flag on the data folio. However, it +does not mark the data folio as writeback. When +__filemap_fdatawait_range() checks for folios with the writeback flag, +it returns early, causing f2fs_fiemap() to report that the file has no +mapping. + +To fix this issue, the solution is to call +f2fs_write_single_node_folio() in f2fs_inline_data_fiemap() when +getting fiemap with FIEMAP_FLAG_SYNC flags. This patch ensures that the +inode folio is written back and the writeback process completes before +proceeding. + +Cc: stable@kernel.org +Fixes: 9ffe0fb5f3bb ("f2fs: handle inline data operations") +Signed-off-by: Yongpeng Yang +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/f2fs.h | 2 ++ + fs/f2fs/inline.c | 9 +++++++++ + fs/f2fs/node.c | 2 +- + 3 files changed, 12 insertions(+), 1 deletion(-) + +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -3950,6 +3950,8 @@ int f2fs_sanity_check_node_footer(struct + enum node_type ntype, bool in_irq); + struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino); + struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid); ++int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, ++ bool mark_dirty, enum iostat_type io_type); + int f2fs_move_node_folio(struct folio *node_folio, int gc_type); + void f2fs_flush_inline_data(struct f2fs_sb_info *sbi); + int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, +--- a/fs/f2fs/inline.c ++++ b/fs/f2fs/inline.c +@@ -814,6 +814,15 @@ int f2fs_inline_data_fiemap(struct 
inode + goto out; + } + ++ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { ++ err = f2fs_write_single_node_folio(ifolio, true, false, FS_NODE_IO); ++ if (err) ++ return err; ++ ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); ++ if (IS_ERR(ifolio)) ++ return PTR_ERR(ifolio); ++ f2fs_folio_wait_writeback(ifolio, NODE, true, true); ++ } + ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode)); + if (start >= ilen) + goto out; +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -1842,7 +1842,7 @@ redirty_out: + return false; + } + +-static int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, ++int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, + bool mark_dirty, enum iostat_type io_type) + { + int err = 0; diff --git a/queue-7.0/f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch b/queue-7.0/f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch new file mode 100644 index 0000000000..8e1c4b038f --- /dev/null +++ b/queue-7.0/f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch @@ -0,0 +1,93 @@ +From ed78aeebef05212ef7dca93bd931e4eff67c113f Mon Sep 17 00:00:00 2001 +From: Yongpeng Yang +Date: Fri, 3 Apr 2026 22:40:17 +0800 +Subject: f2fs: fix node_cnt race between extent node destroy and writeback + +From: Yongpeng Yang + +commit ed78aeebef05212ef7dca93bd931e4eff67c113f upstream. + +f2fs_destroy_extent_node() does not set FI_NO_EXTENT before clearing +extent nodes. When called from f2fs_drop_inode() with I_SYNC set, +concurrent kworker writeback can insert new extent nodes into the same +extent tree, racing with the destroy and triggering f2fs_bug_on() in +__destroy_extent_node(). 
The scenario is as follows: + +drop inode writeback + - iput + - f2fs_drop_inode // I_SYNC set + - f2fs_destroy_extent_node + - __destroy_extent_node + - while (node_cnt) { + write_lock(&et->lock) + __free_extent_tree + write_unlock(&et->lock) + - __writeback_single_inode + - f2fs_outplace_write_data + - f2fs_update_read_extent_cache + - __update_extent_tree_range + // FI_NO_EXTENT not set, + // insert new extent node + } // node_cnt == 0, exit while + - f2fs_bug_on(node_cnt) // node_cnt > 0 + +Additionally, __update_extent_tree_range() only checks FI_NO_EXTENT for +EX_READ type, leaving EX_BLOCK_AGE updates completely unprotected. + +This patch set FI_NO_EXTENT under et->lock in __destroy_extent_node(), +consistent with other callers (__update_extent_tree_range and +__drop_extent_tree) and check FI_NO_EXTENT for both EX_READ and +EX_BLOCK_AGE tree. + +Fixes: 3fc5d5a182f6 ("f2fs: fix to shrink read extent node in batches") +Cc: stable@vger.kernel.org +Signed-off-by: Yongpeng Yang +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/extent_cache.c | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + +--- a/fs/f2fs/extent_cache.c ++++ b/fs/f2fs/extent_cache.c +@@ -119,9 +119,10 @@ static bool __may_extent_tree(struct ino + if (!__init_may_extent_tree(inode, type)) + return false; + ++ if (is_inode_flag_set(inode, FI_NO_EXTENT)) ++ return false; ++ + if (type == EX_READ) { +- if (is_inode_flag_set(inode, FI_NO_EXTENT)) +- return false; + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(F2FS_I_SB(inode))) + return false; +@@ -644,6 +645,8 @@ static unsigned int __destroy_extent_nod + + while (atomic_read(&et->node_cnt)) { + write_lock(&et->lock); ++ if (!is_inode_flag_set(inode, FI_NO_EXTENT)) ++ set_inode_flag(inode, FI_NO_EXTENT); + node_cnt += __free_extent_tree(sbi, et, nr_shrink); + write_unlock(&et->lock); + } +@@ -688,12 +691,12 @@ static void 
__update_extent_tree_range(s + + write_lock(&et->lock); + +- if (type == EX_READ) { +- if (is_inode_flag_set(inode, FI_NO_EXTENT)) { +- write_unlock(&et->lock); +- return; +- } ++ if (is_inode_flag_set(inode, FI_NO_EXTENT)) { ++ write_unlock(&et->lock); ++ return; ++ } + ++ if (type == EX_READ) { + prev = et->largest; + dei.len = 0; + diff --git a/queue-7.0/f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch b/queue-7.0/f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch new file mode 100644 index 0000000000..b272bd9d5b --- /dev/null +++ b/queue-7.0/f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch @@ -0,0 +1,62 @@ +From b635f2ecdb5ad34f9c967cabb704d6bed9382fd0 Mon Sep 17 00:00:00 2001 +From: Guangshuo Li +Date: Fri, 10 Apr 2026 20:47:26 +0800 +Subject: f2fs: fix uninitialized kobject put in f2fs_init_sysfs() + +From: Guangshuo Li + +commit b635f2ecdb5ad34f9c967cabb704d6bed9382fd0 upstream. + +In f2fs_init_sysfs(), all failure paths after kset_register() jump to +put_kobject, which unconditionally releases both f2fs_tune and +f2fs_feat. + +If kobject_init_and_add(&f2fs_feat, ...) fails, f2fs_tune has not been +initialized yet, so calling kobject_put(&f2fs_tune) is invalid. + +Fix this by splitting the unwind path so each error path only releases +objects that were successfully initialized. 
+ +Fixes: a907f3a68ee26ba4 ("f2fs: add a sysfs entry to reclaim POSIX_FADV_NOREUSE pages") +Cc: stable@vger.kernel.org +Signed-off-by: Guangshuo Li +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/sysfs.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/fs/f2fs/sysfs.c ++++ b/fs/f2fs/sysfs.c +@@ -1984,24 +1984,26 @@ int __init f2fs_init_sysfs(void) + ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, + NULL, "features"); + if (ret) +- goto put_kobject; ++ goto unregister_kset; + + ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype, + NULL, "tuning"); + if (ret) +- goto put_kobject; ++ goto put_feat; + + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + if (!f2fs_proc_root) { + ret = -ENOMEM; +- goto put_kobject; ++ goto put_tune; + } + + return 0; + +-put_kobject: ++put_tune: + kobject_put(&f2fs_tune); ++put_feat: + kobject_put(&f2fs_feat); ++unregister_kset: + kset_unregister(&f2fs_kset); + return ret; + } diff --git a/queue-7.0/f2fs-refactor-f2fs_move_node_folio-function.patch b/queue-7.0/f2fs-refactor-f2fs_move_node_folio-function.patch new file mode 100644 index 0000000000..7382a97712 --- /dev/null +++ b/queue-7.0/f2fs-refactor-f2fs_move_node_folio-function.patch @@ -0,0 +1,97 @@ +From 92c20989366e023b74fa0c1028af9436c1917dbf Mon Sep 17 00:00:00 2001 +From: Yongpeng Yang +Date: Wed, 18 Mar 2026 16:45:32 +0800 +Subject: f2fs: refactor f2fs_move_node_folio function + +From: Yongpeng Yang + +commit 92c20989366e023b74fa0c1028af9436c1917dbf upstream. + +This patch refactors the f2fs_move_node_folio() function. No logical +changes. 
+ +Cc: stable@kernel.org +Signed-off-by: Yongpeng Yang +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/node.c | 54 ++++++++++++++++++++++++++++++++---------------------- + 1 file changed, 32 insertions(+), 22 deletions(-) + +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -1842,41 +1842,51 @@ redirty_out: + return false; + } + +-int f2fs_move_node_folio(struct folio *node_folio, int gc_type) ++static int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, ++ bool mark_dirty, enum iostat_type io_type) + { + int err = 0; ++ struct writeback_control wbc = { ++ .sync_mode = WB_SYNC_ALL, ++ .nr_to_write = 1, ++ }; + +- if (gc_type == FG_GC) { +- struct writeback_control wbc = { +- .sync_mode = WB_SYNC_ALL, +- .nr_to_write = 1, +- }; ++ if (!sync_mode) { ++ /* set page dirty and write it */ ++ if (!folio_test_writeback(node_folio)) ++ folio_mark_dirty(node_folio); ++ goto out_folio; ++ } + +- f2fs_folio_wait_writeback(node_folio, NODE, true, true); ++ f2fs_folio_wait_writeback(node_folio, NODE, true, true); + ++ if (mark_dirty) + folio_mark_dirty(node_folio); ++ else if (!folio_test_dirty(node_folio)) ++ goto out_folio; + +- if (!folio_clear_dirty_for_io(node_folio)) { +- err = -EAGAIN; +- goto out_page; +- } +- +- if (!__write_node_folio(node_folio, false, NULL, +- &wbc, false, FS_GC_NODE_IO, NULL)) +- err = -EAGAIN; +- goto release_page; +- } else { +- /* set page dirty and write it */ +- if (!folio_test_writeback(node_folio)) +- folio_mark_dirty(node_folio); ++ if (!folio_clear_dirty_for_io(node_folio)) { ++ err = -EAGAIN; ++ goto out_folio; + } +-out_page: ++ ++ if (!__write_node_folio(node_folio, false, NULL, ++ &wbc, false, FS_GC_NODE_IO, NULL)) ++ err = -EAGAIN; ++ goto release_folio; ++out_folio: + folio_unlock(node_folio); +-release_page: ++release_folio: + f2fs_folio_put(node_folio, false); + return err; + } + ++int f2fs_move_node_folio(struct folio *node_folio, int gc_type) ++{ ++ return 
f2fs_write_single_node_folio(node_folio, gc_type == FG_GC, ++ true, FS_GC_NODE_IO); ++} ++ + int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, + struct writeback_control *wbc, bool atomic, + unsigned int *seq_id) diff --git a/queue-7.0/kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch b/queue-7.0/kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch new file mode 100644 index 0000000000..56bce6693e --- /dev/null +++ b/queue-7.0/kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch @@ -0,0 +1,50 @@ +From 7fe2cd4e1a3ad230d8fcc00cc99c4bcce4412a75 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Fri, 24 Apr 2026 09:49:03 +0100 +Subject: KVM: arm64: Fix FEAT_Debugv8p9 to check DebugVer, not PMUVer + +From: Fuad Tabba + +commit 7fe2cd4e1a3ad230d8fcc00cc99c4bcce4412a75 upstream. + +FEAT_Debugv8p9 is incorrectly defined against ID_AA64DFR0_EL1.PMUVer +instead of ID_AA64DFR0_EL1.DebugVer. All three consumers of the macro +gate features that are architecturally tied to FEAT_Debugv8p9 +(DebugVer = 0b1011, DDI0487 M.b A2.2.10): + + - HDFGRTR2_EL2.nMDSELR_EL1, HDFGWTR2_EL2.nMDSELR_EL1: MDSELR_EL1 + is present only when FEAT_Debugv8p9 is implemented (D24.3.21). + + - MDCR_EL2.EBWE: the Extended Breakpoint and Watchpoint Enable bit + is RES0 unless FEAT_Debugv8p9 is implemented (D24.3.17). + +Neither register has any dependency on PMUVer. + +FEAT_Debugv8p9 and FEAT_PMUv3p9 are independent. Per DDI0487 M.b +A2.2.10, FEAT_Debugv8p9 is unconditionally mandatory from Armv8.9, +whereas FEAT_PMUv3p9 is mandatory only when FEAT_PMUv3 is implemented. +An Armv8.9 CPU without a PMU has DebugVer = 0b1011 but PMUVer = 0b0000, +so the wrong field check would cause KVM to incorrectly treat EBWE and +MDSELR_EL1 as RES0 on such hardware. 
+ +Fixes: 4bc0fe089840 ("KVM: arm64: Add sanitisation for FEAT_FGT2 registers") +Signed-off-by: Fuad Tabba +Link: https://patch.msgid.link/20260424084908.370776-2-tabba@google.com +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/config.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/kvm/config.c ++++ b/arch/arm64/kvm/config.c +@@ -191,7 +191,7 @@ struct reg_feat_map_desc { + #define FEAT_SRMASK ID_AA64MMFR4_EL1, SRMASK, IMP + #define FEAT_PoPS ID_AA64MMFR4_EL1, PoPS, IMP + #define FEAT_PFAR ID_AA64PFR1_EL1, PFAR, IMP +-#define FEAT_Debugv8p9 ID_AA64DFR0_EL1, PMUVer, V3P9 ++#define FEAT_Debugv8p9 ID_AA64DFR0_EL1, DebugVer, V8P9 + #define FEAT_PMUv3_SS ID_AA64DFR0_EL1, PMSS, IMP + #define FEAT_SEBEP ID_AA64DFR0_EL1, SEBEP, IMP + #define FEAT_EBEP ID_AA64DFR1_EL1, EBEP, IMP diff --git a/queue-7.0/kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch b/queue-7.0/kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch new file mode 100644 index 0000000000..515bec07b2 --- /dev/null +++ b/queue-7.0/kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch @@ -0,0 +1,77 @@ +From 08d715338287a1affb4c7ad5733decef4558a5c8 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Fri, 24 Apr 2026 09:49:05 +0100 +Subject: KVM: arm64: Fix FEAT_SPE_FnE to use PMSIDR_EL1.FnE, not PMSVer + +From: Fuad Tabba + +commit 08d715338287a1affb4c7ad5733decef4558a5c8 upstream. + +FEAT_SPE_FnE is architecturally detected via PMSIDR_EL1.FnE [6], not +ID_AA64DFR0_EL1.PMSVer. The FEAT_X macro form (register, field, value) +cannot encode a PMSIDR_EL1-based feature, so FEAT_SPE_FnE was defined +identically to FEAT_SPEv1p2 (ID_AA64DFR0_EL1, PMSVer, V1P2), producing +a duplicate that used PMSVer >= V1P2 as a proxy. + +Replace the macro with feat_spe_fne(), following the same pattern as +the sibling feat_spe_fds(): guard on FEAT_SPEv1p2 and read +PMSIDR_EL1.FnE [6] directly. 
Wire the two NEEDS_FEAT consumers to use +the new function. + +Remove the now-unused FEAT_SPE_FnE macro. + +Fixes: 63d423a7635b ("KVM: arm64: Switch to table-driven FGU configuration") +Signed-off-by: Fuad Tabba +Link: https://patch.msgid.link/20260424084908.370776-4-tabba@google.com +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/config.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/arch/arm64/kvm/config.c ++++ b/arch/arm64/kvm/config.c +@@ -131,7 +131,6 @@ struct reg_feat_map_desc { + } + + #define FEAT_SPE ID_AA64DFR0_EL1, PMSVer, IMP +-#define FEAT_SPE_FnE ID_AA64DFR0_EL1, PMSVer, V1P2 + #define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP + #define FEAT_TRC_SR ID_AA64DFR0_EL1, TraceVer, IMP + #define FEAT_PMUv3 ID_AA64DFR0_EL1, PMUVer, IMP +@@ -301,6 +300,16 @@ static bool feat_spe_fds(struct kvm *kvm + (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FDS)); + } + ++static bool feat_spe_fne(struct kvm *kvm) ++{ ++ /* ++ * Revisit this if KVM ever supports SPE -- this really should ++ * look at the guest's view of PMSIDR_EL1. 
++ */ ++ return (kvm_has_feat(kvm, FEAT_SPEv1p2) && ++ (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FnE)); ++} ++ + static bool feat_trbe_mpam(struct kvm *kvm) + { + /* +@@ -536,7 +545,7 @@ static const struct reg_bits_to_feat_map + HDFGRTR_EL2_PMBPTR_EL1 | + HDFGRTR_EL2_PMBLIMITR_EL1, + FEAT_SPE), +- NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE), ++ NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, feat_spe_fne), + NEEDS_FEAT(HDFGRTR_EL2_nBRBDATA | + HDFGRTR_EL2_nBRBCTL | + HDFGRTR_EL2_nBRBIDR, +@@ -604,7 +613,7 @@ static const struct reg_bits_to_feat_map + HDFGWTR_EL2_PMBPTR_EL1 | + HDFGWTR_EL2_PMBLIMITR_EL1, + FEAT_SPE), +- NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE), ++ NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, feat_spe_fne), + NEEDS_FEAT(HDFGWTR_EL2_nBRBDATA | + HDFGWTR_EL2_nBRBCTL, + FEAT_BRBE), diff --git a/queue-7.0/kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch b/queue-7.0/kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch new file mode 100644 index 0000000000..1700c548db --- /dev/null +++ b/queue-7.0/kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch @@ -0,0 +1,56 @@ +From 5bb0aed57ba944f8c201e4e82ec066e0187e0f85 Mon Sep 17 00:00:00 2001 +From: Quentin Perret +Date: Fri, 24 Apr 2026 09:49:08 +0100 +Subject: KVM: arm64: Fix initialisation order in __pkvm_init_finalise() + +From: Quentin Perret + +commit 5bb0aed57ba944f8c201e4e82ec066e0187e0f85 upstream. + +fix_host_ownership() walks the hypervisor's stage-1 page-table to +adjust the host's stage-2 accordingly. Any such adjustment that +requires cache maintenance operations depends on the per-CPU hyp +fixmap being present. However, fix_host_ownership() is currently +called before fix_hyp_pgtable_refcnt() and hyp_create_fixmap(), so +the fixmap does not yet exist when it runs. 
+ +This is benign today because the host stage-2 starts empty and no +CMOs are needed, but it becomes a latent crash as soon as +fix_host_ownership() is extended to operate on a non-empty +page-table. + +Reorder the calls so that fix_hyp_pgtable_refcnt() and +hyp_create_fixmap() complete before fix_host_ownership() is invoked. + +Fixes: 0d16d12eb26e ("KVM: arm64: Fix-up hyp stage-1 refcounts for all pages mapped at EL2") +Signed-off-by: Quentin Perret +Signed-off-by: Fuad Tabba +Link: https://patch.msgid.link/20260424084908.370776-7-tabba@google.com +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/hyp/nvhe/setup.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/arm64/kvm/hyp/nvhe/setup.c ++++ b/arch/arm64/kvm/hyp/nvhe/setup.c +@@ -312,15 +312,15 @@ void __noreturn __pkvm_init_finalise(voi + }; + pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; + +- ret = fix_host_ownership(); ++ ret = fix_hyp_pgtable_refcnt(); + if (ret) + goto out; + +- ret = fix_hyp_pgtable_refcnt(); ++ ret = hyp_create_fixmap(); + if (ret) + goto out; + +- ret = hyp_create_fixmap(); ++ ret = fix_host_ownership(); + if (ret) + goto out; + diff --git a/queue-7.0/kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch b/queue-7.0/kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch new file mode 100644 index 0000000000..305a9cadff --- /dev/null +++ b/queue-7.0/kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch @@ -0,0 +1,113 @@ +From 73b9c1e5da84cd69b1a86e374e450817cd051371 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Fri, 24 Apr 2026 09:49:07 +0100 +Subject: KVM: arm64: Fix pin leak and publication ordering in __pkvm_init_vcpu() + +From: Fuad Tabba + +commit 73b9c1e5da84cd69b1a86e374e450817cd051371 upstream. + +Two bugs exist in the vCPU initialisation path: + +1. 
If a check fails after hyp_pin_shared_mem() succeeds, the cleanup + path jumps to 'unlock' without calling unpin_host_vcpu() or + unpin_host_sve_state(), permanently leaking pin references on the + host vCPU and SVE state pages. + + Extract a register_hyp_vcpu() helper that performs the checks and + the store. When register_hyp_vcpu() returns an error, call + unpin_host_vcpu() and unpin_host_sve_state() inline before falling + through to the existing 'unlock' label. + +2. register_hyp_vcpu() publishes the new vCPU pointer into + 'hyp_vm->vcpus[]' with a bare store, allowing a concurrent caller + of pkvm_load_hyp_vcpu() to observe a partially initialised vCPU + object. + + Ensure the store uses smp_store_release() and the load uses + smp_load_acquire(). While 'vm_table_lock' currently serialises the + store and the load, these barriers ensure the reader sees the fully + initialised 'hyp_vcpu' object even if there were a lockless path or + if the lock's own ordering guarantees were insufficient for nested + object initialization. + +Fixes: 49af6ddb8e5c ("KVM: arm64: Add infrastructure to create and track pKVM instances at EL2") +Reported-by: Ben Simner +Co-developed-by: Will Deacon +Signed-off-by: Will Deacon +Signed-off-by: Fuad Tabba +Link: https://patch.msgid.link/20260424084908.370776-6-tabba@google.com +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/hyp/nvhe/pkvm.c | 38 +++++++++++++++++++++++++------------- + 1 file changed, 25 insertions(+), 13 deletions(-) + +--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c ++++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c +@@ -258,7 +258,8 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu + if (!hyp_vm || hyp_vm->kvm.created_vcpus <= vcpu_idx) + goto unlock; + +- hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; ++ /* Pairs with smp_store_release() in register_hyp_vcpu(). 
*/ ++ hyp_vcpu = smp_load_acquire(&hyp_vm->vcpus[vcpu_idx]); + if (!hyp_vcpu) + goto unlock; + +@@ -803,12 +804,30 @@ err_unpin_kvm: + * the page-aligned size of 'struct pkvm_hyp_vcpu'. + * Return 0 on success, negative error code on failure. + */ ++static int register_hyp_vcpu(struct pkvm_hyp_vm *hyp_vm, ++ struct pkvm_hyp_vcpu *hyp_vcpu) ++{ ++ unsigned int idx = hyp_vcpu->vcpu.vcpu_idx; ++ ++ if (idx >= hyp_vm->kvm.created_vcpus) ++ return -EINVAL; ++ ++ if (hyp_vm->vcpus[idx]) ++ return -EINVAL; ++ ++ /* ++ * Ensure the hyp_vcpu is initialised before publishing it to ++ * the vCPU-load path via 'hyp_vm->vcpus[]'. ++ */ ++ smp_store_release(&hyp_vm->vcpus[idx], hyp_vcpu); ++ return 0; ++} ++ + int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, + unsigned long vcpu_hva) + { + struct pkvm_hyp_vcpu *hyp_vcpu; + struct pkvm_hyp_vm *hyp_vm; +- unsigned int idx; + int ret; + + hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu)); +@@ -827,18 +846,11 @@ int __pkvm_init_vcpu(pkvm_handle_t handl + if (ret) + goto unlock; + +- idx = hyp_vcpu->vcpu.vcpu_idx; +- if (idx >= hyp_vm->kvm.created_vcpus) { +- ret = -EINVAL; +- goto unlock; +- } +- +- if (hyp_vm->vcpus[idx]) { +- ret = -EINVAL; +- goto unlock; ++ ret = register_hyp_vcpu(hyp_vm, hyp_vcpu); ++ if (ret) { ++ unpin_host_vcpu(host_vcpu); ++ unpin_host_sve_state(hyp_vcpu); + } +- +- hyp_vm->vcpus[idx] = hyp_vcpu; + unlock: + hyp_spin_unlock(&vm_table_lock); + diff --git a/queue-7.0/kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch b/queue-7.0/kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch new file mode 100644 index 0000000000..093bf8cf0c --- /dev/null +++ b/queue-7.0/kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch @@ -0,0 +1,54 @@ +From a0e6ae45af17e8b27958830595799c702ffbab8d Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Tue, 7 Apr 2026 21:27:02 +0100 +Subject: KVM: arm64: vgic: Fix IIDR revision field extracted 
from wrong value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: David Woodhouse + +commit a0e6ae45af17e8b27958830595799c702ffbab8d upstream. + +The uaccess write handlers for GICD_IIDR in both GICv2 and GICv3 +extract the revision field from 'reg' (the current IIDR value read back +from the emulated distributor) instead of 'val' (the value userspace is +trying to write). This means userspace can never actually change the +implementation revision — the extracted value is always the current one. + +Fix the FIELD_GET to use 'val' so that userspace can select a different +revision for migration compatibility. + +Fixes: 49a1a2c70a7f ("KVM: arm64: vgic-v3: Advertise GICR_CTLR.{IR, CES} as a new GICD_IIDR revision") +Signed-off-by: David Woodhouse +Link: https://patch.msgid.link/20260407210949.2076251-2-dwmw2@infradead.org +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/vgic/vgic-mmio-v2.c | 2 +- + arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c ++++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c +@@ -91,7 +91,7 @@ static int vgic_mmio_uaccess_write_v2_mi + * migration from old kernels to new kernels with legacy + * userspace. 
+ */ +- reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); ++ reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val); + switch (reg) { + case KVM_VGIC_IMP_REV_2: + case KVM_VGIC_IMP_REV_3: +--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c ++++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c +@@ -194,7 +194,7 @@ static int vgic_mmio_uaccess_write_v3_mi + if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK) + return -EINVAL; + +- reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); ++ reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val); + switch (reg) { + case KVM_VGIC_IMP_REV_2: + case KVM_VGIC_IMP_REV_3: diff --git a/queue-7.0/kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch b/queue-7.0/kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch new file mode 100644 index 0000000000..f3a13591de --- /dev/null +++ b/queue-7.0/kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch @@ -0,0 +1,42 @@ +From 4ce98bf0865c349e7026ad9c14f48da264920953 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Thu, 23 Apr 2026 17:36:07 +0100 +Subject: KVM: arm64: Wake-up from WFI when iqrchip is in userspace + +From: Marc Zyngier + +commit 4ce98bf0865c349e7026ad9c14f48da264920953 upstream. + +It appears that there is nothing in the wake-up path that +evaluates whether the in-kernel interrupts are pending unless +we have a vgic. + +This means that the userspace irqchip support has been broken for +about four years, and nobody noticed. It was also broken before +as we wouldn't wake-up on a PMU interrupt, but hey, who cares... + +It is probably time to remove the feature altogether, because it +was a terrible idea 10 years ago, and it still is. 
+ +Fixes: b57de4ffd7c6d ("KVM: arm64: Simplify kvm_cpu_has_pending_timer()") +Link: https://patch.msgid.link/20260423163607.486345-1-maz@kernel.org +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/arm.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/arm64/kvm/arm.c ++++ b/arch/arm64/kvm/arm.c +@@ -805,6 +805,10 @@ int kvm_arch_vcpu_runnable(struct kvm_vc + { + bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE); + ++ irq_lines |= (!irqchip_in_kernel(v->kvm) && ++ (kvm_timer_should_notify_user(v) || ++ kvm_pmu_should_notify_user(v))); ++ + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) + && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); + } diff --git a/queue-7.0/loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch b/queue-7.0/loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch new file mode 100644 index 0000000000..b64333b2d8 --- /dev/null +++ b/queue-7.0/loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch @@ -0,0 +1,103 @@ +From 8dfa2f8780e486d05b9a0ffce70b8f5fbd62053e Mon Sep 17 00:00:00 2001 +From: Wentao Guan +Date: Mon, 4 May 2026 09:00:20 +0800 +Subject: LoongArch: Fix potential ADE in loongson_gpu_fixup_dma_hang() + +From: Wentao Guan + +commit 8dfa2f8780e486d05b9a0ffce70b8f5fbd62053e upstream. + +The switch in loongson_gpu_fixup_dma_hang() may match neither DC2 nor DC3, +so readl(crtc_reg) accesses a random address, because "device" is read +from "base+PCI_DEVICE_ID" and "base" is from "pdev->devfn+1". This goes wrong +when a discrete GPU is inserted into my platform: + +lspci -tv +-[0000:00]-+-00.0 Loongson Technology LLC Hyper Transport Bridge Controller +... + +-06.0 Loongson Technology LLC LG100 GPU + +-06.2 Loongson Technology LLC Device 7a37 +... 
+ +Add a default switch case to fix the panic as below: + + Kernel ade access[#1]: + CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.136-loong64-desktop-hwe+ #4 + pc 90000000017e5534 ra 90000000017e54c0 tp 90000001002f8000 sp 90000001002fb6c0 + a0 80000efe00003100 a1 0000000000003100 a2 0000000000000000 a3 0000000000000002 + a4 90000001002fb6b4 a5 900000087cdb58fd a6 90000000027af000 a7 0000000000000001 + t0 00000000000085b9 t1 000000000000ffff t2 0000000000000000 t3 0000000000000000 + t4 fffffffffffffffd t5 00000000fffb6d9c t6 0000000000083b00 t7 00000000000070c0 + t8 900000087cdb4d94 u0 900000087cdb58fd s9 90000001002fb826 s0 90000000031c12c8 + s1 7fffffffffffff00 s2 90000000031c12d0 s3 0000000000002710 s4 0000000000000000 + s5 0000000000000000 s6 9000000100053000 s7 7fffffffffffff00 s8 90000000030d4000 + ra: 90000000017e54c0 loongson_gpu_fixup_dma_hang+0x40/0x210 + ERA: 90000000017e5534 loongson_gpu_fixup_dma_hang+0xb4/0x210 + CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) + PRMD: 00000004 (PPLV0 +PIE -PWE) + EUEN: 00000000 (-FPE -SXE -ASXE -BTE) + ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) + ESTAT: 00480000 [ADEM] (IS= ECode=8 EsubCode=1) + BADV: 7fffffffffffff00 + PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV) + Modules linked in: + Process swapper/0 (pid: 1, threadinfo=(____ptrval____), task=(____ptrval____)) + Stack : 0000000000000006 90000001002fb778 90000001002fb704 0000000000000007 + 0000000016a65700 90000000017e5690 000000000000ffff ffffffffffffffff + 900000000209f7c0 9000000100053000 900000000209f7a8 9000000000eebc08 + 0000000000000000 0000000000000000 0000000000000006 90000001002fb778 + 90000001000530b8 90000000027af000 0000000000000000 9000000100054000 + 9000000100053000 9000000000ebb70c 9000000100004c00 9000000004000001 + 90000001002fb7e4 bae765461f31cb12 0000000000000000 0000000000000000 + 0000000000000006 90000000027af000 0000000000000030 90000000027af000 + 900000087cd6f800 9000000100053000 0000000000000000 9000000000ebc560 + 7a2500147cdaf720 
bae765461f31cb12 0000000000000001 0000000000000030 + ... + Call Trace: + [<90000000017e5534>] loongson_gpu_fixup_dma_hang+0xb4/0x210 + [<9000000000eebc08>] pci_fixup_device+0x108/0x280 + [<9000000000ebb70c>] pci_setup_device+0x24c/0x690 + [<9000000000ebc560>] pci_scan_single_device+0xe0/0x140 + [<9000000000ebc684>] pci_scan_slot+0xc4/0x280 + [<9000000000ebdd00>] pci_scan_child_bus_extend+0x60/0x3f0 + [<9000000000f5bc94>] acpi_pci_root_create+0x2b4/0x420 + [<90000000017e5e74>] pci_acpi_scan_root+0x2d4/0x440 + [<9000000000f5b02c>] acpi_pci_root_add+0x21c/0x3a0 + [<9000000000f4ee54>] acpi_bus_attach+0x1a4/0x3c0 + [<90000000010e200c>] device_for_each_child+0x6c/0xe0 + [<9000000000f4bbf4>] acpi_dev_for_each_child+0x44/0x70 + [<9000000000f4ef40>] acpi_bus_attach+0x290/0x3c0 + [<90000000010e200c>] device_for_each_child+0x6c/0xe0 + [<9000000000f4bbf4>] acpi_dev_for_each_child+0x44/0x70 + [<9000000000f4ef40>] acpi_bus_attach+0x290/0x3c0 + [<9000000000f5211c>] acpi_bus_scan+0x6c/0x280 + [<900000000189c028>] acpi_scan_init+0x194/0x310 + [<900000000189bc6c>] acpi_init+0xcc/0x140 + [<9000000000220cdc>] do_one_initcall+0x4c/0x310 + [<90000000018618fc>] kernel_init_freeable+0x258/0x2d4 + [<900000000184326c>] kernel_init+0x28/0x13c + [<9000000000222008>] ret_from_kernel_thread+0xc/0xa4 + +Cc: stable@vger.kernel.org +Fixes: 95db0c9f526d ("LoongArch: Workaround LS2K/LS7A GPU DMA hang bug") +Link: https://gist.github.com/opsiff/ebf2dac51b4013d22462f2124c55f807 +Link: https://gist.github.com/opsiff/a62f2a73db0492b3c49bf223a339b133 +Signed-off-by: Wentao Guan +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/pci/pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/loongarch/pci/pci.c ++++ b/arch/loongarch/pci/pci.c +@@ -132,6 +132,9 @@ static void loongson_gpu_fixup_dma_hang( + crtc_reg = regbase; + crtc_offset = 0x400; + break; ++ default: ++ iounmap(regbase); ++ return; + } + + for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) { 
diff --git a/queue-7.0/loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch b/queue-7.0/loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch new file mode 100644 index 0000000000..821d11438e --- /dev/null +++ b/queue-7.0/loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch @@ -0,0 +1,35 @@ +From b3e31a6650d4cab63f0814c37c0b360372c6ee9e Mon Sep 17 00:00:00 2001 +From: Qiang Ma +Date: Mon, 4 May 2026 09:00:37 +0800 +Subject: LoongArch: KVM: Cap KVM_CAP_NR_VCPUS by KVM_CAP_MAX_VCPUS + +From: Qiang Ma + +commit b3e31a6650d4cab63f0814c37c0b360372c6ee9e upstream. + +It doesn't make sense to return the recommended maximum number of vCPUs +which exceeds the maximum possible number of vCPUs. + +Other architectures have already done this, such as commit 57a2e13ebdda +("KVM: MIPS: Cap KVM_CAP_NR_VCPUS by KVM_CAP_MAX_VCPUS") + +Cc: stable@vger.kernel.org +Reviewed-by: Bibo Mao +Signed-off-by: Qiang Ma +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kvm/vm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/loongarch/kvm/vm.c ++++ b/arch/loongarch/kvm/vm.c +@@ -125,7 +125,7 @@ int kvm_vm_ioctl_check_extension(struct + r = 1; + break; + case KVM_CAP_NR_VCPUS: +- r = num_online_cpus(); ++ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); + break; + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; diff --git a/queue-7.0/loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch b/queue-7.0/loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch new file mode 100644 index 0000000000..148bb85f69 --- /dev/null +++ b/queue-7.0/loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch @@ -0,0 +1,74 @@ +From 2433f3f5724b3af569d9fb411ba728629524738b Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Mon, 4 May 2026 09:00:48 +0800 +Subject: LoongArch: KVM: Fix HW timer interrupt lost when inject interrupt by software + +From: Bibo 
Mao + +commit 2433f3f5724b3af569d9fb411ba728629524738b upstream. + +With passthrough HW timer, timer interrupt is injected by HW. When +injecting emulated CPU interrupt by software such as SIP0/SIP1/IPI, HW timer +interrupt may be lost. + +Here check whether there is timer tick value inversion before and after +injecting emulated CPU interrupt by software, timer enabling by reading +timer cfg register is skipped. If the timer tick value is detected with +changing, then timer should be enabled. And inject a timer interrupt by +software if there is. + +Cc: stable@vger.kernel.org +Fixes: f45ad5b8aa93 ("LoongArch: KVM: Implement vcpu interrupt operations"). +Signed-off-by: Bibo Mao +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kvm/interrupt.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/arch/loongarch/kvm/interrupt.c ++++ b/arch/loongarch/kvm/interrupt.c +@@ -27,6 +27,7 @@ static unsigned int priority_to_irq[EXCC + static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) + { + unsigned int irq = 0; ++ unsigned long old, new; + + clear_bit(priority, &vcpu->arch.irq_pending); + if (priority < EXCCODE_INT_NUM) +@@ -42,7 +43,13 @@ static int kvm_irq_deliver(struct kvm_vc + case INT_IPI: + case INT_SWI0: + case INT_SWI1: ++ old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); + set_gcsr_estat(irq); ++ new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); ++ ++ /* Inject TI if TVAL inverted */ ++ if (new > old) ++ set_gcsr_estat(CPU_TIMER); + break; + + case INT_HWI0 ... 
INT_HWI7: +@@ -59,6 +66,7 @@ static int kvm_irq_deliver(struct kvm_vc + static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority) + { + unsigned int irq = 0; ++ unsigned long old, new; + + clear_bit(priority, &vcpu->arch.irq_clear); + if (priority < EXCCODE_INT_NUM) +@@ -74,7 +82,13 @@ static int kvm_irq_clear(struct kvm_vcpu + case INT_IPI: + case INT_SWI0: + case INT_SWI1: ++ old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); + clear_gcsr_estat(irq); ++ new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); ++ ++ /* Inject TI if TVAL inverted */ ++ if (new > old) ++ set_gcsr_estat(CPU_TIMER); + break; + + case INT_HWI0 ... INT_HWI7: diff --git a/queue-7.0/loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch b/queue-7.0/loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch new file mode 100644 index 0000000000..7cd78e93bd --- /dev/null +++ b/queue-7.0/loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch @@ -0,0 +1,34 @@ +From b323a441da602dfdfc24f30d3190cac786ffebf2 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 4 May 2026 09:00:37 +0800 +Subject: LoongArch: KVM: Fix "unreliable stack" for kvm_exc_entry + +From: Xianglai Li + +commit b323a441da602dfdfc24f30d3190cac786ffebf2 upstream. + +Insert the appropriate UNWIND hint into the kvm_exc_entry assembly +function to guide the generation of correct ORC table entries, thereby +solving the timeout problem ("unreliable stack") while loading the +livepatch-sample module on a physical machine running virtual machines +with multiple vcpus. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Xianglai Li +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kvm/switch.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/loongarch/kvm/switch.S ++++ b/arch/loongarch/kvm/switch.S +@@ -111,7 +111,7 @@ + .p2align PAGE_SHIFT + .cfi_sections .debug_frame + SYM_CODE_START(kvm_exc_entry) +- UNWIND_HINT_UNDEFINED ++ UNWIND_HINT_END_OF_STACK + csrwr a2, KVM_TEMP_KS + csrrd a2, KVM_VCPU_KS + addi.d a2, a2, KVM_VCPU_ARCH diff --git a/queue-7.0/loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch b/queue-7.0/loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch new file mode 100644 index 0000000000..bc4028643f --- /dev/null +++ b/queue-7.0/loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch @@ -0,0 +1,59 @@ +From 5a873d77ba792410a796595a917be6a440f9b7d2 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Mon, 4 May 2026 09:00:48 +0800 +Subject: LoongArch: KVM: Move unconditional delay into timer clear scenery + +From: Bibo Mao + +commit 5a873d77ba792410a796595a917be6a440f9b7d2 upstream. + +When timer interrupt arrives in guest kernel, guest kernel clears the +timer interrupt and program timer with the next incoming event. + +During this stage, timer tick is -1 and timer interrupt status is +disabled in ESTAT register. KVM hypervisor need write zero with timer +tick register and wait timer interrupt injection from HW side, and +then clear timer interrupt. + +So there is 2 cycle delay in KVM hypervisor to emulate such scenery, +and the delay is unnecessary if there is no need to clear the timer +interrupt. + +Here move 2 cycle delay into timer clear scenery and add timer ESTAT +checking after delay, and set max timer expire value if timer interrupt +does not arrive still. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Bibo Mao +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kvm/timer.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/arch/loongarch/kvm/timer.c ++++ b/arch/loongarch/kvm/timer.c +@@ -96,15 +96,21 @@ void kvm_restore_timer(struct kvm_vcpu * + * and set CSR TVAL with -1 + */ + write_gcsr_timertick(0); +- __delay(2); /* Wait cycles until timer interrupt injected */ + + /* + * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear + * timer interrupt, and CSR TVAL keeps unchanged with -1, it + * avoids spurious timer interrupt + */ +- if (!(estat & CPU_TIMER)) ++ if (!(estat & CPU_TIMER)) { ++ __delay(2); /* Wait cycles until timer interrupt injected */ ++ ++ /* Write TVAL with max value if no TI shot */ ++ estat = kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT); ++ if (!(estat & CPU_TIMER)) ++ write_gcsr_timertick(CSR_TCFG_VAL); + gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); ++ } + return; + } + diff --git a/queue-7.0/loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch b/queue-7.0/loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch new file mode 100644 index 0000000000..9e56f56ddc --- /dev/null +++ b/queue-7.0/loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch @@ -0,0 +1,35 @@ +From 81e18777d61440511451866c7c80b34a8bdd6b33 Mon Sep 17 00:00:00 2001 +From: Tao Cui +Date: Mon, 4 May 2026 09:00:38 +0800 +Subject: LoongArch: KVM: Use kvm_set_pte() in kvm_flush_pte() + +From: Tao Cui + +commit 81e18777d61440511451866c7c80b34a8bdd6b33 upstream. + +kvm_flush_pte() is the only caller that directly assigns *pte instead +of using the kvm_set_pte() wrapper. Use the wrapper for consistency with +the rest of the file. + +No functional change intended. 
+ +Cc: stable@vger.kernel.org +Reviewed-by: Bibo Mao +Signed-off-by: Tao Cui +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kvm/mmu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/loongarch/kvm/mmu.c ++++ b/arch/loongarch/kvm/mmu.c +@@ -95,7 +95,7 @@ static int kvm_flush_pte(kvm_pte_t *pte, + else + kvm->stat.pages--; + +- *pte = ctx->invalid_entry; ++ kvm_set_pte(pte, ctx->invalid_entry); + + return 1; + } diff --git a/queue-7.0/loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch b/queue-7.0/loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch new file mode 100644 index 0000000000..2ed5a06420 --- /dev/null +++ b/queue-7.0/loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch @@ -0,0 +1,58 @@ +From 49f33840dcc907d21313d369e34872880846b61c Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Mon, 4 May 2026 09:00:20 +0800 +Subject: LoongArch: Use per-root-bridge PCIH flag to skip mem resource fixup + +From: Huacai Chen + +commit 49f33840dcc907d21313d369e34872880846b61c upstream. + +When firmware enables 64-bit PCI host bridge support, some root bridges +already provide valid 64-bit mem resource windows through ACPI. + +In this case, the LoongArch-specific mem resource high-bits fixup in +acpi_prepare_root_resources() should not be applied unconditionally. +Otherwise, the kernel may override the native resource layout derived +from firmware, and later BAR assignment can fail to place device BARs +into the intended 64-bit address space correctly. + +Add a per-root-bridge ACPI flag, PCIH, and evaluate it from the current +root bridge device scope. When PCIH is set, skip the mem resource high- +bits fixup path and let the kernel use the firmware-provided resource +description directly. When PCIH is absent or cleared, keep the existing +behavior and continue filling the high address bits from the host bridge +address. 
+ +This makes the behavior per-root-bridge configurable and avoids breaking +valid 64-bit BAR space allocation on bridges whose 64-bit windows have +already been fully described by firmware. + +Cc: stable@vger.kernel.org +Suggested-by: Chao Li +Tested-by: Dongyan Qian +Signed-off-by: Dongyan Qian +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/pci/acpi.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/loongarch/pci/acpi.c ++++ b/arch/loongarch/pci/acpi.c +@@ -61,11 +61,16 @@ static void acpi_release_root_info(struc + static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) + { + int status; ++ unsigned long long pci_h = 0; + struct resource_entry *entry, *tmp; + struct acpi_device *device = ci->bridge; + + status = acpi_pci_probe_root_resources(ci); + if (status > 0) { ++ acpi_evaluate_integer(device->handle, "PCIH", NULL, &pci_h); ++ if (pci_h) ++ return status; ++ + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { + if (entry->res->flags & IORESOURCE_MEM) { + entry->offset = ci->root->mcfg_addr & GENMASK_ULL(63, 40); diff --git a/queue-7.0/mptcp-fastclose-msk-when-linger-time-is-0.patch b/queue-7.0/mptcp-fastclose-msk-when-linger-time-is-0.patch new file mode 100644 index 0000000000..303bd7af5a --- /dev/null +++ b/queue-7.0/mptcp-fastclose-msk-when-linger-time-is-0.patch @@ -0,0 +1,51 @@ +From f14d6e9c3678a067f304abba561e0c5446c7e845 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Mon, 27 Apr 2026 21:54:35 +0200 +Subject: mptcp: fastclose msk when linger time is 0 + +From: Matthieu Baerts (NGI0) + +commit f14d6e9c3678a067f304abba561e0c5446c7e845 upstream. + +The SO_LINGER socket option has been supported for a while with MPTCP +sockets [1], but it didn't cause the equivalent of a TCP reset as +expected when enabled and its time was set to 0. This was causing some +behavioural differences with TCP where some connections were not +promptly stopped as expected. 
+ +To fix that, an extra condition is checked at close() time before +sending an MP_FASTCLOSE, the MPTCP equivalent of a TCP reset. + +Note that backporting up to [1] will be difficult as more changes are +needed to be able to send MP_FASTCLOSE. It seems better to stop at [2], +which was supposed to already imitate TCP. + +Validated with MPTCP packetdrill tests [3]. + +Fixes: 268b12387460 ("mptcp: setsockopt: support SO_LINGER") [1] +Fixes: d21f83485518 ("mptcp: use fastclose on more edge scenarios") [2] +Cc: stable@vger.kernel.org +Reported-by: Lance Tuller +Closes: https://github.com/lance0/xfr/pull/67 +Link: https://github.com/multipath-tcp/packetdrill/pull/196 [3] +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-3-7432b7f279fa@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -3279,7 +3279,8 @@ bool __mptcp_close(struct sock *sk, long + goto cleanup; + } + +- if (mptcp_data_avail(msk) || timeout < 0) { ++ if (mptcp_data_avail(msk) || timeout < 0 || ++ (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { + /* If the msk has read data, or the caller explicitly ask it, + * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose + */ diff --git a/queue-7.0/mptcp-fix-rx-timestamp-corruption-on-fastopen.patch b/queue-7.0/mptcp-fix-rx-timestamp-corruption-on-fastopen.patch new file mode 100644 index 0000000000..8f4cc20606 --- /dev/null +++ b/queue-7.0/mptcp-fix-rx-timestamp-corruption-on-fastopen.patch @@ -0,0 +1,50 @@ +From 6254a16d6f0c672e3809ca5d7c9a28a55d71f764 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Fri, 1 May 2026 21:35:36 +0200 +Subject: mptcp: fix rx timestamp corruption on fastopen + +From: Paolo Abeni + +commit 6254a16d6f0c672e3809ca5d7c9a28a55d71f764 upstream. 
+ +The skb cb offset containing the timestamp presence flag is cleared +before loading such information. Cache such value before MPTCP CB +initialization. + +Fixes: 36b122baf6a8 ("mptcp: add subflow_v(4,6)_send_synack()") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-3-b70118df778e@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/fastopen.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/mptcp/fastopen.c ++++ b/net/mptcp/fastopen.c +@@ -12,6 +12,7 @@ void mptcp_fastopen_subflow_synack_set_p + struct sock *sk, *ssk; + struct sk_buff *skb; + struct tcp_sock *tp; ++ bool has_rxtstamp; + + /* on early fallback the subflow context is deleted by + * subflow_syn_recv_sock() +@@ -40,12 +41,13 @@ void mptcp_fastopen_subflow_synack_set_p + */ + tp->copied_seq += skb->len; + subflow->ssn_offset += skb->len; ++ has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + + /* Only the sequence delta is relevant */ + MPTCP_SKB_CB(skb)->map_seq = -skb->len; + MPTCP_SKB_CB(skb)->end_seq = 0; + MPTCP_SKB_CB(skb)->offset = 0; +- MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; ++ MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; + MPTCP_SKB_CB(skb)->cant_coalesce = 1; + + mptcp_data_lock(sk); diff --git a/queue-7.0/mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch b/queue-7.0/mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch new file mode 100644 index 0000000000..9d1a25bae3 --- /dev/null +++ b/queue-7.0/mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch @@ -0,0 +1,57 @@ +From b5c52908d52c6c8eb8933264aa6087a0600fd892 Mon Sep 17 00:00:00 2001 +From: Gang Yan +Date: Mon, 27 Apr 2026 21:54:34 +0200 +Subject: mptcp: fix scheduling with atomic in timestamp sockopt + +From: Gang Yan + +commit b5c52908d52c6c8eb8933264aa6087a0600fd892 
upstream. + +Using lock_sock_fast() (atomic context) around sock_set_timestamp() +and sock_set_timestamping() is unsafe, as both helpers can sleep. + +Replace lock_sock_fast() with sleepable lock_sock()/release_sock() +to avoid scheduling while atomic panic. + +Fixes: 9061f24bf82e ("mptcp: sockopt: propagate timestamp request to subflows") +Cc: stable@vger.kernel.org +Reported-by: Sashiko +Closes: https://sashiko.dev/#/patchset/20260420093343.16443-1-gang.yan@linux.dev +Signed-off-by: Gang Yan +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-2-7432b7f279fa@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/sockopt.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/mptcp/sockopt.c ++++ b/net/mptcp/sockopt.c +@@ -159,10 +159,10 @@ static int mptcp_setsockopt_sol_socket_t + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); +- bool slow = lock_sock_fast(ssk); + ++ lock_sock(ssk); + sock_set_timestamp(ssk, optname, !!val); +- unlock_sock_fast(ssk, slow); ++ release_sock(ssk); + } + + release_sock(sk); +@@ -235,10 +235,10 @@ static int mptcp_setsockopt_sol_socket_t + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); +- bool slow = lock_sock_fast(ssk); + ++ lock_sock(ssk); + sock_set_timestamping(ssk, optname, timestamping); +- unlock_sock_fast(ssk, slow); ++ release_sock(ssk); + } + + release_sock(sk); diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-allow-id-0.patch b/queue-7.0/mptcp-pm-add_addr-rtx-allow-id-0.patch new file mode 100644 index 0000000000..ba7a210357 --- /dev/null +++ b/queue-7.0/mptcp-pm-add_addr-rtx-allow-id-0.patch @@ -0,0 +1,39 @@ +From 03f324f3f1f7619a47b9c91282cb12775ab0a2f1 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:50 +0200 +Subject: 
mptcp: pm: ADD_ADDR rtx: allow ID 0 + +From: Matthieu Baerts (NGI0) + +commit 03f324f3f1f7619a47b9c91282cb12775ab0a2f1 upstream. + +ADD_ADDR can be sent for the ID 0, which corresponds to the local +address and port linked to the initial subflow. + +Indeed, this address could be removed, and re-added later on, e.g. what +is done in the "delete re-add signal" MPTCP Join selftests. So no reason +to ignore it. + +Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-2-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -350,9 +350,6 @@ static void mptcp_pm_add_timer(struct ti + if (inet_sk_state_load(sk) == TCP_CLOSE) + return; + +- if (!entry->addr.id) +- return; +- + if (mptcp_pm_should_add_signal_addr(msk)) { + sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + goto out; diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch b/queue-7.0/mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch new file mode 100644 index 0000000000..8a43e35ee2 --- /dev/null +++ b/queue-7.0/mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch @@ -0,0 +1,55 @@ +From 9634cb35af17019baec21ca648516ce376fa10e6 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:52 +0200 +Subject: mptcp: pm: ADD_ADDR rtx: always decrease sk refcount + +From: Matthieu Baerts (NGI0) + +commit 9634cb35af17019baec21ca648516ce376fa10e6 upstream. + +When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(). +It should then be released in all cases at the end. + +Some (unlikely) checks were returning directly instead of calling +sock_put() to decrease the refcount. 
Jump to a new 'exit' label to call +__sock_put() (which will become sock_put() in the next commit) to fix +this potential leak. + +While at it, drop the '!msk' check which cannot happen because it is +never reset, and explicitly mark the remaining one as "unlikely". + +Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-4-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -344,11 +344,8 @@ static void mptcp_pm_add_timer(struct ti + + pr_debug("msk=%p\n", msk); + +- if (!msk) +- return; +- +- if (inet_sk_state_load(sk) == TCP_CLOSE) +- return; ++ if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) ++ goto exit; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { +@@ -386,6 +383,7 @@ static void mptcp_pm_add_timer(struct ti + + out: + bh_unlock_sock(sk); ++exit: + __sock_put(sk); + } + diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-fix-potential-data-race.patch b/queue-7.0/mptcp-pm-add_addr-rtx-fix-potential-data-race.patch new file mode 100644 index 0000000000..931596c21f --- /dev/null +++ b/queue-7.0/mptcp-pm-add_addr-rtx-fix-potential-data-race.patch @@ -0,0 +1,51 @@ +From 5cd6e0ad79d2615264f63929f8b457ad97ae550d Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:51 +0200 +Subject: mptcp: pm: ADD_ADDR rtx: fix potential data-race + +From: Matthieu Baerts (NGI0) + +commit 5cd6e0ad79d2615264f63929f8b457ad97ae550d upstream. + +This mptcp_pm_add_timer() helper is executed as a timer callback in +softirq context. To avoid any data races, the socket lock needs to be +held with bh_lock_sock(). 
+ +If the socket is in use, retry again soon after, similar to what is done +with the keepalive timer. + +Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-3-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -350,6 +350,13 @@ static void mptcp_pm_add_timer(struct ti + if (inet_sk_state_load(sk) == TCP_CLOSE) + return; + ++ bh_lock_sock(sk); ++ if (sock_owned_by_user(sk)) { ++ /* Try again later. */ ++ sk_reset_timer(sk, timer, jiffies + HZ / 20); ++ goto out; ++ } ++ + if (mptcp_pm_should_add_signal_addr(msk)) { + sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + goto out; +@@ -378,6 +385,7 @@ static void mptcp_pm_add_timer(struct ti + mptcp_pm_subflow_established(msk); + + out: ++ bh_unlock_sock(sk); + __sock_put(sk); + } + diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-free-sk-if-last.patch b/queue-7.0/mptcp-pm-add_addr-rtx-free-sk-if-last.patch new file mode 100644 index 0000000000..012fbbd7a5 --- /dev/null +++ b/queue-7.0/mptcp-pm-add_addr-rtx-free-sk-if-last.patch @@ -0,0 +1,116 @@ +From b7b9a461569734d33d3259d58d2507adfac107ed Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:53 +0200 +Subject: mptcp: pm: ADD_ADDR rtx: free sk if last + +From: Matthieu Baerts (NGI0) + +commit b7b9a461569734d33d3259d58d2507adfac107ed upstream. + +When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(), +and released at the end. + +If at that moment, it was the last reference being held, the sk would +not be freed. sock_put() should then be called instead of __sock_put(). 
+ +But that's not enough: if it is the last reference, sock_put() will call +sk_free(), which will end up calling sk_stop_timer_sync() on the same +timer, and waiting indefinitely to finish. So it is needed to mark that +the timer is done at the end of the timer handler when it has not been +rescheduled, not to call sk_stop_timer_sync() on "itself". + +Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-5-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 28 ++++++++++++++++++---------- + 1 file changed, 18 insertions(+), 10 deletions(-) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -16,6 +16,7 @@ struct mptcp_pm_add_entry { + struct list_head list; + struct mptcp_addr_info addr; + u8 retrans_times; ++ bool timer_done; + struct timer_list add_timer; + struct mptcp_sock *sock; + struct rcu_head rcu; +@@ -340,22 +341,22 @@ static void mptcp_pm_add_timer(struct ti + add_timer); + struct mptcp_sock *msk = entry->sock; + struct sock *sk = (struct sock *)msk; +- unsigned int timeout; ++ unsigned int timeout = 0; + + pr_debug("msk=%p\n", msk); + ++ bh_lock_sock(sk); + if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) +- goto exit; ++ goto out; + +- bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. 
*/ +- sk_reset_timer(sk, timer, jiffies + HZ / 20); ++ timeout = HZ / 20; + goto out; + } + + if (mptcp_pm_should_add_signal_addr(msk)) { +- sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); ++ timeout = TCP_RTO_MAX / 8; + goto out; + } + +@@ -373,8 +374,9 @@ static void mptcp_pm_add_timer(struct ti + } + + if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) +- sk_reset_timer(sk, timer, +- jiffies + (timeout << entry->retrans_times)); ++ timeout <<= entry->retrans_times; ++ else ++ timeout = 0; + + spin_unlock_bh(&msk->pm.lock); + +@@ -382,9 +384,13 @@ static void mptcp_pm_add_timer(struct ti + mptcp_pm_subflow_established(msk); + + out: ++ if (timeout) ++ sk_reset_timer(sk, timer, jiffies + timeout); ++ else ++ /* if sock_put calls sk_free: avoid waiting for this timer */ ++ entry->timer_done = true; + bh_unlock_sock(sk); +-exit: +- __sock_put(sk); ++ sock_put(sk); + } + + struct mptcp_pm_add_entry * +@@ -447,6 +453,7 @@ bool mptcp_pm_alloc_anno_list(struct mpt + + timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); + reset_timer: ++ add_entry->timer_done = false; + timeout = mptcp_adjust_add_addr_timeout(msk); + if (timeout) + sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); +@@ -467,7 +474,8 @@ static void mptcp_pm_free_anno_list(stru + spin_unlock_bh(&msk->pm.lock); + + list_for_each_entry_safe(entry, tmp, &free_list, list) { +- sk_stop_timer_sync(sk, &entry->add_timer); ++ if (!entry->timer_done) ++ sk_stop_timer_sync(sk, &entry->add_timer); + kfree_rcu(entry, rcu); + } + } diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch b/queue-7.0/mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch new file mode 100644 index 0000000000..935ce7cd2e --- /dev/null +++ b/queue-7.0/mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch @@ -0,0 +1,54 @@ +From 3cf12492891c4b5ff54dda404a2de4ec54c9e1b5 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:54 +0200 +Subject: mptcp: 
pm: ADD_ADDR rtx: resched blocked ADD_ADDR quicker + +From: Matthieu Baerts (NGI0) + +commit 3cf12492891c4b5ff54dda404a2de4ec54c9e1b5 upstream. + +When an ADD_ADDR needs to be retransmitted and another one has already +been prepared -- e.g. multiple ADD_ADDRs have been sent in a row and +need to be retransmitted later -- this additional retransmission will +need to wait. + +In this case, the timer was reset to TCP_RTO_MAX / 8, which is ~15 +seconds. This delay is unnecessarily long: it should just be rescheduled +at the next opportunity, e.g. after the retransmission timeout. + +Without this modification, some issues can be seen from time to time in +the selftests when multiple ADD_ADDRs are sent, and the host takes time +to process them, e.g. the "signal addresses, ADD_ADDR timeout" MPTCP +Join selftest, especially with a debug kernel config. + +Note that on older kernels, 'timeout' is not available. It should be +enough to replace it by one second (HZ). + +Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-6-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -355,13 +355,8 @@ static void mptcp_pm_add_timer(struct ti + goto out; + } + +- if (mptcp_pm_should_add_signal_addr(msk)) { +- timeout = TCP_RTO_MAX / 8; +- goto out; +- } +- + timeout = mptcp_adjust_add_addr_timeout(msk); +- if (!timeout) ++ if (!timeout || mptcp_pm_should_add_signal_addr(msk)) + goto out; + + spin_lock_bh(&msk->pm.lock); diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch b/queue-7.0/mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch new file mode 100644 index 0000000000..6447ca1341 --- /dev/null +++ 
b/queue-7.0/mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch @@ -0,0 +1,37 @@ +From 62a9b19dce77e72426f049fb99b9d1d032b9a8ea Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:56 +0200 +Subject: mptcp: pm: ADD_ADDR rtx: return early if no retrans + +From: Matthieu Baerts (NGI0) + +commit 62a9b19dce77e72426f049fb99b9d1d032b9a8ea upstream. + +No need to iterate over all subflows if there is no retransmission +needed. + +Exit early in this case then. + +Fixes: 30549eebc4d8 ("mptcp: make ADD_ADDR retransmission timeout adaptive") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-8-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -311,6 +311,9 @@ static unsigned int mptcp_adjust_add_add + struct mptcp_subflow_context *subflow; + unsigned int max = 0, max_stale = 0; + ++ if (!rto) ++ return 0; ++ + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct inet_connection_sock *icsk = inet_csk(ssk); diff --git a/queue-7.0/mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch b/queue-7.0/mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch new file mode 100644 index 0000000000..298a83f5b5 --- /dev/null +++ b/queue-7.0/mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch @@ -0,0 +1,66 @@ +From b12014d2d36eaed4e4bec5f1ac7e91110eeb100d Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:49 +0200 +Subject: mptcp: pm: kernel: correctly retransmit ADD_ADDR ID 0 + +From: Matthieu Baerts (NGI0) + +commit b12014d2d36eaed4e4bec5f1ac7e91110eeb100d upstream. + +When adding the ADD_ADDR to the list, the address including the IP, port +and ID are copied. 
On the other hand, when the endpoint corresponds to +the one from the initial subflow, the ID is set to 0, as specified by +the MPTCP protocol. + +The issue is that the ID was reset after having copied the ID in the +ADD_ADDR entry. So the retransmission was done, but using a different ID +than the initial one. + +Fixes: 8b8ed1b429f8 ("mptcp: pm: reuse ID 0 after delete and re-add") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-1-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm_kernel.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/net/mptcp/pm_kernel.c ++++ b/net/mptcp/pm_kernel.c +@@ -347,6 +347,8 @@ static void mptcp_pm_create_subflow_or_s + + /* check first for announce */ + if (msk->pm.add_addr_signaled < endp_signal_max) { ++ u8 endp_id; ++ + /* due to racing events on both ends we can reach here while + * previous add address is still running: if we invoke now + * mptcp_pm_announce_addr(), that will fail and the +@@ -360,19 +362,20 @@ static void mptcp_pm_create_subflow_or_s + if (!select_signal_address(pernet, msk, &local)) + goto subflow; + ++ /* Special case for ID0: set the correct ID */ ++ endp_id = local.addr.id; ++ if (endp_id == msk->mpc_endpoint_id) ++ local.addr.id = 0; ++ + /* If the alloc fails, we are on memory pressure, not worth + * continuing, and trying to create subflows. 
+ */ + if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) + return; + +- __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); ++ __clear_bit(endp_id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled++; + +- /* Special case for ID0: set the correct ID */ +- if (local.addr.id == msk->mpc_endpoint_id) +- local.addr.id = 0; +- + mptcp_pm_announce_addr(msk, &local.addr, false); + mptcp_pm_addr_send_ack(msk); + diff --git a/queue-7.0/mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch b/queue-7.0/mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch new file mode 100644 index 0000000000..e78272d81b --- /dev/null +++ b/queue-7.0/mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch @@ -0,0 +1,40 @@ +From 1774d3cf3cf17baaf30c095606cda496268283b3 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Mon, 27 Apr 2026 21:54:36 +0200 +Subject: mptcp: pm: kernel: reset fullmesh counter after flush + +From: Matthieu Baerts (NGI0) + +commit 1774d3cf3cf17baaf30c095606cda496268283b3 upstream. + +This variable counts how many MPTCP endpoints have a 'fullmesh' flag +set. After having flushed all MPTCP endpoints, it is then needed to +reset this counter. + +Without this reset, this counter exposed to the userspace is wrong, but +also non-fullmesh endpoints added after the flush will not be taken into +account to create subflows in reaction to ADD_ADDRs. 
+ +Fixes: f88191c7f361 ("mptcp: pm: in-kernel: record fullmesh endp nb") +Cc: stable@vger.kernel.org +Reported-by: Sashiko +Closes: https://sashiko.dev/#/patchset/20260422-mptcp-inc-limits-v6-0-903181771530%40kernel.org?part=15 +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-4-7432b7f279fa@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm_kernel.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/mptcp/pm_kernel.c ++++ b/net/mptcp/pm_kernel.c +@@ -1278,6 +1278,7 @@ static void __reset_counters(struct pm_n + WRITE_ONCE(pernet->endp_signal_max, 0); + WRITE_ONCE(pernet->endp_subflow_max, 0); + WRITE_ONCE(pernet->endp_laminar_max, 0); ++ WRITE_ONCE(pernet->endp_fullmesh_max, 0); + pernet->endpoints = 0; + } + diff --git a/queue-7.0/mptcp-pm-prio-skip-closed-subflows.patch b/queue-7.0/mptcp-pm-prio-skip-closed-subflows.patch new file mode 100644 index 0000000000..aa3a7dd10e --- /dev/null +++ b/queue-7.0/mptcp-pm-prio-skip-closed-subflows.patch @@ -0,0 +1,41 @@ +From 166b78344031bf7ac9f55cb5282776cfd85f220e Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:57 +0200 +Subject: mptcp: pm: prio: skip closed subflows + +From: Matthieu Baerts (NGI0) + +commit 166b78344031bf7ac9f55cb5282776cfd85f220e upstream. + +When sending an MP_PRIO, closed subflows need to be skipped. + +This fixes the case where the initial subflow got closed, re-opened +later, then an MP_PRIO is needed for the same local address. + +Note that explicit MP_PRIO cannot be sent during the 3WHS, so it is fine +to use __mptcp_subflow_active(). 
+ +Fixes: 067065422fcd ("mptcp: add the outgoing MP_PRIO support") +Cc: stable@vger.kernel.org +Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-9-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -283,6 +283,9 @@ int mptcp_pm_mp_prio_send_ack(struct mpt + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct mptcp_addr_info local, remote; + ++ if (!__mptcp_subflow_active(subflow)) ++ continue; ++ + mptcp_local_address((struct sock_common *)ssk, &local); + if (!mptcp_addresses_equal(&local, addr, addr->port)) + continue; diff --git a/queue-7.0/mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch b/queue-7.0/mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch new file mode 100644 index 0000000000..4b4efc4c1f --- /dev/null +++ b/queue-7.0/mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch @@ -0,0 +1,41 @@ +From 70ece9d7021c54cf40c72b31b066e9088f5f75f5 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Fri, 1 May 2026 21:35:37 +0200 +Subject: mptcp: sockopt: increase seq in mptcp_setsockopt_all_sf + +From: Matthieu Baerts (NGI0) + +commit 70ece9d7021c54cf40c72b31b066e9088f5f75f5 upstream. + +mptcp_setsockopt_all_sf() was missing a call to sockopt_seq_inc(). This +is required not to cause missing synchronization for newer subflows +created later on. + +This helper is called each time a socket option is set on subflows, and +future ones will need to inherit this option after their creation. 
+ +Fixes: 51c5fd09e1b4 ("mptcp: add TCP_MAXSEG sockopt support") +Cc: stable@vger.kernel.org +Suggested-by: Paolo Abeni +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-4-b70118df778e@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/sockopt.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/mptcp/sockopt.c ++++ b/net/mptcp/sockopt.c +@@ -812,6 +812,10 @@ static int mptcp_setsockopt_all_sf(struc + if (ret) + break; + } ++ ++ if (!ret) ++ sockopt_seq_inc(msk); ++ + return ret; + } + diff --git a/queue-7.0/mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch b/queue-7.0/mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch new file mode 100644 index 0000000000..3fda447f18 --- /dev/null +++ b/queue-7.0/mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch @@ -0,0 +1,50 @@ +From 5f95c21fc23a7ef22b4d27d1ed9bb55557ffb926 Mon Sep 17 00:00:00 2001 +From: Gang Yan +Date: Mon, 27 Apr 2026 21:54:33 +0200 +Subject: mptcp: sockopt: set timestamp flags on subflow socket, not msk + +From: Gang Yan + +commit 5f95c21fc23a7ef22b4d27d1ed9bb55557ffb926 upstream. + +Both mptcp_setsockopt_sol_socket_tstamp() and +mptcp_setsockopt_sol_socket_timestamping() iterate over subflows, +acquire the subflow socket lock, but then erroneously pass the MPTCP +msk socket to sock_set_timestamp() / sock_set_timestamping() instead +of the subflow ssk. As a result, the timestamp flags are set on the +wrong socket and have no effect on the actual subflows. + +Pass ssk instead of sk to both helpers. 
+ +Fixes: 9061f24bf82e ("mptcp: sockopt: propagate timestamp request to subflows") +Cc: stable@vger.kernel.org +Signed-off-by: Gang Yan +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-1-7432b7f279fa@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/sockopt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/mptcp/sockopt.c ++++ b/net/mptcp/sockopt.c +@@ -161,7 +161,7 @@ static int mptcp_setsockopt_sol_socket_t + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow = lock_sock_fast(ssk); + +- sock_set_timestamp(sk, optname, !!val); ++ sock_set_timestamp(ssk, optname, !!val); + unlock_sock_fast(ssk, slow); + } + +@@ -237,7 +237,7 @@ static int mptcp_setsockopt_sol_socket_t + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow = lock_sock_fast(ssk); + +- sock_set_timestamping(sk, optname, timestamping); ++ sock_set_timestamping(ssk, optname, timestamping); + unlock_sock_fast(ssk, slow); + } + diff --git a/queue-7.0/mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch b/queue-7.0/mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch new file mode 100644 index 0000000000..35dad71f11 --- /dev/null +++ b/queue-7.0/mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch @@ -0,0 +1,45 @@ +From c4a99a921949cddc590b22bb14eeb23dffcc3ba6 Mon Sep 17 00:00:00 2001 +From: Shardul Bankar +Date: Fri, 1 May 2026 21:35:34 +0200 +Subject: mptcp: use MPJoinSynAckHMacFailure for SynAck HMAC failure + +From: Shardul Bankar + +commit c4a99a921949cddc590b22bb14eeb23dffcc3ba6 upstream. + +In subflow_finish_connect(), HMAC validation of the server's HMAC +in SYN/ACK + MP_JOIN increments MPTCP_MIB_JOINACKMAC ("HMAC was +wrong on ACK + MP_JOIN") on failure. 
The function processes the +SYN/ACK, not the ACK; the matching MPTCP_MIB_JOINSYNACKMAC counter +("HMAC was wrong on SYN/ACK + MP_JOIN") exists but is not +incremented anywhere in the tree. + +The mirror site on the server, subflow_syn_recv_sock(), already +uses JOINACKMAC correctly for ACK HMAC failure. Use JOINSYNACKMAC +at the SYN/ACK validation site so each counter reflects the packet +whose HMAC actually failed. + +Suggested-by: Matthieu Baerts (NGI0) +Fixes: fc518953bc9c ("mptcp: add and use MIB counter infrastructure") +Cc: stable@vger.kernel.org +Signed-off-by: Shardul Bankar +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-1-b70118df778e@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/subflow.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -581,7 +581,7 @@ static void subflow_finish_connect(struc + subflow->backup); + + if (!subflow_thmac_valid(subflow)) { +- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); ++ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKMAC); + subflow->reset_reason = MPTCP_RST_EMPTCP; + goto do_reset; + } diff --git a/queue-7.0/mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch b/queue-7.0/mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch new file mode 100644 index 0000000000..c05db45fdc --- /dev/null +++ b/queue-7.0/mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch @@ -0,0 +1,44 @@ +From a6da02d4c00fdda2417e42ad2b762a9209e6cc49 Mon Sep 17 00:00:00 2001 +From: Shardul Bankar +Date: Fri, 1 May 2026 21:35:35 +0200 +Subject: mptcp: use MPTCP_RST_EMPTCP for ACK HMAC validation failure + +From: Shardul Bankar + +commit a6da02d4c00fdda2417e42ad2b762a9209e6cc49 upstream. 
+ +When HMAC validation fails on a received ACK + MP_JOIN in +subflow_syn_recv_sock(), the subflow is reset with reason +MPTCP_RST_EPROHIBIT ("Administratively prohibited"). This is +incorrect: HMAC validation failure is an MPTCP protocol-level +error, not an administrative policy denial. + +The mirror site on the client, in subflow_finish_connect(), already +uses MPTCP_RST_EMPTCP ("MPTCP-specific error") for the same kind of +HMAC failure on the SYN/ACK + MP_JOIN. Use the same reason on the +server side for symmetry and accuracy. + +Suggested-by: Matthieu Baerts (NGI0) +Fixes: 443041deb5ef ("mptcp: fix NULL pointer in can_accept_new_subflow") +Cc: stable@vger.kernel.org +Signed-off-by: Shardul Bankar +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-2-b70118df778e@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/subflow.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -908,7 +908,7 @@ create_child: + + if (!subflow_hmac_valid(subflow_req, &mp_opt)) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); +- subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); ++ subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); + goto dispose_child; + } + diff --git a/queue-7.0/pci-aer-clear-only-error-bits-in-pcie-device-status.patch b/queue-7.0/pci-aer-clear-only-error-bits-in-pcie-device-status.patch new file mode 100644 index 0000000000..daf6a8f9d7 --- /dev/null +++ b/queue-7.0/pci-aer-clear-only-error-bits-in-pcie-device-status.patch @@ -0,0 +1,64 @@ +From a8aeea1bf3c80cc87983689e0118770e019bd4f3 Mon Sep 17 00:00:00 2001 +From: Shuai Xue +Date: Wed, 11 Feb 2026 20:46:24 +0800 +Subject: PCI/AER: Clear only error bits in PCIe Device Status + +From: Shuai Xue + +commit a8aeea1bf3c80cc87983689e0118770e019bd4f3 upstream. 
+ +Currently, pcie_clear_device_status() clears the entire PCIe Device Status +register (PCI_EXP_DEVSTA) by writing back the value read from the register, +which affects not only the error status bits but also other writable bits. + +According to PCIe r7.0, sec 7.5.3.5, this register contains: + + - RW1C error status bits (CED, NFED, FED, URD at bits 0-3): These are the + four error status bits that need to be cleared. + + - Read-only bits (AUXPD at bit 4, TRPND at bit 5): Writing to these has + no effect. + + - Emergency Power Reduction Detected (bit 6): A RW1C non-error bit + introduced in PCIe r5.0 (2019). This is currently the only writable + non-error bit in the Device Status register. Unconditionally clearing + this bit can interfere with other software components that rely on this + power management indication. + + - Reserved bits (RsvdZ): These bits are required to be written as zero. + Writing 1s to them (as the current implementation may do) violates the + specification. + +To prevent unintended side effects, modify pcie_clear_device_status() to +only write 1s to the four error status bits (CED, NFED, FED, URD), leaving +the Emergency Power Reduction Detected bit and reserved bits unaffected. 
+ +Fixes: ec752f5d54d7 ("PCI/AER: Clear device status bits during ERR_FATAL and ERR_NONFATAL") +Suggested-by: Lukas Wunner +Signed-off-by: Shuai Xue +Signed-off-by: Bjorn Helgaas +Reviewed-by: Kuppuswamy Sathyanarayanan +Reviewed-by: Lukas Wunner +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260211124624.49656-1-xueshuai@linux.alibaba.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/pci.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -2241,10 +2241,9 @@ EXPORT_SYMBOL_GPL(pci_set_pcie_reset_sta + #ifdef CONFIG_PCIEAER + void pcie_clear_device_status(struct pci_dev *dev) + { +- u16 sta; +- +- pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); +- pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); ++ pcie_capability_write_word(dev, PCI_EXP_DEVSTA, ++ PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED | ++ PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD); + } + #endif + diff --git a/queue-7.0/pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch b/queue-7.0/pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch new file mode 100644 index 0000000000..5f340f8fc9 --- /dev/null +++ b/queue-7.0/pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch @@ -0,0 +1,53 @@ +From 1ab4a3c805084d752ec571efc78272295a9f2f74 Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Fri, 27 Mar 2026 10:56:43 +0100 +Subject: PCI/AER: Stop ruling out unbound devices as error source + +From: Lukas Wunner + +commit 1ab4a3c805084d752ec571efc78272295a9f2f74 upstream. + +When searching for the error source, the AER driver rules out devices whose +enable_cnt is zero. This was introduced in 2009 by commit 28eb27cf0839 +("PCI AER: support invalid error source IDs") without providing a +rationale. + +Drivers typically call pci_enable_device() on probe, hence the enable_cnt +check essentially filters out unbound devices. 
At the time of the commit, +drivers had to opt in to AER by calling pci_enable_pcie_error_reporting() +and so any AER-enabled device could be assumed to be bound to a driver. +The check thus made sense because it allowed skipping config space accesses +to devices which were known not to be the error source. + +But since 2022, AER is universally enabled on all devices when they are +enumerated, cf. commit f26e58bf6f54 ("PCI/AER: Enable error reporting when +AER is native"). + +Errors may very well be reported by unbound devices, e.g. due to link +instability. By ruling them out as error source, errors reported by them +are neither logged nor cleared. When they do get bound and another error +occurs, the earlier error is reported together with the new error, which +may confuse users. Stop doing so. + +Fixes: f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native") +Signed-off-by: Lukas Wunner +Signed-off-by: Bjorn Helgaas +Reviewed-by: Stefan Roese +Cc: stable@vger.kernel.org # v6.0+ +Link: https://patch.msgid.link/734338c2e8b669db5a5a3b45d34131b55ffebfca.1774605029.git.lukas@wunner.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/pcie/aer.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/pci/pcie/aer.c ++++ b/drivers/pci/pcie/aer.c +@@ -1041,8 +1041,6 @@ static bool is_error_source(struct pci_d + * 3) There are multiple errors and prior ID comparing fails; + * We check AER status registers to find possible reporter. 
+ */ +- if (atomic_read(&dev->enable_cnt) == 0) +- return false; + + /* Check if AER is enabled */ + pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &reg16); diff --git a/queue-7.0/pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch b/queue-7.0/pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch new file mode 100644 index 0000000000..e1728d5d4e --- /dev/null +++ b/queue-7.0/pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch @@ -0,0 +1,91 @@ +From cc33985d26c92a5c908c0185239c59ec35b8637c Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Mon, 16 Feb 2026 08:46:13 +0100 +Subject: PCI/ASPM: Fix pci_clear_and_set_config_dword() usage +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Lukas Wunner + +commit cc33985d26c92a5c908c0185239c59ec35b8637c upstream. + +When aspm_calc_l12_info() programs the L1 PM Substates Control 1 register +fields Common_Mode_Restore_Time, LTR_L1.2_THRESHOLD_Value and _Scale, it +invokes pci_clear_and_set_config_dword() in an incorrect way: + +For the bits to clear it selects those corresponding to the field. So far +so good. But for the bits to set it passes a full register value. +pci_clear_and_set_config_dword() performs a boolean OR operation which +sets all bits of that value, not just the ones that were just cleared. + +Thus, when setting the LTR_L1.2_THRESHOLD_Value and _Scale on the child of +an ASPM link, aspm_calc_l12_info() also sets the Common_Mode_Restore_Time. +That's a spec violation: PCIe r7.0 sec 7.8.3.3 says this field is RsvdP +for Upstream Ports. On Adrià's Pixelbook Eve, Common_Mode_Restore_Time +of the Intel 7265 "Stone Peak" wifi card is zero, yet aspm_calc_l12_info() +does not preserve the zero bits but instead programs the value calculated +for the Root Port into the wifi card. 
+ +Likewise, when setting the Common_Mode_Restore_Time on the Root Port, +aspm_calc_l12_info() also changes the LTR_L1.2_THRESHOLD_Value and _Scale +from the initial 163840 nsec to 237568 nsec (due to ORing those fields), +only to reduce it afterwards to 106496 nsec. + +Amend all invocations of pci_clear_and_set_config_dword() to only set bits +which are cleared. + +Finally, when setting the T_POWER_ON_Value and _Scale on the Root Port and +the wifi card, aspm_calc_l12_info() fails to preserve bits declared RsvdP +and instead overwrites them with zeroes. Replace pci_write_config_dword() +with pci_clear_and_set_config_dword() to avoid this. + +Fixes: aeda9adebab8 ("PCI/ASPM: Configure L1 substate settings") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=220705#c22 +Signed-off-by: Lukas Wunner +Signed-off-by: Bjorn Helgaas +Tested-by: Adrià Vilanova Martínez +Cc: stable@vger.kernel.org # v4.11+ +Link: https://patch.msgid.link/5c1752d7512eed0f4ea57b84b12d7ee08ca61fc5.1771226659.git.lukas@wunner.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/pcie/aspm.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/drivers/pci/pcie/aspm.c ++++ b/drivers/pci/pcie/aspm.c +@@ -706,22 +706,29 @@ static void aspm_calc_l12_info(struct pc + } + + /* Program T_POWER_ON times in both ports */ +- pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2); +- pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2); ++ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ++ PCI_L1SS_CTL2_T_PWR_ON_VALUE | ++ PCI_L1SS_CTL2_T_PWR_ON_SCALE, ctl2); ++ pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ++ PCI_L1SS_CTL2_T_PWR_ON_VALUE | ++ PCI_L1SS_CTL2_T_PWR_ON_SCALE, ctl2); + + /* Program Common_Mode_Restore_Time in upstream device */ + pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, +- PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1); ++ PCI_L1SS_CTL1_CM_RESTORE_TIME, ++ ctl1 & 
PCI_L1SS_CTL1_CM_RESTORE_TIME); + + /* Program LTR_L1.2_THRESHOLD time in both ports */ + pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, +- ctl1); ++ ctl1 & (PCI_L1SS_CTL1_LTR_L12_TH_VALUE | ++ PCI_L1SS_CTL1_LTR_L12_TH_SCALE)); + pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, +- ctl1); ++ ctl1 & (PCI_L1SS_CTL1_LTR_L12_TH_VALUE | ++ PCI_L1SS_CTL1_LTR_L12_TH_SCALE)); + + if (pl1_2_enables || cl1_2_enables) { + pci_clear_and_set_config_dword(parent, diff --git a/queue-7.0/perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch b/queue-7.0/perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch new file mode 100644 index 0000000000..835479fb85 --- /dev/null +++ b/queue-7.0/perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch @@ -0,0 +1,69 @@ +From 8ba0b706a485b1e607594cf4210786d517ad1611 Mon Sep 17 00:00:00 2001 +From: Dapeng Mi +Date: Thu, 30 Apr 2026 08:25:55 +0800 +Subject: perf/x86/intel: Always reprogram ACR events to prevent stale masks + +From: Dapeng Mi + +commit 8ba0b706a485b1e607594cf4210786d517ad1611 upstream. + +Members of an ACR group are logically linked via a bitmask of their +hardware counter indices. If some members of the group are assigned new +hardware counters during rescheduling, even events that keep their +original counter index must be updated with a new mask. + +Without this, an event will continue to use a stale acr_mask that +references the old indices of its group peers. Ensure all ACR events are +reprogrammed during the scheduling path to maintain consistency across +the group. 
+ +Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload") +Signed-off-by: Dapeng Mi +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260430002558.712334-3-dapeng1.mi@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/core.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/arch/x86/events/core.c ++++ b/arch/x86/events/core.c +@@ -1294,13 +1294,16 @@ int x86_perf_rdpmc_index(struct perf_eve + return event->hw.event_base_rdpmc; + } + +-static inline int match_prev_assignment(struct hw_perf_event *hwc, ++static inline int match_prev_assignment(struct perf_event *event, + struct cpu_hw_events *cpuc, + int i) + { ++ struct hw_perf_event *hwc = &event->hw; ++ + return hwc->idx == cpuc->assign[i] && +- hwc->last_cpu == smp_processor_id() && +- hwc->last_tag == cpuc->tags[i]; ++ hwc->last_cpu == smp_processor_id() && ++ hwc->last_tag == cpuc->tags[i] && ++ !is_acr_event_group(event); + } + + static void x86_pmu_start(struct perf_event *event, int flags); +@@ -1346,7 +1349,7 @@ static void x86_pmu_enable(struct pmu *p + * - no other event has used the counter since + */ + if (hwc->idx == -1 || +- match_prev_assignment(hwc, cpuc, i)) ++ match_prev_assignment(event, cpuc, i)) + continue; + + /* +@@ -1367,7 +1370,7 @@ static void x86_pmu_enable(struct pmu *p + event = cpuc->event_list[i]; + hwc = &event->hw; + +- if (!match_prev_assignment(hwc, cpuc, i)) ++ if (!match_prev_assignment(event, cpuc, i)) + x86_assign_hw_event(event, cpuc, i); + else if (i < n_running) + continue; diff --git a/queue-7.0/perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch b/queue-7.0/perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch new file mode 100644 index 0000000000..1272f78596 --- /dev/null +++ b/queue-7.0/perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch @@ -0,0 +1,93 @@ +From 1271aeccc307066315b2d3b0d5af2510e27018b5 Mon Sep 17 00:00:00 2001 
+From: Dapeng Mi +Date: Thu, 30 Apr 2026 08:25:56 +0800 +Subject: perf/x86/intel: Disable PMI for self-reloaded ACR events + +From: Dapeng Mi + +commit 1271aeccc307066315b2d3b0d5af2510e27018b5 upstream. + +On platforms with Auto Counter Reload (ACR) support, such as NVL, a +"NMI received for unknown reason 30" warning is observed when running +multiple events in a group with ACR enabled: + + $ perf record -e '{instructions/period=20000,acr_mask=0x2/u,\ + cycles/period=40000,acr_mask=0x3/u}' ./test + +The warning occurs because the Performance Monitoring Interrupt (PMI) +is enabled for the self-reloaded event (the cycles event in this case). +According to the Intel SDM, the overflow bit +(IA32_PERF_GLOBAL_STATUS.PMCn_OVF) is never set for self-reloaded events. +Since the bit is not set, the perf NMI handler cannot identify the source +of the interrupt, leading to the "unknown reason" message. + +Furthermore, enabling PMI for self-reloaded events is unnecessary and +can lead to extraneous records that pollute the user's requested data. + +Disable the interrupt bit for all events configured with ACR self-reload. 
+ +Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload") +Reported-by: Andi Kleen +Signed-off-by: Dapeng Mi +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260430002558.712334-4-dapeng1.mi@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/core.c | 17 +++++++++++++---- + arch/x86/events/perf_event.h | 10 ++++++++++ + 2 files changed, 23 insertions(+), 4 deletions(-) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -3118,11 +3118,11 @@ static void intel_pmu_enable_fixed(struc + intel_set_masks(event, idx); + + /* +- * Enable IRQ generation (0x8), if not PEBS, +- * and enable ring-3 counting (0x2) and ring-0 counting (0x1) +- * if requested: ++ * Enable IRQ generation (0x8), if not PEBS or self-reloaded ++ * ACR event, and enable ring-3 counting (0x2) and ring-0 ++ * counting (0x1) if requested: + */ +- if (!event->attr.precise_ip) ++ if (!event->attr.precise_ip && !is_acr_self_reload_event(event)) + bits |= INTEL_FIXED_0_ENABLE_PMI; + if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) + bits |= INTEL_FIXED_0_USER; +@@ -3306,6 +3306,15 @@ static void intel_pmu_enable_event(struc + intel_set_masks(event, idx); + static_call_cond(intel_pmu_enable_acr_event)(event); + static_call_cond(intel_pmu_enable_event_ext)(event); ++ /* ++ * For self-reloaded ACR event, don't enable PMI since ++ * HW won't set overflow bit in GLOBAL_STATUS. Otherwise, ++ * the PMI would be recognized as a suspicious NMI. ++ */ ++ if (is_acr_self_reload_event(event)) ++ hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; ++ else if (!event->attr.precise_ip) ++ hwc->config |= ARCH_PERFMON_EVENTSEL_INT; + __x86_pmu_enable_event(hwc, enable_mask); + break; + case INTEL_PMC_IDX_FIXED ... 
INTEL_PMC_IDX_FIXED_BTS - 1: +--- a/arch/x86/events/perf_event.h ++++ b/arch/x86/events/perf_event.h +@@ -137,6 +137,16 @@ static inline bool is_acr_event_group(st + return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR); + } + ++static inline bool is_acr_self_reload_event(struct perf_event *event) ++{ ++ struct hw_perf_event *hwc = &event->hw; ++ ++ if (hwc->idx < 0) ++ return false; ++ ++ return test_bit(hwc->idx, (unsigned long *)&hwc->config1); ++} ++ + struct amd_nb { + int nb_id; /* NorthBridge id */ + int refcnt; /* reference count */ diff --git a/queue-7.0/perf-x86-intel-enable-auto-counter-reload-for-dmr.patch b/queue-7.0/perf-x86-intel-enable-auto-counter-reload-for-dmr.patch new file mode 100644 index 0000000000..3582d6629a --- /dev/null +++ b/queue-7.0/perf-x86-intel-enable-auto-counter-reload-for-dmr.patch @@ -0,0 +1,39 @@ +From aa4384bc8f4360167f3c3d5322121fe892289ea2 Mon Sep 17 00:00:00 2001 +From: Dapeng Mi +Date: Thu, 30 Apr 2026 08:25:57 +0800 +Subject: perf/x86/intel: Enable auto counter reload for DMR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Dapeng Mi + +commit aa4384bc8f4360167f3c3d5322121fe892289ea2 upstream. + +Panther cove µarch starts to support auto counter reload (ACR), but the +static_call intel_pmu_enable_acr_event() is not updated for the Panther +Cove µarch used by DMR. It leads to the auto counter reload is not +really enabled on DMR. + +Update static_call intel_pmu_enable_acr_event() in intel_pmu_init_pnc(). 
+ +Fixes: d345b6bb8860 ("perf/x86/intel: Add core PMU support for DMR") +Signed-off-by: Dapeng Mi +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260430002558.712334-5-dapeng1.mi@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/core.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -7525,6 +7525,7 @@ static __always_inline void intel_pmu_in + hybrid(pmu, event_constraints) = intel_pnc_event_constraints; + hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints; + hybrid(pmu, extra_regs) = intel_pnc_extra_regs; ++ static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); + } + + static __always_inline void intel_pmu_init_skt(struct pmu *pmu) diff --git a/queue-7.0/power-supply-max17042-avoid-overflow-when-determining-health.patch b/queue-7.0/power-supply-max17042-avoid-overflow-when-determining-health.patch new file mode 100644 index 0000000000..6becf6beef --- /dev/null +++ b/queue-7.0/power-supply-max17042-avoid-overflow-when-determining-health.patch @@ -0,0 +1,41 @@ +From 9a44949da669708f19d29141e65b3ac774d08f5a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Andr=C3=A9=20Draszik?= +Date: Mon, 2 Mar 2026 13:32:05 +0000 +Subject: power: supply: max17042: avoid overflow when determining health +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: André Draszik + +commit 9a44949da669708f19d29141e65b3ac774d08f5a upstream. + +If vmax has the default value of INT_MAX (e.g. because not specified in +DT), battery health is reported as over-voltage. This is because adding +any value to vmax (the vmax tolerance in this case) causes it to wrap +around, making it negative and smaller than the measured battery +voltage. + +Avoid that by using size_add(). 
+ +Fixes: edd4ab055931 ("power: max17042_battery: add HEALTH and TEMP_* properties support") +Cc: stable@vger.kernel.org +Signed-off-by: André Draszik +Link: https://patch.msgid.link/20260302-max77759-fg-v3-6-3c5f01dbda23@linaro.org +Signed-off-by: Sebastian Reichel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/power/supply/max17042_battery.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/power/supply/max17042_battery.c ++++ b/drivers/power/supply/max17042_battery.c +@@ -201,7 +201,7 @@ static int max17042_get_battery_health(s + goto out; + } + +- if (vbatt > chip->pdata->vmax + MAX17042_VMAX_TOLERANCE) { ++ if (vbatt > size_add(chip->pdata->vmax, MAX17042_VMAX_TOLERANCE)) { + *health = POWER_SUPPLY_HEALTH_OVERVOLTAGE; + goto out; + } diff --git a/queue-7.0/powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch b/queue-7.0/powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch new file mode 100644 index 0000000000..5021ff6105 --- /dev/null +++ b/queue-7.0/powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch @@ -0,0 +1,109 @@ +From 6771c54728c278bf1e4bfdab4fddbbb186e33498 Mon Sep 17 00:00:00 2001 +From: Nilay Shroff +Date: Wed, 11 Mar 2026 19:13:31 +0530 +Subject: powerpc/xive: fix kmemleak caused by incorrect chip_data lookup + +From: Nilay Shroff + +commit 6771c54728c278bf1e4bfdab4fddbbb186e33498 upstream. + +The kmemleak reports the following memory leak: + +Unreferenced object 0xc0000002a7fbc640 (size 64): + comm "kworker/8:1", pid 540, jiffies 4294937872 + hex dump (first 32 bytes): + 01 00 00 00 00 00 00 00 00 00 09 04 00 04 00 00 ................ + 00 00 a7 81 00 00 0a c0 00 00 08 04 00 04 00 00 ................ 
+ backtrace (crc 177d48f6): + __kmalloc_cache_noprof+0x520/0x730 + xive_irq_alloc_data.constprop.0+0x40/0xe0 + xive_irq_domain_alloc+0xd0/0x1b0 + irq_domain_alloc_irqs_parent+0x44/0x6c + pseries_irq_domain_alloc+0x1cc/0x354 + irq_domain_alloc_irqs_parent+0x44/0x6c + msi_domain_alloc+0xb0/0x220 + irq_domain_alloc_irqs_locked+0x138/0x4d0 + __irq_domain_alloc_irqs+0x8c/0xfc + __msi_domain_alloc_irqs+0x214/0x4d8 + msi_domain_alloc_irqs_all_locked+0x70/0xf8 + pci_msi_setup_msi_irqs+0x60/0x78 + __pci_enable_msix_range+0x54c/0x98c + pci_alloc_irq_vectors_affinity+0x16c/0x1d4 + nvme_pci_enable+0xac/0x9c0 [nvme] + nvme_probe+0x340/0x764 [nvme] + +This occurs when allocating MSI-X vectors for an NVMe device. During +allocation the XIVE code creates a struct xive_irq_data and stores it +in irq_data->chip_data. + +When the MSI-X irqdomain is later freed, xive_irq_free_data() is +responsible for retrieving this structure and freeing it. However, +after commit cc0cc23babc9 ("powerpc/xive: Untangle xive from child +interrupt controller drivers"), xive_irq_free_data() retrieves the +chip_data using irq_get_chip_data(), which looks up the data through +the child domain. + +This is incorrect because the XIVE-specific irq data is associated with +the XIVE (parent) domain. As a result the lookup fails and the allocated +struct xive_irq_data is never freed, leading to the kmemleak report +shown above. + +Fix this by retrieving the irq_data from the correct domain using +irq_domain_get_irq_data() and then accessing the chip_data via +irq_data_get_irq_chip_data(). 
+ +Cc: stable@vger.kernel.org +Fixes: cc0cc23babc9 ("powerpc/xive: Untangle xive from child interrupt controller drivers") +Signed-off-by: Nilay Shroff +Tested-by: Venkat Rao Bagalkote +Reviewed-by: Nam Cao +Signed-off-by: Madhavan Srinivasan +Link: https://patch.msgid.link/20260311134336.326996-1-nilay@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/sysdev/xive/common.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/arch/powerpc/sysdev/xive/common.c ++++ b/arch/powerpc/sysdev/xive/common.c +@@ -1038,13 +1038,19 @@ static struct xive_irq_data *xive_irq_al + return xd; + } + +-static void xive_irq_free_data(unsigned int virq) ++static void xive_irq_free_data(struct irq_domain *domain, unsigned int virq) + { +- struct xive_irq_data *xd = irq_get_chip_data(virq); ++ struct xive_irq_data *xd; ++ struct irq_data *data = irq_domain_get_irq_data(domain, virq); ++ ++ if (!data) ++ return; + ++ xd = irq_data_get_irq_chip_data(data); + if (!xd) + return; +- irq_set_chip_data(virq, NULL); ++ ++ irq_domain_reset_irq_data(data); + xive_cleanup_irq_data(xd); + kfree(xd); + } +@@ -1305,7 +1311,7 @@ static int xive_irq_domain_map(struct ir + + static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) + { +- xive_irq_free_data(virq); ++ xive_irq_free_data(d, virq); + } + + static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct, +@@ -1443,7 +1449,7 @@ static void xive_irq_domain_free(struct + pr_debug("%s %d #%d\n", __func__, virq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) +- xive_irq_free_data(virq + i); ++ xive_irq_free_data(domain, virq + i); + } + #endif + diff --git a/queue-7.0/rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch b/queue-7.0/rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch new file mode 100644 index 0000000000..94c3450db3 --- /dev/null +++ b/queue-7.0/rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch @@ -0,0 +1,48 @@ +From 
654a27f25530d052eeedf086e6c3e2d585c203bd Mon Sep 17 00:00:00 2001 +From: Kai Zen +Date: Tue, 7 Apr 2026 12:20:22 +0300 +Subject: RDMA/ionic: bound node_desc sysfs read with %.64s + +From: Kai Zen + +commit 654a27f25530d052eeedf086e6c3e2d585c203bd upstream. + +node_desc[64] in struct ib_device is not guaranteed to be NUL- +terminated. The core IB sysfs handler uses "%.64s" for exactly this +reason (drivers/infiniband/core/sysfs.c:1307), since node_desc_store() +performs a raw memcpy of up to IB_DEVICE_NODE_DESC_MAX bytes with no NUL +termination: + + memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); + +If exactly 64 bytes are written via the node_desc sysfs file, the array +contains no NUL byte. The ionic hca_type_show() handler uses unbounded +"%s" and will read past the end of node_desc into adjacent fields of +struct ib_device until it encounters a NUL. + +ionic supports IB_DEVICE_MODIFY_NODE_DESC, so this is triggerable by +userspace. + +Match the core handler and bound the format specifier. 
+ +Cc: stable@vger.kernel.org +Fixes: 2075bbe8ef03 ("RDMA/ionic: Register device ops for miscellaneous functionality") +Link: https://patch.msgid.link/r/CALynFi7NAbhDCt1tdaDbf6TnLvAqbaHa6-Wqf6OkzREbA_PAfg@mail.gmail.com +Signed-off-by: Kai Aizen +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/ionic/ionic_ibdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/ionic/ionic_ibdev.c ++++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c +@@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct devi + struct ionic_ibdev *dev = + rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev); + +- return sysfs_emit(buf, "%s\n", dev->ibdev.node_desc); ++ return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc); + } + static DEVICE_ATTR_RO(hca_type); + diff --git a/queue-7.0/rdma-ionic-fix-typo-in-format-string.patch b/queue-7.0/rdma-ionic-fix-typo-in-format-string.patch new file mode 100644 index 0000000000..41e5f10b38 --- /dev/null +++ b/queue-7.0/rdma-ionic-fix-typo-in-format-string.patch @@ -0,0 +1,33 @@ +From 70f780edcd1e86350202d8a409de026b2d2e2067 Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 28 Apr 2026 13:17:34 -0300 +Subject: RDMA/ionic: Fix typo in format string + +From: Jason Gunthorpe + +commit 70f780edcd1e86350202d8a409de026b2d2e2067 upstream. + +Applying the corrupted patch by hand mangled the format string, put the s +in the right place. 
+ +Cc: stable@vger.kernel.org +Fixes: 654a27f25530 ("RDMA/ionic: bound node_desc sysfs read with %.64s") +Link: https://patch.msgid.link/r/1-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com +Reported-by: Brad Spengler +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/ionic/ionic_ibdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/ionic/ionic_ibdev.c ++++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c +@@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct devi + struct ionic_ibdev *dev = + rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev); + +- return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc); ++ return sysfs_emit(buf, "%.64s\n", dev->ibdev.node_desc); + } + static DEVICE_ATTR_RO(hca_type); + diff --git a/queue-7.0/rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch b/queue-7.0/rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch new file mode 100644 index 0000000000..ab4ad46696 --- /dev/null +++ b/queue-7.0/rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch @@ -0,0 +1,42 @@ +From 6aaa978c6b6218cfac15fe1dab17c76fe229ce3f Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 28 Apr 2026 13:17:40 -0300 +Subject: RDMA/mana: Fix error unwind in mana_ib_create_qp_rss() + +From: Jason Gunthorpe + +commit 6aaa978c6b6218cfac15fe1dab17c76fe229ce3f upstream. + +Sashiko points out that mana_ib_cfg_vport_steering() is leaked, the normal +destroy path cleans it up. 
+ +Cc: stable@vger.kernel.org +Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") +Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 +Link: https://patch.msgid.link/r/7-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com +Reviewed-by: Long Li +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mana/qp.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/mana/qp.c ++++ b/drivers/infiniband/hw/mana/qp.c +@@ -235,13 +235,15 @@ static int mana_ib_create_qp_rss(struct + ibdev_dbg(&mdev->ib_dev, + "Failed to copy to udata create rss-qp, %d\n", + ret); +- goto fail; ++ goto err_disable_vport_rx; + } + + kfree(mana_ind_table); + + return 0; + ++err_disable_vport_rx: ++ mana_disable_vport_rx(mpc); + fail: + while (i-- > 0) { + ibwq = ind_tbl->ind_tbl[i]; diff --git a/queue-7.0/rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch b/queue-7.0/rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch new file mode 100644 index 0000000000..e46b62ff2e --- /dev/null +++ b/queue-7.0/rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch @@ -0,0 +1,56 @@ +From 34ecf795692ee57c393109f4a24ccc313091e137 Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 28 Apr 2026 13:17:39 -0300 +Subject: RDMA/mana: Fix mana_destroy_wq_obj() cleanup in mana_ib_create_qp_rss() + +From: Jason Gunthorpe + +commit 34ecf795692ee57c393109f4a24ccc313091e137 upstream. + +Sashiko points out there are two bugs here in the error unwind flow, both +related to how the WQ table is unwound. + +First there is a double i-- on the first failure path due to the while loop +having a i--, remove it. + +Second if mana_ib_install_cq_cb() fails then mana_create_wq_obj() is not +undone due to the above i--. 
+ +Cc: stable@vger.kernel.org +Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP") +Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 +Link: https://patch.msgid.link/r/6-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com +Reviewed-by: Long Li +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mana/qp.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/drivers/infiniband/hw/mana/qp.c ++++ b/drivers/infiniband/hw/mana/qp.c +@@ -193,11 +193,8 @@ static int mana_ib_create_qp_rss(struct + + ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, + &wq_spec, &cq_spec, &wq->rx_object); +- if (ret) { +- /* Do cleanup starting with index i-1 */ +- i--; ++ if (ret) + goto fail; +- } + + /* The GDMA regions are now owned by the WQ object */ + wq->queue.gdma_region = GDMA_INVALID_DMA_REGION; +@@ -217,8 +214,10 @@ static int mana_ib_create_qp_rss(struct + + /* Create CQ table entry */ + ret = mana_ib_install_cq_cb(mdev, cq); +- if (ret) ++ if (ret) { ++ mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); + goto fail; ++ } + } + resp.num_entries = i; + diff --git a/queue-7.0/rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch b/queue-7.0/rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch new file mode 100644 index 0000000000..3d67efb25d --- /dev/null +++ b/queue-7.0/rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch @@ -0,0 +1,40 @@ +From 159f2efabc89d3f931d38f2d35876535d4abf0a3 Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 28 Apr 2026 13:17:38 -0300 +Subject: RDMA/mana: Remove user triggerable WARN_ON() in mana_ib_create_qp_rss() + +From: Jason Gunthorpe + +commit 159f2efabc89d3f931d38f2d35876535d4abf0a3 upstream. 
+ +Sashiko points out that the user can specify WQs sharing the same CQ as a +part of the uAPI and this will trigger the WARN_ON() then go on to corrupt +the kernel. + +Just reject it outright and fail the QP creation. + +Cc: stable@vger.kernel.org +Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP") +Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 +Link: https://patch.msgid.link/r/5-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com +Reviewed-by: Long Li +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mana/cq.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/infiniband/hw/mana/cq.c ++++ b/drivers/infiniband/hw/mana/cq.c +@@ -142,8 +142,9 @@ int mana_ib_install_cq_cb(struct mana_ib + + if (cq->queue.id >= gc->max_num_cqs) + return -EINVAL; +- /* Create CQ table entry */ +- WARN_ON(gc->cq_table[cq->queue.id]); ++ /* Create CQ table entry, sharing a CQ between WQs is not supported */ ++ if (gc->cq_table[cq->queue.id]) ++ return -EINVAL; + if (cq->queue.kmem) + gdma_cq = cq->queue.kmem; + else diff --git a/queue-7.0/rdma-mana-validate-rx_hash_key_len.patch b/queue-7.0/rdma-mana-validate-rx_hash_key_len.patch new file mode 100644 index 0000000000..fa5efa7b7a --- /dev/null +++ b/queue-7.0/rdma-mana-validate-rx_hash_key_len.patch @@ -0,0 +1,36 @@ +From 6dd2d4ad9c8429523b1c220c5132bd551c006425 Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 28 Apr 2026 13:17:37 -0300 +Subject: RDMA/mana: Validate rx_hash_key_len + +From: Jason Gunthorpe + +commit 6dd2d4ad9c8429523b1c220c5132bd551c006425 upstream. + +Sashiko points out that rx_hash_key_len comes from a uAPI structure and is +blindly passed to memcpy, allowing the userspace to trash kernel +memory. Bounds check it so the memcpy cannot overflow. 
+ +Cc: stable@vger.kernel.org +Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") +Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 +Link: https://patch.msgid.link/r/4-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com +Reviewed-by: Long Li +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mana/qp.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/infiniband/hw/mana/qp.c ++++ b/drivers/infiniband/hw/mana/qp.c +@@ -21,6 +21,9 @@ static int mana_ib_cfg_vport_steering(st + + gc = mdev_to_gc(dev); + ++ if (rx_hash_key_len > sizeof(req->hashkey)) ++ return -EINVAL; ++ + req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE); + req = kzalloc(req_buf_size, GFP_KERNEL); + if (!req) diff --git a/queue-7.0/rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch b/queue-7.0/rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch new file mode 100644 index 0000000000..277b8d15cb --- /dev/null +++ b/queue-7.0/rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch @@ -0,0 +1,60 @@ +From c9341307ea16b9395c2e4c9c94d8499d91fe31d0 Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 28 Apr 2026 13:17:45 -0300 +Subject: RDMA/mlx4: Fix mis-use of RCU in mlx4_srq_event() + +From: Jason Gunthorpe + +commit c9341307ea16b9395c2e4c9c94d8499d91fe31d0 upstream. + +Sashiko points out the radix_tree itself is RCU safe, but nothing ever +frees the mlx4_srq struct with RCU, and it isn't even accessed within the +RCU critical section. It also will crash if an event is delivered before +the srq object is finished initializing. + +Use the spinlock since it isn't easy to make RCU work, use +refcount_inc_not_zero() to protect against partially initialized objects, +and order the refcount_set() to be after the srq is fully initialized. 
+ +Cc: stable@vger.kernel.org +Fixes: 30353bfc43a1 ("net/mlx4_core: Use RCU to perform radix tree lookup for SRQ") +Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=5 +Link: https://patch.msgid.link/r/12-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/srq.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/srq.c ++++ b/drivers/net/ethernet/mellanox/mlx4/srq.c +@@ -44,13 +44,14 @@ void mlx4_srq_event(struct mlx4_dev *dev + { + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + struct mlx4_srq *srq; ++ unsigned long flags; + +- rcu_read_lock(); ++ spin_lock_irqsave(&srq_table->lock, flags); + srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); +- rcu_read_unlock(); +- if (srq) +- refcount_inc(&srq->refcount); +- else { ++ if (!srq || !refcount_inc_not_zero(&srq->refcount)) ++ srq = NULL; ++ spin_unlock_irqrestore(&srq_table->lock, flags); ++ if (!srq) { + mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); + return; + } +@@ -203,8 +204,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, + if (err) + goto err_radix; + +- refcount_set(&srq->refcount, 1); + init_completion(&srq->free); ++ refcount_set_release(&srq->refcount, 1); + + return 0; + diff --git a/queue-7.0/rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch b/queue-7.0/rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch new file mode 100644 index 0000000000..5a284c73d8 --- /dev/null +++ b/queue-7.0/rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch @@ -0,0 +1,41 @@ +From c54c7e4cb679c0aaa1cb489b9c3f2cd98e63a44c Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 28 Apr 2026 13:17:44 -0300 +Subject: RDMA/mlx4: Fix resource leak on error in mlx4_ib_create_srq() + +From: Jason Gunthorpe + +commit 
c54c7e4cb679c0aaa1cb489b9c3f2cd98e63a44c upstream. + +Sashiko points out that mlx4_srq_alloc() was not undone during error +unwind, add the missing call to mlx4_srq_free(). + +Cc: stable@vger.kernel.org +Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") +Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=8 +Link: https://patch.msgid.link/r/11-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mlx4/srq.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/mlx4/srq.c ++++ b/drivers/infiniband/hw/mlx4/srq.c +@@ -193,13 +193,15 @@ int mlx4_ib_create_srq(struct ib_srq *ib + if (udata) + if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { + err = -EFAULT; +- goto err_wrid; ++ goto err_srq; + } + + init_attr->attr.max_wr = srq->msrq.max - 1; + + return 0; + ++err_srq: ++ mlx4_srq_free(dev->dev, &srq->msrq); + err_wrid: + if (udata) + mlx4_ib_db_unmap_user(ucontext, &srq->db); diff --git a/queue-7.0/rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch b/queue-7.0/rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch new file mode 100644 index 0000000000..dcf73f607e --- /dev/null +++ b/queue-7.0/rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch @@ -0,0 +1,43 @@ +From c488df06bd552bb8b6e14fa0cfd5ad986c6e9525 Mon Sep 17 00:00:00 2001 +From: Junrui Luo +Date: Fri, 24 Apr 2026 13:51:02 +0800 +Subject: RDMA/mlx5: Fix error path fall-through in mlx5_ib_dev_res_srq_init() + +From: Junrui Luo + +commit c488df06bd552bb8b6e14fa0cfd5ad986c6e9525 upstream. + +mlx5_ib_dev_res_srq_init() allocates two SRQs, s0 and s1. When +ib_create_srq() fails for s1, the error branch destroys s0 but falls +through and unconditionally assigns the freed s0 and the ERR_PTR s1 to +devr->s0 and devr->s1. 
+ +This leads to several problems: the lock-free fast path checks +"if (devr->s1) return 0;" and treats the ERR_PTR as already initialised; +users in mlx5_ib_create_qp() dereference the freed SRQ or ERR_PTR via +to_msrq(devr->s0)->msrq.srqn; and mlx5_ib_dev_res_cleanup() dereferences +the ERR_PTR and double-frees s0 on teardown. + +Fix by adding the same `goto unlock` in the s1 failure path. + +Cc: stable@vger.kernel.org +Fixes: 5895e70f2e6e ("IB/mlx5: Allocate resources just before first QP/SRQ is created") +Link: https://patch.msgid.link/r/SYBPR01MB7881E1E0970268BD69C0BA75AF2B2@SYBPR01MB7881.ausprd01.prod.outlook.com +Reported-by: Yuhao Jiang +Signed-off-by: Junrui Luo +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mlx5/main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -3380,6 +3380,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5 + "Couldn't create SRQ 1 for res init, err=%pe\n", + s1); + ib_destroy_srq(s0); ++ goto unlock; + } + + devr->s0 = s0; diff --git a/queue-7.0/rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch b/queue-7.0/rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch new file mode 100644 index 0000000000..761b86747d --- /dev/null +++ b/queue-7.0/rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch @@ -0,0 +1,37 @@ +From 34fbf48cf3b410d2a6e8c586fa952a36331ca5ba Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 28 Apr 2026 13:17:42 -0300 +Subject: RDMA/ocrdma: Don't NULL deref uctx on errors in ocrdma_copy_pd_uresp() + +From: Jason Gunthorpe + +commit 34fbf48cf3b410d2a6e8c586fa952a36331ca5ba upstream. + +Sashiko points out that pd->uctx isn't initialized until late in the +function so all these error flow references are NULL and will crash. Use +the uctx that isn't NULL. 
+ +Cc: stable@vger.kernel.org +Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter") +Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 +Link: https://patch.msgid.link/r/9-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c ++++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +@@ -620,9 +620,9 @@ static int ocrdma_copy_pd_uresp(struct o + + ucopy_err: + if (pd->dpp_enabled) +- ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE); ++ ocrdma_del_mmap(uctx, dpp_page_addr, PAGE_SIZE); + dpp_map_err: +- ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size); ++ ocrdma_del_mmap(uctx, db_page_addr, db_page_size); + return status; + } + diff --git a/queue-7.0/rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch b/queue-7.0/rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch new file mode 100644 index 0000000000..499c51f56f --- /dev/null +++ b/queue-7.0/rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch @@ -0,0 +1,69 @@ +From 1114c87aa6f195cf07da55a27b2122ae26557b26 Mon Sep 17 00:00:00 2001 +From: Michael Bommarito +Date: Sat, 18 Apr 2026 12:21:41 -0400 +Subject: RDMA/rxe: Reject non-8-byte ATOMIC_WRITE payloads + +From: Michael Bommarito + +commit 1114c87aa6f195cf07da55a27b2122ae26557b26 upstream. + +atomic_write_reply() at drivers/infiniband/sw/rxe/rxe_resp.c +unconditionally dereferences 8 bytes at payload_addr(pkt): + + value = *(u64 *)payload_addr(pkt); + +check_rkey() previously accepted an ATOMIC_WRITE request with pktlen == +resid == 0 because the length validation only compared pktlen against +resid. 
A remote initiator that sets the RETH length to 0 therefore reaches +atomic_write_reply() with a zero-byte logical payload, and the responder +reads sizeof(u64) bytes from past the logical end of the packet into +skb->head tailroom, then writes those 8 bytes into the attacker's MR via +rxe_mr_do_atomic_write(). That is a remote disclosure of 4 bytes of kernel +tailroom per probe (the other 4 bytes are the packet's own trailing ICRC). + +IBA oA19-28 defines ATOMIC_WRITE as exactly 8 bytes. Anything else is +protocol-invalid. Hoist a strict length check into check_rkey() so the +responder never reaches the unchecked dereference, and keep the existing +WRITE-family length logic for the normal RDMA WRITE path. + +Reproduced on mainline with an unmodified rxe driver: a sustained +zero-length ATOMIC_WRITE probe repeatedly leaks adjacent skb head-buffer +bytes into the attacker's MR, including recognisable kernel strings and +partial kernel-direct-map pointer words. With this patch applied the +responder rejects the PDU and the MR stays all-zero. + +Cc: stable@vger.kernel.org +Fixes: 034e285f8b99 ("RDMA/rxe: Make responder support atomic write on RC service") +Link: https://patch.msgid.link/r/20260418162141.3610201-1-michael.bommarito@gmail.com +Assisted-by: Claude:claude-opus-4-7 +Signed-off-by: Michael Bommarito +Reviewed-by: Zhu Yanjun +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/sw/rxe/rxe_resp.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +--- a/drivers/infiniband/sw/rxe/rxe_resp.c ++++ b/drivers/infiniband/sw/rxe/rxe_resp.c +@@ -526,7 +526,19 @@ static enum resp_states check_rkey(struc + } + + skip_check_range: +- if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { ++ if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { ++ /* IBA oA19-28: ATOMIC_WRITE payload is exactly 8 bytes. 
++ * Reject any other length before the responder reads ++ * sizeof(u64) bytes from payload_addr(pkt); a shorter ++ * payload would read past the logical end of the packet ++ * into skb->head tailroom. ++ */ ++ if (resid != sizeof(u64) || pktlen != sizeof(u64) || ++ bth_pad(pkt)) { ++ state = RESPST_ERR_LENGTH; ++ goto err; ++ } ++ } else if (pkt->mask & RXE_WRITE_MASK) { + if (resid > mtu) { + if (pktlen != mtu || bth_pad(pkt)) { + state = RESPST_ERR_LENGTH; diff --git a/queue-7.0/rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch b/queue-7.0/rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch new file mode 100644 index 0000000000..ae4fc0efc4 --- /dev/null +++ b/queue-7.0/rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch @@ -0,0 +1,94 @@ +From 4c6f86d85d03cdb33addce86aa69aa795ca6c47a Mon Sep 17 00:00:00 2001 +From: Michael Bommarito +Date: Tue, 14 Apr 2026 07:15:55 -0400 +Subject: RDMA/rxe: Reject unknown opcodes before ICRC processing + +From: Michael Bommarito + +commit 4c6f86d85d03cdb33addce86aa69aa795ca6c47a upstream. + +Even after applying commit 7244491dab34 ("RDMA/rxe: Validate pad and ICRC +before payload_size() in rxe_rcv"), a single unauthenticated UDP packet +can still trigger panic. That patch handled payload_size() underflow only +for valid opcodes with short packets, not for packets carrying an unknown +opcode. The unknown-opcode OOB read described below predates that commit +and reaches back to the initial Soft RoCE driver. + +The check added there reads + + pkt->paylen < header_size(pkt) + bth_pad(pkt) + RXE_ICRC_SIZE + +where header_size(pkt) expands to rxe_opcode[pkt->opcode].length. The +rxe_opcode[] array has 256 entries but is only populated for defined IB +opcodes; any other entry (for example opcode 0xff) is zero-initialized, so +length == 0 and the check degenerates to + + pkt->paylen < 0 + bth_pad(pkt) + RXE_ICRC_SIZE + +which does not constrain pkt->paylen enough. 
rxe_icrc_hdr() then computes + + rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES + +which underflows when length == 0 and passes a huge value to rxe_crc32(), +causing an out-of-bounds read of the skb payload. + +Reproduced on v7.0-rc7 with that fix applied, QEMU/KVM with +CONFIG_RDMA_RXE=y and CONFIG_KASAN=y, after + + rdma link add rxe0 type rxe netdev eth0 + +A single 48-byte UDP packet to port 4791 with BTH opcode=0xff and +QPN=IB_MULTICAST_QPN triggers: + + BUG: KASAN: slab-out-of-bounds in crc32_le+0x115/0x170 + Read of size 1 at addr ... + The buggy address is located 0 bytes to the right of + allocated 704-byte region + Call Trace: + crc32_le+0x115/0x170 + rxe_icrc_hdr.isra.0+0x226/0x300 + rxe_icrc_check+0x13f/0x3a0 + rxe_rcv+0x6e1/0x16e0 + rxe_udp_encap_recv+0x20a/0x320 + udp_queue_rcv_one_skb+0x7ed/0x12c0 + +Subsequent packets with the same shape fault on unmapped memory and panic +the kernel. The trigger requires only module load and "rdma link add"; no +QP, no connection, and no authentication. + +Fix this by rejecting packets whose opcode has no rxe_opcode[] entry, +detected via the zero mask or zero length, before any length arithmetic +runs. + +Cc: stable@vger.kernel.org +Fixes: 8700e3e7c485 ("Soft RoCE driver") +Link: https://patch.msgid.link/r/20260414111555.3386793-1-michael.bommarito@gmail.com +Assisted-by: Claude:claude-opus-4-6 +Signed-off-by: Michael Bommarito +Reviewed-by: Zhu Yanjun +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/sw/rxe/rxe_recv.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/drivers/infiniband/sw/rxe/rxe_recv.c ++++ b/drivers/infiniband/sw/rxe/rxe_recv.c +@@ -330,6 +330,17 @@ void rxe_rcv(struct sk_buff *skb) + pkt->qp = NULL; + pkt->mask |= rxe_opcode[pkt->opcode].mask; + ++ /* ++ * Unknown opcodes have a zero-initialized rxe_opcode[] entry, so ++ * both mask and length are 0. 
Reject them before any length math: ++ * rxe_icrc_hdr() would otherwise compute length - RXE_BTH_BYTES ++ * and pass the underflowed value to rxe_crc32(), producing an ++ * out-of-bounds read. ++ */ ++ if (unlikely(!rxe_opcode[pkt->opcode].mask || ++ !rxe_opcode[pkt->opcode].length)) ++ goto drop; ++ + if (unlikely(pkt->paylen < header_size(pkt) + bth_pad(pkt) + + RXE_ICRC_SIZE)) + goto drop; diff --git a/queue-7.0/rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch b/queue-7.0/rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch new file mode 100644 index 0000000000..002aac0f8c --- /dev/null +++ b/queue-7.0/rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch @@ -0,0 +1,33 @@ +From e38e86995df27f1f854063dab1f0c6a513db3faf Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Tue, 28 Apr 2026 13:17:43 -0300 +Subject: RDMA/vmw_pvrdma: Fix double free on pvrdma_alloc_ucontext() error path + +From: Jason Gunthorpe + +commit e38e86995df27f1f854063dab1f0c6a513db3faf upstream. + +Sashiko points out that pvrdma_uar_free() is already called within +pvrdma_dealloc_ucontext(), so calling it before triggers a double free. 
+ +Cc: stable@vger.kernel.org +Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") +Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 +Link: https://patch.msgid.link/r/10-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c ++++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +@@ -322,7 +322,7 @@ int pvrdma_alloc_ucontext(struct ib_ucon + uresp.qp_tab_size = vdev->dsr->caps.max_qp; + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) { +- pvrdma_uar_free(vdev, &context->uar); ++ /* pvrdma_dealloc_ucontext() also frees the UAR */ + pvrdma_dealloc_ucontext(&context->ibucontext); + return -EFAULT; + } diff --git a/queue-7.0/remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch b/queue-7.0/remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch new file mode 100644 index 0000000000..37851e1d5e --- /dev/null +++ b/queue-7.0/remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch @@ -0,0 +1,34 @@ +From 665eebebb029690a5b2f92e481020877cc6c8d36 Mon Sep 17 00:00:00 2001 +From: Chen Ni +Date: Fri, 27 Feb 2026 17:15:46 +0800 +Subject: remoteproc: imx_rproc: Fix NULL vs IS_ERR() bug in imx_rproc_addr_init() + +From: Chen Ni + +commit 665eebebb029690a5b2f92e481020877cc6c8d36 upstream. + +The devm_ioremap_resource_wc() function never returns NULL, it returns +error pointers. Update the error checking to match. 
+ +Fixes: 67a7bc7f0358 ("remoteproc: Use of_reserved_mem_region_* functions for "memory-region"") +Signed-off-by: Chen Ni +Reviewed-by: Peng Fan +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20260227091546.4044246-1-nichen@iscas.ac.cn +Signed-off-by: Mathieu Poirier +Signed-off-by: Greg Kroah-Hartman +--- + drivers/remoteproc/imx_rproc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/remoteproc/imx_rproc.c ++++ b/drivers/remoteproc/imx_rproc.c +@@ -812,7 +812,7 @@ static int imx_rproc_addr_init(struct im + + /* Not use resource version, because we might share region */ + priv->mem[b].cpu_addr = devm_ioremap_resource_wc(&pdev->dev, &res); +- if (!priv->mem[b].cpu_addr) { ++ if (IS_ERR(priv->mem[b].cpu_addr)) { + dev_err(dev, "failed to remap %pr\n", &res); + return -ENOMEM; + } diff --git a/queue-7.0/remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch b/queue-7.0/remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch new file mode 100644 index 0000000000..740871444d --- /dev/null +++ b/queue-7.0/remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch @@ -0,0 +1,39 @@ +From 5b1f4b5c72cc40e676293b8609cacef7e1545beb Mon Sep 17 00:00:00 2001 +From: Chen Ni +Date: Fri, 27 Feb 2026 17:21:10 +0800 +Subject: remoteproc: k3: Fix NULL vs IS_ERR() bug in k3_reserved_mem_init() + +From: Chen Ni + +commit 5b1f4b5c72cc40e676293b8609cacef7e1545beb upstream. + +The devm_ioremap_resource_wc() function never returns NULL, it returns +error pointers. Update the error checking to match. 
+ +Fixes: 67a7bc7f0358 ("remoteproc: Use of_reserved_mem_region_* functions for "memory-region"") +Signed-off-by: Chen Ni +Reviewed-by: Peng Fan +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20260227092110.4044313-1-nichen@iscas.ac.cn +Signed-off-by: Mathieu Poirier +Signed-off-by: Greg Kroah-Hartman +--- + drivers/remoteproc/ti_k3_common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/remoteproc/ti_k3_common.c b/drivers/remoteproc/ti_k3_common.c +index 32aa954dc5be..3cb8ae5d72f6 100644 +--- a/drivers/remoteproc/ti_k3_common.c ++++ b/drivers/remoteproc/ti_k3_common.c +@@ -513,7 +513,7 @@ int k3_reserved_mem_init(struct k3_rproc *kproc) + kproc->rmem[i].dev_addr = (u32)res.start; + kproc->rmem[i].size = resource_size(&res); + kproc->rmem[i].cpu_addr = devm_ioremap_resource_wc(dev, &res); +- if (!kproc->rmem[i].cpu_addr) { ++ if (IS_ERR(kproc->rmem[i].cpu_addr)) { + dev_err(dev, "failed to map reserved memory#%d at %pR\n", + i + 1, &res); + return -ENOMEM; +-- +2.54.0 + diff --git a/queue-7.0/sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch b/queue-7.0/sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch new file mode 100644 index 0000000000..d67d1c3585 --- /dev/null +++ b/queue-7.0/sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch @@ -0,0 +1,61 @@ +From b34c82777a2c0648ee053595f4b290fd5249b093 Mon Sep 17 00:00:00 2001 +From: David Carlier +Date: Thu, 30 Apr 2026 10:27:47 +0100 +Subject: sched_ext: idle: Recheck prev_cpu after narrowing allowed mask + +From: David Carlier + +commit b34c82777a2c0648ee053595f4b290fd5249b093 upstream. + +scx_select_cpu_dfl() narrows @allowed to @cpus_allowed & @p->cpus_ptr +when the BPF caller supplies a @cpus_allowed that differs from +@p->cpus_ptr and @p doesn't have full affinity. 
However, +@is_prev_allowed was computed against the original (wider) +@cpus_allowed, so the prev_cpu fast paths could pick a @prev_cpu that +is in @cpus_allowed but not in @p->cpus_ptr, violating the intended +invariant that the returned CPU is always usable by @p. The kernel +masks this via the SCX_EV_SELECT_CPU_FALLBACK fallback, but the +behavior contradicts the documented contract. + +Move the @is_prev_allowed evaluation past the narrowing block so it +tests against the final @allowed mask. + +Fixes: ee9a4e92799d ("sched_ext: idle: Properly handle invalid prev_cpu during idle selection") +Cc: stable@vger.kernel.org # v6.16+ +Assisted-by: Claude +Signed-off-by: David Carlier +Reviewed-by: Andrea Righi +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/ext_idle.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/kernel/sched/ext_idle.c ++++ b/kernel/sched/ext_idle.c +@@ -460,12 +460,6 @@ s32 scx_select_cpu_dfl(struct task_struc + preempt_disable(); + + /* +- * Check whether @prev_cpu is still within the allowed set. If not, +- * we can still try selecting a nearby CPU. +- */ +- is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed); +- +- /* + * Determine the subset of CPUs usable by @p within @cpus_allowed. + */ + if (allowed != p->cpus_ptr) { +@@ -482,6 +476,12 @@ s32 scx_select_cpu_dfl(struct task_struc + } + + /* ++ * Check whether @prev_cpu is still within the allowed set. If not, ++ * we can still try selecting a nearby CPU. ++ */ ++ is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed); ++ ++ /* + * This is necessary to protect llc_cpus. 
+ */ + rcu_read_lock(); diff --git a/queue-7.0/sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch b/queue-7.0/sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch new file mode 100644 index 0000000000..2aa8a05c85 --- /dev/null +++ b/queue-7.0/sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch @@ -0,0 +1,45 @@ +From da2d81b4118a74e65d2335e221a38d665902a98c Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Fri, 24 Apr 2026 14:31:35 -1000 +Subject: sched_ext: Skip tasks with stale task_rq in bypass_lb_cpu() + +From: Tejun Heo + +commit da2d81b4118a74e65d2335e221a38d665902a98c upstream. + +bypass_lb_cpu() transfers tasks between per-CPU bypass DSQs without +migrating them - task_cpu() only updates when the donee later consumes the +task via move_remote_task_to_local_dsq(). If the LB timer fires again before +consumption and the new DSQ becomes a donor, @p is still on the previous CPU +and task_rq(@p) != donor_rq. @p can't be moved without its own rq locked. + +Skip such tasks. + +Fixes: 95d1df610cdc ("sched_ext: Implement load balancer for bypass mode") +Cc: stable@vger.kernel.org # v6.19+ +Reported-by: Chris Mason +Signed-off-by: Tejun Heo +Reviewed-by: Andrea Righi +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/ext.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/kernel/sched/ext.c ++++ b/kernel/sched/ext.c +@@ -4008,6 +4008,15 @@ resume: + if (cpumask_empty(donee_mask)) + break; + ++ /* ++ * If an earlier pass placed @p on @donor_dsq from a different ++ * CPU and the donee hasn't consumed it yet, @p is still on the ++ * previous CPU and task_rq(@p) != @donor_rq. @p can't be moved ++ * without its rq locked. Skip. 
++ */ ++ if (task_rq(p) != donor_rq) ++ continue; ++ + donee = cpumask_any_and_distribute(donee_mask, p->cpus_ptr); + if (donee >= nr_cpu_ids) + continue; diff --git a/queue-7.0/sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch b/queue-7.0/sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch new file mode 100644 index 0000000000..0d69571829 --- /dev/null +++ b/queue-7.0/sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch @@ -0,0 +1,51 @@ +From 2f2ea77092660b53bfcbc4acc590b57ce9ab5dce Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Fri, 24 Apr 2026 14:31:35 -1000 +Subject: sched_ext: Use dsq->first_task instead of list_empty() in dispatch_enqueue() FIFO-tail + +From: Tejun Heo + +commit 2f2ea77092660b53bfcbc4acc590b57ce9ab5dce upstream. + +dispatch_enqueue()'s FIFO-tail path used list_empty(&dsq->list) to decide +whether to set dsq->first_task on enqueue. dsq->list can contain parked BPF +iterator cursors (SCX_DSQ_LNODE_ITER_CURSOR), so list_empty() is not a +reliable "no real task" check. If the last real task is unlinked while a +cursor is parked, first_task becomes NULL; the next FIFO-tail enqueue then +sees list_empty() == false and skips the first_task update, leaving +scx_bpf_dsq_peek() returning NULL for a non-empty DSQ. + +Test dsq->first_task directly, which already tracks only real tasks and is +maintained under dsq->lock. 
+ +Fixes: 44f5c8ec5b9a ("sched_ext: Add lockless peek operation for DSQs") +Cc: stable@vger.kernel.org # v6.19+ +Reported-by: Chris Mason +Signed-off-by: Tejun Heo +Reviewed-by: Andrea Righi +Cc: Ryan Newton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/ext.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/kernel/sched/ext.c ++++ b/kernel/sched/ext.c +@@ -1093,11 +1093,13 @@ static void dispatch_enqueue(struct scx_ + if (!(dsq->id & SCX_DSQ_FLAG_BUILTIN)) + rcu_assign_pointer(dsq->first_task, p); + } else { +- bool was_empty; +- +- was_empty = list_empty(&dsq->list); ++ /* ++ * dsq->list can contain parked BPF iterator cursors, so ++ * list_empty() here isn't a reliable proxy for "no real ++ * task in the DSQ". Test dsq->first_task directly. ++ */ + list_add_tail(&p->scx.dsq_list.node, &dsq->list); +- if (was_empty && !(dsq->id & SCX_DSQ_FLAG_BUILTIN)) ++ if (!dsq->first_task && !(dsq->id & SCX_DSQ_FLAG_BUILTIN)) + rcu_assign_pointer(dsq->first_task, p); + } + } diff --git a/queue-7.0/selftests-mptcp-check-output-catch-cmd-errors.patch b/queue-7.0/selftests-mptcp-check-output-catch-cmd-errors.patch new file mode 100644 index 0000000000..20606ed48c --- /dev/null +++ b/queue-7.0/selftests-mptcp-check-output-catch-cmd-errors.patch @@ -0,0 +1,111 @@ +From 65db7b27b90e2ea8d4966935aa9a50b6a60c31ac Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:58 +0200 +Subject: selftests: mptcp: check output: catch cmd errors + +From: Matthieu Baerts (NGI0) + +commit 65db7b27b90e2ea8d4966935aa9a50b6a60c31ac upstream. + +Using '${?}' inside the if-statement to check the returned value from +the command that was evaluated as part of the if-statement is not +correct: here, '${?}' will be linked to the previous instruction, not +the one that is expected here (${cmd}). + +Instead, simply mark the error, except if an error is expected. If +that's the case, 1 can be passed as the 4th argument of this helper. 
+Three checks from pm_netlink.sh expect an error. + +While at it, improve the error message when the command unexpectedly +fails or succeeds. + +Note that we could expect a specific returned value, but the checks +currently expecting an error can be used with 'ip mptcp' or 'pm_nl_ctl', +and these two tools don't return the same error code. + +Fixes: 2d0c1d27ea4e ("selftests: mptcp: add mptcp_lib_check_output helper") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-10-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_lib.sh | 16 ++++++++++------ + tools/testing/selftests/net/mptcp/pm_netlink.sh | 10 ++++++---- + 2 files changed, 16 insertions(+), 10 deletions(-) + +--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh +@@ -474,20 +474,24 @@ mptcp_lib_wait_local_port_listen() { + wait_local_port_listen "${@}" "tcp" + } + ++# $1: error file, $2: cmd, $3: expected msg, [$4: expected error] + mptcp_lib_check_output() { + local err="${1}" + local cmd="${2}" + local expected="${3}" ++ local exp_error="${4:-0}" + local cmd_ret=0 + local out + +- if ! 
out=$(${cmd} 2>"${err}"); then +- cmd_ret=${?} +- fi ++ out=$(${cmd} 2>"${err}") || cmd_ret=1 + +- if [ ${cmd_ret} -ne 0 ]; then +- mptcp_lib_pr_fail "command execution '${cmd}' stderr" +- cat "${err}" ++ if [ "${cmd_ret}" != "${exp_error}" ]; then ++ mptcp_lib_pr_fail "unexpected returned code for '${cmd}', info:" ++ if [ "${exp_error}" = 0 ]; then ++ cat "${err}" ++ else ++ echo "${out}" ++ fi + return 2 + elif [ "${out}" = "${expected}" ]; then + return 0 +--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh ++++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh +@@ -122,10 +122,12 @@ check() + local cmd="$1" + local expected="$2" + local msg="$3" ++ local exp_error="$4" + local rc=0 + + mptcp_lib_print_title "$msg" +- mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} ++ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" "${exp_error}" || ++ rc=${?} + if [ ${rc} -eq 2 ]; then + mptcp_lib_result_fail "${msg} # error ${rc}" + ret=${KSFT_FAIL} +@@ -158,13 +160,13 @@ check "show_endpoints" \ + "3,10.0.1.3,signal backup")" "dump addrs" + + del_endpoint 2 +-check "get_endpoint 2" "" "simple del addr" ++check "get_endpoint 2" "" "simple del addr" 1 + check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "3,10.0.1.3,signal backup")" "dump addrs after del" + + add_endpoint 10.0.1.3 2>/dev/null +-check "get_endpoint 4" "" "duplicate addr" ++check "get_endpoint 4" "" "duplicate addr" 1 + + add_endpoint 10.0.1.4 flags signal + check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment" +@@ -173,7 +175,7 @@ for i in $(seq 5 9); do + add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1 + done + check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit" +-check "get_endpoint 10" "" "above hard addr limit" ++check "get_endpoint 10" "" "above hard addr limit" 1 + + del_endpoint 9 + for i in $(seq 10 255); do diff --git a/queue-7.0/selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch 
b/queue-7.0/selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch new file mode 100644 index 0000000000..7e16ca835e --- /dev/null +++ b/queue-7.0/selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch @@ -0,0 +1,47 @@ +From 53705ddfa18408f8e1f064331b6387509fa19f7f Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:59 +0200 +Subject: selftests: mptcp: pm: restrict 'unknown' check to pm_nl_ctl + +From: Matthieu Baerts (NGI0) + +commit 53705ddfa18408f8e1f064331b6387509fa19f7f upstream. + +When pm_netlink.sh is executed with '-i', 'ip mptcp' is used instead of +'pm_nl_ctl'. IPRoute2 doesn't support the 'unknown' flag, which has only +been added to 'pm_nl_ctl' for this specific check: to ensure that the +kernel ignores such unsupported flag. + +No reason to add this flag to 'ip mptcp'. Then, this check should be +skipped when 'ip mptcp' is used. + +Fixes: 0cef6fcac24d ("selftests: mptcp: ip_mptcp option for more scripts") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-11-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/pm_netlink.sh | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh ++++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh +@@ -194,9 +194,13 @@ check "show_endpoints" \ + flush_endpoint + check "show_endpoints" "" "flush addrs" + +-add_endpoint 10.0.1.1 flags unknown +-check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags" +-flush_endpoint ++# "unknown" flag is only supported by pm_nl_ctl ++if ! 
mptcp_lib_is_ip_mptcp; then ++ add_endpoint 10.0.1.1 flags unknown ++ check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" \ ++ "ignore unknown flags" ++ flush_endpoint ++fi + + set_limits 9 1 2>/dev/null + check "get_limits" "${default_limits}" "rcv addrs above hard limit" diff --git a/queue-7.0/series b/queue-7.0/series index 4716911b17..583bbc129d 100644 --- a/queue-7.0/series +++ b/queue-7.0/series @@ -220,3 +220,71 @@ mm-damon-stat-detect-and-use-fresh-enabled-value.patch mm-damon-sysfs-schemes-protect-memcg_path-kfree-with-damon_sysfs_lock.patch mm-damon-sysfs-schemes-protect-path-kfree-with-damon_sysfs_lock.patch pci-update-saved_config_space-upon-resource-assignment.patch +pci-aer-clear-only-error-bits-in-pcie-device-status.patch +pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch +pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch +power-supply-max17042-avoid-overflow-when-determining-health.patch +powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch +perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch +perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch +perf-x86-intel-enable-auto-counter-reload-for-dmr.patch +rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch +rdma-ionic-fix-typo-in-format-string.patch +rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch +rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch +rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch +rdma-mana-validate-rx_hash_key_len.patch +rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch +rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch +rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch +rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch +rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch +rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch 
+rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch +remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch +remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch +sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch +sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch +sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch +selftests-mptcp-check-output-catch-cmd-errors.patch +selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch +mptcp-fastclose-msk-when-linger-time-is-0.patch +mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch +mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch +mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch +mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch +mptcp-fix-rx-timestamp-corruption-on-fastopen.patch +mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch +mptcp-pm-prio-skip-closed-subflows.patch +mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch +mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch +mptcp-pm-add_addr-rtx-allow-id-0.patch +mptcp-pm-add_addr-rtx-fix-potential-data-race.patch +mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch +mptcp-pm-add_addr-rtx-free-sk-if-last.patch +mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch +mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch +f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch +f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch +f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch +f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch +f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch +f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch +f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch +f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch 
+f2fs-refactor-f2fs_move_node_folio-function.patch +f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch +f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch +kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch +kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch +kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch +kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch +kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch +kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch +loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch +loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch +loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch +loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch +loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch +loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch +loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch