]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
7.0-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 12 May 2026 14:28:32 +0000 (16:28 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 12 May 2026 14:28:32 +0000 (16:28 +0200)
added patches:
f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch
f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch
f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch
f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch
f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch
f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch
f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch
f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch
f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch
f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch
f2fs-refactor-f2fs_move_node_folio-function.patch
kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch
kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch
kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch
kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch
kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch
kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch
loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch
loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch
loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch
loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch
loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch
loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch
loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch
mptcp-fastclose-msk-when-linger-time-is-0.patch
mptcp-fix-rx-timestamp-corruption-on-fastopen.patch
mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch
mptcp-pm-add_addr-rtx-allow-id-0.patch
mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch
mptcp-pm-add_addr-rtx-fix-potential-data-race.patch
mptcp-pm-add_addr-rtx-free-sk-if-last.patch
mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch
mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch
mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch
mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch
mptcp-pm-prio-skip-closed-subflows.patch
mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch
mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch
mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch
mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch
pci-aer-clear-only-error-bits-in-pcie-device-status.patch
pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch
pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch
perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch
perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch
perf-x86-intel-enable-auto-counter-reload-for-dmr.patch
power-supply-max17042-avoid-overflow-when-determining-health.patch
powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch
rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch
rdma-ionic-fix-typo-in-format-string.patch
rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch
rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch
rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch
rdma-mana-validate-rx_hash_key_len.patch
rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch
rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch
rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch
rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch
rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch
rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch
rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch
remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch
remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch
sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch
sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch
sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch
selftests-mptcp-check-output-catch-cmd-errors.patch
selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch

69 files changed:
queue-7.0/f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch [new file with mode: 0644]
queue-7.0/f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch [new file with mode: 0644]
queue-7.0/f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch [new file with mode: 0644]
queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch [new file with mode: 0644]
queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch [new file with mode: 0644]
queue-7.0/f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch [new file with mode: 0644]
queue-7.0/f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch [new file with mode: 0644]
queue-7.0/f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch [new file with mode: 0644]
queue-7.0/f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch [new file with mode: 0644]
queue-7.0/f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch [new file with mode: 0644]
queue-7.0/f2fs-refactor-f2fs_move_node_folio-function.patch [new file with mode: 0644]
queue-7.0/kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch [new file with mode: 0644]
queue-7.0/kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch [new file with mode: 0644]
queue-7.0/kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch [new file with mode: 0644]
queue-7.0/kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch [new file with mode: 0644]
queue-7.0/kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch [new file with mode: 0644]
queue-7.0/kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch [new file with mode: 0644]
queue-7.0/loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch [new file with mode: 0644]
queue-7.0/loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch [new file with mode: 0644]
queue-7.0/loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch [new file with mode: 0644]
queue-7.0/loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch [new file with mode: 0644]
queue-7.0/loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch [new file with mode: 0644]
queue-7.0/loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch [new file with mode: 0644]
queue-7.0/loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch [new file with mode: 0644]
queue-7.0/mptcp-fastclose-msk-when-linger-time-is-0.patch [new file with mode: 0644]
queue-7.0/mptcp-fix-rx-timestamp-corruption-on-fastopen.patch [new file with mode: 0644]
queue-7.0/mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch [new file with mode: 0644]
queue-7.0/mptcp-pm-add_addr-rtx-allow-id-0.patch [new file with mode: 0644]
queue-7.0/mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch [new file with mode: 0644]
queue-7.0/mptcp-pm-add_addr-rtx-fix-potential-data-race.patch [new file with mode: 0644]
queue-7.0/mptcp-pm-add_addr-rtx-free-sk-if-last.patch [new file with mode: 0644]
queue-7.0/mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch [new file with mode: 0644]
queue-7.0/mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch [new file with mode: 0644]
queue-7.0/mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch [new file with mode: 0644]
queue-7.0/mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch [new file with mode: 0644]
queue-7.0/mptcp-pm-prio-skip-closed-subflows.patch [new file with mode: 0644]
queue-7.0/mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch [new file with mode: 0644]
queue-7.0/mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch [new file with mode: 0644]
queue-7.0/mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch [new file with mode: 0644]
queue-7.0/mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch [new file with mode: 0644]
queue-7.0/pci-aer-clear-only-error-bits-in-pcie-device-status.patch [new file with mode: 0644]
queue-7.0/pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch [new file with mode: 0644]
queue-7.0/pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch [new file with mode: 0644]
queue-7.0/perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch [new file with mode: 0644]
queue-7.0/perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch [new file with mode: 0644]
queue-7.0/perf-x86-intel-enable-auto-counter-reload-for-dmr.patch [new file with mode: 0644]
queue-7.0/power-supply-max17042-avoid-overflow-when-determining-health.patch [new file with mode: 0644]
queue-7.0/powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch [new file with mode: 0644]
queue-7.0/rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch [new file with mode: 0644]
queue-7.0/rdma-ionic-fix-typo-in-format-string.patch [new file with mode: 0644]
queue-7.0/rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch [new file with mode: 0644]
queue-7.0/rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch [new file with mode: 0644]
queue-7.0/rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch [new file with mode: 0644]
queue-7.0/rdma-mana-validate-rx_hash_key_len.patch [new file with mode: 0644]
queue-7.0/rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch [new file with mode: 0644]
queue-7.0/rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch [new file with mode: 0644]
queue-7.0/rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch [new file with mode: 0644]
queue-7.0/rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch [new file with mode: 0644]
queue-7.0/rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch [new file with mode: 0644]
queue-7.0/rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch [new file with mode: 0644]
queue-7.0/rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch [new file with mode: 0644]
queue-7.0/remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch [new file with mode: 0644]
queue-7.0/remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch [new file with mode: 0644]
queue-7.0/sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch [new file with mode: 0644]
queue-7.0/sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch [new file with mode: 0644]
queue-7.0/sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch [new file with mode: 0644]
queue-7.0/selftests-mptcp-check-output-catch-cmd-errors.patch [new file with mode: 0644]
queue-7.0/selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch [new file with mode: 0644]
queue-7.0/series

diff --git a/queue-7.0/f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch b/queue-7.0/f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch
new file mode 100644 (file)
index 0000000..1e12a8c
--- /dev/null
@@ -0,0 +1,38 @@
+From 5471834a96fb697874be2ca0b052e74bcf3c23d1 Mon Sep 17 00:00:00 2001
+From: Cen Zhang <zzzccc427@gmail.com>
+Date: Wed, 18 Mar 2026 15:32:53 +0800
+Subject: f2fs: add READ_ONCE() for i_blocks in f2fs_update_inode()
+
+From: Cen Zhang <zzzccc427@gmail.com>
+
+commit 5471834a96fb697874be2ca0b052e74bcf3c23d1 upstream.
+
+f2fs_update_inode() reads inode->i_blocks without holding i_lock to
+serialize it to the on-disk inode, while concurrent truncate or
+allocation paths may modify i_blocks under i_lock.  Since blkcnt_t is
+u64, this risks torn reads on 32-bit architectures.
+
+Following the approach in ext4_inode_blocks_set(), add READ_ONCE() to prevent
+potential compiler-induced tearing.
+
+Fixes: 19f99cee206c ("f2fs: add core inode operations")
+Cc: stable@vger.kernel.org
+Signed-off-by: Cen Zhang <zzzccc427@gmail.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -687,7 +687,7 @@ void f2fs_update_inode(struct inode *ino
+       ri->i_uid = cpu_to_le32(i_uid_read(inode));
+       ri->i_gid = cpu_to_le32(i_gid_read(inode));
+       ri->i_links = cpu_to_le32(inode->i_nlink);
+-      ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
++      ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(READ_ONCE(inode->i_blocks)) + 1);
+       if (!f2fs_is_atomic_file(inode) ||
+                       is_inode_flag_set(inode, FI_ATOMIC_COMMITTED))
diff --git a/queue-7.0/f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch b/queue-7.0/f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch
new file mode 100644 (file)
index 0000000..49f2c9e
--- /dev/null
@@ -0,0 +1,97 @@
+From 6a5e3de9c2bb0b691d16789a5d19e9276a09b308 Mon Sep 17 00:00:00 2001
+From: Chao Yu <chao@kernel.org>
+Date: Fri, 6 Mar 2026 12:24:20 +0000
+Subject: f2fs: fix false alarm of lockdep on cp_global_sem lock
+
+From: Chao Yu <chao@kernel.org>
+
+commit 6a5e3de9c2bb0b691d16789a5d19e9276a09b308 upstream.
+
+lockdep reported a potential deadlock:
+
+a) TCMU device removal context:
+ - call del_gendisk() to get q->q_usage_counter
+ - call start_flush_work() to get work_completion of wb->dwork
+b) f2fs writeback context:
+ - in wb_workfn(), which holds work_completion of wb->dwork
+ - call f2fs_balance_fs() to get sbi->gc_lock
+c) f2fs vfs_write context:
+ - call f2fs_gc() to get sbi->gc_lock
+ - call f2fs_write_checkpoint() to get sbi->cp_global_sem
+d) f2fs mount context:
+ - call recover_fsync_data() to get sbi->cp_global_sem
+ - call f2fs_check_and_fix_write_pointer() to call blkdev_report_zones()
+   that goes down to blk_mq_alloc_request and get q->q_usage_counter
+
+Original callstack is in Closes tag.
+
+However, I think this is a false alarm due to before mount returns
+successfully (context d), we can not access file therein via vfs_write
+(context c).
+
+Let's introduce per-sb cp_global_sem_key, and assign the key for
+cp_global_sem, so that lockdep can recognize cp_global_sem from
+different super block correctly.
+
+A lot of work are done by Shin'ichiro Kawasaki, thanks a lot for
+the work.
+
+Fixes: c426d99127b1 ("f2fs: Check write pointer consistency of open zones")
+Cc: stable@kernel.org
+Reported-and-tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Closes: https://lore.kernel.org/linux-f2fs-devel/20260218125237.3340441-1-shinichiro.kawasaki@wdc.com
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Signed-off-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/f2fs.h  |    3 +++
+ fs/f2fs/super.c |   11 +++++++++++
+ 2 files changed, 14 insertions(+)
+
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -2042,6 +2042,9 @@ struct f2fs_sb_info {
+       spinlock_t iostat_lat_lock;
+       struct iostat_lat_info *iostat_io_lat;
+ #endif
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++      struct lock_class_key cp_global_sem_key;
++#endif
+ };
+ /* Definitions to access f2fs_sb_info */
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -4953,6 +4953,11 @@ try_onemore:
+       init_f2fs_rwsem_trace(&sbi->gc_lock, sbi, LOCK_NAME_GC_LOCK);
+       mutex_init(&sbi->writepages);
+       init_f2fs_rwsem_trace(&sbi->cp_global_sem, sbi, LOCK_NAME_CP_GLOBAL);
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++      lockdep_register_key(&sbi->cp_global_sem_key);
++      lockdep_set_class(&sbi->cp_global_sem.internal_rwsem,
++                                      &sbi->cp_global_sem_key);
++#endif
+       init_f2fs_rwsem_trace(&sbi->node_write, sbi, LOCK_NAME_NODE_WRITE);
+       init_f2fs_rwsem_trace(&sbi->node_change, sbi, LOCK_NAME_NODE_CHANGE);
+       spin_lock_init(&sbi->stat_lock);
+@@ -5424,6 +5429,9 @@ free_options:
+ free_sb_buf:
+       kfree(raw_super);
+ free_sbi:
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++      lockdep_unregister_key(&sbi->cp_global_sem_key);
++#endif
+       kfree(sbi);
+       sb->s_fs_info = NULL;
+@@ -5505,6 +5513,9 @@ static void kill_f2fs_super(struct super
+       /* Release block devices last, after fscrypt_destroy_keyring(). */
+       if (sbi) {
+               destroy_device_list(sbi);
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++              lockdep_unregister_key(&sbi->cp_global_sem_key);
++#endif
+               kfree(sbi);
+               sb->s_fs_info = NULL;
+       }
diff --git a/queue-7.0/f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch b/queue-7.0/f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch
new file mode 100644 (file)
index 0000000..d11af42
--- /dev/null
@@ -0,0 +1,99 @@
+From 95e159ad3e52f7478cfd22e44ec37c9f334f8993 Mon Sep 17 00:00:00 2001
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Date: Mon, 23 Mar 2026 20:06:24 +0800
+Subject: f2fs: fix fiemap boundary handling when read extent cache is incomplete
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+
+commit 95e159ad3e52f7478cfd22e44ec37c9f334f8993 upstream.
+
+f2fs_fiemap() calls f2fs_map_blocks() to obtain the block mapping a
+file, and then merges contiguous mappings into extents. If the mapping
+is found in the read extent cache, node blocks do not need to be read.
+However, in the following scenario, a contiguous extent can be split
+into two extents:
+
+$ dd if=/dev/zero of=data.128M bs=1M count=128
+$ losetup -f data.128M
+$ mkfs.f2fs /dev/loop0 -f
+$ mount -o mode=lfs /dev/loop0 /mnt/f2fs/
+$ cd /mnt/f2fs/
+$ dd if=/dev/zero of=data.72M bs=1M count=72 && sync
+$ dd if=/dev/zero of=data.4M bs=1M count=4 && sync
+$ dd if=/dev/zero of=data.4M bs=1M count=2 seek=2 conv=notrunc && sync
+$ echo 3 > /proc/sys/vm/drop_caches
+$ dd if=/dev/zero of=data.4M bs=1M count=2 seek=0 conv=notrunc && sync
+$ dd if=/dev/zero of=data.4M bs=1M count=2 seek=0 conv=notrunc && sync
+$ f2fs_io fiemap 0 1024 data.4M
+Fiemap: offset = 0 len = 1024
+logical addr.    physical addr.   length           flags
+0      0000000000000000 0000000006400000 0000000000200000 00001000
+1      0000000000200000 0000000006600000 0000000000200000 00001001
+
+Although the physical addresses of the ranges 0~2MB and 2M~4MB are
+contiguous, the mapping for the 2M~4MB range is not present in memory.
+When the physical addresses for the 0~2MB range are updated, no merge
+happens because the adjacent mapping is missing from the in-memory
+cache. As a result, fiemap reports two separate extents instead of a
+single contiguous one.
+
+The root cause is that the read extent cache does not guarantee that all
+blocks of an extent are present in memory. Therefore, when the extent
+length returned by f2fs_map_blocks_cached() is smaller than maxblocks,
+the remaining mappings are retrieved via f2fs_get_dnode_of_data() to
+ensure correct fiemap extent boundary handling.
+
+Cc: stable@kernel.org
+Fixes: cd8fc5226bef ("f2fs: remove the create argument to f2fs_map_blocks")
+Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/data.c |   25 ++++++++++++++++++++++---
+ 1 file changed, 22 insertions(+), 3 deletions(-)
+
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -1636,8 +1636,26 @@ int f2fs_map_blocks(struct inode *inode,
+       lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
+                               map->m_may_create);
+-      if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
+-              goto out;
++      if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) {
++              struct extent_info ei;
++
++              /*
++               * 1. If map->m_multidev_dio is true, map->m_pblk cannot be
++               * waitted by f2fs_wait_on_block_writeback_range() and are not
++               * mergeable.
++               * 2. If pgofs hits the read extent cache, it means the mapping
++               * is already cached in the extent cache, but it is not
++               * mergeable, and there is no need to query the mapping again
++               * via f2fs_get_dnode_of_data().
++               */
++              pgofs = (pgoff_t)map->m_lblk + map->m_len;
++              if (map->m_len == maxblocks ||
++                      map->m_multidev_dio ||
++                      f2fs_lookup_read_extent_cache(inode, pgofs, &ei))
++                      goto out;
++              ofs = map->m_len;
++              goto map_more;
++      }
+       map->m_bdev = inode->i_sb->s_bdev;
+       map->m_multidev_dio =
+@@ -1648,7 +1666,8 @@ int f2fs_map_blocks(struct inode *inode,
+       /* it only supports block size == page size */
+       pgofs = (pgoff_t)map->m_lblk;
+-      end = pgofs + maxblocks;
++map_more:
++      end = (pgoff_t)map->m_lblk + maxblocks;
+       if (flag == F2FS_GET_BLOCK_PRECACHE)
+               mode = LOOKUP_NODE_RA;
diff --git a/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch b/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch
new file mode 100644 (file)
index 0000000..69164b1
--- /dev/null
@@ -0,0 +1,120 @@
+From c3e238bd1f56993f205ef83889d406dfeaf717a8 Mon Sep 17 00:00:00 2001
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Date: Wed, 18 Mar 2026 16:45:34 +0800
+Subject: f2fs: fix fsck inconsistency caused by FGGC of node block
+
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+
+commit c3e238bd1f56993f205ef83889d406dfeaf717a8 upstream.
+
+During FGGC node block migration, fsck may incorrectly treat the
+migrated node block as fsync-written data.
+
+The reproduction scenario:
+root@vm:/mnt/f2fs# seq 1 2048 | xargs -n 1 ./test_sync // write inline inode and sync
+root@vm:/mnt/f2fs# rm -f 1
+root@vm:/mnt/f2fs# sync
+root@vm:/mnt/f2fs# f2fs_io gc_range // move data block in sync mode and not write CP
+  SPO, "fsck --dry-run" find inode has already checkpointed but still
+  with DENT_BIT_SHIFT set
+
+The root cause is that GC does not clear the dentry mark and fsync mark
+during node block migration, leading fsck to misinterpret them as
+user-issued fsync writes.
+
+In BGGC mode, node block migration is handled by f2fs_sync_node_pages(),
+which guarantees the dentry and fsync marks are cleared before writing.
+
+This patch move the set/clear of the fsync|dentry marks into
+__write_node_folio to make the logic clearer, and ensures the
+fsync|dentry mark is cleared in FGGC.
+
+Cc: stable@kernel.org
+Fixes: da011cc0da8c ("f2fs: move node pages only in victim section during GC")
+Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/node.c |   27 +++++++++++++--------------
+ 1 file changed, 13 insertions(+), 14 deletions(-)
+
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -1729,9 +1729,10 @@ continue_unlock:
+       return last_folio;
+ }
+-static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted,
+-                              struct writeback_control *wbc, bool do_balance,
+-                              enum iostat_type io_type, unsigned int *seq_id)
++static bool __write_node_folio(struct folio *folio, bool atomic, bool do_fsync,
++                              bool *submitted, struct writeback_control *wbc,
++                              bool do_balance, enum iostat_type io_type,
++                              unsigned int *seq_id)
+ {
+       struct f2fs_sb_info *sbi = F2FS_F_SB(folio);
+       nid_t nid;
+@@ -1804,6 +1805,8 @@ static bool __write_node_folio(struct fo
+       if (atomic && !test_opt(sbi, NOBARRIER))
+               fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
++      set_dentry_mark(folio, false);
++      set_fsync_mark(folio, do_fsync);
+       if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio)))
+               set_dentry_mark(folio,
+                               f2fs_need_dentry_mark(sbi, ino_of_node(folio)));
+@@ -1870,7 +1873,7 @@ int f2fs_write_single_node_folio(struct
+               goto out_folio;
+       }
+-      if (!__write_node_folio(node_folio, false, NULL,
++      if (!__write_node_folio(node_folio, false, false, NULL,
+                               &wbc, false, FS_GC_NODE_IO, NULL))
+               err = -EAGAIN;
+       goto release_folio;
+@@ -1917,6 +1920,7 @@ retry:
+               for (i = 0; i < nr_folios; i++) {
+                       struct folio *folio = fbatch.folios[i];
+                       bool submitted = false;
++                      bool do_fsync = false;
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               f2fs_folio_put(last_folio, false);
+@@ -1947,11 +1951,8 @@ continue_unlock:
+                       f2fs_folio_wait_writeback(folio, NODE, true, true);
+-                      set_fsync_mark(folio, 0);
+-                      set_dentry_mark(folio, 0);
+-
+                       if (!atomic || folio == last_folio) {
+-                              set_fsync_mark(folio, 1);
++                              do_fsync = true;
+                               percpu_counter_inc(&sbi->rf_node_block_count);
+                               if (IS_INODE(folio)) {
+                                       if (is_inode_flag_set(inode,
+@@ -1968,8 +1969,9 @@ continue_unlock:
+                       if (!__write_node_folio(folio, atomic &&
+                                               folio == last_folio,
+-                                              &submitted, wbc, true,
+-                                              FS_NODE_IO, seq_id)) {
++                                              do_fsync, &submitted,
++                                              wbc, true, FS_NODE_IO,
++                                              seq_id)) {
+                               f2fs_folio_put(last_folio, false);
+                               folio_batch_release(&fbatch);
+                               ret = -EIO;
+@@ -2169,10 +2171,7 @@ write_node:
+                       if (!folio_clear_dirty_for_io(folio))
+                               goto continue_unlock;
+-                      set_fsync_mark(folio, 0);
+-                      set_dentry_mark(folio, 0);
+-
+-                      if (!__write_node_folio(folio, false, &submitted,
++                      if (!__write_node_folio(folio, false, false, &submitted,
+                                       wbc, do_balance, io_type, NULL)) {
+                               folio_batch_release(&fbatch);
+                               ret = -EIO;
diff --git a/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch b/queue-7.0/f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch
new file mode 100644 (file)
index 0000000..039b6fd
--- /dev/null
@@ -0,0 +1,79 @@
+From 019f9dda7f66e55eb94cd32e1d3fff5835f73fbc Mon Sep 17 00:00:00 2001
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Date: Tue, 10 Mar 2026 17:36:12 +0800
+Subject: f2fs: fix fsck inconsistency caused by incorrect nat_entry flag usage
+
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+
+commit 019f9dda7f66e55eb94cd32e1d3fff5835f73fbc upstream.
+
+f2fs_need_dentry_mark() reads nat_entry flags without mutual exclusion
+with the checkpoint path, which can result in an incorrect inode block
+marking state. The scenario is as follows:
+
+create & write & fsync 'file A'                 write checkpoint
+- f2fs_do_sync_file // inline inode
+ - f2fs_write_inode // inode folio is dirty
+                                                - f2fs_write_checkpoint
+                                                 - f2fs_flush_merged_writes
+                                                 - f2fs_sync_node_pages
+ - f2fs_fsync_node_pages // no dirty node
+ - f2fs_need_inode_block_update // return true
+ - f2fs_fsync_node_pages // inode dirtied
+  - f2fs_need_dentry_mark //return true
+                                                 - f2fs_flush_nat_entries
+                                                - f2fs_write_checkpoint end
+  - __write_node_folio // inode with DENT_BIT_SHIFT set
+  SPO, "fsck --dry-run" find inode has already checkpointed but still
+  with DENT_BIT_SHIFT set
+
+The state observed by f2fs_need_dentry_mark() can differ from the state
+observed in __write_node_folio() after acquiring sbi->node_write. The
+root cause is that the semantics of IS_CHECKPOINTED and
+HAS_FSYNCED_INODE are only guaranteed after the checkpoint write has
+fully completed.
+
+This patch moves set_dentry_mark() into __write_node_folio() and
+protects it with the sbi->node_write lock.
+
+Cc: stable@kernel.org
+Fixes: 88bd02c9472a ("f2fs: fix conditions to remain recovery information in f2fs_sync_file")
+Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/node.c |   14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -1801,13 +1801,12 @@ static bool __write_node_folio(struct fo
+               goto redirty_out;
+       }
+-      if (atomic) {
+-              if (!test_opt(sbi, NOBARRIER))
+-                      fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
+-              if (IS_INODE(folio))
+-                      set_dentry_mark(folio,
++      if (atomic && !test_opt(sbi, NOBARRIER))
++              fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
++
++      if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio)))
++              set_dentry_mark(folio,
+                               f2fs_need_dentry_mark(sbi, ino_of_node(folio)));
+-      }
+       /* should add to global list before clearing PAGECACHE status */
+       if (f2fs_in_warm_node_list(sbi, folio)) {
+@@ -1948,9 +1947,6 @@ continue_unlock:
+                                       if (is_inode_flag_set(inode,
+                                                               FI_DIRTY_INODE))
+                                               f2fs_update_inode(inode, folio);
+-                                      if (!atomic)
+-                                              set_dentry_mark(folio,
+-                                                      f2fs_need_dentry_mark(sbi, ino));
+                               }
+                               /* may be written by other thread */
+                               if (!folio_test_dirty(folio))
diff --git a/queue-7.0/f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch b/queue-7.0/f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch
new file mode 100644 (file)
index 0000000..7c43b54
--- /dev/null
@@ -0,0 +1,63 @@
+From 68a0178981a0f493295afa29f8880246e561494c Mon Sep 17 00:00:00 2001
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Date: Tue, 3 Feb 2026 21:36:35 +0800
+Subject: f2fs: fix incorrect file address mapping when inline inode is unwritten
+
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+
+commit 68a0178981a0f493295afa29f8880246e561494c upstream.
+
+When `fileinfo->fi_flags` does not have the `FIEMAP_FLAG_SYNC` bit set
+and inline data has not been persisted yet, the physical address of the
+extent is calculated incorrectly for unwritten inline inodes.
+
+root@vm:/mnt/f2fs# dd if=/dev/zero of=data.3k bs=3k count=1
+root@vm:/mnt/f2fs# f2fs_io fiemap 0 100 data.3k
+Fiemap: offset = 0 len = 100
+       logical addr.    physical addr.   length           flags
+0      0000000000000000 00000ffffffff16c 0000000000000c00 00000301
+
+This patch fixes the issue by checking if the inode's address is valid.
+If the inline inode is unwritten, set the physical address to 0 and
+mark the extent with `FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DELALLOC`
+flags.
+
+Cc: stable@kernel.org
+Fixes: 67f8cf3cee6f ("f2fs: support fiemap for inline_data")
+Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/inline.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/fs/f2fs/inline.c
++++ b/fs/f2fs/inline.c
+@@ -792,7 +792,7 @@ int f2fs_read_inline_dir(struct file *fi
+ int f2fs_inline_data_fiemap(struct inode *inode,
+               struct fiemap_extent_info *fieinfo, __u64 start, __u64 len)
+ {
+-      __u64 byteaddr, ilen;
++      __u64 byteaddr = 0, ilen;
+       __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
+               FIEMAP_EXTENT_LAST;
+       struct node_info ni;
+@@ -825,9 +825,14 @@ int f2fs_inline_data_fiemap(struct inode
+       if (err)
+               goto out;
+-      byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
+-      byteaddr += (char *)inline_data_addr(inode, ifolio) -
+-                                      (char *)F2FS_INODE(ifolio);
++      if (__is_valid_data_blkaddr(ni.blk_addr)) {
++              byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
++              byteaddr += (char *)inline_data_addr(inode, ifolio) -
++                                              (char *)F2FS_INODE(ifolio);
++      } else {
++              f2fs_bug_on(F2FS_I_SB(inode), ni.blk_addr != NEW_ADDR);
++              flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN;
++      }
+       err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
+       trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err);
+ out:
diff --git a/queue-7.0/f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch b/queue-7.0/f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch
new file mode 100644 (file)
index 0000000..86b95bb
--- /dev/null
@@ -0,0 +1,38 @@
+From eb2ca3ca983551a80e16a4a25df5a4ce59df8484 Mon Sep 17 00:00:00 2001
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Date: Mon, 23 Mar 2026 20:06:22 +0800
+Subject: f2fs: fix incorrect multidevice info in trace_f2fs_map_blocks()
+
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+
+commit eb2ca3ca983551a80e16a4a25df5a4ce59df8484 upstream.
+
+When f2fs_map_blocks()->f2fs_map_blocks_cached() hits the read extent
+cache, map->m_multidev_dio is not updated, which leads to incorrect
+multidevice information being reported by trace_f2fs_map_blocks().
+
+This patch updates map->m_multidev_dio in f2fs_map_blocks_cached() when
+the read extent cache is hit.
+
+Cc: stable@kernel.org
+Fixes: 0094e98bd147 ("f2fs: factor a f2fs_map_blocks_cached helper")
+Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/data.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -1576,7 +1576,8 @@ static bool f2fs_map_blocks_cached(struc
+               f2fs_wait_on_block_writeback_range(inode,
+                                       map->m_pblk, map->m_len);
+-      if (f2fs_allow_multi_device_dio(sbi, flag)) {
++      map->m_multidev_dio = f2fs_allow_multi_device_dio(sbi, flag);
++      if (map->m_multidev_dio) {
+               int bidx = f2fs_target_device_index(sbi, map->m_pblk);
+               struct f2fs_dev_info *dev = &sbi->devs[bidx];
diff --git a/queue-7.0/f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch b/queue-7.0/f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch
new file mode 100644 (file)
index 0000000..5d41102
--- /dev/null
@@ -0,0 +1,87 @@
+From fe9b8b30b97102859a9102be7bd2a09803bd90bd Mon Sep 17 00:00:00 2001
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Date: Wed, 18 Mar 2026 16:46:35 +0800
+Subject: f2fs: fix inline data not being written to disk in writeback path
+
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+
+commit fe9b8b30b97102859a9102be7bd2a09803bd90bd upstream.
+
+When f2fs_fiemap() is called with `fileinfo->fi_flags` containing the
+FIEMAP_FLAG_SYNC flag, it attempts to write data to disk before
+retrieving file mappings via filemap_write_and_wait(). However, there is
+an issue where the file does not get mapped as expected. The following
+scenario can occur:
+
+root@vm:/mnt/f2fs# dd if=/dev/zero of=data.3k bs=3k count=1
+root@vm:/mnt/f2fs# xfs_io data.3k -c "fiemap -v 0 4096"
+data.3k:
+ EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
+   0: [0..5]:          0..5                 6 0x307
+
+The root cause of this issue is that f2fs_write_single_data_page() only
+calls f2fs_write_inline_data() to copy data from the data folio to the
+inode folio, and it clears the dirty flag on the data folio. However, it
+does not mark the data folio as writeback. When
+__filemap_fdatawait_range() checks for folios with the writeback flag,
+it returns early, causing f2fs_fiemap() to report that the file has no
+mapping.
+
+To fix this issue, the solution is to call
+f2fs_write_single_node_folio() in f2fs_inline_data_fiemap() when
+getting fiemap with FIEMAP_FLAG_SYNC flags. This patch ensures that the
+inode folio is written back and the writeback process completes before
+proceeding.
+
+Cc: stable@kernel.org
+Fixes: 9ffe0fb5f3bb ("f2fs: handle inline data operations")
+Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/f2fs.h   |    2 ++
+ fs/f2fs/inline.c |    9 +++++++++
+ fs/f2fs/node.c   |    2 +-
+ 3 files changed, 12 insertions(+), 1 deletion(-)
+
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -3950,6 +3950,8 @@ int f2fs_sanity_check_node_footer(struct
+                                       enum node_type ntype, bool in_irq);
+ struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino);
+ struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid);
++int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode,
++                      bool mark_dirty, enum iostat_type io_type);
+ int f2fs_move_node_folio(struct folio *node_folio, int gc_type);
+ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi);
+ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
+--- a/fs/f2fs/inline.c
++++ b/fs/f2fs/inline.c
+@@ -814,6 +814,15 @@ int f2fs_inline_data_fiemap(struct inode
+               goto out;
+       }
++      if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
++              err = f2fs_write_single_node_folio(ifolio, true, false, FS_NODE_IO);
++              if (err)
++                      return err;
++              ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino);
++              if (IS_ERR(ifolio))
++                      return PTR_ERR(ifolio);
++              f2fs_folio_wait_writeback(ifolio, NODE, true, true);
++      }
+       ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode));
+       if (start >= ilen)
+               goto out;
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -1842,7 +1842,7 @@ redirty_out:
+       return false;
+ }
+-static int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode,
++int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode,
+                       bool mark_dirty, enum iostat_type io_type)
+ {
+       int err = 0;
diff --git a/queue-7.0/f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch b/queue-7.0/f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch
new file mode 100644 (file)
index 0000000..8e1c4b0
--- /dev/null
@@ -0,0 +1,93 @@
+From ed78aeebef05212ef7dca93bd931e4eff67c113f Mon Sep 17 00:00:00 2001
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Date: Fri, 3 Apr 2026 22:40:17 +0800
+Subject: f2fs: fix node_cnt race between extent node destroy and writeback
+
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+
+commit ed78aeebef05212ef7dca93bd931e4eff67c113f upstream.
+
+f2fs_destroy_extent_node() does not set FI_NO_EXTENT before clearing
+extent nodes. When called from f2fs_drop_inode() with I_SYNC set,
+concurrent kworker writeback can insert new extent nodes into the same
+extent tree, racing with the destroy and triggering f2fs_bug_on() in
+__destroy_extent_node(). The scenario is as follows:
+
+drop inode                            writeback
+ - iput
+  - f2fs_drop_inode  // I_SYNC set
+   - f2fs_destroy_extent_node
+    - __destroy_extent_node
+     - while (node_cnt) {
+        write_lock(&et->lock)
+        __free_extent_tree
+        write_unlock(&et->lock)
+                                       - __writeback_single_inode
+                                        - f2fs_outplace_write_data
+                                         - f2fs_update_read_extent_cache
+                                          - __update_extent_tree_range
+                                           // FI_NO_EXTENT not set,
+                                           // insert new extent node
+       } // node_cnt == 0, exit while
+     - f2fs_bug_on(node_cnt)  // node_cnt > 0
+
+Additionally, __update_extent_tree_range() only checks FI_NO_EXTENT for
+EX_READ type, leaving EX_BLOCK_AGE updates completely unprotected.
+
+This patch set FI_NO_EXTENT under et->lock in __destroy_extent_node(),
+consistent with other callers (__update_extent_tree_range and
+__drop_extent_tree) and check FI_NO_EXTENT for both EX_READ and
+EX_BLOCK_AGE tree.
+
+Fixes: 3fc5d5a182f6 ("f2fs: fix to shrink read extent node in batches")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/extent_cache.c |   17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/fs/f2fs/extent_cache.c
++++ b/fs/f2fs/extent_cache.c
+@@ -119,9 +119,10 @@ static bool __may_extent_tree(struct ino
+       if (!__init_may_extent_tree(inode, type))
+               return false;
++      if (is_inode_flag_set(inode, FI_NO_EXTENT))
++              return false;
++
+       if (type == EX_READ) {
+-              if (is_inode_flag_set(inode, FI_NO_EXTENT))
+-                      return false;
+               if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
+                                !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
+                       return false;
+@@ -644,6 +645,8 @@ static unsigned int __destroy_extent_nod
+       while (atomic_read(&et->node_cnt)) {
+               write_lock(&et->lock);
++              if (!is_inode_flag_set(inode, FI_NO_EXTENT))
++                      set_inode_flag(inode, FI_NO_EXTENT);
+               node_cnt += __free_extent_tree(sbi, et, nr_shrink);
+               write_unlock(&et->lock);
+       }
+@@ -688,12 +691,12 @@ static void __update_extent_tree_range(s
+       write_lock(&et->lock);
+-      if (type == EX_READ) {
+-              if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
+-                      write_unlock(&et->lock);
+-                      return;
+-              }
++      if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
++              write_unlock(&et->lock);
++              return;
++      }
++      if (type == EX_READ) {
+               prev = et->largest;
+               dei.len = 0;
diff --git a/queue-7.0/f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch b/queue-7.0/f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch
new file mode 100644 (file)
index 0000000..b272bd9
--- /dev/null
@@ -0,0 +1,62 @@
+From b635f2ecdb5ad34f9c967cabb704d6bed9382fd0 Mon Sep 17 00:00:00 2001
+From: Guangshuo Li <lgs201920130244@gmail.com>
+Date: Fri, 10 Apr 2026 20:47:26 +0800
+Subject: f2fs: fix uninitialized kobject put in f2fs_init_sysfs()
+
+From: Guangshuo Li <lgs201920130244@gmail.com>
+
+commit b635f2ecdb5ad34f9c967cabb704d6bed9382fd0 upstream.
+
+In f2fs_init_sysfs(), all failure paths after kset_register() jump to
+put_kobject, which unconditionally releases both f2fs_tune and
+f2fs_feat.
+
+If kobject_init_and_add(&f2fs_feat, ...) fails, f2fs_tune has not been
+initialized yet, so calling kobject_put(&f2fs_tune) is invalid.
+
+Fix this by splitting the unwind path so each error path only releases
+objects that were successfully initialized.
+
+Fixes: a907f3a68ee26ba4 ("f2fs: add a sysfs entry to reclaim POSIX_FADV_NOREUSE pages")
+Cc: stable@vger.kernel.org
+Signed-off-by: Guangshuo Li <lgs201920130244@gmail.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/sysfs.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/f2fs/sysfs.c
++++ b/fs/f2fs/sysfs.c
+@@ -1984,24 +1984,26 @@ int __init f2fs_init_sysfs(void)
+       ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype,
+                                  NULL, "features");
+       if (ret)
+-              goto put_kobject;
++              goto unregister_kset;
+       ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype,
+                                  NULL, "tuning");
+       if (ret)
+-              goto put_kobject;
++              goto put_feat;
+       f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+       if (!f2fs_proc_root) {
+               ret = -ENOMEM;
+-              goto put_kobject;
++              goto put_tune;
+       }
+       return 0;
+-put_kobject:
++put_tune:
+       kobject_put(&f2fs_tune);
++put_feat:
+       kobject_put(&f2fs_feat);
++unregister_kset:
+       kset_unregister(&f2fs_kset);
+       return ret;
+ }
diff --git a/queue-7.0/f2fs-refactor-f2fs_move_node_folio-function.patch b/queue-7.0/f2fs-refactor-f2fs_move_node_folio-function.patch
new file mode 100644 (file)
index 0000000..7382a97
--- /dev/null
@@ -0,0 +1,97 @@
+From 92c20989366e023b74fa0c1028af9436c1917dbf Mon Sep 17 00:00:00 2001
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Date: Wed, 18 Mar 2026 16:45:32 +0800
+Subject: f2fs: refactor f2fs_move_node_folio function
+
+From: Yongpeng Yang <yangyongpeng@xiaomi.com>
+
+commit 92c20989366e023b74fa0c1028af9436c1917dbf upstream.
+
+This patch refactor the f2fs_move_node_folio() function. No logical
+changes.
+
+Cc: stable@kernel.org
+Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/node.c |   54 ++++++++++++++++++++++++++++++++----------------------
+ 1 file changed, 32 insertions(+), 22 deletions(-)
+
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -1842,41 +1842,51 @@ redirty_out:
+       return false;
+ }
+-int f2fs_move_node_folio(struct folio *node_folio, int gc_type)
++static int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode,
++                      bool mark_dirty, enum iostat_type io_type)
+ {
+       int err = 0;
++      struct writeback_control wbc = {
++              .sync_mode = WB_SYNC_ALL,
++              .nr_to_write = 1,
++      };
+-      if (gc_type == FG_GC) {
+-              struct writeback_control wbc = {
+-                      .sync_mode = WB_SYNC_ALL,
+-                      .nr_to_write = 1,
+-              };
++      if (!sync_mode) {
++              /* set page dirty and write it */
++              if (!folio_test_writeback(node_folio))
++                      folio_mark_dirty(node_folio);
++              goto out_folio;
++      }
+-              f2fs_folio_wait_writeback(node_folio, NODE, true, true);
++      f2fs_folio_wait_writeback(node_folio, NODE, true, true);
++      if (mark_dirty)
+               folio_mark_dirty(node_folio);
++      else if (!folio_test_dirty(node_folio))
++              goto out_folio;
+-              if (!folio_clear_dirty_for_io(node_folio)) {
+-                      err = -EAGAIN;
+-                      goto out_page;
+-              }
+-
+-              if (!__write_node_folio(node_folio, false, NULL,
+-                                      &wbc, false, FS_GC_NODE_IO, NULL))
+-                      err = -EAGAIN;
+-              goto release_page;
+-      } else {
+-              /* set page dirty and write it */
+-              if (!folio_test_writeback(node_folio))
+-                      folio_mark_dirty(node_folio);
++      if (!folio_clear_dirty_for_io(node_folio)) {
++              err = -EAGAIN;
++              goto out_folio;
+       }
+-out_page:
++
++      if (!__write_node_folio(node_folio, false, NULL,
++                              &wbc, false, FS_GC_NODE_IO, NULL))
++              err = -EAGAIN;
++      goto release_folio;
++out_folio:
+       folio_unlock(node_folio);
+-release_page:
++release_folio:
+       f2fs_folio_put(node_folio, false);
+       return err;
+ }
++int f2fs_move_node_folio(struct folio *node_folio, int gc_type)
++{
++      return f2fs_write_single_node_folio(node_folio, gc_type == FG_GC,
++                      true, FS_GC_NODE_IO);
++}
++
+ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
+                       struct writeback_control *wbc, bool atomic,
+                       unsigned int *seq_id)
diff --git a/queue-7.0/kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch b/queue-7.0/kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch
new file mode 100644 (file)
index 0000000..56bce66
--- /dev/null
@@ -0,0 +1,50 @@
+From 7fe2cd4e1a3ad230d8fcc00cc99c4bcce4412a75 Mon Sep 17 00:00:00 2001
+From: Fuad Tabba <tabba@google.com>
+Date: Fri, 24 Apr 2026 09:49:03 +0100
+Subject: KVM: arm64: Fix FEAT_Debugv8p9 to check DebugVer, not PMUVer
+
+From: Fuad Tabba <tabba@google.com>
+
+commit 7fe2cd4e1a3ad230d8fcc00cc99c4bcce4412a75 upstream.
+
+FEAT_Debugv8p9 is incorrectly defined against ID_AA64DFR0_EL1.PMUVer
+instead of ID_AA64DFR0_EL1.DebugVer.  All three consumers of the macro
+gate features that are architecturally tied to FEAT_Debugv8p9
+(DebugVer = 0b1011, DDI0487 M.b A2.2.10):
+
+  - HDFGRTR2_EL2.nMDSELR_EL1, HDFGWTR2_EL2.nMDSELR_EL1: MDSELR_EL1
+    is present only when FEAT_Debugv8p9 is implemented (D24.3.21).
+
+  - MDCR_EL2.EBWE: the Extended Breakpoint and Watchpoint Enable bit
+    is RES0 unless FEAT_Debugv8p9 is implemented (D24.3.17).
+
+Neither register has any dependency on PMUVer.
+
+FEAT_Debugv8p9 and FEAT_PMUv3p9 are independent.  Per DDI0487 M.b
+A2.2.10, FEAT_Debugv8p9 is unconditionally mandatory from Armv8.9,
+whereas FEAT_PMUv3p9 is mandatory only when FEAT_PMUv3 is implemented.
+An Armv8.9 CPU without a PMU has DebugVer = 0b1011 but PMUVer = 0b0000,
+so the wrong field check would cause KVM to incorrectly treat EBWE and
+MDSELR_EL1 as RES0 on such hardware.
+
+Fixes: 4bc0fe089840 ("KVM: arm64: Add sanitisation for FEAT_FGT2 registers")
+Signed-off-by: Fuad Tabba <tabba@google.com>
+Link: https://patch.msgid.link/20260424084908.370776-2-tabba@google.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/config.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/kvm/config.c
++++ b/arch/arm64/kvm/config.c
+@@ -191,7 +191,7 @@ struct reg_feat_map_desc {
+ #define FEAT_SRMASK           ID_AA64MMFR4_EL1, SRMASK, IMP
+ #define FEAT_PoPS             ID_AA64MMFR4_EL1, PoPS, IMP
+ #define FEAT_PFAR             ID_AA64PFR1_EL1, PFAR, IMP
+-#define FEAT_Debugv8p9                ID_AA64DFR0_EL1, PMUVer, V3P9
++#define FEAT_Debugv8p9                ID_AA64DFR0_EL1, DebugVer, V8P9
+ #define FEAT_PMUv3_SS         ID_AA64DFR0_EL1, PMSS, IMP
+ #define FEAT_SEBEP            ID_AA64DFR0_EL1, SEBEP, IMP
+ #define FEAT_EBEP             ID_AA64DFR1_EL1, EBEP, IMP
diff --git a/queue-7.0/kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch b/queue-7.0/kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch
new file mode 100644 (file)
index 0000000..515bec0
--- /dev/null
@@ -0,0 +1,77 @@
+From 08d715338287a1affb4c7ad5733decef4558a5c8 Mon Sep 17 00:00:00 2001
+From: Fuad Tabba <tabba@google.com>
+Date: Fri, 24 Apr 2026 09:49:05 +0100
+Subject: KVM: arm64: Fix FEAT_SPE_FnE to use PMSIDR_EL1.FnE, not PMSVer
+
+From: Fuad Tabba <tabba@google.com>
+
+commit 08d715338287a1affb4c7ad5733decef4558a5c8 upstream.
+
+FEAT_SPE_FnE is architecturally detected via PMSIDR_EL1.FnE [6], not
+ID_AA64DFR0_EL1.PMSVer. The FEAT_X macro form (register, field, value)
+cannot encode a PMSIDR_EL1-based feature, so FEAT_SPE_FnE was defined
+identically to FEAT_SPEv1p2 (ID_AA64DFR0_EL1, PMSVer, V1P2), producing
+a duplicate that used PMSVer >= V1P2 as a proxy.
+
+Replace the macro with feat_spe_fne(), following the same pattern as
+the sibling feat_spe_fds(): guard on FEAT_SPEv1p2 and read
+PMSIDR_EL1.FnE [6] directly. Wire the two NEEDS_FEAT consumers to use
+the new function.
+
+Remove the now-unused FEAT_SPE_FnE macro.
+
+Fixes: 63d423a7635b ("KVM: arm64: Switch to table-driven FGU configuration")
+Signed-off-by: Fuad Tabba <tabba@google.com>
+Link: https://patch.msgid.link/20260424084908.370776-4-tabba@google.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/config.c |   15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/arch/arm64/kvm/config.c
++++ b/arch/arm64/kvm/config.c
+@@ -131,7 +131,6 @@ struct reg_feat_map_desc {
+       }
+ #define FEAT_SPE              ID_AA64DFR0_EL1, PMSVer, IMP
+-#define FEAT_SPE_FnE          ID_AA64DFR0_EL1, PMSVer, V1P2
+ #define FEAT_BRBE             ID_AA64DFR0_EL1, BRBE, IMP
+ #define FEAT_TRC_SR           ID_AA64DFR0_EL1, TraceVer, IMP
+ #define FEAT_PMUv3            ID_AA64DFR0_EL1, PMUVer, IMP
+@@ -301,6 +300,16 @@ static bool feat_spe_fds(struct kvm *kvm
+               (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FDS));
+ }
++static bool feat_spe_fne(struct kvm *kvm)
++{
++      /*
++       * Revisit this if KVM ever supports SPE -- this really should
++       * look at the guest's view of PMSIDR_EL1.
++       */
++      return (kvm_has_feat(kvm, FEAT_SPEv1p2) &&
++              (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FnE));
++}
++
+ static bool feat_trbe_mpam(struct kvm *kvm)
+ {
+       /*
+@@ -536,7 +545,7 @@ static const struct reg_bits_to_feat_map
+                  HDFGRTR_EL2_PMBPTR_EL1       |
+                  HDFGRTR_EL2_PMBLIMITR_EL1,
+                  FEAT_SPE),
+-      NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE),
++      NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, feat_spe_fne),
+       NEEDS_FEAT(HDFGRTR_EL2_nBRBDATA         |
+                  HDFGRTR_EL2_nBRBCTL          |
+                  HDFGRTR_EL2_nBRBIDR,
+@@ -604,7 +613,7 @@ static const struct reg_bits_to_feat_map
+                  HDFGWTR_EL2_PMBPTR_EL1       |
+                  HDFGWTR_EL2_PMBLIMITR_EL1,
+                  FEAT_SPE),
+-      NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE),
++      NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, feat_spe_fne),
+       NEEDS_FEAT(HDFGWTR_EL2_nBRBDATA         |
+                  HDFGWTR_EL2_nBRBCTL,
+                  FEAT_BRBE),
diff --git a/queue-7.0/kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch b/queue-7.0/kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch
new file mode 100644 (file)
index 0000000..1700c54
--- /dev/null
@@ -0,0 +1,56 @@
+From 5bb0aed57ba944f8c201e4e82ec066e0187e0f85 Mon Sep 17 00:00:00 2001
+From: Quentin Perret <qperret@google.com>
+Date: Fri, 24 Apr 2026 09:49:08 +0100
+Subject: KVM: arm64: Fix initialisation order in __pkvm_init_finalise()
+
+From: Quentin Perret <qperret@google.com>
+
+commit 5bb0aed57ba944f8c201e4e82ec066e0187e0f85 upstream.
+
+fix_host_ownership() walks the hypervisor's stage-1 page-table to
+adjust the host's stage-2 accordingly. Any such adjustment that
+requires cache maintenance operations depends on the per-CPU hyp
+fixmap being present. However, fix_host_ownership() is currently
+called before fix_hyp_pgtable_refcnt() and hyp_create_fixmap(), so
+the fixmap does not yet exist when it runs.
+
+This is benign today because the host stage-2 starts empty and no
+CMOs are needed, but it becomes a latent crash as soon as
+fix_host_ownership() is extended to operate on a non-empty
+page-table.
+
+Reorder the calls so that fix_hyp_pgtable_refcnt() and
+hyp_create_fixmap() complete before fix_host_ownership() is invoked.
+
+Fixes: 0d16d12eb26e ("KVM: arm64: Fix-up hyp stage-1 refcounts for all pages mapped at EL2")
+Signed-off-by: Quentin Perret <qperret@google.com>
+Signed-off-by: Fuad Tabba <tabba@google.com>
+Link: https://patch.msgid.link/20260424084908.370776-7-tabba@google.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/hyp/nvhe/setup.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/arm64/kvm/hyp/nvhe/setup.c
++++ b/arch/arm64/kvm/hyp/nvhe/setup.c
+@@ -312,15 +312,15 @@ void __noreturn __pkvm_init_finalise(voi
+       };
+       pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
+-      ret = fix_host_ownership();
++      ret = fix_hyp_pgtable_refcnt();
+       if (ret)
+               goto out;
+-      ret = fix_hyp_pgtable_refcnt();
++      ret = hyp_create_fixmap();
+       if (ret)
+               goto out;
+-      ret = hyp_create_fixmap();
++      ret = fix_host_ownership();
+       if (ret)
+               goto out;
diff --git a/queue-7.0/kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch b/queue-7.0/kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch
new file mode 100644 (file)
index 0000000..305a9ca
--- /dev/null
@@ -0,0 +1,113 @@
+From 73b9c1e5da84cd69b1a86e374e450817cd051371 Mon Sep 17 00:00:00 2001
+From: Fuad Tabba <tabba@google.com>
+Date: Fri, 24 Apr 2026 09:49:07 +0100
+Subject: KVM: arm64: Fix pin leak and publication ordering in __pkvm_init_vcpu()
+
+From: Fuad Tabba <tabba@google.com>
+
+commit 73b9c1e5da84cd69b1a86e374e450817cd051371 upstream.
+
+Two bugs exist in the vCPU initialisation path:
+
+1. If a check fails after hyp_pin_shared_mem() succeeds, the cleanup
+   path jumps to 'unlock' without calling unpin_host_vcpu() or
+   unpin_host_sve_state(), permanently leaking pin references on the
+   host vCPU and SVE state pages.
+
+   Extract a register_hyp_vcpu() helper that performs the checks and
+   the store. When register_hyp_vcpu() returns an error, call
+   unpin_host_vcpu() and unpin_host_sve_state() inline before falling
+   through to the existing 'unlock' label.
+
+2. register_hyp_vcpu() publishes the new vCPU pointer into
+   'hyp_vm->vcpus[]' with a bare store, allowing a concurrent caller
+   of pkvm_load_hyp_vcpu() to observe a partially initialised vCPU
+   object.
+
+   Ensure the store uses smp_store_release() and the load uses
+   smp_load_acquire(). While 'vm_table_lock' currently serialises the
+   store and the load, these barriers ensure the reader sees the fully
+   initialised 'hyp_vcpu' object even if there were a lockless path or
+   if the lock's own ordering guarantees were insufficient for nested
+   object initialization.
+
+Fixes: 49af6ddb8e5c ("KVM: arm64: Add infrastructure to create and track pKVM instances at EL2")
+Reported-by: Ben Simner <ben.simner@cl.cam.ac.uk>
+Co-developed-by: Will Deacon <willdeacon@google.com>
+Signed-off-by: Will Deacon <willdeacon@google.com>
+Signed-off-by: Fuad Tabba <tabba@google.com>
+Link: https://patch.msgid.link/20260424084908.370776-6-tabba@google.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/hyp/nvhe/pkvm.c |   38 +++++++++++++++++++++++++-------------
+ 1 file changed, 25 insertions(+), 13 deletions(-)
+
+--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
++++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
+@@ -258,7 +258,8 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu
+       if (!hyp_vm || hyp_vm->kvm.created_vcpus <= vcpu_idx)
+               goto unlock;
+-      hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
++      /* Pairs with smp_store_release() in register_hyp_vcpu(). */
++      hyp_vcpu = smp_load_acquire(&hyp_vm->vcpus[vcpu_idx]);
+       if (!hyp_vcpu)
+               goto unlock;
+@@ -803,12 +804,30 @@ err_unpin_kvm:
+  *         the page-aligned size of 'struct pkvm_hyp_vcpu'.
+  * Return 0 on success, negative error code on failure.
+  */
++static int register_hyp_vcpu(struct pkvm_hyp_vm *hyp_vm,
++                            struct pkvm_hyp_vcpu *hyp_vcpu)
++{
++      unsigned int idx = hyp_vcpu->vcpu.vcpu_idx;
++
++      if (idx >= hyp_vm->kvm.created_vcpus)
++              return -EINVAL;
++
++      if (hyp_vm->vcpus[idx])
++              return -EINVAL;
++
++      /*
++       * Ensure the hyp_vcpu is initialised before publishing it to
++       * the vCPU-load path via 'hyp_vm->vcpus[]'.
++       */
++      smp_store_release(&hyp_vm->vcpus[idx], hyp_vcpu);
++      return 0;
++}
++
+ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
+                    unsigned long vcpu_hva)
+ {
+       struct pkvm_hyp_vcpu *hyp_vcpu;
+       struct pkvm_hyp_vm *hyp_vm;
+-      unsigned int idx;
+       int ret;
+       hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu));
+@@ -827,18 +846,11 @@ int __pkvm_init_vcpu(pkvm_handle_t handl
+       if (ret)
+               goto unlock;
+-      idx = hyp_vcpu->vcpu.vcpu_idx;
+-      if (idx >= hyp_vm->kvm.created_vcpus) {
+-              ret = -EINVAL;
+-              goto unlock;
+-      }
+-
+-      if (hyp_vm->vcpus[idx]) {
+-              ret = -EINVAL;
+-              goto unlock;
++      ret = register_hyp_vcpu(hyp_vm, hyp_vcpu);
++      if (ret) {
++              unpin_host_vcpu(host_vcpu);
++              unpin_host_sve_state(hyp_vcpu);
+       }
+-
+-      hyp_vm->vcpus[idx] = hyp_vcpu;
+ unlock:
+       hyp_spin_unlock(&vm_table_lock);
diff --git a/queue-7.0/kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch b/queue-7.0/kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch
new file mode 100644 (file)
index 0000000..093bf8c
--- /dev/null
@@ -0,0 +1,54 @@
+From a0e6ae45af17e8b27958830595799c702ffbab8d Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Tue, 7 Apr 2026 21:27:02 +0100
+Subject: KVM: arm64: vgic: Fix IIDR revision field extracted from wrong value
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit a0e6ae45af17e8b27958830595799c702ffbab8d upstream.
+
+The uaccess write handlers for GICD_IIDR in both GICv2 and GICv3
+extract the revision field from 'reg' (the current IIDR value read back
+from the emulated distributor) instead of 'val' (the value userspace is
+trying to write). This means userspace can never actually change the
+implementation revision — the extracted value is always the current one.
+
+Fix the FIELD_GET to use 'val' so that userspace can select a different
+revision for migration compatibility.
+
+Fixes: 49a1a2c70a7f ("KVM: arm64: vgic-v3: Advertise GICR_CTLR.{IR, CES} as a new GICD_IIDR revision")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://patch.msgid.link/20260407210949.2076251-2-dwmw2@infradead.org
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/vgic/vgic-mmio-v2.c |    2 +-
+ arch/arm64/kvm/vgic/vgic-mmio-v3.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
++++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+@@ -91,7 +91,7 @@ static int vgic_mmio_uaccess_write_v2_mi
+                * migration from old kernels to new kernels with legacy
+                * userspace.
+                */
+-              reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg);
++              reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val);
+               switch (reg) {
+               case KVM_VGIC_IMP_REV_2:
+               case KVM_VGIC_IMP_REV_3:
+--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
++++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+@@ -194,7 +194,7 @@ static int vgic_mmio_uaccess_write_v3_mi
+               if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK)
+                       return -EINVAL;
+-              reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg);
++              reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val);
+               switch (reg) {
+               case KVM_VGIC_IMP_REV_2:
+               case KVM_VGIC_IMP_REV_3:
diff --git a/queue-7.0/kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch b/queue-7.0/kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch
new file mode 100644 (file)
index 0000000..f3a1359
--- /dev/null
@@ -0,0 +1,42 @@
+From 4ce98bf0865c349e7026ad9c14f48da264920953 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Thu, 23 Apr 2026 17:36:07 +0100
+Subject: KVM: arm64: Wake-up from WFI when iqrchip is in userspace
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 4ce98bf0865c349e7026ad9c14f48da264920953 upstream.
+
+It appears that there is nothing in the wake-up path that
+evaluates whether the in-kernel interrupts are pending unless
+we have a vgic.
+
+This means that the userspace irqchip support has been broken for
+about four years, and nobody noticed. It was also broken before
+as we wouldn't wake-up on a PMU interrupt, but hey, who cares...
+
+It is probably time to remove the feature altogether, because it
+was a terrible idea 10 years ago, and it still is.
+
+Fixes: b57de4ffd7c6d ("KVM: arm64: Simplify kvm_cpu_has_pending_timer()")
+Link: https://patch.msgid.link/20260423163607.486345-1-maz@kernel.org
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/arm.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -805,6 +805,10 @@ int kvm_arch_vcpu_runnable(struct kvm_vc
+ {
+       bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE);
++      irq_lines |= (!irqchip_in_kernel(v->kvm) &&
++                    (kvm_timer_should_notify_user(v) ||
++                     kvm_pmu_should_notify_user(v)));
++
+       return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
+               && !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
+ }
diff --git a/queue-7.0/loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch b/queue-7.0/loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch
new file mode 100644 (file)
index 0000000..b64333b
--- /dev/null
@@ -0,0 +1,103 @@
+From 8dfa2f8780e486d05b9a0ffce70b8f5fbd62053e Mon Sep 17 00:00:00 2001
+From: Wentao Guan <guanwentao@uniontech.com>
+Date: Mon, 4 May 2026 09:00:20 +0800
+Subject: LoongArch: Fix potential ADE in loongson_gpu_fixup_dma_hang()
+
+From: Wentao Guan <guanwentao@uniontech.com>
+
+commit 8dfa2f8780e486d05b9a0ffce70b8f5fbd62053e upstream.
+
+The switch case in loongson_gpu_fixup_dma_hang() may not DC2 or DC3, and
+readl(crtc_reg) will access with random address, because the "device" is
+from "base+PCI_DEVICE_ID", "base" is from "pdev->devfn+1". This is wrong
+when my platform inserts a discrete GPU:
+
+lspci -tv
+-[0000:00]-+-00.0  Loongson Technology LLC Hyper Transport Bridge Controller
+...
+           +-06.0  Loongson Technology LLC LG100 GPU
+           +-06.2  Loongson Technology LLC Device 7a37
+...
+
+Add a default switch case to fix the panic as below:
+
+ Kernel ade access[#1]:
+ CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.136-loong64-desktop-hwe+ #4
+ pc 90000000017e5534 ra 90000000017e54c0 tp 90000001002f8000 sp 90000001002fb6c0
+ a0 80000efe00003100 a1 0000000000003100 a2 0000000000000000 a3 0000000000000002
+ a4 90000001002fb6b4 a5 900000087cdb58fd a6 90000000027af000 a7 0000000000000001
+ t0 00000000000085b9 t1 000000000000ffff t2 0000000000000000 t3 0000000000000000
+ t4 fffffffffffffffd t5 00000000fffb6d9c t6 0000000000083b00 t7 00000000000070c0
+ t8 900000087cdb4d94 u0 900000087cdb58fd s9 90000001002fb826 s0 90000000031c12c8
+ s1 7fffffffffffff00 s2 90000000031c12d0 s3 0000000000002710 s4 0000000000000000
+ s5 0000000000000000 s6 9000000100053000 s7 7fffffffffffff00 s8 90000000030d4000
+    ra: 90000000017e54c0 loongson_gpu_fixup_dma_hang+0x40/0x210
+   ERA: 90000000017e5534 loongson_gpu_fixup_dma_hang+0xb4/0x210
+  CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE)
+  PRMD: 00000004 (PPLV0 +PIE -PWE)
+  EUEN: 00000000 (-FPE -SXE -ASXE -BTE)
+  ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7)
+ ESTAT: 00480000 [ADEM] (IS= ECode=8 EsubCode=1)
+  BADV: 7fffffffffffff00
+  PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV)
+ Modules linked in:
+ Process swapper/0 (pid: 1, threadinfo=(____ptrval____), task=(____ptrval____))
+ Stack : 0000000000000006 90000001002fb778 90000001002fb704 0000000000000007
+         0000000016a65700 90000000017e5690 000000000000ffff ffffffffffffffff
+         900000000209f7c0 9000000100053000 900000000209f7a8 9000000000eebc08
+         0000000000000000 0000000000000000 0000000000000006 90000001002fb778
+         90000001000530b8 90000000027af000 0000000000000000 9000000100054000
+         9000000100053000 9000000000ebb70c 9000000100004c00 9000000004000001
+         90000001002fb7e4 bae765461f31cb12 0000000000000000 0000000000000000
+         0000000000000006 90000000027af000 0000000000000030 90000000027af000
+         900000087cd6f800 9000000100053000 0000000000000000 9000000000ebc560
+         7a2500147cdaf720 bae765461f31cb12 0000000000000001 0000000000000030
+         ...
+ Call Trace:
+ [<90000000017e5534>] loongson_gpu_fixup_dma_hang+0xb4/0x210
+ [<9000000000eebc08>] pci_fixup_device+0x108/0x280
+ [<9000000000ebb70c>] pci_setup_device+0x24c/0x690
+ [<9000000000ebc560>] pci_scan_single_device+0xe0/0x140
+ [<9000000000ebc684>] pci_scan_slot+0xc4/0x280
+ [<9000000000ebdd00>] pci_scan_child_bus_extend+0x60/0x3f0
+ [<9000000000f5bc94>] acpi_pci_root_create+0x2b4/0x420
+ [<90000000017e5e74>] pci_acpi_scan_root+0x2d4/0x440
+ [<9000000000f5b02c>] acpi_pci_root_add+0x21c/0x3a0
+ [<9000000000f4ee54>] acpi_bus_attach+0x1a4/0x3c0
+ [<90000000010e200c>] device_for_each_child+0x6c/0xe0
+ [<9000000000f4bbf4>] acpi_dev_for_each_child+0x44/0x70
+ [<9000000000f4ef40>] acpi_bus_attach+0x290/0x3c0
+ [<90000000010e200c>] device_for_each_child+0x6c/0xe0
+ [<9000000000f4bbf4>] acpi_dev_for_each_child+0x44/0x70
+ [<9000000000f4ef40>] acpi_bus_attach+0x290/0x3c0
+ [<9000000000f5211c>] acpi_bus_scan+0x6c/0x280
+ [<900000000189c028>] acpi_scan_init+0x194/0x310
+ [<900000000189bc6c>] acpi_init+0xcc/0x140
+ [<9000000000220cdc>] do_one_initcall+0x4c/0x310
+ [<90000000018618fc>] kernel_init_freeable+0x258/0x2d4
+ [<900000000184326c>] kernel_init+0x28/0x13c
+ [<9000000000222008>] ret_from_kernel_thread+0xc/0xa4
+
+Cc: stable@vger.kernel.org
+Fixes: 95db0c9f526d ("LoongArch: Workaround LS2K/LS7A GPU DMA hang bug")
+Link: https://gist.github.com/opsiff/ebf2dac51b4013d22462f2124c55f807
+Link: https://gist.github.com/opsiff/a62f2a73db0492b3c49bf223a339b133
+Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/pci/pci.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/loongarch/pci/pci.c
++++ b/arch/loongarch/pci/pci.c
+@@ -132,6 +132,9 @@ static void loongson_gpu_fixup_dma_hang(
+               crtc_reg = regbase;
+               crtc_offset = 0x400;
+               break;
++      default:
++              iounmap(regbase);
++              return;
+       }
+       for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) {
diff --git a/queue-7.0/loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch b/queue-7.0/loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch
new file mode 100644 (file)
index 0000000..821d114
--- /dev/null
@@ -0,0 +1,35 @@
+From b3e31a6650d4cab63f0814c37c0b360372c6ee9e Mon Sep 17 00:00:00 2001
+From: Qiang Ma <maqianga@uniontech.com>
+Date: Mon, 4 May 2026 09:00:37 +0800
+Subject: LoongArch: KVM: Cap KVM_CAP_NR_VCPUS by KVM_CAP_MAX_VCPUS
+
+From: Qiang Ma <maqianga@uniontech.com>
+
+commit b3e31a6650d4cab63f0814c37c0b360372c6ee9e upstream.
+
+It doesn't make sense to return the recommended maximum number of vCPUs
+which exceeds the maximum possible number of vCPUs.
+
+Other architectures have already done this, such as commit 57a2e13ebdda
+("KVM: MIPS: Cap KVM_CAP_NR_VCPUS by KVM_CAP_MAX_VCPUS")
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Qiang Ma <maqianga@uniontech.com>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/vm.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/loongarch/kvm/vm.c
++++ b/arch/loongarch/kvm/vm.c
+@@ -125,7 +125,7 @@ int kvm_vm_ioctl_check_extension(struct
+               r = 1;
+               break;
+       case KVM_CAP_NR_VCPUS:
+-              r = num_online_cpus();
++              r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
+               break;
+       case KVM_CAP_MAX_VCPUS:
+               r = KVM_MAX_VCPUS;
diff --git a/queue-7.0/loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch b/queue-7.0/loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch
new file mode 100644 (file)
index 0000000..148bb85
--- /dev/null
@@ -0,0 +1,74 @@
+From 2433f3f5724b3af569d9fb411ba728629524738b Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Mon, 4 May 2026 09:00:48 +0800
+Subject: LoongArch: KVM: Fix HW timer interrupt lost when inject interrupt by software
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 2433f3f5724b3af569d9fb411ba728629524738b upstream.
+
+With passthrough HW timer, timer interrupt is injected by HW. When
+inject emulated CPU interrupt by software such SIP0/SIP1/IPI, HW timer
+interrupt may be lost.
+
+Here check whether there is timer tick value inversion before and after
+injecting emulated CPU interrupt by software, timer enabling by reading
+timer cfg register is skipped. If the timer tick value is detected with
+changing, then timer should be enabled. And inject a timer interrupt by
+software if there is.
+
+Cc: <stable@vger.kernel.org>
+Fixes: f45ad5b8aa93 ("LoongArch: KVM: Implement vcpu interrupt operations").
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/interrupt.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/arch/loongarch/kvm/interrupt.c
++++ b/arch/loongarch/kvm/interrupt.c
+@@ -27,6 +27,7 @@ static unsigned int priority_to_irq[EXCC
+ static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+ {
+       unsigned int irq = 0;
++      unsigned long old, new;
+       clear_bit(priority, &vcpu->arch.irq_pending);
+       if (priority < EXCCODE_INT_NUM)
+@@ -42,7 +43,13 @@ static int kvm_irq_deliver(struct kvm_vc
+       case INT_IPI:
+       case INT_SWI0:
+       case INT_SWI1:
++              old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL);
+               set_gcsr_estat(irq);
++              new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL);
++
++              /* Inject TI if TVAL inverted */
++              if (new > old)
++                      set_gcsr_estat(CPU_TIMER);
+               break;
+       case INT_HWI0 ... INT_HWI7:
+@@ -59,6 +66,7 @@ static int kvm_irq_deliver(struct kvm_vc
+ static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority)
+ {
+       unsigned int irq = 0;
++      unsigned long old, new;
+       clear_bit(priority, &vcpu->arch.irq_clear);
+       if (priority < EXCCODE_INT_NUM)
+@@ -74,7 +82,13 @@ static int kvm_irq_clear(struct kvm_vcpu
+       case INT_IPI:
+       case INT_SWI0:
+       case INT_SWI1:
++              old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL);
+               clear_gcsr_estat(irq);
++              new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL);
++
++              /* Inject TI if TVAL inverted */
++              if (new > old)
++                      set_gcsr_estat(CPU_TIMER);
+               break;
+       case INT_HWI0 ... INT_HWI7:
diff --git a/queue-7.0/loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch b/queue-7.0/loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch
new file mode 100644 (file)
index 0000000..7cd78e9
--- /dev/null
@@ -0,0 +1,34 @@
+From b323a441da602dfdfc24f30d3190cac786ffebf2 Mon Sep 17 00:00:00 2001
+From: Xianglai Li <lixianglai@loongson.cn>
+Date: Mon, 4 May 2026 09:00:37 +0800
+Subject: LoongArch: KVM: Fix "unreliable stack" for kvm_exc_entry
+
+From: Xianglai Li <lixianglai@loongson.cn>
+
+commit b323a441da602dfdfc24f30d3190cac786ffebf2 upstream.
+
+Insert the appropriate UNWIND hint into the kvm_exc_entry assembly
+function to guide the generation of correct ORC table entries, thereby
+solving the timeout problem ("unreliable stack") while loading the
+livepatch-sample module on a physical machine running virtual machines
+with multiple vcpus.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/switch.S |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/loongarch/kvm/switch.S
++++ b/arch/loongarch/kvm/switch.S
+@@ -111,7 +111,7 @@
+       .p2align PAGE_SHIFT
+       .cfi_sections   .debug_frame
+ SYM_CODE_START(kvm_exc_entry)
+-      UNWIND_HINT_UNDEFINED
++      UNWIND_HINT_END_OF_STACK
+       csrwr   a2,   KVM_TEMP_KS
+       csrrd   a2,   KVM_VCPU_KS
+       addi.d  a2,   a2, KVM_VCPU_ARCH
diff --git a/queue-7.0/loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch b/queue-7.0/loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch
new file mode 100644 (file)
index 0000000..bc40286
--- /dev/null
@@ -0,0 +1,59 @@
+From 5a873d77ba792410a796595a917be6a440f9b7d2 Mon Sep 17 00:00:00 2001
+From: Bibo Mao <maobibo@loongson.cn>
+Date: Mon, 4 May 2026 09:00:48 +0800
+Subject: LoongArch: KVM: Move unconditional delay into timer clear scenery
+
+From: Bibo Mao <maobibo@loongson.cn>
+
+commit 5a873d77ba792410a796595a917be6a440f9b7d2 upstream.
+
+When timer interrupt arrives in guest kernel, guest kernel clears the
+timer interrupt and program timer with the next incoming event.
+
+During this stage, timer tick is -1 and timer interrupt status is
+disabled in ESTAT register. KVM hypervisor need write zero with timer
+tick register and wait timer interrupt injection from HW side, and
+then clear timer interrupt.
+
+So there is 2 cycle delay in KVM hypervisor to emulate such scenery,
+and the delay is unnecessary if there is no need to clear the timer
+interrupt.
+
+Here move 2 cycle delay into timer clear scenery and add timer ESTAT
+checking after delay, and set max timer expire value if timer interrupt
+does not arrive still.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/timer.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/loongarch/kvm/timer.c
++++ b/arch/loongarch/kvm/timer.c
+@@ -96,15 +96,21 @@ void kvm_restore_timer(struct kvm_vcpu *
+                * and set CSR TVAL with -1
+                */
+               write_gcsr_timertick(0);
+-              __delay(2); /* Wait cycles until timer interrupt injected */
+               /*
+                * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear
+                * timer interrupt, and CSR TVAL keeps unchanged with -1, it
+                * avoids spurious timer interrupt
+                */
+-              if (!(estat & CPU_TIMER))
++              if (!(estat & CPU_TIMER)) {
++                      __delay(2); /* Wait cycles until timer interrupt injected */
++
++                      /* Write TVAL with max value if no TI shot */
++                      estat = kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT);
++                      if (!(estat & CPU_TIMER))
++                              write_gcsr_timertick(CSR_TCFG_VAL);
+                       gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
++              }
+               return;
+       }
diff --git a/queue-7.0/loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch b/queue-7.0/loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch
new file mode 100644 (file)
index 0000000..9e56f56
--- /dev/null
@@ -0,0 +1,35 @@
+From 81e18777d61440511451866c7c80b34a8bdd6b33 Mon Sep 17 00:00:00 2001
+From: Tao Cui <cuitao@kylinos.cn>
+Date: Mon, 4 May 2026 09:00:38 +0800
+Subject: LoongArch: KVM: Use kvm_set_pte() in kvm_flush_pte()
+
+From: Tao Cui <cuitao@kylinos.cn>
+
+commit 81e18777d61440511451866c7c80b34a8bdd6b33 upstream.
+
+kvm_flush_pte() is the only caller that directly assigns *pte instead
+of using the kvm_set_pte() wrapper. Use the wrapper for consistency with
+the rest of the file.
+
+No functional change intended.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Tao Cui <cuitao@kylinos.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/mmu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/loongarch/kvm/mmu.c
++++ b/arch/loongarch/kvm/mmu.c
+@@ -95,7 +95,7 @@ static int kvm_flush_pte(kvm_pte_t *pte,
+       else
+               kvm->stat.pages--;
+-      *pte = ctx->invalid_entry;
++      kvm_set_pte(pte, ctx->invalid_entry);
+       return 1;
+ }
diff --git a/queue-7.0/loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch b/queue-7.0/loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch
new file mode 100644 (file)
index 0000000..2ed5a06
--- /dev/null
@@ -0,0 +1,58 @@
+From 49f33840dcc907d21313d369e34872880846b61c Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Mon, 4 May 2026 09:00:20 +0800
+Subject: LoongArch: Use per-root-bridge PCIH flag to skip mem resource fixup
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit 49f33840dcc907d21313d369e34872880846b61c upstream.
+
+When firmware enables 64-bit PCI host bridge support, some root bridges
+already provide valid 64-bit mem resource windows through ACPI.
+
+In this case, the LoongArch-specific mem resource high-bits fixup in
+acpi_prepare_root_resources() should not be applied unconditionally.
+Otherwise, the kernel may override the native resource layout derived
+from firmware, and later BAR assignment can fail to place device BARs
+into the intended 64-bit address space correctly.
+
+Add a per-root-bridge ACPI flag, PCIH, and evaluate it from the current
+root bridge device scope. When PCIH is set, skip the mem resource high-
+bits fixup path and let the kernel use the firmware-provided resource
+description directly. When PCIH is absent or cleared, keep the existing
+behavior and continue filling the high address bits from the host bridge
+address.
+
+This makes the behavior per-root-bridge configurable and avoids breaking
+valid 64-bit BAR space allocation on bridges whose 64-bit windows have
+already been fully described by firmware.
+
+Cc: stable@vger.kernel.org
+Suggested-by: Chao Li <lichao@loongson.cn>
+Tested-by: Dongyan Qian <qiandongyan@loongson.cn>
+Signed-off-by: Dongyan Qian <qiandongyan@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/pci/acpi.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/loongarch/pci/acpi.c
++++ b/arch/loongarch/pci/acpi.c
+@@ -61,11 +61,16 @@ static void acpi_release_root_info(struc
+ static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci)
+ {
+       int status;
++      unsigned long long pci_h = 0;
+       struct resource_entry *entry, *tmp;
+       struct acpi_device *device = ci->bridge;
+       status = acpi_pci_probe_root_resources(ci);
+       if (status > 0) {
++              acpi_evaluate_integer(device->handle, "PCIH", NULL, &pci_h);
++              if (pci_h)
++                      return status;
++
+               resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
+                       if (entry->res->flags & IORESOURCE_MEM) {
+                               entry->offset = ci->root->mcfg_addr & GENMASK_ULL(63, 40);
diff --git a/queue-7.0/mptcp-fastclose-msk-when-linger-time-is-0.patch b/queue-7.0/mptcp-fastclose-msk-when-linger-time-is-0.patch
new file mode 100644 (file)
index 0000000..303bd7a
--- /dev/null
@@ -0,0 +1,51 @@
+From f14d6e9c3678a067f304abba561e0c5446c7e845 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Mon, 27 Apr 2026 21:54:35 +0200
+Subject: mptcp: fastclose msk when linger time is 0
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit f14d6e9c3678a067f304abba561e0c5446c7e845 upstream.
+
+The SO_LINGER socket option has been supported for a while with MPTCP
+sockets [1], but it didn't cause the equivalent of a TCP reset as
+expected when enabled and its time was set to 0. This was causing some
+behavioural differences with TCP where some connections were not
+promptly stopped as expected.
+
+To fix that, an extra condition is checked at close() time before
+sending an MP_FASTCLOSE, the MPTCP equivalent of a TCP reset.
+
+Note that backporting up to [1] will be difficult as more changes are
+needed to be able to send MP_FASTCLOSE. It seems better to stop at [2],
+which was supposed to already imitate TCP.
+
+Validated with MPTCP packetdrill tests [3].
+
+Fixes: 268b12387460 ("mptcp: setsockopt: support SO_LINGER") [1]
+Fixes: d21f83485518 ("mptcp: use fastclose on more edge scenarios") [2]
+Cc: stable@vger.kernel.org
+Reported-by: Lance Tuller <lance@lance0.com>
+Closes: https://github.com/lance0/xfr/pull/67
+Link: https://github.com/multipath-tcp/packetdrill/pull/196 [3]
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-3-7432b7f279fa@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -3279,7 +3279,8 @@ bool __mptcp_close(struct sock *sk, long
+               goto cleanup;
+       }
+-      if (mptcp_data_avail(msk) || timeout < 0) {
++      if (mptcp_data_avail(msk) || timeout < 0 ||
++          (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) {
+               /* If the msk has read data, or the caller explicitly ask it,
+                * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
+                */
diff --git a/queue-7.0/mptcp-fix-rx-timestamp-corruption-on-fastopen.patch b/queue-7.0/mptcp-fix-rx-timestamp-corruption-on-fastopen.patch
new file mode 100644 (file)
index 0000000..8f4cc20
--- /dev/null
@@ -0,0 +1,50 @@
+From 6254a16d6f0c672e3809ca5d7c9a28a55d71f764 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 1 May 2026 21:35:36 +0200
+Subject: mptcp: fix rx timestamp corruption on fastopen
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 6254a16d6f0c672e3809ca5d7c9a28a55d71f764 upstream.
+
+The skb cb offset containing the timestamp presence flag is cleared
+before loading such information. Cache such value before MPTCP CB
+initialization.
+
+Fixes: 36b122baf6a8 ("mptcp: add subflow_v(4,6)_send_synack()")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-3-b70118df778e@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/fastopen.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/mptcp/fastopen.c
++++ b/net/mptcp/fastopen.c
+@@ -12,6 +12,7 @@ void mptcp_fastopen_subflow_synack_set_p
+       struct sock *sk, *ssk;
+       struct sk_buff *skb;
+       struct tcp_sock *tp;
++      bool has_rxtstamp;
+       /* on early fallback the subflow context is deleted by
+        * subflow_syn_recv_sock()
+@@ -40,12 +41,13 @@ void mptcp_fastopen_subflow_synack_set_p
+        */
+       tp->copied_seq += skb->len;
+       subflow->ssn_offset += skb->len;
++      has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+       /* Only the sequence delta is relevant */
+       MPTCP_SKB_CB(skb)->map_seq = -skb->len;
+       MPTCP_SKB_CB(skb)->end_seq = 0;
+       MPTCP_SKB_CB(skb)->offset = 0;
+-      MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
++      MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
+       MPTCP_SKB_CB(skb)->cant_coalesce = 1;
+       mptcp_data_lock(sk);
diff --git a/queue-7.0/mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch b/queue-7.0/mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch
new file mode 100644 (file)
index 0000000..9d1a25b
--- /dev/null
@@ -0,0 +1,57 @@
+From b5c52908d52c6c8eb8933264aa6087a0600fd892 Mon Sep 17 00:00:00 2001
+From: Gang Yan <yangang@kylinos.cn>
+Date: Mon, 27 Apr 2026 21:54:34 +0200
+Subject: mptcp: fix scheduling with atomic in timestamp sockopt
+
+From: Gang Yan <yangang@kylinos.cn>
+
+commit b5c52908d52c6c8eb8933264aa6087a0600fd892 upstream.
+
+Using lock_sock_fast() (atomic context) around sock_set_timestamp()
+and sock_set_timestamping() is unsafe, as both helpers can sleep.
+
+Replace lock_sock_fast() with sleepable lock_sock()/release_sock()
+to avoid scheduling while atomic panic.
+
+Fixes: 9061f24bf82e ("mptcp: sockopt: propagate timestamp request to subflows")
+Cc: stable@vger.kernel.org
+Reported-by: Sashiko <sashiko-bot@kernel.org>
+Closes: https://sashiko.dev/#/patchset/20260420093343.16443-1-gang.yan@linux.dev
+Signed-off-by: Gang Yan <yangang@kylinos.cn>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-2-7432b7f279fa@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/sockopt.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/mptcp/sockopt.c
++++ b/net/mptcp/sockopt.c
+@@ -159,10 +159,10 @@ static int mptcp_setsockopt_sol_socket_t
+       lock_sock(sk);
+       mptcp_for_each_subflow(msk, subflow) {
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+-              bool slow = lock_sock_fast(ssk);
++              lock_sock(ssk);
+               sock_set_timestamp(ssk, optname, !!val);
+-              unlock_sock_fast(ssk, slow);
++              release_sock(ssk);
+       }
+       release_sock(sk);
+@@ -235,10 +235,10 @@ static int mptcp_setsockopt_sol_socket_t
+       mptcp_for_each_subflow(msk, subflow) {
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+-              bool slow = lock_sock_fast(ssk);
++              lock_sock(ssk);
+               sock_set_timestamping(ssk, optname, timestamping);
+-              unlock_sock_fast(ssk, slow);
++              release_sock(ssk);
+       }
+       release_sock(sk);
diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-allow-id-0.patch b/queue-7.0/mptcp-pm-add_addr-rtx-allow-id-0.patch
new file mode 100644 (file)
index 0000000..ba7a210
--- /dev/null
@@ -0,0 +1,39 @@
+From 03f324f3f1f7619a47b9c91282cb12775ab0a2f1 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:50 +0200
+Subject: mptcp: pm: ADD_ADDR rtx: allow ID 0
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 03f324f3f1f7619a47b9c91282cb12775ab0a2f1 upstream.
+
+ADD_ADDR can be sent for the ID 0, which corresponds to the local
+address and port linked to the initial subflow.
+
+Indeed, this address could be removed, and re-added later on, e.g. what
+is done in the "delete re-add signal" MPTCP Join selftests. So no reason
+to ignore it.
+
+Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-2-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm.c |    3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -350,9 +350,6 @@ static void mptcp_pm_add_timer(struct ti
+       if (inet_sk_state_load(sk) == TCP_CLOSE)
+               return;
+-      if (!entry->addr.id)
+-              return;
+-
+       if (mptcp_pm_should_add_signal_addr(msk)) {
+               sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
+               goto out;
diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch b/queue-7.0/mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch
new file mode 100644 (file)
index 0000000..8a43e35
--- /dev/null
@@ -0,0 +1,55 @@
+From 9634cb35af17019baec21ca648516ce376fa10e6 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:52 +0200
+Subject: mptcp: pm: ADD_ADDR rtx: always decrease sk refcount
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 9634cb35af17019baec21ca648516ce376fa10e6 upstream.
+
+When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer().
+It should then be released in all cases at the end.
+
+Some (unlikely) checks were returning directly instead of calling
+sock_put() to decrease the refcount. Jump to a new 'exit' label to call
+__sock_put() (which will become sock_put() in the next commit) to fix
+this potential leak.
+
+While at it, drop the '!msk' check which cannot happen because it is
+never reset, and explicitly mark the remaining one as "unlikely".
+
+Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-4-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -344,11 +344,8 @@ static void mptcp_pm_add_timer(struct ti
+       pr_debug("msk=%p\n", msk);
+-      if (!msk)
+-              return;
+-
+-      if (inet_sk_state_load(sk) == TCP_CLOSE)
+-              return;
++      if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
++              goto exit;
+       bh_lock_sock(sk);
+       if (sock_owned_by_user(sk)) {
+@@ -386,6 +383,7 @@ static void mptcp_pm_add_timer(struct ti
+ out:
+       bh_unlock_sock(sk);
++exit:
+       __sock_put(sk);
+ }
diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-fix-potential-data-race.patch b/queue-7.0/mptcp-pm-add_addr-rtx-fix-potential-data-race.patch
new file mode 100644 (file)
index 0000000..931596c
--- /dev/null
@@ -0,0 +1,51 @@
+From 5cd6e0ad79d2615264f63929f8b457ad97ae550d Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:51 +0200
+Subject: mptcp: pm: ADD_ADDR rtx: fix potential data-race
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 5cd6e0ad79d2615264f63929f8b457ad97ae550d upstream.
+
+This mptcp_pm_add_timer() helper is executed as a timer callback in
+softirq context. To avoid any data races, the socket lock needs to be
+held with bh_lock_sock().
+
+If the socket is in use, retry again soon after, similar to what is done
+with the keepalive timer.
+
+Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-3-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -350,6 +350,13 @@ static void mptcp_pm_add_timer(struct ti
+       if (inet_sk_state_load(sk) == TCP_CLOSE)
+               return;
++      bh_lock_sock(sk);
++      if (sock_owned_by_user(sk)) {
++              /* Try again later. */
++              sk_reset_timer(sk, timer, jiffies + HZ / 20);
++              goto out;
++      }
++
+       if (mptcp_pm_should_add_signal_addr(msk)) {
+               sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
+               goto out;
+@@ -378,6 +385,7 @@ static void mptcp_pm_add_timer(struct ti
+               mptcp_pm_subflow_established(msk);
+ out:
++      bh_unlock_sock(sk);
+       __sock_put(sk);
+ }
diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-free-sk-if-last.patch b/queue-7.0/mptcp-pm-add_addr-rtx-free-sk-if-last.patch
new file mode 100644 (file)
index 0000000..012fbbd
--- /dev/null
@@ -0,0 +1,116 @@
+From b7b9a461569734d33d3259d58d2507adfac107ed Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:53 +0200
+Subject: mptcp: pm: ADD_ADDR rtx: free sk if last
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit b7b9a461569734d33d3259d58d2507adfac107ed upstream.
+
+When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(),
+and released at the end.
+
+If at that moment, it was the last reference being held, the sk would
+not be freed. sock_put() should then be called instead of __sock_put().
+
+But that's not enough: if it is the last reference, sock_put() will call
+sk_free(), which will end up calling sk_stop_timer_sync() on the same
+timer, and waiting indefinitely to finish. So it is needed to mark that
+the timer is done at the end of the timer handler when it has not been
+rescheduled, not to call sk_stop_timer_sync() on "itself".
+
+Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-5-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm.c |   28 ++++++++++++++++++----------
+ 1 file changed, 18 insertions(+), 10 deletions(-)
+
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -16,6 +16,7 @@ struct mptcp_pm_add_entry {
+       struct list_head        list;
+       struct mptcp_addr_info  addr;
+       u8                      retrans_times;
++      bool                    timer_done;
+       struct timer_list       add_timer;
+       struct mptcp_sock       *sock;
+       struct rcu_head         rcu;
+@@ -340,22 +341,22 @@ static void mptcp_pm_add_timer(struct ti
+                                                             add_timer);
+       struct mptcp_sock *msk = entry->sock;
+       struct sock *sk = (struct sock *)msk;
+-      unsigned int timeout;
++      unsigned int timeout = 0;
+       pr_debug("msk=%p\n", msk);
++      bh_lock_sock(sk);
+       if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
+-              goto exit;
++              goto out;
+-      bh_lock_sock(sk);
+       if (sock_owned_by_user(sk)) {
+               /* Try again later. */
+-              sk_reset_timer(sk, timer, jiffies + HZ / 20);
++              timeout = HZ / 20;
+               goto out;
+       }
+       if (mptcp_pm_should_add_signal_addr(msk)) {
+-              sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
++              timeout = TCP_RTO_MAX / 8;
+               goto out;
+       }
+@@ -373,8 +374,9 @@ static void mptcp_pm_add_timer(struct ti
+       }
+       if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
+-              sk_reset_timer(sk, timer,
+-                             jiffies + (timeout << entry->retrans_times));
++              timeout <<= entry->retrans_times;
++      else
++              timeout = 0;
+       spin_unlock_bh(&msk->pm.lock);
+@@ -382,9 +384,13 @@ static void mptcp_pm_add_timer(struct ti
+               mptcp_pm_subflow_established(msk);
+ out:
++      if (timeout)
++              sk_reset_timer(sk, timer, jiffies + timeout);
++      else
++              /* if sock_put calls sk_free: avoid waiting for this timer */
++              entry->timer_done = true;
+       bh_unlock_sock(sk);
+-exit:
+-      __sock_put(sk);
++      sock_put(sk);
+ }
+ struct mptcp_pm_add_entry *
+@@ -447,6 +453,7 @@ bool mptcp_pm_alloc_anno_list(struct mpt
+       timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
+ reset_timer:
++      add_entry->timer_done = false;
+       timeout = mptcp_adjust_add_addr_timeout(msk);
+       if (timeout)
+               sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);
+@@ -467,7 +474,8 @@ static void mptcp_pm_free_anno_list(stru
+       spin_unlock_bh(&msk->pm.lock);
+       list_for_each_entry_safe(entry, tmp, &free_list, list) {
+-              sk_stop_timer_sync(sk, &entry->add_timer);
++              if (!entry->timer_done)
++                      sk_stop_timer_sync(sk, &entry->add_timer);
+               kfree_rcu(entry, rcu);
+       }
+ }
diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch b/queue-7.0/mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch
new file mode 100644 (file)
index 0000000..935ce7c
--- /dev/null
@@ -0,0 +1,54 @@
+From 3cf12492891c4b5ff54dda404a2de4ec54c9e1b5 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:54 +0200
+Subject: mptcp: pm: ADD_ADDR rtx: resched blocked ADD_ADDR quicker
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 3cf12492891c4b5ff54dda404a2de4ec54c9e1b5 upstream.
+
+When an ADD_ADDR needs to be retransmitted and another one has already
+been prepared -- e.g. multiple ADD_ADDRs have been sent in a row and
+need to be retransmitted later -- this additional retransmission will
+need to wait.
+
+In this case, the timer was reset to TCP_RTO_MAX / 8, which is ~15
+seconds. This delay is unnecessary long: it should just be rescheduled
+at the next opportunity, e.g. after the retransmission timeout.
+
+Without this modification, some issues can be seen from time to time in
+the selftests when multiple ADD_ADDRs are sent, and the host takes time
+to process them, e.g. the "signal addresses, ADD_ADDR timeout" MPTCP
+Join selftest, especially with a debug kernel config.
+
+Note that on older kernels, 'timeout' is not available. It should be
+enough to replace it by one second (HZ).
+
+Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-6-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm.c |    7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -355,13 +355,8 @@ static void mptcp_pm_add_timer(struct ti
+               goto out;
+       }
+-      if (mptcp_pm_should_add_signal_addr(msk)) {
+-              timeout = TCP_RTO_MAX / 8;
+-              goto out;
+-      }
+-
+       timeout = mptcp_adjust_add_addr_timeout(msk);
+-      if (!timeout)
++      if (!timeout || mptcp_pm_should_add_signal_addr(msk))
+               goto out;
+       spin_lock_bh(&msk->pm.lock);
diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch b/queue-7.0/mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch
new file mode 100644 (file)
index 0000000..6447ca1
--- /dev/null
@@ -0,0 +1,37 @@
+From 62a9b19dce77e72426f049fb99b9d1d032b9a8ea Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:56 +0200
+Subject: mptcp: pm: ADD_ADDR rtx: return early if no retrans
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 62a9b19dce77e72426f049fb99b9d1d032b9a8ea upstream.
+
+No need to iterate over all subflows if there is no retransmission
+needed.
+
+Exit early in this case then.
+
+Fixes: 30549eebc4d8 ("mptcp: make ADD_ADDR retransmission timeout adaptive")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-8-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -311,6 +311,9 @@ static unsigned int mptcp_adjust_add_add
+       struct mptcp_subflow_context *subflow;
+       unsigned int max = 0, max_stale = 0;
++      if (!rto)
++              return 0;
++
+       mptcp_for_each_subflow(msk, subflow) {
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+               struct inet_connection_sock *icsk = inet_csk(ssk);
diff --git a/queue-7.0/mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch b/queue-7.0/mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch
new file mode 100644 (file)
index 0000000..298a83f
--- /dev/null
@@ -0,0 +1,66 @@
+From b12014d2d36eaed4e4bec5f1ac7e91110eeb100d Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:49 +0200
+Subject: mptcp: pm: kernel: correctly retransmit ADD_ADDR ID 0
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit b12014d2d36eaed4e4bec5f1ac7e91110eeb100d upstream.
+
+When adding the ADD_ADDR to the list, the address including the IP, port
+and ID are copied. On the other hand, when the endpoint corresponds to
+the one from the initial subflow, the ID is set to 0, as specified by
+the MPTCP protocol.
+
+The issue is that the ID was reset after having copied the ID in the
+ADD_ADDR entry. So the retransmission was done, but using a different ID
+than the initial one.
+
+Fixes: 8b8ed1b429f8 ("mptcp: pm: reuse ID 0 after delete and re-add")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-1-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm_kernel.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/mptcp/pm_kernel.c
++++ b/net/mptcp/pm_kernel.c
+@@ -347,6 +347,8 @@ static void mptcp_pm_create_subflow_or_s
+       /* check first for announce */
+       if (msk->pm.add_addr_signaled < endp_signal_max) {
++              u8 endp_id;
++
+               /* due to racing events on both ends we can reach here while
+                * previous add address is still running: if we invoke now
+                * mptcp_pm_announce_addr(), that will fail and the
+@@ -360,19 +362,20 @@ static void mptcp_pm_create_subflow_or_s
+               if (!select_signal_address(pernet, msk, &local))
+                       goto subflow;
++              /* Special case for ID0: set the correct ID */
++              endp_id = local.addr.id;
++              if (endp_id == msk->mpc_endpoint_id)
++                      local.addr.id = 0;
++
+               /* If the alloc fails, we are on memory pressure, not worth
+                * continuing, and trying to create subflows.
+                */
+               if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
+                       return;
+-              __clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
++              __clear_bit(endp_id, msk->pm.id_avail_bitmap);
+               msk->pm.add_addr_signaled++;
+-              /* Special case for ID0: set the correct ID */
+-              if (local.addr.id == msk->mpc_endpoint_id)
+-                      local.addr.id = 0;
+-
+               mptcp_pm_announce_addr(msk, &local.addr, false);
+               mptcp_pm_addr_send_ack(msk);
diff --git a/queue-7.0/mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch b/queue-7.0/mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch
new file mode 100644 (file)
index 0000000..e78272d
--- /dev/null
@@ -0,0 +1,40 @@
+From 1774d3cf3cf17baaf30c095606cda496268283b3 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Mon, 27 Apr 2026 21:54:36 +0200
+Subject: mptcp: pm: kernel: reset fullmesh counter after flush
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 1774d3cf3cf17baaf30c095606cda496268283b3 upstream.
+
+This variable counts how many MPTCP endpoints have a 'fullmesh' flag
+set. After having flushed all MPTCP endpoints, it is then needed to
+reset this counter.
+
+Without this reset, this counter exposed to the userspace is wrong, but
+also non-fullmesh endpoints added after the flush will not be taken into
+account to create subflows in reaction to ADD_ADDRs.
+
+Fixes: f88191c7f361 ("mptcp: pm: in-kernel: record fullmesh endp nb")
+Cc: stable@vger.kernel.org
+Reported-by: Sashiko <sashiko-bot@kernel.org>
+Closes: https://sashiko.dev/#/patchset/20260422-mptcp-inc-limits-v6-0-903181771530%40kernel.org?part=15
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-4-7432b7f279fa@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm_kernel.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/mptcp/pm_kernel.c
++++ b/net/mptcp/pm_kernel.c
+@@ -1278,6 +1278,7 @@ static void __reset_counters(struct pm_n
+       WRITE_ONCE(pernet->endp_signal_max, 0);
+       WRITE_ONCE(pernet->endp_subflow_max, 0);
+       WRITE_ONCE(pernet->endp_laminar_max, 0);
++      WRITE_ONCE(pernet->endp_fullmesh_max, 0);
+       pernet->endpoints = 0;
+ }
diff --git a/queue-7.0/mptcp-pm-prio-skip-closed-subflows.patch b/queue-7.0/mptcp-pm-prio-skip-closed-subflows.patch
new file mode 100644 (file)
index 0000000..aa3a7dd
--- /dev/null
@@ -0,0 +1,41 @@
+From 166b78344031bf7ac9f55cb5282776cfd85f220e Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:57 +0200
+Subject: mptcp: pm: prio: skip closed subflows
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 166b78344031bf7ac9f55cb5282776cfd85f220e upstream.
+
+When sending an MP_PRIO, closed subflows need to be skipped.
+
+This fixes the case where the initial subflow got closed, re-opened
+later, then an MP_PRIO is needed for the same local address.
+
+Note that explicit MP_PRIO cannot be sent during the 3WHS, so it is fine
+to use __mptcp_subflow_active().
+
+Fixes: 067065422fcd ("mptcp: add the outgoing MP_PRIO support")
+Cc: stable@vger.kernel.org
+Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation")
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-9-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -283,6 +283,9 @@ int mptcp_pm_mp_prio_send_ack(struct mpt
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+               struct mptcp_addr_info local, remote;
++              if (!__mptcp_subflow_active(subflow))
++                      continue;
++
+               mptcp_local_address((struct sock_common *)ssk, &local);
+               if (!mptcp_addresses_equal(&local, addr, addr->port))
+                       continue;
diff --git a/queue-7.0/mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch b/queue-7.0/mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch
new file mode 100644 (file)
index 0000000..4b4efc4
--- /dev/null
@@ -0,0 +1,41 @@
+From 70ece9d7021c54cf40c72b31b066e9088f5f75f5 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Fri, 1 May 2026 21:35:37 +0200
+Subject: mptcp: sockopt: increase seq in mptcp_setsockopt_all_sf
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 70ece9d7021c54cf40c72b31b066e9088f5f75f5 upstream.
+
+mptcp_setsockopt_all_sf() was missing a call to sockopt_seq_inc(). This
+is required not to cause missing synchronization for newer subflows
+created later on.
+
+This helper is called each time a socket option is set on subflows, and
+future ones will need to inherit this option after their creation.
+
+Fixes: 51c5fd09e1b4 ("mptcp: add TCP_MAXSEG sockopt support")
+Cc: stable@vger.kernel.org
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-4-b70118df778e@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/sockopt.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/mptcp/sockopt.c
++++ b/net/mptcp/sockopt.c
+@@ -812,6 +812,10 @@ static int mptcp_setsockopt_all_sf(struc
+               if (ret)
+                       break;
+       }
++
++      if (!ret)
++              sockopt_seq_inc(msk);
++
+       return ret;
+ }
diff --git a/queue-7.0/mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch b/queue-7.0/mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch
new file mode 100644 (file)
index 0000000..3fda447
--- /dev/null
@@ -0,0 +1,50 @@
+From 5f95c21fc23a7ef22b4d27d1ed9bb55557ffb926 Mon Sep 17 00:00:00 2001
+From: Gang Yan <yangang@kylinos.cn>
+Date: Mon, 27 Apr 2026 21:54:33 +0200
+Subject: mptcp: sockopt: set timestamp flags on subflow socket, not msk
+
+From: Gang Yan <yangang@kylinos.cn>
+
+commit 5f95c21fc23a7ef22b4d27d1ed9bb55557ffb926 upstream.
+
+Both mptcp_setsockopt_sol_socket_tstamp() and
+mptcp_setsockopt_sol_socket_timestamping() iterate over subflows,
+acquire the subflow socket lock, but then erroneously pass the MPTCP
+msk socket to sock_set_timestamp() / sock_set_timestamping() instead
+of the subflow ssk. As a result, the timestamp flags are set on the
+wrong socket and have no effect on the actual subflows.
+
+Pass ssk instead of sk to both helpers.
+
+Fixes: 9061f24bf82e ("mptcp: sockopt: propagate timestamp request to subflows")
+Cc: stable@vger.kernel.org
+Signed-off-by: Gang Yan <yangang@kylinos.cn>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-1-7432b7f279fa@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/sockopt.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/mptcp/sockopt.c
++++ b/net/mptcp/sockopt.c
+@@ -161,7 +161,7 @@ static int mptcp_setsockopt_sol_socket_t
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+               bool slow = lock_sock_fast(ssk);
+-              sock_set_timestamp(sk, optname, !!val);
++              sock_set_timestamp(ssk, optname, !!val);
+               unlock_sock_fast(ssk, slow);
+       }
+@@ -237,7 +237,7 @@ static int mptcp_setsockopt_sol_socket_t
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+               bool slow = lock_sock_fast(ssk);
+-              sock_set_timestamping(sk, optname, timestamping);
++              sock_set_timestamping(ssk, optname, timestamping);
+               unlock_sock_fast(ssk, slow);
+       }
diff --git a/queue-7.0/mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch b/queue-7.0/mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch
new file mode 100644 (file)
index 0000000..35dad71
--- /dev/null
@@ -0,0 +1,45 @@
+From c4a99a921949cddc590b22bb14eeb23dffcc3ba6 Mon Sep 17 00:00:00 2001
+From: Shardul Bankar <shardul.b@mpiricsoftware.com>
+Date: Fri, 1 May 2026 21:35:34 +0200
+Subject: mptcp: use MPJoinSynAckHMacFailure for SynAck HMAC failure
+
+From: Shardul Bankar <shardul.b@mpiricsoftware.com>
+
+commit c4a99a921949cddc590b22bb14eeb23dffcc3ba6 upstream.
+
+In subflow_finish_connect(), HMAC validation of the server's HMAC
+in SYN/ACK + MP_JOIN increments MPTCP_MIB_JOINACKMAC ("HMAC was
+wrong on ACK + MP_JOIN") on failure. The function processes the
+SYN/ACK, not the ACK; the matching MPTCP_MIB_JOINSYNACKMAC counter
+("HMAC was wrong on SYN/ACK + MP_JOIN") exists but is not
+incremented anywhere in the tree.
+
+The mirror site on the server, subflow_syn_recv_sock(), already
+uses JOINACKMAC correctly for ACK HMAC failure. Use JOINSYNACKMAC
+at the SYN/ACK validation site so each counter reflects the packet
+whose HMAC actually failed.
+
+Suggested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Fixes: fc518953bc9c ("mptcp: add and use MIB counter infrastructure")
+Cc: stable@vger.kernel.org
+Signed-off-by: Shardul Bankar <shardul.b@mpiricsoftware.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-1-b70118df778e@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/subflow.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -581,7 +581,7 @@ static void subflow_finish_connect(struc
+                        subflow->backup);
+               if (!subflow_thmac_valid(subflow)) {
+-                      MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
++                      MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKMAC);
+                       subflow->reset_reason = MPTCP_RST_EMPTCP;
+                       goto do_reset;
+               }
diff --git a/queue-7.0/mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch b/queue-7.0/mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch
new file mode 100644 (file)
index 0000000..c05db45
--- /dev/null
@@ -0,0 +1,44 @@
+From a6da02d4c00fdda2417e42ad2b762a9209e6cc49 Mon Sep 17 00:00:00 2001
+From: Shardul Bankar <shardul.b@mpiricsoftware.com>
+Date: Fri, 1 May 2026 21:35:35 +0200
+Subject: mptcp: use MPTCP_RST_EMPTCP for ACK HMAC validation failure
+
+From: Shardul Bankar <shardul.b@mpiricsoftware.com>
+
+commit a6da02d4c00fdda2417e42ad2b762a9209e6cc49 upstream.
+
+When HMAC validation fails on a received ACK + MP_JOIN in
+subflow_syn_recv_sock(), the subflow is reset with reason
+MPTCP_RST_EPROHIBIT ("Administratively prohibited"). This is
+incorrect: HMAC validation failure is an MPTCP protocol-level
+error, not an administrative policy denial.
+
+The mirror site on the client, in subflow_finish_connect(), already
+uses MPTCP_RST_EMPTCP ("MPTCP-specific error") for the same kind of
+HMAC failure on the SYN/ACK + MP_JOIN. Use the same reason on the
+server side for symmetry and accuracy.
+
+Suggested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Fixes: 443041deb5ef ("mptcp: fix NULL pointer in can_accept_new_subflow")
+Cc: stable@vger.kernel.org
+Signed-off-by: Shardul Bankar <shardul.b@mpiricsoftware.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-2-b70118df778e@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/subflow.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -908,7 +908,7 @@ create_child:
+                       if (!subflow_hmac_valid(subflow_req, &mp_opt)) {
+                               SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
+-                              subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
++                              subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
+                               goto dispose_child;
+                       }
diff --git a/queue-7.0/pci-aer-clear-only-error-bits-in-pcie-device-status.patch b/queue-7.0/pci-aer-clear-only-error-bits-in-pcie-device-status.patch
new file mode 100644 (file)
index 0000000..daf6a8f
--- /dev/null
@@ -0,0 +1,64 @@
+From a8aeea1bf3c80cc87983689e0118770e019bd4f3 Mon Sep 17 00:00:00 2001
+From: Shuai Xue <xueshuai@linux.alibaba.com>
+Date: Wed, 11 Feb 2026 20:46:24 +0800
+Subject: PCI/AER: Clear only error bits in PCIe Device Status
+
+From: Shuai Xue <xueshuai@linux.alibaba.com>
+
+commit a8aeea1bf3c80cc87983689e0118770e019bd4f3 upstream.
+
+Currently, pcie_clear_device_status() clears the entire PCIe Device Status
+register (PCI_EXP_DEVSTA) by writing back the value read from the register,
+which affects not only the error status bits but also other writable bits.
+
+According to PCIe r7.0, sec 7.5.3.5, this register contains:
+
+  - RW1C error status bits (CED, NFED, FED, URD at bits 0-3): These are the
+    four error status bits that need to be cleared.
+
+  - Read-only bits (AUXPD at bit 4, TRPND at bit 5): Writing to these has
+    no effect.
+
+  - Emergency Power Reduction Detected (bit 6): A RW1C non-error bit
+    introduced in PCIe r5.0 (2019). This is currently the only writable
+    non-error bit in the Device Status register. Unconditionally clearing
+    this bit can interfere with other software components that rely on this
+    power management indication.
+
+  - Reserved bits (RsvdZ): These bits are required to be written as zero.
+    Writing 1s to them (as the current implementation may do) violates the
+    specification.
+
+To prevent unintended side effects, modify pcie_clear_device_status() to
+only write 1s to the four error status bits (CED, NFED, FED, URD), leaving
+the Emergency Power Reduction Detected bit and reserved bits unaffected.
+
+Fixes: ec752f5d54d7 ("PCI/AER: Clear device status bits during ERR_FATAL and ERR_NONFATAL")
+Suggested-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Reviewed-by: Lukas Wunner <lukas@wunner.de>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260211124624.49656-1-xueshuai@linux.alibaba.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -2241,10 +2241,9 @@ EXPORT_SYMBOL_GPL(pci_set_pcie_reset_sta
+ #ifdef CONFIG_PCIEAER
+ void pcie_clear_device_status(struct pci_dev *dev)
+ {
+-      u16 sta;
+-
+-      pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
+-      pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
++      pcie_capability_write_word(dev, PCI_EXP_DEVSTA,
++                                 PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED |
++                                 PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD);
+ }
+ #endif
diff --git a/queue-7.0/pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch b/queue-7.0/pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch
new file mode 100644 (file)
index 0000000..5f340f8
--- /dev/null
@@ -0,0 +1,53 @@
+From 1ab4a3c805084d752ec571efc78272295a9f2f74 Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Fri, 27 Mar 2026 10:56:43 +0100
+Subject: PCI/AER: Stop ruling out unbound devices as error source
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 1ab4a3c805084d752ec571efc78272295a9f2f74 upstream.
+
+When searching for the error source, the AER driver rules out devices whose
+enable_cnt is zero.  This was introduced in 2009 by commit 28eb27cf0839
+("PCI AER: support invalid error source IDs") without providing a
+rationale.
+
+Drivers typically call pci_enable_device() on probe, hence the enable_cnt
+check essentially filters out unbound devices.  At the time of the commit,
+drivers had to opt in to AER by calling pci_enable_pcie_error_reporting()
+and so any AER-enabled device could be assumed to be bound to a driver.
+The check thus made sense because it allowed skipping config space accesses
+to devices which were known not to be the error source.
+
+But since 2022, AER is universally enabled on all devices when they are
+enumerated, cf. commit f26e58bf6f54 ("PCI/AER: Enable error reporting when
+AER is native").
+
+Errors may very well be reported by unbound devices, e.g. due to link
+instability.  By ruling them out as error source, errors reported by them
+are neither logged nor cleared.  When they do get bound and another error
+occurs, the earlier error is reported together with the new error, which
+may confuse users.  Stop doing so.
+
+Fixes: f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native")
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Stefan Roese <stefan.roese@mailbox.org>
+Cc: stable@vger.kernel.org # v6.0+
+Link: https://patch.msgid.link/734338c2e8b669db5a5a3b45d34131b55ffebfca.1774605029.git.lukas@wunner.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pcie/aer.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/pci/pcie/aer.c
++++ b/drivers/pci/pcie/aer.c
+@@ -1041,8 +1041,6 @@ static bool is_error_source(struct pci_d
+        *      3) There are multiple errors and prior ID comparing fails;
+        * We check AER status registers to find possible reporter.
+        */
+-      if (atomic_read(&dev->enable_cnt) == 0)
+-              return false;
+       /* Check if AER is enabled */
+       pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &reg16);
diff --git a/queue-7.0/pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch b/queue-7.0/pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch
new file mode 100644 (file)
index 0000000..e1728d5
--- /dev/null
@@ -0,0 +1,91 @@
+From cc33985d26c92a5c908c0185239c59ec35b8637c Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Mon, 16 Feb 2026 08:46:13 +0100
+Subject: PCI/ASPM: Fix pci_clear_and_set_config_dword() usage
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit cc33985d26c92a5c908c0185239c59ec35b8637c upstream.
+
+When aspm_calc_l12_info() programs the L1 PM Substates Control 1 register
+fields Common_Mode_Restore_Time, LTR_L1.2_THRESHOLD_Value and _Scale, it
+invokes pci_clear_and_set_config_dword() in an incorrect way:
+
+For the bits to clear it selects those corresponding to the field.  So far
+so good.  But for the bits to set it passes a full register value.
+pci_clear_and_set_config_dword() performs a boolean OR operation which
+sets all bits of that value, not just the ones that were just cleared.
+
+Thus, when setting the LTR_L1.2_THRESHOLD_Value and _Scale on the child of
+an ASPM link, aspm_calc_l12_info() also sets the Common_Mode_Restore_Time.
+That's a spec violation:  PCIe r7.0 sec 7.8.3.3 says this field is RsvdP
+for Upstream Ports.  On Adrià's Pixelbook Eve, Common_Mode_Restore_Time
+of the Intel 7265 "Stone Peak" wifi card is zero, yet aspm_calc_l12_info()
+does not preserve the zero bits but instead programs the value calculated
+for the Root Port into the wifi card.
+
+Likewise, when setting the Common_Mode_Restore_Time on the Root Port,
+aspm_calc_l12_info() also changes the LTR_L1.2_THRESHOLD_Value and _Scale
+from the initial 163840 nsec to 237568 nsec (due to ORing those fields),
+only to reduce it afterwards to 106496 nsec.
+
+Amend all invocations of pci_clear_and_set_config_dword() to only set bits
+which are cleared.
+
+Finally, when setting the T_POWER_ON_Value and _Scale on the Root Port and
+the wifi card, aspm_calc_l12_info() fails to preserve bits declared RsvdP
+and instead overwrites them with zeroes.  Replace pci_write_config_dword()
+with pci_clear_and_set_config_dword() to avoid this.
+
+Fixes: aeda9adebab8 ("PCI/ASPM: Configure L1 substate settings")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=220705#c22
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Tested-by: Adrià Vilanova Martínez <me@avm99963.com>
+Cc: stable@vger.kernel.org # v4.11+
+Link: https://patch.msgid.link/5c1752d7512eed0f4ea57b84b12d7ee08ca61fc5.1771226659.git.lukas@wunner.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pcie/aspm.c |   17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/drivers/pci/pcie/aspm.c
++++ b/drivers/pci/pcie/aspm.c
+@@ -706,22 +706,29 @@ static void aspm_calc_l12_info(struct pc
+       }
+       /* Program T_POWER_ON times in both ports */
+-      pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2);
+-      pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2);
++      pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2,
++                                     PCI_L1SS_CTL2_T_PWR_ON_VALUE |
++                                     PCI_L1SS_CTL2_T_PWR_ON_SCALE, ctl2);
++      pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL2,
++                                     PCI_L1SS_CTL2_T_PWR_ON_VALUE |
++                                     PCI_L1SS_CTL2_T_PWR_ON_SCALE, ctl2);
+       /* Program Common_Mode_Restore_Time in upstream device */
+       pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+-                                     PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1);
++                                     PCI_L1SS_CTL1_CM_RESTORE_TIME,
++                                     ctl1 & PCI_L1SS_CTL1_CM_RESTORE_TIME);
+       /* Program LTR_L1.2_THRESHOLD time in both ports */
+       pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+                                      PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+                                      PCI_L1SS_CTL1_LTR_L12_TH_SCALE,
+-                                     ctl1);
++                                     ctl1 & (PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
++                                             PCI_L1SS_CTL1_LTR_L12_TH_SCALE));
+       pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1,
+                                      PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+                                      PCI_L1SS_CTL1_LTR_L12_TH_SCALE,
+-                                     ctl1);
++                                     ctl1 & (PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
++                                             PCI_L1SS_CTL1_LTR_L12_TH_SCALE));
+       if (pl1_2_enables || cl1_2_enables) {
+               pci_clear_and_set_config_dword(parent,
diff --git a/queue-7.0/perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch b/queue-7.0/perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch
new file mode 100644 (file)
index 0000000..835479f
--- /dev/null
@@ -0,0 +1,69 @@
+From 8ba0b706a485b1e607594cf4210786d517ad1611 Mon Sep 17 00:00:00 2001
+From: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Date: Thu, 30 Apr 2026 08:25:55 +0800
+Subject: perf/x86/intel: Always reprogram ACR events to prevent stale masks
+
+From: Dapeng Mi <dapeng1.mi@linux.intel.com>
+
+commit 8ba0b706a485b1e607594cf4210786d517ad1611 upstream.
+
+Members of an ACR group are logically linked via a bitmask of their
+hardware counter indices. If some members of the group are assigned new
+hardware counters during rescheduling, even events that keep their
+original counter index must be updated with a new mask.
+
+Without this, an event will continue to use a stale acr_mask that
+references the old indices of its group peers. Ensure all ACR events are
+reprogrammed during the scheduling path to maintain consistency across
+the group.
+
+Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload")
+Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260430002558.712334-3-dapeng1.mi@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/core.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -1294,13 +1294,16 @@ int x86_perf_rdpmc_index(struct perf_eve
+       return event->hw.event_base_rdpmc;
+ }
+-static inline int match_prev_assignment(struct hw_perf_event *hwc,
++static inline int match_prev_assignment(struct perf_event *event,
+                                       struct cpu_hw_events *cpuc,
+                                       int i)
+ {
++      struct hw_perf_event *hwc = &event->hw;
++
+       return hwc->idx == cpuc->assign[i] &&
+-              hwc->last_cpu == smp_processor_id() &&
+-              hwc->last_tag == cpuc->tags[i];
++             hwc->last_cpu == smp_processor_id() &&
++             hwc->last_tag == cpuc->tags[i] &&
++             !is_acr_event_group(event);
+ }
+ static void x86_pmu_start(struct perf_event *event, int flags);
+@@ -1346,7 +1349,7 @@ static void x86_pmu_enable(struct pmu *p
+                        * - no other event has used the counter since
+                        */
+                       if (hwc->idx == -1 ||
+-                          match_prev_assignment(hwc, cpuc, i))
++                          match_prev_assignment(event, cpuc, i))
+                               continue;
+                       /*
+@@ -1367,7 +1370,7 @@ static void x86_pmu_enable(struct pmu *p
+                       event = cpuc->event_list[i];
+                       hwc = &event->hw;
+-                      if (!match_prev_assignment(hwc, cpuc, i))
++                      if (!match_prev_assignment(event, cpuc, i))
+                               x86_assign_hw_event(event, cpuc, i);
+                       else if (i < n_running)
+                               continue;
diff --git a/queue-7.0/perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch b/queue-7.0/perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch
new file mode 100644 (file)
index 0000000..1272f78
--- /dev/null
@@ -0,0 +1,93 @@
+From 1271aeccc307066315b2d3b0d5af2510e27018b5 Mon Sep 17 00:00:00 2001
+From: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Date: Thu, 30 Apr 2026 08:25:56 +0800
+Subject: perf/x86/intel: Disable PMI for self-reloaded ACR events
+
+From: Dapeng Mi <dapeng1.mi@linux.intel.com>
+
+commit 1271aeccc307066315b2d3b0d5af2510e27018b5 upstream.
+
+On platforms with Auto Counter Reload (ACR) support, such as NVL, a
+"NMI received for unknown reason 30" warning is observed when running
+multiple events in a group with ACR enabled:
+
+  $ perf record -e '{instructions/period=20000,acr_mask=0x2/u,\
+    cycles/period=40000,acr_mask=0x3/u}' ./test
+
+The warning occurs because the Performance Monitoring Interrupt (PMI)
+is enabled for the self-reloaded event (the cycles event in this case).
+According to the Intel SDM, the overflow bit
+(IA32_PERF_GLOBAL_STATUS.PMCn_OVF) is never set for self-reloaded events.
+Since the bit is not set, the perf NMI handler cannot identify the source
+of the interrupt, leading to the "unknown reason" message.
+
+Furthermore, enabling PMI for self-reloaded events is unnecessary and
+can lead to extraneous records that pollute the user's requested data.
+
+Disable the interrupt bit for all events configured with ACR self-reload.
+
+Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload")
+Reported-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260430002558.712334-4-dapeng1.mi@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/core.c |   17 +++++++++++++----
+ arch/x86/events/perf_event.h |   10 ++++++++++
+ 2 files changed, 23 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -3118,11 +3118,11 @@ static void intel_pmu_enable_fixed(struc
+       intel_set_masks(event, idx);
+       /*
+-       * Enable IRQ generation (0x8), if not PEBS,
+-       * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+-       * if requested:
++       * Enable IRQ generation (0x8), if not PEBS or self-reloaded
++       * ACR event, and enable ring-3 counting (0x2) and ring-0
++       * counting (0x1) if requested:
+        */
+-      if (!event->attr.precise_ip)
++      if (!event->attr.precise_ip && !is_acr_self_reload_event(event))
+               bits |= INTEL_FIXED_0_ENABLE_PMI;
+       if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+               bits |= INTEL_FIXED_0_USER;
+@@ -3306,6 +3306,15 @@ static void intel_pmu_enable_event(struc
+               intel_set_masks(event, idx);
+               static_call_cond(intel_pmu_enable_acr_event)(event);
+               static_call_cond(intel_pmu_enable_event_ext)(event);
++              /*
++               * For self-reloaded ACR event, don't enable PMI since
++               * HW won't set overflow bit in GLOBAL_STATUS. Otherwise,
++               * the PMI would be recognized as a suspicious NMI.
++               */
++              if (is_acr_self_reload_event(event))
++                      hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
++              else if (!event->attr.precise_ip)
++                      hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+               __x86_pmu_enable_event(hwc, enable_mask);
+               break;
+       case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+--- a/arch/x86/events/perf_event.h
++++ b/arch/x86/events/perf_event.h
+@@ -137,6 +137,16 @@ static inline bool is_acr_event_group(st
+       return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR);
+ }
++static inline bool is_acr_self_reload_event(struct perf_event *event)
++{
++      struct hw_perf_event *hwc = &event->hw;
++
++      if (hwc->idx < 0)
++              return false;
++
++      return test_bit(hwc->idx, (unsigned long *)&hwc->config1);
++}
++
+ struct amd_nb {
+       int nb_id;  /* NorthBridge id */
+       int refcnt; /* reference count */
diff --git a/queue-7.0/perf-x86-intel-enable-auto-counter-reload-for-dmr.patch b/queue-7.0/perf-x86-intel-enable-auto-counter-reload-for-dmr.patch
new file mode 100644 (file)
index 0000000..3582d66
--- /dev/null
@@ -0,0 +1,39 @@
+From aa4384bc8f4360167f3c3d5322121fe892289ea2 Mon Sep 17 00:00:00 2001
+From: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Date: Thu, 30 Apr 2026 08:25:57 +0800
+Subject: perf/x86/intel: Enable auto counter reload for DMR
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Dapeng Mi <dapeng1.mi@linux.intel.com>
+
+commit aa4384bc8f4360167f3c3d5322121fe892289ea2 upstream.
+
+Panther cove µarch starts to support auto counter reload (ACR), but the
+static_call intel_pmu_enable_acr_event() is not updated for the Panther
+Cove µarch used by DMR. It leads to the auto counter reload is not
+really enabled on DMR.
+
+Update static_call intel_pmu_enable_acr_event() in intel_pmu_init_pnc().
+
+Fixes: d345b6bb8860 ("perf/x86/intel: Add core PMU support for DMR")
+Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260430002558.712334-5-dapeng1.mi@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/core.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -7525,6 +7525,7 @@ static __always_inline void intel_pmu_in
+       hybrid(pmu, event_constraints) = intel_pnc_event_constraints;
+       hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints;
+       hybrid(pmu, extra_regs) = intel_pnc_extra_regs;
++      static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
+ }
+ static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
diff --git a/queue-7.0/power-supply-max17042-avoid-overflow-when-determining-health.patch b/queue-7.0/power-supply-max17042-avoid-overflow-when-determining-health.patch
new file mode 100644 (file)
index 0000000..6becf6b
--- /dev/null
@@ -0,0 +1,41 @@
+From 9a44949da669708f19d29141e65b3ac774d08f5a Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Andr=C3=A9=20Draszik?= <andre.draszik@linaro.org>
+Date: Mon, 2 Mar 2026 13:32:05 +0000
+Subject: power: supply: max17042: avoid overflow when determining health
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: André Draszik <andre.draszik@linaro.org>
+
+commit 9a44949da669708f19d29141e65b3ac774d08f5a upstream.
+
+If vmax has the default value of INT_MAX (e.g. because not specified in
+DT), battery health is reported as over-voltage. This is because adding
+any value to vmax (the vmax tolerance in this case) causes it to wrap
+around, making it negative and smaller than the measured battery
+voltage.
+
+Avoid that by using size_add().
+
+Fixes: edd4ab055931 ("power: max17042_battery: add HEALTH and TEMP_* properties support")
+Cc: stable@vger.kernel.org
+Signed-off-by: André Draszik <andre.draszik@linaro.org>
+Link: https://patch.msgid.link/20260302-max77759-fg-v3-6-3c5f01dbda23@linaro.org
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/power/supply/max17042_battery.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/power/supply/max17042_battery.c
++++ b/drivers/power/supply/max17042_battery.c
+@@ -201,7 +201,7 @@ static int max17042_get_battery_health(s
+               goto out;
+       }
+-      if (vbatt > chip->pdata->vmax + MAX17042_VMAX_TOLERANCE) {
++      if (vbatt > size_add(chip->pdata->vmax, MAX17042_VMAX_TOLERANCE)) {
+               *health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+               goto out;
+       }
diff --git a/queue-7.0/powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch b/queue-7.0/powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch
new file mode 100644 (file)
index 0000000..5021ff6
--- /dev/null
@@ -0,0 +1,109 @@
+From 6771c54728c278bf1e4bfdab4fddbbb186e33498 Mon Sep 17 00:00:00 2001
+From: Nilay Shroff <nilay@linux.ibm.com>
+Date: Wed, 11 Mar 2026 19:13:31 +0530
+Subject: powerpc/xive: fix kmemleak caused by incorrect chip_data lookup
+
+From: Nilay Shroff <nilay@linux.ibm.com>
+
+commit 6771c54728c278bf1e4bfdab4fddbbb186e33498 upstream.
+
+The kmemleak reports the following memory leak:
+
+Unreferenced object 0xc0000002a7fbc640 (size 64):
+  comm "kworker/8:1", pid 540, jiffies 4294937872
+  hex dump (first 32 bytes):
+    01 00 00 00 00 00 00 00 00 00 09 04 00 04 00 00  ................
+    00 00 a7 81 00 00 0a c0 00 00 08 04 00 04 00 00  ................
+  backtrace (crc 177d48f6):
+    __kmalloc_cache_noprof+0x520/0x730
+    xive_irq_alloc_data.constprop.0+0x40/0xe0
+    xive_irq_domain_alloc+0xd0/0x1b0
+    irq_domain_alloc_irqs_parent+0x44/0x6c
+    pseries_irq_domain_alloc+0x1cc/0x354
+    irq_domain_alloc_irqs_parent+0x44/0x6c
+    msi_domain_alloc+0xb0/0x220
+    irq_domain_alloc_irqs_locked+0x138/0x4d0
+    __irq_domain_alloc_irqs+0x8c/0xfc
+    __msi_domain_alloc_irqs+0x214/0x4d8
+    msi_domain_alloc_irqs_all_locked+0x70/0xf8
+    pci_msi_setup_msi_irqs+0x60/0x78
+    __pci_enable_msix_range+0x54c/0x98c
+    pci_alloc_irq_vectors_affinity+0x16c/0x1d4
+    nvme_pci_enable+0xac/0x9c0 [nvme]
+    nvme_probe+0x340/0x764 [nvme]
+
+This occurs when allocating MSI-X vectors for an NVMe device. During
+allocation the XIVE code creates a struct xive_irq_data and stores it
+in irq_data->chip_data.
+
+When the MSI-X irqdomain is later freed, xive_irq_free_data() is
+responsible for retrieving this structure and freeing it. However,
+after commit cc0cc23babc9 ("powerpc/xive: Untangle xive from child
+interrupt controller drivers"), xive_irq_free_data() retrieves the
+chip_data using irq_get_chip_data(), which looks up the data through
+the child domain.
+
+This is incorrect because the XIVE-specific irq data is associated with
+the XIVE (parent) domain. As a result the lookup fails and the allocated
+struct xive_irq_data is never freed, leading to the kmemleak report
+shown above.
+
+Fix this by retrieving the irq_data from the correct domain using
+irq_domain_get_irq_data() and then accessing the chip_data via
+irq_data_get_irq_chip_data().
+
+Cc: stable@vger.kernel.org
+Fixes: cc0cc23babc9 ("powerpc/xive: Untangle xive from child interrupt controller drivers")
+Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
+Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
+Reviewed-by: Nam Cao <namcao@linutronix.de>
+Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
+Link: https://patch.msgid.link/20260311134336.326996-1-nilay@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/sysdev/xive/common.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/arch/powerpc/sysdev/xive/common.c
++++ b/arch/powerpc/sysdev/xive/common.c
+@@ -1038,13 +1038,19 @@ static struct xive_irq_data *xive_irq_al
+       return xd;
+ }
+-static void xive_irq_free_data(unsigned int virq)
++static void xive_irq_free_data(struct irq_domain *domain, unsigned int virq)
+ {
+-      struct xive_irq_data *xd = irq_get_chip_data(virq);
++      struct xive_irq_data *xd;
++      struct irq_data *data = irq_domain_get_irq_data(domain, virq);
++
++      if (!data)
++              return;
++      xd = irq_data_get_irq_chip_data(data);
+       if (!xd)
+               return;
+-      irq_set_chip_data(virq, NULL);
++
++      irq_domain_reset_irq_data(data);
+       xive_cleanup_irq_data(xd);
+       kfree(xd);
+ }
+@@ -1305,7 +1311,7 @@ static int xive_irq_domain_map(struct ir
+ static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
+ {
+-      xive_irq_free_data(virq);
++      xive_irq_free_data(d, virq);
+ }
+ static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
+@@ -1443,7 +1449,7 @@ static void xive_irq_domain_free(struct
+       pr_debug("%s %d #%d\n", __func__, virq, nr_irqs);
+       for (i = 0; i < nr_irqs; i++)
+-              xive_irq_free_data(virq + i);
++              xive_irq_free_data(domain, virq + i);
+ }
+ #endif
diff --git a/queue-7.0/rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch b/queue-7.0/rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch
new file mode 100644 (file)
index 0000000..94c3450
--- /dev/null
@@ -0,0 +1,48 @@
+From 654a27f25530d052eeedf086e6c3e2d585c203bd Mon Sep 17 00:00:00 2001
+From: Kai Zen <kai.aizen.dev@gmail.com>
+Date: Tue, 7 Apr 2026 12:20:22 +0300
+Subject: RDMA/ionic: bound node_desc sysfs read with %.64s
+
+From: Kai Zen <kai.aizen.dev@gmail.com>
+
+commit 654a27f25530d052eeedf086e6c3e2d585c203bd upstream.
+
+node_desc[64] in struct ib_device is not guaranteed to be NUL-
+terminated. The core IB sysfs handler uses "%.64s" for exactly this
+reason (drivers/infiniband/core/sysfs.c:1307), since node_desc_store()
+performs a raw memcpy of up to IB_DEVICE_NODE_DESC_MAX bytes with no NUL
+termination:
+
+  memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX));
+
+If exactly 64 bytes are written via the node_desc sysfs file, the array
+contains no NUL byte. The ionic hca_type_show() handler uses unbounded
+"%s" and will read past the end of node_desc into adjacent fields of
+struct ib_device until it encounters a NUL.
+
+ionic supports IB_DEVICE_MODIFY_NODE_DESC, so this is triggerable by
+userspace.
+
+Match the core handler and bound the format specifier.
+
+Cc: stable@vger.kernel.org
+Fixes: 2075bbe8ef03 ("RDMA/ionic: Register device ops for miscellaneous functionality")
+Link: https://patch.msgid.link/r/CALynFi7NAbhDCt1tdaDbf6TnLvAqbaHa6-Wqf6OkzREbA_PAfg@mail.gmail.com
+Signed-off-by: Kai Aizen <kai.aizen.dev@gmail.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/ionic/ionic_ibdev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/ionic/ionic_ibdev.c
++++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c
+@@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct devi
+       struct ionic_ibdev *dev =
+               rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev);
+-      return sysfs_emit(buf, "%s\n", dev->ibdev.node_desc);
++      return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc);
+ }
+ static DEVICE_ATTR_RO(hca_type);
diff --git a/queue-7.0/rdma-ionic-fix-typo-in-format-string.patch b/queue-7.0/rdma-ionic-fix-typo-in-format-string.patch
new file mode 100644 (file)
index 0000000..41e5f10
--- /dev/null
@@ -0,0 +1,33 @@
+From 70f780edcd1e86350202d8a409de026b2d2e2067 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 Apr 2026 13:17:34 -0300
+Subject: RDMA/ionic: Fix typo in format string
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 70f780edcd1e86350202d8a409de026b2d2e2067 upstream.
+
+Applying the corrupted patch by hand mangled the format string, put the s
+in the right place.
+
+Cc: stable@vger.kernel.org
+Fixes: 654a27f25530 ("RDMA/ionic: bound node_desc sysfs read with %.64s")
+Link: https://patch.msgid.link/r/1-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
+Reported-by: Brad Spengler <brad.spengler@opensrcsec.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/ionic/ionic_ibdev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/ionic/ionic_ibdev.c
++++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c
+@@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct devi
+       struct ionic_ibdev *dev =
+               rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev);
+-      return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc);
++      return sysfs_emit(buf, "%.64s\n", dev->ibdev.node_desc);
+ }
+ static DEVICE_ATTR_RO(hca_type);
diff --git a/queue-7.0/rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch b/queue-7.0/rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch
new file mode 100644 (file)
index 0000000..ab4ad46
--- /dev/null
@@ -0,0 +1,42 @@
+From 6aaa978c6b6218cfac15fe1dab17c76fe229ce3f Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 Apr 2026 13:17:40 -0300
+Subject: RDMA/mana: Fix error unwind in mana_ib_create_qp_rss()
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 6aaa978c6b6218cfac15fe1dab17c76fe229ce3f upstream.
+
+Sashiko points out that mana_ib_cfg_vport_steering() is leaked, the normal
+destroy path cleans it up.
+
+Cc: stable@vger.kernel.org
+Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter")
+Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4
+Link: https://patch.msgid.link/r/7-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
+Reviewed-by: Long Li <longli@microsoft.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mana/qp.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mana/qp.c
++++ b/drivers/infiniband/hw/mana/qp.c
+@@ -235,13 +235,15 @@ static int mana_ib_create_qp_rss(struct
+               ibdev_dbg(&mdev->ib_dev,
+                         "Failed to copy to udata create rss-qp, %d\n",
+                         ret);
+-              goto fail;
++              goto err_disable_vport_rx;
+       }
+       kfree(mana_ind_table);
+       return 0;
++err_disable_vport_rx:
++      mana_disable_vport_rx(mpc);
+ fail:
+       while (i-- > 0) {
+               ibwq = ind_tbl->ind_tbl[i];
diff --git a/queue-7.0/rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch b/queue-7.0/rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch
new file mode 100644 (file)
index 0000000..e46b62f
--- /dev/null
@@ -0,0 +1,56 @@
+From 34ecf795692ee57c393109f4a24ccc313091e137 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 Apr 2026 13:17:39 -0300
+Subject: RDMA/mana: Fix mana_destroy_wq_obj() cleanup in mana_ib_create_qp_rss()
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 34ecf795692ee57c393109f4a24ccc313091e137 upstream.
+
+Sashiko points out there are two bugs here in the error unwind flow, both
+related to how the WQ table is unwound.
+
+First there is a double i-- on the first failure path due to the while loop
+having a i--, remove it.
+
+Second if mana_ib_install_cq_cb() fails then mana_create_wq_obj() is not
+undone due to the above i--.
+
+Cc: stable@vger.kernel.org
+Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP")
+Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1
+Link: https://patch.msgid.link/r/6-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
+Reviewed-by: Long Li <longli@microsoft.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mana/qp.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/drivers/infiniband/hw/mana/qp.c
++++ b/drivers/infiniband/hw/mana/qp.c
+@@ -193,11 +193,8 @@ static int mana_ib_create_qp_rss(struct
+               ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
+                                        &wq_spec, &cq_spec, &wq->rx_object);
+-              if (ret) {
+-                      /* Do cleanup starting with index i-1 */
+-                      i--;
++              if (ret)
+                       goto fail;
+-              }
+               /* The GDMA regions are now owned by the WQ object */
+               wq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+@@ -217,8 +214,10 @@ static int mana_ib_create_qp_rss(struct
+               /* Create CQ table entry */
+               ret = mana_ib_install_cq_cb(mdev, cq);
+-              if (ret)
++              if (ret) {
++                      mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+                       goto fail;
++              }
+       }
+       resp.num_entries = i;
diff --git a/queue-7.0/rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch b/queue-7.0/rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch
new file mode 100644 (file)
index 0000000..3d67efb
--- /dev/null
@@ -0,0 +1,40 @@
+From 159f2efabc89d3f931d38f2d35876535d4abf0a3 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 Apr 2026 13:17:38 -0300
+Subject: RDMA/mana: Remove user triggerable WARN_ON() in mana_ib_create_qp_rss()
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 159f2efabc89d3f931d38f2d35876535d4abf0a3 upstream.
+
+Sashiko points out that the user can specify WQs sharing the same CQ as a
+part of the uAPI and this will trigger the WARN_ON() then go on to corrupt
+the kernel.
+
+Just reject it outright and fail the QP creation.
+
+Cc: stable@vger.kernel.org
+Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP")
+Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1
+Link: https://patch.msgid.link/r/5-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
+Reviewed-by: Long Li <longli@microsoft.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mana/cq.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/hw/mana/cq.c
++++ b/drivers/infiniband/hw/mana/cq.c
+@@ -142,8 +142,9 @@ int mana_ib_install_cq_cb(struct mana_ib
+       if (cq->queue.id >= gc->max_num_cqs)
+               return -EINVAL;
+-      /* Create CQ table entry */
+-      WARN_ON(gc->cq_table[cq->queue.id]);
++      /* Create CQ table entry, sharing a CQ between WQs is not supported */
++      if (gc->cq_table[cq->queue.id])
++              return -EINVAL;
+       if (cq->queue.kmem)
+               gdma_cq = cq->queue.kmem;
+       else
diff --git a/queue-7.0/rdma-mana-validate-rx_hash_key_len.patch b/queue-7.0/rdma-mana-validate-rx_hash_key_len.patch
new file mode 100644 (file)
index 0000000..fa5efa7
--- /dev/null
@@ -0,0 +1,36 @@
+From 6dd2d4ad9c8429523b1c220c5132bd551c006425 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 Apr 2026 13:17:37 -0300
+Subject: RDMA/mana: Validate rx_hash_key_len
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 6dd2d4ad9c8429523b1c220c5132bd551c006425 upstream.
+
+Sashiko points out that rx_hash_key_len comes from a uAPI structure and is
+blindly passed to memcpy, allowing the userspace to trash kernel
+memory. Bounds check it so the memcpy cannot overflow.
+
+Cc: stable@vger.kernel.org
+Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter")
+Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1
+Link: https://patch.msgid.link/r/4-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
+Reviewed-by: Long Li <longli@microsoft.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mana/qp.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/infiniband/hw/mana/qp.c
++++ b/drivers/infiniband/hw/mana/qp.c
+@@ -21,6 +21,9 @@ static int mana_ib_cfg_vport_steering(st
+       gc = mdev_to_gc(dev);
++      if (rx_hash_key_len > sizeof(req->hashkey))
++              return -EINVAL;
++
+       req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE);
+       req = kzalloc(req_buf_size, GFP_KERNEL);
+       if (!req)
diff --git a/queue-7.0/rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch b/queue-7.0/rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch
new file mode 100644 (file)
index 0000000..277b8d1
--- /dev/null
@@ -0,0 +1,60 @@
+From c9341307ea16b9395c2e4c9c94d8499d91fe31d0 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 Apr 2026 13:17:45 -0300
+Subject: RDMA/mlx4: Fix mis-use of RCU in mlx4_srq_event()
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit c9341307ea16b9395c2e4c9c94d8499d91fe31d0 upstream.
+
+Sashiko points out the radix_tree itself is RCU safe, but nothing ever
+frees the mlx4_srq struct with RCU, and it isn't even accessed within the
+RCU critical section. It also will crash if an event is delivered before
+the srq object is finished initializing.
+
+Use the spinlock since it isn't easy to make RCU work, use
+refcount_inc_not_zero() to protect against partially initialized objects,
+and order the refcount_set() to be after the srq is fully initialized.
+
+Cc: stable@vger.kernel.org
+Fixes: 30353bfc43a1 ("net/mlx4_core: Use RCU to perform radix tree lookup for SRQ")
+Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=5
+Link: https://patch.msgid.link/r/12-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/srq.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
++++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
+@@ -44,13 +44,14 @@ void mlx4_srq_event(struct mlx4_dev *dev
+ {
+       struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+       struct mlx4_srq *srq;
++      unsigned long flags;
+-      rcu_read_lock();
++      spin_lock_irqsave(&srq_table->lock, flags);
+       srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
+-      rcu_read_unlock();
+-      if (srq)
+-              refcount_inc(&srq->refcount);
+-      else {
++      if (!srq || !refcount_inc_not_zero(&srq->refcount))
++              srq = NULL;
++      spin_unlock_irqrestore(&srq_table->lock, flags);
++      if (!srq) {
+               mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
+               return;
+       }
+@@ -203,8 +204,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev,
+       if (err)
+               goto err_radix;
+-      refcount_set(&srq->refcount, 1);
+       init_completion(&srq->free);
++      refcount_set_release(&srq->refcount, 1);
+       return 0;
diff --git a/queue-7.0/rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch b/queue-7.0/rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch
new file mode 100644 (file)
index 0000000..5a284c7
--- /dev/null
@@ -0,0 +1,41 @@
+From c54c7e4cb679c0aaa1cb489b9c3f2cd98e63a44c Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 Apr 2026 13:17:44 -0300
+Subject: RDMA/mlx4: Fix resource leak on error in mlx4_ib_create_srq()
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit c54c7e4cb679c0aaa1cb489b9c3f2cd98e63a44c upstream.
+
+Sashiko points out that mlx4_srq_alloc() was not undone during error
+unwind, add the missing call to mlx4_srq_free().
+
+Cc: stable@vger.kernel.org
+Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters")
+Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=8
+Link: https://patch.msgid.link/r/11-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx4/srq.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx4/srq.c
++++ b/drivers/infiniband/hw/mlx4/srq.c
+@@ -193,13 +193,15 @@ int mlx4_ib_create_srq(struct ib_srq *ib
+       if (udata)
+               if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
+                       err = -EFAULT;
+-                      goto err_wrid;
++                      goto err_srq;
+               }
+       init_attr->attr.max_wr = srq->msrq.max - 1;
+       return 0;
++err_srq:
++      mlx4_srq_free(dev->dev, &srq->msrq);
+ err_wrid:
+       if (udata)
+               mlx4_ib_db_unmap_user(ucontext, &srq->db);
diff --git a/queue-7.0/rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch b/queue-7.0/rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch
new file mode 100644 (file)
index 0000000..dcf73f6
--- /dev/null
@@ -0,0 +1,43 @@
+From c488df06bd552bb8b6e14fa0cfd5ad986c6e9525 Mon Sep 17 00:00:00 2001
+From: Junrui Luo <moonafterrain@outlook.com>
+Date: Fri, 24 Apr 2026 13:51:02 +0800
+Subject: RDMA/mlx5: Fix error path fall-through in mlx5_ib_dev_res_srq_init()
+
+From: Junrui Luo <moonafterrain@outlook.com>
+
+commit c488df06bd552bb8b6e14fa0cfd5ad986c6e9525 upstream.
+
+mlx5_ib_dev_res_srq_init() allocates two SRQs, s0 and s1. When
+ib_create_srq() fails for s1, the error branch destroys s0 but falls
+through and unconditionally assigns the freed s0 and the ERR_PTR s1 to
+devr->s0 and devr->s1.
+
+This leads to several problems: the lock-free fast path checks
+"if (devr->s1) return 0;" and treats the ERR_PTR as already initialised;
+users in mlx5_ib_create_qp() dereference the freed SRQ or ERR_PTR via
+to_msrq(devr->s0)->msrq.srqn; and mlx5_ib_dev_res_cleanup() dereferences
+the ERR_PTR and double-frees s0 on teardown.
+
+Fix by adding the same `goto unlock` in the s1 failure path.
+
+Cc: stable@vger.kernel.org
+Fixes: 5895e70f2e6e ("IB/mlx5: Allocate resources just before first QP/SRQ is created")
+Link: https://patch.msgid.link/r/SYBPR01MB7881E1E0970268BD69C0BA75AF2B2@SYBPR01MB7881.ausprd01.prod.outlook.com
+Reported-by: Yuhao Jiang <danisjiang@gmail.com>
+Signed-off-by: Junrui Luo <moonafterrain@outlook.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -3380,6 +3380,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5
+                           "Couldn't create SRQ 1 for res init, err=%pe\n",
+                           s1);
+               ib_destroy_srq(s0);
++              goto unlock;
+       }
+       devr->s0 = s0;
diff --git a/queue-7.0/rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch b/queue-7.0/rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch
new file mode 100644 (file)
index 0000000..761b867
--- /dev/null
@@ -0,0 +1,37 @@
+From 34fbf48cf3b410d2a6e8c586fa952a36331ca5ba Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 Apr 2026 13:17:42 -0300
+Subject: RDMA/ocrdma: Don't NULL deref uctx on errors in ocrdma_copy_pd_uresp()
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 34fbf48cf3b410d2a6e8c586fa952a36331ca5ba upstream.
+
+Sashiko points out that pd->uctx isn't initialized until late in the
+function so all these error flow references are NULL and will crash. Use
+the uctx that isn't NULL.
+
+Cc: stable@vger.kernel.org
+Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter")
+Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4
+Link: https://patch.msgid.link/r/9-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/ocrdma/ocrdma_verbs.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
++++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+@@ -620,9 +620,9 @@ static int ocrdma_copy_pd_uresp(struct o
+ ucopy_err:
+       if (pd->dpp_enabled)
+-              ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
++              ocrdma_del_mmap(uctx, dpp_page_addr, PAGE_SIZE);
+ dpp_map_err:
+-      ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
++      ocrdma_del_mmap(uctx, db_page_addr, db_page_size);
+       return status;
+ }
diff --git a/queue-7.0/rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch b/queue-7.0/rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch
new file mode 100644 (file)
index 0000000..499c51f
--- /dev/null
@@ -0,0 +1,69 @@
+From 1114c87aa6f195cf07da55a27b2122ae26557b26 Mon Sep 17 00:00:00 2001
+From: Michael Bommarito <michael.bommarito@gmail.com>
+Date: Sat, 18 Apr 2026 12:21:41 -0400
+Subject: RDMA/rxe: Reject non-8-byte ATOMIC_WRITE payloads
+
+From: Michael Bommarito <michael.bommarito@gmail.com>
+
+commit 1114c87aa6f195cf07da55a27b2122ae26557b26 upstream.
+
+atomic_write_reply() at drivers/infiniband/sw/rxe/rxe_resp.c
+unconditionally dereferences 8 bytes at payload_addr(pkt):
+
+    value = *(u64 *)payload_addr(pkt);
+
+check_rkey() previously accepted an ATOMIC_WRITE request with pktlen ==
+resid == 0 because the length validation only compared pktlen against
+resid. A remote initiator that sets the RETH length to 0 therefore reaches
+atomic_write_reply() with a zero-byte logical payload, and the responder
+reads sizeof(u64) bytes from past the logical end of the packet into
+skb->head tailroom, then writes those 8 bytes into the attacker's MR via
+rxe_mr_do_atomic_write(). That is a remote disclosure of 4 bytes of kernel
+tailroom per probe (the other 4 bytes are the packet's own trailing ICRC).
+
+IBA oA19-28 defines ATOMIC_WRITE as exactly 8 bytes. Anything else is
+protocol-invalid. Hoist a strict length check into check_rkey() so the
+responder never reaches the unchecked dereference, and keep the existing
+WRITE-family length logic for the normal RDMA WRITE path.
+
+Reproduced on mainline with an unmodified rxe driver: a sustained
+zero-length ATOMIC_WRITE probe repeatedly leaks adjacent skb head-buffer
+bytes into the attacker's MR, including recognisable kernel strings and
+partial kernel-direct-map pointer words.  With this patch applied the
+responder rejects the PDU and the MR stays all-zero.
+
+Cc: stable@vger.kernel.org
+Fixes: 034e285f8b99 ("RDMA/rxe: Make responder support atomic write on RC service")
+Link: https://patch.msgid.link/r/20260418162141.3610201-1-michael.bommarito@gmail.com
+Assisted-by: Claude:claude-opus-4-7
+Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
+Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/sw/rxe/rxe_resp.c |   14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/sw/rxe/rxe_resp.c
++++ b/drivers/infiniband/sw/rxe/rxe_resp.c
+@@ -526,7 +526,19 @@ static enum resp_states check_rkey(struc
+       }
+ skip_check_range:
+-      if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
++      if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
++              /* IBA oA19-28: ATOMIC_WRITE payload is exactly 8 bytes.
++               * Reject any other length before the responder reads
++               * sizeof(u64) bytes from payload_addr(pkt); a shorter
++               * payload would read past the logical end of the packet
++               * into skb->head tailroom.
++               */
++              if (resid != sizeof(u64) || pktlen != sizeof(u64) ||
++                  bth_pad(pkt)) {
++                      state = RESPST_ERR_LENGTH;
++                      goto err;
++              }
++      } else if (pkt->mask & RXE_WRITE_MASK) {
+               if (resid > mtu) {
+                       if (pktlen != mtu || bth_pad(pkt)) {
+                               state = RESPST_ERR_LENGTH;
diff --git a/queue-7.0/rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch b/queue-7.0/rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch
new file mode 100644 (file)
index 0000000..ae4fc0e
--- /dev/null
@@ -0,0 +1,94 @@
+From 4c6f86d85d03cdb33addce86aa69aa795ca6c47a Mon Sep 17 00:00:00 2001
+From: Michael Bommarito <michael.bommarito@gmail.com>
+Date: Tue, 14 Apr 2026 07:15:55 -0400
+Subject: RDMA/rxe: Reject unknown opcodes before ICRC processing
+
+From: Michael Bommarito <michael.bommarito@gmail.com>
+
+commit 4c6f86d85d03cdb33addce86aa69aa795ca6c47a upstream.
+
+Even after applying commit 7244491dab34 ("RDMA/rxe: Validate pad and ICRC
+before payload_size() in rxe_rcv"), a single unauthenticated UDP packet
+can still trigger panic.  That patch handled payload_size() underflow only
+for valid opcodes with short packets, not for packets carrying an unknown
+opcode.  The unknown-opcode OOB read described below predates that commit
+and reaches back to the initial Soft RoCE driver.
+
+The check added there reads
+
+    pkt->paylen < header_size(pkt) + bth_pad(pkt) + RXE_ICRC_SIZE
+
+where header_size(pkt) expands to rxe_opcode[pkt->opcode].length.  The
+rxe_opcode[] array has 256 entries but is only populated for defined IB
+opcodes; any other entry (for example opcode 0xff) is zero-initialized, so
+length == 0 and the check degenerates to
+
+    pkt->paylen < 0 + bth_pad(pkt) + RXE_ICRC_SIZE
+
+which does not constrain pkt->paylen enough.  rxe_icrc_hdr() then computes
+
+    rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES
+
+which underflows when length == 0 and passes a huge value to rxe_crc32(),
+causing an out-of-bounds read of the skb payload.
+
+Reproduced on v7.0-rc7 with that fix applied, QEMU/KVM with
+CONFIG_RDMA_RXE=y and CONFIG_KASAN=y, after
+
+    rdma link add rxe0 type rxe netdev eth0
+
+A single 48-byte UDP packet to port 4791 with BTH opcode=0xff and
+QPN=IB_MULTICAST_QPN triggers:
+
+    BUG: KASAN: slab-out-of-bounds in crc32_le+0x115/0x170
+    Read of size 1 at addr ...
+    The buggy address is located 0 bytes to the right of
+     allocated 704-byte region
+    Call Trace:
+     crc32_le+0x115/0x170
+     rxe_icrc_hdr.isra.0+0x226/0x300
+     rxe_icrc_check+0x13f/0x3a0
+     rxe_rcv+0x6e1/0x16e0
+     rxe_udp_encap_recv+0x20a/0x320
+     udp_queue_rcv_one_skb+0x7ed/0x12c0
+
+Subsequent packets with the same shape fault on unmapped memory and panic
+the kernel.  The trigger requires only module load and "rdma link add"; no
+QP, no connection, and no authentication.
+
+Fix this by rejecting packets whose opcode has no rxe_opcode[] entry,
+detected via the zero mask or zero length, before any length arithmetic
+runs.
+
+Cc: stable@vger.kernel.org
+Fixes: 8700e3e7c485 ("Soft RoCE driver")
+Link: https://patch.msgid.link/r/20260414111555.3386793-1-michael.bommarito@gmail.com
+Assisted-by: Claude:claude-opus-4-6
+Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
+Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/sw/rxe/rxe_recv.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/drivers/infiniband/sw/rxe/rxe_recv.c
++++ b/drivers/infiniband/sw/rxe/rxe_recv.c
+@@ -330,6 +330,17 @@ void rxe_rcv(struct sk_buff *skb)
+       pkt->qp = NULL;
+       pkt->mask |= rxe_opcode[pkt->opcode].mask;
++      /*
++       * Unknown opcodes have a zero-initialized rxe_opcode[] entry, so
++       * both mask and length are 0.  Reject them before any length math:
++       * rxe_icrc_hdr() would otherwise compute length - RXE_BTH_BYTES
++       * and pass the underflowed value to rxe_crc32(), producing an
++       * out-of-bounds read.
++       */
++      if (unlikely(!rxe_opcode[pkt->opcode].mask ||
++                   !rxe_opcode[pkt->opcode].length))
++              goto drop;
++
+       if (unlikely(pkt->paylen < header_size(pkt) + bth_pad(pkt) +
+                      RXE_ICRC_SIZE))
+               goto drop;
diff --git a/queue-7.0/rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch b/queue-7.0/rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch
new file mode 100644 (file)
index 0000000..002aac0
--- /dev/null
@@ -0,0 +1,33 @@
+From e38e86995df27f1f854063dab1f0c6a513db3faf Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 Apr 2026 13:17:43 -0300
+Subject: RDMA/vmw_pvrdma: Fix double free on pvrdma_alloc_ucontext() error path
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit e38e86995df27f1f854063dab1f0c6a513db3faf upstream.
+
+Sashiko points out that pvrdma_uar_free() is already called within
+pvrdma_dealloc_ucontext(), so calling it before triggers a double free.
+
+Cc: stable@vger.kernel.org
+Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver")
+Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4
+Link: https://patch.msgid.link/r/10-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
++++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+@@ -322,7 +322,7 @@ int pvrdma_alloc_ucontext(struct ib_ucon
+       uresp.qp_tab_size = vdev->dsr->caps.max_qp;
+       ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+       if (ret) {
+-              pvrdma_uar_free(vdev, &context->uar);
++              /* pvrdma_dealloc_ucontext() also frees the UAR */
+               pvrdma_dealloc_ucontext(&context->ibucontext);
+               return -EFAULT;
+       }
diff --git a/queue-7.0/remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch b/queue-7.0/remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch
new file mode 100644 (file)
index 0000000..37851e1
--- /dev/null
@@ -0,0 +1,34 @@
+From 665eebebb029690a5b2f92e481020877cc6c8d36 Mon Sep 17 00:00:00 2001
+From: Chen Ni <nichen@iscas.ac.cn>
+Date: Fri, 27 Feb 2026 17:15:46 +0800
+Subject: remoteproc: imx_rproc: Fix NULL vs IS_ERR() bug in imx_rproc_addr_init()
+
+From: Chen Ni <nichen@iscas.ac.cn>
+
+commit 665eebebb029690a5b2f92e481020877cc6c8d36 upstream.
+
+The devm_ioremap_resource_wc() function never returns NULL, it returns
+error pointers.  Update the error checking to match.
+
+Fixes: 67a7bc7f0358 ("remoteproc: Use of_reserved_mem_region_* functions for "memory-region"")
+Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
+Reviewed-by: Peng Fan <peng.fan@nxp.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20260227091546.4044246-1-nichen@iscas.ac.cn
+Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/remoteproc/imx_rproc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/remoteproc/imx_rproc.c
++++ b/drivers/remoteproc/imx_rproc.c
+@@ -812,7 +812,7 @@ static int imx_rproc_addr_init(struct im
+               /* Not use resource version, because we might share region */
+               priv->mem[b].cpu_addr = devm_ioremap_resource_wc(&pdev->dev, &res);
+-              if (!priv->mem[b].cpu_addr) {
++              if (IS_ERR(priv->mem[b].cpu_addr)) {
+                       dev_err(dev, "failed to remap %pr\n", &res);
+                       return -ENOMEM;
+               }
diff --git a/queue-7.0/remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch b/queue-7.0/remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch
new file mode 100644 (file)
index 0000000..7408714
--- /dev/null
@@ -0,0 +1,39 @@
+From 5b1f4b5c72cc40e676293b8609cacef7e1545beb Mon Sep 17 00:00:00 2001
+From: Chen Ni <nichen@iscas.ac.cn>
+Date: Fri, 27 Feb 2026 17:21:10 +0800
+Subject: remoteproc: k3: Fix NULL vs IS_ERR() bug in k3_reserved_mem_init()
+
+From: Chen Ni <nichen@iscas.ac.cn>
+
+commit 5b1f4b5c72cc40e676293b8609cacef7e1545beb upstream.
+
+The devm_ioremap_resource_wc() function never returns NULL, it returns
+error pointers.  Update the error checking to match.
+
+Fixes: 67a7bc7f0358 ("remoteproc: Use of_reserved_mem_region_* functions for "memory-region"")
+Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
+Reviewed-by: Peng Fan <peng.fan@nxp.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20260227092110.4044313-1-nichen@iscas.ac.cn
+Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/remoteproc/ti_k3_common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/remoteproc/ti_k3_common.c b/drivers/remoteproc/ti_k3_common.c
+index 32aa954dc5be..3cb8ae5d72f6 100644
+--- a/drivers/remoteproc/ti_k3_common.c
++++ b/drivers/remoteproc/ti_k3_common.c
+@@ -513,7 +513,7 @@ int k3_reserved_mem_init(struct k3_rproc *kproc)
+               kproc->rmem[i].dev_addr = (u32)res.start;
+               kproc->rmem[i].size = resource_size(&res);
+               kproc->rmem[i].cpu_addr = devm_ioremap_resource_wc(dev, &res);
+-              if (!kproc->rmem[i].cpu_addr) {
++              if (IS_ERR(kproc->rmem[i].cpu_addr)) {
+                       dev_err(dev, "failed to map reserved memory#%d at %pR\n",
+                               i + 1, &res);
+                       return -ENOMEM;
+-- 
+2.54.0
+
diff --git a/queue-7.0/sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch b/queue-7.0/sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch
new file mode 100644 (file)
index 0000000..d67d1c3
--- /dev/null
@@ -0,0 +1,61 @@
+From b34c82777a2c0648ee053595f4b290fd5249b093 Mon Sep 17 00:00:00 2001
+From: David Carlier <devnexen@gmail.com>
+Date: Thu, 30 Apr 2026 10:27:47 +0100
+Subject: sched_ext: idle: Recheck prev_cpu after narrowing allowed mask
+
+From: David Carlier <devnexen@gmail.com>
+
+commit b34c82777a2c0648ee053595f4b290fd5249b093 upstream.
+
+scx_select_cpu_dfl() narrows @allowed to @cpus_allowed & @p->cpus_ptr
+when the BPF caller supplies a @cpus_allowed that differs from
+@p->cpus_ptr and @p doesn't have full affinity. However,
+@is_prev_allowed was computed against the original (wider)
+@cpus_allowed, so the prev_cpu fast paths could pick a @prev_cpu that
+is in @cpus_allowed but not in @p->cpus_ptr, violating the intended
+invariant that the returned CPU is always usable by @p. The kernel
+masks this via the SCX_EV_SELECT_CPU_FALLBACK fallback, but the
+behavior contradicts the documented contract.
+
+Move the @is_prev_allowed evaluation past the narrowing block so it
+tests against the final @allowed mask.
+
+Fixes: ee9a4e92799d ("sched_ext: idle: Properly handle invalid prev_cpu during idle selection")
+Cc: stable@vger.kernel.org # v6.16+
+Assisted-by: Claude <noreply@anthropic.com>
+Signed-off-by: David Carlier <devnexen@gmail.com>
+Reviewed-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/ext_idle.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/kernel/sched/ext_idle.c
++++ b/kernel/sched/ext_idle.c
+@@ -460,12 +460,6 @@ s32 scx_select_cpu_dfl(struct task_struc
+       preempt_disable();
+       /*
+-       * Check whether @prev_cpu is still within the allowed set. If not,
+-       * we can still try selecting a nearby CPU.
+-       */
+-      is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed);
+-
+-      /*
+        * Determine the subset of CPUs usable by @p within @cpus_allowed.
+        */
+       if (allowed != p->cpus_ptr) {
+@@ -482,6 +476,12 @@ s32 scx_select_cpu_dfl(struct task_struc
+       }
+       /*
++       * Check whether @prev_cpu is still within the allowed set. If not,
++       * we can still try selecting a nearby CPU.
++       */
++      is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed);
++
++      /*
+        * This is necessary to protect llc_cpus.
+        */
+       rcu_read_lock();
diff --git a/queue-7.0/sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch b/queue-7.0/sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch
new file mode 100644 (file)
index 0000000..2aa8a05
--- /dev/null
@@ -0,0 +1,45 @@
+From da2d81b4118a74e65d2335e221a38d665902a98c Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 24 Apr 2026 14:31:35 -1000
+Subject: sched_ext: Skip tasks with stale task_rq in bypass_lb_cpu()
+
+From: Tejun Heo <tj@kernel.org>
+
+commit da2d81b4118a74e65d2335e221a38d665902a98c upstream.
+
+bypass_lb_cpu() transfers tasks between per-CPU bypass DSQs without
+migrating them - task_cpu() only updates when the donee later consumes the
+task via move_remote_task_to_local_dsq(). If the LB timer fires again before
+consumption and the new DSQ becomes a donor, @p is still on the previous CPU
+and task_rq(@p) != donor_rq. @p can't be moved without its own rq locked.
+
+Skip such tasks.
+
+Fixes: 95d1df610cdc ("sched_ext: Implement load balancer for bypass mode")
+Cc: stable@vger.kernel.org # v6.19+
+Reported-by: Chris Mason <clm@meta.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reviewed-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/ext.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -4008,6 +4008,15 @@ resume:
+               if (cpumask_empty(donee_mask))
+                       break;
++              /*
++               * If an earlier pass placed @p on @donor_dsq from a different
++               * CPU and the donee hasn't consumed it yet, @p is still on the
++               * previous CPU and task_rq(@p) != @donor_rq. @p can't be moved
++               * without its rq locked. Skip.
++               */
++              if (task_rq(p) != donor_rq)
++                      continue;
++
+               donee = cpumask_any_and_distribute(donee_mask, p->cpus_ptr);
+               if (donee >= nr_cpu_ids)
+                       continue;
diff --git a/queue-7.0/sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch b/queue-7.0/sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch
new file mode 100644 (file)
index 0000000..0d69571
--- /dev/null
@@ -0,0 +1,51 @@
+From 2f2ea77092660b53bfcbc4acc590b57ce9ab5dce Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 24 Apr 2026 14:31:35 -1000
+Subject: sched_ext: Use dsq->first_task instead of list_empty() in dispatch_enqueue() FIFO-tail
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 2f2ea77092660b53bfcbc4acc590b57ce9ab5dce upstream.
+
+dispatch_enqueue()'s FIFO-tail path used list_empty(&dsq->list) to decide
+whether to set dsq->first_task on enqueue. dsq->list can contain parked BPF
+iterator cursors (SCX_DSQ_LNODE_ITER_CURSOR), so list_empty() is not a
+reliable "no real task" check. If the last real task is unlinked while a
+cursor is parked, first_task becomes NULL; the next FIFO-tail enqueue then
+sees list_empty() == false and skips the first_task update, leaving
+scx_bpf_dsq_peek() returning NULL for a non-empty DSQ.
+
+Test dsq->first_task directly, which already tracks only real tasks and is
+maintained under dsq->lock.
+
+Fixes: 44f5c8ec5b9a ("sched_ext: Add lockless peek operation for DSQs")
+Cc: stable@vger.kernel.org # v6.19+
+Reported-by: Chris Mason <clm@meta.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reviewed-by: Andrea Righi <arighi@nvidia.com>
+Cc: Ryan Newton <newton@meta.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/ext.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -1093,11 +1093,13 @@ static void dispatch_enqueue(struct scx_
+                       if (!(dsq->id & SCX_DSQ_FLAG_BUILTIN))
+                               rcu_assign_pointer(dsq->first_task, p);
+               } else {
+-                      bool was_empty;
+-
+-                      was_empty = list_empty(&dsq->list);
++                      /*
++                       * dsq->list can contain parked BPF iterator cursors, so
++                       * list_empty() here isn't a reliable proxy for "no real
++                       * task in the DSQ". Test dsq->first_task directly.
++                       */
+                       list_add_tail(&p->scx.dsq_list.node, &dsq->list);
+-                      if (was_empty && !(dsq->id & SCX_DSQ_FLAG_BUILTIN))
++                      if (!dsq->first_task && !(dsq->id & SCX_DSQ_FLAG_BUILTIN))
+                               rcu_assign_pointer(dsq->first_task, p);
+               }
+       }
diff --git a/queue-7.0/selftests-mptcp-check-output-catch-cmd-errors.patch b/queue-7.0/selftests-mptcp-check-output-catch-cmd-errors.patch
new file mode 100644 (file)
index 0000000..20606ed
--- /dev/null
@@ -0,0 +1,111 @@
+From 65db7b27b90e2ea8d4966935aa9a50b6a60c31ac Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:58 +0200
+Subject: selftests: mptcp: check output: catch cmd errors
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 65db7b27b90e2ea8d4966935aa9a50b6a60c31ac upstream.
+
+Using '${?}' inside the if-statement to check the returned value from
+the command that was evaluated as part of the if-statement is not
+correct: here, '${?}' will be linked to the previous instruction, not
+the one that is expected here (${cmd}).
+
+Instead, simply mark the error, except if an error is expected. If
+that's the case, 1 can be passed as the 4th argument of this helper.
+Three checks from pm_netlink.sh expect an error.
+
+While at it, improve the error message when the command unexpectedly
+fails or succeeds.
+
+Note that we could expect a specific returned value, but the checks
+currently expecting an error can be used with 'ip mptcp' or 'pm_nl_ctl',
+and these two tools don't return the same error code.
+
+Fixes: 2d0c1d27ea4e ("selftests: mptcp: add mptcp_lib_check_output helper")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-10-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_lib.sh  |   16 ++++++++++------
+ tools/testing/selftests/net/mptcp/pm_netlink.sh |   10 ++++++----
+ 2 files changed, 16 insertions(+), 10 deletions(-)
+
+--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+@@ -474,20 +474,24 @@ mptcp_lib_wait_local_port_listen() {
+       wait_local_port_listen "${@}" "tcp"
+ }
++# $1: error file, $2: cmd, $3: expected msg, [$4: expected error]
+ mptcp_lib_check_output() {
+       local err="${1}"
+       local cmd="${2}"
+       local expected="${3}"
++      local exp_error="${4:-0}"
+       local cmd_ret=0
+       local out
+-      if ! out=$(${cmd} 2>"${err}"); then
+-              cmd_ret=${?}
+-      fi
++      out=$(${cmd} 2>"${err}") || cmd_ret=1
+-      if [ ${cmd_ret} -ne 0 ]; then
+-              mptcp_lib_pr_fail "command execution '${cmd}' stderr"
+-              cat "${err}"
++      if [ "${cmd_ret}" != "${exp_error}" ]; then
++              mptcp_lib_pr_fail "unexpected returned code for '${cmd}', info:"
++              if [ "${exp_error}" = 0 ]; then
++                      cat "${err}"
++              else
++                      echo "${out}"
++              fi
+               return 2
+       elif [ "${out}" = "${expected}" ]; then
+               return 0
+--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
++++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
+@@ -122,10 +122,12 @@ check()
+       local cmd="$1"
+       local expected="$2"
+       local msg="$3"
++      local exp_error="$4"
+       local rc=0
+       mptcp_lib_print_title "$msg"
+-      mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
++      mptcp_lib_check_output "${err}" "${cmd}" "${expected}" "${exp_error}" ||
++              rc=${?}
+       if [ ${rc} -eq 2 ]; then
+               mptcp_lib_result_fail "${msg} # error ${rc}"
+               ret=${KSFT_FAIL}
+@@ -158,13 +160,13 @@ check "show_endpoints" \
+                           "3,10.0.1.3,signal backup")" "dump addrs"
+ del_endpoint 2
+-check "get_endpoint 2" "" "simple del addr"
++check "get_endpoint 2" "" "simple del addr" 1
+ check "show_endpoints" \
+       "$(format_endpoints "1,10.0.1.1" \
+                           "3,10.0.1.3,signal backup")" "dump addrs after del"
+ add_endpoint 10.0.1.3 2>/dev/null
+-check "get_endpoint 4" "" "duplicate addr"
++check "get_endpoint 4" "" "duplicate addr" 1
+ add_endpoint 10.0.1.4 flags signal
+ check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment"
+@@ -173,7 +175,7 @@ for i in $(seq 5 9); do
+       add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1
+ done
+ check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit"
+-check "get_endpoint 10" "" "above hard addr limit"
++check "get_endpoint 10" "" "above hard addr limit" 1
+ del_endpoint 9
+ for i in $(seq 10 255); do
diff --git a/queue-7.0/selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch b/queue-7.0/selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch
new file mode 100644 (file)
index 0000000..7e16ca8
--- /dev/null
@@ -0,0 +1,47 @@
+From 53705ddfa18408f8e1f064331b6387509fa19f7f Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 5 May 2026 17:00:59 +0200
+Subject: selftests: mptcp: pm: restrict 'unknown' check to pm_nl_ctl
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 53705ddfa18408f8e1f064331b6387509fa19f7f upstream.
+
+When pm_netlink.sh is executed with '-i', 'ip mptcp' is used instead of
+'pm_nl_ctl'. IPRoute2 doesn't support the 'unknown' flag, which has only
+been added to 'pm_nl_ctl' for this specific check: to ensure that the
+kernel ignores such unsupported flag.
+
+No reason to add this flag to 'ip mptcp'. Then, this check should be
+skipped when 'ip mptcp' is used.
+
+Fixes: 0cef6fcac24d ("selftests: mptcp: ip_mptcp option for more scripts")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-11-fca8091060a4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/pm_netlink.sh |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
++++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
+@@ -194,9 +194,13 @@ check "show_endpoints" \
+ flush_endpoint
+ check "show_endpoints" "" "flush addrs"
+-add_endpoint 10.0.1.1 flags unknown
+-check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags"
+-flush_endpoint
++# "unknown" flag is only supported by pm_nl_ctl
++if ! mptcp_lib_is_ip_mptcp; then
++      add_endpoint 10.0.1.1 flags unknown
++      check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" \
++            "ignore unknown flags"
++      flush_endpoint
++fi
+ set_limits 9 1 2>/dev/null
+ check "get_limits" "${default_limits}" "rcv addrs above hard limit"
index 4716911b174b707ed5a1f607aa9be65a0f844b54..583bbc129d09253cdbbe182fecec5e623174956a 100644 (file)
@@ -220,3 +220,71 @@ mm-damon-stat-detect-and-use-fresh-enabled-value.patch
 mm-damon-sysfs-schemes-protect-memcg_path-kfree-with-damon_sysfs_lock.patch
 mm-damon-sysfs-schemes-protect-path-kfree-with-damon_sysfs_lock.patch
 pci-update-saved_config_space-upon-resource-assignment.patch
+pci-aer-clear-only-error-bits-in-pcie-device-status.patch
+pci-aer-stop-ruling-out-unbound-devices-as-error-source.patch
+pci-aspm-fix-pci_clear_and_set_config_dword-usage.patch
+power-supply-max17042-avoid-overflow-when-determining-health.patch
+powerpc-xive-fix-kmemleak-caused-by-incorrect-chip_data-lookup.patch
+perf-x86-intel-always-reprogram-acr-events-to-prevent-stale-masks.patch
+perf-x86-intel-disable-pmi-for-self-reloaded-acr-events.patch
+perf-x86-intel-enable-auto-counter-reload-for-dmr.patch
+rdma-ionic-bound-node_desc-sysfs-read-with-.64s.patch
+rdma-ionic-fix-typo-in-format-string.patch
+rdma-mana-fix-error-unwind-in-mana_ib_create_qp_rss.patch
+rdma-mana-fix-mana_destroy_wq_obj-cleanup-in-mana_ib_create_qp_rss.patch
+rdma-mana-remove-user-triggerable-warn_on-in-mana_ib_create_qp_rss.patch
+rdma-mana-validate-rx_hash_key_len.patch
+rdma-mlx4-fix-mis-use-of-rcu-in-mlx4_srq_event.patch
+rdma-mlx4-fix-resource-leak-on-error-in-mlx4_ib_create_srq.patch
+rdma-mlx5-fix-error-path-fall-through-in-mlx5_ib_dev_res_srq_init.patch
+rdma-ocrdma-don-t-null-deref-uctx-on-errors-in-ocrdma_copy_pd_uresp.patch
+rdma-rxe-reject-non-8-byte-atomic_write-payloads.patch
+rdma-rxe-reject-unknown-opcodes-before-icrc-processing.patch
+rdma-vmw_pvrdma-fix-double-free-on-pvrdma_alloc_ucontext-error-path.patch
+remoteproc-imx_rproc-fix-null-vs-is_err-bug-in-imx_rproc_addr_init.patch
+remoteproc-k3-fix-null-vs-is_err-bug-in-k3_reserved_mem_init.patch
+sched_ext-idle-recheck-prev_cpu-after-narrowing-allowed-mask.patch
+sched_ext-skip-tasks-with-stale-task_rq-in-bypass_lb_cpu.patch
+sched_ext-use-dsq-first_task-instead-of-list_empty-in-dispatch_enqueue-fifo-tail.patch
+selftests-mptcp-check-output-catch-cmd-errors.patch
+selftests-mptcp-pm-restrict-unknown-check-to-pm_nl_ctl.patch
+mptcp-fastclose-msk-when-linger-time-is-0.patch
+mptcp-use-mpjoinsynackhmacfailure-for-synack-hmac-failure.patch
+mptcp-use-mptcp_rst_emptcp-for-ack-hmac-validation-failure.patch
+mptcp-sockopt-set-timestamp-flags-on-subflow-socket-not-msk.patch
+mptcp-sockopt-increase-seq-in-mptcp_setsockopt_all_sf.patch
+mptcp-fix-rx-timestamp-corruption-on-fastopen.patch
+mptcp-fix-scheduling-with-atomic-in-timestamp-sockopt.patch
+mptcp-pm-prio-skip-closed-subflows.patch
+mptcp-pm-kernel-reset-fullmesh-counter-after-flush.patch
+mptcp-pm-kernel-correctly-retransmit-add_addr-id-0.patch
+mptcp-pm-add_addr-rtx-allow-id-0.patch
+mptcp-pm-add_addr-rtx-fix-potential-data-race.patch
+mptcp-pm-add_addr-rtx-always-decrease-sk-refcount.patch
+mptcp-pm-add_addr-rtx-free-sk-if-last.patch
+mptcp-pm-add_addr-rtx-resched-blocked-add_addr-quicker.patch
+mptcp-pm-add_addr-rtx-return-early-if-no-retrans.patch
+f2fs-add-read_once-for-i_blocks-in-f2fs_update_inode.patch
+f2fs-fix-false-alarm-of-lockdep-on-cp_global_sem-lock.patch
+f2fs-fix-fiemap-boundary-handling-when-read-extent-cache-is-incomplete.patch
+f2fs-fix-fsck-inconsistency-caused-by-incorrect-nat_entry-flag-usage.patch
+f2fs-fix-incorrect-file-address-mapping-when-inline-inode-is-unwritten.patch
+f2fs-fix-incorrect-multidevice-info-in-trace_f2fs_map_blocks.patch
+f2fs-fix-node_cnt-race-between-extent-node-destroy-and-writeback.patch
+f2fs-fix-uninitialized-kobject-put-in-f2fs_init_sysfs.patch
+f2fs-refactor-f2fs_move_node_folio-function.patch
+f2fs-fix-inline-data-not-being-written-to-disk-in-writeback-path.patch
+f2fs-fix-fsck-inconsistency-caused-by-fggc-of-node-block.patch
+kvm-arm64-wake-up-from-wfi-when-iqrchip-is-in-userspace.patch
+kvm-arm64-vgic-fix-iidr-revision-field-extracted-from-wrong-value.patch
+kvm-arm64-fix-initialisation-order-in-__pkvm_init_finalise.patch
+kvm-arm64-fix-feat_spe_fne-to-use-pmsidr_el1.fne-not-pmsver.patch
+kvm-arm64-fix-feat_debugv8p9-to-check-debugver-not-pmuver.patch
+kvm-arm64-fix-pin-leak-and-publication-ordering-in-__pkvm_init_vcpu.patch
+loongarch-fix-potential-ade-in-loongson_gpu_fixup_dma_hang.patch
+loongarch-kvm-cap-kvm_cap_nr_vcpus-by-kvm_cap_max_vcpus.patch
+loongarch-kvm-fix-unreliable-stack-for-kvm_exc_entry.patch
+loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patch
+loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch
+loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch
+loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch