From 8c7875e89a0127135da30f55ee480569df31086b Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 12 Sep 2025 12:44:10 -0400 Subject: [PATCH] Fixes for all trees Signed-off-by: Sasha Levin --- ...ix-null-checks-on-result-of-ff_layou.patch | 90 +++++ ...nfs_cap_xattr-flag-if-not-supported-.patch | 35 ++ ...ear-capabilities-that-won-t-be-reset.patch | 35 ++ ...xfiles-fix-layout-merge-mirror-check.patch | 42 +++ ...y-all-sampling-events-by-counter-pmu.patch | 50 +++ queue-5.10/series | 7 + ...msg_free-when-tcp_bpf_send_verdict-f.patch | 98 ++++++ ...ing_marker-may-trigger-page-fault-du.patch | 89 +++++ ...ix-null-checks-on-result-of-ff_layou.patch | 90 +++++ ...nfs_cap_fs_locations-flag-if-it-is-n.patch | 38 ++ ...nfs_cap_xattr-flag-if-not-supported-.patch | 35 ++ ...ear-capabilities-that-won-t-be-reset.patch | 35 ++ ...xfiles-fix-layout-merge-mirror-check.patch | 42 +++ ...y-all-sampling-events-by-counter-pmu.patch | 50 +++ queue-5.15/series | 8 + ...msg_free-when-tcp_bpf_send_verdict-f.patch | 98 ++++++ ...ing_marker-may-trigger-page-fault-du.patch | 89 +++++ ...ear-capabilities-that-won-t-be-reset.patch | 35 ++ ...y-all-sampling-events-by-counter-pmu.patch | 50 +++ queue-5.4/series | 3 + ...msg_free-when-tcp_bpf_send_verdict-f.patch | 98 ++++++ ...ix-null-checks-on-result-of-ff_layou.patch | 90 +++++ ...amples-fix-function-size-computation.patch | 38 ++ ...nfs_cap_fs_locations-flag-if-it-is-n.patch | 38 ++ ...nfs_cap_xattr-flag-if-not-supported-.patch | 35 ++ ...ear-capabilities-that-won-t-be-reset.patch | 35 ++ ...xfiles-fix-layout-merge-mirror-check.patch | 42 +++ ...-fix-type-confusion-in-pde_set_flags.patch | 56 +++ ...y-all-sampling-events-by-counter-pmu.patch | 50 +++ queue-6.1/series | 11 + ...msg_free-when-tcp_bpf_send_verdict-f.patch | 98 ++++++ ...ing_marker-may-trigger-page-fault-du.patch | 89 +++++ ...warning-when-chunk-allocation-fails-.patch | 97 ++++++ ...-fix-built-in-mic-assignment-on-asus.patch | 44 +++ 
...ack-to-interpreter-for-programs-with.patch | 110 ++++++ ...ounds-dynptr-write-in-bpf_crypto_cry.patch | 59 ++++ ...o-use-allow_spinning-false-path-in-b.patch | 92 +++++ ...ysical-address-calculation-for-struc.patch | 96 ++++++ ...swapped-dir-flags-arguments-to-trace.patch | 39 +++ ...fix-error-pointers-in-amdgpu_dm_crtc.patch | 81 +++++ ...ack-jpeg-to-video-caps-for-carrizo-a.patch | 43 +++ ...15-pmu-fix-zero-delta-busyness-issue.patch | 92 +++++ ...introduce-linear-search-for-dentries.patch | 103 ++++++ ...ix-null-checks-on-result-of-ff_layou.patch | 90 +++++ ...s-nfs-io-make-nfs_start_io_-killable.patch | 222 ++++++++++++ ...amples-fix-function-size-computation.patch | 38 ++ ...direct-io-enablement-with-sync-and-a.patch | 261 ++++++++++++++ ...ve-extra-indirect-nfs_to-call-to-che.patch | 122 +++++++ ...ore-creds-before-releasing-pageio-da.patch | 65 ++++ ...te_folio-must-observe-the-offset-and.patch | 55 +++ ...-serialise-o_direct-i-o-and-truncate.patch | 100 ++++++ ...cap_open_xor-and-nfs_cap_delegtime-i.patch | 36 ++ ...nfs_cap_fs_locations-flag-if-it-is-n.patch | 38 ++ ...nfs_cap_xattr-flag-if-not-supported-.patch | 35 ++ ...ear-capabilities-that-won-t-be-reset.patch | 35 ++ ...xfiles-fix-layout-merge-mirror-check.patch | 42 +++ ...rialise-o_direct-i-o-and-clone-range.patch | 38 ++ ...erialise-o_direct-i-o-and-copy-range.patch | 34 ++ ...serialise-o_direct-i-o-and-fallocate.patch | 34 ++ ...kip-nvme_write_sq_db-on-empty-rqlist.patch | 37 ++ ...-fix-type-confusion-in-pde_set_flags.patch | 56 +++ ...isplay-optimize-cursor-position-upda.patch | 108 ++++++ ...abuf-fix-vmap_udmabuf-error-page-set.patch | 86 +++++ ...y-all-sampling-events-by-counter-pmu.patch | 50 +++ ...y-all-events-not-handled-by-this-pmu.patch | 60 ++++ queue-6.12/series | 37 ++ ...ll-xs_sock_process_cmsg-for-all-cmsg.patch | 48 +++ ...msg_free-when-tcp_bpf_send_verdict-f.patch | 98 ++++++ .../trace-fgraph-fix-error-handling.patch | 51 +++ ...ing_marker-may-trigger-page-fault-du.patch | 89 
+++++ ...warning-when-chunk-allocation-fails-.patch | 97 ++++++ ...ack-to-interpreter-for-programs-with.patch | 110 ++++++ ...le-page_pool-direct-xdp_return-need-.patch | 80 +++++ ...ounds-dynptr-write-in-bpf_crypto_cry.patch | 59 ++++ ...o-use-allow_spinning-false-path-in-b.patch | 92 +++++ ...te-fix-a-regression-leading-to-epp-0.patch | 64 ++++ ...te-fix-setting-of-cppc.min_perf-in-a.patch | 90 +++++ ...ix-null-checks-on-result-of-ff_layou.patch | 90 +++++ ...amples-fix-function-size-computation.patch | 38 ++ ...keep-recovery_cp-in-mdp_superblock_s.patch | 69 ++++ ...ore-creds-before-releasing-pageio-da.patch | 65 ++++ ...te_folio-must-observe-the-offset-and.patch | 55 +++ ...-serialise-o_direct-i-o-and-truncate.patch | 100 ++++++ ...cap_open_xor-and-nfs_cap_delegtime-i.patch | 36 ++ ...nfs_cap_fs_locations-flag-if-it-is-n.patch | 38 ++ ...nfs_cap_xattr-flag-if-not-supported-.patch | 35 ++ ...ear-capabilities-that-won-t-be-reset.patch | 35 ++ ...xfiles-fix-layout-merge-mirror-check.patch | 42 +++ ...rialise-o_direct-i-o-and-clone-range.patch | 38 ++ ...erialise-o_direct-i-o-and-copy-range.patch | 34 ++ ...serialise-o_direct-i-o-and-fallocate.patch | 34 ++ ...-fix-type-confusion-in-pde_set_flags.patch | 56 +++ ...ose-trylock-fallback-for-nmi-waiters.patch | 87 +++++ ...y-all-sampling-events-by-counter-pmu.patch | 50 +++ ...y-all-events-not-handled-by-this-pmu.patch | 60 ++++ queue-6.16/series | 31 ++ ...ll-xs_sock_process_cmsg-for-all-cmsg.patch | 48 +++ ...msg_free-when-tcp_bpf_send_verdict-f.patch | 98 ++++++ .../trace-fgraph-fix-error-handling.patch | 51 +++ ...ing_marker-may-trigger-page-fault-du.patch | 89 +++++ ...fix-null-ptr-deref-in-bitmap_parseli.patch | 63 ++++ ...warning-when-chunk-allocation-fails-.patch | 97 ++++++ ...ix-immature-cq-descriptor-production.patch | 324 ++++++++++++++++++ ...o-use-allow_spinning-false-path-in-b.patch | 92 +++++ ...ix-null-checks-on-result-of-ff_layou.patch | 90 +++++ ...s-nfs-io-make-nfs_start_io_-killable.patch | 222 
++++++++++++ ...amples-fix-function-size-computation.patch | 38 ++ ...-serialise-o_direct-i-o-and-truncate.patch | 100 ++++++ ...nfs_cap_fs_locations-flag-if-it-is-n.patch | 38 ++ ...nfs_cap_xattr-flag-if-not-supported-.patch | 35 ++ ...ear-capabilities-that-won-t-be-reset.patch | 35 ++ ...xfiles-fix-layout-merge-mirror-check.patch | 42 +++ ...rialise-o_direct-i-o-and-clone-range.patch | 38 ++ ...erialise-o_direct-i-o-and-copy-range.patch | 34 ++ ...serialise-o_direct-i-o-and-fallocate.patch | 34 ++ ...-fix-type-confusion-in-pde_set_flags.patch | 56 +++ ...y-all-sampling-events-by-counter-pmu.patch | 50 +++ queue-6.6/series | 18 + ...ll-xs_sock_process_cmsg-for-all-cmsg.patch | 48 +++ ...msg_free-when-tcp_bpf_send_verdict-f.patch | 98 ++++++ ...ing_marker-may-trigger-page-fault-du.patch | 89 +++++ ...warning-when-chunk-allocation-fails-.patch | 97 ++++++ 122 files changed, 8129 insertions(+) create mode 100644 queue-5.10/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch create mode 100644 queue-5.10/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch create mode 100644 queue-5.10/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch create mode 100644 queue-5.10/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch create mode 100644 queue-5.10/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch create mode 100644 queue-5.10/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch create mode 100644 queue-5.10/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch create mode 100644 queue-5.15/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch create mode 100644 queue-5.15/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch create mode 100644 queue-5.15/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch create mode 100644 queue-5.15/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch create mode 100644 queue-5.15/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch create mode 100644 
queue-5.15/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch create mode 100644 queue-5.15/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch create mode 100644 queue-5.15/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch create mode 100644 queue-5.4/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch create mode 100644 queue-5.4/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch create mode 100644 queue-5.4/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch create mode 100644 queue-6.1/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch create mode 100644 queue-6.1/ftrace-samples-fix-function-size-computation.patch create mode 100644 queue-6.1/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch create mode 100644 queue-6.1/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch create mode 100644 queue-6.1/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch create mode 100644 queue-6.1/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch create mode 100644 queue-6.1/proc-fix-type-confusion-in-pde_set_flags.patch create mode 100644 queue-6.1/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch create mode 100644 queue-6.1/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch create mode 100644 queue-6.1/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch create mode 100644 queue-6.1/tracing-silence-warning-when-chunk-allocation-fails-.patch create mode 100644 queue-6.12/alsa-hda-realtek-fix-built-in-mic-assignment-on-asus.patch create mode 100644 queue-6.12/bpf-allow-fall-back-to-interpreter-for-programs-with.patch create mode 100644 queue-6.12/bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch create mode 100644 queue-6.12/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch create mode 100644 queue-6.12/dma-debug-fix-physical-address-calculation-for-struc.patch create mode 100644 queue-6.12/dma-mapping-fix-swapped-dir-flags-arguments-to-trace.patch create mode 
100644 queue-6.12/drm-amd-display-fix-error-pointers-in-amdgpu_dm_crtc.patch create mode 100644 queue-6.12/drm-amdgpu-add-back-jpeg-to-video-caps-for-carrizo-a.patch create mode 100644 queue-6.12/drm-i915-pmu-fix-zero-delta-busyness-issue.patch create mode 100644 queue-6.12/ext4-introduce-linear-search-for-dentries.patch create mode 100644 queue-6.12/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch create mode 100644 queue-6.12/fs-nfs-io-make-nfs_start_io_-killable.patch create mode 100644 queue-6.12/ftrace-samples-fix-function-size-computation.patch create mode 100644 queue-6.12/nfs-localio-add-direct-io-enablement-with-sync-and-a.patch create mode 100644 queue-6.12/nfs-localio-remove-extra-indirect-nfs_to-call-to-che.patch create mode 100644 queue-6.12/nfs-localio-restore-creds-before-releasing-pageio-da.patch create mode 100644 queue-6.12/nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch create mode 100644 queue-6.12/nfs-serialise-o_direct-i-o-and-truncate.patch create mode 100644 queue-6.12/nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch create mode 100644 queue-6.12/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch create mode 100644 queue-6.12/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch create mode 100644 queue-6.12/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch create mode 100644 queue-6.12/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch create mode 100644 queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch create mode 100644 queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch create mode 100644 queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch create mode 100644 queue-6.12/nvme-pci-skip-nvme_write_sq_db-on-empty-rqlist.patch create mode 100644 queue-6.12/proc-fix-type-confusion-in-pde_set_flags.patch create mode 100644 queue-6.12/revert-drm-amd-display-optimize-cursor-position-upda.patch create mode 100644 
queue-6.12/revert-udmabuf-fix-vmap_udmabuf-error-page-set.patch create mode 100644 queue-6.12/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch create mode 100644 queue-6.12/s390-pai-deny-all-events-not-handled-by-this-pmu.patch create mode 100644 queue-6.12/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch create mode 100644 queue-6.12/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch create mode 100644 queue-6.12/trace-fgraph-fix-error-handling.patch create mode 100644 queue-6.12/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch create mode 100644 queue-6.12/tracing-silence-warning-when-chunk-allocation-fails-.patch create mode 100644 queue-6.16/bpf-allow-fall-back-to-interpreter-for-programs-with.patch create mode 100644 queue-6.16/bpf-cpumap-disable-page_pool-direct-xdp_return-need-.patch create mode 100644 queue-6.16/bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch create mode 100644 queue-6.16/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch create mode 100644 queue-6.16/cpufreq-amd-pstate-fix-a-regression-leading-to-epp-0.patch create mode 100644 queue-6.16/cpufreq-amd-pstate-fix-setting-of-cppc.min_perf-in-a.patch create mode 100644 queue-6.16/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch create mode 100644 queue-6.16/ftrace-samples-fix-function-size-computation.patch create mode 100644 queue-6.16/md-keep-recovery_cp-in-mdp_superblock_s.patch create mode 100644 queue-6.16/nfs-localio-restore-creds-before-releasing-pageio-da.patch create mode 100644 queue-6.16/nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch create mode 100644 queue-6.16/nfs-serialise-o_direct-i-o-and-truncate.patch create mode 100644 queue-6.16/nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch create mode 100644 queue-6.16/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch create mode 100644 queue-6.16/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch create mode 100644 
queue-6.16/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch create mode 100644 queue-6.16/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch create mode 100644 queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch create mode 100644 queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch create mode 100644 queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch create mode 100644 queue-6.16/proc-fix-type-confusion-in-pde_set_flags.patch create mode 100644 queue-6.16/rqspinlock-choose-trylock-fallback-for-nmi-waiters.patch create mode 100644 queue-6.16/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch create mode 100644 queue-6.16/s390-pai-deny-all-events-not-handled-by-this-pmu.patch create mode 100644 queue-6.16/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch create mode 100644 queue-6.16/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch create mode 100644 queue-6.16/trace-fgraph-fix-error-handling.patch create mode 100644 queue-6.16/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch create mode 100644 queue-6.16/tracing-osnoise-fix-null-ptr-deref-in-bitmap_parseli.patch create mode 100644 queue-6.16/tracing-silence-warning-when-chunk-allocation-fails-.patch create mode 100644 queue-6.16/xsk-fix-immature-cq-descriptor-production.patch create mode 100644 queue-6.6/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch create mode 100644 queue-6.6/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch create mode 100644 queue-6.6/fs-nfs-io-make-nfs_start_io_-killable.patch create mode 100644 queue-6.6/ftrace-samples-fix-function-size-computation.patch create mode 100644 queue-6.6/nfs-serialise-o_direct-i-o-and-truncate.patch create mode 100644 queue-6.6/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch create mode 100644 queue-6.6/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch create mode 100644 queue-6.6/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch create mode 
100644 queue-6.6/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch create mode 100644 queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch create mode 100644 queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch create mode 100644 queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch create mode 100644 queue-6.6/proc-fix-type-confusion-in-pde_set_flags.patch create mode 100644 queue-6.6/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch create mode 100644 queue-6.6/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch create mode 100644 queue-6.6/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch create mode 100644 queue-6.6/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch create mode 100644 queue-6.6/tracing-silence-warning-when-chunk-allocation-fails-.patch diff --git a/queue-5.10/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch b/queue-5.10/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch new file mode 100644 index 0000000000..de93e63e04 --- /dev/null +++ b/queue-5.10/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch @@ -0,0 +1,90 @@ +From c3a3fd144159d29298aabc440b89fe2cdce7812f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Aug 2025 16:51:00 +0200 +Subject: flexfiles/pNFS: fix NULL checks on result of + ff_layout_choose_ds_for_read + +From: Tigran Mkrtchyan + +[ Upstream commit 5a46d2339a5ae268ede53a221f20433d8ea4f2f9 ] + +Recent commit f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS +errors") has changed the error return type of ff_layout_choose_ds_for_read() from +NULL to an error pointer. However, not all code paths have been updated +to match the change. Thus, some non-NULL checks will accept error pointers +as a valid return value. 
+ +Reported-by: Dan Carpenter +Suggested-by: Dan Carpenter +Fixes: f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS errors") +Signed-off-by: Tigran Mkrtchyan +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 57150b27c0fd2..43aefff2e6b06 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -750,8 +750,11 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, + continue; + + if (check_device && +- nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) ++ nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) { ++ // reinitialize the error state in case if this is the last iteration ++ ds = ERR_PTR(-EINVAL); + continue; ++ } + + *best_idx = idx; + break; +@@ -781,7 +784,7 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + struct nfs4_pnfs_ds *ds; + + ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); +- if (ds) ++ if (!IS_ERR(ds)) + return ds; + return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); + } +@@ -795,7 +798,7 @@ ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, + + ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, + best_idx); +- if (ds || !pgio->pg_mirror_idx) ++ if (!IS_ERR(ds) || !pgio->pg_mirror_idx) + return ds; + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); + } +@@ -856,7 +859,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, + req->wb_nio = 0; + + ds = ff_layout_get_ds_for_read(pgio, &ds_idx); +- if (!ds) { ++ if (IS_ERR(ds)) { + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) + goto out_mds; + pnfs_generic_pg_cleanup(pgio); +@@ -1066,11 +1069,13 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) + { + 
u32 idx = hdr->pgio_mirror_idx + 1; + u32 new_idx = 0; ++ struct nfs4_pnfs_ds *ds; + +- if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) +- ff_layout_send_layouterror(hdr->lseg); +- else ++ ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); ++ if (IS_ERR(ds)) + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); ++ else ++ ff_layout_send_layouterror(hdr->lseg); + pnfs_read_resend_pnfs(hdr, new_idx); + } + +-- +2.51.0 + diff --git a/queue-5.10/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch b/queue-5.10/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch new file mode 100644 index 0000000000..db1ee5fd7d --- /dev/null +++ b/queue-5.10/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch @@ -0,0 +1,35 @@ +From 68b2cb55670f8bccfb378d40c17030d664d02362 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:15:12 -0700 +Subject: NFSv4: Clear the NFS_CAP_XATTR flag if not supported by the server + +From: Trond Myklebust + +[ Upstream commit 4fb2b677fc1f70ee642c0beecc3cabf226ef5707 ] + +nfs_server_set_fsinfo() shouldn't assume that NFS_CAP_XATTR is unset +on entry to the function. 
+ +Fixes: b78ef845c35d ("NFSv4.2: query the server for extended attribute support") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/client.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/nfs/client.c b/fs/nfs/client.c +index ac2fbbba1521a..6134101184fae 100644 +--- a/fs/nfs/client.c ++++ b/fs/nfs/client.c +@@ -850,6 +850,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, + + if (fsinfo->xattr_support) + server->caps |= NFS_CAP_XATTR; ++ else ++ server->caps &= ~NFS_CAP_XATTR; + #endif + } + +-- +2.51.0 + diff --git a/queue-5.10/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch b/queue-5.10/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch new file mode 100644 index 0000000000..bfca59694c --- /dev/null +++ b/queue-5.10/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch @@ -0,0 +1,35 @@ +From 0427ccf5c8399285b054c3be88b4464eec6b4882 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:02:16 -0700 +Subject: NFSv4: Don't clear capabilities that won't be reset + +From: Trond Myklebust + +[ Upstream commit 31f1a960ad1a14def94fa0b8c25d62b4c032813f ] + +Don't clear the capabilities that are not going to get reset by the call +to _nfs4_server_capabilities(). 
+ +Reported-by: Scott Haiden +Fixes: b01f21cacde9 ("NFS: Fix the setting of capabilities when automounting a new filesystem") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 973b708ff3328..c094413c17541 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3935,7 +3935,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) + }; + int err; + +- nfs_server_set_init_caps(server); + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), +-- +2.51.0 + diff --git a/queue-5.10/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch b/queue-5.10/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch new file mode 100644 index 0000000000..c6a1b38e10 --- /dev/null +++ b/queue-5.10/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch @@ -0,0 +1,42 @@ +From 8fef58d4178ba1a8a66369eba2a19e89b5a59235 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 17:35:16 +0000 +Subject: NFSv4/flexfiles: Fix layout merge mirror check. + +From: Jonathan Curley + +[ Upstream commit dd2fa82473453661d12723c46c9f43d9876a7efd ] + +Typo in ff_lseg_match_mirrors makes the diff ineffective. This results +in merge happening all the time. Merge happening all the time is +problematic because it marks lsegs invalid. Marking lsegs invalid +causes all outstanding IO to get restarted with EAGAIN and connections +to get closed. + +Closing connections constantly triggers race conditions in the RDMA +implementation... 
+ +Fixes: 660d1eb22301c ("pNFS/flexfile: Don't merge layout segments if the mirrors don't match") +Signed-off-by: Jonathan Curley +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 43aefff2e6b06..ee103cde19cdf 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -270,7 +270,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1, + struct pnfs_layout_segment *l2) + { + const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1); +- const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1); ++ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2); + u32 i; + + if (fl1->mirror_array_cnt != fl2->mirror_array_cnt) +-- +2.51.0 + diff --git a/queue-5.10/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch b/queue-5.10/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch new file mode 100644 index 0000000000..9a59da61af --- /dev/null +++ b/queue-5.10/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch @@ -0,0 +1,50 @@ +From 511565af7a431f82d5a72ba39bb4c4b141f30464 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Aug 2025 14:05:57 +0200 +Subject: s390/cpum_cf: Deny all sampling events by counter PMU + +From: Thomas Richter + +[ Upstream commit ce971233242b5391d99442271f3ca096fb49818d ] + +Deny all sampling event by the CPUMF counter facility device driver +and return -ENOENT. This return value is used to try other PMUs. +Up to now events for type PERF_TYPE_HARDWARE were not tested for +sampling and returned later on -EOPNOTSUPP. This ends the search +for alternative PMUs. Change that behavior and try other PMUs +instead. 
+ +Fixes: 613a41b0d16e ("s390/cpum_cf: Reject request for sampling in event initialization") +Acked-by: Sumanth Korikkar +Signed-off-by: Thomas Richter +Signed-off-by: Alexander Gordeev +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/perf_cpum_cf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c +index dddb32e53db8b..c772bee528c07 100644 +--- a/arch/s390/kernel/perf_cpum_cf.c ++++ b/arch/s390/kernel/perf_cpum_cf.c +@@ -218,8 +218,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) + break; + + case PERF_TYPE_HARDWARE: +- if (is_sampling_event(event)) /* No sampling support */ +- return -ENOENT; + ev = attr->config; + /* Count user space (problem-state) only */ + if (!attr->exclude_user && attr->exclude_kernel) { +@@ -317,6 +315,8 @@ static int cpumf_pmu_event_init(struct perf_event *event) + unsigned int type = event->attr.type; + int err; + ++ if (is_sampling_event(event)) /* No sampling support */ ++ return -ENOENT; + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) +-- +2.51.0 + diff --git a/queue-5.10/series b/queue-5.10/series index 87f3bd7f5d..50bb03257e 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -3,3 +3,10 @@ media-mtk-vcodec-venc-avoid-wenum-compare-conditional-warning.patch media-i2c-imx214-fix-link-frequency-validation.patch net-fix-null-ptr-deref-by-sock_lock_init_class_and_name-and-rmmod.patch mtd-add-check-for-devm_kcalloc.patch +flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch +nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch +nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch +tracing-fix-tracing_marker-may-trigger-page-fault-du.patch +nfsv4-flexfiles-fix-layout-merge-mirror-check.patch +s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch 
+tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch diff --git a/queue-5.10/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch b/queue-5.10/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch new file mode 100644 index 0000000000..9295f504b0 --- /dev/null +++ b/queue-5.10/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch @@ -0,0 +1,98 @@ +From 39da6d17b8e3a44712e32f2e1c3694bdaf6944f1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 23:26:12 +0000 +Subject: tcp_bpf: Call sk_msg_free() when tcp_bpf_send_verdict() fails to + allocate psock->cork. + +From: Kuniyuki Iwashima + +[ Upstream commit a3967baad4d533dc254c31e0d221e51c8d223d58 ] + +syzbot reported the splat below. [0] + +The repro does the following: + + 1. Load a sk_msg prog that calls bpf_msg_cork_bytes(msg, cork_bytes) + 2. Attach the prog to a SOCKMAP + 3. Add a socket to the SOCKMAP + 4. Activate fault injection + 5. Send data less than cork_bytes + +At 5., the data is carried over to the next sendmsg() as it is +smaller than the cork_bytes specified by bpf_msg_cork_bytes(). + +Then, tcp_bpf_send_verdict() tries to allocate psock->cork to hold +the data, but this fails silently due to fault injection + __GFP_NOWARN. + +If the allocation fails, we need to revert the sk->sk_forward_alloc +change done by sk_msg_alloc(). + +Let's call sk_msg_free() when tcp_bpf_send_verdict fails to allocate +psock->cork. + +The "*copied" also needs to be updated such that a proper error can +be returned to the caller, sendmsg. It fails to allocate psock->cork. +Nothing has been corked so far, so this patch simply sets "*copied" +to 0. 
+ +[0]: +WARNING: net/ipv4/af_inet.c:156 at inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156, CPU#1: syz-executor/5983 +Modules linked in: +CPU: 1 UID: 0 PID: 5983 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 +RIP: 0010:inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156 +Code: 0f 0b 90 e9 62 fe ff ff e8 7a db b5 f7 90 0f 0b 90 e9 95 fe ff ff e8 6c db b5 f7 90 0f 0b 90 e9 bb fe ff ff e8 5e db b5 f7 90 <0f> 0b 90 e9 e1 fe ff ff 89 f9 80 e1 07 80 c1 03 38 c1 0f 8c 9f fc +RSP: 0018:ffffc90000a08b48 EFLAGS: 00010246 +RAX: ffffffff8a09d0b2 RBX: dffffc0000000000 RCX: ffff888024a23c80 +RDX: 0000000000000100 RSI: 0000000000000fff RDI: 0000000000000000 +RBP: 0000000000000fff R08: ffff88807e07c627 R09: 1ffff1100fc0f8c4 +R10: dffffc0000000000 R11: ffffed100fc0f8c5 R12: ffff88807e07c380 +R13: dffffc0000000000 R14: ffff88807e07c60c R15: 1ffff1100fc0f872 +FS: 00005555604c4500(0000) GS:ffff888125af1000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005555604df5c8 CR3: 0000000032b06000 CR4: 00000000003526f0 +Call Trace: + + __sk_destruct+0x86/0x660 net/core/sock.c:2339 + rcu_do_batch kernel/rcu/tree.c:2605 [inline] + rcu_core+0xca8/0x1770 kernel/rcu/tree.c:2861 + handle_softirqs+0x286/0x870 kernel/softirq.c:579 + __do_softirq kernel/softirq.c:613 [inline] + invoke_softirq kernel/softirq.c:453 [inline] + __irq_exit_rcu+0xca/0x1f0 kernel/softirq.c:680 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:696 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1052 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1052 + + +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Reported-by: syzbot+4cabd1d2fa917a456db8@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/68c0b6b5.050a0220.3c6139.0013.GAE@google.com/ +Signed-off-by: Kuniyuki Iwashima 
+Signed-off-by: Martin KaFai Lau +Link: https://patch.msgid.link/20250909232623.4151337-1-kuniyu@google.com +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_bpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index f97e357e2644d..bcd5fc484f777 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -341,8 +341,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, + if (!psock->cork) { + psock->cork = kzalloc(sizeof(*psock->cork), + GFP_ATOMIC | __GFP_NOWARN); +- if (!psock->cork) ++ if (!psock->cork) { ++ sk_msg_free(sk, msg); ++ *copied = 0; + return -ENOMEM; ++ } + } + memcpy(psock->cork, msg, sizeof(*msg)); + return 0; +-- +2.51.0 + diff --git a/queue-5.10/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch b/queue-5.10/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch new file mode 100644 index 0000000000..0d760f9fcd --- /dev/null +++ b/queue-5.10/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch @@ -0,0 +1,89 @@ +From d58f7edb56eb894ef2e84f38def12994f25969f4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Aug 2025 10:51:52 +0000 +Subject: tracing: Fix tracing_marker may trigger page fault during + preempt_disable + +From: Luo Gengkun + +[ Upstream commit 3d62ab32df065e4a7797204a918f6489ddb8a237 ] + +Both tracing_mark_write and tracing_mark_raw_write call +__copy_from_user_inatomic during preempt_disable. But in some case, +__copy_from_user_inatomic may trigger page fault, and will call schedule() +subtly. And if a task is migrated to other cpu, the following warning will +be trigger: + if (RB_WARN_ON(cpu_buffer, + !local_read(&cpu_buffer->committing))) + +An example can illustrate this issue: + +process flow CPU +--------------------------------------------------------------------- + +tracing_mark_raw_write(): cpu:0 + ... + ring_buffer_lock_reserve(): cpu:0 + ... 
+ cpu = raw_smp_processor_id() cpu:0 + cpu_buffer = buffer->buffers[cpu] cpu:0 + ... + ... + __copy_from_user_inatomic(): cpu:0 + ... + # page fault + do_mem_abort(): cpu:0 + ... + # Call schedule + schedule() cpu:0 + ... + # the task schedule to cpu1 + __buffer_unlock_commit(): cpu:1 + ... + ring_buffer_unlock_commit(): cpu:1 + ... + cpu = raw_smp_processor_id() cpu:1 + cpu_buffer = buffer->buffers[cpu] cpu:1 + +As shown above, the process will acquire cpuid twice and the return values +are not the same. + +To fix this problem using copy_from_user_nofault instead of +__copy_from_user_inatomic, as the former performs 'access_ok' before +copying. + +Link: https://lore.kernel.org/20250819105152.2766363-1-luogengkun@huaweicloud.com +Fixes: 656c7f0d2d2b ("tracing: Replace kmap with copy_from_user() in trace_marker writing") +Signed-off-by: Luo Gengkun +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index d08320c47a150..8f4d6c974372b 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6891,7 +6891,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, + entry = ring_buffer_event_data(event); + entry->ip = _THIS_IP_; + +- len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->buf, ubuf, cnt); + if (len) { + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); + cnt = FAULTED_SIZE; +@@ -6971,7 +6971,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + + entry = ring_buffer_event_data(event); + +- len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->id, ubuf, cnt); + if (len) { + entry->id = -1; + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); +-- +2.51.0 + diff --git a/queue-5.15/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch 
b/queue-5.15/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch new file mode 100644 index 0000000000..cc1376fe5a --- /dev/null +++ b/queue-5.15/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch @@ -0,0 +1,90 @@ +From c0e9847acfd9fea8f0c9bba1b0b2a6f236c770d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Aug 2025 16:51:00 +0200 +Subject: flexfiles/pNFS: fix NULL checks on result of + ff_layout_choose_ds_for_read + +From: Tigran Mkrtchyan + +[ Upstream commit 5a46d2339a5ae268ede53a221f20433d8ea4f2f9 ] + +Recent commit f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS +errors") has changed the error return type of ff_layout_choose_ds_for_read() from +NULL to an error pointer. However, not all code paths have been updated +to match the change. Thus, some non-NULL checks will accept error pointers +as a valid return value. + +Reported-by: Dan Carpenter +Suggested-by: Dan Carpenter +Fixes: f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS errors") +Signed-off-by: Tigran Mkrtchyan +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 14c7de8fd7812..798e2e32b3fb6 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -750,8 +750,11 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, + continue; + + if (check_device && +- nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) ++ nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) { ++ // reinitialize the error state in case if this is the last iteration ++ ds = ERR_PTR(-EINVAL); + continue; ++ } + + *best_idx = idx; + break; +@@ -781,7 +784,7 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + struct nfs4_pnfs_ds *ds; + + ds = 
ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); +- if (ds) ++ if (!IS_ERR(ds)) + return ds; + return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); + } +@@ -795,7 +798,7 @@ ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, + + ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, + best_idx); +- if (ds || !pgio->pg_mirror_idx) ++ if (!IS_ERR(ds) || !pgio->pg_mirror_idx) + return ds; + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); + } +@@ -856,7 +859,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, + req->wb_nio = 0; + + ds = ff_layout_get_ds_for_read(pgio, &ds_idx); +- if (!ds) { ++ if (IS_ERR(ds)) { + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) + goto out_mds; + pnfs_generic_pg_cleanup(pgio); +@@ -1066,11 +1069,13 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) + { + u32 idx = hdr->pgio_mirror_idx + 1; + u32 new_idx = 0; ++ struct nfs4_pnfs_ds *ds; + +- if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) +- ff_layout_send_layouterror(hdr->lseg); +- else ++ ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); ++ if (IS_ERR(ds)) + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); ++ else ++ ff_layout_send_layouterror(hdr->lseg); + pnfs_read_resend_pnfs(hdr, new_idx); + } + +-- +2.51.0 + diff --git a/queue-5.15/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch b/queue-5.15/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch new file mode 100644 index 0000000000..8cc5efa0b0 --- /dev/null +++ b/queue-5.15/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch @@ -0,0 +1,38 @@ +From 5252dc5bfdaba42ed6a46ed90f1ee8482aaac970 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:07:22 -0700 +Subject: NFSv4: Clear the NFS_CAP_FS_LOCATIONS flag if it is not set + +From: Trond Myklebust + +[ Upstream commit dd5a8621b886b02f8341c5d4ea68eb2c552ebd3e ] + +_nfs4_server_capabilities() is expected to 
clear any flags that are not +supported by the server. + +Fixes: 8a59bb93b7e3 ("NFSv4 store server support for fs_location attribute") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 65dae25d6856a..3d854e2537bc2 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3904,8 +3904,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f + res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + } + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); +- server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | +- NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL); ++ server->caps &= ++ ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | ++ NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS); + server->fattr_valid = NFS_ATTR_FATTR_V4; + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && + res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) +-- +2.51.0 + diff --git a/queue-5.15/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch b/queue-5.15/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch new file mode 100644 index 0000000000..7d4b9ffd2b --- /dev/null +++ b/queue-5.15/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch @@ -0,0 +1,35 @@ +From 054e78ccb739afaeef435df4771f6c84f648f395 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:15:12 -0700 +Subject: NFSv4: Clear the NFS_CAP_XATTR flag if not supported by the server + +From: Trond Myklebust + +[ Upstream commit 4fb2b677fc1f70ee642c0beecc3cabf226ef5707 ] + +nfs_server_set_fsinfo() shouldn't assume that NFS_CAP_XATTR is unset +on entry to the function. 
+ +Fixes: b78ef845c35d ("NFSv4.2: query the server for extended attribute support") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/client.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/nfs/client.c b/fs/nfs/client.c +index 443b67beec376..c29bc0a30dd75 100644 +--- a/fs/nfs/client.c ++++ b/fs/nfs/client.c +@@ -862,6 +862,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, + + if (fsinfo->xattr_support) + server->caps |= NFS_CAP_XATTR; ++ else ++ server->caps &= ~NFS_CAP_XATTR; + #endif + } + +-- +2.51.0 + diff --git a/queue-5.15/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch b/queue-5.15/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch new file mode 100644 index 0000000000..2256541fad --- /dev/null +++ b/queue-5.15/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch @@ -0,0 +1,35 @@ +From 8d6f06744284bf3753148c6d93a382a67a599893 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:02:16 -0700 +Subject: NFSv4: Don't clear capabilities that won't be reset + +From: Trond Myklebust + +[ Upstream commit 31f1a960ad1a14def94fa0b8c25d62b4c032813f ] + +Don't clear the capabilities that are not going to get reset by the call +to _nfs4_server_capabilities(). 
+ +Reported-by: Scott Haiden +Fixes: b01f21cacde9 ("NFS: Fix the setting of capabilities when automounting a new filesystem") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 5976a31b09b02..65dae25d6856a 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3969,7 +3969,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) + }; + int err; + +- nfs_server_set_init_caps(server); + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), +-- +2.51.0 + diff --git a/queue-5.15/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch b/queue-5.15/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch new file mode 100644 index 0000000000..05a1723af6 --- /dev/null +++ b/queue-5.15/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch @@ -0,0 +1,42 @@ +From a9a6e10ab52eab0c32f84d8333ce29b10014bb89 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 17:35:16 +0000 +Subject: NFSv4/flexfiles: Fix layout merge mirror check. + +From: Jonathan Curley + +[ Upstream commit dd2fa82473453661d12723c46c9f43d9876a7efd ] + +Typo in ff_lseg_match_mirrors makes the diff ineffective. This results +in merge happening all the time. Merge happening all the time is +problematic because it marks lsegs invalid. Marking lsegs invalid +causes all outstanding IO to get restarted with EAGAIN and connections +to get closed. + +Closing connections constantly triggers race conditions in the RDMA +implementation... 
+ +Fixes: 660d1eb22301c ("pNFS/flexfile: Don't merge layout segments if the mirrors don't match") +Signed-off-by: Jonathan Curley +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 798e2e32b3fb6..24d97d7ba12d5 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -270,7 +270,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1, + struct pnfs_layout_segment *l2) + { + const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1); +- const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1); ++ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2); + u32 i; + + if (fl1->mirror_array_cnt != fl2->mirror_array_cnt) +-- +2.51.0 + diff --git a/queue-5.15/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch b/queue-5.15/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch new file mode 100644 index 0000000000..93183f4540 --- /dev/null +++ b/queue-5.15/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch @@ -0,0 +1,50 @@ +From 7205e58278d378ec81325f35aa8604eea113c8ad Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Aug 2025 14:05:57 +0200 +Subject: s390/cpum_cf: Deny all sampling events by counter PMU + +From: Thomas Richter + +[ Upstream commit ce971233242b5391d99442271f3ca096fb49818d ] + +Deny all sampling event by the CPUMF counter facility device driver +and return -ENOENT. This return value is used to try other PMUs. +Up to now events for type PERF_TYPE_HARDWARE were not tested for +sampling and returned later on -EOPNOTSUPP. This ends the search +for alternative PMUs. Change that behavior and try other PMUs +instead. 
+
+Fixes: 613a41b0d16e ("s390/cpum_cf: Reject request for sampling in event initialization")
+Acked-by: Sumanth Korikkar 
+Signed-off-by: Thomas Richter 
+Signed-off-by: Alexander Gordeev 
+Signed-off-by: Sasha Levin 
+---
+ arch/s390/kernel/perf_cpum_cf.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
+index d2a2a18b55808..98b8bdbf31e44 100644
+--- a/arch/s390/kernel/perf_cpum_cf.c
++++ b/arch/s390/kernel/perf_cpum_cf.c
+@@ -453,8 +453,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
+ 		break;
+ 
+ 	case PERF_TYPE_HARDWARE:
+-		if (is_sampling_event(event))	/* No sampling support */
+-			return -ENOENT;
+ 		ev = attr->config;
+ 		/* Count user space (problem-state) only */
+ 		if (!attr->exclude_user && attr->exclude_kernel) {
+@@ -541,6 +539,8 @@ static int cpumf_pmu_event_init(struct perf_event *event)
+ 	unsigned int type = event->attr.type;
+ 	int err;
+ 
++	if (is_sampling_event(event))	/* No sampling support */
++		return -ENOENT;
+ 	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
+ 		err = __hw_perf_event_init(event, type);
+ 	else if (event->pmu->type == type)
+-- 
+2.51.0
+
diff --git a/queue-5.15/series b/queue-5.15/series
index 60cb2cf7c6..b8de571919 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -7,3 +7,11 @@ media-i2c-imx214-fix-link-frequency-validation.patch
 net-fix-null-ptr-deref-by-sock_lock_init_class_and_name-and-rmmod.patch
 tracing-do-not-add-length-to-print-format-in-synthetic-events.patch
 mm-rmap-reject-hugetlb-folios-in-folio_make_device_exclusive.patch
+flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch
+nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch
+nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch
+nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch
+tracing-fix-tracing_marker-may-trigger-page-fault-du.patch
+nfsv4-flexfiles-fix-layout-merge-mirror-check.patch
+s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch +tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch diff --git a/queue-5.15/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch b/queue-5.15/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch new file mode 100644 index 0000000000..0628aa362b --- /dev/null +++ b/queue-5.15/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch @@ -0,0 +1,98 @@ +From f24bdf0178052773ae611977c2e6bedd497785c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 23:26:12 +0000 +Subject: tcp_bpf: Call sk_msg_free() when tcp_bpf_send_verdict() fails to + allocate psock->cork. + +From: Kuniyuki Iwashima + +[ Upstream commit a3967baad4d533dc254c31e0d221e51c8d223d58 ] + +syzbot reported the splat below. [0] + +The repro does the following: + + 1. Load a sk_msg prog that calls bpf_msg_cork_bytes(msg, cork_bytes) + 2. Attach the prog to a SOCKMAP + 3. Add a socket to the SOCKMAP + 4. Activate fault injection + 5. Send data less than cork_bytes + +At 5., the data is carried over to the next sendmsg() as it is +smaller than the cork_bytes specified by bpf_msg_cork_bytes(). + +Then, tcp_bpf_send_verdict() tries to allocate psock->cork to hold +the data, but this fails silently due to fault injection + __GFP_NOWARN. + +If the allocation fails, we need to revert the sk->sk_forward_alloc +change done by sk_msg_alloc(). + +Let's call sk_msg_free() when tcp_bpf_send_verdict fails to allocate +psock->cork. + +The "*copied" also needs to be updated such that a proper error can +be returned to the caller, sendmsg. It fails to allocate psock->cork. +Nothing has been corked so far, so this patch simply sets "*copied" +to 0. 
+ +[0]: +WARNING: net/ipv4/af_inet.c:156 at inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156, CPU#1: syz-executor/5983 +Modules linked in: +CPU: 1 UID: 0 PID: 5983 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 +RIP: 0010:inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156 +Code: 0f 0b 90 e9 62 fe ff ff e8 7a db b5 f7 90 0f 0b 90 e9 95 fe ff ff e8 6c db b5 f7 90 0f 0b 90 e9 bb fe ff ff e8 5e db b5 f7 90 <0f> 0b 90 e9 e1 fe ff ff 89 f9 80 e1 07 80 c1 03 38 c1 0f 8c 9f fc +RSP: 0018:ffffc90000a08b48 EFLAGS: 00010246 +RAX: ffffffff8a09d0b2 RBX: dffffc0000000000 RCX: ffff888024a23c80 +RDX: 0000000000000100 RSI: 0000000000000fff RDI: 0000000000000000 +RBP: 0000000000000fff R08: ffff88807e07c627 R09: 1ffff1100fc0f8c4 +R10: dffffc0000000000 R11: ffffed100fc0f8c5 R12: ffff88807e07c380 +R13: dffffc0000000000 R14: ffff88807e07c60c R15: 1ffff1100fc0f872 +FS: 00005555604c4500(0000) GS:ffff888125af1000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005555604df5c8 CR3: 0000000032b06000 CR4: 00000000003526f0 +Call Trace: + + __sk_destruct+0x86/0x660 net/core/sock.c:2339 + rcu_do_batch kernel/rcu/tree.c:2605 [inline] + rcu_core+0xca8/0x1770 kernel/rcu/tree.c:2861 + handle_softirqs+0x286/0x870 kernel/softirq.c:579 + __do_softirq kernel/softirq.c:613 [inline] + invoke_softirq kernel/softirq.c:453 [inline] + __irq_exit_rcu+0xca/0x1f0 kernel/softirq.c:680 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:696 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1052 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1052 + + +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Reported-by: syzbot+4cabd1d2fa917a456db8@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/68c0b6b5.050a0220.3c6139.0013.GAE@google.com/ +Signed-off-by: Kuniyuki Iwashima 
+Signed-off-by: Martin KaFai Lau +Link: https://patch.msgid.link/20250909232623.4151337-1-kuniyu@google.com +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_bpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index 9e24542251b1c..11cb3a353cc6d 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -363,8 +363,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, + if (!psock->cork) { + psock->cork = kzalloc(sizeof(*psock->cork), + GFP_ATOMIC | __GFP_NOWARN); +- if (!psock->cork) ++ if (!psock->cork) { ++ sk_msg_free(sk, msg); ++ *copied = 0; + return -ENOMEM; ++ } + } + memcpy(psock->cork, msg, sizeof(*msg)); + return 0; +-- +2.51.0 + diff --git a/queue-5.15/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch b/queue-5.15/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch new file mode 100644 index 0000000000..3d14c75c7a --- /dev/null +++ b/queue-5.15/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch @@ -0,0 +1,89 @@ +From 25247eef337680a5a4853c65b0213f337714c4a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Aug 2025 10:51:52 +0000 +Subject: tracing: Fix tracing_marker may trigger page fault during + preempt_disable + +From: Luo Gengkun + +[ Upstream commit 3d62ab32df065e4a7797204a918f6489ddb8a237 ] + +Both tracing_mark_write and tracing_mark_raw_write call +__copy_from_user_inatomic during preempt_disable. But in some case, +__copy_from_user_inatomic may trigger page fault, and will call schedule() +subtly. And if a task is migrated to other cpu, the following warning will +be trigger: + if (RB_WARN_ON(cpu_buffer, + !local_read(&cpu_buffer->committing))) + +An example can illustrate this issue: + +process flow CPU +--------------------------------------------------------------------- + +tracing_mark_raw_write(): cpu:0 + ... + ring_buffer_lock_reserve(): cpu:0 + ... 
+ cpu = raw_smp_processor_id() cpu:0 + cpu_buffer = buffer->buffers[cpu] cpu:0 + ... + ... + __copy_from_user_inatomic(): cpu:0 + ... + # page fault + do_mem_abort(): cpu:0 + ... + # Call schedule + schedule() cpu:0 + ... + # the task schedule to cpu1 + __buffer_unlock_commit(): cpu:1 + ... + ring_buffer_unlock_commit(): cpu:1 + ... + cpu = raw_smp_processor_id() cpu:1 + cpu_buffer = buffer->buffers[cpu] cpu:1 + +As shown above, the process will acquire cpuid twice and the return values +are not the same. + +To fix this problem using copy_from_user_nofault instead of +__copy_from_user_inatomic, as the former performs 'access_ok' before +copying. + +Link: https://lore.kernel.org/20250819105152.2766363-1-luogengkun@huaweicloud.com +Fixes: 656c7f0d2d2b ("tracing: Replace kmap with copy_from_user() in trace_marker writing") +Signed-off-by: Luo Gengkun +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 7af8bbc57531c..a6040a707abb7 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7233,7 +7233,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, + entry = ring_buffer_event_data(event); + entry->ip = _THIS_IP_; + +- len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->buf, ubuf, cnt); + if (len) { + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); + cnt = FAULTED_SIZE; +@@ -7308,7 +7308,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + + entry = ring_buffer_event_data(event); + +- len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->id, ubuf, cnt); + if (len) { + entry->id = -1; + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); +-- +2.51.0 + diff --git a/queue-5.4/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch 
b/queue-5.4/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch new file mode 100644 index 0000000000..2636613667 --- /dev/null +++ b/queue-5.4/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch @@ -0,0 +1,35 @@ +From 8c34529c68760e3bc93b042aff6f005737b551e4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:02:16 -0700 +Subject: NFSv4: Don't clear capabilities that won't be reset + +From: Trond Myklebust + +[ Upstream commit 31f1a960ad1a14def94fa0b8c25d62b4c032813f ] + +Don't clear the capabilities that are not going to get reset by the call +to _nfs4_server_capabilities(). + +Reported-by: Scott Haiden +Fixes: b01f21cacde9 ("NFS: Fix the setting of capabilities when automounting a new filesystem") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 5f8de86b27982..f511087d5e1c2 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3885,7 +3885,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) + }; + int err; + +- nfs_server_set_init_caps(server); + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), +-- +2.51.0 + diff --git a/queue-5.4/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch b/queue-5.4/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch new file mode 100644 index 0000000000..98f8d99a66 --- /dev/null +++ b/queue-5.4/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch @@ -0,0 +1,50 @@ +From b098f8c46e165b32681d7df84e7400c86ad39dba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Aug 2025 14:05:57 +0200 +Subject: s390/cpum_cf: Deny all sampling events by counter PMU + +From: Thomas Richter + +[ Upstream commit ce971233242b5391d99442271f3ca096fb49818d ] + +Deny all sampling event by the CPUMF counter facility device driver +and return -ENOENT. This return value is used to try other PMUs. 
+Up to now events for type PERF_TYPE_HARDWARE were not tested for
+sampling and returned later on -EOPNOTSUPP. This ends the search
+for alternative PMUs. Change that behavior and try other PMUs
+instead.
+
+Fixes: 613a41b0d16e ("s390/cpum_cf: Reject request for sampling in event initialization")
+Acked-by: Sumanth Korikkar 
+Signed-off-by: Thomas Richter 
+Signed-off-by: Alexander Gordeev 
+Signed-off-by: Sasha Levin 
+---
+ arch/s390/kernel/perf_cpum_cf.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
+index dddb32e53db8b..c772bee528c07 100644
+--- a/arch/s390/kernel/perf_cpum_cf.c
++++ b/arch/s390/kernel/perf_cpum_cf.c
+@@ -218,8 +218,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
+ 		break;
+ 
+ 	case PERF_TYPE_HARDWARE:
+-		if (is_sampling_event(event))	/* No sampling support */
+-			return -ENOENT;
+ 		ev = attr->config;
+ 		/* Count user space (problem-state) only */
+ 		if (!attr->exclude_user && attr->exclude_kernel) {
+@@ -317,6 +315,8 @@ static int cpumf_pmu_event_init(struct perf_event *event)
+ 	unsigned int type = event->attr.type;
+ 	int err;
+ 
++	if (is_sampling_event(event))	/* No sampling support */
++		return -ENOENT;
+ 	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
+ 		err = __hw_perf_event_init(event, type);
+ 	else if (event->pmu->type == type)
+-- 
+2.51.0
+
diff --git a/queue-5.4/series b/queue-5.4/series
index b17c4b4c7b..052f009eb0 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -1,2 +1,5 @@
 usb-hub-fix-flushing-of-delayed-work-used-for-post-resume-purposes.patch
 net-fix-null-ptr-deref-by-sock_lock_init_class_and_name-and-rmmod.patch
+nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch
+s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch
+tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch
diff --git a/queue-5.4/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch 
b/queue-5.4/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch new file mode 100644 index 0000000000..330c9afaee --- /dev/null +++ b/queue-5.4/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch @@ -0,0 +1,98 @@ +From 30ffdab6042c93c9106d2ee8a456b0656049b1d6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 23:26:12 +0000 +Subject: tcp_bpf: Call sk_msg_free() when tcp_bpf_send_verdict() fails to + allocate psock->cork. + +From: Kuniyuki Iwashima + +[ Upstream commit a3967baad4d533dc254c31e0d221e51c8d223d58 ] + +syzbot reported the splat below. [0] + +The repro does the following: + + 1. Load a sk_msg prog that calls bpf_msg_cork_bytes(msg, cork_bytes) + 2. Attach the prog to a SOCKMAP + 3. Add a socket to the SOCKMAP + 4. Activate fault injection + 5. Send data less than cork_bytes + +At 5., the data is carried over to the next sendmsg() as it is +smaller than the cork_bytes specified by bpf_msg_cork_bytes(). + +Then, tcp_bpf_send_verdict() tries to allocate psock->cork to hold +the data, but this fails silently due to fault injection + __GFP_NOWARN. + +If the allocation fails, we need to revert the sk->sk_forward_alloc +change done by sk_msg_alloc(). + +Let's call sk_msg_free() when tcp_bpf_send_verdict fails to allocate +psock->cork. + +The "*copied" also needs to be updated such that a proper error can +be returned to the caller, sendmsg. It fails to allocate psock->cork. +Nothing has been corked so far, so this patch simply sets "*copied" +to 0. 
+ +[0]: +WARNING: net/ipv4/af_inet.c:156 at inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156, CPU#1: syz-executor/5983 +Modules linked in: +CPU: 1 UID: 0 PID: 5983 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 +RIP: 0010:inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156 +Code: 0f 0b 90 e9 62 fe ff ff e8 7a db b5 f7 90 0f 0b 90 e9 95 fe ff ff e8 6c db b5 f7 90 0f 0b 90 e9 bb fe ff ff e8 5e db b5 f7 90 <0f> 0b 90 e9 e1 fe ff ff 89 f9 80 e1 07 80 c1 03 38 c1 0f 8c 9f fc +RSP: 0018:ffffc90000a08b48 EFLAGS: 00010246 +RAX: ffffffff8a09d0b2 RBX: dffffc0000000000 RCX: ffff888024a23c80 +RDX: 0000000000000100 RSI: 0000000000000fff RDI: 0000000000000000 +RBP: 0000000000000fff R08: ffff88807e07c627 R09: 1ffff1100fc0f8c4 +R10: dffffc0000000000 R11: ffffed100fc0f8c5 R12: ffff88807e07c380 +R13: dffffc0000000000 R14: ffff88807e07c60c R15: 1ffff1100fc0f872 +FS: 00005555604c4500(0000) GS:ffff888125af1000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005555604df5c8 CR3: 0000000032b06000 CR4: 00000000003526f0 +Call Trace: + + __sk_destruct+0x86/0x660 net/core/sock.c:2339 + rcu_do_batch kernel/rcu/tree.c:2605 [inline] + rcu_core+0xca8/0x1770 kernel/rcu/tree.c:2861 + handle_softirqs+0x286/0x870 kernel/softirq.c:579 + __do_softirq kernel/softirq.c:613 [inline] + invoke_softirq kernel/softirq.c:453 [inline] + __irq_exit_rcu+0xca/0x1f0 kernel/softirq.c:680 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:696 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1052 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1052 + + +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Reported-by: syzbot+4cabd1d2fa917a456db8@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/68c0b6b5.050a0220.3c6139.0013.GAE@google.com/ +Signed-off-by: Kuniyuki Iwashima 
+Signed-off-by: Martin KaFai Lau +Link: https://patch.msgid.link/20250909232623.4151337-1-kuniyu@google.com +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_bpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index 8c1508a2e241a..df0b9edd4e87e 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -336,8 +336,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, + if (!psock->cork) { + psock->cork = kzalloc(sizeof(*psock->cork), + GFP_ATOMIC | __GFP_NOWARN); +- if (!psock->cork) ++ if (!psock->cork) { ++ sk_msg_free(sk, msg); ++ *copied = 0; + return -ENOMEM; ++ } + } + memcpy(psock->cork, msg, sizeof(*msg)); + return 0; +-- +2.51.0 + diff --git a/queue-6.1/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch b/queue-6.1/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch new file mode 100644 index 0000000000..cc0fa88032 --- /dev/null +++ b/queue-6.1/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch @@ -0,0 +1,90 @@ +From c3d245655de3e0459227cf1220df4d0c3bb6d8d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Aug 2025 16:51:00 +0200 +Subject: flexfiles/pNFS: fix NULL checks on result of + ff_layout_choose_ds_for_read + +From: Tigran Mkrtchyan + +[ Upstream commit 5a46d2339a5ae268ede53a221f20433d8ea4f2f9 ] + +Recent commit f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS +errors") has changed the error return type of ff_layout_choose_ds_for_read() from +NULL to an error pointer. However, not all code paths have been updated +to match the change. Thus, some non-NULL checks will accept error pointers +as a valid return value. 
+ +Reported-by: Dan Carpenter +Suggested-by: Dan Carpenter +Fixes: f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS errors") +Signed-off-by: Tigran Mkrtchyan +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 5dd16f4ae74d1..21016ee2eafc8 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -756,8 +756,11 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, + continue; + + if (check_device && +- nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) ++ nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) { ++ // reinitialize the error state in case if this is the last iteration ++ ds = ERR_PTR(-EINVAL); + continue; ++ } + + *best_idx = idx; + break; +@@ -787,7 +790,7 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + struct nfs4_pnfs_ds *ds; + + ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); +- if (ds) ++ if (!IS_ERR(ds)) + return ds; + return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); + } +@@ -801,7 +804,7 @@ ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, + + ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, + best_idx); +- if (ds || !pgio->pg_mirror_idx) ++ if (!IS_ERR(ds) || !pgio->pg_mirror_idx) + return ds; + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); + } +@@ -859,7 +862,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, + req->wb_nio = 0; + + ds = ff_layout_get_ds_for_read(pgio, &ds_idx); +- if (!ds) { ++ if (IS_ERR(ds)) { + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) + goto out_mds; + pnfs_generic_pg_cleanup(pgio); +@@ -1063,11 +1066,13 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) + { + 
u32 idx = hdr->pgio_mirror_idx + 1; + u32 new_idx = 0; ++ struct nfs4_pnfs_ds *ds; + +- if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) +- ff_layout_send_layouterror(hdr->lseg); +- else ++ ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); ++ if (IS_ERR(ds)) + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); ++ else ++ ff_layout_send_layouterror(hdr->lseg); + pnfs_read_resend_pnfs(hdr, new_idx); + } + +-- +2.51.0 + diff --git a/queue-6.1/ftrace-samples-fix-function-size-computation.patch b/queue-6.1/ftrace-samples-fix-function-size-computation.patch new file mode 100644 index 0000000000..5c356d4476 --- /dev/null +++ b/queue-6.1/ftrace-samples-fix-function-size-computation.patch @@ -0,0 +1,38 @@ +From ce5a67141e85b3f99fd28e6c333ac053d9599f67 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Aug 2025 18:16:46 +0200 +Subject: ftrace/samples: Fix function size computation + +From: Vladimir Riabchun + +[ Upstream commit 80d03a40837a9b26750a25122b906c052cc846c9 ] + +In my_tramp1 function .size directive was placed above +ASM_RET instruction, leading to a wrong function size. 
+ +Link: https://lore.kernel.org/aK3d7vxNcO52kEmg@vova-pc +Fixes: 9d907f1ae80b ("samples/ftrace: Fix asm function ELF annotations") +Signed-off-by: Vladimir Riabchun +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + samples/ftrace/ftrace-direct-modify.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c +index 39146fa83e20b..cbef49649ce01 100644 +--- a/samples/ftrace/ftrace-direct-modify.c ++++ b/samples/ftrace/ftrace-direct-modify.c +@@ -36,8 +36,8 @@ asm ( + " movq %rsp, %rbp\n" + " call my_direct_func1\n" + " leave\n" +-" .size my_tramp1, .-my_tramp1\n" + ASM_RET ++" .size my_tramp1, .-my_tramp1\n" + + " .type my_tramp2, @function\n" + " .globl my_tramp2\n" +-- +2.51.0 + diff --git a/queue-6.1/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch b/queue-6.1/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch new file mode 100644 index 0000000000..61661078af --- /dev/null +++ b/queue-6.1/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch @@ -0,0 +1,38 @@ +From c3bd7c5f236389b8611375b51ccda596a45694e6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:07:22 -0700 +Subject: NFSv4: Clear the NFS_CAP_FS_LOCATIONS flag if it is not set + +From: Trond Myklebust + +[ Upstream commit dd5a8621b886b02f8341c5d4ea68eb2c552ebd3e ] + +_nfs4_server_capabilities() is expected to clear any flags that are not +supported by the server. 
+ +Fixes: 8a59bb93b7e3 ("NFSv4 store server support for fs_location attribute") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 29f189dc334fd..f8a91d15982dc 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3888,8 +3888,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f + res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + } + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); +- server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | +- NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL); ++ server->caps &= ++ ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | ++ NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS); + server->fattr_valid = NFS_ATTR_FATTR_V4; + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && + res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) +-- +2.51.0 + diff --git a/queue-6.1/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch b/queue-6.1/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch new file mode 100644 index 0000000000..85d76dc9c2 --- /dev/null +++ b/queue-6.1/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch @@ -0,0 +1,35 @@ +From b49d2bab2c74f0c6dc48d7b8e0636710cf731e77 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:15:12 -0700 +Subject: NFSv4: Clear the NFS_CAP_XATTR flag if not supported by the server + +From: Trond Myklebust + +[ Upstream commit 4fb2b677fc1f70ee642c0beecc3cabf226ef5707 ] + +nfs_server_set_fsinfo() shouldn't assume that NFS_CAP_XATTR is unset +on entry to the function. 
+ +Fixes: b78ef845c35d ("NFSv4.2: query the server for extended attribute support") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/client.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/nfs/client.c b/fs/nfs/client.c +index 36025097d21b8..2ca04dcb192aa 100644 +--- a/fs/nfs/client.c ++++ b/fs/nfs/client.c +@@ -859,6 +859,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, + + if (fsinfo->xattr_support) + server->caps |= NFS_CAP_XATTR; ++ else ++ server->caps &= ~NFS_CAP_XATTR; + #endif + } + +-- +2.51.0 + diff --git a/queue-6.1/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch b/queue-6.1/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch new file mode 100644 index 0000000000..4da9b1b7e0 --- /dev/null +++ b/queue-6.1/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch @@ -0,0 +1,35 @@ +From 4668e4b88a6c72379545c59868bdf61bd8295077 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:02:16 -0700 +Subject: NFSv4: Don't clear capabilities that won't be reset + +From: Trond Myklebust + +[ Upstream commit 31f1a960ad1a14def94fa0b8c25d62b4c032813f ] + +Don't clear the capabilities that are not going to get reset by the call +to _nfs4_server_capabilities(). 
+ +Reported-by: Scott Haiden +Fixes: b01f21cacde9 ("NFS: Fix the setting of capabilities when automounting a new filesystem") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 71e96fddc6cb5..29f189dc334fd 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3957,7 +3957,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) + }; + int err; + +- nfs_server_set_init_caps(server); + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), +-- +2.51.0 + diff --git a/queue-6.1/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch b/queue-6.1/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch new file mode 100644 index 0000000000..1bd05e403c --- /dev/null +++ b/queue-6.1/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch @@ -0,0 +1,42 @@ +From 4509dbbf52d7baba86e4dfe58cd31f00b4a458c4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 17:35:16 +0000 +Subject: NFSv4/flexfiles: Fix layout merge mirror check. + +From: Jonathan Curley + +[ Upstream commit dd2fa82473453661d12723c46c9f43d9876a7efd ] + +Typo in ff_lseg_match_mirrors makes the diff ineffective. This results +in merge happening all the time. Merge happening all the time is +problematic because it marks lsegs invalid. Marking lsegs invalid +causes all outstanding IO to get restarted with EAGAIN and connections +to get closed. + +Closing connections constantly triggers race conditions in the RDMA +implementation... 
+ +Fixes: 660d1eb22301c ("pNFS/flexfile: Don't merge layout segments if the mirrors don't match") +Signed-off-by: Jonathan Curley +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 21016ee2eafc8..e84ac71bdc18f 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -276,7 +276,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1, + struct pnfs_layout_segment *l2) + { + const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1); +- const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1); ++ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2); + u32 i; + + if (fl1->mirror_array_cnt != fl2->mirror_array_cnt) +-- +2.51.0 + diff --git a/queue-6.1/proc-fix-type-confusion-in-pde_set_flags.patch b/queue-6.1/proc-fix-type-confusion-in-pde_set_flags.patch new file mode 100644 index 0000000000..fde09d5db6 --- /dev/null +++ b/queue-6.1/proc-fix-type-confusion-in-pde_set_flags.patch @@ -0,0 +1,56 @@ +From 4d6ab5eaad09c1f2b7af90f97c50d32b3c14243d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Sep 2025 21:57:15 +0800 +Subject: proc: fix type confusion in pde_set_flags() + +From: wangzijie + +[ Upstream commit 0ce9398aa0830f15f92bbed73853f9861c3e74ff ] + +Commit 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc +files") missed a key part in the definition of proc_dir_entry: + +union { + const struct proc_ops *proc_ops; + const struct file_operations *proc_dir_ops; +}; + +So dereference of ->proc_ops assumes it is a proc_ops structure results in +type confusion and make NULL check for 'proc_ops' not work for proc dir. + +Add !S_ISDIR(dp->mode) test before calling pde_set_flags() to fix it. 
+ +Link: https://lkml.kernel.org/r/20250904135715.3972782-1-wangzijie1@honor.com +Fixes: 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc files") +Signed-off-by: wangzijie +Reported-by: Brad Spengler +Closes: https://lore.kernel.org/all/20250903065758.3678537-1-wangzijie1@honor.com/ +Cc: Alexey Dobriyan +Cc: Al Viro +Cc: Christian Brauner +Cc: Jiri Slaby +Cc: Stefano Brivio +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/proc/generic.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/proc/generic.c b/fs/proc/generic.c +index c96c884208a98..21820c729b4bd 100644 +--- a/fs/proc/generic.c ++++ b/fs/proc/generic.c +@@ -389,7 +389,8 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + if (proc_alloc_inum(&dp->low_ino)) + goto out_free_entry; + +- pde_set_flags(dp); ++ if (!S_ISDIR(dp->mode)) ++ pde_set_flags(dp); + + write_lock(&proc_subdir_lock); + dp->parent = dir; +-- +2.51.0 + diff --git a/queue-6.1/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch b/queue-6.1/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch new file mode 100644 index 0000000000..da95d2afc2 --- /dev/null +++ b/queue-6.1/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch @@ -0,0 +1,50 @@ +From 57a7149d7598869240f478efb3d14755d8a30b0b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Aug 2025 14:05:57 +0200 +Subject: s390/cpum_cf: Deny all sampling events by counter PMU + +From: Thomas Richter + +[ Upstream commit ce971233242b5391d99442271f3ca096fb49818d ] + +Deny all sampling event by the CPUMF counter facility device driver +and return -ENOENT. This return value is used to try other PMUs. +Up to now events for type PERF_TYPE_HARDWARE were not tested for +sampling and returned later on -EOPNOTSUPP. This ends the search +for alternative PMUs. Change that behavior and try other PMUs +instead. 
+ +Fixes: 613a41b0d16e ("s390/cpum_cf: Reject request for sampling in event initialization") +Acked-by: Sumanth Korikkar +Signed-off-by: Thomas Richter +Signed-off-by: Alexander Gordeev +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/perf_cpum_cf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c +index 28fa80fd69fa0..77d36a1981325 100644 +--- a/arch/s390/kernel/perf_cpum_cf.c ++++ b/arch/s390/kernel/perf_cpum_cf.c +@@ -459,8 +459,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) + break; + + case PERF_TYPE_HARDWARE: +- if (is_sampling_event(event)) /* No sampling support */ +- return -ENOENT; + ev = attr->config; + if (!attr->exclude_user && attr->exclude_kernel) { + /* +@@ -554,6 +552,8 @@ static int cpumf_pmu_event_init(struct perf_event *event) + unsigned int type = event->attr.type; + int err; + ++ if (is_sampling_event(event)) /* No sampling support */ ++ return -ENOENT; + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) +-- +2.51.0 + diff --git a/queue-6.1/series b/queue-6.1/series index 791f1eb62b..d7cb40f52e 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -6,3 +6,14 @@ media-i2c-imx214-fix-link-frequency-validation.patch net-fix-null-ptr-deref-by-sock_lock_init_class_and_name-and-rmmod.patch asoc-q6apm-dai-schedule-all-available-frames-to-avoid-dsp-under-runs.patch tracing-do-not-add-length-to-print-format-in-synthetic-events.patch +flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch +nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch +nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch +nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch +tracing-fix-tracing_marker-may-trigger-page-fault-du.patch +ftrace-samples-fix-function-size-computation.patch +nfsv4-flexfiles-fix-layout-merge-mirror-check.patch 
+tracing-silence-warning-when-chunk-allocation-fails-.patch +s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch +tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch +proc-fix-type-confusion-in-pde_set_flags.patch diff --git a/queue-6.1/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch b/queue-6.1/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch new file mode 100644 index 0000000000..f3e2440b89 --- /dev/null +++ b/queue-6.1/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch @@ -0,0 +1,98 @@ +From fd1d4043d5a3e3f8124ab250535a7e87ade4ac83 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 23:26:12 +0000 +Subject: tcp_bpf: Call sk_msg_free() when tcp_bpf_send_verdict() fails to + allocate psock->cork. + +From: Kuniyuki Iwashima + +[ Upstream commit a3967baad4d533dc254c31e0d221e51c8d223d58 ] + +syzbot reported the splat below. [0] + +The repro does the following: + + 1. Load a sk_msg prog that calls bpf_msg_cork_bytes(msg, cork_bytes) + 2. Attach the prog to a SOCKMAP + 3. Add a socket to the SOCKMAP + 4. Activate fault injection + 5. Send data less than cork_bytes + +At 5., the data is carried over to the next sendmsg() as it is +smaller than the cork_bytes specified by bpf_msg_cork_bytes(). + +Then, tcp_bpf_send_verdict() tries to allocate psock->cork to hold +the data, but this fails silently due to fault injection + __GFP_NOWARN. + +If the allocation fails, we need to revert the sk->sk_forward_alloc +change done by sk_msg_alloc(). + +Let's call sk_msg_free() when tcp_bpf_send_verdict fails to allocate +psock->cork. + +The "*copied" also needs to be updated such that a proper error can +be returned to the caller, sendmsg. It fails to allocate psock->cork. +Nothing has been corked so far, so this patch simply sets "*copied" +to 0. 
+ +[0]: +WARNING: net/ipv4/af_inet.c:156 at inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156, CPU#1: syz-executor/5983 +Modules linked in: +CPU: 1 UID: 0 PID: 5983 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 +RIP: 0010:inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156 +Code: 0f 0b 90 e9 62 fe ff ff e8 7a db b5 f7 90 0f 0b 90 e9 95 fe ff ff e8 6c db b5 f7 90 0f 0b 90 e9 bb fe ff ff e8 5e db b5 f7 90 <0f> 0b 90 e9 e1 fe ff ff 89 f9 80 e1 07 80 c1 03 38 c1 0f 8c 9f fc +RSP: 0018:ffffc90000a08b48 EFLAGS: 00010246 +RAX: ffffffff8a09d0b2 RBX: dffffc0000000000 RCX: ffff888024a23c80 +RDX: 0000000000000100 RSI: 0000000000000fff RDI: 0000000000000000 +RBP: 0000000000000fff R08: ffff88807e07c627 R09: 1ffff1100fc0f8c4 +R10: dffffc0000000000 R11: ffffed100fc0f8c5 R12: ffff88807e07c380 +R13: dffffc0000000000 R14: ffff88807e07c60c R15: 1ffff1100fc0f872 +FS: 00005555604c4500(0000) GS:ffff888125af1000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005555604df5c8 CR3: 0000000032b06000 CR4: 00000000003526f0 +Call Trace: + + __sk_destruct+0x86/0x660 net/core/sock.c:2339 + rcu_do_batch kernel/rcu/tree.c:2605 [inline] + rcu_core+0xca8/0x1770 kernel/rcu/tree.c:2861 + handle_softirqs+0x286/0x870 kernel/softirq.c:579 + __do_softirq kernel/softirq.c:613 [inline] + invoke_softirq kernel/softirq.c:453 [inline] + __irq_exit_rcu+0xca/0x1f0 kernel/softirq.c:680 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:696 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1052 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1052 + + +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Reported-by: syzbot+4cabd1d2fa917a456db8@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/68c0b6b5.050a0220.3c6139.0013.GAE@google.com/ +Signed-off-by: Kuniyuki Iwashima 
+Signed-off-by: Martin KaFai Lau +Link: https://patch.msgid.link/20250909232623.4151337-1-kuniyu@google.com +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_bpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index bf10fa3c37b76..1727ac094e106 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -403,8 +403,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, + if (!psock->cork) { + psock->cork = kzalloc(sizeof(*psock->cork), + GFP_ATOMIC | __GFP_NOWARN); +- if (!psock->cork) ++ if (!psock->cork) { ++ sk_msg_free(sk, msg); ++ *copied = 0; + return -ENOMEM; ++ } + } + memcpy(psock->cork, msg, sizeof(*msg)); + return 0; +-- +2.51.0 + diff --git a/queue-6.1/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch b/queue-6.1/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch new file mode 100644 index 0000000000..ea652bc486 --- /dev/null +++ b/queue-6.1/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch @@ -0,0 +1,89 @@ +From c88e3b767953116d83f60f1a465b0c6eb841dfd6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Aug 2025 10:51:52 +0000 +Subject: tracing: Fix tracing_marker may trigger page fault during + preempt_disable + +From: Luo Gengkun + +[ Upstream commit 3d62ab32df065e4a7797204a918f6489ddb8a237 ] + +Both tracing_mark_write and tracing_mark_raw_write call +__copy_from_user_inatomic during preempt_disable. But in some case, +__copy_from_user_inatomic may trigger page fault, and will call schedule() +subtly. And if a task is migrated to other cpu, the following warning will +be trigger: + if (RB_WARN_ON(cpu_buffer, + !local_read(&cpu_buffer->committing))) + +An example can illustrate this issue: + +process flow CPU +--------------------------------------------------------------------- + +tracing_mark_raw_write(): cpu:0 + ... + ring_buffer_lock_reserve(): cpu:0 + ... 
+ cpu = raw_smp_processor_id() cpu:0 + cpu_buffer = buffer->buffers[cpu] cpu:0 + ... + ... + __copy_from_user_inatomic(): cpu:0 + ... + # page fault + do_mem_abort(): cpu:0 + ... + # Call schedule + schedule() cpu:0 + ... + # the task schedule to cpu1 + __buffer_unlock_commit(): cpu:1 + ... + ring_buffer_unlock_commit(): cpu:1 + ... + cpu = raw_smp_processor_id() cpu:1 + cpu_buffer = buffer->buffers[cpu] cpu:1 + +As shown above, the process will acquire cpuid twice and the return values +are not the same. + +To fix this problem using copy_from_user_nofault instead of +__copy_from_user_inatomic, as the former performs 'access_ok' before +copying. + +Link: https://lore.kernel.org/20250819105152.2766363-1-luogengkun@huaweicloud.com +Fixes: 656c7f0d2d2b ("tracing: Replace kmap with copy_from_user() in trace_marker writing") +Signed-off-by: Luo Gengkun +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 7e8ab09d98cc7..4a4aaca672b8f 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7253,7 +7253,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, + entry = ring_buffer_event_data(event); + entry->ip = _THIS_IP_; + +- len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->buf, ubuf, cnt); + if (len) { + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); + cnt = FAULTED_SIZE; +@@ -7328,7 +7328,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + + entry = ring_buffer_event_data(event); + +- len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->id, ubuf, cnt); + if (len) { + entry->id = -1; + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); +-- +2.51.0 + diff --git a/queue-6.1/tracing-silence-warning-when-chunk-allocation-fails-.patch 
b/queue-6.1/tracing-silence-warning-when-chunk-allocation-fails-.patch new file mode 100644 index 0000000000..a0f3bbe59a --- /dev/null +++ b/queue-6.1/tracing-silence-warning-when-chunk-allocation-fails-.patch @@ -0,0 +1,97 @@ +From 1ae74f33b4de3575e1fd6f7a8ec5ef99ab193c91 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 02:46:58 +0000 +Subject: tracing: Silence warning when chunk allocation fails in + trace_pid_write + +From: Pu Lehui + +[ Upstream commit cd4453c5e983cf1fd5757e9acb915adb1e4602b6 ] + +Syzkaller trigger a fault injection warning: + +WARNING: CPU: 1 PID: 12326 at tracepoint_add_func+0xbfc/0xeb0 +Modules linked in: +CPU: 1 UID: 0 PID: 12326 Comm: syz.6.10325 Tainted: G U 6.14.0-rc5-syzkaller #0 +Tainted: [U]=USER +Hardware name: Google Compute Engine/Google Compute Engine +RIP: 0010:tracepoint_add_func+0xbfc/0xeb0 kernel/tracepoint.c:294 +Code: 09 fe ff 90 0f 0b 90 0f b6 74 24 43 31 ff 41 bc ea ff ff ff +RSP: 0018:ffffc9000414fb48 EFLAGS: 00010283 +RAX: 00000000000012a1 RBX: ffffffff8e240ae0 RCX: ffffc90014b78000 +RDX: 0000000000080000 RSI: ffffffff81bbd78b RDI: 0000000000000001 +RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000001 R12: ffffffffffffffef +R13: 0000000000000000 R14: dffffc0000000000 R15: ffffffff81c264f0 +FS: 00007f27217f66c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000001b2e80dff8 CR3: 00000000268f8000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + tracepoint_probe_register_prio+0xc0/0x110 kernel/tracepoint.c:464 + register_trace_prio_sched_switch include/trace/events/sched.h:222 [inline] + register_pid_events kernel/trace/trace_events.c:2354 [inline] + event_pid_write.isra.0+0x439/0x7a0 kernel/trace/trace_events.c:2425 + vfs_write+0x24c/0x1150 fs/read_write.c:677 + 
ksys_write+0x12b/0x250 fs/read_write.c:731 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +We can reproduce the warning by following the steps below: +1. echo 8 >> set_event_notrace_pid. Let tr->filtered_pids owns one pid + and register sched_switch tracepoint. +2. echo ' ' >> set_event_pid, and perform fault injection during chunk + allocation of trace_pid_list_alloc. Let pid_list with no pid and +assign to tr->filtered_pids. +3. echo ' ' >> set_event_pid. Let pid_list is NULL and assign to + tr->filtered_pids. +4. echo 9 >> set_event_pid, will trigger the double register + sched_switch tracepoint warning. + +The reason is that syzkaller injects a fault into the chunk allocation +in trace_pid_list_alloc, causing a failure in trace_pid_list_set, which +may trigger double register of the same tracepoint. This only occurs +when the system is about to crash, but to suppress this warning, let's +add failure handling logic to trace_pid_list_set. 
+ +Link: https://lore.kernel.org/20250908024658.2390398-1-pulehui@huaweicloud.com +Fixes: 8d6e90983ade ("tracing: Create a sparse bitmask for pid filtering") +Reported-by: syzbot+161412ccaeff20ce4dde@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/67cb890e.050a0220.d8275.022e.GAE@google.com +Signed-off-by: Pu Lehui +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 4a4aaca672b8f..9795fc7daeb6b 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -710,7 +710,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, + /* copy the current bits to the new max */ + ret = trace_pid_list_first(filtered_pids, &pid); + while (!ret) { +- trace_pid_list_set(pid_list, pid); ++ ret = trace_pid_list_set(pid_list, pid); ++ if (ret < 0) ++ goto out; ++ + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); + nr_pids++; + } +@@ -747,6 +750,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, + trace_parser_clear(&parser); + ret = 0; + } ++ out: + trace_parser_put(&parser); + + if (ret < 0) { +-- +2.51.0 + diff --git a/queue-6.12/alsa-hda-realtek-fix-built-in-mic-assignment-on-asus.patch b/queue-6.12/alsa-hda-realtek-fix-built-in-mic-assignment-on-asus.patch new file mode 100644 index 0000000000..979e0eb9a0 --- /dev/null +++ b/queue-6.12/alsa-hda-realtek-fix-built-in-mic-assignment-on-asus.patch @@ -0,0 +1,44 @@ +From 27e3d18811a8ac65b2acd2d1b4ec10315703591a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Mar 2025 16:32:32 +0100 +Subject: ALSA: hda/realtek: Fix built-in mic assignment on ASUS VivoBook + X515UA + +From: Takashi Iwai + +[ Upstream commit 829ee558f3527fd602c6e2e9f270959d1de09fe0 ] + +ASUS VivoBook X515UA with PCI SSID 1043:106f had a default quirk +pickup via pin table that applies ALC256_FIXUP_ASUS_MIC, but this adds +a bogus built-in mic 
pin 0x13 enabled. This was no big problem +because the pin 0x13 was assigned as the secondary mic, but the recent +fix made the entries sorted, hence this bogus pin appeared now as the +primary input and it broke. + +For fixing the bug, put the right quirk entry for this device pointing +to ALC256_FIXUP_ASUS_MIC_NO_PRESENCE. + +Fixes: 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=219897 +Link: https://patch.msgid.link/20250324153233.21195-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index a28a59926adad..eb0e404c17841 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -10857,6 +10857,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), + SND_PCI_QUIRK(0x1043, 0x1054, "ASUS G614FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), ++ SND_PCI_QUIRK(0x1043, 0x106f, "ASUS VivoBook X515UA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1074, "ASUS G614PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK), + SND_PCI_QUIRK(0x1043, 0x10a4, "ASUS TP3407SA", ALC287_FIXUP_TAS2781_I2C), +-- +2.51.0 + diff --git a/queue-6.12/bpf-allow-fall-back-to-interpreter-for-programs-with.patch b/queue-6.12/bpf-allow-fall-back-to-interpreter-for-programs-with.patch new file mode 100644 index 0000000000..27beee2c92 --- /dev/null +++ b/queue-6.12/bpf-allow-fall-back-to-interpreter-for-programs-with.patch @@ -0,0 +1,110 @@ +From d9380da850a6ea2c2e8116d4f692e48a402a9a7a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 22:46:14 +0800 +Subject: bpf: Allow fall back to 
interpreter for programs with stack size <= + 512 + +From: KaFai Wan + +[ Upstream commit df0cb5cb50bd54d3cd4d0d83417ceec6a66404aa ] + +OpenWRT users reported regression on ARMv6 devices after updating to latest +HEAD, where tcpdump filter: + +tcpdump "not ether host 3c37121a2b3c and not ether host 184ecbca2a3a \ +and not ether host 14130b4d3f47 and not ether host f0f61cf440b7 \ +and not ether host a84b4dedf471 and not ether host d022be17e1d7 \ +and not ether host 5c497967208b and not ether host 706655784d5b" + +fails with warning: "Kernel filter failed: No error information" +when using config: + # CONFIG_BPF_JIT_ALWAYS_ON is not set + CONFIG_BPF_JIT_DEFAULT_ON=y + +The issue arises because commits: +1. "bpf: Fix array bounds error with may_goto" changed default runtime to + __bpf_prog_ret0_warn when jit_requested = 1 +2. "bpf: Avoid __bpf_prog_ret0_warn when jit fails" returns error when + jit_requested = 1 but jit fails + +This change restores interpreter fallback capability for BPF programs with +stack size <= 512 bytes when jit fails. + +Reported-by: Felix Fietkau +Closes: https://lore.kernel.org/bpf/2e267b4b-0540-45d8-9310-e127bf95fc63@nbd.name/ +Fixes: 6ebc5030e0c5 ("bpf: Fix array bounds error with may_goto") +Signed-off-by: KaFai Wan +Acked-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20250909144614.2991253-1-kafai.wan@linux.dev +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/core.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 6f91e3a123e55..9380e0fd5e4af 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -2299,8 +2299,7 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, + const struct bpf_insn *insn) + { + /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON +- * is not working properly, or interpreter is being used when +- * prog->jit_requested is not 0, so warn about it! 
++ * is not working properly, so warn about it! + */ + WARN_ON_ONCE(1); + return 0; +@@ -2401,8 +2400,9 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) + return ret; + } + +-static void bpf_prog_select_func(struct bpf_prog *fp) ++static bool bpf_prog_select_interpreter(struct bpf_prog *fp) + { ++ bool select_interpreter = false; + #ifndef CONFIG_BPF_JIT_ALWAYS_ON + u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + u32 idx = (round_up(stack_depth, 32) / 32) - 1; +@@ -2411,15 +2411,16 @@ static void bpf_prog_select_func(struct bpf_prog *fp) + * But for non-JITed programs, we don't need bpf_func, so no bounds + * check needed. + */ +- if (!fp->jit_requested && +- !WARN_ON_ONCE(idx >= ARRAY_SIZE(interpreters))) { ++ if (idx < ARRAY_SIZE(interpreters)) { + fp->bpf_func = interpreters[idx]; ++ select_interpreter = true; + } else { + fp->bpf_func = __bpf_prog_ret0_warn; + } + #else + fp->bpf_func = __bpf_prog_ret0_warn; + #endif ++ return select_interpreter; + } + + /** +@@ -2438,7 +2439,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) + /* In case of BPF to BPF calls, verifier did all the prep + * work with regards to JITing, etc. + */ +- bool jit_needed = fp->jit_requested; ++ bool jit_needed = false; + + if (fp->bpf_func) + goto finalize; +@@ -2447,7 +2448,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) + bpf_prog_has_kfunc_call(fp)) + jit_needed = true; + +- bpf_prog_select_func(fp); ++ if (!bpf_prog_select_interpreter(fp)) ++ jit_needed = true; + + /* eBPF JITs can rewrite the program in case constant + * blinding is active. 
However, in case of error during +-- +2.51.0 + diff --git a/queue-6.12/bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch b/queue-6.12/bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch new file mode 100644 index 0000000000..6f7c09467c --- /dev/null +++ b/queue-6.12/bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch @@ -0,0 +1,59 @@ +From 5d7d2a7e4311417ac41e6db04f1de129527501c1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 16:36:56 +0200 +Subject: bpf: Fix out-of-bounds dynptr write in bpf_crypto_crypt + +From: Daniel Borkmann + +[ Upstream commit f9bb6ffa7f5ad0f8ee0f53fc4a10655872ee4a14 ] + +Stanislav reported that in bpf_crypto_crypt() the destination dynptr's +size is not validated to be at least as large as the source dynptr's +size before calling into the crypto backend with 'len = src_len'. This +can result in an OOB write when the destination is smaller than the +source. + +Concretely, in mentioned function, psrc and pdst are both linear +buffers fetched from each dynptr: + + psrc = __bpf_dynptr_data(src, src_len); + [...] + pdst = __bpf_dynptr_data_rw(dst, dst_len); + [...] + err = decrypt ? + ctx->type->decrypt(ctx->tfm, psrc, pdst, src_len, piv) : + ctx->type->encrypt(ctx->tfm, psrc, pdst, src_len, piv); + +The crypto backend expects pdst to be large enough with a src_len length +that can be written. Add an additional src_len > dst_len check and bail +out if it's the case. Note that these kfuncs are accessible under root +privileges only. 
+ +Fixes: 3e1c6f35409f ("bpf: make common crypto API for TC/XDP programs") +Reported-by: Stanislav Fort +Signed-off-by: Daniel Borkmann +Cc: Vadim Fedorenko +Reviewed-by: Vadim Fedorenko +Link: https://lore.kernel.org/r/20250829143657.318524-1-daniel@iogearbox.net +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/crypto.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c +index 94854cd9c4cc3..83c4d9943084b 100644 +--- a/kernel/bpf/crypto.c ++++ b/kernel/bpf/crypto.c +@@ -278,7 +278,7 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx, + siv_len = siv ? __bpf_dynptr_size(siv) : 0; + src_len = __bpf_dynptr_size(src); + dst_len = __bpf_dynptr_size(dst); +- if (!src_len || !dst_len) ++ if (!src_len || !dst_len || src_len > dst_len) + return -EINVAL; + + if (siv_len != ctx->siv_len) +-- +2.51.0 + diff --git a/queue-6.12/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch b/queue-6.12/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch new file mode 100644 index 0000000000..8aa5766270 --- /dev/null +++ b/queue-6.12/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch @@ -0,0 +1,92 @@ +From 07c53bcb5c6fe9a9ed067caf9e7b6accc58c849e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 09:52:20 +0000 +Subject: bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init() + +From: Peilin Ye + +[ Upstream commit 6d78b4473cdb08b74662355a9e8510bde09c511e ] + +Currently, calling bpf_map_kmalloc_node() from __bpf_async_init() can +cause various locking issues; see the following stack trace (edited for +style) as one example: + +... 
+ [10.011566] do_raw_spin_lock.cold + [10.011570] try_to_wake_up (5) double-acquiring the same + [10.011575] kick_pool rq_lock, causing a hardlockup + [10.011579] __queue_work + [10.011582] queue_work_on + [10.011585] kernfs_notify + [10.011589] cgroup_file_notify + [10.011593] try_charge_memcg (4) memcg accounting raises an + [10.011597] obj_cgroup_charge_pages MEMCG_MAX event + [10.011599] obj_cgroup_charge_account + [10.011600] __memcg_slab_post_alloc_hook + [10.011603] __kmalloc_node_noprof +... + [10.011611] bpf_map_kmalloc_node + [10.011612] __bpf_async_init + [10.011615] bpf_timer_init (3) BPF calls bpf_timer_init() + [10.011617] bpf_prog_xxxxxxxxxxxxxxxx_fcg_runnable + [10.011619] bpf__sched_ext_ops_runnable + [10.011620] enqueue_task_scx (2) BPF runs with rq_lock held + [10.011622] enqueue_task + [10.011626] ttwu_do_activate + [10.011629] sched_ttwu_pending (1) grabs rq_lock +... + +The above was reproduced on bpf-next (b338cf849ec8) by modifying +./tools/sched_ext/scx_flatcg.bpf.c to call bpf_timer_init() during +ops.runnable(), and hacking the memcg accounting code a bit to make +a bpf_timer_init() call more likely to raise an MEMCG_MAX event. + +We have also run into other similar variants (both internally and on +bpf-next), including double-acquiring cgroup_file_kn_lock, the same +worker_pool::lock, etc. + +As suggested by Shakeel, fix this by using __GFP_HIGH instead of +GFP_ATOMIC in __bpf_async_init(), so that e.g. if try_charge_memcg() +raises an MEMCG_MAX event, we call __memcg_memory_event() with +@allow_spinning=false and avoid calling cgroup_file_notify() there. 
+ +Depends on mm patch +"memcg: skip cgroup_file_notify if spinning is not allowed": +https://lore.kernel.org/bpf/20250905201606.66198-1-shakeel.butt@linux.dev/ + +v0 approach s/bpf_map_kmalloc_node/bpf_mem_alloc/ +https://lore.kernel.org/bpf/20250905061919.439648-1-yepeilin@google.com/ +v1 approach: +https://lore.kernel.org/bpf/20250905234547.862249-1-yepeilin@google.com/ + +Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") +Suggested-by: Shakeel Butt +Signed-off-by: Peilin Ye +Link: https://lore.kernel.org/r/20250909095222.2121438-1-yepeilin@google.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/helpers.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c +index be4429463599f..a0bf39b7359aa 100644 +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -1276,8 +1276,11 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u + goto out; + } + +- /* allocate hrtimer via map_kmalloc to use memcg accounting */ +- cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node); ++ /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until ++ * kmalloc_nolock() is available, avoid locking issues by using ++ * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM). 
++ */ ++ cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node); + if (!cb) { + ret = -ENOMEM; + goto out; +-- +2.51.0 + diff --git a/queue-6.12/dma-debug-fix-physical-address-calculation-for-struc.patch b/queue-6.12/dma-debug-fix-physical-address-calculation-for-struc.patch new file mode 100644 index 0000000000..d9c925eaac --- /dev/null +++ b/queue-6.12/dma-debug-fix-physical-address-calculation-for-struc.patch @@ -0,0 +1,96 @@ +From 820d123f436392a581de753f3febf1f3f8a6e6e5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 27 Nov 2024 21:59:26 +0300 +Subject: dma-debug: fix physical address calculation for struct + dma_debug_entry + +From: Fedor Pchelkin + +[ Upstream commit aef7ee7649e02f7fc0d2e5e532f352496976dcb1 ] + +Offset into the page should also be considered while calculating a physical +address for struct dma_debug_entry. page_to_phys() just shifts the value +PAGE_SHIFT bits to the left so offset part is zero-filled. + +An example (wrong) debug assertion failure with CONFIG_DMA_API_DEBUG +enabled which is observed during systemd boot process after recent +dma-debug changes: + +DMA-API: e1000 0000:00:03.0: cacheline tracking EEXIST, overlapping mappings aren't supported +WARNING: CPU: 4 PID: 941 at kernel/dma/debug.c:596 add_dma_entry +CPU: 4 UID: 0 PID: 941 Comm: ip Not tainted 6.12.0+ #288 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 +RIP: 0010:add_dma_entry kernel/dma/debug.c:596 +Call Trace: + +debug_dma_map_page kernel/dma/debug.c:1236 +dma_map_page_attrs kernel/dma/mapping.c:179 +e1000_alloc_rx_buffers drivers/net/ethernet/intel/e1000/e1000_main.c:4616 +... + +Found by Linux Verification Center (linuxtesting.org). 
+ +Fixes: 9d4f645a1fd4 ("dma-debug: store a phys_addr_t in struct dma_debug_entry") +Signed-off-by: Fedor Pchelkin +[hch: added a little helper to clean up the code] +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + kernel/dma/debug.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c +index 0221023e1120d..39972e834e7a1 100644 +--- a/kernel/dma/debug.c ++++ b/kernel/dma/debug.c +@@ -1224,7 +1224,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, + + entry->dev = dev; + entry->type = dma_debug_single; +- entry->paddr = page_to_phys(page); ++ entry->paddr = page_to_phys(page) + offset; + entry->dev_addr = dma_addr; + entry->size = size; + entry->direction = direction; +@@ -1382,6 +1382,18 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, + } + } + ++static phys_addr_t virt_to_paddr(void *virt) ++{ ++ struct page *page; ++ ++ if (is_vmalloc_addr(virt)) ++ page = vmalloc_to_page(virt); ++ else ++ page = virt_to_page(virt); ++ ++ return page_to_phys(page) + offset_in_page(virt); ++} ++ + void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt, + unsigned long attrs) +@@ -1404,8 +1416,7 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, + + entry->type = dma_debug_coherent; + entry->dev = dev; +- entry->paddr = page_to_phys((is_vmalloc_addr(virt) ? +- vmalloc_to_page(virt) : virt_to_page(virt))); ++ entry->paddr = virt_to_paddr(virt); + entry->size = size; + entry->dev_addr = dma_addr; + entry->direction = DMA_BIDIRECTIONAL; +@@ -1428,8 +1439,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size, + if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) + return; + +- ref.paddr = page_to_phys((is_vmalloc_addr(virt) ? 
+- vmalloc_to_page(virt) : virt_to_page(virt))); ++ ref.paddr = virt_to_paddr(virt); + + if (unlikely(dma_debug_disabled())) + return; +-- +2.51.0 + diff --git a/queue-6.12/dma-mapping-fix-swapped-dir-flags-arguments-to-trace.patch b/queue-6.12/dma-mapping-fix-swapped-dir-flags-arguments-to-trace.patch new file mode 100644 index 0000000000..adfc048a1e --- /dev/null +++ b/queue-6.12/dma-mapping-fix-swapped-dir-flags-arguments-to-trace.patch @@ -0,0 +1,39 @@ +From 46594f0563c027618ae23449c2d6e29d3bfb5501 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 31 Oct 2024 11:45:14 -0400 +Subject: dma-mapping: fix swapped dir/flags arguments to + trace_dma_alloc_sgt_err + +From: Sean Anderson + +[ Upstream commit d5bbfbad58ec0ccd187282f0e171bc763efa6828 ] + +trace_dma_alloc_sgt_err was called with the dir and flags arguments +swapped. Fix this. + +Fixes: 68b6dbf1f441 ("dma-mapping: trace more error paths") +Signed-off-by: Sean Anderson +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202410302243.1wnTlPk3-lkp@intel.com/ +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + kernel/dma/mapping.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c +index c12c62ad8a6bf..c7cc4e33ec6e0 100644 +--- a/kernel/dma/mapping.c ++++ b/kernel/dma/mapping.c +@@ -782,7 +782,7 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, + trace_dma_alloc_sgt(dev, sgt, size, dir, gfp, attrs); + debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); + } else { +- trace_dma_alloc_sgt_err(dev, NULL, 0, size, gfp, dir, attrs); ++ trace_dma_alloc_sgt_err(dev, NULL, 0, size, dir, gfp, attrs); + } + return sgt; + } +-- +2.51.0 + diff --git a/queue-6.12/drm-amd-display-fix-error-pointers-in-amdgpu_dm_crtc.patch b/queue-6.12/drm-amd-display-fix-error-pointers-in-amdgpu_dm_crtc.patch new file mode 100644 index 0000000000..8e16b86a81 --- /dev/null +++ 
b/queue-6.12/drm-amd-display-fix-error-pointers-in-amdgpu_dm_crtc.patch @@ -0,0 +1,81 @@ +From fc1be2a20ecaccf30a8e0f79444f189da4c193c5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Jan 2025 22:29:06 +0530 +Subject: drm/amd/display: Fix error pointers in + amdgpu_dm_crtc_mem_type_changed + +From: Srinivasan Shanmugam + +[ Upstream commit da29abe71e164f10917ea6da02f5d9c192ccdeb7 ] + +The function amdgpu_dm_crtc_mem_type_changed was dereferencing pointers +returned by drm_atomic_get_plane_state without checking for errors. This +could lead to undefined behavior if the function returns an error pointer. + +This commit adds checks using IS_ERR to ensure that new_plane_state and +old_plane_state are valid before dereferencing them. + +Fixes the below: + +drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:11486 amdgpu_dm_crtc_mem_type_changed() +error: 'new_plane_state' dereferencing possible ERR_PTR() + +drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c + 11475 static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev, + 11476 struct drm_atomic_state *state, + 11477 struct drm_crtc_state *crtc_state) + 11478 { + 11479 struct drm_plane *plane; + 11480 struct drm_plane_state *new_plane_state, *old_plane_state; + 11481 + 11482 drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) { + 11483 new_plane_state = drm_atomic_get_plane_state(state, plane); + 11484 old_plane_state = drm_atomic_get_plane_state(state, plane); + ^^^^^^^^^^^^^^^^^^^^^^^^^^ These functions can fail. 
+ + 11485 +--> 11486 if (old_plane_state->fb && new_plane_state->fb && + 11487 get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb)) + 11488 return true; + 11489 } + 11490 + 11491 return false; + 11492 } + +Fixes: 4caacd1671b7 ("drm/amd/display: Do not elevate mem_type change to full update") +Cc: Leo Li +Cc: Tom Chung +Cc: Rodrigo Siqueira +Cc: Roman Li +Cc: Alex Hung +Cc: Aurabindo Pillai +Cc: Harry Wentland +Cc: Hamza Mahfooz +Reported-by: Dan Carpenter +Signed-off-by: Srinivasan Shanmugam +Reviewed-by: Roman Li +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index 9763752cf5cde..b585c321d3454 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -11483,6 +11483,11 @@ static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev, + new_plane_state = drm_atomic_get_plane_state(state, plane); + old_plane_state = drm_atomic_get_plane_state(state, plane); + ++ if (IS_ERR(new_plane_state) || IS_ERR(old_plane_state)) { ++ DRM_ERROR("Failed to get plane state for plane %s\n", plane->name); ++ return false; ++ } ++ + if (old_plane_state->fb && new_plane_state->fb && + get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb)) + return true; +-- +2.51.0 + diff --git a/queue-6.12/drm-amdgpu-add-back-jpeg-to-video-caps-for-carrizo-a.patch b/queue-6.12/drm-amdgpu-add-back-jpeg-to-video-caps-for-carrizo-a.patch new file mode 100644 index 0000000000..a87b978960 --- /dev/null +++ b/queue-6.12/drm-amdgpu-add-back-jpeg-to-video-caps-for-carrizo-a.patch @@ -0,0 +1,43 @@ +From a5e081bf596f980174b1f396f7e9046942768f80 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 7 Apr 2025 13:12:11 +0200 +Subject: drm/amdgpu: Add back JPEG to video caps for 
carrizo and newer + +From: David Rosca + +[ Upstream commit 2036be31741b00f030530381643a8b35a5a42b5c ] + +JPEG is not supported on Vega only. + +Fixes: 0a6e7b06bdbe ("drm/amdgpu: Remove JPEG from vega and carrizo video caps") +Signed-off-by: David Rosca +Reviewed-by: Leo Liu +Signed-off-by: Alex Deucher +(cherry picked from commit 0f4dfe86fe922c37bcec99dce80a15b4d5d4726d) +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/vi.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c +index 48ab93e715c82..6e4f9c6108f60 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vi.c ++++ b/drivers/gpu/drm/amd/amdgpu/vi.c +@@ -239,6 +239,13 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = + .max_pixels_per_frame = 4096 * 4096, + .max_level = 186, + }, ++ { ++ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, ++ .max_width = 4096, ++ .max_height = 4096, ++ .max_pixels_per_frame = 4096 * 4096, ++ .max_level = 0, ++ }, + }; + + static const struct amdgpu_video_codecs cz_video_codecs_decode = +-- +2.51.0 + diff --git a/queue-6.12/drm-i915-pmu-fix-zero-delta-busyness-issue.patch b/queue-6.12/drm-i915-pmu-fix-zero-delta-busyness-issue.patch new file mode 100644 index 0000000000..48815961b1 --- /dev/null +++ b/queue-6.12/drm-i915-pmu-fix-zero-delta-busyness-issue.patch @@ -0,0 +1,92 @@ +From 66e3cc5ed55ad42acfb30464e411c1d63e776de4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Jan 2025 11:38:39 -0800 +Subject: drm/i915/pmu: Fix zero delta busyness issue + +From: Umesh Nerlige Ramappa + +[ Upstream commit cb5fab2afd906307876d79537ef0329033c40dd3 ] + +When running igt@gem_exec_balancer@individual for multiple iterations, +it is seen that the delta busyness returned by PMU is 0. 
The issue stems +from a combination of 2 implementation specific details: + +1) gt_park is throttling __update_guc_busyness_stats() so that it does +not hog PCI bandwidth for some use cases. (Ref: 59bcdb564b3ba) + +2) busyness implementation always returns monotonically increasing +counters. (Ref: cf907f6d29421) + +If an application queried an engine while it was active, +engine->stats.guc.running is set to true. Following that, if all PM +wakeref's are released, then gt is parked. At this time the throttling +of __update_guc_busyness_stats() may result in a missed update to the +running state of the engine (due to (1) above). This means subsequent +calls to guc_engine_busyness() will think that the engine is still +running and they will keep updating the cached counter (stats->total). +This results in an inflated cached counter. + +Later when the application runs a workload and queries for busyness, we +return the cached value since it is larger than the actual value (due to +(2) above) + +All subsequent queries will return the same large (inflated) value, so +the application sees a delta busyness of zero. + +Fix the issue by resetting the running state of engines each time +intel_guc_busyness_park() is called. 
+ +v2: (Rodrigo) +- Use the correct tag in commit message +- Drop the redundant wakeref check in guc_engine_busyness() and update + commit message + +Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13366 +Fixes: cf907f6d2942 ("i915/guc: Ensure busyness counter increases motonically") +Signed-off-by: Umesh Nerlige Ramappa +Reviewed-by: Rodrigo Vivi +Link: https://patchwork.freedesktop.org/patch/msgid/20250123193839.2394694-1-umesh.nerlige.ramappa@intel.com +(cherry picked from commit 431b742e2bfc9f6dd713f261629741980996d001) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Sasha Levin +--- + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +index b48373b166779..355a21eb48443 100644 +--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c ++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +@@ -1469,6 +1469,19 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) + spin_unlock_irqrestore(&guc->timestamp.lock, flags); + } + ++static void __update_guc_busyness_running_state(struct intel_guc *guc) ++{ ++ struct intel_gt *gt = guc_to_gt(guc); ++ struct intel_engine_cs *engine; ++ enum intel_engine_id id; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&guc->timestamp.lock, flags); ++ for_each_engine(engine, gt, id) ++ engine->stats.guc.running = false; ++ spin_unlock_irqrestore(&guc->timestamp.lock, flags); ++} ++ + static void __update_guc_busyness_stats(struct intel_guc *guc) + { + struct intel_gt *gt = guc_to_gt(guc); +@@ -1619,6 +1632,9 @@ void intel_guc_busyness_park(struct intel_gt *gt) + if (!guc_submission_initialized(guc)) + return; + ++ /* Assume no engines are running and set running state to false */ ++ __update_guc_busyness_running_state(guc); ++ + /* + * There is a race with suspend flow where the worker runs after suspend + * and causes an unclaimed register 
access warning. Cancel the worker +-- +2.51.0 + diff --git a/queue-6.12/ext4-introduce-linear-search-for-dentries.patch b/queue-6.12/ext4-introduce-linear-search-for-dentries.patch new file mode 100644 index 0000000000..3c128f9390 --- /dev/null +++ b/queue-6.12/ext4-introduce-linear-search-for-dentries.patch @@ -0,0 +1,103 @@ +From 0e405b90a87eb2f4cff74b26fc1202d48e9cb521 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Feb 2025 23:08:02 -0500 +Subject: ext4: introduce linear search for dentries +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Theodore Ts'o + +[ Upstream commit 9e28059d56649a7212d5b3f8751ec021154ba3dd ] + +This patch addresses an issue where some files in case-insensitive +directories become inaccessible due to changes in how the kernel +function, utf8_casefold(), generates case-folded strings from the +commit 5c26d2f1d3f5 ("unicode: Don't special case ignorable code +points"). + +There are good reasons why this change should be made; it's actually +quite stupid that Unicode seems to think that the characters ❤ and ❤️ +should be casefolded. Unfortimately because of the backwards +compatibility issue, this commit was reverted in 231825b2e1ff. + +This problem is addressed by instituting a brute-force linear fallback +if a lookup fails on case-folded directory, which does result in a +performance hit when looking up files affected by the changing how +thekernel treats ignorable Uniode characters, or when attempting to +look up non-existent file names. So this fallback can be disabled by +setting an encoding flag if in the future, the system administrator or +the manufacturer of a mobile handset or tablet can be sure that there +was no opportunity for a kernel to insert file names with incompatible +encodings. 
+ +Fixes: 5c26d2f1d3f5 ("unicode: Don't special case ignorable code points") +Signed-off-by: Theodore Ts'o +Reviewed-by: Gabriel Krisman Bertazi +Signed-off-by: Sasha Levin +--- + fs/ext4/namei.c | 14 ++++++++++---- + include/linux/fs.h | 10 +++++++++- + 2 files changed, 19 insertions(+), 5 deletions(-) + +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 286f8fcb74cc9..29c7c0b8295fb 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -1462,7 +1462,8 @@ static bool ext4_match(struct inode *parent, + * sure cf_name was properly initialized before + * considering the calculated hash. + */ +- if (IS_ENCRYPTED(parent) && fname->cf_name.name && ++ if (sb_no_casefold_compat_fallback(parent->i_sb) && ++ IS_ENCRYPTED(parent) && fname->cf_name.name && + (fname->hinfo.hash != EXT4_DIRENT_HASH(de) || + fname->hinfo.minor_hash != EXT4_DIRENT_MINOR_HASH(de))) + return false; +@@ -1595,10 +1596,15 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir, + * return. Otherwise, fall back to doing a search the + * old fashioned way. 
+ */ +- if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR) ++ if (IS_ERR(ret) && PTR_ERR(ret) == ERR_BAD_DX_DIR) ++ dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " ++ "falling back\n")); ++ else if (!sb_no_casefold_compat_fallback(dir->i_sb) && ++ *res_dir == NULL && IS_CASEFOLDED(dir)) ++ dxtrace(printk(KERN_DEBUG "ext4_find_entry: casefold " ++ "failed, falling back\n")); ++ else + goto cleanup_and_exit; +- dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " +- "falling back\n")); + ret = NULL; + } + nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); +diff --git a/include/linux/fs.h b/include/linux/fs.h +index a6de8d93838d1..37a01c9d96583 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1197,11 +1197,19 @@ extern int send_sigurg(struct file *file); + #define SB_NOUSER BIT(31) + + /* These flags relate to encoding and casefolding */ +-#define SB_ENC_STRICT_MODE_FL (1 << 0) ++#define SB_ENC_STRICT_MODE_FL (1 << 0) ++#define SB_ENC_NO_COMPAT_FALLBACK_FL (1 << 1) + + #define sb_has_strict_encoding(sb) \ + (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) + ++#if IS_ENABLED(CONFIG_UNICODE) ++#define sb_no_casefold_compat_fallback(sb) \ ++ (sb->s_encoding_flags & SB_ENC_NO_COMPAT_FALLBACK_FL) ++#else ++#define sb_no_casefold_compat_fallback(sb) (1) ++#endif ++ + /* + * Umount options + */ +-- +2.51.0 + diff --git a/queue-6.12/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch b/queue-6.12/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch new file mode 100644 index 0000000000..637c188312 --- /dev/null +++ b/queue-6.12/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch @@ -0,0 +1,90 @@ +From 2dabf0649858cbf20f82e9eea281191d7549f231 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Aug 2025 16:51:00 +0200 +Subject: flexfiles/pNFS: fix NULL checks on result of + ff_layout_choose_ds_for_read + +From: Tigran Mkrtchyan + +[ Upstream commit 5a46d2339a5ae268ede53a221f20433d8ea4f2f9 ] + +Recent commit f06bedfa62d5 
("pNFS/flexfiles: don't attempt pnfs on fatal DS +errors") has changed the error return type of ff_layout_choose_ds_for_read() from +NULL to an error pointer. However, not all code paths have been updated +to match the change. Thus, some non-NULL checks will accept error pointers +as a valid return value. + +Reported-by: Dan Carpenter +Suggested-by: Dan Carpenter +Fixes: f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS errors") +Signed-off-by: Tigran Mkrtchyan +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index b685e763ef11b..69496aab9583e 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -772,8 +772,11 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, + continue; + + if (check_device && +- nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) ++ nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) { ++ // reinitialize the error state in case if this is the last iteration ++ ds = ERR_PTR(-EINVAL); + continue; ++ } + + *best_idx = idx; + break; +@@ -803,7 +806,7 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + struct nfs4_pnfs_ds *ds; + + ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); +- if (ds) ++ if (!IS_ERR(ds)) + return ds; + return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); + } +@@ -817,7 +820,7 @@ ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, + + ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, + best_idx); +- if (ds || !pgio->pg_mirror_idx) ++ if (!IS_ERR(ds) || !pgio->pg_mirror_idx) + return ds; + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); + } +@@ -867,7 +870,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, 
+ req->wb_nio = 0; + + ds = ff_layout_get_ds_for_read(pgio, &ds_idx); +- if (!ds) { ++ if (IS_ERR(ds)) { + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) + goto out_mds; + pnfs_generic_pg_cleanup(pgio); +@@ -1071,11 +1074,13 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) + { + u32 idx = hdr->pgio_mirror_idx + 1; + u32 new_idx = 0; ++ struct nfs4_pnfs_ds *ds; + +- if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) +- ff_layout_send_layouterror(hdr->lseg); +- else ++ ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); ++ if (IS_ERR(ds)) + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); ++ else ++ ff_layout_send_layouterror(hdr->lseg); + pnfs_read_resend_pnfs(hdr, new_idx); + } + +-- +2.51.0 + diff --git a/queue-6.12/fs-nfs-io-make-nfs_start_io_-killable.patch b/queue-6.12/fs-nfs-io-make-nfs_start_io_-killable.patch new file mode 100644 index 0000000000..0943263c3a --- /dev/null +++ b/queue-6.12/fs-nfs-io-make-nfs_start_io_-killable.patch @@ -0,0 +1,222 @@ +From 2f8248299298bd9dfd659e68ca6660b04ad30d0d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Nov 2024 14:53:51 +0100 +Subject: fs/nfs/io: make nfs_start_io_*() killable + +From: Max Kellermann + +[ Upstream commit 38a125b31504f91bf6fdd3cfc3a3e9a721e6c97a ] + +This allows killing processes that wait for a lock when one process is +stuck waiting for the NFS server. This aims to complete the coverage +of NFS operations being killable, like nfs_direct_wait() does, for +example. 
+ +Signed-off-by: Max Kellermann +Signed-off-by: Trond Myklebust +Stable-dep-of: 9eb90f435415 ("NFS: Serialise O_DIRECT i/o and truncate()") +Signed-off-by: Sasha Levin +--- + fs/nfs/direct.c | 21 ++++++++++++++++++--- + fs/nfs/file.c | 14 +++++++++++--- + fs/nfs/internal.h | 7 ++++--- + fs/nfs/io.c | 44 +++++++++++++++++++++++++++++++++----------- + 4 files changed, 66 insertions(+), 20 deletions(-) + +diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c +index f159cfc125adc..f32f8d7c9122b 100644 +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -472,8 +472,16 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + if (user_backed_iter(iter)) + dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; + +- if (!swap) +- nfs_start_io_direct(inode); ++ if (!swap) { ++ result = nfs_start_io_direct(inode); ++ if (result) { ++ /* release the reference that would usually be ++ * consumed by nfs_direct_read_schedule_iovec() ++ */ ++ nfs_direct_req_release(dreq); ++ goto out_release; ++ } ++ } + + NFS_I(inode)->read_io += count; + requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); +@@ -1031,7 +1039,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_STABLE); + } else { +- nfs_start_io_direct(inode); ++ result = nfs_start_io_direct(inode); ++ if (result) { ++ /* release the reference that would usually be ++ * consumed by nfs_direct_write_schedule_iovec() ++ */ ++ nfs_direct_req_release(dreq); ++ goto out_release; ++ } + + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_COND_STABLE); +diff --git a/fs/nfs/file.c b/fs/nfs/file.c +index 153d25d4b810c..033feeab8c346 100644 +--- a/fs/nfs/file.c ++++ b/fs/nfs/file.c +@@ -167,7 +167,10 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) + iocb->ki_filp, + iov_iter_count(to), (unsigned long) iocb->ki_pos); + +- nfs_start_io_read(inode); ++ result = nfs_start_io_read(inode); ++ if (result) ++ 
return result; ++ + result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); + if (!result) { + result = generic_file_read_iter(iocb, to); +@@ -188,7 +191,10 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe + + dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos); + +- nfs_start_io_read(inode); ++ result = nfs_start_io_read(inode); ++ if (result) ++ return result; ++ + result = nfs_revalidate_mapping(inode, in->f_mapping); + if (!result) { + result = filemap_splice_read(in, ppos, pipe, len, flags); +@@ -669,7 +675,9 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) + nfs_clear_invalid_mapping(file->f_mapping); + + since = filemap_sample_wb_err(file->f_mapping); +- nfs_start_io_write(inode); ++ error = nfs_start_io_write(inode); ++ if (error) ++ return error; + result = generic_write_checks(iocb, from); + if (result > 0) + result = generic_perform_write(iocb, from); +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h +index 882d804089add..4860270f1be04 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -6,6 +6,7 @@ + #include "nfs4_fs.h" + #include + #include ++#include + #include + #include + #include +@@ -516,11 +517,11 @@ extern const struct netfs_request_ops nfs_netfs_ops; + #endif + + /* io.c */ +-extern void nfs_start_io_read(struct inode *inode); ++extern __must_check int nfs_start_io_read(struct inode *inode); + extern void nfs_end_io_read(struct inode *inode); +-extern void nfs_start_io_write(struct inode *inode); ++extern __must_check int nfs_start_io_write(struct inode *inode); + extern void nfs_end_io_write(struct inode *inode); +-extern void nfs_start_io_direct(struct inode *inode); ++extern __must_check int nfs_start_io_direct(struct inode *inode); + extern void nfs_end_io_direct(struct inode *inode); + + static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) +diff --git a/fs/nfs/io.c b/fs/nfs/io.c +index b5551ed8f648b..3388faf2acb9f 100644 +--- a/fs/nfs/io.c 
++++ b/fs/nfs/io.c +@@ -39,19 +39,28 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) + * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. + */ +-void ++int + nfs_start_io_read(struct inode *inode) + { + struct nfs_inode *nfsi = NFS_I(inode); ++ int err; ++ + /* Be an optimist! */ +- down_read(&inode->i_rwsem); ++ err = down_read_killable(&inode->i_rwsem); ++ if (err) ++ return err; + if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0) +- return; ++ return 0; + up_read(&inode->i_rwsem); ++ + /* Slow path.... */ +- down_write(&inode->i_rwsem); ++ err = down_write_killable(&inode->i_rwsem); ++ if (err) ++ return err; + nfs_block_o_direct(nfsi, inode); + downgrade_write(&inode->i_rwsem); ++ ++ return 0; + } + + /** +@@ -74,11 +83,15 @@ nfs_end_io_read(struct inode *inode) + * Declare that a buffered read operation is about to start, and ensure + * that we block all direct I/O. + */ +-void ++int + nfs_start_io_write(struct inode *inode) + { +- down_write(&inode->i_rwsem); +- nfs_block_o_direct(NFS_I(inode), inode); ++ int err; ++ ++ err = down_write_killable(&inode->i_rwsem); ++ if (!err) ++ nfs_block_o_direct(NFS_I(inode), inode); ++ return err; + } + + /** +@@ -119,19 +132,28 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) + * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. + */ +-void ++int + nfs_start_io_direct(struct inode *inode) + { + struct nfs_inode *nfsi = NFS_I(inode); ++ int err; ++ + /* Be an optimist! */ +- down_read(&inode->i_rwsem); ++ err = down_read_killable(&inode->i_rwsem); ++ if (err) ++ return err; + if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0) +- return; ++ return 0; + up_read(&inode->i_rwsem); ++ + /* Slow path.... 
*/ +- down_write(&inode->i_rwsem); ++ err = down_write_killable(&inode->i_rwsem); ++ if (err) ++ return err; + nfs_block_buffered(nfsi, inode); + downgrade_write(&inode->i_rwsem); ++ ++ return 0; + } + + /** +-- +2.51.0 + diff --git a/queue-6.12/ftrace-samples-fix-function-size-computation.patch b/queue-6.12/ftrace-samples-fix-function-size-computation.patch new file mode 100644 index 0000000000..e6577f89c1 --- /dev/null +++ b/queue-6.12/ftrace-samples-fix-function-size-computation.patch @@ -0,0 +1,38 @@ +From 0055a66ffbcb0421ed152de0d4b3b4732d814d64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Aug 2025 18:16:46 +0200 +Subject: ftrace/samples: Fix function size computation + +From: Vladimir Riabchun + +[ Upstream commit 80d03a40837a9b26750a25122b906c052cc846c9 ] + +In my_tramp1 function .size directive was placed above +ASM_RET instruction, leading to a wrong function size. + +Link: https://lore.kernel.org/aK3d7vxNcO52kEmg@vova-pc +Fixes: 9d907f1ae80b ("samples/ftrace: Fix asm function ELF annotations") +Signed-off-by: Vladimir Riabchun +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + samples/ftrace/ftrace-direct-modify.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c +index 81220390851a3..328c6e60f024b 100644 +--- a/samples/ftrace/ftrace-direct-modify.c ++++ b/samples/ftrace/ftrace-direct-modify.c +@@ -75,8 +75,8 @@ asm ( + CALL_DEPTH_ACCOUNT + " call my_direct_func1\n" + " leave\n" +-" .size my_tramp1, .-my_tramp1\n" + ASM_RET ++" .size my_tramp1, .-my_tramp1\n" + + " .type my_tramp2, @function\n" + " .globl my_tramp2\n" +-- +2.51.0 + diff --git a/queue-6.12/nfs-localio-add-direct-io-enablement-with-sync-and-a.patch b/queue-6.12/nfs-localio-add-direct-io-enablement-with-sync-and-a.patch new file mode 100644 index 0000000000..7566bbb7a8 --- /dev/null +++ b/queue-6.12/nfs-localio-add-direct-io-enablement-with-sync-and-a.patch 
@@ -0,0 +1,261 @@ +From 73c308932a2443e3d14355c86fe1a6b068cfd878 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Nov 2024 20:40:53 -0500 +Subject: nfs/localio: add direct IO enablement with sync and async IO support + +From: Mike Snitzer + +[ Upstream commit 3feec68563dda59517f83d19123aa287a1dfd068 ] + +This commit simply adds the required O_DIRECT plumbing. It doesn't +address the fact that NFS doesn't ensure all writes are page aligned +(nor device logical block size aligned as required by O_DIRECT). + +Because NFS will read-modify-write for IO that isn't aligned, LOCALIO +will not use O_DIRECT semantics by default if/when an application +requests the use of O_DIRECT. Allow the use of O_DIRECT semantics by: +1: Adding a flag to the nfs_pgio_header struct to allow the NFS + O_DIRECT layer to signal that O_DIRECT was used by the application +2: Adding a 'localio_O_DIRECT_semantics' NFS module parameter that + when enabled will cause LOCALIO to use O_DIRECT semantics (this may + cause IO to fail if applications do not properly align their IO). + +This commit is derived from code developed by Weston Andros Adamson. + +Signed-off-by: Mike Snitzer +Reviewed-by: Jeff Layton +Signed-off-by: Anna Schumaker +Stable-dep-of: 992203a1fba5 ("nfs/localio: restore creds before releasing pageio data") +Signed-off-by: Sasha Levin +--- + Documentation/filesystems/nfs/localio.rst | 13 ++++ + fs/nfs/direct.c | 1 + + fs/nfs/localio.c | 93 ++++++++++++++++++++--- + include/linux/nfs_xdr.h | 1 + + 4 files changed, 98 insertions(+), 10 deletions(-) + +diff --git a/Documentation/filesystems/nfs/localio.rst b/Documentation/filesystems/nfs/localio.rst +index bd1967e2eab32..20fc901a08f4d 100644 +--- a/Documentation/filesystems/nfs/localio.rst ++++ b/Documentation/filesystems/nfs/localio.rst +@@ -306,6 +306,19 @@ is issuing IO to the underlying local filesystem that it is sharing with + the NFS server. 
See: fs/nfs/localio.c:nfs_local_doio() and + fs/nfs/localio.c:nfs_local_commit(). + ++With normal NFS that makes use of RPC to issue IO to the server, if an ++application uses O_DIRECT the NFS client will bypass the pagecache but ++the NFS server will not. Because the NFS server's use of buffered IO ++affords applications to be less precise with their alignment when ++issuing IO to the NFS client. LOCALIO can be configured to use O_DIRECT ++semantics by setting the 'localio_O_DIRECT_semantics' nfs module ++parameter to Y, e.g.: ++ ++ echo Y > /sys/module/nfs/parameters/localio_O_DIRECT_semantics ++ ++Once enabled, it will cause LOCALIO to use O_DIRECT semantics (this may ++cause IO to fail if applications do not properly align their IO). ++ + Security + ======== + +diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c +index c1f1b826888c9..f159cfc125adc 100644 +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -320,6 +320,7 @@ static void nfs_read_sync_pgio_error(struct list_head *head, int error) + static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) + { + get_dreq(hdr->dreq); ++ set_bit(NFS_IOHDR_ODIRECT, &hdr->flags); + } + + static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { +diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c +index ab305dfc71269..8fb145124e93b 100644 +--- a/fs/nfs/localio.c ++++ b/fs/nfs/localio.c +@@ -35,6 +35,7 @@ struct nfs_local_kiocb { + struct bio_vec *bvec; + struct nfs_pgio_header *hdr; + struct work_struct work; ++ void (*aio_complete_work)(struct work_struct *); + struct nfsd_file *localio; + }; + +@@ -50,6 +51,11 @@ static void nfs_local_fsync_work(struct work_struct *work); + static bool localio_enabled __read_mostly = true; + module_param(localio_enabled, bool, 0644); + ++static bool localio_O_DIRECT_semantics __read_mostly = false; ++module_param(localio_O_DIRECT_semantics, bool, 0644); ++MODULE_PARM_DESC(localio_O_DIRECT_semantics, ++ "LOCALIO will use O_DIRECT semantics to filesystem."); ++ + static 
inline bool nfs_client_is_local(const struct nfs_client *clp) + { + return !!test_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); +@@ -287,10 +293,19 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, + kfree(iocb); + return NULL; + } +- init_sync_kiocb(&iocb->kiocb, file); ++ ++ if (localio_O_DIRECT_semantics && ++ test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { ++ iocb->kiocb.ki_filp = file; ++ iocb->kiocb.ki_flags = IOCB_DIRECT; ++ } else ++ init_sync_kiocb(&iocb->kiocb, file); ++ + iocb->kiocb.ki_pos = hdr->args.offset; + iocb->hdr = hdr; + iocb->kiocb.ki_flags &= ~IOCB_APPEND; ++ iocb->aio_complete_work = NULL; ++ + return iocb; + } + +@@ -345,6 +360,18 @@ nfs_local_pgio_release(struct nfs_local_kiocb *iocb) + nfs_local_hdr_release(hdr, hdr->task.tk_ops); + } + ++/* ++ * Complete the I/O from iocb->kiocb.ki_complete() ++ * ++ * Note that this function can be called from a bottom half context, ++ * hence we need to queue the rpc_call_done() etc to a workqueue ++ */ ++static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb) ++{ ++ INIT_WORK(&iocb->work, iocb->aio_complete_work); ++ queue_work(nfsiod_workqueue, &iocb->work); ++} ++ + static void + nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) + { +@@ -367,6 +394,23 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) + status > 0 ? 
status : 0, hdr->res.eof); + } + ++static void nfs_local_read_aio_complete_work(struct work_struct *work) ++{ ++ struct nfs_local_kiocb *iocb = ++ container_of(work, struct nfs_local_kiocb, work); ++ ++ nfs_local_pgio_release(iocb); ++} ++ ++static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) ++{ ++ struct nfs_local_kiocb *iocb = ++ container_of(kiocb, struct nfs_local_kiocb, kiocb); ++ ++ nfs_local_read_done(iocb, ret); ++ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */ ++} ++ + static void nfs_local_call_read(struct work_struct *work) + { + struct nfs_local_kiocb *iocb = +@@ -381,10 +425,10 @@ static void nfs_local_call_read(struct work_struct *work) + nfs_local_iter_init(&iter, iocb, READ); + + status = filp->f_op->read_iter(&iocb->kiocb, &iter); +- WARN_ON_ONCE(status == -EIOCBQUEUED); +- +- nfs_local_read_done(iocb, status); +- nfs_local_pgio_release(iocb); ++ if (status != -EIOCBQUEUED) { ++ nfs_local_read_done(iocb, status); ++ nfs_local_pgio_release(iocb); ++ } + + revert_creds(save_cred); + } +@@ -412,6 +456,11 @@ nfs_do_local_read(struct nfs_pgio_header *hdr, + nfs_local_pgio_init(hdr, call_ops); + hdr->res.eof = false; + ++ if (iocb->kiocb.ki_flags & IOCB_DIRECT) { ++ iocb->kiocb.ki_complete = nfs_local_read_aio_complete; ++ iocb->aio_complete_work = nfs_local_read_aio_complete_work; ++ } ++ + INIT_WORK(&iocb->work, nfs_local_call_read); + queue_work(nfslocaliod_workqueue, &iocb->work); + +@@ -541,6 +590,24 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) + nfs_local_pgio_done(hdr, status); + } + ++static void nfs_local_write_aio_complete_work(struct work_struct *work) ++{ ++ struct nfs_local_kiocb *iocb = ++ container_of(work, struct nfs_local_kiocb, work); ++ ++ nfs_local_vfs_getattr(iocb); ++ nfs_local_pgio_release(iocb); ++} ++ ++static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) ++{ ++ struct nfs_local_kiocb *iocb = ++ container_of(kiocb, struct 
nfs_local_kiocb, kiocb); ++ ++ nfs_local_write_done(iocb, ret); ++ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ ++} ++ + static void nfs_local_call_write(struct work_struct *work) + { + struct nfs_local_kiocb *iocb = +@@ -559,11 +626,11 @@ static void nfs_local_call_write(struct work_struct *work) + file_start_write(filp); + status = filp->f_op->write_iter(&iocb->kiocb, &iter); + file_end_write(filp); +- WARN_ON_ONCE(status == -EIOCBQUEUED); +- +- nfs_local_write_done(iocb, status); +- nfs_local_vfs_getattr(iocb); +- nfs_local_pgio_release(iocb); ++ if (status != -EIOCBQUEUED) { ++ nfs_local_write_done(iocb, status); ++ nfs_local_vfs_getattr(iocb); ++ nfs_local_pgio_release(iocb); ++ } + + revert_creds(save_cred); + current->flags = old_flags; +@@ -599,10 +666,16 @@ nfs_do_local_write(struct nfs_pgio_header *hdr, + case NFS_FILE_SYNC: + iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC; + } ++ + nfs_local_pgio_init(hdr, call_ops); + + nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable); + ++ if (iocb->kiocb.ki_flags & IOCB_DIRECT) { ++ iocb->kiocb.ki_complete = nfs_local_write_aio_complete; ++ iocb->aio_complete_work = nfs_local_write_aio_complete_work; ++ } ++ + INIT_WORK(&iocb->work, nfs_local_call_write); + queue_work(nfslocaliod_workqueue, &iocb->work); + +diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h +index 12d8e47bc5a38..b48d94f099657 100644 +--- a/include/linux/nfs_xdr.h ++++ b/include/linux/nfs_xdr.h +@@ -1637,6 +1637,7 @@ enum { + NFS_IOHDR_RESEND_PNFS, + NFS_IOHDR_RESEND_MDS, + NFS_IOHDR_UNSTABLE_WRITES, ++ NFS_IOHDR_ODIRECT, + }; + + struct nfs_io_completion; +-- +2.51.0 + diff --git a/queue-6.12/nfs-localio-remove-extra-indirect-nfs_to-call-to-che.patch b/queue-6.12/nfs-localio-remove-extra-indirect-nfs_to-call-to-che.patch new file mode 100644 index 0000000000..e65788345b --- /dev/null +++ b/queue-6.12/nfs-localio-remove-extra-indirect-nfs_to-call-to-che.patch @@ -0,0 +1,122 @@ +From 
75c917c089b9910e5458cfb042a595f37f6fac70 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 3 Oct 2024 15:35:03 -0400 +Subject: nfs/localio: remove extra indirect nfs_to call to check + {read,write}_iter + +From: Mike Snitzer + +[ Upstream commit 0978e5b85fc0867f53c5f4e5b7d2a5536a623e16 ] + +Push the read_iter and write_iter availability checks down to +nfs_do_local_read and nfs_do_local_write respectively. + +This eliminates a redundant nfs_to->nfsd_file_file() call. + +Signed-off-by: Mike Snitzer +Signed-off-by: Trond Myklebust +Stable-dep-of: 992203a1fba5 ("nfs/localio: restore creds before releasing pageio data") +Signed-off-by: Sasha Levin +--- + fs/nfs/localio.c | 32 +++++++++++++++++++------------- + 1 file changed, 19 insertions(+), 13 deletions(-) + +diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c +index 21b2b38fae9f3..ab305dfc71269 100644 +--- a/fs/nfs/localio.c ++++ b/fs/nfs/localio.c +@@ -274,7 +274,7 @@ nfs_local_iocb_free(struct nfs_local_kiocb *iocb) + + static struct nfs_local_kiocb * + nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, +- struct nfsd_file *localio, gfp_t flags) ++ struct file *file, gfp_t flags) + { + struct nfs_local_kiocb *iocb; + +@@ -287,9 +287,8 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, + kfree(iocb); + return NULL; + } +- init_sync_kiocb(&iocb->kiocb, nfs_to->nfsd_file_file(localio)); ++ init_sync_kiocb(&iocb->kiocb, file); + iocb->kiocb.ki_pos = hdr->args.offset; +- iocb->localio = localio; + iocb->hdr = hdr; + iocb->kiocb.ki_flags &= ~IOCB_APPEND; + return iocb; +@@ -396,13 +395,19 @@ nfs_do_local_read(struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) + { + struct nfs_local_kiocb *iocb; ++ struct file *file = nfs_to->nfsd_file_file(localio); ++ ++ /* Don't support filesystems without read_iter */ ++ if (!file->f_op->read_iter) ++ return -EAGAIN; + + dprintk("%s: vfs_read count=%u pos=%llu\n", + __func__, hdr->args.count, hdr->args.offset); + +- iocb = nfs_local_iocb_alloc(hdr, localio, 
GFP_KERNEL); ++ iocb = nfs_local_iocb_alloc(hdr, file, GFP_KERNEL); + if (iocb == NULL) + return -ENOMEM; ++ iocb->localio = localio; + + nfs_local_pgio_init(hdr, call_ops); + hdr->res.eof = false; +@@ -570,14 +575,20 @@ nfs_do_local_write(struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) + { + struct nfs_local_kiocb *iocb; ++ struct file *file = nfs_to->nfsd_file_file(localio); ++ ++ /* Don't support filesystems without write_iter */ ++ if (!file->f_op->write_iter) ++ return -EAGAIN; + + dprintk("%s: vfs_write count=%u pos=%llu %s\n", + __func__, hdr->args.count, hdr->args.offset, + (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable"); + +- iocb = nfs_local_iocb_alloc(hdr, localio, GFP_NOIO); ++ iocb = nfs_local_iocb_alloc(hdr, file, GFP_NOIO); + if (iocb == NULL) + return -ENOMEM; ++ iocb->localio = localio; + + switch (hdr->args.stable) { + default: +@@ -603,16 +614,9 @@ int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio, + const struct rpc_call_ops *call_ops) + { + int status = 0; +- struct file *filp = nfs_to->nfsd_file_file(localio); + + if (!hdr->args.count) + return 0; +- /* Don't support filesystems without read_iter/write_iter */ +- if (!filp->f_op->read_iter || !filp->f_op->write_iter) { +- nfs_local_disable(clp); +- status = -EAGAIN; +- goto out; +- } + + switch (hdr->rw_mode) { + case FMODE_READ: +@@ -626,8 +630,10 @@ int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio, + hdr->rw_mode); + status = -EINVAL; + } +-out: ++ + if (status != 0) { ++ if (status == -EAGAIN) ++ nfs_local_disable(clp); + nfs_to_nfsd_file_put_local(localio); + hdr->task.tk_status = status; + nfs_local_hdr_release(hdr, call_ops); +-- +2.51.0 + diff --git a/queue-6.12/nfs-localio-restore-creds-before-releasing-pageio-da.patch b/queue-6.12/nfs-localio-restore-creds-before-releasing-pageio-da.patch new file mode 100644 index 0000000000..a245f26a73 --- /dev/null +++ 
b/queue-6.12/nfs-localio-restore-creds-before-releasing-pageio-da.patch @@ -0,0 +1,65 @@ +From 7c488db4e98238cab6e209b3f97359154dbfa621 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Aug 2025 12:49:38 -0400 +Subject: nfs/localio: restore creds before releasing pageio data + +From: Scott Mayhew + +[ Upstream commit 992203a1fba51b025c60ec0c8b0d9223343dea95 ] + +Otherwise if the nfsd filecache code releases the nfsd_file +immediately, it can trigger the BUG_ON(cred == current->cred) in +__put_cred() when it puts the nfsd_file->nf_file->f-cred. + +Fixes: b9f5dd57f4a5 ("nfs/localio: use dedicated workqueues for filesystem read and write") +Signed-off-by: Scott Mayhew +Reviewed-by: Mike Snitzer +Link: https://lore.kernel.org/r/20250807164938.2395136-1-smayhew@redhat.com +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/localio.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c +index 8fb145124e93b..82a053304ad59 100644 +--- a/fs/nfs/localio.c ++++ b/fs/nfs/localio.c +@@ -425,12 +425,13 @@ static void nfs_local_call_read(struct work_struct *work) + nfs_local_iter_init(&iter, iocb, READ); + + status = filp->f_op->read_iter(&iocb->kiocb, &iter); ++ ++ revert_creds(save_cred); ++ + if (status != -EIOCBQUEUED) { + nfs_local_read_done(iocb, status); + nfs_local_pgio_release(iocb); + } +- +- revert_creds(save_cred); + } + + static int +@@ -626,14 +627,15 @@ static void nfs_local_call_write(struct work_struct *work) + file_start_write(filp); + status = filp->f_op->write_iter(&iocb->kiocb, &iter); + file_end_write(filp); ++ ++ revert_creds(save_cred); ++ current->flags = old_flags; ++ + if (status != -EIOCBQUEUED) { + nfs_local_write_done(iocb, status); + nfs_local_vfs_getattr(iocb); + nfs_local_pgio_release(iocb); + } +- +- revert_creds(save_cred); +- current->flags = old_flags; + } + + static int +-- +2.51.0 + diff --git 
a/queue-6.12/nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch b/queue-6.12/nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch new file mode 100644 index 0000000000..c77d301c51 --- /dev/null +++ b/queue-6.12/nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch @@ -0,0 +1,55 @@ +From 75d425850abd8075bde8b2f424724320c21529fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Sep 2025 11:48:57 -0400 +Subject: NFS: nfs_invalidate_folio() must observe the offset and size + arguments + +From: Trond Myklebust + +[ Upstream commit b7b8574225e9d2b5f1fb5483886ab797892f43b5 ] + +If we're truncating part of the folio, then we need to write out the +data on the part that is not covered by the cancellation. + +Fixes: d47992f86b30 ("mm: change invalidatepage prototype to accept length") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/file.c | 7 ++++--- + fs/nfs/write.c | 1 + + 2 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/fs/nfs/file.c b/fs/nfs/file.c +index 033feeab8c346..a16a619fb8c33 100644 +--- a/fs/nfs/file.c ++++ b/fs/nfs/file.c +@@ -437,10 +437,11 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset, + dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n", + folio->index, offset, length); + +- if (offset != 0 || length < folio_size(folio)) +- return; + /* Cancel any unstarted writes on this page */ +- nfs_wb_folio_cancel(inode, folio); ++ if (offset != 0 || length < folio_size(folio)) ++ nfs_wb_folio(inode, folio); ++ else ++ nfs_wb_folio_cancel(inode, folio); + folio_wait_private_2(folio); /* [DEPRECATED] */ + trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length); + } +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index 2b6b3542405c3..fd86546fafd3f 100644 +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -2058,6 +2058,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) + * release it */ + nfs_inode_remove_request(req); + 
nfs_unlock_and_release_request(req); ++ folio_cancel_dirty(folio); + } + + return ret; +-- +2.51.0 + diff --git a/queue-6.12/nfs-serialise-o_direct-i-o-and-truncate.patch b/queue-6.12/nfs-serialise-o_direct-i-o-and-truncate.patch new file mode 100644 index 0000000000..01ce8d812d --- /dev/null +++ b/queue-6.12/nfs-serialise-o_direct-i-o-and-truncate.patch @@ -0,0 +1,100 @@ +From d3da963a88d8b736666ecf37b184b58417c82612 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Sep 2025 12:06:23 -0400 +Subject: NFS: Serialise O_DIRECT i/o and truncate() + +From: Trond Myklebust + +[ Upstream commit 9eb90f435415c7da4800974ed943e39b5578ee7f ] + +Ensure that all O_DIRECT reads and writes are complete, and prevent the +initiation of new i/o until the setattr operation that will truncate the +file is complete. + +Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/inode.c | 4 +++- + fs/nfs/internal.h | 10 ++++++++++ + fs/nfs/io.c | 13 ++----------- + 3 files changed, 15 insertions(+), 12 deletions(-) + +diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c +index 8827cb00f86d5..5bf5fb5ddd34c 100644 +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -761,8 +761,10 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + trace_nfs_setattr_enter(inode); + + /* Write all dirty data */ +- if (S_ISREG(inode->i_mode)) ++ if (S_ISREG(inode->i_mode)) { ++ nfs_file_block_o_direct(NFS_I(inode)); + nfs_sync_inode(inode); ++ } + + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); + if (fattr == NULL) { +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h +index 4860270f1be04..456b423402814 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -529,6 +529,16 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) + return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0; + } + ++/* Must be called with exclusively locked inode->i_rwsem */ ++static inline void 
nfs_file_block_o_direct(struct nfs_inode *nfsi) ++{ ++ if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { ++ clear_bit(NFS_INO_ODIRECT, &nfsi->flags); ++ inode_dio_wait(&nfsi->vfs_inode); ++ } ++} ++ ++ + /* namespace.c */ + #define NFS_PATH_CANONICAL 1 + extern char *nfs_path(char **p, struct dentry *dentry, +diff --git a/fs/nfs/io.c b/fs/nfs/io.c +index 3388faf2acb9f..d275b0a250bf3 100644 +--- a/fs/nfs/io.c ++++ b/fs/nfs/io.c +@@ -14,15 +14,6 @@ + + #include "internal.h" + +-/* Call with exclusively locked inode->i_rwsem */ +-static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) +-{ +- if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { +- clear_bit(NFS_INO_ODIRECT, &nfsi->flags); +- inode_dio_wait(inode); +- } +-} +- + /** + * nfs_start_io_read - declare the file is being used for buffered reads + * @inode: file inode +@@ -57,7 +48,7 @@ nfs_start_io_read(struct inode *inode) + err = down_write_killable(&inode->i_rwsem); + if (err) + return err; +- nfs_block_o_direct(nfsi, inode); ++ nfs_file_block_o_direct(nfsi); + downgrade_write(&inode->i_rwsem); + + return 0; +@@ -90,7 +81,7 @@ nfs_start_io_write(struct inode *inode) + + err = down_write_killable(&inode->i_rwsem); + if (!err) +- nfs_block_o_direct(NFS_I(inode), inode); ++ nfs_file_block_o_direct(NFS_I(inode)); + return err; + } + +-- +2.51.0 + diff --git a/queue-6.12/nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch b/queue-6.12/nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch new file mode 100644 index 0000000000..c74193354b --- /dev/null +++ b/queue-6.12/nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch @@ -0,0 +1,36 @@ +From 50b700255188212c9f844d8474d92af39b03debb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:12:30 -0700 +Subject: NFSv4: Clear NFS_CAP_OPEN_XOR and NFS_CAP_DELEGTIME if not supported + +From: Trond Myklebust + +[ Upstream commit b3ac33436030bce37ecb3dcae581ecfaad28078c ] + +_nfs4_server_capabilities() should clear 
capabilities that are not +supported by the server. + +Fixes: d2a00cceb93a ("NFSv4: Detect support for OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 7a1a6c68d7324..ea92483d5e71e 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3991,7 +3991,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); + server->caps &= + ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | +- NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS); ++ NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS | ++ NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME); + server->fattr_valid = NFS_ATTR_FATTR_V4; + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && + res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) +-- +2.51.0 + diff --git a/queue-6.12/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch b/queue-6.12/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch new file mode 100644 index 0000000000..c24c507fd4 --- /dev/null +++ b/queue-6.12/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch @@ -0,0 +1,38 @@ +From c635003548e98f3d716b6f3257c2200a8434d546 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:07:22 -0700 +Subject: NFSv4: Clear the NFS_CAP_FS_LOCATIONS flag if it is not set + +From: Trond Myklebust + +[ Upstream commit dd5a8621b886b02f8341c5d4ea68eb2c552ebd3e ] + +_nfs4_server_capabilities() is expected to clear any flags that are not +supported by the server. 
+ +Fixes: 8a59bb93b7e3 ("NFSv4 store server support for fs_location attribute") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 3ac8ecad2e53a..7a1a6c68d7324 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3989,8 +3989,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f + res.attr_bitmask[2]; + } + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); +- server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | +- NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL); ++ server->caps &= ++ ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | ++ NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS); + server->fattr_valid = NFS_ATTR_FATTR_V4; + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && + res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) +-- +2.51.0 + diff --git a/queue-6.12/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch b/queue-6.12/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch new file mode 100644 index 0000000000..da33c51f7d --- /dev/null +++ b/queue-6.12/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch @@ -0,0 +1,35 @@ +From d870623bcc404fa0c6f97f8e49e98e8abc9e4600 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:15:12 -0700 +Subject: NFSv4: Clear the NFS_CAP_XATTR flag if not supported by the server + +From: Trond Myklebust + +[ Upstream commit 4fb2b677fc1f70ee642c0beecc3cabf226ef5707 ] + +nfs_server_set_fsinfo() shouldn't assume that NFS_CAP_XATTR is unset +on entry to the function. 
+ +Fixes: b78ef845c35d ("NFSv4.2: query the server for extended attribute support") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/client.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/nfs/client.c b/fs/nfs/client.c +index 17edc124d03f2..035474f3fb8f3 100644 +--- a/fs/nfs/client.c ++++ b/fs/nfs/client.c +@@ -881,6 +881,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, + + if (fsinfo->xattr_support) + server->caps |= NFS_CAP_XATTR; ++ else ++ server->caps &= ~NFS_CAP_XATTR; + #endif + } + +-- +2.51.0 + diff --git a/queue-6.12/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch b/queue-6.12/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch new file mode 100644 index 0000000000..d23d7cb3c9 --- /dev/null +++ b/queue-6.12/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch @@ -0,0 +1,35 @@ +From 9247bcbeb0382edd0509e3e08df254b92daad225 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:02:16 -0700 +Subject: NFSv4: Don't clear capabilities that won't be reset + +From: Trond Myklebust + +[ Upstream commit 31f1a960ad1a14def94fa0b8c25d62b4c032813f ] + +Don't clear the capabilities that are not going to get reset by the call +to _nfs4_server_capabilities(). 
+ +Reported-by: Scott Haiden +Fixes: b01f21cacde9 ("NFS: Fix the setting of capabilities when automounting a new filesystem") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index e6b7cbc06c9c8..3ac8ecad2e53a 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -4064,7 +4064,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) + }; + int err; + +- nfs_server_set_init_caps(server); + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), +-- +2.51.0 + diff --git a/queue-6.12/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch b/queue-6.12/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch new file mode 100644 index 0000000000..73e9c03bf2 --- /dev/null +++ b/queue-6.12/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch @@ -0,0 +1,42 @@ +From daa5747bb7002a3bf96618cb35b54b8831e1976b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 17:35:16 +0000 +Subject: NFSv4/flexfiles: Fix layout merge mirror check. + +From: Jonathan Curley + +[ Upstream commit dd2fa82473453661d12723c46c9f43d9876a7efd ] + +Typo in ff_lseg_match_mirrors makes the diff ineffective. This results +in merge happening all the time. Merge happening all the time is +problematic because it marks lsegs invalid. Marking lsegs invalid +causes all outstanding IO to get restarted with EAGAIN and connections +to get closed. + +Closing connections constantly triggers race conditions in the RDMA +implementation... 
+ +Fixes: 660d1eb22301c ("pNFS/flexfile: Don't merge layout segments if the mirrors don't match") +Signed-off-by: Jonathan Curley +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 69496aab9583e..6469846971966 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -292,7 +292,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1, + struct pnfs_layout_segment *l2) + { + const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1); +- const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1); ++ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2); + u32 i; + + if (fl1->mirror_array_cnt != fl2->mirror_array_cnt) +-- +2.51.0 + diff --git a/queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch b/queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch new file mode 100644 index 0000000000..ed936f844f --- /dev/null +++ b/queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch @@ -0,0 +1,38 @@ +From f1a1c9ae9c203f82116b878539244631d4ad6324 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Sep 2025 10:40:24 -0400 +Subject: NFSv4.2: Serialise O_DIRECT i/o and clone range + +From: Trond Myklebust + +[ Upstream commit c80ebeba1198eac8811ab0dba36ecc13d51e4438 ] + +Ensure that all O_DIRECT reads and writes complete before cloning a file +range, so that both the source and destination are up to date. 
+ +Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4file.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c +index 1cd9652f3c280..453d08a9c4b4d 100644 +--- a/fs/nfs/nfs4file.c ++++ b/fs/nfs/nfs4file.c +@@ -283,9 +283,11 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, + + /* flush all pending writes on both src and dst so that server + * has the latest data */ ++ nfs_file_block_o_direct(NFS_I(src_inode)); + ret = nfs_sync_inode(src_inode); + if (ret) + goto out_unlock; ++ nfs_file_block_o_direct(NFS_I(dst_inode)); + ret = nfs_sync_inode(dst_inode); + if (ret) + goto out_unlock; +-- +2.51.0 + diff --git a/queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch b/queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch new file mode 100644 index 0000000000..05f3c222fd --- /dev/null +++ b/queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch @@ -0,0 +1,34 @@ +From 3b7bc92bf1b81880ea16f63275c77066f9eb8e3a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Sep 2025 10:54:13 -0400 +Subject: NFSv4.2: Serialise O_DIRECT i/o and copy range + +From: Trond Myklebust + +[ Upstream commit ca247c89900ae90207f4d321e260cd93b7c7d104 ] + +Ensure that all O_DIRECT reads and writes complete before copying a file +range, so that the destination is up to date. 
+ +Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs42proc.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c +index 66fe885fc19a1..582cf8a469560 100644 +--- a/fs/nfs/nfs42proc.c ++++ b/fs/nfs/nfs42proc.c +@@ -356,6 +356,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, + return status; + } + ++ nfs_file_block_o_direct(NFS_I(dst_inode)); + status = nfs_sync_inode(dst_inode); + if (status) + return status; +-- +2.51.0 + diff --git a/queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch b/queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch new file mode 100644 index 0000000000..f1a2fb79d9 --- /dev/null +++ b/queue-6.12/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch @@ -0,0 +1,34 @@ +From 3ce30658edc39175fb0de7d0b0eb763891193767 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Sep 2025 12:11:17 -0400 +Subject: NFSv4.2: Serialise O_DIRECT i/o and fallocate() + +From: Trond Myklebust + +[ Upstream commit b93128f29733af5d427a335978a19884c2c230e2 ] + +Ensure that all O_DIRECT reads and writes complete before calling +fallocate so that we don't race w.r.t. attribute updates. 
+ +Fixes: 99f237832243 ("NFSv4.2: Always flush out writes in nfs42_proc_fallocate()") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs42proc.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c +index 9f0d69e652644..66fe885fc19a1 100644 +--- a/fs/nfs/nfs42proc.c ++++ b/fs/nfs/nfs42proc.c +@@ -112,6 +112,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + exception.inode = inode; + exception.state = lock->open_context->state; + ++ nfs_file_block_o_direct(NFS_I(inode)); + err = nfs_sync_inode(inode); + if (err) + goto out; +-- +2.51.0 + diff --git a/queue-6.12/nvme-pci-skip-nvme_write_sq_db-on-empty-rqlist.patch b/queue-6.12/nvme-pci-skip-nvme_write_sq_db-on-empty-rqlist.patch new file mode 100644 index 0000000000..59d946830e --- /dev/null +++ b/queue-6.12/nvme-pci-skip-nvme_write_sq_db-on-empty-rqlist.patch @@ -0,0 +1,37 @@ +From 85ede2c11d442fd838e4dae5111236913005127b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 31 Mar 2025 18:28:59 +0200 +Subject: nvme-pci: skip nvme_write_sq_db on empty rqlist + +From: Maurizio Lombardi + +[ Upstream commit 288ff0d10beb069355036355d5f7612579dc869c ] + +nvme_submit_cmds() should check the rqlist before calling +nvme_write_sq_db(); if the list is empty, it must return immediately. 
+ +Fixes: beadf0088501 ("nvme-pci: reverse request order in nvme_queue_rqs") +Signed-off-by: Maurizio Lombardi +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 2bddc9f60fecc..fdc9f1df0578b 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -989,6 +989,9 @@ static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist) + { + struct request *req; + ++ if (rq_list_empty(rqlist)) ++ return; ++ + spin_lock(&nvmeq->sq_lock); + while ((req = rq_list_pop(rqlist))) { + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); +-- +2.51.0 + diff --git a/queue-6.12/proc-fix-type-confusion-in-pde_set_flags.patch b/queue-6.12/proc-fix-type-confusion-in-pde_set_flags.patch new file mode 100644 index 0000000000..03cfea66f0 --- /dev/null +++ b/queue-6.12/proc-fix-type-confusion-in-pde_set_flags.patch @@ -0,0 +1,56 @@ +From 6fea6df22517207c66d4ab48ef2c709f644b2793 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Sep 2025 21:57:15 +0800 +Subject: proc: fix type confusion in pde_set_flags() + +From: wangzijie + +[ Upstream commit 0ce9398aa0830f15f92bbed73853f9861c3e74ff ] + +Commit 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc +files") missed a key part in the definition of proc_dir_entry: + +union { + const struct proc_ops *proc_ops; + const struct file_operations *proc_dir_ops; +}; + +So dereference of ->proc_ops assumes it is a proc_ops structure results in +type confusion and make NULL check for 'proc_ops' not work for proc dir. + +Add !S_ISDIR(dp->mode) test before calling pde_set_flags() to fix it. 
+ +Link: https://lkml.kernel.org/r/20250904135715.3972782-1-wangzijie1@honor.com +Fixes: 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc files") +Signed-off-by: wangzijie +Reported-by: Brad Spengler +Closes: https://lore.kernel.org/all/20250903065758.3678537-1-wangzijie1@honor.com/ +Cc: Alexey Dobriyan +Cc: Al Viro +Cc: Christian Brauner +Cc: Jiri Slaby +Cc: Stefano Brivio +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/proc/generic.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/proc/generic.c b/fs/proc/generic.c +index a87a9404e0d0c..eb49beff69bcb 100644 +--- a/fs/proc/generic.c ++++ b/fs/proc/generic.c +@@ -388,7 +388,8 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + if (proc_alloc_inum(&dp->low_ino)) + goto out_free_entry; + +- pde_set_flags(dp); ++ if (!S_ISDIR(dp->mode)) ++ pde_set_flags(dp); + + write_lock(&proc_subdir_lock); + dp->parent = dir; +-- +2.51.0 + diff --git a/queue-6.12/revert-drm-amd-display-optimize-cursor-position-upda.patch b/queue-6.12/revert-drm-amd-display-optimize-cursor-position-upda.patch new file mode 100644 index 0000000000..e90a78a93c --- /dev/null +++ b/queue-6.12/revert-drm-amd-display-optimize-cursor-position-upda.patch @@ -0,0 +1,108 @@ +From fc69b12dad6fe53042cd5a3eff1cab0fc2aff934 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Jan 2025 13:12:24 -0500 +Subject: Revert "drm/amd/display: Optimize cursor position updates" + +From: Aurabindo Pillai + +[ Upstream commit a5d258a00b41143d9c64880eed35799d093c4782 ] + +This reverts commit 88c7c56d07c108ed4de319c8dba44aa4b8a38dd1. + +SW and HW state are not always matching in some cases causing cursor to +be disabled. 
+ +Signed-off-by: Aurabindo Pillai +Reviewed-by: Leo Li +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c | 7 +++---- + .../gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c | 6 ++---- + drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c | 8 +++----- + .../gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c | 10 ++++------ + 4 files changed, 12 insertions(+), 19 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c +index 01480a04f85ef..9a3be1dd352b6 100644 +--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c ++++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c +@@ -483,11 +483,10 @@ void dpp1_set_cursor_position( + if (src_y_offset + cursor_height <= 0) + cur_en = 0; /* not visible beyond top edge*/ + +- if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) { +- REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); ++ REG_UPDATE(CURSOR0_CONTROL, ++ CUR0_ENABLE, cur_en); + +- dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; +- } ++ dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + } + + void dpp1_cnv_set_optional_cursor_attributes( +diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +index 712aff7e17f7a..92b34fe47f740 100644 +--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c ++++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +@@ -155,11 +155,9 @@ void dpp401_set_cursor_position( + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + uint32_t cur_en = pos->enable ? 
1 : 0; + +- if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) { +- REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); ++ REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); + +- dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; +- } ++ dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + } + + void dpp401_set_optional_cursor_attributes( +diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +index c74ee2d50a699..b405fa22f87a9 100644 +--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c ++++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +@@ -1044,13 +1044,11 @@ void hubp2_cursor_set_position( + if (src_y_offset + cursor_height <= 0) + cur_en = 0; /* not visible beyond top edge*/ + +- if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { +- if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) +- hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); ++ if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) ++ hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + +- REG_UPDATE(CURSOR_CONTROL, ++ REG_UPDATE(CURSOR_CONTROL, + CURSOR_ENABLE, cur_en); +- } + + REG_SET_2(CURSOR_POSITION, 0, + CURSOR_X_POSITION, pos->x, +diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +index 7013c124efcff..2d52100510f05 100644 +--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c ++++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +@@ -718,13 +718,11 @@ void hubp401_cursor_set_position( + dc_fixpt_from_int(dst_x_offset), + param->h_scale_ratio)); + +- if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { +- if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) +- hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); ++ if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) ++ hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + +- REG_UPDATE(CURSOR_CONTROL, +- CURSOR_ENABLE, cur_en); 
+- } ++ REG_UPDATE(CURSOR_CONTROL, ++ CURSOR_ENABLE, cur_en); + + REG_SET_2(CURSOR_POSITION, 0, + CURSOR_X_POSITION, x_pos, +-- +2.51.0 + diff --git a/queue-6.12/revert-udmabuf-fix-vmap_udmabuf-error-page-set.patch b/queue-6.12/revert-udmabuf-fix-vmap_udmabuf-error-page-set.patch new file mode 100644 index 0000000000..1f95581f70 --- /dev/null +++ b/queue-6.12/revert-udmabuf-fix-vmap_udmabuf-error-page-set.patch @@ -0,0 +1,86 @@ +From 5d8647fc5a90690fc85dbecfd2fefbeb8b139bff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Apr 2025 15:38:29 +0800 +Subject: Revert "udmabuf: fix vmap_udmabuf error page set" + +From: Huan Yang + +[ Upstream commit ceb7b62eaaaacfcf87473bd2e99ac73a758620cb ] + +This reverts commit 18d7de823b7150344d242c3677e65d68c5271b04. + +We cannot use vmap_pfn() in vmap_udmabuf() as it would fail the pfn_valid() +check in vmap_pfn_apply(). This is because vmap_pfn() is intended to be +used for mapping non-struct-page memory such as PCIe BARs. Since, udmabuf +mostly works with pages/folios backed by shmem/hugetlbfs/THP, vmap_pfn() +is not the right tool or API to invoke for implementing vmap. 
+ +Signed-off-by: Huan Yang +Suggested-by: Vivek Kasireddy +Reported-by: Bingbu Cao +Closes: https://lore.kernel.org/dri-devel/eb7e0137-3508-4287-98c4-816c5fd98e10@vivo.com/T/#mbda4f64a3532b32e061f4e8763bc8e307bea3ca8 +Acked-by: Vivek Kasireddy +Signed-off-by: Vivek Kasireddy +Link: https://lore.kernel.org/r/20250428073831.19942-2-link@vivo.com +Signed-off-by: Sasha Levin +--- + drivers/dma-buf/Kconfig | 1 - + drivers/dma-buf/udmabuf.c | 22 +++++++--------------- + 2 files changed, 7 insertions(+), 16 deletions(-) + +diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig +index fee04fdb08220..b46eb8a552d7b 100644 +--- a/drivers/dma-buf/Kconfig ++++ b/drivers/dma-buf/Kconfig +@@ -36,7 +36,6 @@ config UDMABUF + depends on DMA_SHARED_BUFFER + depends on MEMFD_CREATE || COMPILE_TEST + depends on MMU +- select VMAP_PFN + help + A driver to let userspace turn memfd regions into dma-bufs. + Qemu can use this to create host dmabufs for guest framebuffers. +diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c +index 959f690b12260..0e127a9109e75 100644 +--- a/drivers/dma-buf/udmabuf.c ++++ b/drivers/dma-buf/udmabuf.c +@@ -74,29 +74,21 @@ static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma) + static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map) + { + struct udmabuf *ubuf = buf->priv; +- unsigned long *pfns; ++ struct page **pages; + void *vaddr; + pgoff_t pg; + + dma_resv_assert_held(buf->resv); + +- /** +- * HVO may free tail pages, so just use pfn to map each folio +- * into vmalloc area. 
+- */ +- pfns = kvmalloc_array(ubuf->pagecount, sizeof(*pfns), GFP_KERNEL); +- if (!pfns) ++ pages = kvmalloc_array(ubuf->pagecount, sizeof(*pages), GFP_KERNEL); ++ if (!pages) + return -ENOMEM; + +- for (pg = 0; pg < ubuf->pagecount; pg++) { +- unsigned long pfn = folio_pfn(ubuf->folios[pg]); +- +- pfn += ubuf->offsets[pg] >> PAGE_SHIFT; +- pfns[pg] = pfn; +- } ++ for (pg = 0; pg < ubuf->pagecount; pg++) ++ pages[pg] = &ubuf->folios[pg]->page; + +- vaddr = vmap_pfn(pfns, ubuf->pagecount, PAGE_KERNEL); +- kvfree(pfns); ++ vaddr = vm_map_ram(pages, ubuf->pagecount, -1); ++ kvfree(pages); + if (!vaddr) + return -EINVAL; + +-- +2.51.0 + diff --git a/queue-6.12/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch b/queue-6.12/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch new file mode 100644 index 0000000000..ca604cb637 --- /dev/null +++ b/queue-6.12/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch @@ -0,0 +1,50 @@ +From 9e8db30a18df77df95578658d3f11aee684dd47d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Aug 2025 14:05:57 +0200 +Subject: s390/cpum_cf: Deny all sampling events by counter PMU + +From: Thomas Richter + +[ Upstream commit ce971233242b5391d99442271f3ca096fb49818d ] + +Deny all sampling event by the CPUMF counter facility device driver +and return -ENOENT. This return value is used to try other PMUs. +Up to now events for type PERF_TYPE_HARDWARE were not tested for +sampling and returned later on -EOPNOTSUPP. This ends the search +for alternative PMUs. Change that behavior and try other PMUs +instead. 
+ +Fixes: 613a41b0d16e ("s390/cpum_cf: Reject request for sampling in event initialization") +Acked-by: Sumanth Korikkar +Signed-off-by: Thomas Richter +Signed-off-by: Alexander Gordeev +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/perf_cpum_cf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c +index 6d6b057b562fd..b017db3344cb5 100644 +--- a/arch/s390/kernel/perf_cpum_cf.c ++++ b/arch/s390/kernel/perf_cpum_cf.c +@@ -761,8 +761,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) + break; + + case PERF_TYPE_HARDWARE: +- if (is_sampling_event(event)) /* No sampling support */ +- return -ENOENT; + ev = attr->config; + if (!attr->exclude_user && attr->exclude_kernel) { + /* +@@ -860,6 +858,8 @@ static int cpumf_pmu_event_init(struct perf_event *event) + unsigned int type = event->attr.type; + int err = -ENOENT; + ++ if (is_sampling_event(event)) /* No sampling support */ ++ return err; + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) +-- +2.51.0 + diff --git a/queue-6.12/s390-pai-deny-all-events-not-handled-by-this-pmu.patch b/queue-6.12/s390-pai-deny-all-events-not-handled-by-this-pmu.patch new file mode 100644 index 0000000000..bdd22ebda6 --- /dev/null +++ b/queue-6.12/s390-pai-deny-all-events-not-handled-by-this-pmu.patch @@ -0,0 +1,60 @@ +From a6288f3d10a060a401d176a2f08c60818c85b54f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Aug 2025 09:53:27 +0200 +Subject: s390/pai: Deny all events not handled by this PMU + +From: Thomas Richter + +[ Upstream commit 85941afd2c404247e583c827fae0a45da1c1d92c ] + +Each PAI PMU device driver returns -EINVAL when an event is out of +its accepted range. This return value aborts the search for an +alternative PMU device driver to handle this event. +Change the return value to -ENOENT. 
This return value is used to +try other PMUs instead. This makes the PMUs more robust when +the sequence of PMU device driver initialization changes (at boot time) +or by using modules. + +Fixes: 39d62336f5c12 ("s390/pai: add support for cryptography counters") +Acked-by: Sumanth Korikkar +Signed-off-by: Thomas Richter +Signed-off-by: Alexander Gordeev +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/perf_pai_crypto.c | 4 ++-- + arch/s390/kernel/perf_pai_ext.c | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c +index 10725f5a6f0fd..11200880a96c1 100644 +--- a/arch/s390/kernel/perf_pai_crypto.c ++++ b/arch/s390/kernel/perf_pai_crypto.c +@@ -286,10 +286,10 @@ static int paicrypt_event_init(struct perf_event *event) + /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ + if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) + return -ENOENT; +- /* PAI crypto event must be in valid range */ ++ /* PAI crypto event must be in valid range, try others if not */ + if (a->config < PAI_CRYPTO_BASE || + a->config > PAI_CRYPTO_BASE + paicrypt_cnt) +- return -EINVAL; ++ return -ENOENT; + /* Allow only CRYPTO_ALL for sampling */ + if (a->sample_period && a->config != PAI_CRYPTO_BASE) + return -EINVAL; +diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c +index a8f0bad99cf04..28398e313b58d 100644 +--- a/arch/s390/kernel/perf_pai_ext.c ++++ b/arch/s390/kernel/perf_pai_ext.c +@@ -266,7 +266,7 @@ static int paiext_event_valid(struct perf_event *event) + event->hw.config_base = offsetof(struct paiext_cb, acc); + return 0; + } +- return -EINVAL; ++ return -ENOENT; + } + + /* Might be called on different CPU than the one the event is intended for. 
*/ +-- +2.51.0 + diff --git a/queue-6.12/series b/queue-6.12/series index c4f9186f0c..749fe7e73a 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -7,3 +7,40 @@ dma-debug-don-t-enforce-dma-mapping-check-on-noncohe.patch kunit-kasan_test-disable-fortify-string-checker-on-kasan_strings-test.patch net-mlx5-hws-change-error-flow-on-matcher-disconnect.patch mm-introduce-and-use-pgd-p4d-_populate_kernel.patch +dma-mapping-fix-swapped-dir-flags-arguments-to-trace.patch +dma-debug-fix-physical-address-calculation-for-struc.patch +nvme-pci-skip-nvme_write_sq_db-on-empty-rqlist.patch +revert-udmabuf-fix-vmap_udmabuf-error-page-set.patch +ext4-introduce-linear-search-for-dentries.patch +drm-i915-pmu-fix-zero-delta-busyness-issue.patch +drm-amd-display-fix-error-pointers-in-amdgpu_dm_crtc.patch +revert-drm-amd-display-optimize-cursor-position-upda.patch +alsa-hda-realtek-fix-built-in-mic-assignment-on-asus.patch +drm-amdgpu-add-back-jpeg-to-video-caps-for-carrizo-a.patch +flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch +sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch +nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch +trace-fgraph-fix-error-handling.patch +nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch +nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch +nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch +tracing-fix-tracing_marker-may-trigger-page-fault-du.patch +nfs-localio-remove-extra-indirect-nfs_to-call-to-che.patch +nfs-localio-add-direct-io-enablement-with-sync-and-a.patch +nfs-localio-restore-creds-before-releasing-pageio-da.patch +ftrace-samples-fix-function-size-computation.patch +fs-nfs-io-make-nfs_start_io_-killable.patch +nfs-serialise-o_direct-i-o-and-truncate.patch +nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch +nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch +nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch +nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch 
+nfsv4-flexfiles-fix-layout-merge-mirror-check.patch +tracing-silence-warning-when-chunk-allocation-fails-.patch +s390-pai-deny-all-events-not-handled-by-this-pmu.patch +s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch +bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch +bpf-allow-fall-back-to-interpreter-for-programs-with.patch +bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch +tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch +proc-fix-type-confusion-in-pde_set_flags.patch diff --git a/queue-6.12/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch b/queue-6.12/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch new file mode 100644 index 0000000000..803032916a --- /dev/null +++ b/queue-6.12/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch @@ -0,0 +1,48 @@ +From 740ae0f1a4b8a03e61b24f3c78d6769f34b824ec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Sep 2025 16:09:57 -0500 +Subject: SUNRPC: call xs_sock_process_cmsg for all cmsg + +From: Justin Worrell + +[ Upstream commit 9559d2fffd4f9b892165eed48198a0e5cb8504e6 ] + +xs_sock_recv_cmsg was failing to call xs_sock_process_cmsg for any cmsg +type other than TLS_RECORD_TYPE_ALERT (TLS_RECORD_TYPE_DATA, and other +values not handled.) Based on my reading of the previous commit +(cc5d5908: sunrpc: fix client side handling of tls alerts), it looks +like only iov_iter_revert should be conditional on TLS_RECORD_TYPE_ALERT +(but that other cmsg types should still call xs_sock_process_cmsg). On +my machine, I was unable to connect (over mtls) to an NFS share hosted +on FreeBSD. With this patch applied, I am able to mount the share again. 
+ +Fixes: cc5d59081fa2 ("sunrpc: fix client side handling of tls alerts") +Signed-off-by: Justin Worrell +Reviewed-and-tested-by: Scott Mayhew +Link: https://lore.kernel.org/r/20250904211038.12874-3-jworrell@gmail.com +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + net/sunrpc/xprtsock.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index 92cec227215ae..b78f1aae9e806 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -407,9 +407,9 @@ xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags) + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, flags); +- if (ret > 0 && +- tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { +- iov_iter_revert(&msg.msg_iter, ret); ++ if (ret > 0) { ++ if (tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) ++ iov_iter_revert(&msg.msg_iter, ret); + ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg, + -EAGAIN); + } +-- +2.51.0 + diff --git a/queue-6.12/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch b/queue-6.12/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch new file mode 100644 index 0000000000..d5b3a4bc39 --- /dev/null +++ b/queue-6.12/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch @@ -0,0 +1,98 @@ +From 59e1fecc21fb9a19cd8a0e813065f90d4c4a401c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 23:26:12 +0000 +Subject: tcp_bpf: Call sk_msg_free() when tcp_bpf_send_verdict() fails to + allocate psock->cork. + +From: Kuniyuki Iwashima + +[ Upstream commit a3967baad4d533dc254c31e0d221e51c8d223d58 ] + +syzbot reported the splat below. [0] + +The repro does the following: + + 1. Load a sk_msg prog that calls bpf_msg_cork_bytes(msg, cork_bytes) + 2. Attach the prog to a SOCKMAP + 3. Add a socket to the SOCKMAP + 4. Activate fault injection + 5. 
Send data less than cork_bytes + +At 5., the data is carried over to the next sendmsg() as it is +smaller than the cork_bytes specified by bpf_msg_cork_bytes(). + +Then, tcp_bpf_send_verdict() tries to allocate psock->cork to hold +the data, but this fails silently due to fault injection + __GFP_NOWARN. + +If the allocation fails, we need to revert the sk->sk_forward_alloc +change done by sk_msg_alloc(). + +Let's call sk_msg_free() when tcp_bpf_send_verdict fails to allocate +psock->cork. + +The "*copied" also needs to be updated such that a proper error can +be returned to the caller, sendmsg. It fails to allocate psock->cork. +Nothing has been corked so far, so this patch simply sets "*copied" +to 0. + +[0]: +WARNING: net/ipv4/af_inet.c:156 at inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156, CPU#1: syz-executor/5983 +Modules linked in: +CPU: 1 UID: 0 PID: 5983 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 +RIP: 0010:inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156 +Code: 0f 0b 90 e9 62 fe ff ff e8 7a db b5 f7 90 0f 0b 90 e9 95 fe ff ff e8 6c db b5 f7 90 0f 0b 90 e9 bb fe ff ff e8 5e db b5 f7 90 <0f> 0b 90 e9 e1 fe ff ff 89 f9 80 e1 07 80 c1 03 38 c1 0f 8c 9f fc +RSP: 0018:ffffc90000a08b48 EFLAGS: 00010246 +RAX: ffffffff8a09d0b2 RBX: dffffc0000000000 RCX: ffff888024a23c80 +RDX: 0000000000000100 RSI: 0000000000000fff RDI: 0000000000000000 +RBP: 0000000000000fff R08: ffff88807e07c627 R09: 1ffff1100fc0f8c4 +R10: dffffc0000000000 R11: ffffed100fc0f8c5 R12: ffff88807e07c380 +R13: dffffc0000000000 R14: ffff88807e07c60c R15: 1ffff1100fc0f872 +FS: 00005555604c4500(0000) GS:ffff888125af1000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005555604df5c8 CR3: 0000000032b06000 CR4: 00000000003526f0 +Call Trace: + + __sk_destruct+0x86/0x660 net/core/sock.c:2339 + rcu_do_batch kernel/rcu/tree.c:2605 [inline] + 
rcu_core+0xca8/0x1770 kernel/rcu/tree.c:2861 + handle_softirqs+0x286/0x870 kernel/softirq.c:579 + __do_softirq kernel/softirq.c:613 [inline] + invoke_softirq kernel/softirq.c:453 [inline] + __irq_exit_rcu+0xca/0x1f0 kernel/softirq.c:680 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:696 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1052 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1052 + + +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Reported-by: syzbot+4cabd1d2fa917a456db8@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/68c0b6b5.050a0220.3c6139.0013.GAE@google.com/ +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: Martin KaFai Lau +Link: https://patch.msgid.link/20250909232623.4151337-1-kuniyu@google.com +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_bpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index 22e8a2af5dd8b..8372ca512a755 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -408,8 +408,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, + if (!psock->cork) { + psock->cork = kzalloc(sizeof(*psock->cork), + GFP_ATOMIC | __GFP_NOWARN); +- if (!psock->cork) ++ if (!psock->cork) { ++ sk_msg_free(sk, msg); ++ *copied = 0; + return -ENOMEM; ++ } + } + memcpy(psock->cork, msg, sizeof(*msg)); + return 0; +-- +2.51.0 + diff --git a/queue-6.12/trace-fgraph-fix-error-handling.patch b/queue-6.12/trace-fgraph-fix-error-handling.patch new file mode 100644 index 0000000000..ec40e1db71 --- /dev/null +++ b/queue-6.12/trace-fgraph-fix-error-handling.patch @@ -0,0 +1,51 @@ +From a78997f44c75023738366860ed60f7746d751638 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Sep 2025 22:06:18 -0700 +Subject: trace/fgraph: Fix error handling + +From: Guenter Roeck + +[ Upstream commit ab1396af7595e7d49a3850481b24d7fe7cbdfd31 ] + +Commit edede7a6dcd7 
("trace/fgraph: Fix the warning caused by missing +unregister notifier") added a call to unregister the PM notifier if +register_ftrace_graph() failed. It does so unconditionally. However, +the PM notifier is only registered with the first call to +register_ftrace_graph(). If the first registration was successful and +a subsequent registration failed, the notifier is now unregistered even +if ftrace graphs are still registered. + +Fix the problem by only unregistering the PM notifier during error handling +if there are no active fgraph registrations. + +Fixes: edede7a6dcd7 ("trace/fgraph: Fix the warning caused by missing unregister notifier") +Closes: https://lore.kernel.org/all/63b0ba5a-a928-438e-84f9-93028dd72e54@roeck-us.net/ +Cc: Ye Weihua +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Link: https://lore.kernel.org/20250906050618.2634078-1-linux@roeck-us.net +Signed-off-by: Guenter Roeck +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/fgraph.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c +index 2eed8bc672f91..988a4c4ba97bc 100644 +--- a/kernel/trace/fgraph.c ++++ b/kernel/trace/fgraph.c +@@ -1316,7 +1316,8 @@ int register_ftrace_graph(struct fgraph_ops *gops) + ftrace_graph_active--; + gops->saved_func = NULL; + fgraph_lru_release_index(i); +- unregister_pm_notifier(&ftrace_suspend_notifier); ++ if (!ftrace_graph_active) ++ unregister_pm_notifier(&ftrace_suspend_notifier); + } + return ret; + } +-- +2.51.0 + diff --git a/queue-6.12/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch b/queue-6.12/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch new file mode 100644 index 0000000000..0a910fbefd --- /dev/null +++ b/queue-6.12/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch @@ -0,0 +1,89 @@ +From 14973c8659a896f372a9900070af037946da7e20 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Aug 2025 
10:51:52 +0000 +Subject: tracing: Fix tracing_marker may trigger page fault during + preempt_disable + +From: Luo Gengkun + +[ Upstream commit 3d62ab32df065e4a7797204a918f6489ddb8a237 ] + +Both tracing_mark_write and tracing_mark_raw_write call +__copy_from_user_inatomic during preempt_disable. But in some case, +__copy_from_user_inatomic may trigger page fault, and will call schedule() +subtly. And if a task is migrated to other cpu, the following warning will +be trigger: + if (RB_WARN_ON(cpu_buffer, + !local_read(&cpu_buffer->committing))) + +An example can illustrate this issue: + +process flow CPU +--------------------------------------------------------------------- + +tracing_mark_raw_write(): cpu:0 + ... + ring_buffer_lock_reserve(): cpu:0 + ... + cpu = raw_smp_processor_id() cpu:0 + cpu_buffer = buffer->buffers[cpu] cpu:0 + ... + ... + __copy_from_user_inatomic(): cpu:0 + ... + # page fault + do_mem_abort(): cpu:0 + ... + # Call schedule + schedule() cpu:0 + ... + # the task schedule to cpu1 + __buffer_unlock_commit(): cpu:1 + ... + ring_buffer_unlock_commit(): cpu:1 + ... + cpu = raw_smp_processor_id() cpu:1 + cpu_buffer = buffer->buffers[cpu] cpu:1 + +As shown above, the process will acquire cpuid twice and the return values +are not the same. + +To fix this problem using copy_from_user_nofault instead of +__copy_from_user_inatomic, as the former performs 'access_ok' before +copying. 
+ +Link: https://lore.kernel.org/20250819105152.2766363-1-luogengkun@huaweicloud.com +Fixes: 656c7f0d2d2b ("tracing: Replace kmap with copy_from_user() in trace_marker writing") +Signed-off-by: Luo Gengkun +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index ba3358eef34ba..87a43c0f90764 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6949,7 +6949,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, + entry = ring_buffer_event_data(event); + entry->ip = _THIS_IP_; + +- len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->buf, ubuf, cnt); + if (len) { + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); + cnt = FAULTED_SIZE; +@@ -7020,7 +7020,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + + entry = ring_buffer_event_data(event); + +- len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->id, ubuf, cnt); + if (len) { + entry->id = -1; + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); +-- +2.51.0 + diff --git a/queue-6.12/tracing-silence-warning-when-chunk-allocation-fails-.patch b/queue-6.12/tracing-silence-warning-when-chunk-allocation-fails-.patch new file mode 100644 index 0000000000..278d1fbd1b --- /dev/null +++ b/queue-6.12/tracing-silence-warning-when-chunk-allocation-fails-.patch @@ -0,0 +1,97 @@ +From ca6f1c3b0a8b614aaa1f1afe98273dfc7a3a4634 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 02:46:58 +0000 +Subject: tracing: Silence warning when chunk allocation fails in + trace_pid_write + +From: Pu Lehui + +[ Upstream commit cd4453c5e983cf1fd5757e9acb915adb1e4602b6 ] + +Syzkaller trigger a fault injection warning: + +WARNING: CPU: 1 PID: 12326 at tracepoint_add_func+0xbfc/0xeb0 +Modules linked 
in: +CPU: 1 UID: 0 PID: 12326 Comm: syz.6.10325 Tainted: G U 6.14.0-rc5-syzkaller #0 +Tainted: [U]=USER +Hardware name: Google Compute Engine/Google Compute Engine +RIP: 0010:tracepoint_add_func+0xbfc/0xeb0 kernel/tracepoint.c:294 +Code: 09 fe ff 90 0f 0b 90 0f b6 74 24 43 31 ff 41 bc ea ff ff ff +RSP: 0018:ffffc9000414fb48 EFLAGS: 00010283 +RAX: 00000000000012a1 RBX: ffffffff8e240ae0 RCX: ffffc90014b78000 +RDX: 0000000000080000 RSI: ffffffff81bbd78b RDI: 0000000000000001 +RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000001 R12: ffffffffffffffef +R13: 0000000000000000 R14: dffffc0000000000 R15: ffffffff81c264f0 +FS: 00007f27217f66c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000001b2e80dff8 CR3: 00000000268f8000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + tracepoint_probe_register_prio+0xc0/0x110 kernel/tracepoint.c:464 + register_trace_prio_sched_switch include/trace/events/sched.h:222 [inline] + register_pid_events kernel/trace/trace_events.c:2354 [inline] + event_pid_write.isra.0+0x439/0x7a0 kernel/trace/trace_events.c:2425 + vfs_write+0x24c/0x1150 fs/read_write.c:677 + ksys_write+0x12b/0x250 fs/read_write.c:731 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +We can reproduce the warning by following the steps below: +1. echo 8 >> set_event_notrace_pid. Let tr->filtered_pids owns one pid + and register sched_switch tracepoint. +2. echo ' ' >> set_event_pid, and perform fault injection during chunk + allocation of trace_pid_list_alloc. Let pid_list with no pid and +assign to tr->filtered_pids. +3. echo ' ' >> set_event_pid. Let pid_list is NULL and assign to + tr->filtered_pids. +4. 
echo 9 >> set_event_pid, will trigger the double register + sched_switch tracepoint warning. + +The reason is that syzkaller injects a fault into the chunk allocation +in trace_pid_list_alloc, causing a failure in trace_pid_list_set, which +may trigger double register of the same tracepoint. This only occurs +when the system is about to crash, but to suppress this warning, let's +add failure handling logic to trace_pid_list_set. + +Link: https://lore.kernel.org/20250908024658.2390398-1-pulehui@huaweicloud.com +Fixes: 8d6e90983ade ("tracing: Create a sparse bitmask for pid filtering") +Reported-by: syzbot+161412ccaeff20ce4dde@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/67cb890e.050a0220.d8275.022e.GAE@google.com +Signed-off-by: Pu Lehui +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 87a43c0f90764..91e6bf1b101a7 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -787,7 +787,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, + /* copy the current bits to the new max */ + ret = trace_pid_list_first(filtered_pids, &pid); + while (!ret) { +- trace_pid_list_set(pid_list, pid); ++ ret = trace_pid_list_set(pid_list, pid); ++ if (ret < 0) ++ goto out; ++ + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); + nr_pids++; + } +@@ -824,6 +827,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, + trace_parser_clear(&parser); + ret = 0; + } ++ out: + trace_parser_put(&parser); + + if (ret < 0) { +-- +2.51.0 + diff --git a/queue-6.16/bpf-allow-fall-back-to-interpreter-for-programs-with.patch b/queue-6.16/bpf-allow-fall-back-to-interpreter-for-programs-with.patch new file mode 100644 index 0000000000..c65eb4e3ed --- /dev/null +++ b/queue-6.16/bpf-allow-fall-back-to-interpreter-for-programs-with.patch @@ -0,0 +1,110 @@ +From 
169d12b09ab94483307d6225c26e44dec8c65e81 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 22:46:14 +0800 +Subject: bpf: Allow fall back to interpreter for programs with stack size <= + 512 + +From: KaFai Wan + +[ Upstream commit df0cb5cb50bd54d3cd4d0d83417ceec6a66404aa ] + +OpenWRT users reported regression on ARMv6 devices after updating to latest +HEAD, where tcpdump filter: + +tcpdump "not ether host 3c37121a2b3c and not ether host 184ecbca2a3a \ +and not ether host 14130b4d3f47 and not ether host f0f61cf440b7 \ +and not ether host a84b4dedf471 and not ether host d022be17e1d7 \ +and not ether host 5c497967208b and not ether host 706655784d5b" + +fails with warning: "Kernel filter failed: No error information" +when using config: + # CONFIG_BPF_JIT_ALWAYS_ON is not set + CONFIG_BPF_JIT_DEFAULT_ON=y + +The issue arises because commits: +1. "bpf: Fix array bounds error with may_goto" changed default runtime to + __bpf_prog_ret0_warn when jit_requested = 1 +2. "bpf: Avoid __bpf_prog_ret0_warn when jit fails" returns error when + jit_requested = 1 but jit fails + +This change restores interpreter fallback capability for BPF programs with +stack size <= 512 bytes when jit fails. 
+ +Reported-by: Felix Fietkau +Closes: https://lore.kernel.org/bpf/2e267b4b-0540-45d8-9310-e127bf95fc63@nbd.name/ +Fixes: 6ebc5030e0c5 ("bpf: Fix array bounds error with may_goto") +Signed-off-by: KaFai Wan +Acked-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20250909144614.2991253-1-kafai.wan@linux.dev +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/core.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index d966e971893ab..829f0792d8d83 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -2354,8 +2354,7 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, + const struct bpf_insn *insn) + { + /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON +- * is not working properly, or interpreter is being used when +- * prog->jit_requested is not 0, so warn about it! ++ * is not working properly, so warn about it! + */ + WARN_ON_ONCE(1); + return 0; +@@ -2456,8 +2455,9 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) + return ret; + } + +-static void bpf_prog_select_func(struct bpf_prog *fp) ++static bool bpf_prog_select_interpreter(struct bpf_prog *fp) + { ++ bool select_interpreter = false; + #ifndef CONFIG_BPF_JIT_ALWAYS_ON + u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + u32 idx = (round_up(stack_depth, 32) / 32) - 1; +@@ -2466,15 +2466,16 @@ static void bpf_prog_select_func(struct bpf_prog *fp) + * But for non-JITed programs, we don't need bpf_func, so no bounds + * check needed. 
+ */ +- if (!fp->jit_requested && +- !WARN_ON_ONCE(idx >= ARRAY_SIZE(interpreters))) { ++ if (idx < ARRAY_SIZE(interpreters)) { + fp->bpf_func = interpreters[idx]; ++ select_interpreter = true; + } else { + fp->bpf_func = __bpf_prog_ret0_warn; + } + #else + fp->bpf_func = __bpf_prog_ret0_warn; + #endif ++ return select_interpreter; + } + + /** +@@ -2493,7 +2494,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) + /* In case of BPF to BPF calls, verifier did all the prep + * work with regards to JITing, etc. + */ +- bool jit_needed = fp->jit_requested; ++ bool jit_needed = false; + + if (fp->bpf_func) + goto finalize; +@@ -2502,7 +2503,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) + bpf_prog_has_kfunc_call(fp)) + jit_needed = true; + +- bpf_prog_select_func(fp); ++ if (!bpf_prog_select_interpreter(fp)) ++ jit_needed = true; + + /* eBPF JITs can rewrite the program in case constant + * blinding is active. However, in case of error during +-- +2.51.0 + diff --git a/queue-6.16/bpf-cpumap-disable-page_pool-direct-xdp_return-need-.patch b/queue-6.16/bpf-cpumap-disable-page_pool-direct-xdp_return-need-.patch new file mode 100644 index 0000000000..0bf2cb8905 --- /dev/null +++ b/queue-6.16/bpf-cpumap-disable-page_pool-direct-xdp_return-need-.patch @@ -0,0 +1,80 @@ +From ab43ab27235335692b25079f5fcf86c330560d82 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Aug 2025 20:24:37 +0200 +Subject: bpf, cpumap: Disable page_pool direct xdp_return need larger scope + +From: Jesper Dangaard Brouer + +[ Upstream commit 2b986b9e917bc88f81aa1ed386af63b26c983f1d ] + +When running an XDP bpf_prog on the remote CPU in cpumap code +then we must disable the direct return optimization that +xdp_return can perform for mem_type page_pool. This optimization +assumes code is still executing under RX-NAPI of the original +receiving CPU, which isn't true on this remote CPU. 
+ +The cpumap code already disabled this via helpers +xdp_set_return_frame_no_direct() and xdp_clear_return_frame_no_direct(), +but the scope didn't include xdp_do_flush(). + +When doing XDP_REDIRECT towards e.g devmap this causes the +function bq_xmit_all() to run with direct return optimization +enabled. This can lead to hard to find bugs. The issue +only happens when bq_xmit_all() cannot ndo_xdp_xmit all +frames and them frees them via xdp_return_frame_rx_napi(). + +Fix by expanding scope to include xdp_do_flush(). This was found +by Dragos Tatulea. + +Fixes: 11941f8a8536 ("bpf: cpumap: Implement generic cpumap") +Reported-by: Dragos Tatulea +Reported-by: Chris Arges +Signed-off-by: Jesper Dangaard Brouer +Signed-off-by: Martin KaFai Lau +Signed-off-by: Daniel Borkmann +Tested-by: Chris Arges +Link: https://patch.msgid.link/175519587755.3008742.1088294435150406835.stgit@firesoul +Signed-off-by: Sasha Levin +--- + kernel/bpf/cpumap.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c +index 67e8a2fc1a99d..cfcf7ed57ca0d 100644 +--- a/kernel/bpf/cpumap.c ++++ b/kernel/bpf/cpumap.c +@@ -186,7 +186,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, + struct xdp_buff xdp; + int i, nframes = 0; + +- xdp_set_return_frame_no_direct(); + xdp.rxq = &rxq; + + for (i = 0; i < n; i++) { +@@ -231,7 +230,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, + } + } + +- xdp_clear_return_frame_no_direct(); + stats->pass += nframes; + + return nframes; +@@ -255,6 +253,7 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, + + rcu_read_lock(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); ++ xdp_set_return_frame_no_direct(); + + ret->xdp_n = cpu_map_bpf_prog_run_xdp(rcpu, frames, ret->xdp_n, stats); + if (unlikely(ret->skb_n)) +@@ -264,6 +263,7 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, + if 
(stats->redirect) + xdp_do_flush(); + ++ xdp_clear_return_frame_no_direct(); + bpf_net_ctx_clear(bpf_net_ctx); + rcu_read_unlock(); + +-- +2.51.0 + diff --git a/queue-6.16/bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch b/queue-6.16/bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch new file mode 100644 index 0000000000..fded56db90 --- /dev/null +++ b/queue-6.16/bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch @@ -0,0 +1,59 @@ +From 874607682346130609c2ccb56f841a74d254c732 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 16:36:56 +0200 +Subject: bpf: Fix out-of-bounds dynptr write in bpf_crypto_crypt + +From: Daniel Borkmann + +[ Upstream commit f9bb6ffa7f5ad0f8ee0f53fc4a10655872ee4a14 ] + +Stanislav reported that in bpf_crypto_crypt() the destination dynptr's +size is not validated to be at least as large as the source dynptr's +size before calling into the crypto backend with 'len = src_len'. This +can result in an OOB write when the destination is smaller than the +source. + +Concretely, in mentioned function, psrc and pdst are both linear +buffers fetched from each dynptr: + + psrc = __bpf_dynptr_data(src, src_len); + [...] + pdst = __bpf_dynptr_data_rw(dst, dst_len); + [...] + err = decrypt ? + ctx->type->decrypt(ctx->tfm, psrc, pdst, src_len, piv) : + ctx->type->encrypt(ctx->tfm, psrc, pdst, src_len, piv); + +The crypto backend expects pdst to be large enough with a src_len length +that can be written. Add an additional src_len > dst_len check and bail +out if it's the case. Note that these kfuncs are accessible under root +privileges only. 
+ +Fixes: 3e1c6f35409f ("bpf: make common crypto API for TC/XDP programs") +Reported-by: Stanislav Fort +Signed-off-by: Daniel Borkmann +Cc: Vadim Fedorenko +Reviewed-by: Vadim Fedorenko +Link: https://lore.kernel.org/r/20250829143657.318524-1-daniel@iogearbox.net +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/crypto.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c +index 94854cd9c4cc3..83c4d9943084b 100644 +--- a/kernel/bpf/crypto.c ++++ b/kernel/bpf/crypto.c +@@ -278,7 +278,7 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx, + siv_len = siv ? __bpf_dynptr_size(siv) : 0; + src_len = __bpf_dynptr_size(src); + dst_len = __bpf_dynptr_size(dst); +- if (!src_len || !dst_len) ++ if (!src_len || !dst_len || src_len > dst_len) + return -EINVAL; + + if (siv_len != ctx->siv_len) +-- +2.51.0 + diff --git a/queue-6.16/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch b/queue-6.16/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch new file mode 100644 index 0000000000..4977c48295 --- /dev/null +++ b/queue-6.16/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch @@ -0,0 +1,92 @@ +From 281299f987f23bd3ce14503ba293783f4c3564ed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 09:52:20 +0000 +Subject: bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init() + +From: Peilin Ye + +[ Upstream commit 6d78b4473cdb08b74662355a9e8510bde09c511e ] + +Currently, calling bpf_map_kmalloc_node() from __bpf_async_init() can +cause various locking issues; see the following stack trace (edited for +style) as one example: + +... 
+ [10.011566] do_raw_spin_lock.cold + [10.011570] try_to_wake_up (5) double-acquiring the same + [10.011575] kick_pool rq_lock, causing a hardlockup + [10.011579] __queue_work + [10.011582] queue_work_on + [10.011585] kernfs_notify + [10.011589] cgroup_file_notify + [10.011593] try_charge_memcg (4) memcg accounting raises an + [10.011597] obj_cgroup_charge_pages MEMCG_MAX event + [10.011599] obj_cgroup_charge_account + [10.011600] __memcg_slab_post_alloc_hook + [10.011603] __kmalloc_node_noprof +... + [10.011611] bpf_map_kmalloc_node + [10.011612] __bpf_async_init + [10.011615] bpf_timer_init (3) BPF calls bpf_timer_init() + [10.011617] bpf_prog_xxxxxxxxxxxxxxxx_fcg_runnable + [10.011619] bpf__sched_ext_ops_runnable + [10.011620] enqueue_task_scx (2) BPF runs with rq_lock held + [10.011622] enqueue_task + [10.011626] ttwu_do_activate + [10.011629] sched_ttwu_pending (1) grabs rq_lock +... + +The above was reproduced on bpf-next (b338cf849ec8) by modifying +./tools/sched_ext/scx_flatcg.bpf.c to call bpf_timer_init() during +ops.runnable(), and hacking the memcg accounting code a bit to make +a bpf_timer_init() call more likely to raise an MEMCG_MAX event. + +We have also run into other similar variants (both internally and on +bpf-next), including double-acquiring cgroup_file_kn_lock, the same +worker_pool::lock, etc. + +As suggested by Shakeel, fix this by using __GFP_HIGH instead of +GFP_ATOMIC in __bpf_async_init(), so that e.g. if try_charge_memcg() +raises an MEMCG_MAX event, we call __memcg_memory_event() with +@allow_spinning=false and avoid calling cgroup_file_notify() there. 
+ +Depends on mm patch +"memcg: skip cgroup_file_notify if spinning is not allowed": +https://lore.kernel.org/bpf/20250905201606.66198-1-shakeel.butt@linux.dev/ + +v0 approach s/bpf_map_kmalloc_node/bpf_mem_alloc/ +https://lore.kernel.org/bpf/20250905061919.439648-1-yepeilin@google.com/ +v1 approach: +https://lore.kernel.org/bpf/20250905234547.862249-1-yepeilin@google.com/ + +Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") +Suggested-by: Shakeel Butt +Signed-off-by: Peilin Ye +Link: https://lore.kernel.org/r/20250909095222.2121438-1-yepeilin@google.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/helpers.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c +index fdf8737542ac4..3abbdebb2d9ef 100644 +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -1277,8 +1277,11 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u + goto out; + } + +- /* allocate hrtimer via map_kmalloc to use memcg accounting */ +- cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node); ++ /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until ++ * kmalloc_nolock() is available, avoid locking issues by using ++ * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM). 
++ */ ++ cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node); + if (!cb) { + ret = -ENOMEM; + goto out; +-- +2.51.0 + diff --git a/queue-6.16/cpufreq-amd-pstate-fix-a-regression-leading-to-epp-0.patch b/queue-6.16/cpufreq-amd-pstate-fix-a-regression-leading-to-epp-0.patch new file mode 100644 index 0000000000..2438f33b47 --- /dev/null +++ b/queue-6.16/cpufreq-amd-pstate-fix-a-regression-leading-to-epp-0.patch @@ -0,0 +1,64 @@ +From f3a5b7122e24a0b3cba54df2f1dacee49a6c037f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Aug 2025 00:27:47 -0500 +Subject: cpufreq/amd-pstate: Fix a regression leading to EPP 0 after resume + +From: Mario Limonciello (AMD) + +[ Upstream commit ba3319e5905710abe495b11a1aaf03ebb51d62e2 ] + +During the suspend sequence the cached CPPC request is destroyed +with the expectation that it's restored during resume. This assumption +broke when the separate cache EPP variable was removed, and then it was +broken again by commit 608a76b65288 ("cpufreq/amd-pstate: Add support +for the "Requested CPU Min frequency" BIOS option") which explicitly +set it to zero during suspend. + +Remove the invalidation and set the value during the suspend call to +update limits so that the cached variable can be used to restore on +resume. + +Fixes: 608a76b65288 ("cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option") +Fixes: b7a41156588a ("cpufreq/amd-pstate: Invalidate cppc_req_cached during suspend") +Reported-by: goldens +Closes: https://community.frame.work/t/increased-power-usage-after-resuming-from-suspend-on-ryzen-7040-kernel-6-15-regression/ +Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2391221 +Tested-by: goldens +Tested-by: Willian Wang +Reported-by: Vincent Mauirn +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219981 +Tested-by: Alex De Lorenzo +Reviewed-by: Gautham R. 
Shenoy +Link: https://lore.kernel.org/r/20250826052747.2240670-1-superm1@kernel.org +Signed-off-by: Mario Limonciello (AMD) +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/amd-pstate.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c +index bbb8e18a6e2b9..e9aaf72502e51 100644 +--- a/drivers/cpufreq/amd-pstate.c ++++ b/drivers/cpufreq/amd-pstate.c +@@ -1621,13 +1621,14 @@ static int amd_pstate_suspend(struct cpufreq_policy *policy) + * min_perf value across kexec reboots. If this CPU is just resumed back without kexec, + * the limits, epp and desired perf will get reset to the cached values in cpudata struct + */ +- ret = amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); ++ ret = amd_pstate_update_perf(policy, perf.bios_min_perf, ++ FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached), ++ FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), ++ FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), ++ false); + if (ret) + return ret; + +- /* invalidate to ensure it's rewritten during resume */ +- cpudata->cppc_req_cached = 0; +- + /* set this flag to avoid setting core offline*/ + cpudata->suspended = true; + +-- +2.51.0 + diff --git a/queue-6.16/cpufreq-amd-pstate-fix-setting-of-cppc.min_perf-in-a.patch b/queue-6.16/cpufreq-amd-pstate-fix-setting-of-cppc.min_perf-in-a.patch new file mode 100644 index 0000000000..0a738aadb2 --- /dev/null +++ b/queue-6.16/cpufreq-amd-pstate-fix-setting-of-cppc.min_perf-in-a.patch @@ -0,0 +1,90 @@ +From 340e4fa43da8098d6c71bca21b113dd9c75fa5f3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Aug 2025 09:56:38 +0530 +Subject: cpufreq/amd-pstate: Fix setting of CPPC.min_perf in active mode for + performance governor + +From: Gautham R. 
Shenoy + +[ Upstream commit 220abf77e7c2835cc63ea8cd7158cf83952640af ] + +In the "active" mode of the amd-pstate driver with performance +governor, the CPPC.min_perf is expected to be the nominal_perf. + +However after commit a9b9b4c2a4cd ("cpufreq/amd-pstate: Drop min and +max cached frequencies"), this is not the case when the governor is +switched from performance to powersave and back to performance, and +the CPPC.min_perf will be equal to the scaling_min_freq that was set +for the powersave governor. + +This is because prior to commit a9b9b4c2a4cd ("cpufreq/amd-pstate: +Drop min and max cached frequencies"), amd_pstate_epp_update_limit() +would unconditionally call amd_pstate_update_min_max_limit() and the +latter function would enforce the CPPC.min_perf constraint when the +governor is performance. + +However, after the aforementioned commit, +amd_pstate_update_min_max_limit() is called by +amd_pstate_epp_update_limit() only when either the +scaling_{min/max}_freq is different from the cached value of +cpudata->{min/max}_limit_freq, which wouldn't have changed on a +governor transition from powersave to performance, thus missing out on +enforcing the CPPC.min_perf constraint for the performance governor. + +Fix this by invoking amd_pstate_epp_udpate_limit() not only when the +{min/max} limits have changed from the cached values, but also when +the policy itself has changed. + +Fixes: a9b9b4c2a4cd ("cpufreq/amd-pstate: Drop min and max cached frequencies") +Signed-off-by: Gautham R. 
Shenoy +Reviewed-by: Mario Limonciello +Link: https://lore.kernel.org/r/20250821042638.356-1-gautham.shenoy@amd.com +Signed-off-by: Mario Limonciello (AMD) +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/amd-pstate.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c +index f3477ab377425..bbb8e18a6e2b9 100644 +--- a/drivers/cpufreq/amd-pstate.c ++++ b/drivers/cpufreq/amd-pstate.c +@@ -1547,13 +1547,15 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) + pr_debug("CPU %d exiting\n", policy->cpu); + } + +-static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) ++static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy, bool policy_change) + { + struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf; + u8 epp; + +- if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) ++ if (policy_change || ++ policy->min != cpudata->min_limit_freq || ++ policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); + + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) +@@ -1577,7 +1579,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) + + cpudata->policy = policy->policy; + +- ret = amd_pstate_epp_update_limit(policy); ++ ret = amd_pstate_epp_update_limit(policy, true); + if (ret) + return ret; + +@@ -1651,7 +1653,7 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) + int ret; + + /* enable amd pstate from suspend state*/ +- ret = amd_pstate_epp_update_limit(policy); ++ ret = amd_pstate_epp_update_limit(policy, false); + if (ret) + return ret; + +-- +2.51.0 + diff --git a/queue-6.16/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch b/queue-6.16/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch new file mode 100644 index 0000000000..9fce4ca674 --- /dev/null +++ 
b/queue-6.16/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch @@ -0,0 +1,90 @@ +From 0f9f059231a20acd8c558464235c0e0c69a6c011 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Aug 2025 16:51:00 +0200 +Subject: flexfiles/pNFS: fix NULL checks on result of + ff_layout_choose_ds_for_read + +From: Tigran Mkrtchyan + +[ Upstream commit 5a46d2339a5ae268ede53a221f20433d8ea4f2f9 ] + +Recent commit f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS +errors") has changed the error return type of ff_layout_choose_ds_for_read() from +NULL to an error pointer. However, not all code paths have been updated +to match the change. Thus, some non-NULL checks will accept error pointers +as a valid return value. + +Reported-by: Dan Carpenter +Suggested-by: Dan Carpenter +Fixes: f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS errors") +Signed-off-by: Tigran Mkrtchyan +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 8dc921d835388..f8ab7b4e09e7e 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -773,8 +773,11 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, + continue; + + if (check_device && +- nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) ++ nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) { ++ // reinitialize the error state in case if this is the last iteration ++ ds = ERR_PTR(-EINVAL); + continue; ++ } + + *best_idx = idx; + break; +@@ -804,7 +807,7 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + struct nfs4_pnfs_ds *ds; + + ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); +- if (ds) ++ if (!IS_ERR(ds)) + return ds; + return ff_layout_choose_any_ds_for_read(lseg, 
start_idx, best_idx); + } +@@ -818,7 +821,7 @@ ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, + + ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, + best_idx); +- if (ds || !pgio->pg_mirror_idx) ++ if (!IS_ERR(ds) || !pgio->pg_mirror_idx) + return ds; + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); + } +@@ -868,7 +871,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, + req->wb_nio = 0; + + ds = ff_layout_get_ds_for_read(pgio, &ds_idx); +- if (!ds) { ++ if (IS_ERR(ds)) { + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) + goto out_mds; + pnfs_generic_pg_cleanup(pgio); +@@ -1072,11 +1075,13 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) + { + u32 idx = hdr->pgio_mirror_idx + 1; + u32 new_idx = 0; ++ struct nfs4_pnfs_ds *ds; + +- if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) +- ff_layout_send_layouterror(hdr->lseg); +- else ++ ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); ++ if (IS_ERR(ds)) + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); ++ else ++ ff_layout_send_layouterror(hdr->lseg); + pnfs_read_resend_pnfs(hdr, new_idx); + } + +-- +2.51.0 + diff --git a/queue-6.16/ftrace-samples-fix-function-size-computation.patch b/queue-6.16/ftrace-samples-fix-function-size-computation.patch new file mode 100644 index 0000000000..9aa400ec5d --- /dev/null +++ b/queue-6.16/ftrace-samples-fix-function-size-computation.patch @@ -0,0 +1,38 @@ +From 66cd55561211db135dbf04126ec678300afd492c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Aug 2025 18:16:46 +0200 +Subject: ftrace/samples: Fix function size computation + +From: Vladimir Riabchun + +[ Upstream commit 80d03a40837a9b26750a25122b906c052cc846c9 ] + +In my_tramp1 function .size directive was placed above +ASM_RET instruction, leading to a wrong function size. 
+ +Link: https://lore.kernel.org/aK3d7vxNcO52kEmg@vova-pc +Fixes: 9d907f1ae80b ("samples/ftrace: Fix asm function ELF annotations") +Signed-off-by: Vladimir Riabchun +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + samples/ftrace/ftrace-direct-modify.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c +index cfea7a38befb0..da3a9f2091f55 100644 +--- a/samples/ftrace/ftrace-direct-modify.c ++++ b/samples/ftrace/ftrace-direct-modify.c +@@ -75,8 +75,8 @@ asm ( + CALL_DEPTH_ACCOUNT + " call my_direct_func1\n" + " leave\n" +-" .size my_tramp1, .-my_tramp1\n" + ASM_RET ++" .size my_tramp1, .-my_tramp1\n" + + " .type my_tramp2, @function\n" + " .globl my_tramp2\n" +-- +2.51.0 + diff --git a/queue-6.16/md-keep-recovery_cp-in-mdp_superblock_s.patch b/queue-6.16/md-keep-recovery_cp-in-mdp_superblock_s.patch new file mode 100644 index 0000000000..269393283d --- /dev/null +++ b/queue-6.16/md-keep-recovery_cp-in-mdp_superblock_s.patch @@ -0,0 +1,69 @@ +From b4aef5172b455ef0ec632084c61927bd3950b984 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Aug 2025 12:00:28 +0800 +Subject: md: keep recovery_cp in mdp_superblock_s + +From: Xiao Ni + +[ Upstream commit c27973211ffcdf0a092eec265d5993e64b89adaf ] + +commit 907a99c314a5 ("md: rename recovery_cp to resync_offset") replaces +recovery_cp with resync_offset in mdp_superblock_s which is in md_p.h. +md_p.h is used in userspace too. So mdadm building fails because of this. +This patch revert this change. 
+ +Fixes: 907a99c314a5 ("md: rename recovery_cp to resync_offset") +Signed-off-by: Xiao Ni +Link: https://lore.kernel.org/linux-raid/20250815040028.18085-1-xni@redhat.com +Signed-off-by: Yu Kuai +Signed-off-by: Sasha Levin +--- + drivers/md/md.c | 6 +++--- + include/uapi/linux/raid/md_p.h | 2 +- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 3f355bb85797f..0f41573fa9f5e 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -1406,7 +1406,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru + else { + if (sb->events_hi == sb->cp_events_hi && + sb->events_lo == sb->cp_events_lo) { +- mddev->resync_offset = sb->resync_offset; ++ mddev->resync_offset = sb->recovery_cp; + } else + mddev->resync_offset = 0; + } +@@ -1534,13 +1534,13 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) + mddev->minor_version = sb->minor_version; + if (mddev->in_sync) + { +- sb->resync_offset = mddev->resync_offset; ++ sb->recovery_cp = mddev->resync_offset; + sb->cp_events_hi = (mddev->events>>32); + sb->cp_events_lo = (u32)mddev->events; + if (mddev->resync_offset == MaxSector) + sb->state = (1<< MD_SB_CLEAN); + } else +- sb->resync_offset = 0; ++ sb->recovery_cp = 0; + + sb->layout = mddev->layout; + sb->chunk_size = mddev->chunk_sectors << 9; +diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h +index b139462872775..ac74133a47688 100644 +--- a/include/uapi/linux/raid/md_p.h ++++ b/include/uapi/linux/raid/md_p.h +@@ -173,7 +173,7 @@ typedef struct mdp_superblock_s { + #else + #error unspecified endianness + #endif +- __u32 resync_offset; /* 11 resync checkpoint sector count */ ++ __u32 recovery_cp; /* 11 resync checkpoint sector count */ + /* There are only valid for minor_version > 90 */ + __u64 reshape_position; /* 12,13 next address in array-space for reshape */ + __u32 new_level; /* 14 new level we are reshaping to */ +-- +2.51.0 + diff --git 
a/queue-6.16/nfs-localio-restore-creds-before-releasing-pageio-da.patch b/queue-6.16/nfs-localio-restore-creds-before-releasing-pageio-da.patch new file mode 100644 index 0000000000..332d5cb005 --- /dev/null +++ b/queue-6.16/nfs-localio-restore-creds-before-releasing-pageio-da.patch @@ -0,0 +1,65 @@ +From 5f54f8e90dec4976e005eae72f6242868f4cd0d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Aug 2025 12:49:38 -0400 +Subject: nfs/localio: restore creds before releasing pageio data + +From: Scott Mayhew + +[ Upstream commit 992203a1fba51b025c60ec0c8b0d9223343dea95 ] + +Otherwise if the nfsd filecache code releases the nfsd_file +immediately, it can trigger the BUG_ON(cred == current->cred) in +__put_cred() when it puts the nfsd_file->nf_file->f-cred. + +Fixes: b9f5dd57f4a5 ("nfs/localio: use dedicated workqueues for filesystem read and write") +Signed-off-by: Scott Mayhew +Reviewed-by: Mike Snitzer +Link: https://lore.kernel.org/r/20250807164938.2395136-1-smayhew@redhat.com +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/localio.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c +index 510d0a16cfe91..e2213ef18baed 100644 +--- a/fs/nfs/localio.c ++++ b/fs/nfs/localio.c +@@ -453,12 +453,13 @@ static void nfs_local_call_read(struct work_struct *work) + nfs_local_iter_init(&iter, iocb, READ); + + status = filp->f_op->read_iter(&iocb->kiocb, &iter); ++ ++ revert_creds(save_cred); ++ + if (status != -EIOCBQUEUED) { + nfs_local_read_done(iocb, status); + nfs_local_pgio_release(iocb); + } +- +- revert_creds(save_cred); + } + + static int +@@ -649,14 +650,15 @@ static void nfs_local_call_write(struct work_struct *work) + file_start_write(filp); + status = filp->f_op->write_iter(&iocb->kiocb, &iter); + file_end_write(filp); ++ ++ revert_creds(save_cred); ++ current->flags = old_flags; ++ + if (status != -EIOCBQUEUED) { + nfs_local_write_done(iocb, status); + 
nfs_local_vfs_getattr(iocb); + nfs_local_pgio_release(iocb); + } +- +- revert_creds(save_cred); +- current->flags = old_flags; + } + + static int +-- +2.51.0 + diff --git a/queue-6.16/nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch b/queue-6.16/nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch new file mode 100644 index 0000000000..7e396f47cc --- /dev/null +++ b/queue-6.16/nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch @@ -0,0 +1,55 @@ +From a4f0e054410d32706d10a51896c21eecdc5acb78 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Sep 2025 11:48:57 -0400 +Subject: NFS: nfs_invalidate_folio() must observe the offset and size + arguments + +From: Trond Myklebust + +[ Upstream commit b7b8574225e9d2b5f1fb5483886ab797892f43b5 ] + +If we're truncating part of the folio, then we need to write out the +data on the part that is not covered by the cancellation. + +Fixes: d47992f86b30 ("mm: change invalidatepage prototype to accept length") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/file.c | 7 ++++--- + fs/nfs/write.c | 1 + + 2 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/fs/nfs/file.c b/fs/nfs/file.c +index 033feeab8c346..a16a619fb8c33 100644 +--- a/fs/nfs/file.c ++++ b/fs/nfs/file.c +@@ -437,10 +437,11 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset, + dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n", + folio->index, offset, length); + +- if (offset != 0 || length < folio_size(folio)) +- return; + /* Cancel any unstarted writes on this page */ +- nfs_wb_folio_cancel(inode, folio); ++ if (offset != 0 || length < folio_size(folio)) ++ nfs_wb_folio(inode, folio); ++ else ++ nfs_wb_folio_cancel(inode, folio); + folio_wait_private_2(folio); /* [DEPRECATED] */ + trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length); + } +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index ff29335ed8599..08fd1c0d45ec2 100644 +--- a/fs/nfs/write.c ++++ 
b/fs/nfs/write.c +@@ -2045,6 +2045,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) + * release it */ + nfs_inode_remove_request(req); + nfs_unlock_and_release_request(req); ++ folio_cancel_dirty(folio); + } + + return ret; +-- +2.51.0 + diff --git a/queue-6.16/nfs-serialise-o_direct-i-o-and-truncate.patch b/queue-6.16/nfs-serialise-o_direct-i-o-and-truncate.patch new file mode 100644 index 0000000000..76d3c7d86e --- /dev/null +++ b/queue-6.16/nfs-serialise-o_direct-i-o-and-truncate.patch @@ -0,0 +1,100 @@ +From 3653c77d3ceeb508b6cae3bf980e12f76f057013 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Sep 2025 12:06:23 -0400 +Subject: NFS: Serialise O_DIRECT i/o and truncate() + +From: Trond Myklebust + +[ Upstream commit 9eb90f435415c7da4800974ed943e39b5578ee7f ] + +Ensure that all O_DIRECT reads and writes are complete, and prevent the +initiation of new i/o until the setattr operation that will truncate the +file is complete. + +Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/inode.c | 4 +++- + fs/nfs/internal.h | 10 ++++++++++ + fs/nfs/io.c | 13 ++----------- + 3 files changed, 15 insertions(+), 12 deletions(-) + +diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c +index a2fa6bc4d74e3..a32cc45425e28 100644 +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -761,8 +761,10 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + trace_nfs_setattr_enter(inode); + + /* Write all dirty data */ +- if (S_ISREG(inode->i_mode)) ++ if (S_ISREG(inode->i_mode)) { ++ nfs_file_block_o_direct(NFS_I(inode)); + nfs_sync_inode(inode); ++ } + + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); + if (fattr == NULL) { +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h +index 9dcbc33964922..0ef0fc6aba3b3 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -531,6 +531,16 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) + 
return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0; + } + ++/* Must be called with exclusively locked inode->i_rwsem */ ++static inline void nfs_file_block_o_direct(struct nfs_inode *nfsi) ++{ ++ if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { ++ clear_bit(NFS_INO_ODIRECT, &nfsi->flags); ++ inode_dio_wait(&nfsi->vfs_inode); ++ } ++} ++ ++ + /* namespace.c */ + #define NFS_PATH_CANONICAL 1 + extern char *nfs_path(char **p, struct dentry *dentry, +diff --git a/fs/nfs/io.c b/fs/nfs/io.c +index 3388faf2acb9f..d275b0a250bf3 100644 +--- a/fs/nfs/io.c ++++ b/fs/nfs/io.c +@@ -14,15 +14,6 @@ + + #include "internal.h" + +-/* Call with exclusively locked inode->i_rwsem */ +-static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) +-{ +- if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { +- clear_bit(NFS_INO_ODIRECT, &nfsi->flags); +- inode_dio_wait(inode); +- } +-} +- + /** + * nfs_start_io_read - declare the file is being used for buffered reads + * @inode: file inode +@@ -57,7 +48,7 @@ nfs_start_io_read(struct inode *inode) + err = down_write_killable(&inode->i_rwsem); + if (err) + return err; +- nfs_block_o_direct(nfsi, inode); ++ nfs_file_block_o_direct(nfsi); + downgrade_write(&inode->i_rwsem); + + return 0; +@@ -90,7 +81,7 @@ nfs_start_io_write(struct inode *inode) + + err = down_write_killable(&inode->i_rwsem); + if (!err) +- nfs_block_o_direct(NFS_I(inode), inode); ++ nfs_file_block_o_direct(NFS_I(inode)); + return err; + } + +-- +2.51.0 + diff --git a/queue-6.16/nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch b/queue-6.16/nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch new file mode 100644 index 0000000000..3bd985bfbc --- /dev/null +++ b/queue-6.16/nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch @@ -0,0 +1,36 @@ +From 0f56b9e0c43700a33eaaf76081e3ded90fa68928 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:12:30 -0700 +Subject: NFSv4: Clear NFS_CAP_OPEN_XOR and NFS_CAP_DELEGTIME if not supported + 
+From: Trond Myklebust + +[ Upstream commit b3ac33436030bce37ecb3dcae581ecfaad28078c ] + +_nfs4_server_capabilities() should clear capabilities that are not +supported by the server. + +Fixes: d2a00cceb93a ("NFSv4: Detect support for OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index ccd97dcf115f9..8d492e3b21631 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -4009,7 +4009,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); + server->caps &= + ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | +- NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS); ++ NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS | ++ NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME); + server->fattr_valid = NFS_ATTR_FATTR_V4; + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && + res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) +-- +2.51.0 + diff --git a/queue-6.16/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch b/queue-6.16/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch new file mode 100644 index 0000000000..e47d466251 --- /dev/null +++ b/queue-6.16/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch @@ -0,0 +1,38 @@ +From 3386219821196cb7986f30873ae64cb37439e4f8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:07:22 -0700 +Subject: NFSv4: Clear the NFS_CAP_FS_LOCATIONS flag if it is not set + +From: Trond Myklebust + +[ Upstream commit dd5a8621b886b02f8341c5d4ea68eb2c552ebd3e ] + +_nfs4_server_capabilities() is expected to clear any flags that are not +supported by the server. 
+ +Fixes: 8a59bb93b7e3 ("NFSv4 store server support for fs_location attribute") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index fc86c75372b94..ccd97dcf115f9 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -4007,8 +4007,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f + res.attr_bitmask[2]; + } + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); +- server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | +- NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL); ++ server->caps &= ++ ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | ++ NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS); + server->fattr_valid = NFS_ATTR_FATTR_V4; + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && + res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) +-- +2.51.0 + diff --git a/queue-6.16/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch b/queue-6.16/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch new file mode 100644 index 0000000000..a6e93e17ea --- /dev/null +++ b/queue-6.16/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch @@ -0,0 +1,35 @@ +From 3324f8b63d5a2fb246d0530b29a61fbbee7c3aa7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:15:12 -0700 +Subject: NFSv4: Clear the NFS_CAP_XATTR flag if not supported by the server + +From: Trond Myklebust + +[ Upstream commit 4fb2b677fc1f70ee642c0beecc3cabf226ef5707 ] + +nfs_server_set_fsinfo() shouldn't assume that NFS_CAP_XATTR is unset +on entry to the function. 
+ +Fixes: b78ef845c35d ("NFSv4.2: query the server for extended attribute support") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/client.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/nfs/client.c b/fs/nfs/client.c +index 3bcf5c204578c..97bd9d2a4b0cd 100644 +--- a/fs/nfs/client.c ++++ b/fs/nfs/client.c +@@ -890,6 +890,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, + + if (fsinfo->xattr_support) + server->caps |= NFS_CAP_XATTR; ++ else ++ server->caps &= ~NFS_CAP_XATTR; + #endif + } + +-- +2.51.0 + diff --git a/queue-6.16/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch b/queue-6.16/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch new file mode 100644 index 0000000000..9e579f3ad7 --- /dev/null +++ b/queue-6.16/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch @@ -0,0 +1,35 @@ +From 73f9e443254dd3ce2cf452615cce7a4e19cb6fc2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:02:16 -0700 +Subject: NFSv4: Don't clear capabilities that won't be reset + +From: Trond Myklebust + +[ Upstream commit 31f1a960ad1a14def94fa0b8c25d62b4c032813f ] + +Don't clear the capabilities that are not going to get reset by the call +to _nfs4_server_capabilities(). 
+ +Reported-by: Scott Haiden +Fixes: b01f21cacde9 ("NFS: Fix the setting of capabilities when automounting a new filesystem") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 7e203857f4668..fc86c75372b94 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -4082,7 +4082,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) + }; + int err; + +- nfs_server_set_init_caps(server); + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), +-- +2.51.0 + diff --git a/queue-6.16/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch b/queue-6.16/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch new file mode 100644 index 0000000000..60b100b51c --- /dev/null +++ b/queue-6.16/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch @@ -0,0 +1,42 @@ +From 2e8ae999e26a57c376c0a9d4c332a98439b94e22 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 17:35:16 +0000 +Subject: NFSv4/flexfiles: Fix layout merge mirror check. + +From: Jonathan Curley + +[ Upstream commit dd2fa82473453661d12723c46c9f43d9876a7efd ] + +Typo in ff_lseg_match_mirrors makes the diff ineffective. This results +in merge happening all the time. Merge happening all the time is +problematic because it marks lsegs invalid. Marking lsegs invalid +causes all outstanding IO to get restarted with EAGAIN and connections +to get closed. + +Closing connections constantly triggers race conditions in the RDMA +implementation... 
+ +Fixes: 660d1eb22301c ("pNFS/flexfile: Don't merge layout segments if the mirrors don't match") +Signed-off-by: Jonathan Curley +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index f8ab7b4e09e7e..9edb5f9b0c4e4 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -293,7 +293,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1, + struct pnfs_layout_segment *l2) + { + const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1); +- const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1); ++ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2); + u32 i; + + if (fl1->mirror_array_cnt != fl2->mirror_array_cnt) +-- +2.51.0 + diff --git a/queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch b/queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch new file mode 100644 index 0000000000..851717c723 --- /dev/null +++ b/queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch @@ -0,0 +1,38 @@ +From 0b31cd60bb74fc1e8748e99ff31de207069b9ad4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Sep 2025 10:40:24 -0400 +Subject: NFSv4.2: Serialise O_DIRECT i/o and clone range + +From: Trond Myklebust + +[ Upstream commit c80ebeba1198eac8811ab0dba36ecc13d51e4438 ] + +Ensure that all O_DIRECT reads and writes complete before cloning a file +range, so that both the source and destination are up to date. 
+ +Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4file.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c +index 5e9d66f3466c8..1fa69a0b33ab1 100644 +--- a/fs/nfs/nfs4file.c ++++ b/fs/nfs/nfs4file.c +@@ -291,9 +291,11 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, + + /* flush all pending writes on both src and dst so that server + * has the latest data */ ++ nfs_file_block_o_direct(NFS_I(src_inode)); + ret = nfs_sync_inode(src_inode); + if (ret) + goto out_unlock; ++ nfs_file_block_o_direct(NFS_I(dst_inode)); + ret = nfs_sync_inode(dst_inode); + if (ret) + goto out_unlock; +-- +2.51.0 + diff --git a/queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch b/queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch new file mode 100644 index 0000000000..60688f8e38 --- /dev/null +++ b/queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch @@ -0,0 +1,34 @@ +From fca59e7d66bf36744b66f416123a594162381e16 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Sep 2025 10:54:13 -0400 +Subject: NFSv4.2: Serialise O_DIRECT i/o and copy range + +From: Trond Myklebust + +[ Upstream commit ca247c89900ae90207f4d321e260cd93b7c7d104 ] + +Ensure that all O_DIRECT reads and writes complete before copying a file +range, so that the destination is up to date. 
+ +Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs42proc.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c +index 3774d5b64ba0e..48ee3d5d89c4a 100644 +--- a/fs/nfs/nfs42proc.c ++++ b/fs/nfs/nfs42proc.c +@@ -431,6 +431,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, + return status; + } + ++ nfs_file_block_o_direct(NFS_I(dst_inode)); + status = nfs_sync_inode(dst_inode); + if (status) + return status; +-- +2.51.0 + diff --git a/queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch b/queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch new file mode 100644 index 0000000000..b04661f921 --- /dev/null +++ b/queue-6.16/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch @@ -0,0 +1,34 @@ +From e3b191d7b677f145e75b27d4df05ad7c0963fe2c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Sep 2025 12:11:17 -0400 +Subject: NFSv4.2: Serialise O_DIRECT i/o and fallocate() + +From: Trond Myklebust + +[ Upstream commit b93128f29733af5d427a335978a19884c2c230e2 ] + +Ensure that all O_DIRECT reads and writes complete before calling +fallocate so that we don't race w.r.t. attribute updates. 
+ +Fixes: 99f237832243 ("NFSv4.2: Always flush out writes in nfs42_proc_fallocate()") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs42proc.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c +index 01c01f45358b7..3774d5b64ba0e 100644 +--- a/fs/nfs/nfs42proc.c ++++ b/fs/nfs/nfs42proc.c +@@ -114,6 +114,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + exception.inode = inode; + exception.state = lock->open_context->state; + ++ nfs_file_block_o_direct(NFS_I(inode)); + err = nfs_sync_inode(inode); + if (err) + goto out; +-- +2.51.0 + diff --git a/queue-6.16/proc-fix-type-confusion-in-pde_set_flags.patch b/queue-6.16/proc-fix-type-confusion-in-pde_set_flags.patch new file mode 100644 index 0000000000..be6f8298c8 --- /dev/null +++ b/queue-6.16/proc-fix-type-confusion-in-pde_set_flags.patch @@ -0,0 +1,56 @@ +From 003b6b611ad13f6f31f22a67a21eeabee1e6d839 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Sep 2025 21:57:15 +0800 +Subject: proc: fix type confusion in pde_set_flags() + +From: wangzijie + +[ Upstream commit 0ce9398aa0830f15f92bbed73853f9861c3e74ff ] + +Commit 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc +files") missed a key part in the definition of proc_dir_entry: + +union { + const struct proc_ops *proc_ops; + const struct file_operations *proc_dir_ops; +}; + +So dereference of ->proc_ops assumes it is a proc_ops structure results in +type confusion and make NULL check for 'proc_ops' not work for proc dir. + +Add !S_ISDIR(dp->mode) test before calling pde_set_flags() to fix it. 
+ +Link: https://lkml.kernel.org/r/20250904135715.3972782-1-wangzijie1@honor.com +Fixes: 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc files") +Signed-off-by: wangzijie +Reported-by: Brad Spengler +Closes: https://lore.kernel.org/all/20250903065758.3678537-1-wangzijie1@honor.com/ +Cc: Alexey Dobriyan +Cc: Al Viro +Cc: Christian Brauner +Cc: Jiri Slaby +Cc: Stefano Brivio +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/proc/generic.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/proc/generic.c b/fs/proc/generic.c +index 409bc1d11eca3..8e1e48760ffe0 100644 +--- a/fs/proc/generic.c ++++ b/fs/proc/generic.c +@@ -390,7 +390,8 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + if (proc_alloc_inum(&dp->low_ino)) + goto out_free_entry; + +- pde_set_flags(dp); ++ if (!S_ISDIR(dp->mode)) ++ pde_set_flags(dp); + + write_lock(&proc_subdir_lock); + dp->parent = dir; +-- +2.51.0 + diff --git a/queue-6.16/rqspinlock-choose-trylock-fallback-for-nmi-waiters.patch b/queue-6.16/rqspinlock-choose-trylock-fallback-for-nmi-waiters.patch new file mode 100644 index 0000000000..8b08b5e326 --- /dev/null +++ b/queue-6.16/rqspinlock-choose-trylock-fallback-for-nmi-waiters.patch @@ -0,0 +1,87 @@ +From ec46b7ee52337dfc938c646371b0fddcdad481a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 18:49:59 +0000 +Subject: rqspinlock: Choose trylock fallback for NMI waiters + +From: Kumar Kartikeya Dwivedi + +[ Upstream commit 0d80e7f951be1bdd08d328fd87694be0d6e8aaa8 ] + +Currently, out of all 3 types of waiters in the rqspinlock slow path +(i.e., pending bit waiter, wait queue head waiter, and wait queue +non-head waiter), only the pending bit waiter and wait queue head +waiters apply deadlock checks and a timeout on their waiting loop. 
The +assumption here was that the wait queue head's forward progress would be +sufficient to identify cases where the lock owner or pending bit waiter +is stuck, and non-head waiters relying on the head waiter would prove to +be sufficient for their own forward progress. + +However, the head waiter itself can be preempted by a non-head waiter +for the same lock (AA) or a different lock (ABBA) in a manner that +impedes its forward progress. In such a case, non-head waiters not +performing deadlock and timeout checks becomes insufficient, and the +system can enter a state of lockup. + +This is typically not a concern with non-NMI lock acquisitions, as lock +holders which in run in different contexts (IRQ, non-IRQ) use "irqsave" +variants of the lock APIs, which naturally excludes such lock holders +from preempting one another on the same CPU. + +It might seem likely that a similar case may occur for rqspinlock when +programs are attached to contention tracepoints (begin, end), however, +these tracepoints either precede the enqueue into the wait queue, or +succeed it, therefore cannot be used to preempt a head waiter's waiting +loop. + +We must still be careful against nested kprobe and fentry programs that +may attach to the middle of the head's waiting loop to stall forward +progress and invoke another rqspinlock acquisition that proceeds as a +non-head waiter. To this end, drop CC_FLAGS_FTRACE from the rqspinlock.o +object file. + +For now, this issue is resolved by falling back to a repeated trylock on +the lock word from NMI context, while performing the deadlock checks to +break out early in case forward progress is impossible, and use the +timeout as a final fallback. + +A more involved fix to terminate the queue when such a condition occurs +will be made as a follow up. A selftest to stress this aspect of nested +NMI/non-NMI locking attempts will be added in a subsequent patch to the +bpf-next tree when this fix lands and trees are synchronized. 
+ +Reported-by: Josef Bacik +Fixes: 164c246571e9 ("rqspinlock: Protect waiters in queue from stalls") +Signed-off-by: Kumar Kartikeya Dwivedi +Link: https://lore.kernel.org/r/20250909184959.3509085-1-memxor@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/Makefile | 1 + + kernel/bpf/rqspinlock.c | 2 +- + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile +index 3a335c50e6e3c..12ec926ed7114 100644 +--- a/kernel/bpf/Makefile ++++ b/kernel/bpf/Makefile +@@ -62,3 +62,4 @@ CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_queue_stack_maps.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_lpm_trie.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_ringbuf.o = $(CC_FLAGS_FTRACE) ++CFLAGS_REMOVE_rqspinlock.o = $(CC_FLAGS_FTRACE) +diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c +index 338305c8852cf..804e619f1e006 100644 +--- a/kernel/bpf/rqspinlock.c ++++ b/kernel/bpf/rqspinlock.c +@@ -471,7 +471,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) + * any MCS node. This is not the most elegant solution, but is + * simple enough. 
+ */ +- if (unlikely(idx >= _Q_MAX_NODES)) { ++ if (unlikely(idx >= _Q_MAX_NODES || in_nmi())) { + lockevent_inc(lock_no_node); + RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT); + while (!queued_spin_trylock(lock)) { +-- +2.51.0 + diff --git a/queue-6.16/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch b/queue-6.16/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch new file mode 100644 index 0000000000..3b34ac00f5 --- /dev/null +++ b/queue-6.16/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch @@ -0,0 +1,50 @@ +From eff7c3d367f8618c522a414fa950cfff5a61b191 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Aug 2025 14:05:57 +0200 +Subject: s390/cpum_cf: Deny all sampling events by counter PMU + +From: Thomas Richter + +[ Upstream commit ce971233242b5391d99442271f3ca096fb49818d ] + +Deny all sampling event by the CPUMF counter facility device driver +and return -ENOENT. This return value is used to try other PMUs. +Up to now events for type PERF_TYPE_HARDWARE were not tested for +sampling and returned later on -EOPNOTSUPP. This ends the search +for alternative PMUs. Change that behavior and try other PMUs +instead. 
+ +Fixes: 613a41b0d16e ("s390/cpum_cf: Reject request for sampling in event initialization") +Acked-by: Sumanth Korikkar +Signed-off-by: Thomas Richter +Signed-off-by: Alexander Gordeev +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/perf_cpum_cf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c +index 6a262e198e35e..952cc8d103693 100644 +--- a/arch/s390/kernel/perf_cpum_cf.c ++++ b/arch/s390/kernel/perf_cpum_cf.c +@@ -761,8 +761,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) + break; + + case PERF_TYPE_HARDWARE: +- if (is_sampling_event(event)) /* No sampling support */ +- return -ENOENT; + ev = attr->config; + if (!attr->exclude_user && attr->exclude_kernel) { + /* +@@ -860,6 +858,8 @@ static int cpumf_pmu_event_init(struct perf_event *event) + unsigned int type = event->attr.type; + int err = -ENOENT; + ++ if (is_sampling_event(event)) /* No sampling support */ ++ return err; + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) +-- +2.51.0 + diff --git a/queue-6.16/s390-pai-deny-all-events-not-handled-by-this-pmu.patch b/queue-6.16/s390-pai-deny-all-events-not-handled-by-this-pmu.patch new file mode 100644 index 0000000000..8eebd47988 --- /dev/null +++ b/queue-6.16/s390-pai-deny-all-events-not-handled-by-this-pmu.patch @@ -0,0 +1,60 @@ +From 3676d58f16149634ade119fd3b202ca1cb7d0cc0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Aug 2025 09:53:27 +0200 +Subject: s390/pai: Deny all events not handled by this PMU + +From: Thomas Richter + +[ Upstream commit 85941afd2c404247e583c827fae0a45da1c1d92c ] + +Each PAI PMU device driver returns -EINVAL when an event is out of +its accepted range. This return value aborts the search for an +alternative PMU device driver to handle this event. +Change the return value to -ENOENT. 
This return value is used to +try other PMUs instead. This makes the PMUs more robust when +the sequence of PMU device driver initialization changes (at boot time) +or by using modules. + +Fixes: 39d62336f5c12 ("s390/pai: add support for cryptography counters") +Acked-by: Sumanth Korikkar +Signed-off-by: Thomas Richter +Signed-off-by: Alexander Gordeev +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/perf_pai_crypto.c | 4 ++-- + arch/s390/kernel/perf_pai_ext.c | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c +index 63875270941bc..01cc6493367a4 100644 +--- a/arch/s390/kernel/perf_pai_crypto.c ++++ b/arch/s390/kernel/perf_pai_crypto.c +@@ -286,10 +286,10 @@ static int paicrypt_event_init(struct perf_event *event) + /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ + if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) + return -ENOENT; +- /* PAI crypto event must be in valid range */ ++ /* PAI crypto event must be in valid range, try others if not */ + if (a->config < PAI_CRYPTO_BASE || + a->config > PAI_CRYPTO_BASE + paicrypt_cnt) +- return -EINVAL; ++ return -ENOENT; + /* Allow only CRYPTO_ALL for sampling */ + if (a->sample_period && a->config != PAI_CRYPTO_BASE) + return -EINVAL; +diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c +index fd14d5ebccbca..d65a9730753c5 100644 +--- a/arch/s390/kernel/perf_pai_ext.c ++++ b/arch/s390/kernel/perf_pai_ext.c +@@ -266,7 +266,7 @@ static int paiext_event_valid(struct perf_event *event) + event->hw.config_base = offsetof(struct paiext_cb, acc); + return 0; + } +- return -EINVAL; ++ return -ENOENT; + } + + /* Might be called on different CPU than the one the event is intended for. 
*/ +-- +2.51.0 + diff --git a/queue-6.16/series b/queue-6.16/series index f1fe423e06..ef6960ab6a 100644 --- a/queue-6.16/series +++ b/queue-6.16/series @@ -14,3 +14,34 @@ iommu-vt-d-make-iotlb_sync_map-a-static-property-of-.patch bluetooth-hci_conn-fix-running-bis_cleanup-for-hci_c.patch bluetooth-iso-fix-getname-not-returning-broadcast-fi.patch revert-drm-amdgpu-add-more-checks-to-psp-mailbox.patch +flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch +sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch +nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch +md-keep-recovery_cp-in-mdp_superblock_s.patch +trace-fgraph-fix-error-handling.patch +nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch +nfsv4-clear-nfs_cap_open_xor-and-nfs_cap_delegtime-i.patch +nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch +tracing-fix-tracing_marker-may-trigger-page-fault-du.patch +nfs-localio-restore-creds-before-releasing-pageio-da.patch +ftrace-samples-fix-function-size-computation.patch +tracing-osnoise-fix-null-ptr-deref-in-bitmap_parseli.patch +nfs-serialise-o_direct-i-o-and-truncate.patch +nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch +nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch +nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch +nfs-nfs_invalidate_folio-must-observe-the-offset-and.patch +nfsv4-flexfiles-fix-layout-merge-mirror-check.patch +tracing-silence-warning-when-chunk-allocation-fails-.patch +bpf-cpumap-disable-page_pool-direct-xdp_return-need-.patch +cpufreq-amd-pstate-fix-setting-of-cppc.min_perf-in-a.patch +s390-pai-deny-all-events-not-handled-by-this-pmu.patch +s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch +cpufreq-amd-pstate-fix-a-regression-leading-to-epp-0.patch +bpf-fix-out-of-bounds-dynptr-write-in-bpf_crypto_cry.patch +xsk-fix-immature-cq-descriptor-production.patch +rqspinlock-choose-trylock-fallback-for-nmi-waiters.patch +bpf-allow-fall-back-to-interpreter-for-programs-with.patch 
+bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch +tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch +proc-fix-type-confusion-in-pde_set_flags.patch diff --git a/queue-6.16/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch b/queue-6.16/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch new file mode 100644 index 0000000000..e9a68c8a47 --- /dev/null +++ b/queue-6.16/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch @@ -0,0 +1,48 @@ +From 379f1cbaa91f536298b4445f903aea347c69f839 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Sep 2025 16:09:57 -0500 +Subject: SUNRPC: call xs_sock_process_cmsg for all cmsg + +From: Justin Worrell + +[ Upstream commit 9559d2fffd4f9b892165eed48198a0e5cb8504e6 ] + +xs_sock_recv_cmsg was failing to call xs_sock_process_cmsg for any cmsg +type other than TLS_RECORD_TYPE_ALERT (TLS_RECORD_TYPE_DATA, and other +values not handled.) Based on my reading of the previous commit +(cc5d5908: sunrpc: fix client side handling of tls alerts), it looks +like only iov_iter_revert should be conditional on TLS_RECORD_TYPE_ALERT +(but that other cmsg types should still call xs_sock_process_cmsg). On +my machine, I was unable to connect (over mtls) to an NFS share hosted +on FreeBSD. With this patch applied, I am able to mount the share again. 
+ +Fixes: cc5d59081fa2 ("sunrpc: fix client side handling of tls alerts") +Signed-off-by: Justin Worrell +Reviewed-and-tested-by: Scott Mayhew +Link: https://lore.kernel.org/r/20250904211038.12874-3-jworrell@gmail.com +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + net/sunrpc/xprtsock.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index c5f7bbf5775ff..3aa987e7f0724 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -407,9 +407,9 @@ xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags) + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, flags); +- if (ret > 0 && +- tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { +- iov_iter_revert(&msg.msg_iter, ret); ++ if (ret > 0) { ++ if (tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) ++ iov_iter_revert(&msg.msg_iter, ret); + ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg, + -EAGAIN); + } +-- +2.51.0 + diff --git a/queue-6.16/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch b/queue-6.16/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch new file mode 100644 index 0000000000..543eb27a67 --- /dev/null +++ b/queue-6.16/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch @@ -0,0 +1,98 @@ +From 8ba9abf1565eaa34bea99c5b0a8079d0e869479f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 23:26:12 +0000 +Subject: tcp_bpf: Call sk_msg_free() when tcp_bpf_send_verdict() fails to + allocate psock->cork. + +From: Kuniyuki Iwashima + +[ Upstream commit a3967baad4d533dc254c31e0d221e51c8d223d58 ] + +syzbot reported the splat below. [0] + +The repro does the following: + + 1. Load a sk_msg prog that calls bpf_msg_cork_bytes(msg, cork_bytes) + 2. Attach the prog to a SOCKMAP + 3. Add a socket to the SOCKMAP + 4. Activate fault injection + 5. 
Send data less than cork_bytes + +At 5., the data is carried over to the next sendmsg() as it is +smaller than the cork_bytes specified by bpf_msg_cork_bytes(). + +Then, tcp_bpf_send_verdict() tries to allocate psock->cork to hold +the data, but this fails silently due to fault injection + __GFP_NOWARN. + +If the allocation fails, we need to revert the sk->sk_forward_alloc +change done by sk_msg_alloc(). + +Let's call sk_msg_free() when tcp_bpf_send_verdict fails to allocate +psock->cork. + +The "*copied" also needs to be updated such that a proper error can +be returned to the caller, sendmsg. It fails to allocate psock->cork. +Nothing has been corked so far, so this patch simply sets "*copied" +to 0. + +[0]: +WARNING: net/ipv4/af_inet.c:156 at inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156, CPU#1: syz-executor/5983 +Modules linked in: +CPU: 1 UID: 0 PID: 5983 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 +RIP: 0010:inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156 +Code: 0f 0b 90 e9 62 fe ff ff e8 7a db b5 f7 90 0f 0b 90 e9 95 fe ff ff e8 6c db b5 f7 90 0f 0b 90 e9 bb fe ff ff e8 5e db b5 f7 90 <0f> 0b 90 e9 e1 fe ff ff 89 f9 80 e1 07 80 c1 03 38 c1 0f 8c 9f fc +RSP: 0018:ffffc90000a08b48 EFLAGS: 00010246 +RAX: ffffffff8a09d0b2 RBX: dffffc0000000000 RCX: ffff888024a23c80 +RDX: 0000000000000100 RSI: 0000000000000fff RDI: 0000000000000000 +RBP: 0000000000000fff R08: ffff88807e07c627 R09: 1ffff1100fc0f8c4 +R10: dffffc0000000000 R11: ffffed100fc0f8c5 R12: ffff88807e07c380 +R13: dffffc0000000000 R14: ffff88807e07c60c R15: 1ffff1100fc0f872 +FS: 00005555604c4500(0000) GS:ffff888125af1000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005555604df5c8 CR3: 0000000032b06000 CR4: 00000000003526f0 +Call Trace: + + __sk_destruct+0x86/0x660 net/core/sock.c:2339 + rcu_do_batch kernel/rcu/tree.c:2605 [inline] + 
rcu_core+0xca8/0x1770 kernel/rcu/tree.c:2861 + handle_softirqs+0x286/0x870 kernel/softirq.c:579 + __do_softirq kernel/softirq.c:613 [inline] + invoke_softirq kernel/softirq.c:453 [inline] + __irq_exit_rcu+0xca/0x1f0 kernel/softirq.c:680 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:696 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1052 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1052 + + +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Reported-by: syzbot+4cabd1d2fa917a456db8@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/68c0b6b5.050a0220.3c6139.0013.GAE@google.com/ +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: Martin KaFai Lau +Link: https://patch.msgid.link/20250909232623.4151337-1-kuniyu@google.com +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_bpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index ba581785adb4b..a268e1595b22a 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -408,8 +408,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, + if (!psock->cork) { + psock->cork = kzalloc(sizeof(*psock->cork), + GFP_ATOMIC | __GFP_NOWARN); +- if (!psock->cork) ++ if (!psock->cork) { ++ sk_msg_free(sk, msg); ++ *copied = 0; + return -ENOMEM; ++ } + } + memcpy(psock->cork, msg, sizeof(*msg)); + return 0; +-- +2.51.0 + diff --git a/queue-6.16/trace-fgraph-fix-error-handling.patch b/queue-6.16/trace-fgraph-fix-error-handling.patch new file mode 100644 index 0000000000..932c920e4a --- /dev/null +++ b/queue-6.16/trace-fgraph-fix-error-handling.patch @@ -0,0 +1,51 @@ +From f0d253e4985481b377ef7d2ec9db7ffab6229041 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Sep 2025 22:06:18 -0700 +Subject: trace/fgraph: Fix error handling + +From: Guenter Roeck + +[ Upstream commit ab1396af7595e7d49a3850481b24d7fe7cbdfd31 ] + +Commit edede7a6dcd7 
("trace/fgraph: Fix the warning caused by missing +unregister notifier") added a call to unregister the PM notifier if +register_ftrace_graph() failed. It does so unconditionally. However, +the PM notifier is only registered with the first call to +register_ftrace_graph(). If the first registration was successful and +a subsequent registration failed, the notifier is now unregistered even +if ftrace graphs are still registered. + +Fix the problem by only unregistering the PM notifier during error handling +if there are no active fgraph registrations. + +Fixes: edede7a6dcd7 ("trace/fgraph: Fix the warning caused by missing unregister notifier") +Closes: https://lore.kernel.org/all/63b0ba5a-a928-438e-84f9-93028dd72e54@roeck-us.net/ +Cc: Ye Weihua +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Link: https://lore.kernel.org/20250906050618.2634078-1-linux@roeck-us.net +Signed-off-by: Guenter Roeck +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/fgraph.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c +index dac2d58f39490..db40ec5cc9d73 100644 +--- a/kernel/trace/fgraph.c ++++ b/kernel/trace/fgraph.c +@@ -1393,7 +1393,8 @@ int register_ftrace_graph(struct fgraph_ops *gops) + ftrace_graph_active--; + gops->saved_func = NULL; + fgraph_lru_release_index(i); +- unregister_pm_notifier(&ftrace_suspend_notifier); ++ if (!ftrace_graph_active) ++ unregister_pm_notifier(&ftrace_suspend_notifier); + } + return ret; + } +-- +2.51.0 + diff --git a/queue-6.16/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch b/queue-6.16/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch new file mode 100644 index 0000000000..cceb2c9331 --- /dev/null +++ b/queue-6.16/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch @@ -0,0 +1,89 @@ +From eedb89b0d6ecad898112b1b1059e8c5bb7d72bc5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Aug 2025 
10:51:52 +0000 +Subject: tracing: Fix tracing_marker may trigger page fault during + preempt_disable + +From: Luo Gengkun + +[ Upstream commit 3d62ab32df065e4a7797204a918f6489ddb8a237 ] + +Both tracing_mark_write and tracing_mark_raw_write call +__copy_from_user_inatomic during preempt_disable. But in some case, +__copy_from_user_inatomic may trigger page fault, and will call schedule() +subtly. And if a task is migrated to other cpu, the following warning will +be trigger: + if (RB_WARN_ON(cpu_buffer, + !local_read(&cpu_buffer->committing))) + +An example can illustrate this issue: + +process flow CPU +--------------------------------------------------------------------- + +tracing_mark_raw_write(): cpu:0 + ... + ring_buffer_lock_reserve(): cpu:0 + ... + cpu = raw_smp_processor_id() cpu:0 + cpu_buffer = buffer->buffers[cpu] cpu:0 + ... + ... + __copy_from_user_inatomic(): cpu:0 + ... + # page fault + do_mem_abort(): cpu:0 + ... + # Call schedule + schedule() cpu:0 + ... + # the task schedule to cpu1 + __buffer_unlock_commit(): cpu:1 + ... + ring_buffer_unlock_commit(): cpu:1 + ... + cpu = raw_smp_processor_id() cpu:1 + cpu_buffer = buffer->buffers[cpu] cpu:1 + +As shown above, the process will acquire cpuid twice and the return values +are not the same. + +To fix this problem using copy_from_user_nofault instead of +__copy_from_user_inatomic, as the former performs 'access_ok' before +copying. 
+ +Link: https://lore.kernel.org/20250819105152.2766363-1-luogengkun@huaweicloud.com +Fixes: 656c7f0d2d2b ("tracing: Replace kmap with copy_from_user() in trace_marker writing") +Signed-off-by: Luo Gengkun +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index b91fa02cc54a6..9329ac1667551 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7264,7 +7264,7 @@ static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user + entry = ring_buffer_event_data(event); + entry->ip = ip; + +- len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->buf, ubuf, cnt); + if (len) { + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); + cnt = FAULTED_SIZE; +@@ -7361,7 +7361,7 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, + + entry = ring_buffer_event_data(event); + +- len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->id, ubuf, cnt); + if (len) { + entry->id = -1; + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); +-- +2.51.0 + diff --git a/queue-6.16/tracing-osnoise-fix-null-ptr-deref-in-bitmap_parseli.patch b/queue-6.16/tracing-osnoise-fix-null-ptr-deref-in-bitmap_parseli.patch new file mode 100644 index 0000000000..e91a3b0d66 --- /dev/null +++ b/queue-6.16/tracing-osnoise-fix-null-ptr-deref-in-bitmap_parseli.patch @@ -0,0 +1,63 @@ +From c48033a9c3c993d8bd250140649054e02cf8d787 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Sep 2025 11:56:10 +0800 +Subject: tracing/osnoise: Fix null-ptr-deref in bitmap_parselist() + +From: Wang Liang + +[ Upstream commit c1628c00c4351dd0727ef7f670694f68d9e663d8 ] + +A crash was observed with the following output: + +BUG: kernel NULL pointer dereference, address: 0000000000000010 +Oops: Oops: 0000 
[#1] SMP NOPTI +CPU: 2 UID: 0 PID: 92 Comm: osnoise_cpus Not tainted 6.17.0-rc4-00201-gd69eb204c255 #138 PREEMPT(voluntary) +RIP: 0010:bitmap_parselist+0x53/0x3e0 +Call Trace: + + osnoise_cpus_write+0x7a/0x190 + vfs_write+0xf8/0x410 + ? do_sys_openat2+0x88/0xd0 + ksys_write+0x60/0xd0 + do_syscall_64+0xa4/0x260 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + + +This issue can be reproduced by below code: + +fd=open("/sys/kernel/debug/tracing/osnoise/cpus", O_WRONLY); +write(fd, "0-2", 0); + +When user pass 'count=0' to osnoise_cpus_write(), kmalloc() will return +ZERO_SIZE_PTR (16) and cpulist_parse() treat it as a normal value, which +trigger the null pointer dereference. Add check for the parameter 'count'. + +Cc: +Cc: +Cc: +Link: https://lore.kernel.org/20250906035610.3880282-1-wangliang74@huawei.com +Fixes: 17f89102fe23 ("tracing/osnoise: Allow arbitrarily long CPU string") +Signed-off-by: Wang Liang +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_osnoise.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c +index fd259da0aa645..337bc0eb5d71b 100644 +--- a/kernel/trace/trace_osnoise.c ++++ b/kernel/trace/trace_osnoise.c +@@ -2322,6 +2322,9 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, + int running, err; + char *buf __free(kfree) = NULL; + ++ if (count < 1) ++ return 0; ++ + buf = kmalloc(count, GFP_KERNEL); + if (!buf) + return -ENOMEM; +-- +2.51.0 + diff --git a/queue-6.16/tracing-silence-warning-when-chunk-allocation-fails-.patch b/queue-6.16/tracing-silence-warning-when-chunk-allocation-fails-.patch new file mode 100644 index 0000000000..794f3c420b --- /dev/null +++ b/queue-6.16/tracing-silence-warning-when-chunk-allocation-fails-.patch @@ -0,0 +1,97 @@ +From 28decb152feb97a371708328d96bc11e70cb2ed2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 02:46:58 +0000 +Subject: tracing: Silence warning 
when chunk allocation fails in + trace_pid_write + +From: Pu Lehui + +[ Upstream commit cd4453c5e983cf1fd5757e9acb915adb1e4602b6 ] + +Syzkaller trigger a fault injection warning: + +WARNING: CPU: 1 PID: 12326 at tracepoint_add_func+0xbfc/0xeb0 +Modules linked in: +CPU: 1 UID: 0 PID: 12326 Comm: syz.6.10325 Tainted: G U 6.14.0-rc5-syzkaller #0 +Tainted: [U]=USER +Hardware name: Google Compute Engine/Google Compute Engine +RIP: 0010:tracepoint_add_func+0xbfc/0xeb0 kernel/tracepoint.c:294 +Code: 09 fe ff 90 0f 0b 90 0f b6 74 24 43 31 ff 41 bc ea ff ff ff +RSP: 0018:ffffc9000414fb48 EFLAGS: 00010283 +RAX: 00000000000012a1 RBX: ffffffff8e240ae0 RCX: ffffc90014b78000 +RDX: 0000000000080000 RSI: ffffffff81bbd78b RDI: 0000000000000001 +RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000001 R12: ffffffffffffffef +R13: 0000000000000000 R14: dffffc0000000000 R15: ffffffff81c264f0 +FS: 00007f27217f66c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000001b2e80dff8 CR3: 00000000268f8000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + tracepoint_probe_register_prio+0xc0/0x110 kernel/tracepoint.c:464 + register_trace_prio_sched_switch include/trace/events/sched.h:222 [inline] + register_pid_events kernel/trace/trace_events.c:2354 [inline] + event_pid_write.isra.0+0x439/0x7a0 kernel/trace/trace_events.c:2425 + vfs_write+0x24c/0x1150 fs/read_write.c:677 + ksys_write+0x12b/0x250 fs/read_write.c:731 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +We can reproduce the warning by following the steps below: +1. echo 8 >> set_event_notrace_pid. Let tr->filtered_pids owns one pid + and register sched_switch tracepoint. +2. 
echo ' ' >> set_event_pid, and perform fault injection during chunk + allocation of trace_pid_list_alloc. Let pid_list with no pid and +assign to tr->filtered_pids. +3. echo ' ' >> set_event_pid. Let pid_list is NULL and assign to + tr->filtered_pids. +4. echo 9 >> set_event_pid, will trigger the double register + sched_switch tracepoint warning. + +The reason is that syzkaller injects a fault into the chunk allocation +in trace_pid_list_alloc, causing a failure in trace_pid_list_set, which +may trigger double register of the same tracepoint. This only occurs +when the system is about to crash, but to suppress this warning, let's +add failure handling logic to trace_pid_list_set. + +Link: https://lore.kernel.org/20250908024658.2390398-1-pulehui@huaweicloud.com +Fixes: 8d6e90983ade ("tracing: Create a sparse bitmask for pid filtering") +Reported-by: syzbot+161412ccaeff20ce4dde@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/67cb890e.050a0220.d8275.022e.GAE@google.com +Signed-off-by: Pu Lehui +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 9329ac1667551..56f6cebdb2299 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -846,7 +846,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, + /* copy the current bits to the new max */ + ret = trace_pid_list_first(filtered_pids, &pid); + while (!ret) { +- trace_pid_list_set(pid_list, pid); ++ ret = trace_pid_list_set(pid_list, pid); ++ if (ret < 0) ++ goto out; ++ + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); + nr_pids++; + } +@@ -883,6 +886,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, + trace_parser_clear(&parser); + ret = 0; + } ++ out: + trace_parser_put(&parser); + + if (ret < 0) { +-- +2.51.0 + diff --git a/queue-6.16/xsk-fix-immature-cq-descriptor-production.patch 
b/queue-6.16/xsk-fix-immature-cq-descriptor-production.patch new file mode 100644 index 0000000000..3ec98933d4 --- /dev/null +++ b/queue-6.16/xsk-fix-immature-cq-descriptor-production.patch @@ -0,0 +1,324 @@ +From 63191cc91095e04a4b030522c003a3f73773fa76 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Sep 2025 21:49:07 +0200 +Subject: xsk: Fix immature cq descriptor production + +From: Maciej Fijalkowski + +[ Upstream commit 30f241fcf52aaaef7ac16e66530faa11be78a865 ] + +Eryk reported an issue that I have put under Closes: tag, related to +umem addrs being prematurely produced onto pool's completion queue. +Let us make the skb's destructor responsible for producing all addrs +that given skb used. + +Commit from fixes tag introduced the buggy behavior, it was not broken +from day 1, but rather when xsk multi-buffer got introduced. + +In order to mitigate performance impact as much as possible, mimic the +linear and frag parts within skb by storing the first address from XSK +descriptor at sk_buff::destructor_arg. For fragments, store them at ::cb +via list. The nodes that will go onto list will be allocated via +kmem_cache. xsk_destruct_skb() will consume address stored at +::destructor_arg and optionally go through list from ::cb, if count of +descriptors associated with this particular skb is bigger than 1. + +Previous approach where whole array for storing UMEM addresses from XSK +descriptors was pre-allocated during first fragment processing yielded +too big performance regression for 64b traffic. In current approach +impact is much reduced on my tests and for jumbo frames I observed +traffic being slower by at most 9%. + +Magnus suggested to have this way of processing special cased for +XDP_SHARED_UMEM, so we would identify this during bind and set different +hooks for 'backpressure mechanism' on CQ and for skb destructor, but +given that results looked promising on my side I decided to have a +single data path for XSK generic Tx. 
I suppose other auxiliary stuff +would have to land as well in order to make it work. + +Fixes: b7f72a30e9ac ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path") +Reported-by: Eryk Kubanski +Closes: https://lore.kernel.org/netdev/20250530103456.53564-1-e.kubanski@partner.samsung.com/ +Acked-by: Stanislav Fomichev +Signed-off-by: Maciej Fijalkowski +Tested-by: Jason Xing +Reviewed-by: Jason Xing +Link: https://lore.kernel.org/r/20250904194907.2342177-1-maciej.fijalkowski@intel.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + net/xdp/xsk.c | 113 ++++++++++++++++++++++++++++++++++++++------ + net/xdp/xsk_queue.h | 12 +++++ + 2 files changed, 111 insertions(+), 14 deletions(-) + +diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c +index 72c000c0ae5f5..de331541fdb38 100644 +--- a/net/xdp/xsk.c ++++ b/net/xdp/xsk.c +@@ -36,6 +36,20 @@ + #define TX_BATCH_SIZE 32 + #define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE) + ++struct xsk_addr_node { ++ u64 addr; ++ struct list_head addr_node; ++}; ++ ++struct xsk_addr_head { ++ u32 num_descs; ++ struct list_head addrs_list; ++}; ++ ++static struct kmem_cache *xsk_tx_generic_cache; ++ ++#define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb)) ++ + void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) + { + if (pool->cached_need_wakeup & XDP_WAKEUP_RX) +@@ -528,24 +542,43 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags) + return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); + } + +-static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr) ++static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool) + { + unsigned long flags; + int ret; + + spin_lock_irqsave(&pool->cq_lock, flags); +- ret = xskq_prod_reserve_addr(pool->cq, addr); ++ ret = xskq_prod_reserve(pool->cq); + spin_unlock_irqrestore(&pool->cq_lock, flags); + + return ret; + } + +-static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n) ++static void xsk_cq_submit_addr_locked(struct 
xsk_buff_pool *pool, ++ struct sk_buff *skb) + { ++ struct xsk_addr_node *pos, *tmp; ++ u32 descs_processed = 0; + unsigned long flags; ++ u32 idx; + + spin_lock_irqsave(&pool->cq_lock, flags); +- xskq_prod_submit_n(pool->cq, n); ++ idx = xskq_get_prod(pool->cq); ++ ++ xskq_prod_write_addr(pool->cq, idx, ++ (u64)(uintptr_t)skb_shinfo(skb)->destructor_arg); ++ descs_processed++; ++ ++ if (unlikely(XSKCB(skb)->num_descs > 1)) { ++ list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) { ++ xskq_prod_write_addr(pool->cq, idx + descs_processed, ++ pos->addr); ++ descs_processed++; ++ list_del(&pos->addr_node); ++ kmem_cache_free(xsk_tx_generic_cache, pos); ++ } ++ } ++ xskq_prod_submit_n(pool->cq, descs_processed); + spin_unlock_irqrestore(&pool->cq_lock, flags); + } + +@@ -558,9 +591,14 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n) + spin_unlock_irqrestore(&pool->cq_lock, flags); + } + ++static void xsk_inc_num_desc(struct sk_buff *skb) ++{ ++ XSKCB(skb)->num_descs++; ++} ++ + static u32 xsk_get_num_desc(struct sk_buff *skb) + { +- return skb ? 
(long)skb_shinfo(skb)->destructor_arg : 0; ++ return XSKCB(skb)->num_descs; + } + + static void xsk_destruct_skb(struct sk_buff *skb) +@@ -572,23 +610,33 @@ static void xsk_destruct_skb(struct sk_buff *skb) + *compl->tx_timestamp = ktime_get_tai_fast_ns(); + } + +- xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb)); ++ xsk_cq_submit_addr_locked(xdp_sk(skb->sk)->pool, skb); + sock_wfree(skb); + } + +-static void xsk_set_destructor_arg(struct sk_buff *skb) ++static void xsk_set_destructor_arg(struct sk_buff *skb, u64 addr) + { +- long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1; +- +- skb_shinfo(skb)->destructor_arg = (void *)num; ++ BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb)); ++ INIT_LIST_HEAD(&XSKCB(skb)->addrs_list); ++ XSKCB(skb)->num_descs = 0; ++ skb_shinfo(skb)->destructor_arg = (void *)(uintptr_t)addr; + } + + static void xsk_consume_skb(struct sk_buff *skb) + { + struct xdp_sock *xs = xdp_sk(skb->sk); ++ u32 num_descs = xsk_get_num_desc(skb); ++ struct xsk_addr_node *pos, *tmp; ++ ++ if (unlikely(num_descs > 1)) { ++ list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) { ++ list_del(&pos->addr_node); ++ kmem_cache_free(xsk_tx_generic_cache, pos); ++ } ++ } + + skb->destructor = sock_wfree; +- xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb)); ++ xsk_cq_cancel_locked(xs->pool, num_descs); + /* Free skb without triggering the perf drop trace */ + consume_skb(skb); + xs->skb = NULL; +@@ -605,6 +653,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, + { + struct xsk_buff_pool *pool = xs->pool; + u32 hr, len, ts, offset, copy, copied; ++ struct xsk_addr_node *xsk_addr; + struct sk_buff *skb = xs->skb; + struct page *page; + void *buffer; +@@ -619,6 +668,19 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, + return ERR_PTR(err); + + skb_reserve(skb, hr); ++ ++ xsk_set_destructor_arg(skb, desc->addr); ++ } else { ++ xsk_addr = 
kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); ++ if (!xsk_addr) ++ return ERR_PTR(-ENOMEM); ++ ++ /* in case of -EOVERFLOW that could happen below, ++ * xsk_consume_skb() will release this node as whole skb ++ * would be dropped, which implies freeing all list elements ++ */ ++ xsk_addr->addr = desc->addr; ++ list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list); + } + + addr = desc->addr; +@@ -690,8 +752,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, + err = skb_store_bits(skb, 0, buffer, len); + if (unlikely(err)) + goto free_err; ++ ++ xsk_set_destructor_arg(skb, desc->addr); + } else { + int nr_frags = skb_shinfo(skb)->nr_frags; ++ struct xsk_addr_node *xsk_addr; + struct page *page; + u8 *vaddr; + +@@ -706,12 +771,22 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, + goto free_err; + } + ++ xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); ++ if (!xsk_addr) { ++ __free_page(page); ++ err = -ENOMEM; ++ goto free_err; ++ } ++ + vaddr = kmap_local_page(page); + memcpy(vaddr, buffer, len); + kunmap_local(vaddr); + + skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE); + refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc); ++ ++ xsk_addr->addr = desc->addr; ++ list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list); + } + + if (first_frag && desc->options & XDP_TX_METADATA) { +@@ -755,7 +830,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, + skb->mark = READ_ONCE(xs->sk.sk_mark); + skb->destructor = xsk_destruct_skb; + xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); +- xsk_set_destructor_arg(skb); ++ xsk_inc_num_desc(skb); + + return skb; + +@@ -765,7 +840,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, + + if (err == -EOVERFLOW) { + /* Drop the packet */ +- xsk_set_destructor_arg(xs->skb); ++ xsk_inc_num_desc(xs->skb); + xsk_drop_skb(xs->skb); + xskq_cons_release(xs->tx); + } else { +@@ -807,7 +882,7 @@ static int __xsk_generic_xmit(struct sock *sk) + * if there 
is space in it. This avoids having to implement + * any buffering in the Tx path. + */ +- err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr); ++ err = xsk_cq_reserve_locked(xs->pool); + if (err) { + err = -EAGAIN; + goto out; +@@ -1795,8 +1870,18 @@ static int __init xsk_init(void) + if (err) + goto out_pernet; + ++ xsk_tx_generic_cache = kmem_cache_create("xsk_generic_xmit_cache", ++ sizeof(struct xsk_addr_node), ++ 0, SLAB_HWCACHE_ALIGN, NULL); ++ if (!xsk_tx_generic_cache) { ++ err = -ENOMEM; ++ goto out_unreg_notif; ++ } ++ + return 0; + ++out_unreg_notif: ++ unregister_netdevice_notifier(&xsk_netdev_notifier); + out_pernet: + unregister_pernet_subsys(&xsk_net_ops); + out_sk: +diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h +index 46d87e961ad6d..f16f390370dc4 100644 +--- a/net/xdp/xsk_queue.h ++++ b/net/xdp/xsk_queue.h +@@ -344,6 +344,11 @@ static inline u32 xskq_cons_present_entries(struct xsk_queue *q) + + /* Functions for producers */ + ++static inline u32 xskq_get_prod(struct xsk_queue *q) ++{ ++ return READ_ONCE(q->ring->producer); ++} ++ + static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max) + { + u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons); +@@ -390,6 +395,13 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) + return 0; + } + ++static inline void xskq_prod_write_addr(struct xsk_queue *q, u32 idx, u64 addr) ++{ ++ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; ++ ++ ring->desc[idx & q->ring_mask] = addr; ++} ++ + static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs, + u32 nb_entries) + { +-- +2.51.0 + diff --git a/queue-6.6/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch b/queue-6.6/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch new file mode 100644 index 0000000000..2fab09a1ae --- /dev/null +++ b/queue-6.6/bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch @@ -0,0 +1,92 @@ +From 
2a4cc4e1a8b4995ff5a778a0844676080af81750 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 09:52:20 +0000 +Subject: bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init() + +From: Peilin Ye + +[ Upstream commit 6d78b4473cdb08b74662355a9e8510bde09c511e ] + +Currently, calling bpf_map_kmalloc_node() from __bpf_async_init() can +cause various locking issues; see the following stack trace (edited for +style) as one example: + +... + [10.011566] do_raw_spin_lock.cold + [10.011570] try_to_wake_up (5) double-acquiring the same + [10.011575] kick_pool rq_lock, causing a hardlockup + [10.011579] __queue_work + [10.011582] queue_work_on + [10.011585] kernfs_notify + [10.011589] cgroup_file_notify + [10.011593] try_charge_memcg (4) memcg accounting raises an + [10.011597] obj_cgroup_charge_pages MEMCG_MAX event + [10.011599] obj_cgroup_charge_account + [10.011600] __memcg_slab_post_alloc_hook + [10.011603] __kmalloc_node_noprof +... + [10.011611] bpf_map_kmalloc_node + [10.011612] __bpf_async_init + [10.011615] bpf_timer_init (3) BPF calls bpf_timer_init() + [10.011617] bpf_prog_xxxxxxxxxxxxxxxx_fcg_runnable + [10.011619] bpf__sched_ext_ops_runnable + [10.011620] enqueue_task_scx (2) BPF runs with rq_lock held + [10.011622] enqueue_task + [10.011626] ttwu_do_activate + [10.011629] sched_ttwu_pending (1) grabs rq_lock +... + +The above was reproduced on bpf-next (b338cf849ec8) by modifying +./tools/sched_ext/scx_flatcg.bpf.c to call bpf_timer_init() during +ops.runnable(), and hacking the memcg accounting code a bit to make +a bpf_timer_init() call more likely to raise an MEMCG_MAX event. + +We have also run into other similar variants (both internally and on +bpf-next), including double-acquiring cgroup_file_kn_lock, the same +worker_pool::lock, etc. + +As suggested by Shakeel, fix this by using __GFP_HIGH instead of +GFP_ATOMIC in __bpf_async_init(), so that e.g. 
if try_charge_memcg() +raises an MEMCG_MAX event, we call __memcg_memory_event() with +@allow_spinning=false and avoid calling cgroup_file_notify() there. + +Depends on mm patch +"memcg: skip cgroup_file_notify if spinning is not allowed": +https://lore.kernel.org/bpf/20250905201606.66198-1-shakeel.butt@linux.dev/ + +v0 approach s/bpf_map_kmalloc_node/bpf_mem_alloc/ +https://lore.kernel.org/bpf/20250905061919.439648-1-yepeilin@google.com/ +v1 approach: +https://lore.kernel.org/bpf/20250905234547.862249-1-yepeilin@google.com/ + +Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") +Suggested-by: Shakeel Butt +Signed-off-by: Peilin Ye +Link: https://lore.kernel.org/r/20250909095222.2121438-1-yepeilin@google.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/helpers.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c +index 4b20a72ab8cff..90c281e1379ee 100644 +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -1204,8 +1204,11 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u + goto out; + } + +- /* allocate hrtimer via map_kmalloc to use memcg accounting */ +- cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node); ++ /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until ++ * kmalloc_nolock() is available, avoid locking issues by using ++ * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM). 
++ */ ++ cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node); + if (!cb) { + ret = -ENOMEM; + goto out; +-- +2.51.0 + diff --git a/queue-6.6/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch b/queue-6.6/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch new file mode 100644 index 0000000000..ea93b0b7c5 --- /dev/null +++ b/queue-6.6/flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch @@ -0,0 +1,90 @@ +From 26b01313418881a673a0fdab986b6364157539f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Aug 2025 16:51:00 +0200 +Subject: flexfiles/pNFS: fix NULL checks on result of + ff_layout_choose_ds_for_read + +From: Tigran Mkrtchyan + +[ Upstream commit 5a46d2339a5ae268ede53a221f20433d8ea4f2f9 ] + +Recent commit f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS +errors") has changed the error return type of ff_layout_choose_ds_for_read() from +NULL to an error pointer. However, not all code paths have been updated +to match the change. Thus, some non-NULL checks will accept error pointers +as a valid return value. 
+ +Reported-by: Dan Carpenter +Suggested-by: Dan Carpenter +Fixes: f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS errors") +Signed-off-by: Tigran Mkrtchyan +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index 7354b6b104783..b05dd4d3ed653 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -756,8 +756,11 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, + continue; + + if (check_device && +- nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) ++ nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) { ++ // reinitialize the error state in case if this is the last iteration ++ ds = ERR_PTR(-EINVAL); + continue; ++ } + + *best_idx = idx; + break; +@@ -787,7 +790,7 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + struct nfs4_pnfs_ds *ds; + + ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); +- if (ds) ++ if (!IS_ERR(ds)) + return ds; + return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); + } +@@ -801,7 +804,7 @@ ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, + + ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, + best_idx); +- if (ds || !pgio->pg_mirror_idx) ++ if (!IS_ERR(ds) || !pgio->pg_mirror_idx) + return ds; + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); + } +@@ -859,7 +862,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, + req->wb_nio = 0; + + ds = ff_layout_get_ds_for_read(pgio, &ds_idx); +- if (!ds) { ++ if (IS_ERR(ds)) { + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) + goto out_mds; + pnfs_generic_pg_cleanup(pgio); +@@ -1063,11 +1066,13 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) + { + 
u32 idx = hdr->pgio_mirror_idx + 1; + u32 new_idx = 0; ++ struct nfs4_pnfs_ds *ds; + +- if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) +- ff_layout_send_layouterror(hdr->lseg); +- else ++ ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); ++ if (IS_ERR(ds)) + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); ++ else ++ ff_layout_send_layouterror(hdr->lseg); + pnfs_read_resend_pnfs(hdr, new_idx); + } + +-- +2.51.0 + diff --git a/queue-6.6/fs-nfs-io-make-nfs_start_io_-killable.patch b/queue-6.6/fs-nfs-io-make-nfs_start_io_-killable.patch new file mode 100644 index 0000000000..2e21454c43 --- /dev/null +++ b/queue-6.6/fs-nfs-io-make-nfs_start_io_-killable.patch @@ -0,0 +1,222 @@ +From a5663a1ed1820c36bbf0465e3605464fa7847fb9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Nov 2024 14:53:51 +0100 +Subject: fs/nfs/io: make nfs_start_io_*() killable + +From: Max Kellermann + +[ Upstream commit 38a125b31504f91bf6fdd3cfc3a3e9a721e6c97a ] + +This allows killing processes that wait for a lock when one process is +stuck waiting for the NFS server. This aims to complete the coverage +of NFS operations being killable, like nfs_direct_wait() does, for +example. 
+ +Signed-off-by: Max Kellermann +Signed-off-by: Trond Myklebust +Stable-dep-of: 9eb90f435415 ("NFS: Serialise O_DIRECT i/o and truncate()") +Signed-off-by: Sasha Levin +--- + fs/nfs/direct.c | 21 ++++++++++++++++++--- + fs/nfs/file.c | 14 +++++++++++--- + fs/nfs/internal.h | 7 ++++--- + fs/nfs/io.c | 44 +++++++++++++++++++++++++++++++++----------- + 4 files changed, 66 insertions(+), 20 deletions(-) + +diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c +index a1ff4a4f5380e..4e53708dfcf43 100644 +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -469,8 +469,16 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + if (user_backed_iter(iter)) + dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; + +- if (!swap) +- nfs_start_io_direct(inode); ++ if (!swap) { ++ result = nfs_start_io_direct(inode); ++ if (result) { ++ /* release the reference that would usually be ++ * consumed by nfs_direct_read_schedule_iovec() ++ */ ++ nfs_direct_req_release(dreq); ++ goto out_release; ++ } ++ } + + NFS_I(inode)->read_io += count; + requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); +@@ -1023,7 +1031,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_STABLE); + } else { +- nfs_start_io_direct(inode); ++ result = nfs_start_io_direct(inode); ++ if (result) { ++ /* release the reference that would usually be ++ * consumed by nfs_direct_write_schedule_iovec() ++ */ ++ nfs_direct_req_release(dreq); ++ goto out_release; ++ } + + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_COND_STABLE); +diff --git a/fs/nfs/file.c b/fs/nfs/file.c +index 003dda0018403..2f4db026f8d67 100644 +--- a/fs/nfs/file.c ++++ b/fs/nfs/file.c +@@ -167,7 +167,10 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) + iocb->ki_filp, + iov_iter_count(to), (unsigned long) iocb->ki_pos); + +- nfs_start_io_read(inode); ++ result = nfs_start_io_read(inode); ++ if (result) ++ 
return result; ++ + result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); + if (!result) { + result = generic_file_read_iter(iocb, to); +@@ -188,7 +191,10 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe + + dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos); + +- nfs_start_io_read(inode); ++ result = nfs_start_io_read(inode); ++ if (result) ++ return result; ++ + result = nfs_revalidate_mapping(inode, in->f_mapping); + if (!result) { + result = filemap_splice_read(in, ppos, pipe, len, flags); +@@ -668,7 +674,9 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) + nfs_clear_invalid_mapping(file->f_mapping); + + since = filemap_sample_wb_err(file->f_mapping); +- nfs_start_io_write(inode); ++ error = nfs_start_io_write(inode); ++ if (error) ++ return error; + result = generic_write_checks(iocb, from); + if (result > 0) + result = generic_perform_write(iocb, from); +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h +index 4eea91d054b24..e78f43a137231 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -6,6 +6,7 @@ + #include "nfs4_fs.h" + #include + #include ++#include + #include + #include + #include +@@ -461,11 +462,11 @@ extern const struct netfs_request_ops nfs_netfs_ops; + #endif + + /* io.c */ +-extern void nfs_start_io_read(struct inode *inode); ++extern __must_check int nfs_start_io_read(struct inode *inode); + extern void nfs_end_io_read(struct inode *inode); +-extern void nfs_start_io_write(struct inode *inode); ++extern __must_check int nfs_start_io_write(struct inode *inode); + extern void nfs_end_io_write(struct inode *inode); +-extern void nfs_start_io_direct(struct inode *inode); ++extern __must_check int nfs_start_io_direct(struct inode *inode); + extern void nfs_end_io_direct(struct inode *inode); + + static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) +diff --git a/fs/nfs/io.c b/fs/nfs/io.c +index b5551ed8f648b..3388faf2acb9f 100644 +--- a/fs/nfs/io.c 
++++ b/fs/nfs/io.c +@@ -39,19 +39,28 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) + * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. + */ +-void ++int + nfs_start_io_read(struct inode *inode) + { + struct nfs_inode *nfsi = NFS_I(inode); ++ int err; ++ + /* Be an optimist! */ +- down_read(&inode->i_rwsem); ++ err = down_read_killable(&inode->i_rwsem); ++ if (err) ++ return err; + if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0) +- return; ++ return 0; + up_read(&inode->i_rwsem); ++ + /* Slow path.... */ +- down_write(&inode->i_rwsem); ++ err = down_write_killable(&inode->i_rwsem); ++ if (err) ++ return err; + nfs_block_o_direct(nfsi, inode); + downgrade_write(&inode->i_rwsem); ++ ++ return 0; + } + + /** +@@ -74,11 +83,15 @@ nfs_end_io_read(struct inode *inode) + * Declare that a buffered read operation is about to start, and ensure + * that we block all direct I/O. + */ +-void ++int + nfs_start_io_write(struct inode *inode) + { +- down_write(&inode->i_rwsem); +- nfs_block_o_direct(NFS_I(inode), inode); ++ int err; ++ ++ err = down_write_killable(&inode->i_rwsem); ++ if (!err) ++ nfs_block_o_direct(NFS_I(inode), inode); ++ return err; + } + + /** +@@ -119,19 +132,28 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) + * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. + */ +-void ++int + nfs_start_io_direct(struct inode *inode) + { + struct nfs_inode *nfsi = NFS_I(inode); ++ int err; ++ + /* Be an optimist! */ +- down_read(&inode->i_rwsem); ++ err = down_read_killable(&inode->i_rwsem); ++ if (err) ++ return err; + if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0) +- return; ++ return 0; + up_read(&inode->i_rwsem); ++ + /* Slow path.... 
*/ +- down_write(&inode->i_rwsem); ++ err = down_write_killable(&inode->i_rwsem); ++ if (err) ++ return err; + nfs_block_buffered(nfsi, inode); + downgrade_write(&inode->i_rwsem); ++ ++ return 0; + } + + /** +-- +2.51.0 + diff --git a/queue-6.6/ftrace-samples-fix-function-size-computation.patch b/queue-6.6/ftrace-samples-fix-function-size-computation.patch new file mode 100644 index 0000000000..8ac54d21d2 --- /dev/null +++ b/queue-6.6/ftrace-samples-fix-function-size-computation.patch @@ -0,0 +1,38 @@ +From 33326afc84b29b807f042e1d17a672db1eaeb816 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Aug 2025 18:16:46 +0200 +Subject: ftrace/samples: Fix function size computation + +From: Vladimir Riabchun + +[ Upstream commit 80d03a40837a9b26750a25122b906c052cc846c9 ] + +In my_tramp1 function .size directive was placed above +ASM_RET instruction, leading to a wrong function size. + +Link: https://lore.kernel.org/aK3d7vxNcO52kEmg@vova-pc +Fixes: 9d907f1ae80b ("samples/ftrace: Fix asm function ELF annotations") +Signed-off-by: Vladimir Riabchun +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + samples/ftrace/ftrace-direct-modify.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c +index e2a6a69352dfb..b40f85e3806fc 100644 +--- a/samples/ftrace/ftrace-direct-modify.c ++++ b/samples/ftrace/ftrace-direct-modify.c +@@ -40,8 +40,8 @@ asm ( + CALL_DEPTH_ACCOUNT + " call my_direct_func1\n" + " leave\n" +-" .size my_tramp1, .-my_tramp1\n" + ASM_RET ++" .size my_tramp1, .-my_tramp1\n" + + " .type my_tramp2, @function\n" + " .globl my_tramp2\n" +-- +2.51.0 + diff --git a/queue-6.6/nfs-serialise-o_direct-i-o-and-truncate.patch b/queue-6.6/nfs-serialise-o_direct-i-o-and-truncate.patch new file mode 100644 index 0000000000..28077f033b --- /dev/null +++ b/queue-6.6/nfs-serialise-o_direct-i-o-and-truncate.patch @@ -0,0 +1,100 @@ +From 
7aeda370440125daa5e8a7c2c28e9d0e30699784 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Sep 2025 12:06:23 -0400 +Subject: NFS: Serialise O_DIRECT i/o and truncate() + +From: Trond Myklebust + +[ Upstream commit 9eb90f435415c7da4800974ed943e39b5578ee7f ] + +Ensure that all O_DIRECT reads and writes are complete, and prevent the +initiation of new i/o until the setattr operation that will truncate the +file is complete. + +Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/inode.c | 4 +++- + fs/nfs/internal.h | 10 ++++++++++ + fs/nfs/io.c | 13 ++----------- + 3 files changed, 15 insertions(+), 12 deletions(-) + +diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c +index 7e7dd2aab449d..5cd5e4226db36 100644 +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -645,8 +645,10 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + trace_nfs_setattr_enter(inode); + + /* Write all dirty data */ +- if (S_ISREG(inode->i_mode)) ++ if (S_ISREG(inode->i_mode)) { ++ nfs_file_block_o_direct(NFS_I(inode)); + nfs_sync_inode(inode); ++ } + + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); + if (fattr == NULL) { +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h +index e78f43a137231..bde81e0abf0ae 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -474,6 +474,16 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) + return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0; + } + ++/* Must be called with exclusively locked inode->i_rwsem */ ++static inline void nfs_file_block_o_direct(struct nfs_inode *nfsi) ++{ ++ if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { ++ clear_bit(NFS_INO_ODIRECT, &nfsi->flags); ++ inode_dio_wait(&nfsi->vfs_inode); ++ } ++} ++ ++ + /* namespace.c */ + #define NFS_PATH_CANONICAL 1 + extern char *nfs_path(char **p, struct dentry *dentry, +diff --git a/fs/nfs/io.c b/fs/nfs/io.c +index 3388faf2acb9f..d275b0a250bf3 100644 +--- 
a/fs/nfs/io.c ++++ b/fs/nfs/io.c +@@ -14,15 +14,6 @@ + + #include "internal.h" + +-/* Call with exclusively locked inode->i_rwsem */ +-static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) +-{ +- if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { +- clear_bit(NFS_INO_ODIRECT, &nfsi->flags); +- inode_dio_wait(inode); +- } +-} +- + /** + * nfs_start_io_read - declare the file is being used for buffered reads + * @inode: file inode +@@ -57,7 +48,7 @@ nfs_start_io_read(struct inode *inode) + err = down_write_killable(&inode->i_rwsem); + if (err) + return err; +- nfs_block_o_direct(nfsi, inode); ++ nfs_file_block_o_direct(nfsi); + downgrade_write(&inode->i_rwsem); + + return 0; +@@ -90,7 +81,7 @@ nfs_start_io_write(struct inode *inode) + + err = down_write_killable(&inode->i_rwsem); + if (!err) +- nfs_block_o_direct(NFS_I(inode), inode); ++ nfs_file_block_o_direct(NFS_I(inode)); + return err; + } + +-- +2.51.0 + diff --git a/queue-6.6/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch b/queue-6.6/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch new file mode 100644 index 0000000000..afb2021437 --- /dev/null +++ b/queue-6.6/nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch @@ -0,0 +1,38 @@ +From 9398b1e561d2ae917939617cee45d6976bd56fa7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:07:22 -0700 +Subject: NFSv4: Clear the NFS_CAP_FS_LOCATIONS flag if it is not set + +From: Trond Myklebust + +[ Upstream commit dd5a8621b886b02f8341c5d4ea68eb2c552ebd3e ] + +_nfs4_server_capabilities() is expected to clear any flags that are not +supported by the server. 
+ +Fixes: 8a59bb93b7e3 ("NFSv4 store server support for fs_location attribute") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index bc1eaabaf2c30..124b9cee6fed7 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3882,8 +3882,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f + res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + } + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); +- server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | +- NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL); ++ server->caps &= ++ ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | ++ NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS); + server->fattr_valid = NFS_ATTR_FATTR_V4; + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && + res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) +-- +2.51.0 + diff --git a/queue-6.6/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch b/queue-6.6/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch new file mode 100644 index 0000000000..d299e3b1f4 --- /dev/null +++ b/queue-6.6/nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch @@ -0,0 +1,35 @@ +From 463ce18eda5744e7ce82599d88b6bdce84a5eb7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:15:12 -0700 +Subject: NFSv4: Clear the NFS_CAP_XATTR flag if not supported by the server + +From: Trond Myklebust + +[ Upstream commit 4fb2b677fc1f70ee642c0beecc3cabf226ef5707 ] + +nfs_server_set_fsinfo() shouldn't assume that NFS_CAP_XATTR is unset +on entry to the function. 
+ +Fixes: b78ef845c35d ("NFSv4.2: query the server for extended attribute support") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/client.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/nfs/client.c b/fs/nfs/client.c +index cc764da581c43..1bcdaee7e856f 100644 +--- a/fs/nfs/client.c ++++ b/fs/nfs/client.c +@@ -873,6 +873,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, + + if (fsinfo->xattr_support) + server->caps |= NFS_CAP_XATTR; ++ else ++ server->caps &= ~NFS_CAP_XATTR; + #endif + } + +-- +2.51.0 + diff --git a/queue-6.6/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch b/queue-6.6/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch new file mode 100644 index 0000000000..a9e5a242c4 --- /dev/null +++ b/queue-6.6/nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch @@ -0,0 +1,35 @@ +From 39dd014f96db82fedbc0fe2e0a64a40035e43539 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Aug 2025 09:02:16 -0700 +Subject: NFSv4: Don't clear capabilities that won't be reset + +From: Trond Myklebust + +[ Upstream commit 31f1a960ad1a14def94fa0b8c25d62b4c032813f ] + +Don't clear the capabilities that are not going to get reset by the call +to _nfs4_server_capabilities(). 
+ +Reported-by: Scott Haiden +Fixes: b01f21cacde9 ("NFS: Fix the setting of capabilities when automounting a new filesystem") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 6debcfc63222d..bc1eaabaf2c30 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3951,7 +3951,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) + }; + int err; + +- nfs_server_set_init_caps(server); + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), +-- +2.51.0 + diff --git a/queue-6.6/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch b/queue-6.6/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch new file mode 100644 index 0000000000..64b341d324 --- /dev/null +++ b/queue-6.6/nfsv4-flexfiles-fix-layout-merge-mirror-check.patch @@ -0,0 +1,42 @@ +From dcb8da897ba8075b75bf40eb3a0fb6bb93f3ca89 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 17:35:16 +0000 +Subject: NFSv4/flexfiles: Fix layout merge mirror check. + +From: Jonathan Curley + +[ Upstream commit dd2fa82473453661d12723c46c9f43d9876a7efd ] + +Typo in ff_lseg_match_mirrors makes the diff ineffective. This results +in merge happening all the time. Merge happening all the time is +problematic because it marks lsegs invalid. Marking lsegs invalid +causes all outstanding IO to get restarted with EAGAIN and connections +to get closed. + +Closing connections constantly triggers race conditions in the RDMA +implementation... 
+ +Fixes: 660d1eb22301c ("pNFS/flexfile: Don't merge layout segments if the mirrors don't match") +Signed-off-by: Jonathan Curley +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index b05dd4d3ed653..42c73c647a27f 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -276,7 +276,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1, + struct pnfs_layout_segment *l2) + { + const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1); +- const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1); ++ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2); + u32 i; + + if (fl1->mirror_array_cnt != fl2->mirror_array_cnt) +-- +2.51.0 + diff --git a/queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch b/queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch new file mode 100644 index 0000000000..7ae181acb7 --- /dev/null +++ b/queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch @@ -0,0 +1,38 @@ +From b6f58eda64781ea911e16bf09897182502a3e127 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Sep 2025 10:40:24 -0400 +Subject: NFSv4.2: Serialise O_DIRECT i/o and clone range + +From: Trond Myklebust + +[ Upstream commit c80ebeba1198eac8811ab0dba36ecc13d51e4438 ] + +Ensure that all O_DIRECT reads and writes complete before cloning a file +range, so that both the source and destination are up to date. 
+ +Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4file.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c +index 02788c3c85e5b..befdb0f4e6dc3 100644 +--- a/fs/nfs/nfs4file.c ++++ b/fs/nfs/nfs4file.c +@@ -282,9 +282,11 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, + + /* flush all pending writes on both src and dst so that server + * has the latest data */ ++ nfs_file_block_o_direct(NFS_I(src_inode)); + ret = nfs_sync_inode(src_inode); + if (ret) + goto out_unlock; ++ nfs_file_block_o_direct(NFS_I(dst_inode)); + ret = nfs_sync_inode(dst_inode); + if (ret) + goto out_unlock; +-- +2.51.0 + diff --git a/queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch b/queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch new file mode 100644 index 0000000000..dbfeb6c089 --- /dev/null +++ b/queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch @@ -0,0 +1,34 @@ +From f90728ceb011c8195936f6fa0ae1e71747629514 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Sep 2025 10:54:13 -0400 +Subject: NFSv4.2: Serialise O_DIRECT i/o and copy range + +From: Trond Myklebust + +[ Upstream commit ca247c89900ae90207f4d321e260cd93b7c7d104 ] + +Ensure that all O_DIRECT reads and writes complete before copying a file +range, so that the destination is up to date. 
+ +Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs42proc.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c +index 66fe885fc19a1..582cf8a469560 100644 +--- a/fs/nfs/nfs42proc.c ++++ b/fs/nfs/nfs42proc.c +@@ -356,6 +356,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, + return status; + } + ++ nfs_file_block_o_direct(NFS_I(dst_inode)); + status = nfs_sync_inode(dst_inode); + if (status) + return status; +-- +2.51.0 + diff --git a/queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch b/queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch new file mode 100644 index 0000000000..4329585c5e --- /dev/null +++ b/queue-6.6/nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch @@ -0,0 +1,34 @@ +From b75b5e33d932b31718768ee5d6ed95053a8f46ee Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Sep 2025 12:11:17 -0400 +Subject: NFSv4.2: Serialise O_DIRECT i/o and fallocate() + +From: Trond Myklebust + +[ Upstream commit b93128f29733af5d427a335978a19884c2c230e2 ] + +Ensure that all O_DIRECT reads and writes complete before calling +fallocate so that we don't race w.r.t. attribute updates. 
+ +Fixes: 99f237832243 ("NFSv4.2: Always flush out writes in nfs42_proc_fallocate()") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs42proc.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c +index 9f0d69e652644..66fe885fc19a1 100644 +--- a/fs/nfs/nfs42proc.c ++++ b/fs/nfs/nfs42proc.c +@@ -112,6 +112,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + exception.inode = inode; + exception.state = lock->open_context->state; + ++ nfs_file_block_o_direct(NFS_I(inode)); + err = nfs_sync_inode(inode); + if (err) + goto out; +-- +2.51.0 + diff --git a/queue-6.6/proc-fix-type-confusion-in-pde_set_flags.patch b/queue-6.6/proc-fix-type-confusion-in-pde_set_flags.patch new file mode 100644 index 0000000000..bb9ee5a439 --- /dev/null +++ b/queue-6.6/proc-fix-type-confusion-in-pde_set_flags.patch @@ -0,0 +1,56 @@ +From 90da140a260992605f344620607ae985a529f8d4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Sep 2025 21:57:15 +0800 +Subject: proc: fix type confusion in pde_set_flags() + +From: wangzijie + +[ Upstream commit 0ce9398aa0830f15f92bbed73853f9861c3e74ff ] + +Commit 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc +files") missed a key part in the definition of proc_dir_entry: + +union { + const struct proc_ops *proc_ops; + const struct file_operations *proc_dir_ops; +}; + +So dereference of ->proc_ops assumes it is a proc_ops structure results in +type confusion and make NULL check for 'proc_ops' not work for proc dir. + +Add !S_ISDIR(dp->mode) test before calling pde_set_flags() to fix it. 
+ +Link: https://lkml.kernel.org/r/20250904135715.3972782-1-wangzijie1@honor.com +Fixes: 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc files") +Signed-off-by: wangzijie +Reported-by: Brad Spengler +Closes: https://lore.kernel.org/all/20250903065758.3678537-1-wangzijie1@honor.com/ +Cc: Alexey Dobriyan +Cc: Al Viro +Cc: Christian Brauner +Cc: Jiri Slaby +Cc: Stefano Brivio +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/proc/generic.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/proc/generic.c b/fs/proc/generic.c +index db3f2c6abc162..4cadd2fd23d8f 100644 +--- a/fs/proc/generic.c ++++ b/fs/proc/generic.c +@@ -388,7 +388,8 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + if (proc_alloc_inum(&dp->low_ino)) + goto out_free_entry; + +- pde_set_flags(dp); ++ if (!S_ISDIR(dp->mode)) ++ pde_set_flags(dp); + + write_lock(&proc_subdir_lock); + dp->parent = dir; +-- +2.51.0 + diff --git a/queue-6.6/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch b/queue-6.6/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch new file mode 100644 index 0000000000..451d218faa --- /dev/null +++ b/queue-6.6/s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch @@ -0,0 +1,50 @@ +From ab57bd2162a2ad0379ee1793e36c72784b59df28 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Aug 2025 14:05:57 +0200 +Subject: s390/cpum_cf: Deny all sampling events by counter PMU + +From: Thomas Richter + +[ Upstream commit ce971233242b5391d99442271f3ca096fb49818d ] + +Deny all sampling event by the CPUMF counter facility device driver +and return -ENOENT. This return value is used to try other PMUs. +Up to now events for type PERF_TYPE_HARDWARE were not tested for +sampling and returned later on -EOPNOTSUPP. This ends the search +for alternative PMUs. Change that behavior and try other PMUs +instead. 
+Fixes: 613a41b0d16e ("s390/cpum_cf: Reject request for sampling in event initialization") +Acked-by: Sumanth Korikkar +Signed-off-by: Thomas Richter +Signed-off-by: Alexander Gordeev +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/perf_cpum_cf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c +index 65a66df5bb865..771e1cb17540d 100644 +--- a/arch/s390/kernel/perf_cpum_cf.c ++++ b/arch/s390/kernel/perf_cpum_cf.c +@@ -757,8 +757,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) + break; + + case PERF_TYPE_HARDWARE: +- if (is_sampling_event(event)) /* No sampling support */ +- return -ENOENT; + ev = attr->config; + if (!attr->exclude_user && attr->exclude_kernel) { + /* +@@ -856,6 +854,8 @@ static int cpumf_pmu_event_init(struct perf_event *event) + unsigned int type = event->attr.type; + int err; + ++ if (is_sampling_event(event)) /* No sampling support */ ++ return -ENOENT; + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) +-- +2.51.0 + diff --git a/queue-6.6/series b/queue-6.6/series index 97e8dc1e5a..ada523c485 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -6,3 +6,21 @@ nfsd-nfsd_unlink-clobbers-non-zero-status-returned-from-fh_fill_pre_attrs.patch media-i2c-imx214-fix-link-frequency-validation.patch net-fix-null-ptr-deref-by-sock_lock_init_class_and_name-and-rmmod.patch ima-limit-the-number-of-tomtou-integrity-violations.patch +flexfiles-pnfs-fix-null-checks-on-result-of-ff_layou.patch +sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch +nfsv4-don-t-clear-capabilities-that-won-t-be-reset.patch +nfsv4-clear-the-nfs_cap_fs_locations-flag-if-it-is-n.patch +nfsv4-clear-the-nfs_cap_xattr-flag-if-not-supported-.patch +tracing-fix-tracing_marker-may-trigger-page-fault-du.patch +ftrace-samples-fix-function-size-computation.patch
+fs-nfs-io-make-nfs_start_io_-killable.patch +nfs-serialise-o_direct-i-o-and-truncate.patch +nfsv4.2-serialise-o_direct-i-o-and-fallocate.patch +nfsv4.2-serialise-o_direct-i-o-and-clone-range.patch +nfsv4.2-serialise-o_direct-i-o-and-copy-range.patch +nfsv4-flexfiles-fix-layout-merge-mirror-check.patch +tracing-silence-warning-when-chunk-allocation-fails-.patch +s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch +bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch +tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch +proc-fix-type-confusion-in-pde_set_flags.patch diff --git a/queue-6.6/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch b/queue-6.6/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch new file mode 100644 index 0000000000..4fe10894f4 --- /dev/null +++ b/queue-6.6/sunrpc-call-xs_sock_process_cmsg-for-all-cmsg.patch @@ -0,0 +1,48 @@ +From 0f3fd2cef71e8bfe3c11e06bb331ab26ec5e005e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Sep 2025 16:09:57 -0500 +Subject: SUNRPC: call xs_sock_process_cmsg for all cmsg + +From: Justin Worrell + +[ Upstream commit 9559d2fffd4f9b892165eed48198a0e5cb8504e6 ] + +xs_sock_recv_cmsg was failing to call xs_sock_process_cmsg for any cmsg +type other than TLS_RECORD_TYPE_ALERT (TLS_RECORD_TYPE_DATA, and other +values not handled.) Based on my reading of the previous commit +(cc5d5908: sunrpc: fix client side handling of tls alerts), it looks +like only iov_iter_revert should be conditional on TLS_RECORD_TYPE_ALERT +(but that other cmsg types should still call xs_sock_process_cmsg). On +my machine, I was unable to connect (over mtls) to an NFS share hosted +on FreeBSD. With this patch applied, I am able to mount the share again. 
+ +Fixes: cc5d59081fa2 ("sunrpc: fix client side handling of tls alerts") +Signed-off-by: Justin Worrell +Reviewed-and-tested-by: Scott Mayhew +Link: https://lore.kernel.org/r/20250904211038.12874-3-jworrell@gmail.com +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + net/sunrpc/xprtsock.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index 8b27a21f3b42d..3660ef2647112 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -407,9 +407,9 @@ xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags) + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, flags); +- if (ret > 0 && +- tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { +- iov_iter_revert(&msg.msg_iter, ret); ++ if (ret > 0) { ++ if (tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) ++ iov_iter_revert(&msg.msg_iter, ret); + ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg, + -EAGAIN); + } +-- +2.51.0 + diff --git a/queue-6.6/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch b/queue-6.6/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch new file mode 100644 index 0000000000..afe11e825d --- /dev/null +++ b/queue-6.6/tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch @@ -0,0 +1,98 @@ +From ae59fb3596e0289dc95a340d80545cad696252ab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Sep 2025 23:26:12 +0000 +Subject: tcp_bpf: Call sk_msg_free() when tcp_bpf_send_verdict() fails to + allocate psock->cork. + +From: Kuniyuki Iwashima + +[ Upstream commit a3967baad4d533dc254c31e0d221e51c8d223d58 ] + +syzbot reported the splat below. [0] + +The repro does the following: + + 1. Load a sk_msg prog that calls bpf_msg_cork_bytes(msg, cork_bytes) + 2. Attach the prog to a SOCKMAP + 3. Add a socket to the SOCKMAP + 4. Activate fault injection + 5. 
Send data less than cork_bytes + +At 5., the data is carried over to the next sendmsg() as it is +smaller than the cork_bytes specified by bpf_msg_cork_bytes(). + +Then, tcp_bpf_send_verdict() tries to allocate psock->cork to hold +the data, but this fails silently due to fault injection + __GFP_NOWARN. + +If the allocation fails, we need to revert the sk->sk_forward_alloc +change done by sk_msg_alloc(). + +Let's call sk_msg_free() when tcp_bpf_send_verdict fails to allocate +psock->cork. + +The "*copied" also needs to be updated such that a proper error can +be returned to the caller, sendmsg. It fails to allocate psock->cork. +Nothing has been corked so far, so this patch simply sets "*copied" +to 0. + +[0]: +WARNING: net/ipv4/af_inet.c:156 at inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156, CPU#1: syz-executor/5983 +Modules linked in: +CPU: 1 UID: 0 PID: 5983 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 +RIP: 0010:inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156 +Code: 0f 0b 90 e9 62 fe ff ff e8 7a db b5 f7 90 0f 0b 90 e9 95 fe ff ff e8 6c db b5 f7 90 0f 0b 90 e9 bb fe ff ff e8 5e db b5 f7 90 <0f> 0b 90 e9 e1 fe ff ff 89 f9 80 e1 07 80 c1 03 38 c1 0f 8c 9f fc +RSP: 0018:ffffc90000a08b48 EFLAGS: 00010246 +RAX: ffffffff8a09d0b2 RBX: dffffc0000000000 RCX: ffff888024a23c80 +RDX: 0000000000000100 RSI: 0000000000000fff RDI: 0000000000000000 +RBP: 0000000000000fff R08: ffff88807e07c627 R09: 1ffff1100fc0f8c4 +R10: dffffc0000000000 R11: ffffed100fc0f8c5 R12: ffff88807e07c380 +R13: dffffc0000000000 R14: ffff88807e07c60c R15: 1ffff1100fc0f872 +FS: 00005555604c4500(0000) GS:ffff888125af1000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005555604df5c8 CR3: 0000000032b06000 CR4: 00000000003526f0 +Call Trace: + + __sk_destruct+0x86/0x660 net/core/sock.c:2339 + rcu_do_batch kernel/rcu/tree.c:2605 [inline] + 
rcu_core+0xca8/0x1770 kernel/rcu/tree.c:2861 + handle_softirqs+0x286/0x870 kernel/softirq.c:579 + __do_softirq kernel/softirq.c:613 [inline] + invoke_softirq kernel/softirq.c:453 [inline] + __irq_exit_rcu+0xca/0x1f0 kernel/softirq.c:680 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:696 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1052 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1052 + + +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Reported-by: syzbot+4cabd1d2fa917a456db8@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/68c0b6b5.050a0220.3c6139.0013.GAE@google.com/ +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: Martin KaFai Lau +Link: https://patch.msgid.link/20250909232623.4151337-1-kuniyu@google.com +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_bpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c +index 5312237e80409..7518d2af63088 100644 +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -408,8 +408,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, + if (!psock->cork) { + psock->cork = kzalloc(sizeof(*psock->cork), + GFP_ATOMIC | __GFP_NOWARN); +- if (!psock->cork) ++ if (!psock->cork) { ++ sk_msg_free(sk, msg); ++ *copied = 0; + return -ENOMEM; ++ } + } + memcpy(psock->cork, msg, sizeof(*msg)); + return 0; +-- +2.51.0 + diff --git a/queue-6.6/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch b/queue-6.6/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch new file mode 100644 index 0000000000..d0b8cc885e --- /dev/null +++ b/queue-6.6/tracing-fix-tracing_marker-may-trigger-page-fault-du.patch @@ -0,0 +1,89 @@ +From 223c03563ba4744b9f696e88f295cc756d71b4f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Aug 2025 10:51:52 +0000 +Subject: tracing: Fix tracing_marker may trigger page fault during + preempt_disable + +From: Luo 
Gengkun + +[ Upstream commit 3d62ab32df065e4a7797204a918f6489ddb8a237 ] + +Both tracing_mark_write and tracing_mark_raw_write call +__copy_from_user_inatomic during preempt_disable. But in some case, +__copy_from_user_inatomic may trigger page fault, and will call schedule() +subtly. And if a task is migrated to other cpu, the following warning will +be trigger: + if (RB_WARN_ON(cpu_buffer, + !local_read(&cpu_buffer->committing))) + +An example can illustrate this issue: + +process flow CPU +--------------------------------------------------------------------- + +tracing_mark_raw_write(): cpu:0 + ... + ring_buffer_lock_reserve(): cpu:0 + ... + cpu = raw_smp_processor_id() cpu:0 + cpu_buffer = buffer->buffers[cpu] cpu:0 + ... + ... + __copy_from_user_inatomic(): cpu:0 + ... + # page fault + do_mem_abort(): cpu:0 + ... + # Call schedule + schedule() cpu:0 + ... + # the task schedule to cpu1 + __buffer_unlock_commit(): cpu:1 + ... + ring_buffer_unlock_commit(): cpu:1 + ... + cpu = raw_smp_processor_id() cpu:1 + cpu_buffer = buffer->buffers[cpu] cpu:1 + +As shown above, the process will acquire cpuid twice and the return values +are not the same. + +To fix this problem using copy_from_user_nofault instead of +__copy_from_user_inatomic, as the former performs 'access_ok' before +copying. 
+ +Link: https://lore.kernel.org/20250819105152.2766363-1-luogengkun@huaweicloud.com +Fixes: 656c7f0d2d2b ("tracing: Replace kmap with copy_from_user() in trace_marker writing") +Signed-off-by: Luo Gengkun +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index a32c8637503d1..baa87876cee47 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7226,7 +7226,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, + entry = ring_buffer_event_data(event); + entry->ip = _THIS_IP_; + +- len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->buf, ubuf, cnt); + if (len) { + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); + cnt = FAULTED_SIZE; +@@ -7301,7 +7301,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + + entry = ring_buffer_event_data(event); + +- len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); ++ len = copy_from_user_nofault(&entry->id, ubuf, cnt); + if (len) { + entry->id = -1; + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); +-- +2.51.0 + diff --git a/queue-6.6/tracing-silence-warning-when-chunk-allocation-fails-.patch b/queue-6.6/tracing-silence-warning-when-chunk-allocation-fails-.patch new file mode 100644 index 0000000000..2f1db6de66 --- /dev/null +++ b/queue-6.6/tracing-silence-warning-when-chunk-allocation-fails-.patch @@ -0,0 +1,97 @@ +From 9479e0fafc399b63142315d9afc56561d38da56b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Sep 2025 02:46:58 +0000 +Subject: tracing: Silence warning when chunk allocation fails in + trace_pid_write + +From: Pu Lehui + +[ Upstream commit cd4453c5e983cf1fd5757e9acb915adb1e4602b6 ] + +Syzkaller trigger a fault injection warning: + +WARNING: CPU: 1 PID: 12326 at tracepoint_add_func+0xbfc/0xeb0 +Modules linked in: 
+CPU: 1 UID: 0 PID: 12326 Comm: syz.6.10325 Tainted: G U 6.14.0-rc5-syzkaller #0 +Tainted: [U]=USER +Hardware name: Google Compute Engine/Google Compute Engine +RIP: 0010:tracepoint_add_func+0xbfc/0xeb0 kernel/tracepoint.c:294 +Code: 09 fe ff 90 0f 0b 90 0f b6 74 24 43 31 ff 41 bc ea ff ff ff +RSP: 0018:ffffc9000414fb48 EFLAGS: 00010283 +RAX: 00000000000012a1 RBX: ffffffff8e240ae0 RCX: ffffc90014b78000 +RDX: 0000000000080000 RSI: ffffffff81bbd78b RDI: 0000000000000001 +RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000001 R12: ffffffffffffffef +R13: 0000000000000000 R14: dffffc0000000000 R15: ffffffff81c264f0 +FS: 00007f27217f66c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000001b2e80dff8 CR3: 00000000268f8000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + tracepoint_probe_register_prio+0xc0/0x110 kernel/tracepoint.c:464 + register_trace_prio_sched_switch include/trace/events/sched.h:222 [inline] + register_pid_events kernel/trace/trace_events.c:2354 [inline] + event_pid_write.isra.0+0x439/0x7a0 kernel/trace/trace_events.c:2425 + vfs_write+0x24c/0x1150 fs/read_write.c:677 + ksys_write+0x12b/0x250 fs/read_write.c:731 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +We can reproduce the warning by following the steps below: +1. echo 8 >> set_event_notrace_pid. Let tr->filtered_pids owns one pid + and register sched_switch tracepoint. +2. echo ' ' >> set_event_pid, and perform fault injection during chunk + allocation of trace_pid_list_alloc. Let pid_list with no pid and +assign to tr->filtered_pids. +3. echo ' ' >> set_event_pid. Let pid_list is NULL and assign to + tr->filtered_pids. +4. 
echo 9 >> set_event_pid, will trigger the double register + sched_switch tracepoint warning. + +The reason is that syzkaller injects a fault into the chunk allocation +in trace_pid_list_alloc, causing a failure in trace_pid_list_set, which +may trigger double register of the same tracepoint. This only occurs +when the system is about to crash, but to suppress this warning, let's +add failure handling logic to trace_pid_list_set. + +Link: https://lore.kernel.org/20250908024658.2390398-1-pulehui@huaweicloud.com +Fixes: 8d6e90983ade ("tracing: Create a sparse bitmask for pid filtering") +Reported-by: syzbot+161412ccaeff20ce4dde@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/67cb890e.050a0220.d8275.022e.GAE@google.com +Signed-off-by: Pu Lehui +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index baa87876cee47..a111be83c3693 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -750,7 +750,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, + /* copy the current bits to the new max */ + ret = trace_pid_list_first(filtered_pids, &pid); + while (!ret) { +- trace_pid_list_set(pid_list, pid); ++ ret = trace_pid_list_set(pid_list, pid); ++ if (ret < 0) ++ goto out; ++ + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); + nr_pids++; + } +@@ -787,6 +790,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, + trace_parser_clear(&parser); + ret = 0; + } ++ out: + trace_parser_put(&parser); + + if (ret < 0) { +-- +2.51.0 + -- 2.47.3