From: Greg Kroah-Hartman Date: Mon, 23 Dec 2024 12:01:54 +0000 (+0100) Subject: 6.12-stable patches X-Git-Tag: v6.1.122~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=60183cb56eb546d0f72ef3ab989872d92eb4e2f2;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: ceph-fix-memory-leak-in-ceph_direct_read_write.patch ceph-fix-memory-leaks-in-__ceph_sync_read.patch ceph-give-up-on-paths-longer-than-path_max.patch ceph-improve-error-handling-and-short-overflow-read-logic-in-__ceph_sync_read.patch ceph-validate-snapdirname-option-length-when-mounting.patch kvm-svm-allow-guest-writes-to-set-msr_amd64_de_cfg-bits.patch mm-shmem-fix-shmemhugepages-at-swapout.patch mm-use-aligned-address-in-clear_gigantic_page.patch mm-use-aligned-address-in-copy_user_gigantic_page.patch nfs-pnfs-fix-a-live-lock-between-recalled-layouts-and-layoutget.patch nilfs2-fix-buffer-head-leaks-in-calls-to-truncate_inode_pages.patch nilfs2-prevent-use-of-deleted-inode.patch ocfs2-fix-the-space-leak-in-la-when-releasing-la.patch of-address-preserve-the-flags-portion-on-1-1-dma-ranges-mapping.patch of-fix-error-path-in-of_parse_phandle_with_args_map.patch of-fix-refcount-leakage-for-of-node-returned-by-__of_get_dma_parent.patch of-irq-fix-interrupt-map-cell-length-check-in-of_irq_parse_imap_parent.patch of-irq-fix-using-uninitialized-variable-addr_len-in-api-of_irq_parse_one.patch of-property-fw_devlink-do-not-use-interrupt-parent-directly.patch udmabuf-also-check-for-f_seal_future_write.patch udmabuf-fix-racy-memfd-sealing-check.patch --- diff --git a/queue-6.12/ceph-fix-memory-leak-in-ceph_direct_read_write.patch b/queue-6.12/ceph-fix-memory-leak-in-ceph_direct_read_write.patch new file mode 100644 index 00000000000..87c86b68a7d --- /dev/null +++ b/queue-6.12/ceph-fix-memory-leak-in-ceph_direct_read_write.patch @@ -0,0 +1,107 @@ +From 66e0c4f91461d17d48071695271c824620bed4ef Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Fri, 6 Dec 2024 17:32:59 +0100 +Subject: ceph: fix memory leak in ceph_direct_read_write() + +From: Ilya Dryomov + +commit 66e0c4f91461d17d48071695271c824620bed4ef upstream. + +The bvecs array which is allocated in iter_get_bvecs_alloc() is leaked +and pages remain pinned if ceph_alloc_sparse_ext_map() fails. + +There is no need to delay the allocation of sparse_ext map until after +the bvecs array is set up, so fix this by moving sparse_ext allocation +a bit earlier. Also, make a similar adjustment in __ceph_sync_read() +for consistency (a leak of the same kind in __ceph_sync_read() has been +addressed differently). 
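A minimal stand-alone sketch of the reordering idea (plain C with invented names, not the ceph code): once the step that can fail runs before the expensive allocation, the early-exit path has nothing extra to unwind.

#include <stdlib.h>

/* invented stand-in for ceph_alloc_sparse_ext_map() */
static int alloc_sparse_map(int **map, int cnt)
{
        *map = calloc(cnt, sizeof(**map));
        return *map ? 0 : -1;
}

static int do_read(void)
{
        int *sparse_map, *bvecs;

        /* failable setup first: nothing is allocated or pinned yet */
        if (alloc_sparse_map(&sparse_map, 16))
                return -1;

        /* the expensive allocation happens only after that */
        bvecs = malloc(64 * sizeof(*bvecs));
        if (!bvecs) {
                free(sparse_map);
                return -1;
        }

        /* ... submit request, wait ... */
        free(bvecs);
        free(sparse_map);
        return 0;
}

int main(void)
{
        return do_read() ? 1 : 0;
}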
+ +Cc: stable@vger.kernel.org +Fixes: 03bc06c7b0bd ("ceph: add new mount option to enable sparse reads") +Signed-off-by: Ilya Dryomov +Reviewed-by: Alex Markuze +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/file.c | 43 ++++++++++++++++++++++--------------------- + 1 file changed, 22 insertions(+), 21 deletions(-) + +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -1116,6 +1116,16 @@ ssize_t __ceph_sync_read(struct inode *i + len = read_off + read_len - off; + more = len < iov_iter_count(to); + ++ op = &req->r_ops[0]; ++ if (sparse) { ++ extent_cnt = __ceph_sparse_read_ext_count(inode, read_len); ++ ret = ceph_alloc_sparse_ext_map(op, extent_cnt); ++ if (ret) { ++ ceph_osdc_put_request(req); ++ break; ++ } ++ } ++ + num_pages = calc_pages_for(read_off, read_len); + page_off = offset_in_page(off); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); +@@ -1129,16 +1139,6 @@ ssize_t __ceph_sync_read(struct inode *i + offset_in_page(read_off), + false, true); + +- op = &req->r_ops[0]; +- if (sparse) { +- extent_cnt = __ceph_sparse_read_ext_count(inode, read_len); +- ret = ceph_alloc_sparse_ext_map(op, extent_cnt); +- if (ret) { +- ceph_osdc_put_request(req); +- break; +- } +- } +- + ceph_osdc_start_request(osdc, req); + ret = ceph_osdc_wait_request(osdc, req); + +@@ -1551,6 +1551,16 @@ ceph_direct_read_write(struct kiocb *ioc + break; + } + ++ op = &req->r_ops[0]; ++ if (sparse) { ++ extent_cnt = __ceph_sparse_read_ext_count(inode, size); ++ ret = ceph_alloc_sparse_ext_map(op, extent_cnt); ++ if (ret) { ++ ceph_osdc_put_request(req); ++ break; ++ } ++ } ++ + len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages); + if (len < 0) { + ceph_osdc_put_request(req); +@@ -1560,6 +1570,8 @@ ceph_direct_read_write(struct kiocb *ioc + if (len != size) + osd_req_op_extent_update(req, 0, len); + ++ osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); ++ + /* + * To simplify error handling, allow AIO when IO within i_size + * or IO can be satisfied by single OSD request. +@@ -1591,17 +1603,6 @@ ceph_direct_read_write(struct kiocb *ioc + req->r_mtime = mtime; + } + +- osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); +- op = &req->r_ops[0]; +- if (sparse) { +- extent_cnt = __ceph_sparse_read_ext_count(inode, size); +- ret = ceph_alloc_sparse_ext_map(op, extent_cnt); +- if (ret) { +- ceph_osdc_put_request(req); +- break; +- } +- } +- + if (aio_req) { + aio_req->total_len += len; + aio_req->num_reqs++; diff --git a/queue-6.12/ceph-fix-memory-leaks-in-__ceph_sync_read.patch b/queue-6.12/ceph-fix-memory-leaks-in-__ceph_sync_read.patch new file mode 100644 index 00000000000..6c4fdae9945 --- /dev/null +++ b/queue-6.12/ceph-fix-memory-leaks-in-__ceph_sync_read.patch @@ -0,0 +1,60 @@ +From d6fd6f8280f0257ba93f16900a0d3d3912f32c79 Mon Sep 17 00:00:00 2001 +From: Max Kellermann +Date: Thu, 5 Dec 2024 16:49:51 +0100 +Subject: ceph: fix memory leaks in __ceph_sync_read() + +From: Max Kellermann + +commit d6fd6f8280f0257ba93f16900a0d3d3912f32c79 upstream. + +In two `break` statements, the call to ceph_release_page_vector() was +missing, leaking the allocation from ceph_alloc_page_vector(). + +Instead of adding the missing ceph_release_page_vector() calls, the +Ceph maintainers preferred to transfer page ownership to the +`ceph_osd_request` by passing `own_pages=true` to +osd_req_op_extent_osd_data_pages(). This requires postponing the +ceph_osdc_put_request() call until after the block that accesses the +`pages`. 
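The ownership-transfer pattern described above can be pictured with a rough stand-alone sketch (generic C; "request" and request_put() are invented stand-ins, not the ceph structures): once the request owns the pages, putting the request is the single cleanup path, so an early break cannot leak them.

#include <stdlib.h>

struct request {
        void *pages;
        int own_pages;          /* when set, putting the request frees the pages */
};

static void request_put(struct request *req)
{
        if (req->own_pages)
                free(req->pages);
        free(req);
}

static int read_chunk(void)
{
        struct request *req = calloc(1, sizeof(*req));

        if (!req)
                return -1;

        req->pages = malloc(4096);
        if (!req->pages) {
                request_put(req);
                return -1;
        }
        req->own_pages = 1;     /* ownership transferred: one cleanup path only */

        /* ... any later error or "break" just drops the request ... */
        request_put(req);
        return 0;
}

int main(void)
{
        return read_chunk() ? 1 : 0;
}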
+ +Cc: stable@vger.kernel.org +Fixes: 03bc06c7b0bd ("ceph: add new mount option to enable sparse reads") +Fixes: f0fe1e54cfcf ("ceph: plumb in decryption during reads") +Signed-off-by: Max Kellermann +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/file.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -1127,7 +1127,7 @@ ssize_t __ceph_sync_read(struct inode *i + + osd_req_op_extent_osd_data_pages(req, 0, pages, read_len, + offset_in_page(read_off), +- false, false); ++ false, true); + + op = &req->r_ops[0]; + if (sparse) { +@@ -1193,8 +1193,6 @@ ssize_t __ceph_sync_read(struct inode *i + ret = min_t(ssize_t, fret, len); + } + +- ceph_osdc_put_request(req); +- + /* Short read but not EOF? Zero out the remainder. */ + if (ret < len && (off + ret < i_size)) { + int zlen = min(len - ret, i_size - off - ret); +@@ -1226,7 +1224,8 @@ ssize_t __ceph_sync_read(struct inode *i + break; + } + } +- ceph_release_page_vector(pages, num_pages); ++ ++ ceph_osdc_put_request(req); + + if (off >= i_size || !more) + break; diff --git a/queue-6.12/ceph-give-up-on-paths-longer-than-path_max.patch b/queue-6.12/ceph-give-up-on-paths-longer-than-path_max.patch new file mode 100644 index 00000000000..af6b1203227 --- /dev/null +++ b/queue-6.12/ceph-give-up-on-paths-longer-than-path_max.patch @@ -0,0 +1,51 @@ +From 550f7ca98ee028a606aa75705a7e77b1bd11720f Mon Sep 17 00:00:00 2001 +From: Max Kellermann +Date: Mon, 18 Nov 2024 23:28:28 +0100 +Subject: ceph: give up on paths longer than PATH_MAX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Max Kellermann + +commit 550f7ca98ee028a606aa75705a7e77b1bd11720f upstream. + +If the full path to be built by ceph_mdsc_build_path() happens to be +longer than PATH_MAX, then this function will enter an endless (retry) +loop, effectively blocking the whole task. Most of the machine +becomes unusable, making this a very simple and effective DoS +vulnerability. + +I cannot imagine why this retry was ever implemented, but it seems +rather useless and harmful to me. Let's remove it and fail with +ENAMETOOLONG instead. + +Cc: stable@vger.kernel.org +Reported-by: Dario Weißer +Signed-off-by: Max Kellermann +Reviewed-by: Alex Markuze +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/mds_client.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -2808,12 +2808,11 @@ retry: + + if (pos < 0) { + /* +- * A rename didn't occur, but somehow we didn't end up where +- * we thought we would. Throw a warning and try again. ++ * The path is longer than PATH_MAX and this function ++ * cannot ever succeed. Creating paths that long is ++ * possible with Ceph, but Linux cannot use them. 
+ */ +- pr_warn_client(cl, "did not end path lookup where expected (pos = %d)\n", +- pos); +- goto retry; ++ return ERR_PTR(-ENAMETOOLONG); + } + + *pbase = base; diff --git a/queue-6.12/ceph-improve-error-handling-and-short-overflow-read-logic-in-__ceph_sync_read.patch b/queue-6.12/ceph-improve-error-handling-and-short-overflow-read-logic-in-__ceph_sync_read.patch new file mode 100644 index 00000000000..81e894c02e1 --- /dev/null +++ b/queue-6.12/ceph-improve-error-handling-and-short-overflow-read-logic-in-__ceph_sync_read.patch @@ -0,0 +1,115 @@ +From 9abee475803fab6ad59d4f4fc59c6a75374a7d9d Mon Sep 17 00:00:00 2001 +From: Alex Markuze +Date: Wed, 27 Nov 2024 15:34:10 +0200 +Subject: ceph: improve error handling and short/overflow-read logic in __ceph_sync_read() + +From: Alex Markuze + +commit 9abee475803fab6ad59d4f4fc59c6a75374a7d9d upstream. + +This patch refines the read logic in __ceph_sync_read() to ensure more +predictable and efficient behavior in various edge cases. + +- Return early if the requested read length is zero or if the file size + (`i_size`) is zero. +- Initialize the index variable (`idx`) where needed and reorder some + code to ensure it is always set before use. +- Improve error handling by checking for negative return values earlier. +- Remove redundant encrypted file checks after failures. Only attempt + filesystem-level decryption if the read succeeded. +- Simplify leftover calculations to correctly handle cases where the + read extends beyond the end of the file or stops short. This can be + hit by continuously reading a file while, on another client, we keep + truncating and writing new data into it. +- This resolves multiple issues caused by integer and consequent buffer + overflow (`pages` array being accessed beyond `num_pages`): + - https://tracker.ceph.com/issues/67524 + - https://tracker.ceph.com/issues/68980 + - https://tracker.ceph.com/issues/68981 + +Cc: stable@vger.kernel.org +Fixes: 1065da21e5df ("ceph: stop copying to iter at EOF on sync reads") +Reported-by: Luis Henriques (SUSE) +Signed-off-by: Alex Markuze +Reviewed-by: Viacheslav Dubeyko +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/file.c | 29 ++++++++++++++--------------- + 1 file changed, 14 insertions(+), 15 deletions(-) + +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -1066,7 +1066,7 @@ ssize_t __ceph_sync_read(struct inode *i + if (ceph_inode_is_shutdown(inode)) + return -EIO; + +- if (!len) ++ if (!len || !i_size) + return 0; + /* + * flush any page cache pages in this range. this +@@ -1086,7 +1086,7 @@ ssize_t __ceph_sync_read(struct inode *i + int num_pages; + size_t page_off; + bool more; +- int idx; ++ int idx = 0; + size_t left; + struct ceph_osd_req_op *op; + u64 read_off = off; +@@ -1160,7 +1160,14 @@ ssize_t __ceph_sync_read(struct inode *i + else if (ret == -ENOENT) + ret = 0; + +- if (ret > 0 && IS_ENCRYPTED(inode)) { ++ if (ret < 0) { ++ ceph_osdc_put_request(req); ++ if (ret == -EBLOCKLISTED) ++ fsc->blocklisted = true; ++ break; ++ } ++ ++ if (IS_ENCRYPTED(inode)) { + int fret; + + fret = ceph_fscrypt_decrypt_extents(inode, pages, +@@ -1189,7 +1196,7 @@ ssize_t __ceph_sync_read(struct inode *i + ceph_osdc_put_request(req); + + /* Short read but not EOF? Zero out the remainder. 
*/ +- if (ret >= 0 && ret < len && (off + ret < i_size)) { ++ if (ret < len && (off + ret < i_size)) { + int zlen = min(len - ret, i_size - off - ret); + int zoff = page_off + ret; + +@@ -1199,13 +1206,11 @@ ssize_t __ceph_sync_read(struct inode *i + ret += zlen; + } + +- idx = 0; +- if (ret <= 0) +- left = 0; +- else if (off + ret > i_size) +- left = i_size - off; ++ if (off + ret > i_size) ++ left = (i_size > off) ? i_size - off : 0; + else + left = ret; ++ + while (left > 0) { + size_t plen, copied; + +@@ -1223,12 +1228,6 @@ ssize_t __ceph_sync_read(struct inode *i + } + ceph_release_page_vector(pages, num_pages); + +- if (ret < 0) { +- if (ret == -EBLOCKLISTED) +- fsc->blocklisted = true; +- break; +- } +- + if (off >= i_size || !more) + break; + } diff --git a/queue-6.12/ceph-validate-snapdirname-option-length-when-mounting.patch b/queue-6.12/ceph-validate-snapdirname-option-length-when-mounting.patch new file mode 100644 index 00000000000..2db3007de27 --- /dev/null +++ b/queue-6.12/ceph-validate-snapdirname-option-length-when-mounting.patch @@ -0,0 +1,33 @@ +From 12eb22a5a609421b380c3c6ca887474fb2089b2c Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Wed, 20 Nov 2024 16:43:51 +0100 +Subject: ceph: validate snapdirname option length when mounting + +From: Ilya Dryomov + +commit 12eb22a5a609421b380c3c6ca887474fb2089b2c upstream. + +It becomes a path component, so it shouldn't exceed NAME_MAX +characters. This was hardened in commit c152737be22b ("ceph: Use +strscpy() instead of strcpy() in __get_snap_name()"), but no actual +check was put in place. + +Cc: stable@vger.kernel.org +Signed-off-by: Ilya Dryomov +Reviewed-by: Alex Markuze +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/super.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/ceph/super.c ++++ b/fs/ceph/super.c +@@ -431,6 +431,8 @@ static int ceph_parse_mount_param(struct + + switch (token) { + case Opt_snapdirname: ++ if (strlen(param->string) > NAME_MAX) ++ return invalfc(fc, "snapdirname too long"); + kfree(fsopt->snapdir_name); + fsopt->snapdir_name = param->string; + param->string = NULL; diff --git a/queue-6.12/kvm-svm-allow-guest-writes-to-set-msr_amd64_de_cfg-bits.patch b/queue-6.12/kvm-svm-allow-guest-writes-to-set-msr_amd64_de_cfg-bits.patch new file mode 100644 index 00000000000..7fd72d98a88 --- /dev/null +++ b/queue-6.12/kvm-svm-allow-guest-writes-to-set-msr_amd64_de_cfg-bits.patch @@ -0,0 +1,54 @@ +From 4d5163cba43fe96902165606fa54e1aecbbb32de Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 11 Dec 2024 09:29:52 -0800 +Subject: KVM: SVM: Allow guest writes to set MSR_AMD64_DE_CFG bits + +From: Sean Christopherson + +commit 4d5163cba43fe96902165606fa54e1aecbbb32de upstream. + +Drop KVM's arbitrary behavior of making DE_CFG.LFENCE_SERIALIZE read-only +for the guest, as rejecting writes can lead to guest crashes, e.g. Windows +in particular doesn't gracefully handle unexpected #GPs on the WRMSR, and +nothing in the AMD manuals suggests that LFENCE_SERIALIZE is read-only _if +it exists_. + +KVM only allows LFENCE_SERIALIZE to be set, by the guest or host, if the +underlying CPU has X86_FEATURE_LFENCE_RDTSC, i.e. if LFENCE is guaranteed +to be serializing. So if the guest sets LFENCE_SERIALIZE, KVM will provide +the desired/correct behavior without any additional action (the guest's +value is never stuffed into hardware). And having LFENCE be serializing +even when it's not _required_ to be is a-ok from a functional perspective. 
+ +Fixes: 74a0e79df68a ("KVM: SVM: Disallow guest from changing userspace's MSR_AMD64_DE_CFG value") +Fixes: d1d93fa90f1a ("KVM: SVM: Add MSR-based feature support for serializing LFENCE") +Reported-by: Simon Pilkington +Closes: https://lore.kernel.org/all/52914da7-a97b-45ad-86a0-affdf8266c61@mailbox.org +Cc: Tom Lendacky +Cc: stable@vger.kernel.org +Reviewed-by: Tom Lendacky +Link: https://lore.kernel.org/r/20241211172952.1477605-1-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 9 --------- + 1 file changed, 9 deletions(-) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -3199,15 +3199,6 @@ static int svm_set_msr(struct kvm_vcpu * + if (data & ~supported_de_cfg) + return 1; + +- /* +- * Don't let the guest change the host-programmed value. The +- * MSR is very model specific, i.e. contains multiple bits that +- * are completely unknown to KVM, and the one bit known to KVM +- * is simply a reflection of hardware capabilities. +- */ +- if (!msr->host_initiated && data != svm->msr_decfg) +- return 1; +- + svm->msr_decfg = data; + break; + } diff --git a/queue-6.12/mm-shmem-fix-shmemhugepages-at-swapout.patch b/queue-6.12/mm-shmem-fix-shmemhugepages-at-swapout.patch new file mode 100644 index 00000000000..c0da76a61cd --- /dev/null +++ b/queue-6.12/mm-shmem-fix-shmemhugepages-at-swapout.patch @@ -0,0 +1,84 @@ +From dad2dc9c92e0f93f33cebcb0595b8daa3d57473f Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Wed, 4 Dec 2024 22:50:06 -0800 +Subject: mm: shmem: fix ShmemHugePages at swapout + +From: Hugh Dickins + +commit dad2dc9c92e0f93f33cebcb0595b8daa3d57473f upstream. + +/proc/meminfo ShmemHugePages has been showing overlarge amounts (more than +Shmem) after swapping out THPs: we forgot to update NR_SHMEM_THPS. + +Add shmem_update_stats(), to avoid repetition, and risk of making that +mistake again: the call from shmem_delete_from_page_cache() is the bugfix; +the call from shmem_replace_folio() is reassuring, but not really a bugfix +(replace corrects misplaced swapin readahead, but huge swapin readahead +would be a mistake). + +Link: https://lkml.kernel.org/r/5ba477c8-a569-70b5-923e-09ab221af45b@google.com +Fixes: 809bc86517cc ("mm: shmem: support large folio swap out") +Signed-off-by: Hugh Dickins +Reviewed-by: Shakeel Butt +Reviewed-by: Yosry Ahmed +Reviewed-by: Baolin Wang +Tested-by: Baolin Wang +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/shmem.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -779,6 +779,14 @@ static bool shmem_huge_global_enabled(st + } + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + ++static void shmem_update_stats(struct folio *folio, int nr_pages) ++{ ++ if (folio_test_pmd_mappable(folio)) ++ __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr_pages); ++ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages); ++ __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_pages); ++} ++ + /* + * Somewhat like filemap_add_folio, but error if expected item has gone. 
+ */ +@@ -813,10 +821,7 @@ static int shmem_add_to_page_cache(struc + xas_store(&xas, folio); + if (xas_error(&xas)) + goto unlock; +- if (folio_test_pmd_mappable(folio)) +- __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr); +- __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr); +- __lruvec_stat_mod_folio(folio, NR_SHMEM, nr); ++ shmem_update_stats(folio, nr); + mapping->nrpages += nr; + unlock: + xas_unlock_irq(&xas); +@@ -844,8 +849,7 @@ static void shmem_delete_from_page_cache + error = shmem_replace_entry(mapping, folio->index, folio, radswap); + folio->mapping = NULL; + mapping->nrpages -= nr; +- __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr); +- __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr); ++ shmem_update_stats(folio, -nr); + xa_unlock_irq(&mapping->i_pages); + folio_put_refs(folio, nr); + BUG_ON(error); +@@ -1944,10 +1948,8 @@ static int shmem_replace_folio(struct fo + } + if (!error) { + mem_cgroup_replace_folio(old, new); +- __lruvec_stat_mod_folio(new, NR_FILE_PAGES, nr_pages); +- __lruvec_stat_mod_folio(new, NR_SHMEM, nr_pages); +- __lruvec_stat_mod_folio(old, NR_FILE_PAGES, -nr_pages); +- __lruvec_stat_mod_folio(old, NR_SHMEM, -nr_pages); ++ shmem_update_stats(new, nr_pages); ++ shmem_update_stats(old, -nr_pages); + } + xa_unlock_irq(&swap_mapping->i_pages); + diff --git a/queue-6.12/mm-use-aligned-address-in-clear_gigantic_page.patch b/queue-6.12/mm-use-aligned-address-in-clear_gigantic_page.patch new file mode 100644 index 00000000000..60acae87153 --- /dev/null +++ b/queue-6.12/mm-use-aligned-address-in-clear_gigantic_page.patch @@ -0,0 +1,57 @@ +From 8aca2bc96c833ba695ede7a45ad7784c836a262e Mon Sep 17 00:00:00 2001 +From: Kefeng Wang +Date: Mon, 28 Oct 2024 22:56:55 +0800 +Subject: mm: use aligned address in clear_gigantic_page() + +From: Kefeng Wang + +commit 8aca2bc96c833ba695ede7a45ad7784c836a262e upstream. + +In current kernel, hugetlb_no_page() calls folio_zero_user() with the +fault address. Where the fault address may be not aligned with the huge +page size. Then, folio_zero_user() may call clear_gigantic_page() with +the address, while clear_gigantic_page() requires the address to be huge +page size aligned. So, this may cause memory corruption or information +leak, addtional, use more obvious naming 'addr_hint' instead of 'addr' for +clear_gigantic_page(). 
+ +Link: https://lkml.kernel.org/r/20241028145656.932941-1-wangkefeng.wang@huawei.com +Fixes: 78fefd04c123 ("mm: memory: convert clear_huge_page() to folio_zero_user()") +Signed-off-by: Kefeng Wang +Reviewed-by: "Huang, Ying" +Reviewed-by: David Hildenbrand +Cc: Matthew Wilcox (Oracle) +Cc: Muchun Song +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/hugetlbfs/inode.c | 2 +- + mm/memory.c | 3 ++- + 2 files changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -893,7 +893,7 @@ static long hugetlbfs_fallocate(struct f + error = PTR_ERR(folio); + goto out; + } +- folio_zero_user(folio, ALIGN_DOWN(addr, hpage_size)); ++ folio_zero_user(folio, addr); + __folio_mark_uptodate(folio); + error = hugetlb_add_to_page_cache(folio, mapping, index); + if (unlikely(error)) { +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -6780,9 +6780,10 @@ static inline int process_huge_page( + return 0; + } + +-static void clear_gigantic_page(struct folio *folio, unsigned long addr, ++static void clear_gigantic_page(struct folio *folio, unsigned long addr_hint, + unsigned int nr_pages) + { ++ unsigned long addr = ALIGN_DOWN(addr_hint, folio_size(folio)); + int i; + + might_sleep(); diff --git a/queue-6.12/mm-use-aligned-address-in-copy_user_gigantic_page.patch b/queue-6.12/mm-use-aligned-address-in-copy_user_gigantic_page.patch new file mode 100644 index 00000000000..15fb248fb6a --- /dev/null +++ b/queue-6.12/mm-use-aligned-address-in-copy_user_gigantic_page.patch @@ -0,0 +1,73 @@ +From f5d09de9f1bf9674c6418ff10d0a40cfe29268e1 Mon Sep 17 00:00:00 2001 +From: Kefeng Wang +Date: Mon, 28 Oct 2024 22:56:56 +0800 +Subject: mm: use aligned address in copy_user_gigantic_page() + +From: Kefeng Wang + +commit f5d09de9f1bf9674c6418ff10d0a40cfe29268e1 upstream. + +In current kernel, hugetlb_wp() calls copy_user_large_folio() with the +fault address. Where the fault address may be not aligned with the huge +page size. Then, copy_user_large_folio() may call +copy_user_gigantic_page() with the address, while +copy_user_gigantic_page() requires the address to be huge page size +aligned. So, this may cause memory corruption or information leak, +addtional, use more obvious naming 'addr_hint' instead of 'addr' for +copy_user_gigantic_page(). 
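The alignment requirement behind both gigantic-page fixes can be shown stand-alone (HPAGE_SIZE and ALIGN_DOWN below are local stand-ins, not the kernel macros): the fault address is only a hint somewhere inside the huge page, while clearing or copying has to start from the aligned base.

#include <stdio.h>

#define HPAGE_SIZE      (2UL * 1024 * 1024)       /* assume a 2 MiB huge page */
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))       /* 'a' must be a power of two */

int main(void)
{
        unsigned long addr_hint = 0x40123456UL;   /* unaligned fault address */
        unsigned long base = ALIGN_DOWN(addr_hint, HPAGE_SIZE);

        /* the operation must cover [base, base + HPAGE_SIZE), not start at the hint */
        printf("hint=%#lx base=%#lx span=[%#lx, %#lx)\n",
               addr_hint, base, base, base + HPAGE_SIZE);
        return 0;
}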
+ +Link: https://lkml.kernel.org/r/20241028145656.932941-2-wangkefeng.wang@huawei.com +Fixes: 530dd9926dc1 ("mm: memory: improve copy_user_large_folio()") +Signed-off-by: Kefeng Wang +Reviewed-by: David Hildenbrand +Cc: Huang Ying +Cc: Matthew Wilcox (Oracle) +Cc: Muchun Song +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 5 ++--- + mm/memory.c | 5 +++-- + 2 files changed, 5 insertions(+), 5 deletions(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -5333,7 +5333,7 @@ again: + break; + } + ret = copy_user_large_folio(new_folio, pte_folio, +- ALIGN_DOWN(addr, sz), dst_vma); ++ addr, dst_vma); + folio_put(pte_folio); + if (ret) { + folio_put(new_folio); +@@ -6632,8 +6632,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_ + *foliop = NULL; + goto out; + } +- ret = copy_user_large_folio(folio, *foliop, +- ALIGN_DOWN(dst_addr, size), dst_vma); ++ ret = copy_user_large_folio(folio, *foliop, dst_addr, dst_vma); + folio_put(*foliop); + *foliop = NULL; + if (ret) { +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -6817,13 +6817,14 @@ void folio_zero_user(struct folio *folio + } + + static int copy_user_gigantic_page(struct folio *dst, struct folio *src, +- unsigned long addr, ++ unsigned long addr_hint, + struct vm_area_struct *vma, + unsigned int nr_pages) + { +- int i; ++ unsigned long addr = ALIGN_DOWN(addr_hint, folio_size(dst)); + struct page *dst_page; + struct page *src_page; ++ int i; + + for (i = 0; i < nr_pages; i++) { + dst_page = folio_page(dst, i); diff --git a/queue-6.12/nfs-pnfs-fix-a-live-lock-between-recalled-layouts-and-layoutget.patch b/queue-6.12/nfs-pnfs-fix-a-live-lock-between-recalled-layouts-and-layoutget.patch new file mode 100644 index 00000000000..6c71b8065b4 --- /dev/null +++ b/queue-6.12/nfs-pnfs-fix-a-live-lock-between-recalled-layouts-and-layoutget.patch @@ -0,0 +1,35 @@ +From 62e2a47ceab8f3f7d2e3f0e03fdd1c5e0059fd8b Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Mon, 16 Dec 2024 19:28:06 -0500 +Subject: NFS/pnfs: Fix a live lock between recalled layouts and layoutget + +From: Trond Myklebust + +commit 62e2a47ceab8f3f7d2e3f0e03fdd1c5e0059fd8b upstream. + +When the server is recalling a layout, we should ignore the count of +outstanding layoutget calls, since the server is expected to return +either NFS4ERR_RECALLCONFLICT or NFS4ERR_RETURNCONFLICT for as long as +the recall is outstanding. +Currently, we may end up livelocking, causing the layout to eventually +be forcibly revoked. 
+ +Fixes: bf0291dd2267 ("pNFS: Ensure LAYOUTGET and LAYOUTRETURN are properly serialised") +Cc: stable@vger.kernel.org +Signed-off-by: Trond Myklebust +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfs/pnfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfs/pnfs.c ++++ b/fs/nfs/pnfs.c +@@ -1308,7 +1308,7 @@ pnfs_prepare_layoutreturn(struct pnfs_la + enum pnfs_iomode *iomode) + { + /* Serialise LAYOUTGET/LAYOUTRETURN */ +- if (atomic_read(&lo->plh_outstanding) != 0) ++ if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0) + return false; + if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) + return false; diff --git a/queue-6.12/nilfs2-fix-buffer-head-leaks-in-calls-to-truncate_inode_pages.patch b/queue-6.12/nilfs2-fix-buffer-head-leaks-in-calls-to-truncate_inode_pages.patch new file mode 100644 index 00000000000..418f10034a3 --- /dev/null +++ b/queue-6.12/nilfs2-fix-buffer-head-leaks-in-calls-to-truncate_inode_pages.patch @@ -0,0 +1,99 @@ +From 6309b8ce98e9a18390b9fd8f03fc412f3c17aee9 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Fri, 13 Dec 2024 01:43:28 +0900 +Subject: nilfs2: fix buffer head leaks in calls to truncate_inode_pages() + +From: Ryusuke Konishi + +commit 6309b8ce98e9a18390b9fd8f03fc412f3c17aee9 upstream. + +When block_invalidatepage was converted to block_invalidate_folio, the +fallback to block_invalidatepage in folio_invalidate() if the +address_space_operations method invalidatepage (currently +invalidate_folio) was not set, was removed. + +Unfortunately, some pseudo-inodes in nilfs2 use empty_aops set by +inode_init_always_gfp() as is, or explicitly set it to +address_space_operations. Therefore, with this change, +block_invalidatepage() is no longer called from folio_invalidate(), and as +a result, the buffer_head structures attached to these pages/folios are no +longer freed via try_to_free_buffers(). + +Thus, these buffer heads are now leaked by truncate_inode_pages(), which +cleans up the page cache from inode evict(), etc. + +Three types of caches use empty_aops: gc inode caches and the DAT shadow +inode used by GC, and b-tree node caches. Of these, b-tree node caches +explicitly call invalidate_mapping_pages() during cleanup, which involves +calling try_to_free_buffers(), so the leak was not visible during normal +operation but worsened when GC was performed. + +Fix this issue by using address_space_operations with invalidate_folio set +to block_invalidate_folio instead of empty_aops, which will ensure the +same behavior as before. 
+ +Link: https://lkml.kernel.org/r/20241212164556.21338-1-konishi.ryusuke@gmail.com +Fixes: 7ba13abbd31e ("fs: Turn block_invalidatepage into block_invalidate_folio") +Signed-off-by: Ryusuke Konishi +Cc: [5.18+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/btnode.c | 1 + + fs/nilfs2/gcinode.c | 2 +- + fs/nilfs2/inode.c | 5 +++++ + fs/nilfs2/nilfs.h | 1 + + 4 files changed, 8 insertions(+), 1 deletion(-) + +--- a/fs/nilfs2/btnode.c ++++ b/fs/nilfs2/btnode.c +@@ -35,6 +35,7 @@ void nilfs_init_btnc_inode(struct inode + ii->i_flags = 0; + memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); ++ btnc_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; + } + + void nilfs_btnode_cache_clear(struct address_space *btnc) +--- a/fs/nilfs2/gcinode.c ++++ b/fs/nilfs2/gcinode.c +@@ -163,7 +163,7 @@ int nilfs_init_gcinode(struct inode *ino + + inode->i_mode = S_IFREG; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); +- inode->i_mapping->a_ops = &empty_aops; ++ inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; + + ii->i_flags = 0; + nilfs_bmap_init_gc(ii->i_bmap); +--- a/fs/nilfs2/inode.c ++++ b/fs/nilfs2/inode.c +@@ -307,6 +307,10 @@ const struct address_space_operations ni + .is_partially_uptodate = block_is_partially_uptodate, + }; + ++const struct address_space_operations nilfs_buffer_cache_aops = { ++ .invalidate_folio = block_invalidate_folio, ++}; ++ + static int nilfs_insert_inode_locked(struct inode *inode, + struct nilfs_root *root, + unsigned long ino) +@@ -706,6 +710,7 @@ struct inode *nilfs_iget_for_shadow(stru + NILFS_I(s_inode)->i_flags = 0; + memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); ++ s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; + + err = nilfs_attach_btree_node_cache(s_inode); + if (unlikely(err)) { +--- a/fs/nilfs2/nilfs.h ++++ b/fs/nilfs2/nilfs.h +@@ -401,6 +401,7 @@ extern const struct file_operations nilf + extern const struct inode_operations nilfs_file_inode_operations; + extern const struct file_operations nilfs_file_operations; + extern const struct address_space_operations nilfs_aops; ++extern const struct address_space_operations nilfs_buffer_cache_aops; + extern const struct inode_operations nilfs_dir_inode_operations; + extern const struct inode_operations nilfs_special_inode_operations; + extern const struct inode_operations nilfs_symlink_inode_operations; diff --git a/queue-6.12/nilfs2-prevent-use-of-deleted-inode.patch b/queue-6.12/nilfs2-prevent-use-of-deleted-inode.patch new file mode 100644 index 00000000000..0698a039d05 --- /dev/null +++ b/queue-6.12/nilfs2-prevent-use-of-deleted-inode.patch @@ -0,0 +1,84 @@ +From 901ce9705fbb9f330ff1f19600e5daf9770b0175 Mon Sep 17 00:00:00 2001 +From: Edward Adam Davis +Date: Mon, 9 Dec 2024 15:56:52 +0900 +Subject: nilfs2: prevent use of deleted inode + +From: Edward Adam Davis + +commit 901ce9705fbb9f330ff1f19600e5daf9770b0175 upstream. + +syzbot reported a WARNING in nilfs_rmdir. [1] + +Because the inode bitmap is corrupted, an inode with an inode number that +should exist as a ".nilfs" file was reassigned by nilfs_mkdir for "file0", +causing an inode duplication during execution. And this causes an +underflow of i_nlink in rmdir operations. + +The inode is used twice by the same task to unmount and remove directories +".nilfs" and "file0", it trigger warning in nilfs_rmdir. 
+ +Avoid to this issue, check i_nlink in nilfs_iget(), if it is 0, it means +that this inode has been deleted, and iput is executed to reclaim it. + +[1] +WARNING: CPU: 1 PID: 5824 at fs/inode.c:407 drop_nlink+0xc4/0x110 fs/inode.c:407 +... +Call Trace: + + nilfs_rmdir+0x1b0/0x250 fs/nilfs2/namei.c:342 + vfs_rmdir+0x3a3/0x510 fs/namei.c:4394 + do_rmdir+0x3b5/0x580 fs/namei.c:4453 + __do_sys_rmdir fs/namei.c:4472 [inline] + __se_sys_rmdir fs/namei.c:4470 [inline] + __x64_sys_rmdir+0x47/0x50 fs/namei.c:4470 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +Link: https://lkml.kernel.org/r/20241209065759.6781-1-konishi.ryusuke@gmail.com +Fixes: d25006523d0b ("nilfs2: pathname operations") +Signed-off-by: Ryusuke Konishi +Reported-by: syzbot+9260555647a5132edd48@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=9260555647a5132edd48 +Tested-by: syzbot+9260555647a5132edd48@syzkaller.appspotmail.com +Signed-off-by: Edward Adam Davis +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/inode.c | 8 +++++++- + fs/nilfs2/namei.c | 5 +++++ + 2 files changed, 12 insertions(+), 1 deletion(-) + +--- a/fs/nilfs2/inode.c ++++ b/fs/nilfs2/inode.c +@@ -579,8 +579,14 @@ struct inode *nilfs_iget(struct super_bl + inode = nilfs_iget_locked(sb, root, ino); + if (unlikely(!inode)) + return ERR_PTR(-ENOMEM); +- if (!(inode->i_state & I_NEW)) ++ ++ if (!(inode->i_state & I_NEW)) { ++ if (!inode->i_nlink) { ++ iput(inode); ++ return ERR_PTR(-ESTALE); ++ } + return inode; ++ } + + err = __nilfs_read_inode(sb, root, ino, inode); + if (unlikely(err)) { +--- a/fs/nilfs2/namei.c ++++ b/fs/nilfs2/namei.c +@@ -67,6 +67,11 @@ nilfs_lookup(struct inode *dir, struct d + inode = NULL; + } else { + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); ++ if (inode == ERR_PTR(-ESTALE)) { ++ nilfs_error(dir->i_sb, ++ "deleted inode referenced: %lu", ino); ++ return ERR_PTR(-EIO); ++ } + } + + return d_splice_alias(inode, dentry); diff --git a/queue-6.12/ocfs2-fix-the-space-leak-in-la-when-releasing-la.patch b/queue-6.12/ocfs2-fix-the-space-leak-in-la-when-releasing-la.patch new file mode 100644 index 00000000000..12a7bc293bf --- /dev/null +++ b/queue-6.12/ocfs2-fix-the-space-leak-in-la-when-releasing-la.patch @@ -0,0 +1,59 @@ +From 7782e3b3b004e8cb94a88621a22cc3c2f33e5b90 Mon Sep 17 00:00:00 2001 +From: Heming Zhao +Date: Thu, 5 Dec 2024 18:48:33 +0800 +Subject: ocfs2: fix the space leak in LA when releasing LA + +From: Heming Zhao + +commit 7782e3b3b004e8cb94a88621a22cc3c2f33e5b90 upstream. + +Commit 30dd3478c3cd ("ocfs2: correctly use ocfs2_find_next_zero_bit()") +introduced an issue, the ocfs2_sync_local_to_main() ignores the last +contiguous free bits, which causes an OCFS2 volume to lose the last free +clusters of LA window during the release routine. + +Please note, because commit dfe6c5692fb5 ("ocfs2: fix the la space leak +when unmounting an ocfs2 volume") was reverted, this commit is a +replacement fix for commit dfe6c5692fb5. 
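The loop restructuring is easier to see on a plain bit array (stand-alone sketch, not the ocfs2 code; one byte per bit for simplicity): a run of zero bits that reaches the end of the bitmap must still be flushed once find-next-zero-bit reports nothing further, instead of being dropped on exit.

#include <stdio.h>

static int find_next_zero_bit(const unsigned char *bits, int size, int start)
{
        for (int i = start; i < size; i++)
                if (!bits[i])
                        return i;
        return size;            /* "not found" */
}

int main(void)
{
        unsigned char bits[] = { 1, 0, 0, 1, 0, 0, 0 };  /* note the trailing zero run */
        int left = sizeof(bits), start = 0, count = 0;

        while (1) {
                int bit_off = find_next_zero_bit(bits, left, start);

                if (bit_off < left && bit_off == start) {
                        count++;
                        start++;
                        continue;
                }
                if (count)      /* flush the run we were tracking */
                        printf("free run: start=%d len=%d\n", start - count, count);
                if (bit_off >= left)
                        break;  /* the trailing run was flushed before leaving */
                count = 1;
                start = bit_off + 1;
        }
        return 0;
}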
+ +Link: https://lkml.kernel.org/r/20241205104835.18223-3-heming.zhao@suse.com +Fixes: 30dd3478c3cd ("ocfs2: correctly use ocfs2_find_next_zero_bit()") +Signed-off-by: Heming Zhao +Suggested-by: Joseph Qi +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/localalloc.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/fs/ocfs2/localalloc.c ++++ b/fs/ocfs2/localalloc.c +@@ -971,9 +971,9 @@ static int ocfs2_sync_local_to_main(stru + start = count = 0; + left = le32_to_cpu(alloc->id1.bitmap1.i_total); + +- while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) < +- left) { +- if (bit_off == start) { ++ while (1) { ++ bit_off = ocfs2_find_next_zero_bit(bitmap, left, start); ++ if ((bit_off < left) && (bit_off == start)) { + count++; + start++; + continue; +@@ -998,6 +998,8 @@ static int ocfs2_sync_local_to_main(stru + } + } + ++ if (bit_off >= left) ++ break; + count = 1; + start = bit_off + 1; + } diff --git a/queue-6.12/of-address-preserve-the-flags-portion-on-1-1-dma-ranges-mapping.patch b/queue-6.12/of-address-preserve-the-flags-portion-on-1-1-dma-ranges-mapping.patch new file mode 100644 index 00000000000..2707344752a --- /dev/null +++ b/queue-6.12/of-address-preserve-the-flags-portion-on-1-1-dma-ranges-mapping.patch @@ -0,0 +1,45 @@ +From 7f05e20b989ac33c9c0f8c2028ec0a566493548f Mon Sep 17 00:00:00 2001 +From: Andrea della Porta +Date: Sun, 24 Nov 2024 11:05:37 +0100 +Subject: of: address: Preserve the flags portion on 1:1 dma-ranges mapping + +From: Andrea della Porta + +commit 7f05e20b989ac33c9c0f8c2028ec0a566493548f upstream. + +A missing or empty dma-ranges in a DT node implies a 1:1 mapping for dma +translations. In this specific case, the current behaviour is to zero out +the entire specifier so that the translation could be carried on as an +offset from zero. This includes address specifier that has flags (e.g. +PCI ranges). + +Once the flags portion has been zeroed, the translation chain is broken +since the mapping functions will check the upcoming address specifier +against mismatching flags, always failing the 1:1 mapping and its entire +purpose of always succeeding. + +Set to zero only the address portion while passing the flags through. 
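A compressed sketch of the 1:1 case (illustrative values, not the OF code): with a three-cell PCI-style specifier, only the address cells are cleared and the flags cell is passed through untouched.

#include <stdio.h>

int main(void)
{
        /* illustrative PCI-style specifier: { flags, addr-hi, addr-lo } */
        unsigned int addr[] = { 0x82000000, 0x00000000, 0xa0000000 };
        int flag_cells = 1, pna = 3;

        /* 1:1 translation: zero the address cells, keep the flags cell */
        for (int i = flag_cells; i < pna; i++)
                addr[i] = 0;

        printf("%#010x %#010x %#010x\n", addr[0], addr[1], addr[2]);
        return 0;
}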
+ +Fixes: dbbdee94734b ("of/address: Merge all of the bus translation code") +Cc: stable@vger.kernel.org +Signed-off-by: Andrea della Porta +Tested-by: Herve Codina +Link: https://lore.kernel.org/r/e51ae57874e58a9b349c35e2e877425ebc075d7a.1732441813.git.andrea.porta@suse.com +Signed-off-by: Rob Herring (Arm) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/address.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/of/address.c ++++ b/drivers/of/address.c +@@ -455,7 +455,8 @@ static int of_translate_one(struct devic + } + if (ranges == NULL || rlen == 0) { + offset = of_read_number(addr, na); +- memset(addr, 0, pna * 4); ++ /* set address to zero, pass flags through */ ++ memset(addr + pbus->flag_cells, 0, (pna - pbus->flag_cells) * 4); + pr_debug("empty ranges; 1:1 translation\n"); + goto finish; + } diff --git a/queue-6.12/of-fix-error-path-in-of_parse_phandle_with_args_map.patch b/queue-6.12/of-fix-error-path-in-of_parse_phandle_with_args_map.patch new file mode 100644 index 00000000000..c9f119824d0 --- /dev/null +++ b/queue-6.12/of-fix-error-path-in-of_parse_phandle_with_args_map.patch @@ -0,0 +1,72 @@ +From d7dfa7fde63dde4d2ec0083133efe2c6686c03ff Mon Sep 17 00:00:00 2001 +From: Herve Codina +Date: Mon, 2 Dec 2024 17:58:19 +0100 +Subject: of: Fix error path in of_parse_phandle_with_args_map() + +From: Herve Codina + +commit d7dfa7fde63dde4d2ec0083133efe2c6686c03ff upstream. + +The current code uses some 'goto put;' to cancel the parsing operation +and can lead to a return code value of 0 even on error cases. + +Indeed, some goto calls are done from a loop without setting the ret +value explicitly before the goto call and so the ret value can be set to +0 due to operation done in previous loop iteration. For instance match +can be set to 0 in the previous loop iteration (leading to a new +iteration) but ret can also be set to 0 it the of_property_read_u32() +call succeed. In that case if no match are found or if an error is +detected the new iteration, the return value can be wrongly 0. + +Avoid those cases setting the ret value explicitly before the goto +calls. 
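The failure mode is the classic stale-return-value pattern; a compressed stand-alone sketch (generic C with invented helpers, not the OF code) shows why ret must be set right before each error goto:

#include <stdio.h>

static int lookup(int key, int *out)    /* invented helper; 0 means success */
{
        *out = key * 2;
        return 0;
}

static int parse(const int *map, int len)
{
        int ret = -1;

        for (int i = 0; i < len; i++) {
                int val;

                ret = lookup(map[i], &val);     /* can leave ret == 0 behind */
                if (ret)
                        goto out;

                if (val > 10) {                 /* malformed entry detected */
                        ret = -22;              /* without this, stale ret == 0 escapes */
                        goto out;
                }
        }
        ret = 0;
out:
        return ret;
}

int main(void)
{
        int map[] = { 1, 2, 9 };                /* last entry is "malformed" */

        printf("parse() = %d\n", parse(map, 3));
        return 0;
}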
+ +Fixes: bd6f2fd5a1d5 ("of: Support parsing phandle argument lists through a nexus node") +Cc: stable@vger.kernel.org +Signed-off-by: Herve Codina +Link: https://lore.kernel.org/r/20241202165819.158681-1-herve.codina@bootlin.com +Signed-off-by: Rob Herring (Arm) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/base.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/drivers/of/base.c ++++ b/drivers/of/base.c +@@ -1455,8 +1455,10 @@ int of_parse_phandle_with_args_map(const + map_len--; + + /* Check if not found */ +- if (!new) ++ if (!new) { ++ ret = -EINVAL; + goto put; ++ } + + if (!of_device_is_available(new)) + match = 0; +@@ -1466,17 +1468,20 @@ int of_parse_phandle_with_args_map(const + goto put; + + /* Check for malformed properties */ +- if (WARN_ON(new_size > MAX_PHANDLE_ARGS)) +- goto put; +- if (map_len < new_size) ++ if (WARN_ON(new_size > MAX_PHANDLE_ARGS) || ++ map_len < new_size) { ++ ret = -EINVAL; + goto put; ++ } + + /* Move forward by new node's #-cells amount */ + map += new_size; + map_len -= new_size; + } +- if (!match) ++ if (!match) { ++ ret = -ENOENT; + goto put; ++ } + + /* Get the -map-pass-thru property (optional) */ + pass = of_get_property(cur, pass_name, NULL); diff --git a/queue-6.12/of-fix-refcount-leakage-for-of-node-returned-by-__of_get_dma_parent.patch b/queue-6.12/of-fix-refcount-leakage-for-of-node-returned-by-__of_get_dma_parent.patch new file mode 100644 index 00000000000..ccb9049042b --- /dev/null +++ b/queue-6.12/of-fix-refcount-leakage-for-of-node-returned-by-__of_get_dma_parent.patch @@ -0,0 +1,36 @@ +From 5d009e024056ded20c5bb1583146b833b23bbd5a Mon Sep 17 00:00:00 2001 +From: Zijun Hu +Date: Fri, 6 Dec 2024 08:52:30 +0800 +Subject: of: Fix refcount leakage for OF node returned by __of_get_dma_parent() + +From: Zijun Hu + +commit 5d009e024056ded20c5bb1583146b833b23bbd5a upstream. + +__of_get_dma_parent() returns OF device node @args.np, but the node's +refcount is increased twice, by both of_parse_phandle_with_args() and +of_node_get(), so causes refcount leakage for the node. + +Fix by directly returning the node got by of_parse_phandle_with_args(). + +Fixes: f83a6e5dea6c ("of: address: Add support for the parent DMA bus") +Cc: stable@vger.kernel.org +Signed-off-by: Zijun Hu +Link: https://lore.kernel.org/r/20241206-of_core_fix-v1-4-dc28ed56bec3@quicinc.com +Signed-off-by: Rob Herring (Arm) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/address.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/of/address.c ++++ b/drivers/of/address.c +@@ -616,7 +616,7 @@ struct device_node *__of_get_dma_parent( + if (ret < 0) + return of_get_parent(np); + +- return of_node_get(args.np); ++ return args.np; + } + #endif + diff --git a/queue-6.12/of-irq-fix-interrupt-map-cell-length-check-in-of_irq_parse_imap_parent.patch b/queue-6.12/of-irq-fix-interrupt-map-cell-length-check-in-of_irq_parse_imap_parent.patch new file mode 100644 index 00000000000..ea8f8b54287 --- /dev/null +++ b/queue-6.12/of-irq-fix-interrupt-map-cell-length-check-in-of_irq_parse_imap_parent.patch @@ -0,0 +1,37 @@ +From fec3edc47d5cfc2dd296a5141df887bf567944db Mon Sep 17 00:00:00 2001 +From: Zijun Hu +Date: Mon, 9 Dec 2024 21:24:59 +0800 +Subject: of/irq: Fix interrupt-map cell length check in of_irq_parse_imap_parent() + +From: Zijun Hu + +commit fec3edc47d5cfc2dd296a5141df887bf567944db upstream. 
+ +On a malformed interrupt-map property which is shorter than expected by +1 cell, we may read bogus data past the end of the property instead of +returning an error in of_irq_parse_imap_parent(). + +Decrement the remaining length when skipping over the interrupt parent +phandle cell. + +Fixes: 935df1bd40d4 ("of/irq: Factor out parsing of interrupt-map parent phandle+args from of_irq_parse_raw()") +Cc: stable@vger.kernel.org +Signed-off-by: Zijun Hu +Link: https://lore.kernel.org/r/20241209-of_irq_fix-v1-1-782f1419c8a1@quicinc.com +[rh: reword commit msg] +Signed-off-by: Rob Herring (Arm) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/irq.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/of/irq.c ++++ b/drivers/of/irq.c +@@ -111,6 +111,7 @@ const __be32 *of_irq_parse_imap_parent(c + else + np = of_find_node_by_phandle(be32_to_cpup(imap)); + imap++; ++ len--; + + /* Check if not found */ + if (!np) { diff --git a/queue-6.12/of-irq-fix-using-uninitialized-variable-addr_len-in-api-of_irq_parse_one.patch b/queue-6.12/of-irq-fix-using-uninitialized-variable-addr_len-in-api-of_irq_parse_one.patch new file mode 100644 index 00000000000..f22d0de00ff --- /dev/null +++ b/queue-6.12/of-irq-fix-using-uninitialized-variable-addr_len-in-api-of_irq_parse_one.patch @@ -0,0 +1,47 @@ +From 0f7ca6f69354e0c3923bbc28c92d0ecab4d50a3e Mon Sep 17 00:00:00 2001 +From: Zijun Hu +Date: Mon, 9 Dec 2024 21:25:02 +0800 +Subject: of/irq: Fix using uninitialized variable @addr_len in API of_irq_parse_one() + +From: Zijun Hu + +commit 0f7ca6f69354e0c3923bbc28c92d0ecab4d50a3e upstream. + +of_irq_parse_one() may use uninitialized variable @addr_len as shown below: + +// @addr_len is uninitialized +int addr_len; + +// This operation does not touch @addr_len if it fails. +addr = of_get_property(device, "reg", &addr_len); + +// Use uninitialized @addr_len if the operation fails. +if (addr_len > sizeof(addr_buf)) + addr_len = sizeof(addr_buf); + +// Check the operation result here. +if (addr) + memcpy(addr_buf, addr, addr_len); + +Fix by initializing @addr_len before the operation. + +Fixes: b739dffa5d57 ("of/irq: Prevent device address out-of-bounds read in interrupt map walk") +Cc: stable@vger.kernel.org +Signed-off-by: Zijun Hu +Link: https://lore.kernel.org/r/20241209-of_irq_fix-v1-4-782f1419c8a1@quicinc.com +Signed-off-by: Rob Herring (Arm) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/irq.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/of/irq.c ++++ b/drivers/of/irq.c +@@ -355,6 +355,7 @@ int of_irq_parse_one(struct device_node + return of_irq_parse_oldworld(device, index, out_irq); + + /* Get the reg property (if any) */ ++ addr_len = 0; + addr = of_get_property(device, "reg", &addr_len); + + /* Prevent out-of-bounds read in case of longer interrupt parent address size */ diff --git a/queue-6.12/of-property-fw_devlink-do-not-use-interrupt-parent-directly.patch b/queue-6.12/of-property-fw_devlink-do-not-use-interrupt-parent-directly.patch new file mode 100644 index 00000000000..ca1a56f9c70 --- /dev/null +++ b/queue-6.12/of-property-fw_devlink-do-not-use-interrupt-parent-directly.patch @@ -0,0 +1,65 @@ +From bc7acc0bd0f94c26bc0defc902311794a3d0fae9 Mon Sep 17 00:00:00 2001 +From: Samuel Holland +Date: Wed, 20 Nov 2024 15:31:16 -0800 +Subject: of: property: fw_devlink: Do not use interrupt-parent directly + +From: Samuel Holland + +commit bc7acc0bd0f94c26bc0defc902311794a3d0fae9 upstream. 
+ +commit 7f00be96f125 ("of: property: Add device link support for +interrupt-parent, dmas and -gpio(s)") started adding device links for +the interrupt-parent property. commit 4104ca776ba3 ("of: property: Add +fw_devlink support for interrupts") and commit f265f06af194 ("of: +property: Fix fw_devlink handling of interrupts/interrupts-extended") +later added full support for parsing the interrupts and +interrupts-extended properties, which includes looking up the node of +the parent domain. This made the handler for the interrupt-parent +property redundant. + +In fact, creating device links based solely on interrupt-parent is +problematic, because it can create spurious cycles. A node may have +this property without itself being an interrupt controller or consumer. +For example, this property is often present in the root node or a /soc +bus node to set the default interrupt parent for child nodes. However, +it is incorrect for the bus to depend on the interrupt controller, as +some of the bus's children may not be interrupt consumers at all or may +have a different interrupt parent. + +Resolving these spurious dependency cycles can cause an incorrect probe +order for interrupt controller drivers. This was observed on a RISC-V +system with both an APLIC and IMSIC under /soc, where interrupt-parent +in /soc points to the APLIC, and the APLIC msi-parent points to the +IMSIC. fw_devlink found three dependency cycles and attempted to probe +the APLIC before the IMSIC. After applying this patch, there were no +dependency cycles and the probe order was correct. + +Acked-by: Marc Zyngier +Cc: stable@vger.kernel.org +Fixes: 4104ca776ba3 ("of: property: Add fw_devlink support for interrupts") +Signed-off-by: Samuel Holland +Link: https://lore.kernel.org/r/20241120233124.3649382-1-samuel.holland@sifive.com +Signed-off-by: Rob Herring (Arm) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/property.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/of/property.c ++++ b/drivers/of/property.c +@@ -1213,7 +1213,6 @@ DEFINE_SIMPLE_PROP(iommus, "iommus", "#i + DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") + DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") + DEFINE_SIMPLE_PROP(io_backends, "io-backends", "#io-backend-cells") +-DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL) + DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") + DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") + DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells") +@@ -1359,7 +1358,6 @@ static const struct supplier_bindings of + { .parse_prop = parse_mboxes, }, + { .parse_prop = parse_io_channels, }, + { .parse_prop = parse_io_backends, }, +- { .parse_prop = parse_interrupt_parent, }, + { .parse_prop = parse_dmas, .optional = true, }, + { .parse_prop = parse_power_domains, }, + { .parse_prop = parse_hwlocks, }, diff --git a/queue-6.12/series b/queue-6.12/series index 7e7eb47683a..db4f3cf19bb 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -135,3 +135,24 @@ selftests-memfd-run-sysctl-tests-when-pid-namespace-support-is-enabled.patch selftests-bpf-use-asm-constraint-m-for-loongarch.patch io_uring-fix-registered-ring-file-refcount-leak.patch io_uring-check-if-iowq-is-killed-before-queuing.patch +nfs-pnfs-fix-a-live-lock-between-recalled-layouts-and-layoutget.patch +kvm-svm-allow-guest-writes-to-set-msr_amd64_de_cfg-bits.patch +of-irq-fix-interrupt-map-cell-length-check-in-of_irq_parse_imap_parent.patch 
+of-irq-fix-using-uninitialized-variable-addr_len-in-api-of_irq_parse_one.patch +ocfs2-fix-the-space-leak-in-la-when-releasing-la.patch +nilfs2-fix-buffer-head-leaks-in-calls-to-truncate_inode_pages.patch +nilfs2-prevent-use-of-deleted-inode.patch +udmabuf-fix-racy-memfd-sealing-check.patch +udmabuf-also-check-for-f_seal_future_write.patch +of-property-fw_devlink-do-not-use-interrupt-parent-directly.patch +of-address-preserve-the-flags-portion-on-1-1-dma-ranges-mapping.patch +of-fix-error-path-in-of_parse_phandle_with_args_map.patch +of-fix-refcount-leakage-for-of-node-returned-by-__of_get_dma_parent.patch +ceph-give-up-on-paths-longer-than-path_max.patch +ceph-validate-snapdirname-option-length-when-mounting.patch +ceph-improve-error-handling-and-short-overflow-read-logic-in-__ceph_sync_read.patch +ceph-fix-memory-leaks-in-__ceph_sync_read.patch +ceph-fix-memory-leak-in-ceph_direct_read_write.patch +mm-use-aligned-address-in-clear_gigantic_page.patch +mm-use-aligned-address-in-copy_user_gigantic_page.patch +mm-shmem-fix-shmemhugepages-at-swapout.patch diff --git a/queue-6.12/udmabuf-also-check-for-f_seal_future_write.patch b/queue-6.12/udmabuf-also-check-for-f_seal_future_write.patch new file mode 100644 index 00000000000..257e71b2468 --- /dev/null +++ b/queue-6.12/udmabuf-also-check-for-f_seal_future_write.patch @@ -0,0 +1,36 @@ +From 0a16e24e34f28210f68195259456c73462518597 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Wed, 4 Dec 2024 17:26:20 +0100 +Subject: udmabuf: also check for F_SEAL_FUTURE_WRITE + +From: Jann Horn + +commit 0a16e24e34f28210f68195259456c73462518597 upstream. + +When F_SEAL_FUTURE_WRITE was introduced, it was overlooked that udmabuf +must reject memfds with this flag, just like ones with F_SEAL_WRITE. +Fix it by adding F_SEAL_FUTURE_WRITE to SEALS_DENIED. + +Fixes: ab3948f58ff8 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd") +Cc: stable@vger.kernel.org +Acked-by: Vivek Kasireddy +Signed-off-by: Jann Horn +Reviewed-by: Joel Fernandes (Google) +Signed-off-by: Vivek Kasireddy +Link: https://patchwork.freedesktop.org/patch/msgid/20241204-udmabuf-fixes-v2-2-23887289de1c@google.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dma-buf/udmabuf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/dma-buf/udmabuf.c ++++ b/drivers/dma-buf/udmabuf.c +@@ -256,7 +256,7 @@ static const struct dma_buf_ops udmabuf_ + }; + + #define SEALS_WANTED (F_SEAL_SHRINK) +-#define SEALS_DENIED (F_SEAL_WRITE) ++#define SEALS_DENIED (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) + + static int check_memfd_seals(struct file *memfd) + { diff --git a/queue-6.12/udmabuf-fix-racy-memfd-sealing-check.patch b/queue-6.12/udmabuf-fix-racy-memfd-sealing-check.patch new file mode 100644 index 00000000000..2b30e5a4632 --- /dev/null +++ b/queue-6.12/udmabuf-fix-racy-memfd-sealing-check.patch @@ -0,0 +1,60 @@ +From 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Wed, 4 Dec 2024 17:26:19 +0100 +Subject: udmabuf: fix racy memfd sealing check + +From: Jann Horn + +commit 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87 upstream. + +The current check_memfd_seals() is racy: Since we first do +check_memfd_seals() and then udmabuf_pin_folios() without holding any +relevant lock across both, F_SEAL_WRITE can be set in between. +This is problematic because we can end up holding pins to pages in a +write-sealed memfd. + +Fix it using the inode lock, that's probably the easiest way. 
+In the future, we might want to consider moving this logic into memfd, +especially if anyone else wants to use memfd_pin_folios(). + +Reported-by: Julian Orth +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219106 +Closes: https://lore.kernel.org/r/CAG48ez0w8HrFEZtJkfmkVKFDhE5aP7nz=obrimeTgpD+StkV9w@mail.gmail.com +Fixes: fbb0de795078 ("Add udmabuf misc device") +Cc: stable@vger.kernel.org +Signed-off-by: Jann Horn +Acked-by: Joel Fernandes (Google) +Acked-by: Vivek Kasireddy +Signed-off-by: Vivek Kasireddy +Link: https://patchwork.freedesktop.org/patch/msgid/20241204-udmabuf-fixes-v2-1-23887289de1c@google.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dma-buf/udmabuf.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/drivers/dma-buf/udmabuf.c ++++ b/drivers/dma-buf/udmabuf.c +@@ -394,14 +394,19 @@ static long udmabuf_create(struct miscde + goto err; + } + ++ /* ++ * Take the inode lock to protect against concurrent ++ * memfd_add_seals(), which takes this lock in write mode. ++ */ ++ inode_lock_shared(file_inode(memfd)); + ret = check_memfd_seals(memfd); +- if (ret < 0) { +- fput(memfd); +- goto err; +- } ++ if (ret) ++ goto out_unlock; + + ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset, + list[i].size); ++out_unlock: ++ inode_unlock_shared(file_inode(memfd)); + fput(memfd); + if (ret) + goto err;
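For reference, the seals the two udmabuf patches above refuse can be set from userspace roughly as follows (assumes Linux with a glibc recent enough to declare memfd_create() and the F_SEAL_* constants; otherwise <linux/fcntl.h> is needed). Both F_SEAL_WRITE and F_SEAL_FUTURE_WRITE promise that no further writes reach the memfd's pages, a promise a long-lived writable dma-buf mapping could undermine, which is why udmabuf treats the two seals alike.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int fd = memfd_create("buf", MFD_ALLOW_SEALING);

        if (fd < 0) {
                perror("memfd_create");
                return 1;
        }
        if (ftruncate(fd, 4096) < 0) {
                perror("ftruncate");
                return 1;
        }

        /* after this, no new writes are allowed through this memfd */
        if (fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_FUTURE_WRITE) < 0) {
                perror("F_ADD_SEALS");
                return 1;
        }

        int seals = fcntl(fd, F_GET_SEALS);

        printf("seals: %#x (FUTURE_WRITE set: %s)\n", (unsigned)seals,
               (seals & F_SEAL_FUTURE_WRITE) ? "yes" : "no");
        close(fd);
        return 0;
}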