From: Greg Kroah-Hartman Date: Mon, 12 Jan 2026 11:09:03 +0000 (+0100) Subject: 6.6-stable patches X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8a13fb6923662e8dc12d2a65188eec57a6f02125;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch net-add-locking-to-protect-skb-dev-access-in-ip_output.patch nfsd-convert-to-new-timestamp-accessors.patch nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch nfsd-nfsv4-file-creation-neglects-setting-acl.patch nfsd-set-security-label-during-create-operations.patch --- diff --git a/queue-6.6/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch b/queue-6.6/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch new file mode 100644 index 0000000000..dad7ba0888 --- /dev/null +++ b/queue-6.6/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch @@ -0,0 +1,234 @@ +From f5548c318d6520d4fa3c5ed6003eeb710763cbc5 Mon Sep 17 00:00:00 2001 +From: Pedro Demarchi Gomes +Date: Wed, 22 Oct 2025 12:30:59 -0300 +Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item + +From: Pedro Demarchi Gomes + +commit f5548c318d6520d4fa3c5ed6003eeb710763cbc5 upstream. + +Currently, scan_get_next_rmap_item() walks every page address in a VMA to +locate mergeable pages. This becomes highly inefficient when scanning +large virtual memory areas that contain mostly unmapped regions, causing +ksmd to use large amount of cpu without deduplicating much pages. + +This patch replaces the per-address lookup with a range walk using +walk_page_range(). The range walker allows KSM to skip over entire +unmapped holes in a VMA, avoiding unnecessary lookups. This problem was +previously discussed in [1]. 
+ +Consider the following test program which creates a 32 TiB mapping in the +virtual address space but only populates a single page: + +#include <unistd.h> +#include <stdio.h> +#include <sys/mman.h> + +/* 32 TiB */ +const size_t size = 32ul * 1024 * 1024 * 1024 * 1024; + +int main() { + char *area = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_NORESERVE | MAP_PRIVATE | MAP_ANON, -1, 0); + + if (area == MAP_FAILED) { + perror("mmap() failed\n"); + return -1; + } + + /* Populate a single page such that we get an anon_vma. */ + *area = 0; + + /* Enable KSM. */ + madvise(area, size, MADV_MERGEABLE); + pause(); + return 0; +} + +$ ./ksm-sparse & +$ echo 1 > /sys/kernel/mm/ksm/run + +Without this patch ksmd uses 100% of the cpu for a long time (more than 1 +hour in my test machine) scanning all the 32 TiB virtual address space +that contain only one mapped page. This makes ksmd essentially deadlocked +not able to deduplicate anything of value. With this patch ksmd walks +only the one mapped page and skips the rest of the 32 TiB virtual address +space, making the scan fast using little cpu. 
+ +Link: https://lkml.kernel.org/r/20251023035841.41406-1-pedrodemargomes@gmail.com +Link: https://lkml.kernel.org/r/20251022153059.22763-1-pedrodemargomes@gmail.com +Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1] +Fixes: 31dbd01f3143 ("ksm: Kernel SamePage Merging") +Signed-off-by: Pedro Demarchi Gomes +Co-developed-by: David Hildenbrand +Signed-off-by: David Hildenbrand +Reported-by: craftfever +Closes: https://lkml.kernel.org/r/020cf8de6e773bb78ba7614ef250129f11a63781@murena.io +Suggested-by: David Hildenbrand +Acked-by: David Hildenbrand +Cc: Chengming Zhou +Cc: xu xin +Cc: +Signed-off-by: Andrew Morton +[ change page to folios ] +Signed-off-by: Pedro Demarchi Gomes +Signed-off-by: Greg Kroah-Hartman +--- + mm/ksm.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 113 insertions(+), 13 deletions(-) + +--- a/mm/ksm.c ++++ b/mm/ksm.c +@@ -2304,6 +2304,95 @@ static struct ksm_rmap_item *get_next_rm + return rmap_item; + } + ++struct ksm_next_page_arg { ++ struct folio *folio; ++ struct page *page; ++ unsigned long addr; ++}; ++ ++static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end, ++ struct mm_walk *walk) ++{ ++ struct ksm_next_page_arg *private = walk->private; ++ struct vm_area_struct *vma = walk->vma; ++ pte_t *start_ptep = NULL, *ptep, pte; ++ struct mm_struct *mm = walk->mm; ++ struct folio *folio; ++ struct page *page; ++ spinlock_t *ptl; ++ pmd_t pmd; ++ ++ if (ksm_test_exit(mm)) ++ return 0; ++ ++ cond_resched(); ++ ++ pmd = pmdp_get_lockless(pmdp); ++ if (!pmd_present(pmd)) ++ return 0; ++ ++ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) { ++ ptl = pmd_lock(mm, pmdp); ++ pmd = pmdp_get(pmdp); ++ ++ if (!pmd_present(pmd)) { ++ goto not_found_unlock; ++ } else if (pmd_leaf(pmd)) { ++ page = vm_normal_page_pmd(vma, addr, pmd); ++ if (!page) ++ goto not_found_unlock; ++ folio = page_folio(page); ++ ++ if 
(folio_is_zone_device(folio) || !folio_test_anon(folio)) ++ goto not_found_unlock; ++ ++ page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT); ++ goto found_unlock; ++ } ++ spin_unlock(ptl); ++ } ++ ++ start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); ++ if (!start_ptep) ++ return 0; ++ ++ for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) { ++ pte = ptep_get(ptep); ++ ++ if (!pte_present(pte)) ++ continue; ++ ++ page = vm_normal_page(vma, addr, pte); ++ if (!page) ++ continue; ++ folio = page_folio(page); ++ ++ if (folio_is_zone_device(folio) || !folio_test_anon(folio)) ++ continue; ++ goto found_unlock; ++ } ++ ++not_found_unlock: ++ spin_unlock(ptl); ++ if (start_ptep) ++ pte_unmap(start_ptep); ++ return 0; ++found_unlock: ++ folio_get(folio); ++ spin_unlock(ptl); ++ if (start_ptep) ++ pte_unmap(start_ptep); ++ private->page = page; ++ private->folio = folio; ++ private->addr = addr; ++ return 1; ++} ++ ++static struct mm_walk_ops ksm_next_page_ops = { ++ .pmd_entry = ksm_next_page_pmd_entry, ++ .walk_lock = PGWALK_RDLOCK, ++}; ++ + static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page) + { + struct mm_struct *mm; +@@ -2390,32 +2479,43 @@ next_mm: + ksm_scan.address = vma->vm_end; + + while (ksm_scan.address < vma->vm_end) { ++ struct ksm_next_page_arg ksm_next_page_arg; ++ struct page *tmp_page = NULL; ++ struct folio *folio; ++ + if (ksm_test_exit(mm)) + break; +- *page = follow_page(vma, ksm_scan.address, FOLL_GET); +- if (IS_ERR_OR_NULL(*page)) { +- ksm_scan.address += PAGE_SIZE; +- cond_resched(); +- continue; ++ ++ int found; ++ ++ found = walk_page_range_vma(vma, ksm_scan.address, ++ vma->vm_end, ++ &ksm_next_page_ops, ++ &ksm_next_page_arg); ++ ++ if (found > 0) { ++ folio = ksm_next_page_arg.folio; ++ tmp_page = ksm_next_page_arg.page; ++ ksm_scan.address = ksm_next_page_arg.addr; ++ } else { ++ VM_WARN_ON_ONCE(found < 0); ++ ksm_scan.address = vma->vm_end - PAGE_SIZE; + } +- if (is_zone_device_page(*page)) +- goto 
next_page; +- if (PageAnon(*page)) { +- flush_anon_page(vma, *page, ksm_scan.address); +- flush_dcache_page(*page); ++ if (tmp_page) { ++ flush_anon_page(vma, tmp_page, ksm_scan.address); ++ flush_dcache_page(tmp_page); + rmap_item = get_next_rmap_item(mm_slot, + ksm_scan.rmap_list, ksm_scan.address); + if (rmap_item) { + ksm_scan.rmap_list = + &rmap_item->rmap_list; + ksm_scan.address += PAGE_SIZE; ++ *page = tmp_page; + } else +- put_page(*page); ++ folio_put(folio); + mmap_read_unlock(mm); + return rmap_item; + } +-next_page: +- put_page(*page); + ksm_scan.address += PAGE_SIZE; + cond_resched(); + } diff --git a/queue-6.6/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch b/queue-6.6/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch new file mode 100644 index 0000000000..d0e9cdc843 --- /dev/null +++ b/queue-6.6/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch @@ -0,0 +1,113 @@ +From 1dbf1d590d10a6d1978e8184f8dfe20af22d680a Mon Sep 17 00:00:00 2001 +From: Sharath Chandra Vurukala +Date: Wed, 30 Jul 2025 16:21:18 +0530 +Subject: net: Add locking to protect skb->dev access in ip_output + +From: Sharath Chandra Vurukala + +commit 1dbf1d590d10a6d1978e8184f8dfe20af22d680a upstream. + +In ip_output() skb->dev is updated from the skb_dst(skb)->dev +this can become invalid when the interface is unregistered and freed, + +Introduced new skb_dst_dev_rcu() function to be used instead of +skb_dst_dev() within rcu_locks in ip_output.This will ensure that +all the skb's associated with the dev being deregistered will +be transnmitted out first, before freeing the dev. + +Given that ip_output() is called within an rcu_read_lock() +critical section or from a bottom-half context, it is safe to introduce +an RCU read-side critical section within it. + +Multiple panic call stacks were observed when UL traffic was run +in concurrency with device deregistration from different functions, +pasting one sample for reference. 
+ +[496733.627565][T13385] Call trace: +[496733.627570][T13385] bpf_prog_ce7c9180c3b128ea_cgroupskb_egres+0x24c/0x7f0 +[496733.627581][T13385] __cgroup_bpf_run_filter_skb+0x128/0x498 +[496733.627595][T13385] ip_finish_output+0xa4/0xf4 +[496733.627605][T13385] ip_output+0x100/0x1a0 +[496733.627613][T13385] ip_send_skb+0x68/0x100 +[496733.627618][T13385] udp_send_skb+0x1c4/0x384 +[496733.627625][T13385] udp_sendmsg+0x7b0/0x898 +[496733.627631][T13385] inet_sendmsg+0x5c/0x7c +[496733.627639][T13385] __sys_sendto+0x174/0x1e4 +[496733.627647][T13385] __arm64_sys_sendto+0x28/0x3c +[496733.627653][T13385] invoke_syscall+0x58/0x11c +[496733.627662][T13385] el0_svc_common+0x88/0xf4 +[496733.627669][T13385] do_el0_svc+0x2c/0xb0 +[496733.627676][T13385] el0_svc+0x2c/0xa4 +[496733.627683][T13385] el0t_64_sync_handler+0x68/0xb4 +[496733.627689][T13385] el0t_64_sync+0x1a4/0x1a8 + +Changes in v3: +- Replaced WARN_ON() with WARN_ON_ONCE(), as suggested by Willem de Bruijn. +- Dropped legacy lines mistakenly pulled in from an outdated branch. 
+ +Changes in v2: +- Addressed review comments from Eric Dumazet +- Used READ_ONCE() to prevent potential load/store tearing +- Added skb_dst_dev_rcu() and used along with rcu_read_lock() in ip_output + +Signed-off-by: Sharath Chandra Vurukala +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20250730105118.GA26100@hu-sharathv-hyd.qualcomm.com +Signed-off-by: Jakub Kicinski +[ Keerthana: Backported the patch to v6.6.y ] +Signed-off-by: Keerthana K +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dst.h | 12 ++++++++++++ + net/ipv4/ip_output.c | 15 ++++++++++----- + 2 files changed, 22 insertions(+), 5 deletions(-) + +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -569,6 +569,18 @@ static inline void skb_dst_update_pmtu_n + dst->ops->update_pmtu(dst, NULL, skb, mtu, false); + } + ++static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst) ++{ ++ /* In the future, use rcu_dereference(dst->dev) */ ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++ return READ_ONCE(dst->dev); ++} ++ ++static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb) ++{ ++ return dst_dev_rcu(skb_dst(skb)); ++} ++ + struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); + void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu, bool confirm_neigh); +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -425,15 +425,20 @@ int ip_mc_output(struct net *net, struct + + int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) + { +- struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; ++ struct net_device *dev, *indev = skb->dev; ++ int ret_val; + ++ rcu_read_lock(); ++ dev = skb_dst_dev_rcu(skb); + skb->dev = dev; + skb->protocol = htons(ETH_P_IP); + +- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, +- net, sk, skb, indev, dev, +- ip_finish_output, +- !(IPCB(skb)->flags & IPSKB_REROUTED)); ++ ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, ++ 
net, sk, skb, indev, dev, ++ ip_finish_output, ++ !(IPCB(skb)->flags & IPSKB_REROUTED)); ++ rcu_read_unlock(); ++ return ret_val; + } + EXPORT_SYMBOL(ip_output); + diff --git a/queue-6.6/nfsd-convert-to-new-timestamp-accessors.patch b/queue-6.6/nfsd-convert-to-new-timestamp-accessors.patch new file mode 100644 index 0000000000..fd936498af --- /dev/null +++ b/queue-6.6/nfsd-convert-to-new-timestamp-accessors.patch @@ -0,0 +1,101 @@ +From 11fec9b9fb04fd1b3330a3b91ab9dcfa81ad5ad3 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Wed, 4 Oct 2023 14:52:37 -0400 +Subject: nfsd: convert to new timestamp accessors + +From: Jeff Layton + +commit 11fec9b9fb04fd1b3330a3b91ab9dcfa81ad5ad3 upstream. + +Convert to using the new inode timestamp accessor functions. + +Signed-off-by: Jeff Layton +Link: https://lore.kernel.org/r/20231004185347.80880-50-jlayton@kernel.org +Stable-dep-of: 24d92de9186e ("nfsd: Fix NFSv3 atomicity bugs in nfsd_setattr()") +Signed-off-by: Christian Brauner +[ cel: d68886bae76a has already been applied ] +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayout.c | 3 ++- + fs/nfsd/nfs3proc.c | 4 ++-- + fs/nfsd/nfs4proc.c | 8 ++++---- + fs/nfsd/nfsctl.c | 2 +- + fs/nfsd/vfs.c | 2 +- + 5 files changed, 10 insertions(+), 9 deletions(-) + +--- a/fs/nfsd/blocklayout.c ++++ b/fs/nfsd/blocklayout.c +@@ -119,11 +119,12 @@ static __be32 + nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, + struct iomap *iomaps, int nr_iomaps) + { ++ struct timespec64 mtime = inode_get_mtime(inode); + struct iattr iattr = { .ia_valid = 0 }; + int error; + + if (lcp->lc_mtime.tv_nsec == UTIME_NOW || +- timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) ++ timespec64_compare(&lcp->lc_mtime, &mtime) < 0) + lcp->lc_mtime = current_time(inode); + iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; + iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c 
+@@ -294,8 +294,8 @@ nfsd3_create_file(struct svc_rqst *rqstp + status = nfserr_exist; + break; + case NFS3_CREATE_EXCLUSIVE: +- if (d_inode(child)->i_mtime.tv_sec == v_mtime && +- d_inode(child)->i_atime.tv_sec == v_atime && ++ if (inode_get_mtime_sec(d_inode(child)) == v_mtime && ++ inode_get_atime_sec(d_inode(child)) == v_atime && + d_inode(child)->i_size == 0) { + break; + } +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -322,8 +322,8 @@ nfsd4_create_file(struct svc_rqst *rqstp + status = nfserr_exist; + break; + case NFS4_CREATE_EXCLUSIVE: +- if (d_inode(child)->i_mtime.tv_sec == v_mtime && +- d_inode(child)->i_atime.tv_sec == v_atime && ++ if (inode_get_mtime_sec(d_inode(child)) == v_mtime && ++ inode_get_atime_sec(d_inode(child)) == v_atime && + d_inode(child)->i_size == 0) { + open->op_created = true; + break; /* subtle */ +@@ -331,8 +331,8 @@ nfsd4_create_file(struct svc_rqst *rqstp + status = nfserr_exist; + break; + case NFS4_CREATE_EXCLUSIVE4_1: +- if (d_inode(child)->i_mtime.tv_sec == v_mtime && +- d_inode(child)->i_atime.tv_sec == v_atime && ++ if (inode_get_mtime_sec(d_inode(child)) == v_mtime && ++ inode_get_atime_sec(d_inode(child)) == v_atime && + d_inode(child)->i_size == 0) { + open->op_created = true; + goto set_attr; /* subtle */ +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -1139,7 +1139,7 @@ static struct inode *nfsd_get_inode(stru + /* Following advice from simple_fill_super documentation: */ + inode->i_ino = iunique(sb, NFSD_MaxReserved); + inode->i_mode = mode; +- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); ++ simple_inode_init_ts(inode); + switch (mode & S_IFMT) { + case S_IFDIR: + inode->i_fop = &simple_dir_operations; +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -521,7 +521,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str + + nfsd_sanitize_attrs(inode, iap); + +- if (check_guard && guardtime != inode_get_ctime(inode).tv_sec) ++ if (check_guard && guardtime != inode_get_ctime_sec(inode)) + return 
nfserr_notsync; + + /* diff --git a/queue-6.6/nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch b/queue-6.6/nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch new file mode 100644 index 0000000000..7b80045984 --- /dev/null +++ b/queue-6.6/nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch @@ -0,0 +1,206 @@ +From 24d92de9186ebc340687caf7356e1070773e67bc Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Thu, 15 Feb 2024 20:24:51 -0500 +Subject: nfsd: Fix NFSv3 atomicity bugs in nfsd_setattr() + +From: Trond Myklebust + +commit 24d92de9186ebc340687caf7356e1070773e67bc upstream. + +The main point of the guarded SETATTR is to prevent races with other +WRITE and SETATTR calls. That requires that the check of the guard time +against the inode ctime be done after taking the inode lock. + +Furthermore, we need to take into account the 32-bit nature of +timestamps in NFSv3, and the possibility that files may change at a +faster rate than once a second. + +Signed-off-by: Trond Myklebust +Reviewed-by: Jeff Layton +Reviewed-by: NeilBrown +Stable-dep-of: 442d27ff09a2 ("nfsd: set security label during create operations") +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs3proc.c | 6 ++++-- + fs/nfsd/nfs3xdr.c | 5 +---- + fs/nfsd/nfs4proc.c | 3 +-- + fs/nfsd/nfs4state.c | 2 +- + fs/nfsd/nfsproc.c | 6 +++--- + fs/nfsd/vfs.c | 20 +++++++++++++------- + fs/nfsd/vfs.h | 2 +- + fs/nfsd/xdr3.h | 2 +- + 8 files changed, 25 insertions(+), 21 deletions(-) + +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -71,13 +71,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqst + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; ++ const struct timespec64 *guardtime = NULL; + + dprintk("nfsd: SETATTR(3) %s\n", + SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); +- resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, +- argp->check_guard, argp->guardtime); ++ if (argp->check_guard) ++ guardtime = &argp->guardtime; ++ resp->status = 
nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime); + return rpc_success; + } + +--- a/fs/nfsd/nfs3xdr.c ++++ b/fs/nfsd/nfs3xdr.c +@@ -295,17 +295,14 @@ svcxdr_decode_sattr3(struct svc_rqst *rq + static bool + svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args) + { +- __be32 *p; + u32 check; + + if (xdr_stream_decode_bool(xdr, &check) < 0) + return false; + if (check) { +- p = xdr_inline_decode(xdr, XDR_UNIT * 2); +- if (!p) ++ if (!svcxdr_decode_nfstime3(xdr, &args->guardtime)) + return false; + args->check_guard = 1; +- args->guardtime = be32_to_cpup(p); + } else + args->check_guard = 0; + +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -1160,8 +1160,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, st + goto out; + save_no_wcc = cstate->current_fh.fh_no_wcc; + cstate->current_fh.fh_no_wcc = true; +- status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, +- 0, (time64_t)0); ++ status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, NULL); + cstate->current_fh.fh_no_wcc = save_no_wcc; + if (!status) + status = nfserrno(attrs.na_labelerr); +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -5225,7 +5225,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, s + return 0; + if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) + return nfserr_inval; +- return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0); ++ return nfsd_setattr(rqstp, fh, &attrs, NULL); + } + + static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -103,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp + } + } + +- resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0); ++ resp->status = nfsd_setattr(rqstp, fhp, &attrs, NULL); + if (resp->status != nfs_ok) + goto out; + +@@ -390,8 +390,8 @@ nfsd_proc_create(struct svc_rqst *rqstp) + */ + attr->ia_valid &= ATTR_SIZE; + if (attr->ia_valid) +- resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0, +- (time64_t)0); ++ resp->status = 
nfsd_setattr(rqstp, newfhp, &attrs, ++ NULL); + } + + out_unlock: +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -459,7 +459,6 @@ static int __nfsd_setattr(struct dentry + * @rqstp: controlling RPC transaction + * @fhp: filehandle of target + * @attr: attributes to set +- * @check_guard: set to 1 if guardtime is a valid timestamp + * @guardtime: do not act if ctime.tv_sec does not match this timestamp + * + * This call may adjust the contents of @attr (in particular, this +@@ -471,8 +470,7 @@ static int __nfsd_setattr(struct dentry + */ + __be32 + nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, +- struct nfsd_attrs *attr, +- int check_guard, time64_t guardtime) ++ struct nfsd_attrs *attr, const struct timespec64 *guardtime) + { + struct dentry *dentry; + struct inode *inode; +@@ -521,9 +519,6 @@ nfsd_setattr(struct svc_rqst *rqstp, str + + nfsd_sanitize_attrs(inode, iap); + +- if (check_guard && guardtime != inode_get_ctime_sec(inode)) +- return nfserr_notsync; +- + /* + * The size case is special, it changes the file in addition to the + * attributes, and file systems don't expect it to be mixed with +@@ -541,6 +536,16 @@ nfsd_setattr(struct svc_rqst *rqstp, str + err = fh_fill_pre_attrs(fhp); + if (err) + goto out_unlock; ++ ++ if (guardtime) { ++ struct timespec64 ctime = inode_get_ctime(inode); ++ if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec || ++ guardtime->tv_nsec != ctime.tv_nsec) { ++ err = nfserr_notsync; ++ goto out_fill_attrs; ++ } ++ } ++ + for (retries = 1;;) { + struct iattr attrs; + +@@ -568,6 +573,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str + attr->na_aclerr = set_posix_acl(&nop_mnt_idmap, + dentry, ACL_TYPE_DEFAULT, + attr->na_dpacl); ++out_fill_attrs: + fh_fill_post_attrs(fhp); + out_unlock: + inode_unlock(inode); +@@ -1374,7 +1380,7 @@ nfsd_create_setattr(struct svc_rqst *rqs + * if the attributes have not changed. 
+ */ + if (iap->ia_valid) +- status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0); ++ status = nfsd_setattr(rqstp, resfhp, attrs, NULL); + else + status = nfserrno(commit_metadata(resfhp)); + +--- a/fs/nfsd/vfs.h ++++ b/fs/nfsd/vfs.h +@@ -69,7 +69,7 @@ __be32 nfsd_lookup_dentry(struct svc_r + const char *, unsigned int, + struct svc_export **, struct dentry **); + __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, +- struct nfsd_attrs *, int, time64_t); ++ struct nfsd_attrs *, const struct timespec64 *); + int nfsd_mountpoint(struct dentry *, struct svc_export *); + #ifdef CONFIG_NFSD_V4 + __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, +--- a/fs/nfsd/xdr3.h ++++ b/fs/nfsd/xdr3.h +@@ -14,7 +14,7 @@ struct nfsd3_sattrargs { + struct svc_fh fh; + struct iattr attrs; + int check_guard; +- time64_t guardtime; ++ struct timespec64 guardtime; + }; + + struct nfsd3_diropargs { diff --git a/queue-6.6/nfsd-nfsv4-file-creation-neglects-setting-acl.patch b/queue-6.6/nfsd-nfsv4-file-creation-neglects-setting-acl.patch new file mode 100644 index 0000000000..40acc76fc1 --- /dev/null +++ b/queue-6.6/nfsd-nfsv4-file-creation-neglects-setting-acl.patch @@ -0,0 +1,51 @@ +From 913f7cf77bf14c13cfea70e89bcb6d0b22239562 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Tue, 18 Nov 2025 19:51:19 -0500 +Subject: NFSD: NFSv4 file creation neglects setting ACL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chuck Lever + +commit 913f7cf77bf14c13cfea70e89bcb6d0b22239562 upstream. + +An NFSv4 client that sets an ACL with a named principal during file +creation retrieves the ACL afterwards, and finds that it is only a +default ACL (based on the mode bits) and not the ACL that was +requested during file creation. This violates RFC 8881 section +6.4.1.3: "the ACL attribute is set as given". + +The issue occurs in nfsd_create_setattr(), which calls +nfsd_attrs_valid() to determine whether to call nfsd_setattr(). 
+However, nfsd_attrs_valid() checks only for iattr changes and +security labels, but not POSIX ACLs. When only an ACL is present, +the function returns false, nfsd_setattr() is skipped, and the +POSIX ACL is never applied to the inode. + +Subsequently, when the client retrieves the ACL, the server finds +no POSIX ACL on the inode and returns one generated from the file's +mode bits rather than returning the originally-specified ACL. + +Reported-by: Aurélien Couderc +Fixes: c0cbe70742f4 ("NFSD: add posix ACLs to struct nfsd_attrs") +Cc: Roland Mainz +Cc: stable@vger.kernel.org +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/vfs.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/nfsd/vfs.h ++++ b/fs/nfsd/vfs.h +@@ -65,7 +65,8 @@ static inline bool nfsd_attrs_valid(stru + struct iattr *iap = attrs->na_iattr; + + return (iap->ia_valid || (attrs->na_seclabel && +- attrs->na_seclabel->len)); ++ attrs->na_seclabel->len) || ++ attrs->na_pacl || attrs->na_dpacl); + } + + __be32 nfserrno (int errno); diff --git a/queue-6.6/nfsd-set-security-label-during-create-operations.patch b/queue-6.6/nfsd-set-security-label-during-create-operations.patch new file mode 100644 index 0000000000..ec6266dec2 --- /dev/null +++ b/queue-6.6/nfsd-set-security-label-during-create-operations.patch @@ -0,0 +1,107 @@ +From 442d27ff09a218b61020ab56387dbc508ad6bfa6 Mon Sep 17 00:00:00 2001 +From: Stephen Smalley +Date: Fri, 3 May 2024 09:09:06 -0400 +Subject: nfsd: set security label during create operations + +From: Stephen Smalley + +commit 442d27ff09a218b61020ab56387dbc508ad6bfa6 upstream. + +When security labeling is enabled, the client can pass a file security +label as part of a create operation for the new file, similar to mode +and other attributes. At present, the security label is received by nfsd +and passed down to nfsd_create_setattr(), but nfsd_setattr() is never +called and therefore the label is never set on the new file. 
This bug +may have been introduced on or around commit d6a97d3f589a ("NFSD: +add security label to struct nfsd_attrs"). Looking at nfsd_setattr() +I am uncertain as to whether the same issue presents for +file ACLs and therefore requires a similar fix for those. + +An alternative approach would be to introduce a new LSM hook to set the +"create SID" of the current task prior to the actual file creation, which +would atomically label the new inode at creation time. This would be better +for SELinux and a similar approach has been used previously +(see security_dentry_create_files_as) but perhaps not usable by other LSMs. + +Reproducer: +1. Install a Linux distro with SELinux - Fedora is easiest +2. git clone https://github.com/SELinuxProject/selinux-testsuite +3. Install the requisite dependencies per selinux-testsuite/README.md +4. Run something like the following script: +MOUNT=$HOME/selinux-testsuite +sudo systemctl start nfs-server +sudo exportfs -o rw,no_root_squash,security_label localhost:$MOUNT +sudo mkdir -p /mnt/selinux-testsuite +sudo mount -t nfs -o vers=4.2 localhost:$MOUNT /mnt/selinux-testsuite +pushd /mnt/selinux-testsuite/ +sudo make -C policy load +pushd tests/filesystem +sudo runcon -t test_filesystem_t ./create_file -f trans_test_file \ + -e test_filesystem_filetranscon_t -v +sudo rm -f trans_test_file +popd +sudo make -C policy unload +popd +sudo umount /mnt/selinux-testsuite +sudo exportfs -u localhost:$MOUNT +sudo rmdir /mnt/selinux-testsuite +sudo systemctl stop nfs-server + +Expected output: + +Process context: + unconfined_u:unconfined_r:test_filesystem_t:s0-s0:c0.c1023 +Created file: trans_test_file +File context: unconfined_u:object_r:test_filesystem_filetranscon_t:s0 +File context is correct + +Actual output: + +Process context: + unconfined_u:unconfined_r:test_filesystem_t:s0-s0:c0.c1023 +Created file: trans_test_file +File context: system_u:object_r:test_file_t:s0 +File context error, expected: + test_filesystem_filetranscon_t +got: + 
test_file_t + +Signed-off-by: Stephen Smalley +Reviewed-by: Jeff Layton +Reviewed-by: NeilBrown +Stable-dep-of: 913f7cf77bf1 ("NFSD: NFSv4 file creation neglects setting ACL") +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/vfs.c | 2 +- + fs/nfsd/vfs.h | 8 ++++++++ + 2 files changed, 9 insertions(+), 1 deletion(-) + +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1379,7 +1379,7 @@ nfsd_create_setattr(struct svc_rqst *rqs + * Callers expect new file metadata to be committed even + * if the attributes have not changed. + */ +- if (iap->ia_valid) ++ if (nfsd_attrs_valid(attrs)) + status = nfsd_setattr(rqstp, resfhp, attrs, NULL); + else + status = nfserrno(commit_metadata(resfhp)); +--- a/fs/nfsd/vfs.h ++++ b/fs/nfsd/vfs.h +@@ -60,6 +60,14 @@ static inline void nfsd_attrs_free(struc + posix_acl_release(attrs->na_dpacl); + } + ++static inline bool nfsd_attrs_valid(struct nfsd_attrs *attrs) ++{ ++ struct iattr *iap = attrs->na_iattr; ++ ++ return (iap->ia_valid || (attrs->na_seclabel && ++ attrs->na_seclabel->len)); ++} ++ + __be32 nfserrno (int errno); + int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, + struct svc_export **expp); diff --git a/queue-6.6/series b/queue-6.6/series index 11fe3f64c2..17ac6ceee3 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -18,3 +18,9 @@ libceph-make-free_choose_arg_map-resilient-to-partial-allocation.patch libceph-return-the-handler-error-from-mon_handle_auth_done.patch libceph-reset-sparse-read-state-in-osd_fault.patch libceph-make-calc_target-set-t-paused-not-just-clear-it.patch +ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch +net-add-locking-to-protect-skb-dev-access-in-ip_output.patch +nfsd-convert-to-new-timestamp-accessors.patch +nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch +nfsd-set-security-label-during-create-operations.patch +nfsd-nfsv4-file-creation-neglects-setting-acl.patch