--- /dev/null
+From f5548c318d6520d4fa3c5ed6003eeb710763cbc5 Mon Sep 17 00:00:00 2001
+From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Date: Wed, 22 Oct 2025 12:30:59 -0300
+Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
+
+From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+
+commit f5548c318d6520d4fa3c5ed6003eeb710763cbc5 upstream.
+
+Currently, scan_get_next_rmap_item() walks every page address in a VMA to
+locate mergeable pages. This becomes highly inefficient when scanning
+large virtual memory areas that contain mostly unmapped regions, causing
+ksmd to use a large amount of CPU without deduplicating many pages.
+
+This patch replaces the per-address lookup with a range walk using
+walk_page_range(). The range walker allows KSM to skip over entire
+unmapped holes in a VMA, avoiding unnecessary lookups. This problem was
+previously discussed in [1].
+
+Consider the following test program which creates a 32 TiB mapping in the
+virtual address space but only populates a single page:
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/mman.h>
+
+/* 32 TiB */
+const size_t size = 32ul * 1024 * 1024 * 1024 * 1024;
+
+int main() {
+ char *area = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_NORESERVE | MAP_PRIVATE | MAP_ANON, -1, 0);
+
+ if (area == MAP_FAILED) {
+ perror("mmap() failed\n");
+ return -1;
+ }
+
+ /* Populate a single page such that we get an anon_vma. */
+ *area = 0;
+
+ /* Enable KSM. */
+ madvise(area, size, MADV_MERGEABLE);
+ pause();
+ return 0;
+}
+
+$ ./ksm-sparse &
+$ echo 1 > /sys/kernel/mm/ksm/run
+
+Without this patch ksmd uses 100% of the CPU for a long time (more than 1
+hour on my test machine) scanning the entire 32 TiB virtual address space
+that contains only one mapped page. This leaves ksmd essentially deadlocked,
+unable to deduplicate anything of value. With this patch ksmd walks
+only the one mapped page and skips the rest of the 32 TiB virtual address
+space, making the scan fast while using little CPU.
+
+Link: https://lkml.kernel.org/r/20251023035841.41406-1-pedrodemargomes@gmail.com
+Link: https://lkml.kernel.org/r/20251022153059.22763-1-pedrodemargomes@gmail.com
+Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1]
+Fixes: 31dbd01f3143 ("ksm: Kernel SamePage Merging")
+Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Co-developed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reported-by: craftfever <craftfever@airmail.cc>
+Closes: https://lkml.kernel.org/r/020cf8de6e773bb78ba7614ef250129f11a63781@murena.io
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Chengming Zhou <chengming.zhou@linux.dev>
+Cc: xu xin <xu.xin16@zte.com.cn>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[ change page to folios ]
+Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/ksm.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 113 insertions(+), 13 deletions(-)
+
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -2304,6 +2304,95 @@ static struct ksm_rmap_item *get_next_rm
+ return rmap_item;
+ }
+
++struct ksm_next_page_arg {
++ struct folio *folio;
++ struct page *page;
++ unsigned long addr;
++};
++
++static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
++ struct mm_walk *walk)
++{
++ struct ksm_next_page_arg *private = walk->private;
++ struct vm_area_struct *vma = walk->vma;
++ pte_t *start_ptep = NULL, *ptep, pte;
++ struct mm_struct *mm = walk->mm;
++ struct folio *folio;
++ struct page *page;
++ spinlock_t *ptl;
++ pmd_t pmd;
++
++ if (ksm_test_exit(mm))
++ return 0;
++
++ cond_resched();
++
++ pmd = pmdp_get_lockless(pmdp);
++ if (!pmd_present(pmd))
++ return 0;
++
++ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) {
++ ptl = pmd_lock(mm, pmdp);
++ pmd = pmdp_get(pmdp);
++
++ if (!pmd_present(pmd)) {
++ goto not_found_unlock;
++ } else if (pmd_leaf(pmd)) {
++ page = vm_normal_page_pmd(vma, addr, pmd);
++ if (!page)
++ goto not_found_unlock;
++ folio = page_folio(page);
++
++ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
++ goto not_found_unlock;
++
++ page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT);
++ goto found_unlock;
++ }
++ spin_unlock(ptl);
++ }
++
++ start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
++ if (!start_ptep)
++ return 0;
++
++ for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
++ pte = ptep_get(ptep);
++
++ if (!pte_present(pte))
++ continue;
++
++ page = vm_normal_page(vma, addr, pte);
++ if (!page)
++ continue;
++ folio = page_folio(page);
++
++ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
++ continue;
++ goto found_unlock;
++ }
++
++not_found_unlock:
++ spin_unlock(ptl);
++ if (start_ptep)
++ pte_unmap(start_ptep);
++ return 0;
++found_unlock:
++ folio_get(folio);
++ spin_unlock(ptl);
++ if (start_ptep)
++ pte_unmap(start_ptep);
++ private->page = page;
++ private->folio = folio;
++ private->addr = addr;
++ return 1;
++}
++
++static struct mm_walk_ops ksm_next_page_ops = {
++ .pmd_entry = ksm_next_page_pmd_entry,
++ .walk_lock = PGWALK_RDLOCK,
++};
++
+ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
+ {
+ struct mm_struct *mm;
+@@ -2390,32 +2479,43 @@ next_mm:
+ ksm_scan.address = vma->vm_end;
+
+ while (ksm_scan.address < vma->vm_end) {
++ struct ksm_next_page_arg ksm_next_page_arg;
++ struct page *tmp_page = NULL;
++ struct folio *folio;
++
+ if (ksm_test_exit(mm))
+ break;
+- *page = follow_page(vma, ksm_scan.address, FOLL_GET);
+- if (IS_ERR_OR_NULL(*page)) {
+- ksm_scan.address += PAGE_SIZE;
+- cond_resched();
+- continue;
++
++ int found;
++
++ found = walk_page_range_vma(vma, ksm_scan.address,
++ vma->vm_end,
++ &ksm_next_page_ops,
++ &ksm_next_page_arg);
++
++ if (found > 0) {
++ folio = ksm_next_page_arg.folio;
++ tmp_page = ksm_next_page_arg.page;
++ ksm_scan.address = ksm_next_page_arg.addr;
++ } else {
++ VM_WARN_ON_ONCE(found < 0);
++ ksm_scan.address = vma->vm_end - PAGE_SIZE;
+ }
+- if (is_zone_device_page(*page))
+- goto next_page;
+- if (PageAnon(*page)) {
+- flush_anon_page(vma, *page, ksm_scan.address);
+- flush_dcache_page(*page);
++ if (tmp_page) {
++ flush_anon_page(vma, tmp_page, ksm_scan.address);
++ flush_dcache_page(tmp_page);
+ rmap_item = get_next_rmap_item(mm_slot,
+ ksm_scan.rmap_list, ksm_scan.address);
+ if (rmap_item) {
+ ksm_scan.rmap_list =
+ &rmap_item->rmap_list;
+ ksm_scan.address += PAGE_SIZE;
++ *page = tmp_page;
+ } else
+- put_page(*page);
++ folio_put(folio);
+ mmap_read_unlock(mm);
+ return rmap_item;
+ }
+-next_page:
+- put_page(*page);
+ ksm_scan.address += PAGE_SIZE;
+ cond_resched();
+ }
--- /dev/null
+From 1dbf1d590d10a6d1978e8184f8dfe20af22d680a Mon Sep 17 00:00:00 2001
+From: Sharath Chandra Vurukala <quic_sharathv@quicinc.com>
+Date: Wed, 30 Jul 2025 16:21:18 +0530
+Subject: net: Add locking to protect skb->dev access in ip_output
+
+From: Sharath Chandra Vurukala <quic_sharathv@quicinc.com>
+
+commit 1dbf1d590d10a6d1978e8184f8dfe20af22d680a upstream.
+
+In ip_output(), skb->dev is updated from skb_dst(skb)->dev, which
+can become invalid when the interface is unregistered and freed.
+
+Introduce a new skb_dst_dev_rcu() function to be used instead of
+skb_dst_dev() under rcu_read_lock() in ip_output(). This ensures that
+all the skbs associated with the dev being deregistered will
+be transmitted out first, before freeing the dev.
+
+Given that ip_output() is called within an rcu_read_lock()
+critical section or from a bottom-half context, it is safe to introduce
+an RCU read-side critical section within it.
+
+Multiple panic call stacks were observed when UL traffic was run
+concurrently with device deregistration from different functions;
+one sample is pasted below for reference.
+
+[496733.627565][T13385] Call trace:
+[496733.627570][T13385] bpf_prog_ce7c9180c3b128ea_cgroupskb_egres+0x24c/0x7f0
+[496733.627581][T13385] __cgroup_bpf_run_filter_skb+0x128/0x498
+[496733.627595][T13385] ip_finish_output+0xa4/0xf4
+[496733.627605][T13385] ip_output+0x100/0x1a0
+[496733.627613][T13385] ip_send_skb+0x68/0x100
+[496733.627618][T13385] udp_send_skb+0x1c4/0x384
+[496733.627625][T13385] udp_sendmsg+0x7b0/0x898
+[496733.627631][T13385] inet_sendmsg+0x5c/0x7c
+[496733.627639][T13385] __sys_sendto+0x174/0x1e4
+[496733.627647][T13385] __arm64_sys_sendto+0x28/0x3c
+[496733.627653][T13385] invoke_syscall+0x58/0x11c
+[496733.627662][T13385] el0_svc_common+0x88/0xf4
+[496733.627669][T13385] do_el0_svc+0x2c/0xb0
+[496733.627676][T13385] el0_svc+0x2c/0xa4
+[496733.627683][T13385] el0t_64_sync_handler+0x68/0xb4
+[496733.627689][T13385] el0t_64_sync+0x1a4/0x1a8
+
+Changes in v3:
+- Replaced WARN_ON() with WARN_ON_ONCE(), as suggested by Willem de Bruijn.
+- Dropped legacy lines mistakenly pulled in from an outdated branch.
+
+Changes in v2:
+- Addressed review comments from Eric Dumazet
+- Used READ_ONCE() to prevent potential load/store tearing
+- Added skb_dst_dev_rcu() and used along with rcu_read_lock() in ip_output
+
+Signed-off-by: Sharath Chandra Vurukala <quic_sharathv@quicinc.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20250730105118.GA26100@hu-sharathv-hyd.qualcomm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ Keerthana: Backported the patch to v6.6.y ]
+Signed-off-by: Keerthana K <keerthana.kalyanasundaram@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h | 12 ++++++++++++
+ net/ipv4/ip_output.c | 15 ++++++++++-----
+ 2 files changed, 22 insertions(+), 5 deletions(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -569,6 +569,18 @@ static inline void skb_dst_update_pmtu_n
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
+ }
+
++static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
++{
++ /* In the future, use rcu_dereference(dst->dev) */
++ WARN_ON_ONCE(!rcu_read_lock_held());
++ return READ_ONCE(dst->dev);
++}
++
++static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb)
++{
++ return dst_dev_rcu(skb_dst(skb));
++}
++
+ struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
+ void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu, bool confirm_neigh);
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -425,15 +425,20 @@ int ip_mc_output(struct net *net, struct
+
+ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+- struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
++ struct net_device *dev, *indev = skb->dev;
++ int ret_val;
+
++ rcu_read_lock();
++ dev = skb_dst_dev_rcu(skb);
+ skb->dev = dev;
+ skb->protocol = htons(ETH_P_IP);
+
+- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+- net, sk, skb, indev, dev,
+- ip_finish_output,
+- !(IPCB(skb)->flags & IPSKB_REROUTED));
++ ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
++ net, sk, skb, indev, dev,
++ ip_finish_output,
++ !(IPCB(skb)->flags & IPSKB_REROUTED));
++ rcu_read_unlock();
++ return ret_val;
+ }
+ EXPORT_SYMBOL(ip_output);
+
--- /dev/null
+From 11fec9b9fb04fd1b3330a3b91ab9dcfa81ad5ad3 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Wed, 4 Oct 2023 14:52:37 -0400
+Subject: nfsd: convert to new timestamp accessors
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 11fec9b9fb04fd1b3330a3b91ab9dcfa81ad5ad3 upstream.
+
+Convert to using the new inode timestamp accessor functions.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Link: https://lore.kernel.org/r/20231004185347.80880-50-jlayton@kernel.org
+Stable-dep-of: 24d92de9186e ("nfsd: Fix NFSv3 atomicity bugs in nfsd_setattr()")
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+[ cel: d68886bae76a has already been applied ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/blocklayout.c | 3 ++-
+ fs/nfsd/nfs3proc.c | 4 ++--
+ fs/nfsd/nfs4proc.c | 8 ++++----
+ fs/nfsd/nfsctl.c | 2 +-
+ fs/nfsd/vfs.c | 2 +-
+ 5 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/fs/nfsd/blocklayout.c
++++ b/fs/nfsd/blocklayout.c
+@@ -119,11 +119,12 @@ static __be32
+ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
+ struct iomap *iomaps, int nr_iomaps)
+ {
++ struct timespec64 mtime = inode_get_mtime(inode);
+ struct iattr iattr = { .ia_valid = 0 };
+ int error;
+
+ if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
+- timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
++ timespec64_compare(&lcp->lc_mtime, &mtime) < 0)
+ lcp->lc_mtime = current_time(inode);
+ iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
+ iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -294,8 +294,8 @@ nfsd3_create_file(struct svc_rqst *rqstp
+ status = nfserr_exist;
+ break;
+ case NFS3_CREATE_EXCLUSIVE:
+- if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+- d_inode(child)->i_atime.tv_sec == v_atime &&
++ if (inode_get_mtime_sec(d_inode(child)) == v_mtime &&
++ inode_get_atime_sec(d_inode(child)) == v_atime &&
+ d_inode(child)->i_size == 0) {
+ break;
+ }
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -322,8 +322,8 @@ nfsd4_create_file(struct svc_rqst *rqstp
+ status = nfserr_exist;
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+- if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+- d_inode(child)->i_atime.tv_sec == v_atime &&
++ if (inode_get_mtime_sec(d_inode(child)) == v_mtime &&
++ inode_get_atime_sec(d_inode(child)) == v_atime &&
+ d_inode(child)->i_size == 0) {
+ open->op_created = true;
+ break; /* subtle */
+@@ -331,8 +331,8 @@ nfsd4_create_file(struct svc_rqst *rqstp
+ status = nfserr_exist;
+ break;
+ case NFS4_CREATE_EXCLUSIVE4_1:
+- if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+- d_inode(child)->i_atime.tv_sec == v_atime &&
++ if (inode_get_mtime_sec(d_inode(child)) == v_mtime &&
++ inode_get_atime_sec(d_inode(child)) == v_atime &&
+ d_inode(child)->i_size == 0) {
+ open->op_created = true;
+ goto set_attr; /* subtle */
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1139,7 +1139,7 @@ static struct inode *nfsd_get_inode(stru
+ /* Following advice from simple_fill_super documentation: */
+ inode->i_ino = iunique(sb, NFSD_MaxReserved);
+ inode->i_mode = mode;
+- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
++ simple_inode_init_ts(inode);
+ switch (mode & S_IFMT) {
+ case S_IFDIR:
+ inode->i_fop = &simple_dir_operations;
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -521,7 +521,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+
+ nfsd_sanitize_attrs(inode, iap);
+
+- if (check_guard && guardtime != inode_get_ctime(inode).tv_sec)
++ if (check_guard && guardtime != inode_get_ctime_sec(inode))
+ return nfserr_notsync;
+
+ /*
--- /dev/null
+From 24d92de9186ebc340687caf7356e1070773e67bc Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Thu, 15 Feb 2024 20:24:51 -0500
+Subject: nfsd: Fix NFSv3 atomicity bugs in nfsd_setattr()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+commit 24d92de9186ebc340687caf7356e1070773e67bc upstream.
+
+The main point of the guarded SETATTR is to prevent races with other
+WRITE and SETATTR calls. That requires that the check of the guard time
+against the inode ctime be done after taking the inode lock.
+
+Furthermore, we need to take into account the 32-bit nature of
+timestamps in NFSv3, and the possibility that files may change at a
+faster rate than once a second.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Stable-dep-of: 442d27ff09a2 ("nfsd: set security label during create operations")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs3proc.c | 6 ++++--
+ fs/nfsd/nfs3xdr.c | 5 +----
+ fs/nfsd/nfs4proc.c | 3 +--
+ fs/nfsd/nfs4state.c | 2 +-
+ fs/nfsd/nfsproc.c | 6 +++---
+ fs/nfsd/vfs.c | 20 +++++++++++++-------
+ fs/nfsd/vfs.h | 2 +-
+ fs/nfsd/xdr3.h | 2 +-
+ 8 files changed, 25 insertions(+), 21 deletions(-)
+
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -71,13 +71,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqst
+ struct nfsd_attrs attrs = {
+ .na_iattr = &argp->attrs,
+ };
++ const struct timespec64 *guardtime = NULL;
+
+ dprintk("nfsd: SETATTR(3) %s\n",
+ SVCFH_fmt(&argp->fh));
+
+ fh_copy(&resp->fh, &argp->fh);
+- resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs,
+- argp->check_guard, argp->guardtime);
++ if (argp->check_guard)
++ guardtime = &argp->guardtime;
++ resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime);
+ return rpc_success;
+ }
+
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -295,17 +295,14 @@ svcxdr_decode_sattr3(struct svc_rqst *rq
+ static bool
+ svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args)
+ {
+- __be32 *p;
+ u32 check;
+
+ if (xdr_stream_decode_bool(xdr, &check) < 0)
+ return false;
+ if (check) {
+- p = xdr_inline_decode(xdr, XDR_UNIT * 2);
+- if (!p)
++ if (!svcxdr_decode_nfstime3(xdr, &args->guardtime))
+ return false;
+ args->check_guard = 1;
+- args->guardtime = be32_to_cpup(p);
+ } else
+ args->check_guard = 0;
+
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1160,8 +1160,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, st
+ goto out;
+ save_no_wcc = cstate->current_fh.fh_no_wcc;
+ cstate->current_fh.fh_no_wcc = true;
+- status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
+- 0, (time64_t)0);
++ status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, NULL);
+ cstate->current_fh.fh_no_wcc = save_no_wcc;
+ if (!status)
+ status = nfserrno(attrs.na_labelerr);
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5225,7 +5225,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, s
+ return 0;
+ if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+ return nfserr_inval;
+- return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0);
++ return nfsd_setattr(rqstp, fh, &attrs, NULL);
+ }
+
+ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -103,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp
+ }
+ }
+
+- resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0);
++ resp->status = nfsd_setattr(rqstp, fhp, &attrs, NULL);
+ if (resp->status != nfs_ok)
+ goto out;
+
+@@ -390,8 +390,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
+ */
+ attr->ia_valid &= ATTR_SIZE;
+ if (attr->ia_valid)
+- resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0,
+- (time64_t)0);
++ resp->status = nfsd_setattr(rqstp, newfhp, &attrs,
++ NULL);
+ }
+
+ out_unlock:
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -459,7 +459,6 @@ static int __nfsd_setattr(struct dentry
+ * @rqstp: controlling RPC transaction
+ * @fhp: filehandle of target
+ * @attr: attributes to set
+- * @check_guard: set to 1 if guardtime is a valid timestamp
+ * @guardtime: do not act if ctime.tv_sec does not match this timestamp
+ *
+ * This call may adjust the contents of @attr (in particular, this
+@@ -471,8 +470,7 @@ static int __nfsd_setattr(struct dentry
+ */
+ __be32
+ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- struct nfsd_attrs *attr,
+- int check_guard, time64_t guardtime)
++ struct nfsd_attrs *attr, const struct timespec64 *guardtime)
+ {
+ struct dentry *dentry;
+ struct inode *inode;
+@@ -521,9 +519,6 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+
+ nfsd_sanitize_attrs(inode, iap);
+
+- if (check_guard && guardtime != inode_get_ctime_sec(inode))
+- return nfserr_notsync;
+-
+ /*
+ * The size case is special, it changes the file in addition to the
+ * attributes, and file systems don't expect it to be mixed with
+@@ -541,6 +536,16 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+ err = fh_fill_pre_attrs(fhp);
+ if (err)
+ goto out_unlock;
++
++ if (guardtime) {
++ struct timespec64 ctime = inode_get_ctime(inode);
++ if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec ||
++ guardtime->tv_nsec != ctime.tv_nsec) {
++ err = nfserr_notsync;
++ goto out_fill_attrs;
++ }
++ }
++
+ for (retries = 1;;) {
+ struct iattr attrs;
+
+@@ -568,6 +573,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+ attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
+ dentry, ACL_TYPE_DEFAULT,
+ attr->na_dpacl);
++out_fill_attrs:
+ fh_fill_post_attrs(fhp);
+ out_unlock:
+ inode_unlock(inode);
+@@ -1374,7 +1380,7 @@ nfsd_create_setattr(struct svc_rqst *rqs
+ * if the attributes have not changed.
+ */
+ if (iap->ia_valid)
+- status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0);
++ status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
+ else
+ status = nfserrno(commit_metadata(resfhp));
+
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -69,7 +69,7 @@ __be32 nfsd_lookup_dentry(struct svc_r
+ const char *, unsigned int,
+ struct svc_export **, struct dentry **);
+ __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
+- struct nfsd_attrs *, int, time64_t);
++ struct nfsd_attrs *, const struct timespec64 *);
+ int nfsd_mountpoint(struct dentry *, struct svc_export *);
+ #ifdef CONFIG_NFSD_V4
+ __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
+ struct svc_fh fh;
+ struct iattr attrs;
+ int check_guard;
+- time64_t guardtime;
++ struct timespec64 guardtime;
+ };
+
+ struct nfsd3_diropargs {
--- /dev/null
+From 913f7cf77bf14c13cfea70e89bcb6d0b22239562 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Tue, 18 Nov 2025 19:51:19 -0500
+Subject: NFSD: NFSv4 file creation neglects setting ACL
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 913f7cf77bf14c13cfea70e89bcb6d0b22239562 upstream.
+
+An NFSv4 client that sets an ACL with a named principal during file
+creation retrieves the ACL afterwards, and finds that it is only a
+default ACL (based on the mode bits) and not the ACL that was
+requested during file creation. This violates RFC 8881 section
+6.4.1.3: "the ACL attribute is set as given".
+
+The issue occurs in nfsd_create_setattr(), which calls
+nfsd_attrs_valid() to determine whether to call nfsd_setattr().
+However, nfsd_attrs_valid() checks only for iattr changes and
+security labels, but not POSIX ACLs. When only an ACL is present,
+the function returns false, nfsd_setattr() is skipped, and the
+POSIX ACL is never applied to the inode.
+
+Subsequently, when the client retrieves the ACL, the server finds
+no POSIX ACL on the inode and returns one generated from the file's
+mode bits rather than returning the originally-specified ACL.
+
+Reported-by: Aurélien Couderc <aurelien.couderc2002@gmail.com>
+Fixes: c0cbe70742f4 ("NFSD: add posix ACLs to struct nfsd_attrs")
+Cc: Roland Mainz <roland.mainz@nrubsig.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/vfs.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -65,7 +65,8 @@ static inline bool nfsd_attrs_valid(stru
+ struct iattr *iap = attrs->na_iattr;
+
+ return (iap->ia_valid || (attrs->na_seclabel &&
+- attrs->na_seclabel->len));
++ attrs->na_seclabel->len) ||
++ attrs->na_pacl || attrs->na_dpacl);
+ }
+
+ __be32 nfserrno (int errno);
--- /dev/null
+From 442d27ff09a218b61020ab56387dbc508ad6bfa6 Mon Sep 17 00:00:00 2001
+From: Stephen Smalley <stephen.smalley.work@gmail.com>
+Date: Fri, 3 May 2024 09:09:06 -0400
+Subject: nfsd: set security label during create operations
+
+From: Stephen Smalley <stephen.smalley.work@gmail.com>
+
+commit 442d27ff09a218b61020ab56387dbc508ad6bfa6 upstream.
+
+When security labeling is enabled, the client can pass a file security
+label as part of a create operation for the new file, similar to mode
+and other attributes. At present, the security label is received by nfsd
+and passed down to nfsd_create_setattr(), but nfsd_setattr() is never
+called and therefore the label is never set on the new file. This bug
+may have been introduced on or around commit d6a97d3f589a ("NFSD:
+add security label to struct nfsd_attrs"). Looking at nfsd_setattr()
+I am uncertain as to whether the same issue presents for
+file ACLs and therefore requires a similar fix for those.
+
+An alternative approach would be to introduce a new LSM hook to set the
+"create SID" of the current task prior to the actual file creation, which
+would atomically label the new inode at creation time. This would be better
+for SELinux and a similar approach has been used previously
+(see security_dentry_create_files_as) but perhaps not usable by other LSMs.
+
+Reproducer:
+1. Install a Linux distro with SELinux - Fedora is easiest
+2. git clone https://github.com/SELinuxProject/selinux-testsuite
+3. Install the requisite dependencies per selinux-testsuite/README.md
+4. Run something like the following script:
+MOUNT=$HOME/selinux-testsuite
+sudo systemctl start nfs-server
+sudo exportfs -o rw,no_root_squash,security_label localhost:$MOUNT
+sudo mkdir -p /mnt/selinux-testsuite
+sudo mount -t nfs -o vers=4.2 localhost:$MOUNT /mnt/selinux-testsuite
+pushd /mnt/selinux-testsuite/
+sudo make -C policy load
+pushd tests/filesystem
+sudo runcon -t test_filesystem_t ./create_file -f trans_test_file \
+ -e test_filesystem_filetranscon_t -v
+sudo rm -f trans_test_file
+popd
+sudo make -C policy unload
+popd
+sudo umount /mnt/selinux-testsuite
+sudo exportfs -u localhost:$MOUNT
+sudo rmdir /mnt/selinux-testsuite
+sudo systemctl stop nfs-server
+
+Expected output:
+<eliding noise from commands run prior to or after the test itself>
+Process context:
+ unconfined_u:unconfined_r:test_filesystem_t:s0-s0:c0.c1023
+Created file: trans_test_file
+File context: unconfined_u:object_r:test_filesystem_filetranscon_t:s0
+File context is correct
+
+Actual output:
+<eliding noise from commands run prior to or after the test itself>
+Process context:
+ unconfined_u:unconfined_r:test_filesystem_t:s0-s0:c0.c1023
+Created file: trans_test_file
+File context: system_u:object_r:test_file_t:s0
+File context error, expected:
+ test_filesystem_filetranscon_t
+got:
+ test_file_t
+
+Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Stable-dep-of: 913f7cf77bf1 ("NFSD: NFSv4 file creation neglects setting ACL")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/vfs.c | 2 +-
+ fs/nfsd/vfs.h | 8 ++++++++
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1379,7 +1379,7 @@ nfsd_create_setattr(struct svc_rqst *rqs
+ * Callers expect new file metadata to be committed even
+ * if the attributes have not changed.
+ */
+- if (iap->ia_valid)
++ if (nfsd_attrs_valid(attrs))
+ status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
+ else
+ status = nfserrno(commit_metadata(resfhp));
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -60,6 +60,14 @@ static inline void nfsd_attrs_free(struc
+ posix_acl_release(attrs->na_dpacl);
+ }
+
++static inline bool nfsd_attrs_valid(struct nfsd_attrs *attrs)
++{
++ struct iattr *iap = attrs->na_iattr;
++
++ return (iap->ia_valid || (attrs->na_seclabel &&
++ attrs->na_seclabel->len));
++}
++
+ __be32 nfserrno (int errno);
+ int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+ struct svc_export **expp);
libceph-return-the-handler-error-from-mon_handle_auth_done.patch
libceph-reset-sparse-read-state-in-osd_fault.patch
libceph-make-calc_target-set-t-paused-not-just-clear-it.patch
+ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
+net-add-locking-to-protect-skb-dev-access-in-ip_output.patch
+nfsd-convert-to-new-timestamp-accessors.patch
+nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch
+nfsd-set-security-label-during-create-operations.patch
+nfsd-nfsv4-file-creation-neglects-setting-acl.patch