]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 12 Jan 2026 11:09:03 +0000 (12:09 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 12 Jan 2026 11:09:03 +0000 (12:09 +0100)
added patches:
ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
net-add-locking-to-protect-skb-dev-access-in-ip_output.patch
nfsd-convert-to-new-timestamp-accessors.patch
nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch
nfsd-nfsv4-file-creation-neglects-setting-acl.patch
nfsd-set-security-label-during-create-operations.patch

queue-6.6/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch [new file with mode: 0644]
queue-6.6/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch [new file with mode: 0644]
queue-6.6/nfsd-convert-to-new-timestamp-accessors.patch [new file with mode: 0644]
queue-6.6/nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch [new file with mode: 0644]
queue-6.6/nfsd-nfsv4-file-creation-neglects-setting-acl.patch [new file with mode: 0644]
queue-6.6/nfsd-set-security-label-during-create-operations.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch b/queue-6.6/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
new file mode 100644 (file)
index 0000000..dad7ba0
--- /dev/null
@@ -0,0 +1,234 @@
+From f5548c318d6520d4fa3c5ed6003eeb710763cbc5 Mon Sep 17 00:00:00 2001
+From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Date: Wed, 22 Oct 2025 12:30:59 -0300
+Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
+
+From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+
+commit f5548c318d6520d4fa3c5ed6003eeb710763cbc5 upstream.
+
+Currently, scan_get_next_rmap_item() walks every page address in a VMA to
+locate mergeable pages.  This becomes highly inefficient when scanning
+large virtual memory areas that contain mostly unmapped regions, causing
+ksmd to use a large amount of CPU without deduplicating many pages.
+
+This patch replaces the per-address lookup with a range walk using
+walk_page_range().  The range walker allows KSM to skip over entire
+unmapped holes in a VMA, avoiding unnecessary lookups.  This problem was
+previously discussed in [1].
+
+Consider the following test program which creates a 32 TiB mapping in the
+virtual address space but only populates a single page:
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/mman.h>
+
+/* 32 TiB */
+const size_t size = 32ul * 1024 * 1024 * 1024 * 1024;
+
+int main() {
+        char *area = mmap(NULL, size, PROT_READ | PROT_WRITE,
+                          MAP_NORESERVE | MAP_PRIVATE | MAP_ANON, -1, 0);
+
+        if (area == MAP_FAILED) {
+                perror("mmap() failed\n");
+                return -1;
+        }
+
+        /* Populate a single page such that we get an anon_vma. */
+        *area = 0;
+
+        /* Enable KSM. */
+        madvise(area, size, MADV_MERGEABLE);
+        pause();
+        return 0;
+}
+
+$ ./ksm-sparse  &
+$ echo 1 > /sys/kernel/mm/ksm/run
+
+Without this patch ksmd uses 100% of the cpu for a long time (more than 1
+hour on my test machine) scanning all the 32 TiB virtual address space
+that contains only one mapped page.  This makes ksmd essentially deadlocked,
+not able to deduplicate anything of value.  With this patch ksmd walks
+only the one mapped page and skips the rest of the 32 TiB virtual address
+space, making the scan fast using little cpu.
+
+Link: https://lkml.kernel.org/r/20251023035841.41406-1-pedrodemargomes@gmail.com
+Link: https://lkml.kernel.org/r/20251022153059.22763-1-pedrodemargomes@gmail.com
+Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1]
+Fixes: 31dbd01f3143 ("ksm: Kernel SamePage Merging")
+Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Co-developed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reported-by: craftfever <craftfever@airmail.cc>
+Closes: https://lkml.kernel.org/r/020cf8de6e773bb78ba7614ef250129f11a63781@murena.io
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Chengming Zhou <chengming.zhou@linux.dev>
+Cc: xu xin <xu.xin16@zte.com.cn>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[ change page to folios ]
+Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/ksm.c |  126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 113 insertions(+), 13 deletions(-)
+
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -2304,6 +2304,95 @@ static struct ksm_rmap_item *get_next_rm
+       return rmap_item;
+ }
++struct ksm_next_page_arg {
++      struct folio *folio;
++      struct page *page;
++      unsigned long addr;
++};
++
++static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
++              struct mm_walk *walk)
++{
++      struct ksm_next_page_arg *private = walk->private;
++      struct vm_area_struct *vma = walk->vma;
++      pte_t *start_ptep = NULL, *ptep, pte;
++      struct mm_struct *mm = walk->mm;
++      struct folio *folio;
++      struct page *page;
++      spinlock_t *ptl;
++      pmd_t pmd;
++
++      if (ksm_test_exit(mm))
++              return 0;
++
++      cond_resched();
++
++      pmd = pmdp_get_lockless(pmdp);
++      if (!pmd_present(pmd))
++              return 0;
++
++      if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) {
++              ptl = pmd_lock(mm, pmdp);
++              pmd = pmdp_get(pmdp);
++
++              if (!pmd_present(pmd)) {
++                      goto not_found_unlock;
++              } else if (pmd_leaf(pmd)) {
++                      page = vm_normal_page_pmd(vma, addr, pmd);
++                      if (!page)
++                              goto not_found_unlock;
++                      folio = page_folio(page);
++
++                      if (folio_is_zone_device(folio) || !folio_test_anon(folio))
++                              goto not_found_unlock;
++
++                      page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT);
++                      goto found_unlock;
++              }
++              spin_unlock(ptl);
++      }
++
++      start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
++      if (!start_ptep)
++              return 0;
++
++      for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
++              pte = ptep_get(ptep);
++
++              if (!pte_present(pte))
++                      continue;
++
++              page = vm_normal_page(vma, addr, pte);
++              if (!page)
++                      continue;
++              folio = page_folio(page);
++
++              if (folio_is_zone_device(folio) || !folio_test_anon(folio))
++                      continue;
++              goto found_unlock;
++      }
++
++not_found_unlock:
++      spin_unlock(ptl);
++      if (start_ptep)
++              pte_unmap(start_ptep);
++      return 0;
++found_unlock:
++      folio_get(folio);
++      spin_unlock(ptl);
++      if (start_ptep)
++              pte_unmap(start_ptep);
++      private->page = page;
++      private->folio = folio;
++      private->addr = addr;
++      return 1;
++}
++
++static struct mm_walk_ops ksm_next_page_ops = {
++      .pmd_entry = ksm_next_page_pmd_entry,
++      .walk_lock = PGWALK_RDLOCK,
++};
++
+ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
+ {
+       struct mm_struct *mm;
+@@ -2390,32 +2479,43 @@ next_mm:
+                       ksm_scan.address = vma->vm_end;
+               while (ksm_scan.address < vma->vm_end) {
++                      struct ksm_next_page_arg ksm_next_page_arg;
++                      struct page *tmp_page = NULL;
++                      struct folio *folio;
++
+                       if (ksm_test_exit(mm))
+                               break;
+-                      *page = follow_page(vma, ksm_scan.address, FOLL_GET);
+-                      if (IS_ERR_OR_NULL(*page)) {
+-                              ksm_scan.address += PAGE_SIZE;
+-                              cond_resched();
+-                              continue;
++
++                      int found;
++
++                      found = walk_page_range_vma(vma, ksm_scan.address,
++                                                  vma->vm_end,
++                                                  &ksm_next_page_ops,
++                                                  &ksm_next_page_arg);
++
++                      if (found > 0) {
++                              folio = ksm_next_page_arg.folio;
++                              tmp_page = ksm_next_page_arg.page;
++                              ksm_scan.address = ksm_next_page_arg.addr;
++                      } else {
++                              VM_WARN_ON_ONCE(found < 0);
++                              ksm_scan.address = vma->vm_end - PAGE_SIZE;
+                       }
+-                      if (is_zone_device_page(*page))
+-                              goto next_page;
+-                      if (PageAnon(*page)) {
+-                              flush_anon_page(vma, *page, ksm_scan.address);
+-                              flush_dcache_page(*page);
++                      if (tmp_page) {
++                              flush_anon_page(vma, tmp_page, ksm_scan.address);
++                              flush_dcache_page(tmp_page);
+                               rmap_item = get_next_rmap_item(mm_slot,
+                                       ksm_scan.rmap_list, ksm_scan.address);
+                               if (rmap_item) {
+                                       ksm_scan.rmap_list =
+                                                       &rmap_item->rmap_list;
+                                       ksm_scan.address += PAGE_SIZE;
++                                      *page = tmp_page;
+                               } else
+-                                      put_page(*page);
++                                      folio_put(folio);
+                               mmap_read_unlock(mm);
+                               return rmap_item;
+                       }
+-next_page:
+-                      put_page(*page);
+                       ksm_scan.address += PAGE_SIZE;
+                       cond_resched();
+               }
diff --git a/queue-6.6/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch b/queue-6.6/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch
new file mode 100644 (file)
index 0000000..d0e9cdc
--- /dev/null
@@ -0,0 +1,113 @@
+From 1dbf1d590d10a6d1978e8184f8dfe20af22d680a Mon Sep 17 00:00:00 2001
+From: Sharath Chandra Vurukala <quic_sharathv@quicinc.com>
+Date: Wed, 30 Jul 2025 16:21:18 +0530
+Subject: net: Add locking to protect skb->dev access in ip_output
+
+From: Sharath Chandra Vurukala <quic_sharathv@quicinc.com>
+
+commit 1dbf1d590d10a6d1978e8184f8dfe20af22d680a upstream.
+
+In ip_output(), skb->dev is updated from skb_dst(skb)->dev, which
+can become invalid when the interface is unregistered and freed.
+
+Introduce a new skb_dst_dev_rcu() function to be used instead of
+skb_dst_dev() under the RCU read lock in ip_output(). This ensures that
+all the skbs associated with the dev being deregistered are
+transmitted out first, before the dev is freed.
+
+Given that ip_output() is called within an rcu_read_lock()
+critical section or from a bottom-half context, it is safe to introduce
+an RCU read-side critical section within it.
+
+Multiple panic call stacks were observed when UL traffic was run
+in concurrency with device deregistration from different functions,
+pasting one sample for reference.
+
+[496733.627565][T13385] Call trace:
+[496733.627570][T13385] bpf_prog_ce7c9180c3b128ea_cgroupskb_egres+0x24c/0x7f0
+[496733.627581][T13385] __cgroup_bpf_run_filter_skb+0x128/0x498
+[496733.627595][T13385] ip_finish_output+0xa4/0xf4
+[496733.627605][T13385] ip_output+0x100/0x1a0
+[496733.627613][T13385] ip_send_skb+0x68/0x100
+[496733.627618][T13385] udp_send_skb+0x1c4/0x384
+[496733.627625][T13385] udp_sendmsg+0x7b0/0x898
+[496733.627631][T13385] inet_sendmsg+0x5c/0x7c
+[496733.627639][T13385] __sys_sendto+0x174/0x1e4
+[496733.627647][T13385] __arm64_sys_sendto+0x28/0x3c
+[496733.627653][T13385] invoke_syscall+0x58/0x11c
+[496733.627662][T13385] el0_svc_common+0x88/0xf4
+[496733.627669][T13385] do_el0_svc+0x2c/0xb0
+[496733.627676][T13385] el0_svc+0x2c/0xa4
+[496733.627683][T13385] el0t_64_sync_handler+0x68/0xb4
+[496733.627689][T13385] el0t_64_sync+0x1a4/0x1a8
+
+Changes in v3:
+- Replaced WARN_ON() with  WARN_ON_ONCE(), as suggested by Willem de Bruijn.
+- Dropped legacy lines mistakenly pulled in from an outdated branch.
+
+Changes in v2:
+- Addressed review comments from Eric Dumazet
+- Used READ_ONCE() to prevent potential load/store tearing
+- Added skb_dst_dev_rcu() and used along with rcu_read_lock() in ip_output
+
+Signed-off-by: Sharath Chandra Vurukala <quic_sharathv@quicinc.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20250730105118.GA26100@hu-sharathv-hyd.qualcomm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ Keerthana: Backported the patch to v6.6.y ]
+Signed-off-by: Keerthana K <keerthana.kalyanasundaram@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h    |   12 ++++++++++++
+ net/ipv4/ip_output.c |   15 ++++++++++-----
+ 2 files changed, 22 insertions(+), 5 deletions(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -569,6 +569,18 @@ static inline void skb_dst_update_pmtu_n
+               dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
+ }
++static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
++{
++      /* In the future, use rcu_dereference(dst->dev) */
++      WARN_ON_ONCE(!rcu_read_lock_held());
++      return READ_ONCE(dst->dev);
++}
++
++static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb)
++{
++      return dst_dev_rcu(skb_dst(skb));
++}
++
+ struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
+ void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                              struct sk_buff *skb, u32 mtu, bool confirm_neigh);
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -425,15 +425,20 @@ int ip_mc_output(struct net *net, struct
+ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+-      struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
++      struct net_device *dev, *indev = skb->dev;
++      int ret_val;
++      rcu_read_lock();
++      dev = skb_dst_dev_rcu(skb);
+       skb->dev = dev;
+       skb->protocol = htons(ETH_P_IP);
+-      return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+-                          net, sk, skb, indev, dev,
+-                          ip_finish_output,
+-                          !(IPCB(skb)->flags & IPSKB_REROUTED));
++      ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
++                              net, sk, skb, indev, dev,
++                              ip_finish_output,
++                              !(IPCB(skb)->flags & IPSKB_REROUTED));
++      rcu_read_unlock();
++      return ret_val;
+ }
+ EXPORT_SYMBOL(ip_output);
diff --git a/queue-6.6/nfsd-convert-to-new-timestamp-accessors.patch b/queue-6.6/nfsd-convert-to-new-timestamp-accessors.patch
new file mode 100644 (file)
index 0000000..fd93649
--- /dev/null
@@ -0,0 +1,101 @@
+From 11fec9b9fb04fd1b3330a3b91ab9dcfa81ad5ad3 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Wed, 4 Oct 2023 14:52:37 -0400
+Subject: nfsd: convert to new timestamp accessors
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 11fec9b9fb04fd1b3330a3b91ab9dcfa81ad5ad3 upstream.
+
+Convert to using the new inode timestamp accessor functions.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Link: https://lore.kernel.org/r/20231004185347.80880-50-jlayton@kernel.org
+Stable-dep-of: 24d92de9186e ("nfsd: Fix NFSv3 atomicity bugs in nfsd_setattr()")
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+[ cel: d68886bae76a has already been applied ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/blocklayout.c |    3 ++-
+ fs/nfsd/nfs3proc.c    |    4 ++--
+ fs/nfsd/nfs4proc.c    |    8 ++++----
+ fs/nfsd/nfsctl.c      |    2 +-
+ fs/nfsd/vfs.c         |    2 +-
+ 5 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/fs/nfsd/blocklayout.c
++++ b/fs/nfsd/blocklayout.c
+@@ -119,11 +119,12 @@ static __be32
+ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
+               struct iomap *iomaps, int nr_iomaps)
+ {
++      struct timespec64 mtime = inode_get_mtime(inode);
+       struct iattr iattr = { .ia_valid = 0 };
+       int error;
+       if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
+-          timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
++          timespec64_compare(&lcp->lc_mtime, &mtime) < 0)
+               lcp->lc_mtime = current_time(inode);
+       iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
+       iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -294,8 +294,8 @@ nfsd3_create_file(struct svc_rqst *rqstp
+                       status = nfserr_exist;
+                       break;
+               case NFS3_CREATE_EXCLUSIVE:
+-                      if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+-                          d_inode(child)->i_atime.tv_sec == v_atime &&
++                      if (inode_get_mtime_sec(d_inode(child)) == v_mtime &&
++                          inode_get_atime_sec(d_inode(child)) == v_atime &&
+                           d_inode(child)->i_size == 0) {
+                               break;
+                       }
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -322,8 +322,8 @@ nfsd4_create_file(struct svc_rqst *rqstp
+                       status = nfserr_exist;
+                       break;
+               case NFS4_CREATE_EXCLUSIVE:
+-                      if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+-                          d_inode(child)->i_atime.tv_sec == v_atime &&
++                      if (inode_get_mtime_sec(d_inode(child)) == v_mtime &&
++                          inode_get_atime_sec(d_inode(child)) == v_atime &&
+                           d_inode(child)->i_size == 0) {
+                               open->op_created = true;
+                               break;          /* subtle */
+@@ -331,8 +331,8 @@ nfsd4_create_file(struct svc_rqst *rqstp
+                       status = nfserr_exist;
+                       break;
+               case NFS4_CREATE_EXCLUSIVE4_1:
+-                      if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+-                          d_inode(child)->i_atime.tv_sec == v_atime &&
++                      if (inode_get_mtime_sec(d_inode(child)) == v_mtime &&
++                          inode_get_atime_sec(d_inode(child)) == v_atime &&
+                           d_inode(child)->i_size == 0) {
+                               open->op_created = true;
+                               goto set_attr;  /* subtle */
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1139,7 +1139,7 @@ static struct inode *nfsd_get_inode(stru
+       /* Following advice from simple_fill_super documentation: */
+       inode->i_ino = iunique(sb, NFSD_MaxReserved);
+       inode->i_mode = mode;
+-      inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
++      simple_inode_init_ts(inode);
+       switch (mode & S_IFMT) {
+       case S_IFDIR:
+               inode->i_fop = &simple_dir_operations;
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -521,7 +521,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+       nfsd_sanitize_attrs(inode, iap);
+-      if (check_guard && guardtime != inode_get_ctime(inode).tv_sec)
++      if (check_guard && guardtime != inode_get_ctime_sec(inode))
+               return nfserr_notsync;
+       /*
diff --git a/queue-6.6/nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch b/queue-6.6/nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch
new file mode 100644 (file)
index 0000000..7b80045
--- /dev/null
@@ -0,0 +1,206 @@
+From 24d92de9186ebc340687caf7356e1070773e67bc Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Thu, 15 Feb 2024 20:24:51 -0500
+Subject: nfsd: Fix NFSv3 atomicity bugs in nfsd_setattr()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+commit 24d92de9186ebc340687caf7356e1070773e67bc upstream.
+
+The main point of the guarded SETATTR is to prevent races with other
+WRITE and SETATTR calls. That requires that the check of the guard time
+against the inode ctime be done after taking the inode lock.
+
+Furthermore, we need to take into account the 32-bit nature of
+timestamps in NFSv3, and the possibility that files may change at a
+faster rate than once a second.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Stable-dep-of: 442d27ff09a2 ("nfsd: set security label during create operations")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs3proc.c  |    6 ++++--
+ fs/nfsd/nfs3xdr.c   |    5 +----
+ fs/nfsd/nfs4proc.c  |    3 +--
+ fs/nfsd/nfs4state.c |    2 +-
+ fs/nfsd/nfsproc.c   |    6 +++---
+ fs/nfsd/vfs.c       |   20 +++++++++++++-------
+ fs/nfsd/vfs.h       |    2 +-
+ fs/nfsd/xdr3.h      |    2 +-
+ 8 files changed, 25 insertions(+), 21 deletions(-)
+
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -71,13 +71,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqst
+       struct nfsd_attrs attrs = {
+               .na_iattr       = &argp->attrs,
+       };
++      const struct timespec64 *guardtime = NULL;
+       dprintk("nfsd: SETATTR(3)  %s\n",
+                               SVCFH_fmt(&argp->fh));
+       fh_copy(&resp->fh, &argp->fh);
+-      resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs,
+-                                  argp->check_guard, argp->guardtime);
++      if (argp->check_guard)
++              guardtime = &argp->guardtime;
++      resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime);
+       return rpc_success;
+ }
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -295,17 +295,14 @@ svcxdr_decode_sattr3(struct svc_rqst *rq
+ static bool
+ svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args)
+ {
+-      __be32 *p;
+       u32 check;
+       if (xdr_stream_decode_bool(xdr, &check) < 0)
+               return false;
+       if (check) {
+-              p = xdr_inline_decode(xdr, XDR_UNIT * 2);
+-              if (!p)
++              if (!svcxdr_decode_nfstime3(xdr, &args->guardtime))
+                       return false;
+               args->check_guard = 1;
+-              args->guardtime = be32_to_cpup(p);
+       } else
+               args->check_guard = 0;
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1160,8 +1160,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, st
+               goto out;
+       save_no_wcc = cstate->current_fh.fh_no_wcc;
+       cstate->current_fh.fh_no_wcc = true;
+-      status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
+-                              0, (time64_t)0);
++      status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, NULL);
+       cstate->current_fh.fh_no_wcc = save_no_wcc;
+       if (!status)
+               status = nfserrno(attrs.na_labelerr);
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5225,7 +5225,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, s
+               return 0;
+       if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+               return nfserr_inval;
+-      return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0);
++      return nfsd_setattr(rqstp, fh, &attrs, NULL);
+ }
+ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -103,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp
+               }
+       }
+-      resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0);
++      resp->status = nfsd_setattr(rqstp, fhp, &attrs, NULL);
+       if (resp->status != nfs_ok)
+               goto out;
+@@ -390,8 +390,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
+                */
+               attr->ia_valid &= ATTR_SIZE;
+               if (attr->ia_valid)
+-                      resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0,
+-                                                  (time64_t)0);
++                      resp->status = nfsd_setattr(rqstp, newfhp, &attrs,
++                                                  NULL);
+       }
+ out_unlock:
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -459,7 +459,6 @@ static int __nfsd_setattr(struct dentry
+  * @rqstp: controlling RPC transaction
+  * @fhp: filehandle of target
+  * @attr: attributes to set
+- * @check_guard: set to 1 if guardtime is a valid timestamp
+  * @guardtime: do not act if ctime.tv_sec does not match this timestamp
+  *
+  * This call may adjust the contents of @attr (in particular, this
+@@ -471,8 +470,7 @@ static int __nfsd_setattr(struct dentry
+  */
+ __be32
+ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+-           struct nfsd_attrs *attr,
+-           int check_guard, time64_t guardtime)
++           struct nfsd_attrs *attr, const struct timespec64 *guardtime)
+ {
+       struct dentry   *dentry;
+       struct inode    *inode;
+@@ -521,9 +519,6 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+       nfsd_sanitize_attrs(inode, iap);
+-      if (check_guard && guardtime != inode_get_ctime_sec(inode))
+-              return nfserr_notsync;
+-
+       /*
+        * The size case is special, it changes the file in addition to the
+        * attributes, and file systems don't expect it to be mixed with
+@@ -541,6 +536,16 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+       err = fh_fill_pre_attrs(fhp);
+       if (err)
+               goto out_unlock;
++
++      if (guardtime) {
++              struct timespec64 ctime = inode_get_ctime(inode);
++              if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec ||
++                  guardtime->tv_nsec != ctime.tv_nsec) {
++                      err = nfserr_notsync;
++                      goto out_fill_attrs;
++              }
++      }
++
+       for (retries = 1;;) {
+               struct iattr attrs;
+@@ -568,6 +573,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+               attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
+                                               dentry, ACL_TYPE_DEFAULT,
+                                               attr->na_dpacl);
++out_fill_attrs:
+       fh_fill_post_attrs(fhp);
+ out_unlock:
+       inode_unlock(inode);
+@@ -1374,7 +1380,7 @@ nfsd_create_setattr(struct svc_rqst *rqs
+        * if the attributes have not changed.
+        */
+       if (iap->ia_valid)
+-              status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0);
++              status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
+       else
+               status = nfserrno(commit_metadata(resfhp));
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -69,7 +69,7 @@ __be32                nfsd_lookup_dentry(struct svc_r
+                               const char *, unsigned int,
+                               struct svc_export **, struct dentry **);
+ __be32                nfsd_setattr(struct svc_rqst *, struct svc_fh *,
+-                              struct nfsd_attrs *, int, time64_t);
++                           struct nfsd_attrs *, const struct timespec64 *);
+ int nfsd_mountpoint(struct dentry *, struct svc_export *);
+ #ifdef CONFIG_NFSD_V4
+ __be32                nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
+       struct svc_fh           fh;
+       struct iattr            attrs;
+       int                     check_guard;
+-      time64_t                guardtime;
++      struct timespec64       guardtime;
+ };
+ struct nfsd3_diropargs {
diff --git a/queue-6.6/nfsd-nfsv4-file-creation-neglects-setting-acl.patch b/queue-6.6/nfsd-nfsv4-file-creation-neglects-setting-acl.patch
new file mode 100644 (file)
index 0000000..40acc76
--- /dev/null
@@ -0,0 +1,51 @@
+From 913f7cf77bf14c13cfea70e89bcb6d0b22239562 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Tue, 18 Nov 2025 19:51:19 -0500
+Subject: NFSD: NFSv4 file creation neglects setting ACL
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 913f7cf77bf14c13cfea70e89bcb6d0b22239562 upstream.
+
+An NFSv4 client that sets an ACL with a named principal during file
+creation retrieves the ACL afterwards, and finds that it is only a
+default ACL (based on the mode bits) and not the ACL that was
+requested during file creation. This violates RFC 8881 section
+6.4.1.3: "the ACL attribute is set as given".
+
+The issue occurs in nfsd_create_setattr(), which calls
+nfsd_attrs_valid() to determine whether to call nfsd_setattr().
+However, nfsd_attrs_valid() checks only for iattr changes and
+security labels, but not POSIX ACLs. When only an ACL is present,
+the function returns false, nfsd_setattr() is skipped, and the
+POSIX ACL is never applied to the inode.
+
+Subsequently, when the client retrieves the ACL, the server finds
+no POSIX ACL on the inode and returns one generated from the file's
+mode bits rather than returning the originally-specified ACL.
+
+Reported-by: Aurélien Couderc <aurelien.couderc2002@gmail.com>
+Fixes: c0cbe70742f4 ("NFSD: add posix ACLs to struct nfsd_attrs")
+Cc: Roland Mainz <roland.mainz@nrubsig.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/vfs.h |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -65,7 +65,8 @@ static inline bool nfsd_attrs_valid(stru
+       struct iattr *iap = attrs->na_iattr;
+       return (iap->ia_valid || (attrs->na_seclabel &&
+-              attrs->na_seclabel->len));
++              attrs->na_seclabel->len) ||
++              attrs->na_pacl || attrs->na_dpacl);
+ }
+ __be32                nfserrno (int errno);
diff --git a/queue-6.6/nfsd-set-security-label-during-create-operations.patch b/queue-6.6/nfsd-set-security-label-during-create-operations.patch
new file mode 100644 (file)
index 0000000..ec6266d
--- /dev/null
@@ -0,0 +1,107 @@
+From 442d27ff09a218b61020ab56387dbc508ad6bfa6 Mon Sep 17 00:00:00 2001
+From: Stephen Smalley <stephen.smalley.work@gmail.com>
+Date: Fri, 3 May 2024 09:09:06 -0400
+Subject: nfsd: set security label during create operations
+
+From: Stephen Smalley <stephen.smalley.work@gmail.com>
+
+commit 442d27ff09a218b61020ab56387dbc508ad6bfa6 upstream.
+
+When security labeling is enabled, the client can pass a file security
+label as part of a create operation for the new file, similar to mode
+and other attributes. At present, the security label is received by nfsd
+and passed down to nfsd_create_setattr(), but nfsd_setattr() is never
+called and therefore the label is never set on the new file. This bug
+may have been introduced on or around commit d6a97d3f589a ("NFSD:
+add security label to struct nfsd_attrs"). Looking at nfsd_setattr()
+I am uncertain as to whether the same issue presents for
+file ACLs and therefore requires a similar fix for those.
+
+An alternative approach would be to introduce a new LSM hook to set the
+"create SID" of the current task prior to the actual file creation, which
+would atomically label the new inode at creation time. This would be better
+for SELinux and a similar approach has been used previously
+(see security_dentry_create_files_as) but perhaps not usable by other LSMs.
+
+Reproducer:
+1. Install a Linux distro with SELinux - Fedora is easiest
+2. git clone https://github.com/SELinuxProject/selinux-testsuite
+3. Install the requisite dependencies per selinux-testsuite/README.md
+4. Run something like the following script:
+MOUNT=$HOME/selinux-testsuite
+sudo systemctl start nfs-server
+sudo exportfs -o rw,no_root_squash,security_label localhost:$MOUNT
+sudo mkdir -p /mnt/selinux-testsuite
+sudo mount -t nfs -o vers=4.2 localhost:$MOUNT /mnt/selinux-testsuite
+pushd /mnt/selinux-testsuite/
+sudo make -C policy load
+pushd tests/filesystem
+sudo runcon -t test_filesystem_t ./create_file -f trans_test_file \
+       -e test_filesystem_filetranscon_t -v
+sudo rm -f trans_test_file
+popd
+sudo make -C policy unload
+popd
+sudo umount /mnt/selinux-testsuite
+sudo exportfs -u localhost:$MOUNT
+sudo rmdir /mnt/selinux-testsuite
+sudo systemctl stop nfs-server
+
+Expected output:
+<eliding noise from commands run prior to or after the test itself>
+Process context:
+       unconfined_u:unconfined_r:test_filesystem_t:s0-s0:c0.c1023
+Created file: trans_test_file
+File context: unconfined_u:object_r:test_filesystem_filetranscon_t:s0
+File context is correct
+
+Actual output:
+<eliding noise from commands run prior to or after the test itself>
+Process context:
+       unconfined_u:unconfined_r:test_filesystem_t:s0-s0:c0.c1023
+Created file: trans_test_file
+File context: system_u:object_r:test_file_t:s0
+File context error, expected:
+       test_filesystem_filetranscon_t
+got:
+       test_file_t
+
+Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Stable-dep-of: 913f7cf77bf1 ("NFSD: NFSv4 file creation neglects setting ACL")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/vfs.c |    2 +-
+ fs/nfsd/vfs.h |    8 ++++++++
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1379,7 +1379,7 @@ nfsd_create_setattr(struct svc_rqst *rqs
+        * Callers expect new file metadata to be committed even
+        * if the attributes have not changed.
+        */
+-      if (iap->ia_valid)
++      if (nfsd_attrs_valid(attrs))
+               status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
+       else
+               status = nfserrno(commit_metadata(resfhp));
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -60,6 +60,14 @@ static inline void nfsd_attrs_free(struc
+       posix_acl_release(attrs->na_dpacl);
+ }
++static inline bool nfsd_attrs_valid(struct nfsd_attrs *attrs)
++{
++      struct iattr *iap = attrs->na_iattr;
++
++      return (iap->ia_valid || (attrs->na_seclabel &&
++              attrs->na_seclabel->len));
++}
++
+ __be32                nfserrno (int errno);
+ int           nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+                               struct svc_export **expp);
index 11fe3f64c20a8ca9299b86013ecec3d13bd5fea1..17ac6ceee3c01355a3c1f3ed30d241554c3f9b86 100644 (file)
@@ -18,3 +18,9 @@ libceph-make-free_choose_arg_map-resilient-to-partial-allocation.patch
 libceph-return-the-handler-error-from-mon_handle_auth_done.patch
 libceph-reset-sparse-read-state-in-osd_fault.patch
 libceph-make-calc_target-set-t-paused-not-just-clear-it.patch
+ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
+net-add-locking-to-protect-skb-dev-access-in-ip_output.patch
+nfsd-convert-to-new-timestamp-accessors.patch
+nfsd-fix-nfsv3-atomicity-bugs-in-nfsd_setattr.patch
+nfsd-set-security-label-during-create-operations.patch
+nfsd-nfsv4-file-creation-neglects-setting-acl.patch