]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 15 Aug 2025 16:50:43 +0000 (18:50 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 15 Aug 2025 16:50:43 +0000 (18:50 +0200)
added patches:
fs-prevent-file-descriptor-table-allocations-exceeding-int_max.patch
nfsd-detect-mismatch-of-file-handle-and-delegation-stateid-in-open-op.patch
nfsd-handle-get_client_locked-failure-in-nfsd4_setclientid_confirm.patch
sunvdc-balance-device-refcount-in-vdc_port_mpgroup_check.patch

queue-5.15/fs-prevent-file-descriptor-table-allocations-exceeding-int_max.patch [new file with mode: 0644]
queue-5.15/nfsd-detect-mismatch-of-file-handle-and-delegation-stateid-in-open-op.patch [new file with mode: 0644]
queue-5.15/nfsd-handle-get_client_locked-failure-in-nfsd4_setclientid_confirm.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/sunvdc-balance-device-refcount-in-vdc_port_mpgroup_check.patch [new file with mode: 0644]

diff --git a/queue-5.15/fs-prevent-file-descriptor-table-allocations-exceeding-int_max.patch b/queue-5.15/fs-prevent-file-descriptor-table-allocations-exceeding-int_max.patch
new file mode 100644 (file)
index 0000000..4f41497
--- /dev/null
@@ -0,0 +1,104 @@
+From 04a2c4b4511d186b0fce685da21085a5d4acd370 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 29 Jun 2025 03:40:21 -0400
+Subject: fs: Prevent file descriptor table allocations exceeding INT_MAX
+
+From: Sasha Levin <sashal@kernel.org>
+
+commit 04a2c4b4511d186b0fce685da21085a5d4acd370 upstream.
+
+When sysctl_nr_open is set to a very high value (for example, 1073741816
+as set by systemd), processes attempting to use file descriptors near
+the limit can trigger massive memory allocation attempts that exceed
+INT_MAX, resulting in a WARNING in mm/slub.c:
+
+  WARNING: CPU: 0 PID: 44 at mm/slub.c:5027 __kvmalloc_node_noprof+0x21a/0x288
+
+This happens because kvmalloc_array() and kvmalloc() check if the
+requested size exceeds INT_MAX and emit a warning when the allocation is
+not flagged with __GFP_NOWARN.
+
+Specifically, when nr_open is set to 1073741816 (0x3ffffff8) and a
+process calls dup2(oldfd, 1073741880), the kernel attempts to allocate:
+- File descriptor array: 1073741880 * 8 bytes = 8,589,935,040 bytes
+- Multiple bitmaps: ~400MB
+- Total allocation size: > 8GB (exceeding INT_MAX = 2,147,483,647)
+
+Reproducer:
+1. Set /proc/sys/fs/nr_open to 1073741816:
+   # echo 1073741816 > /proc/sys/fs/nr_open
+
+2. Run a program that uses a high file descriptor:
+   #include <unistd.h>
+   #include <sys/resource.h>
+
+   int main() {
+       struct rlimit rlim = {1073741824, 1073741824};
+       setrlimit(RLIMIT_NOFILE, &rlim);
+       dup2(2, 1073741880);  // Triggers the warning
+       return 0;
+   }
+
+3. Observe WARNING in dmesg at mm/slub.c:5027
+
+systemd commit a8b627a introduced automatic bumping of fs.nr_open to the
+maximum possible value. The rationale was that systems with memory
+control groups (memcg) no longer need separate file descriptor limits
+since memory is properly accounted. However, this change overlooked
+that:
+
+1. The kernel's allocation functions still enforce INT_MAX as a maximum
+   size regardless of memcg accounting
+2. Programs and tests that legitimately test file descriptor limits can
+   inadvertently trigger massive allocations
+3. The resulting allocations (>8GB) are impractical and will always fail
+
+systemd's algorithm starts with INT_MAX and keeps halving the value
+until the kernel accepts it. On most systems, this results in nr_open
+being set to 1073741816 (0x3ffffff8), which is just under 2^30 (about
+1.07 billion) file descriptors.
+
+While processes rarely use file descriptors near this limit in normal
+operation, certain selftests (like
+tools/testing/selftests/core/unshare_test.c) and programs that test file
+descriptor limits can trigger this issue.
+
+Fix this by adding a check in alloc_fdtable() to ensure the requested
+allocation size does not exceed INT_MAX. This causes the operation to
+fail with -EMFILE instead of triggering a kernel warning and avoids the
+impractical >8GB memory allocation request.
+
+Fixes: 9cfe015aa424 ("get rid of NR_OPEN and introduce a sysctl_nr_open")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Link: https://lore.kernel.org/20250629074021.1038845-1-sashal@kernel.org
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/file.c |   15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -126,6 +126,21 @@ static struct fdtable * alloc_fdtable(un
+       if (unlikely(nr > sysctl_nr_open))
+               nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
++      /*
++       * Check if the allocation size would exceed INT_MAX. kvmalloc_array()
++       * and kvmalloc() will warn if the allocation size is greater than
++       * INT_MAX, as filp_cachep objects are not __GFP_NOWARN.
++       *
++       * This can happen when sysctl_nr_open is set to a very high value and
++       * a process tries to use a file descriptor near that limit. For example,
++       * if sysctl_nr_open is set to 1073741816 (0x3ffffff8) - which is what
++       * systemd typically sets it to - then trying to use a file descriptor
++       * close to that value will require allocating a file descriptor table
++       * that exceeds 8GB in size.
++       */
++      if (unlikely(nr > INT_MAX / sizeof(struct file *)))
++              return ERR_PTR(-EMFILE);
++
+       fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
+       if (!fdt)
+               goto out;
diff --git a/queue-5.15/nfsd-detect-mismatch-of-file-handle-and-delegation-stateid-in-open-op.patch b/queue-5.15/nfsd-detect-mismatch-of-file-handle-and-delegation-stateid-in-open-op.patch
new file mode 100644 (file)
index 0000000..6670a5c
--- /dev/null
@@ -0,0 +1,54 @@
+From 9c65001c57164033ad08b654c8b5ae35512ddf4a Mon Sep 17 00:00:00 2001
+From: Dai Ngo <dai.ngo@oracle.com>
+Date: Tue, 10 Jun 2025 08:35:28 -0700
+Subject: NFSD: detect mismatch of file handle and delegation stateid in OPEN op
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+commit 9c65001c57164033ad08b654c8b5ae35512ddf4a upstream.
+
+When the client sends an OPEN with claim type CLAIM_DELEG_CUR_FH or
+CLAIM_DELEGATE_CUR, the delegation stateid and the file handle
+must belong to the same file, otherwise return NFS4ERR_INVAL.
+
+Note that RFC8881, section 8.2.4, mandates the server to return
+NFS4ERR_BAD_STATEID if the selected table entry does not match the
+current filehandle. However returning NFS4ERR_BAD_STATEID in the
+OPEN causes the client to retry the operation and therefore puts the
+client into a loop. To avoid this situation we return NFS4ERR_INVAL
+instead.
+
+Reported-by: Petro Pavlov <petro.pavlov@vastdata.com>
+Fixes: c44c5eeb2c02 ("[PATCH] nfsd4: add open state code for CLAIM_DELEGATE_CUR")
+Cc: stable@vger.kernel.org
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5736,6 +5736,20 @@ nfsd4_process_open2(struct svc_rqst *rqs
+               status = nfs4_check_deleg(cl, open, &dp);
+               if (status)
+                       goto out;
++              if (dp && nfsd4_is_deleg_cur(open) &&
++                              (dp->dl_stid.sc_file != fp)) {
++                      /*
++                       * RFC8881 section 8.2.4 mandates the server to return
++                       * NFS4ERR_BAD_STATEID if the selected table entry does
++                       * not match the current filehandle. However returning
++                       * NFS4ERR_BAD_STATEID in the OPEN can cause the client
++                       * to repeatedly retry the operation with the same
++                       * stateid, since the stateid itself is valid. To avoid
++                       * this situation NFSD returns NFS4ERR_INVAL instead.
++                       */
++                      status = nfserr_inval;
++                      goto out;
++              }
+               stp = nfsd4_find_and_lock_existing_open(fp, open);
+       } else {
+               open->op_file = NULL;
diff --git a/queue-5.15/nfsd-handle-get_client_locked-failure-in-nfsd4_setclientid_confirm.patch b/queue-5.15/nfsd-handle-get_client_locked-failure-in-nfsd4_setclientid_confirm.patch
new file mode 100644 (file)
index 0000000..e9a825b
--- /dev/null
@@ -0,0 +1,71 @@
+From 908e4ead7f757504d8b345452730636e298cbf68 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Wed, 4 Jun 2025 12:01:10 -0400
+Subject: nfsd: handle get_client_locked() failure in nfsd4_setclientid_confirm()
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 908e4ead7f757504d8b345452730636e298cbf68 upstream.
+
+Lei Lu recently reported that nfsd4_setclientid_confirm() did not check
+the return value from get_client_locked(). A SETCLIENTID_CONFIRM could
+race with a confirmed client expiring and fail to get a reference. That
+could later lead to a UAF.
+
+Fix this by getting a reference early in the case where there is an
+extant confirmed client. If that fails then treat it as if there were no
+confirmed client found at all.
+
+In the case where the unconfirmed client is expiring, just fail and
+return the result from get_client_locked().
+
+Reported-by: lei lu <llfamsec@gmail.com>
+Closes: https://lore.kernel.org/linux-nfs/CAEBF3_b=UvqzNKdnfD_52L05Mqrqui9vZ2eFamgAbV0WG+FNWQ@mail.gmail.com/
+Fixes: d20c11d86d8f ("nfsd: Protect session creation and client confirm using client_lock")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4285,10 +4285,16 @@ nfsd4_setclientid_confirm(struct svc_rqs
+       }
+       status = nfs_ok;
+       if (conf) {
+-              old = unconf;
+-              unhash_client_locked(old);
+-              nfsd4_change_callback(conf, &unconf->cl_cb_conn);
+-      } else {
++              if (get_client_locked(conf) == nfs_ok) {
++                      old = unconf;
++                      unhash_client_locked(old);
++                      nfsd4_change_callback(conf, &unconf->cl_cb_conn);
++              } else {
++                      conf = NULL;
++              }
++      }
++
++      if (!conf) {
+               old = find_confirmed_client_by_name(&unconf->cl_name, nn);
+               if (old) {
+                       status = nfserr_clid_inuse;
+@@ -4305,10 +4311,14 @@ nfsd4_setclientid_confirm(struct svc_rqs
+                       }
+                       trace_nfsd_clid_replaced(&old->cl_clientid);
+               }
++              status = get_client_locked(unconf);
++              if (status != nfs_ok) {
++                      old = NULL;
++                      goto out;
++              }
+               move_to_confirmed(unconf);
+               conf = unconf;
+       }
+-      get_client_locked(conf);
+       spin_unlock(&nn->client_lock);
+       if (conf == unconf)
+               fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
index 2dbf2f0b874398acc1ab0f09c972b0a3352ea94b..9c37271aa91ac2b9a2578cdeb6b9be4ffe79196b 100644 (file)
@@ -263,3 +263,7 @@ netlink-avoid-infinite-retry-looping-in-netlink_unicast.patch
 net-gianfar-fix-device-leak-when-querying-time-stamp-info.patch
 net-dpaa-fix-device-leak-when-querying-time-stamp-info.patch
 net-usb-asix_devices-add-phy_mask-for-ax88772-mdio-bus.patch
+nfsd-handle-get_client_locked-failure-in-nfsd4_setclientid_confirm.patch
+nfsd-detect-mismatch-of-file-handle-and-delegation-stateid-in-open-op.patch
+sunvdc-balance-device-refcount-in-vdc_port_mpgroup_check.patch
+fs-prevent-file-descriptor-table-allocations-exceeding-int_max.patch
diff --git a/queue-5.15/sunvdc-balance-device-refcount-in-vdc_port_mpgroup_check.patch b/queue-5.15/sunvdc-balance-device-refcount-in-vdc_port_mpgroup_check.patch
new file mode 100644 (file)
index 0000000..62950db
--- /dev/null
@@ -0,0 +1,48 @@
+From 63ce53724637e2e7ba51fe3a4f78351715049905 Mon Sep 17 00:00:00 2001
+From: Ma Ke <make24@iscas.ac.cn>
+Date: Sat, 19 Jul 2025 15:58:56 +0800
+Subject: sunvdc: Balance device refcount in vdc_port_mpgroup_check
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ma Ke <make24@iscas.ac.cn>
+
+commit 63ce53724637e2e7ba51fe3a4f78351715049905 upstream.
+
+Using device_find_child() to locate a probed virtual-device-port node
+causes a device refcount imbalance, as device_find_child() internally
+calls get_device() to increment the device’s reference count before
+returning its pointer. vdc_port_mpgroup_check() directly returns true
+upon finding a matching device without releasing the reference via
+put_device(). We should call put_device() to decrement refcount.
+
+As the comment of device_find_child() says, 'NOTE: you will need to drop
+the reference with put_device() after use'.
+
+Found by code review.
+
+Cc: stable@vger.kernel.org
+Fixes: 3ee70591d6c4 ("sunvdc: prevent sunvdc panic when mpgroup disk added to guest domain")
+Signed-off-by: Ma Ke <make24@iscas.ac.cn>
+Link: https://lore.kernel.org/r/20250719075856.3447953-1-make24@iscas.ac.cn
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/sunvdc.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/block/sunvdc.c
++++ b/drivers/block/sunvdc.c
+@@ -948,8 +948,10 @@ static bool vdc_port_mpgroup_check(struc
+       dev = device_find_child(vdev->dev.parent, &port_data,
+                               vdc_device_probed);
+-      if (dev)
++      if (dev) {
++              put_device(dev);
+               return true;
++      }
+       return false;
+ }