6.12-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 14 Sep 2025 07:48:55 +0000 (09:48 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 14 Sep 2025 07:48:55 +0000 (09:48 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 14 Sep 2025 07:48:55 +0000 (09:48 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 14 Sep 2025 07:48:55 +0000 (09:48 +0200)
diff --git a/queue-6.12/ceph-fix-race-condition-validating-r_parent-before-applying-state.patch b/queue-6.12/ceph-fix-race-condition-validating-r_parent-before-applying-state.patch

new file mode 100644 (file)

index 0000000..49cd6c3
--- /dev/null
+++ b/queue-6.12/ceph-fix-race-condition-validating-r_parent-before-applying-state.patch
@@ -0,0 +1,648 @@
+From 15f519e9f883b316d86e2bb6b767a023aafd9d83 Mon Sep 17 00:00:00 2001
+From: Alex Markuze <amarkuze@redhat.com>
+Date: Tue, 12 Aug 2025 09:57:38 +0000
+Subject: ceph: fix race condition validating r_parent before applying state
+
+From: Alex Markuze <amarkuze@redhat.com>
+
+commit 15f519e9f883b316d86e2bb6b767a023aafd9d83 upstream.
+
+Add validation to ensure the cached parent directory inode matches the
+directory info in MDS replies. This prevents client-side race conditions
+where concurrent operations (e.g. rename) cause r_parent to become stale
+between request initiation and reply processing, which could lead to
+applying state changes to incorrect directory inodes.
+
+[ idryomov: folded a kerneldoc fixup and a follow-up fix from Alex to
+  move CEPH_CAP_PIN reference when r_parent is updated:
+
+  When the parent directory lock is not held, req->r_parent can become
+  stale and is updated to point to the correct inode.  However, the
+  associated CEPH_CAP_PIN reference was not being adjusted.  The
+  CEPH_CAP_PIN is a reference on an inode that is tracked for
+  accounting purposes.  Moving this pin is important to keep the
+  accounting balanced. When the pin was not moved from the old parent
+  to the new one, it created two problems: The reference on the old,
+  stale parent was never released, causing a reference leak.
+  A reference for the new parent was never acquired, creating the risk
+  of a reference underflow later in ceph_mdsc_release_request().  This
+  patch corrects the logic by releasing the pin from the old parent and
+  acquiring it for the new parent when r_parent is switched.  This
+  ensures reference accounting stays balanced. ]
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Alex Markuze <amarkuze@redhat.com>
+Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/debugfs.c    |   14 +---
+ fs/ceph/dir.c        |   17 ++---
+ fs/ceph/file.c       |   24 ++-----
+ fs/ceph/inode.c      |    7 --
+ fs/ceph/mds_client.c |  172 +++++++++++++++++++++++++++++++--------------------
+ fs/ceph/mds_client.h |   18 ++++-
+ 6 files changed, 145 insertions(+), 107 deletions(-)
+
+--- a/fs/ceph/debugfs.c
++++ b/fs/ceph/debugfs.c
+@@ -55,8 +55,6 @@ static int mdsc_show(struct seq_file *s,
+       struct ceph_mds_client *mdsc = fsc->mdsc;
+       struct ceph_mds_request *req;
+       struct rb_node *rp;
+-      int pathlen = 0;
+-      u64 pathbase;
+       char *path;
+ 
+       mutex_lock(&mdsc->mutex);
+@@ -81,8 +79,8 @@ static int mdsc_show(struct seq_file *s,
+               if (req->r_inode) {
+                       seq_printf(s, " #%llx", ceph_ino(req->r_inode));
+               } else if (req->r_dentry) {
+-                      path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen,
+-                                                  &pathbase, 0);
++                      struct ceph_path_info path_info = {0};
++                      path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0);
+                       if (IS_ERR(path))
+                               path = NULL;
+                       spin_lock(&req->r_dentry->d_lock);
+@@ -91,7 +89,7 @@ static int mdsc_show(struct seq_file *s,
+                                  req->r_dentry,
+                                  path ? path : "");
+                       spin_unlock(&req->r_dentry->d_lock);
+-                      ceph_mdsc_free_path(path, pathlen);
++                      ceph_mdsc_free_path_info(&path_info);
+               } else if (req->r_path1) {
+                       seq_printf(s, " #%llx/%s", req->r_ino1.ino,
+                                  req->r_path1);
+@@ -100,8 +98,8 @@ static int mdsc_show(struct seq_file *s,
+               }
+ 
+               if (req->r_old_dentry) {
+-                      path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen,
+-                                                  &pathbase, 0);
++                      struct ceph_path_info path_info = {0};
++                      path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &path_info, 0);
+                       if (IS_ERR(path))
+                               path = NULL;
+                       spin_lock(&req->r_old_dentry->d_lock);
+@@ -111,7 +109,7 @@ static int mdsc_show(struct seq_file *s,
+                                  req->r_old_dentry,
+                                  path ? path : "");
+                       spin_unlock(&req->r_old_dentry->d_lock);
+-                      ceph_mdsc_free_path(path, pathlen);
++                      ceph_mdsc_free_path_info(&path_info);
+               } else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
+                       if (req->r_ino2.ino)
+                               seq_printf(s, " #%llx/%s", req->r_ino2.ino,
+--- a/fs/ceph/dir.c
++++ b/fs/ceph/dir.c
+@@ -1263,10 +1263,8 @@ static void ceph_async_unlink_cb(struct
+ 
+       /* If op failed, mark everyone involved for errors */
+       if (result) {
+-              int pathlen = 0;
+-              u64 base = 0;
+-              char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen,
+-                                                &base, 0);
++              struct ceph_path_info path_info = {0};
++              char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
+ 
+               /* mark error on parent + clear complete */
+               mapping_set_error(req->r_parent->i_mapping, result);
+@@ -1280,8 +1278,8 @@ static void ceph_async_unlink_cb(struct
+               mapping_set_error(req->r_old_inode->i_mapping, result);
+ 
+               pr_warn_client(cl, "failure path=(%llx)%s result=%d!\n",
+-                             base, IS_ERR(path) ? "<<bad>>" : path, result);
+-              ceph_mdsc_free_path(path, pathlen);
++                             path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
++              ceph_mdsc_free_path_info(&path_info);
+       }
+ out:
+       iput(req->r_old_inode);
+@@ -1339,8 +1337,6 @@ static int ceph_unlink(struct inode *dir
+       int err = -EROFS;
+       int op;
+       char *path;
+-      int pathlen;
+-      u64 pathbase;
+ 
+       if (ceph_snap(dir) == CEPH_SNAPDIR) {
+               /* rmdir .snap/foo is RMSNAP */
+@@ -1359,14 +1355,15 @@ static int ceph_unlink(struct inode *dir
+       if (!dn) {
+               try_async = false;
+       } else {
+-              path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
++              struct ceph_path_info path_info = {0};
++              path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0);
+               if (IS_ERR(path)) {
+                       try_async = false;
+                       err = 0;
+               } else {
+                       err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
+               }
+-              ceph_mdsc_free_path(path, pathlen);
++              ceph_mdsc_free_path_info(&path_info);
+               dput(dn);
+ 
+               /* For none EACCES cases will let the MDS do the mds auth check */
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -368,8 +368,6 @@ int ceph_open(struct inode *inode, struc
+       int flags, fmode, wanted;
+       struct dentry *dentry;
+       char *path;
+-      int pathlen;
+-      u64 pathbase;
+       bool do_sync = false;
+       int mask = MAY_READ;
+ 
+@@ -399,14 +397,15 @@ int ceph_open(struct inode *inode, struc
+       if (!dentry) {
+               do_sync = true;
+       } else {
+-              path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
++              struct ceph_path_info path_info = {0};
++              path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
+               if (IS_ERR(path)) {
+                       do_sync = true;
+                       err = 0;
+               } else {
+                       err = ceph_mds_check_access(mdsc, path, mask);
+               }
+-              ceph_mdsc_free_path(path, pathlen);
++              ceph_mdsc_free_path_info(&path_info);
+               dput(dentry);
+ 
+               /* For none EACCES cases will let the MDS do the mds auth check */
+@@ -614,15 +613,13 @@ static void ceph_async_create_cb(struct
+       mapping_set_error(req->r_parent->i_mapping, result);
+ 
+       if (result) {
+-              int pathlen = 0;
+-              u64 base = 0;
+-              char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen,
+-                                                &base, 0);
++              struct ceph_path_info path_info = {0};
++              char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0);
+ 
+               pr_warn_client(cl,
+                       "async create failure path=(%llx)%s result=%d!\n",
+-                      base, IS_ERR(path) ? "<<bad>>" : path, result);
+-              ceph_mdsc_free_path(path, pathlen);
++                      path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
++              ceph_mdsc_free_path_info(&path_info);
+ 
+               ceph_dir_clear_complete(req->r_parent);
+               if (!d_unhashed(dentry))
+@@ -791,8 +788,6 @@ int ceph_atomic_open(struct inode *dir,
+       int mask;
+       int err;
+       char *path;
+-      int pathlen;
+-      u64 pathbase;
+ 
+       doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n",
+             dir, ceph_vinop(dir), dentry, dentry,
+@@ -814,7 +809,8 @@ int ceph_atomic_open(struct inode *dir,
+       if (!dn) {
+               try_async = false;
+       } else {
+-              path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
++              struct ceph_path_info path_info = {0};
++              path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0);
+               if (IS_ERR(path)) {
+                       try_async = false;
+                       err = 0;
+@@ -826,7 +822,7 @@ int ceph_atomic_open(struct inode *dir,
+                               mask |= MAY_WRITE;
+                       err = ceph_mds_check_access(mdsc, path, mask);
+               }
+-              ceph_mdsc_free_path(path, pathlen);
++              ceph_mdsc_free_path_info(&path_info);
+               dput(dn);
+ 
+               /* For none EACCES cases will let the MDS do the mds auth check */
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -2483,22 +2483,21 @@ int __ceph_setattr(struct mnt_idmap *idm
+       int truncate_retry = 20; /* The RMW will take around 50ms */
+       struct dentry *dentry;
+       char *path;
+-      int pathlen;
+-      u64 pathbase;
+       bool do_sync = false;
+ 
+       dentry = d_find_alias(inode);
+       if (!dentry) {
+               do_sync = true;
+       } else {
+-              path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
++              struct ceph_path_info path_info = {0};
++              path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
+               if (IS_ERR(path)) {
+                       do_sync = true;
+                       err = 0;
+               } else {
+                       err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
+               }
+-              ceph_mdsc_free_path(path, pathlen);
++              ceph_mdsc_free_path_info(&path_info);
+               dput(dentry);
+ 
+               /* For none EACCES cases will let the MDS do the mds auth check */
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -2686,8 +2686,7 @@ static u8 *get_fscrypt_altname(const str
+  * ceph_mdsc_build_path - build a path string to a given dentry
+  * @mdsc: mds client
+  * @dentry: dentry to which path should be built
+- * @plen: returned length of string
+- * @pbase: returned base inode number
++ * @path_info: output path, length, base ino+snap, and freepath ownership flag
+  * @for_wire: is this path going to be sent to the MDS?
+  *
+  * Build a string that represents the path to the dentry. This is mostly called
+@@ -2705,7 +2704,7 @@ static u8 *get_fscrypt_altname(const str
+  *   foo/.snap/bar -> foo//bar
+  */
+ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry,
+-                         int *plen, u64 *pbase, int for_wire)
++                         struct ceph_path_info *path_info, int for_wire)
+ {
+       struct ceph_client *cl = mdsc->fsc->client;
+       struct dentry *cur;
+@@ -2815,16 +2814,28 @@ retry:
+               return ERR_PTR(-ENAMETOOLONG);
+       }
+ 
+-      *pbase = base;
+-      *plen = PATH_MAX - 1 - pos;
++      /* Initialize the output structure */
++      memset(path_info, 0, sizeof(*path_info));
++
++      path_info->vino.ino = base;
++      path_info->pathlen = PATH_MAX - 1 - pos;
++      path_info->path = path + pos;
++      path_info->freepath = true;
++
++      /* Set snap from dentry if available */
++      if (d_inode(dentry))
++              path_info->vino.snap = ceph_snap(d_inode(dentry));
++      else
++              path_info->vino.snap = CEPH_NOSNAP;
++
+       doutc(cl, "on %p %d built %llx '%.*s'\n", dentry, d_count(dentry),
+-            base, *plen, path + pos);
++            base, PATH_MAX - 1 - pos, path + pos);
+       return path + pos;
+ }
+ 
+ static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry,
+-                           struct inode *dir, const char **ppath, int *ppathlen,
+-                           u64 *pino, bool *pfreepath, bool parent_locked)
++                           struct inode *dir, struct ceph_path_info *path_info,
++                           bool parent_locked)
+ {
+       char *path;
+ 
+@@ -2833,41 +2844,47 @@ static int build_dentry_path(struct ceph
+               dir = d_inode_rcu(dentry->d_parent);
+       if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP &&
+           !IS_ENCRYPTED(dir)) {
+-              *pino = ceph_ino(dir);
++              path_info->vino.ino = ceph_ino(dir);
++              path_info->vino.snap = ceph_snap(dir);
+               rcu_read_unlock();
+-              *ppath = dentry->d_name.name;
+-              *ppathlen = dentry->d_name.len;
++              path_info->path = dentry->d_name.name;
++              path_info->pathlen = dentry->d_name.len;
++              path_info->freepath = false;
+               return 0;
+       }
+       rcu_read_unlock();
+-      path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1);
++      path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
+-      *ppath = path;
+-      *pfreepath = true;
++      /*
++       * ceph_mdsc_build_path already fills path_info, including snap handling.
++       */
+       return 0;
+ }
+ 
+-static int build_inode_path(struct inode *inode,
+-                          const char **ppath, int *ppathlen, u64 *pino,
+-                          bool *pfreepath)
++static int build_inode_path(struct inode *inode, struct ceph_path_info *path_info)
+ {
+       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
+       struct dentry *dentry;
+       char *path;
+ 
+       if (ceph_snap(inode) == CEPH_NOSNAP) {
+-              *pino = ceph_ino(inode);
+-              *ppathlen = 0;
++              path_info->vino.ino = ceph_ino(inode);
++              path_info->vino.snap = ceph_snap(inode);
++              path_info->pathlen = 0;
++              path_info->freepath = false;
+               return 0;
+       }
+       dentry = d_find_alias(inode);
+-      path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1);
++      path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1);
+       dput(dentry);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
+-      *ppath = path;
+-      *pfreepath = true;
++      /*
++       * ceph_mdsc_build_path already fills path_info, including snap from dentry.
++       * Override with inode's snap since that's what this function is for.
++       */
++      path_info->vino.snap = ceph_snap(inode);
+       return 0;
+ }
+ 
+@@ -2877,26 +2894,32 @@ static int build_inode_path(struct inode
+  */
+ static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode,
+                                struct dentry *rdentry, struct inode *rdiri,
+-                               const char *rpath, u64 rino, const char **ppath,
+-                               int *pathlen, u64 *ino, bool *freepath,
++                               const char *rpath, u64 rino,
++                               struct ceph_path_info *path_info,
+                                bool parent_locked)
+ {
+       struct ceph_client *cl = mdsc->fsc->client;
+       int r = 0;
+ 
++      /* Initialize the output structure */
++      memset(path_info, 0, sizeof(*path_info));
++
+       if (rinode) {
+-              r = build_inode_path(rinode, ppath, pathlen, ino, freepath);
++              r = build_inode_path(rinode, path_info);
+               doutc(cl, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
+                     ceph_snap(rinode));
+       } else if (rdentry) {
+-              r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino,
+-                                      freepath, parent_locked);
+-              doutc(cl, " dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath);
++              r = build_dentry_path(mdsc, rdentry, rdiri, path_info, parent_locked);
++              doutc(cl, " dentry %p %llx/%.*s\n", rdentry, path_info->vino.ino,
++                    path_info->pathlen, path_info->path);
+       } else if (rpath || rino) {
+-              *ino = rino;
+-              *ppath = rpath;
+-              *pathlen = rpath ? strlen(rpath) : 0;
+-              doutc(cl, " path %.*s\n", *pathlen, rpath);
++              path_info->vino.ino = rino;
++              path_info->vino.snap = CEPH_NOSNAP;
++              path_info->path = rpath;
++              path_info->pathlen = rpath ? strlen(rpath) : 0;
++              path_info->freepath = false;
++
++              doutc(cl, " path %.*s\n", path_info->pathlen, rpath);
+       }
+ 
+       return r;
+@@ -2973,11 +2996,8 @@ static struct ceph_msg *create_request_m
+       struct ceph_client *cl = mdsc->fsc->client;
+       struct ceph_msg *msg;
+       struct ceph_mds_request_head_legacy *lhead;
+-      const char *path1 = NULL;
+-      const char *path2 = NULL;
+-      u64 ino1 = 0, ino2 = 0;
+-      int pathlen1 = 0, pathlen2 = 0;
+-      bool freepath1 = false, freepath2 = false;
++      struct ceph_path_info path_info1 = {0};
++      struct ceph_path_info path_info2 = {0};
+       struct dentry *old_dentry = NULL;
+       int len;
+       u16 releases;
+@@ -2987,25 +3007,49 @@ static struct ceph_msg *create_request_m
+       u16 request_head_version = mds_supported_head_version(session);
+       kuid_t caller_fsuid = req->r_cred->fsuid;
+       kgid_t caller_fsgid = req->r_cred->fsgid;
++      bool parent_locked = test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
+ 
+       ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry,
+-                            req->r_parent, req->r_path1, req->r_ino1.ino,
+-                            &path1, &pathlen1, &ino1, &freepath1,
+-                            test_bit(CEPH_MDS_R_PARENT_LOCKED,
+-                                      &req->r_req_flags));
++                                  req->r_parent, req->r_path1, req->r_ino1.ino,
++                                  &path_info1, parent_locked);
+       if (ret < 0) {
+               msg = ERR_PTR(ret);
+               goto out;
+       }
+ 
++      /*
++       * When the parent directory's i_rwsem is *not* locked, req->r_parent may
++       * have become stale (e.g. after a concurrent rename) between the time the
++       * dentry was looked up and now.  If we detect that the stored r_parent
++       * does not match the inode number we just encoded for the request, switch
++       * to the correct inode so that the MDS receives a valid parent reference.
++       */
++      if (!parent_locked && req->r_parent && path_info1.vino.ino &&
++          ceph_ino(req->r_parent) != path_info1.vino.ino) {
++              struct inode *old_parent = req->r_parent;
++              struct inode *correct_dir = ceph_get_inode(mdsc->fsc->sb, path_info1.vino, NULL);
++              if (!IS_ERR(correct_dir)) {
++                      WARN_ONCE(1, "ceph: r_parent mismatch (had %llx wanted %llx) - updating\n",
++                                ceph_ino(old_parent), path_info1.vino.ino);
++                      /*
++                       * Transfer CEPH_CAP_PIN from the old parent to the new one.
++                       * The pin was taken earlier in ceph_mdsc_submit_request().
++                       */
++                      ceph_put_cap_refs(ceph_inode(old_parent), CEPH_CAP_PIN);
++                      iput(old_parent);
++                      req->r_parent = correct_dir;
++                      ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
++              }
++      }
++
+       /* If r_old_dentry is set, then assume that its parent is locked */
+       if (req->r_old_dentry &&
+           !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED))
+               old_dentry = req->r_old_dentry;
+       ret = set_request_path_attr(mdsc, NULL, old_dentry,
+-                            req->r_old_dentry_dir,
+-                            req->r_path2, req->r_ino2.ino,
+-                            &path2, &pathlen2, &ino2, &freepath2, true);
++                                  req->r_old_dentry_dir,
++                                  req->r_path2, req->r_ino2.ino,
++                                  &path_info2, true);
+       if (ret < 0) {
+               msg = ERR_PTR(ret);
+               goto out_free1;
+@@ -3036,7 +3080,7 @@ static struct ceph_msg *create_request_m
+ 
+       /* filepaths */
+       len += 2 * (1 + sizeof(u32) + sizeof(u64));
+-      len += pathlen1 + pathlen2;
++      len += path_info1.pathlen + path_info2.pathlen;
+ 
+       /* cap releases */
+       len += sizeof(struct ceph_mds_request_release) *
+@@ -3044,9 +3088,9 @@ static struct ceph_msg *create_request_m
+                !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
+ 
+       if (req->r_dentry_drop)
+-              len += pathlen1;
++              len += path_info1.pathlen;
+       if (req->r_old_dentry_drop)
+-              len += pathlen2;
++              len += path_info2.pathlen;
+ 
+       /* MClientRequest tail */
+ 
+@@ -3159,8 +3203,8 @@ static struct ceph_msg *create_request_m
+       lhead->ino = cpu_to_le64(req->r_deleg_ino);
+       lhead->args = req->r_args;
+ 
+-      ceph_encode_filepath(&p, end, ino1, path1);
+-      ceph_encode_filepath(&p, end, ino2, path2);
++      ceph_encode_filepath(&p, end, path_info1.vino.ino, path_info1.path);
++      ceph_encode_filepath(&p, end, path_info2.vino.ino, path_info2.path);
+ 
+       /* make note of release offset, in case we need to replay */
+       req->r_request_release_offset = p - msg->front.iov_base;
+@@ -3223,11 +3267,9 @@ static struct ceph_msg *create_request_m
+       msg->hdr.data_off = cpu_to_le16(0);
+ 
+ out_free2:
+-      if (freepath2)
+-              ceph_mdsc_free_path((char *)path2, pathlen2);
++      ceph_mdsc_free_path_info(&path_info2);
+ out_free1:
+-      if (freepath1)
+-              ceph_mdsc_free_path((char *)path1, pathlen1);
++      ceph_mdsc_free_path_info(&path_info1);
+ out:
+       return msg;
+ out_err:
+@@ -4584,24 +4626,20 @@ static int reconnect_caps_cb(struct inod
+       struct ceph_pagelist *pagelist = recon_state->pagelist;
+       struct dentry *dentry;
+       struct ceph_cap *cap;
+-      char *path;
+-      int pathlen = 0, err;
+-      u64 pathbase;
++      struct ceph_path_info path_info = {0};
++      int err;
+       u64 snap_follows;
+ 
+       dentry = d_find_primary(inode);
+       if (dentry) {
+               /* set pathbase to parent dir when msg_version >= 2 */
+-              path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase,
++              char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info,
+                                           recon_state->msg_version >= 2);
+               dput(dentry);
+               if (IS_ERR(path)) {
+                       err = PTR_ERR(path);
+                       goto out_err;
+               }
+-      } else {
+-              path = NULL;
+-              pathbase = 0;
+       }
+ 
+       spin_lock(&ci->i_ceph_lock);
+@@ -4634,7 +4672,7 @@ static int reconnect_caps_cb(struct inod
+               rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
+               rec.v2.issued = cpu_to_le32(cap->issued);
+               rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+-              rec.v2.pathbase = cpu_to_le64(pathbase);
++              rec.v2.pathbase = cpu_to_le64(path_info.vino.ino);
+               rec.v2.flock_len = (__force __le32)
+                       ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
+       } else {
+@@ -4649,7 +4687,7 @@ static int reconnect_caps_cb(struct inod
+               ts = inode_get_atime(inode);
+               ceph_encode_timespec64(&rec.v1.atime, &ts);
+               rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+-              rec.v1.pathbase = cpu_to_le64(pathbase);
++              rec.v1.pathbase = cpu_to_le64(path_info.vino.ino);
+       }
+ 
+       if (list_empty(&ci->i_cap_snaps)) {
+@@ -4711,7 +4749,7 @@ encode_again:
+                           sizeof(struct ceph_filelock);
+               rec.v2.flock_len = cpu_to_le32(struct_len);
+ 
+-              struct_len += sizeof(u32) + pathlen + sizeof(rec.v2);
++              struct_len += sizeof(u32) + path_info.pathlen + sizeof(rec.v2);
+ 
+               if (struct_v >= 2)
+                       struct_len += sizeof(u64); /* snap_follows */
+@@ -4735,7 +4773,7 @@ encode_again:
+                       ceph_pagelist_encode_8(pagelist, 1);
+                       ceph_pagelist_encode_32(pagelist, struct_len);
+               }
+-              ceph_pagelist_encode_string(pagelist, path, pathlen);
++              ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen);
+               ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
+               ceph_locks_to_pagelist(flocks, pagelist,
+                                      num_fcntl_locks, num_flock_locks);
+@@ -4746,17 +4784,17 @@ out_freeflocks:
+       } else {
+               err = ceph_pagelist_reserve(pagelist,
+                                           sizeof(u64) + sizeof(u32) +
+-                                          pathlen + sizeof(rec.v1));
++                                          path_info.pathlen + sizeof(rec.v1));
+               if (err)
+                       goto out_err;
+ 
+               ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
+-              ceph_pagelist_encode_string(pagelist, path, pathlen);
++              ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen);
+               ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
+       }
+ 
+ out_err:
+-      ceph_mdsc_free_path(path, pathlen);
++      ceph_mdsc_free_path_info(&path_info);
+       if (!err)
+               recon_state->nr_caps++;
+       return err;
+--- a/fs/ceph/mds_client.h
++++ b/fs/ceph/mds_client.h
+@@ -612,14 +612,24 @@ extern int ceph_mds_check_access(struct
+ 
+ extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
+ 
+-static inline void ceph_mdsc_free_path(char *path, int len)
++/*
++ * Structure to group path-related output parameters for build_*_path functions
++ */
++struct ceph_path_info {
++      const char *path;
++      int pathlen;
++      struct ceph_vino vino;
++      bool freepath;
++};
++
++static inline void ceph_mdsc_free_path_info(const struct ceph_path_info *path_info)
+ {
+-      if (!IS_ERR_OR_NULL(path))
+-              __putname(path - (PATH_MAX - 1 - len));
++      if (path_info && path_info->freepath && !IS_ERR_OR_NULL(path_info->path))
++              __putname((char *)path_info->path - (PATH_MAX - 1 - path_info->pathlen));
+ }
+ 
+ extern char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc,
+-                                struct dentry *dentry, int *plen, u64 *base,
++                                struct dentry *dentry, struct ceph_path_info *path_info,
+                                 int for_wire);
+ 
+ extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
diff --git a/queue-6.12/ceph-fix-race-condition-where-r_parent-becomes-stale-before-sending-message.patch b/queue-6.12/ceph-fix-race-condition-where-r_parent-becomes-stale-before-sending-message.patch

new file mode 100644 (file)

index 0000000..be9f9f3
--- /dev/null
+++ b/queue-6.12/ceph-fix-race-condition-where-r_parent-becomes-stale-before-sending-message.patch
@@ -0,0 +1,192 @@
+From bec324f33d1ed346394b2eee25bf6dbf3511f727 Mon Sep 17 00:00:00 2001
+From: Alex Markuze <amarkuze@redhat.com>
+Date: Tue, 12 Aug 2025 09:57:39 +0000
+Subject: ceph: fix race condition where r_parent becomes stale before sending message
+
+From: Alex Markuze <amarkuze@redhat.com>
+
+commit bec324f33d1ed346394b2eee25bf6dbf3511f727 upstream.
+
+When the parent directory's i_rwsem is not locked, req->r_parent may become
+stale due to concurrent operations (e.g. rename) between dentry lookup and
+message creation. Validate that r_parent matches the encoded parent inode
+and update to the correct inode if a mismatch is detected.
+
+[ idryomov: folded a follow-up fix from Alex to drop extra reference
+  from ceph_get_reply_dir() in ceph_fill_trace():
+
+  ceph_get_reply_dir() may return a different, referenced inode when
+  r_parent is stale and the parent directory lock is not held.
+  ceph_fill_trace() used that inode but failed to drop the reference
+  when it differed from req->r_parent, leaking an inode reference.
+
+  Keep the directory inode in a local variable and iput() it at
+  function end if it does not match req->r_parent. ]
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Alex Markuze <amarkuze@redhat.com>
+Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/inode.c |   81 +++++++++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 69 insertions(+), 12 deletions(-)
+
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -55,6 +55,52 @@ static int ceph_set_ino_cb(struct inode
+       return 0;
+ }
+ 
++/*
++ * Check if the parent inode matches the vino from directory reply info
++ */
++static inline bool ceph_vino_matches_parent(struct inode *parent,
++                                          struct ceph_vino vino)
++{
++      return ceph_ino(parent) == vino.ino && ceph_snap(parent) == vino.snap;
++}
++
++/*
++ * Validate that the directory inode referenced by @req->r_parent matches the
++ * inode number and snapshot id contained in the reply's directory record.  If
++ * they do not match – which can theoretically happen if the parent dentry was
++ * moved between the time the request was issued and the reply arrived – fall
++ * back to looking up the correct inode in the inode cache.
++ *
++ * A reference is *always* returned.  Callers that receive a different inode
++ * than the original @parent are responsible for dropping the extra reference
++ * once the reply has been processed.
++ */
++static struct inode *ceph_get_reply_dir(struct super_block *sb,
++                                      struct inode *parent,
++                                      struct ceph_mds_reply_info_parsed *rinfo)
++{
++      struct ceph_vino vino;
++
++      if (unlikely(!rinfo->diri.in))
++              return parent; /* nothing to compare against */
++
++      /* If we didn't have a cached parent inode to begin with, just bail out. */
++      if (!parent)
++              return NULL;
++
++      vino.ino  = le64_to_cpu(rinfo->diri.in->ino);
++      vino.snap = le64_to_cpu(rinfo->diri.in->snapid);
++
++      if (likely(ceph_vino_matches_parent(parent, vino)))
++              return parent; /* matches – use the original reference */
++
++      /* Mismatch – this should be rare.  Emit a WARN and obtain the correct inode. */
++      WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n",
++                ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap);
++
++      return ceph_get_inode(sb, vino, NULL);
++}
++
+ /**
+  * ceph_new_inode - allocate a new inode in advance of an expected create
+  * @dir: parent directory for new inode
+@@ -1523,6 +1569,7 @@ int ceph_fill_trace(struct super_block *
+       struct ceph_vino tvino, dvino;
+       struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
+       struct ceph_client *cl = fsc->client;
++      struct inode *parent_dir = NULL;
+       int err = 0;
+ 
+       doutc(cl, "%p is_dentry %d is_target %d\n", req,
+@@ -1536,10 +1583,17 @@ int ceph_fill_trace(struct super_block *
+       }
+ 
+       if (rinfo->head->is_dentry) {
+-              struct inode *dir = req->r_parent;
+-
+-              if (dir) {
+-                      err = ceph_fill_inode(dir, NULL, &rinfo->diri,
++              /*
++               * r_parent may be stale, in cases when R_PARENT_LOCKED is not set,
++               * so we need to get the correct inode
++               */
++              parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo);
++              if (unlikely(IS_ERR(parent_dir))) {
++                      err = PTR_ERR(parent_dir);
++                      goto done;
++              }
++              if (parent_dir) {
++                      err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
+                                             rinfo->dirfrag, session, -1,
+                                             &req->r_caps_reservation);
+                       if (err < 0)
+@@ -1548,14 +1602,14 @@ int ceph_fill_trace(struct super_block *
+                       WARN_ON_ONCE(1);
+               }
+ 
+-              if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
++              if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
+                   test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
+                   !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
+                       bool is_nokey = false;
+                       struct qstr dname;
+                       struct dentry *dn, *parent;
+                       struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+-                      struct ceph_fname fname = { .dir        = dir,
++                      struct ceph_fname fname = { .dir        = parent_dir,
+                                                   .name       = rinfo->dname,
+                                                   .ctext      = rinfo->altname,
+                                                   .name_len   = rinfo->dname_len,
+@@ -1564,10 +1618,10 @@ int ceph_fill_trace(struct super_block *
+                       BUG_ON(!rinfo->head->is_target);
+                       BUG_ON(req->r_dentry);
+ 
+-                      parent = d_find_any_alias(dir);
++                      parent = d_find_any_alias(parent_dir);
+                       BUG_ON(!parent);
+ 
+-                      err = ceph_fname_alloc_buffer(dir, &oname);
++                      err = ceph_fname_alloc_buffer(parent_dir, &oname);
+                       if (err < 0) {
+                               dput(parent);
+                               goto done;
+@@ -1576,7 +1630,7 @@ int ceph_fill_trace(struct super_block *
+                       err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
+                       if (err < 0) {
+                               dput(parent);
+-                              ceph_fname_free_buffer(dir, &oname);
++                              ceph_fname_free_buffer(parent_dir, &oname);
+                               goto done;
+                       }
+                       dname.name = oname.name;
+@@ -1595,7 +1649,7 @@ retry_lookup:
+                                     dname.len, dname.name, dn);
+                               if (!dn) {
+                                       dput(parent);
+-                                      ceph_fname_free_buffer(dir, &oname);
++                                      ceph_fname_free_buffer(parent_dir, &oname);
+                                       err = -ENOMEM;
+                                       goto done;
+                               }
+@@ -1610,12 +1664,12 @@ retry_lookup:
+                                   ceph_snap(d_inode(dn)) != tvino.snap)) {
+                               doutc(cl, " dn %p points to wrong inode %p\n",
+                                     dn, d_inode(dn));
+-                              ceph_dir_clear_ordered(dir);
++                              ceph_dir_clear_ordered(parent_dir);
+                               d_delete(dn);
+                               dput(dn);
+                               goto retry_lookup;
+                       }
+-                      ceph_fname_free_buffer(dir, &oname);
++                      ceph_fname_free_buffer(parent_dir, &oname);
+ 
+                       req->r_dentry = dn;
+                       dput(parent);
+@@ -1794,6 +1848,9 @@ retry_lookup:
+                                           &dvino, ptvino);
+       }
+ done:
++      /* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */
++      if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent))
++              iput(parent_dir);
+       doutc(cl, "done err=%d\n", err);
+       return err;
+ }
diff --git a/queue-6.12/kernfs-fix-uaf-in-polling-when-open-file-is-released.patch b/queue-6.12/kernfs-fix-uaf-in-polling-when-open-file-is-released.patch

new file mode 100644 (file)

index 0000000..99f26df
--- /dev/null
+++ b/queue-6.12/kernfs-fix-uaf-in-polling-when-open-file-is-released.patch
@@ -0,0 +1,296 @@
+From 3c9ba2777d6c86025e1ba4186dc5cd930e40ec5f Mon Sep 17 00:00:00 2001
+From: Chen Ridong <chenridong@huawei.com>
+Date: Fri, 22 Aug 2025 07:07:14 +0000
+Subject: kernfs: Fix UAF in polling when open file is released
+
+From: Chen Ridong <chenridong@huawei.com>
+
+commit 3c9ba2777d6c86025e1ba4186dc5cd930e40ec5f upstream.
+
+A use-after-free (UAF) vulnerability was identified in the PSI (Pressure
+Stall Information) monitoring mechanism:
+
+BUG: KASAN: slab-use-after-free in psi_trigger_poll+0x3c/0x140
+Read of size 8 at addr ffff3de3d50bd308 by task systemd/1
+
+psi_trigger_poll+0x3c/0x140
+cgroup_pressure_poll+0x70/0xa0
+cgroup_file_poll+0x8c/0x100
+kernfs_fop_poll+0x11c/0x1c0
+ep_item_poll.isra.0+0x188/0x2c0
+
+Allocated by task 1:
+cgroup_file_open+0x88/0x388
+kernfs_fop_open+0x73c/0xaf0
+do_dentry_open+0x5fc/0x1200
+vfs_open+0xa0/0x3f0
+do_open+0x7e8/0xd08
+path_openat+0x2fc/0x6b0
+do_filp_open+0x174/0x368
+
+Freed by task 8462:
+cgroup_file_release+0x130/0x1f8
+kernfs_drain_open_files+0x17c/0x440
+kernfs_drain+0x2dc/0x360
+kernfs_show+0x1b8/0x288
+cgroup_file_show+0x150/0x268
+cgroup_pressure_write+0x1dc/0x340
+cgroup_file_write+0x274/0x548
+
+Reproduction Steps:
+1. Open test/cpu.pressure and establish epoll monitoring
+2. Disable monitoring: echo 0 > test/cgroup.pressure
+3. Re-enable monitoring: echo 1 > test/cgroup.pressure
+
+The race condition occurs because:
+1. When cgroup.pressure is disabled (echo 0 > cgroup.pressure), it:
+   - Releases PSI triggers via cgroup_file_release()
+   - Frees of->priv through kernfs_drain_open_files()
+2. While epoll still holds reference to the file and continues polling
+3. Re-enabling (echo 1 > cgroup.pressure) accesses freed of->priv
+
+epolling                       disable/enable cgroup.pressure
+fd=open(cpu.pressure)
+while(1)
+...
+epoll_wait
+kernfs_fop_poll
+kernfs_get_active = true       echo 0 > cgroup.pressure
+...                            cgroup_file_show
+                               kernfs_show
+                               // inactive kn
+                               kernfs_drain_open_files
+                               cft->release(of);
+                               kfree(ctx);
+                               ...
+kernfs_get_active = false
+                               echo 1 > cgroup.pressure
+                               kernfs_show
+                               kernfs_activate_one(kn);
+kernfs_fop_poll
+kernfs_get_active = true
+cgroup_file_poll
+psi_trigger_poll
+// UAF
+...
+end: close(fd)
+
+To address this issue, introduce kernfs_get_active_of() for kernfs open
+files to obtain active references. This function will fail if the open file
+has been released. Replace kernfs_get_active() with kernfs_get_active_of()
+to prevent further operations on released file descriptors.
+
+Fixes: 34f26a15611a ("sched/psi: Per-cgroup PSI accounting disable/re-enable interface")
+Cc: stable <stable@kernel.org>
+Reported-by: Zhang Zhaotian <zhangzhaotian@huawei.com>
+Signed-off-by: Chen Ridong <chenridong@huawei.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20250822070715.1565236-2-chenridong@huaweicloud.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/kernfs/file.c |   58 ++++++++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 38 insertions(+), 20 deletions(-)
+
+--- a/fs/kernfs/file.c
++++ b/fs/kernfs/file.c
+@@ -70,6 +70,24 @@ static struct kernfs_open_node *of_on(st
+                                        !list_empty(&of->list));
+ }
+ 
++/* Get active reference to kernfs node for an open file */
++static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of)
++{
++      /* Skip if file was already released */
++      if (unlikely(of->released))
++              return NULL;
++
++      if (!kernfs_get_active(of->kn))
++              return NULL;
++
++      return of;
++}
++
++static void kernfs_put_active_of(struct kernfs_open_file *of)
++{
++      return kernfs_put_active(of->kn);
++}
++
+ /**
+  * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
+  *
+@@ -139,7 +157,7 @@ static void kernfs_seq_stop_active(struc
+ 
+       if (ops->seq_stop)
+               ops->seq_stop(sf, v);
+-      kernfs_put_active(of->kn);
++      kernfs_put_active_of(of);
+ }
+ 
+ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
+@@ -152,7 +170,7 @@ static void *kernfs_seq_start(struct seq
+        * the ops aren't called concurrently for the same open file.
+        */
+       mutex_lock(&of->mutex);
+-      if (!kernfs_get_active(of->kn))
++      if (!kernfs_get_active_of(of))
+               return ERR_PTR(-ENODEV);
+ 
+       ops = kernfs_ops(of->kn);
+@@ -238,7 +256,7 @@ static ssize_t kernfs_file_read_iter(str
+        * the ops aren't called concurrently for the same open file.
+        */
+       mutex_lock(&of->mutex);
+-      if (!kernfs_get_active(of->kn)) {
++      if (!kernfs_get_active_of(of)) {
+               len = -ENODEV;
+               mutex_unlock(&of->mutex);
+               goto out_free;
+@@ -252,7 +270,7 @@ static ssize_t kernfs_file_read_iter(str
+       else
+               len = -EINVAL;
+ 
+-      kernfs_put_active(of->kn);
++      kernfs_put_active_of(of);
+       mutex_unlock(&of->mutex);
+ 
+       if (len < 0)
+@@ -323,7 +341,7 @@ static ssize_t kernfs_fop_write_iter(str
+        * the ops aren't called concurrently for the same open file.
+        */
+       mutex_lock(&of->mutex);
+-      if (!kernfs_get_active(of->kn)) {
++      if (!kernfs_get_active_of(of)) {
+               mutex_unlock(&of->mutex);
+               len = -ENODEV;
+               goto out_free;
+@@ -335,7 +353,7 @@ static ssize_t kernfs_fop_write_iter(str
+       else
+               len = -EINVAL;
+ 
+-      kernfs_put_active(of->kn);
++      kernfs_put_active_of(of);
+       mutex_unlock(&of->mutex);
+ 
+       if (len > 0)
+@@ -357,13 +375,13 @@ static void kernfs_vma_open(struct vm_ar
+       if (!of->vm_ops)
+               return;
+ 
+-      if (!kernfs_get_active(of->kn))
++      if (!kernfs_get_active_of(of))
+               return;
+ 
+       if (of->vm_ops->open)
+               of->vm_ops->open(vma);
+ 
+-      kernfs_put_active(of->kn);
++      kernfs_put_active_of(of);
+ }
+ 
+ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
+@@ -375,14 +393,14 @@ static vm_fault_t kernfs_vma_fault(struc
+       if (!of->vm_ops)
+               return VM_FAULT_SIGBUS;
+ 
+-      if (!kernfs_get_active(of->kn))
++      if (!kernfs_get_active_of(of))
+               return VM_FAULT_SIGBUS;
+ 
+       ret = VM_FAULT_SIGBUS;
+       if (of->vm_ops->fault)
+               ret = of->vm_ops->fault(vmf);
+ 
+-      kernfs_put_active(of->kn);
++      kernfs_put_active_of(of);
+       return ret;
+ }
+ 
+@@ -395,7 +413,7 @@ static vm_fault_t kernfs_vma_page_mkwrit
+       if (!of->vm_ops)
+               return VM_FAULT_SIGBUS;
+ 
+-      if (!kernfs_get_active(of->kn))
++      if (!kernfs_get_active_of(of))
+               return VM_FAULT_SIGBUS;
+ 
+       ret = 0;
+@@ -404,7 +422,7 @@ static vm_fault_t kernfs_vma_page_mkwrit
+       else
+               file_update_time(file);
+ 
+-      kernfs_put_active(of->kn);
++      kernfs_put_active_of(of);
+       return ret;
+ }
+ 
+@@ -418,14 +436,14 @@ static int kernfs_vma_access(struct vm_a
+       if (!of->vm_ops)
+               return -EINVAL;
+ 
+-      if (!kernfs_get_active(of->kn))
++      if (!kernfs_get_active_of(of))
+               return -EINVAL;
+ 
+       ret = -EINVAL;
+       if (of->vm_ops->access)
+               ret = of->vm_ops->access(vma, addr, buf, len, write);
+ 
+-      kernfs_put_active(of->kn);
++      kernfs_put_active_of(of);
+       return ret;
+ }
+ 
+@@ -455,7 +473,7 @@ static int kernfs_fop_mmap(struct file *
+       mutex_lock(&of->mutex);
+ 
+       rc = -ENODEV;
+-      if (!kernfs_get_active(of->kn))
++      if (!kernfs_get_active_of(of))
+               goto out_unlock;
+ 
+       ops = kernfs_ops(of->kn);
+@@ -490,7 +508,7 @@ static int kernfs_fop_mmap(struct file *
+       }
+       vma->vm_ops = &kernfs_vm_ops;
+ out_put:
+-      kernfs_put_active(of->kn);
++      kernfs_put_active_of(of);
+ out_unlock:
+       mutex_unlock(&of->mutex);
+ 
+@@ -852,7 +870,7 @@ static __poll_t kernfs_fop_poll(struct f
+       struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
+       __poll_t ret;
+ 
+-      if (!kernfs_get_active(kn))
++      if (!kernfs_get_active_of(of))
+               return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
+ 
+       if (kn->attr.ops->poll)
+@@ -860,7 +878,7 @@ static __poll_t kernfs_fop_poll(struct f
+       else
+               ret = kernfs_generic_poll(of, wait);
+ 
+-      kernfs_put_active(kn);
++      kernfs_put_active_of(of);
+       return ret;
+ }
+ 
+@@ -875,7 +893,7 @@ static loff_t kernfs_fop_llseek(struct f
+        * the ops aren't called concurrently for the same open file.
+        */
+       mutex_lock(&of->mutex);
+-      if (!kernfs_get_active(of->kn)) {
++      if (!kernfs_get_active_of(of)) {
+               mutex_unlock(&of->mutex);
+               return -ENODEV;
+       }
+@@ -886,7 +904,7 @@ static loff_t kernfs_fop_llseek(struct f
+       else
+               ret = generic_file_llseek(file, offset, whence);
+ 
+-      kernfs_put_active(of->kn);
++      kernfs_put_active_of(of);
+       mutex_unlock(&of->mutex);
+       return ret;
+ }
diff --git a/queue-6.12/libceph-fix-invalid-accesses-to-ceph_connection_v1_info.patch b/queue-6.12/libceph-fix-invalid-accesses-to-ceph_connection_v1_info.patch

new file mode 100644 (file)

index 0000000..6efd18c
--- /dev/null
+++ b/queue-6.12/libceph-fix-invalid-accesses-to-ceph_connection_v1_info.patch
@@ -0,0 +1,56 @@
+From cdbc9836c7afadad68f374791738f118263c5371 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Thu, 3 Jul 2025 12:10:50 +0200
+Subject: libceph: fix invalid accesses to ceph_connection_v1_info
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit cdbc9836c7afadad68f374791738f118263c5371 upstream.
+
+There is a place where generic code in messenger.c is reading and
+another place where it is writing to con->v1 union member without
+checking that the union member is active (i.e. msgr1 is in use).
+
+On 64-bit systems, con->v1.auth_retry overlaps with con->v2.out_iter,
+so such a read is almost guaranteed to return a bogus value instead of
+0 when msgr2 is in use.  This ends up being fairly benign because the
+side effect is just the invalidation of the authorizer and successive
+fetching of new tickets.
+
+con->v1.connect_seq overlaps with con->v2.conn_bufs and the fact that
+it's being written to can cause more serious consequences, but luckily
+it's not something that happens often.
+
+Cc: stable@vger.kernel.org
+Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1524,7 +1524,7 @@ static void con_fault_finish(struct ceph
+        * in case we faulted due to authentication, invalidate our
+        * current tickets so that we can get new ones.
+        */
+-      if (con->v1.auth_retry) {
++      if (!ceph_msgr2(from_msgr(con->msgr)) && con->v1.auth_retry) {
+               dout("auth_retry %d, invalidating\n", con->v1.auth_retry);
+               if (con->ops->invalidate_authorizer)
+                       con->ops->invalidate_authorizer(con);
+@@ -1714,9 +1714,10 @@ static void clear_standby(struct ceph_co
+ {
+       /* come back from STANDBY? */
+       if (con->state == CEPH_CON_S_STANDBY) {
+-              dout("clear_standby %p and ++connect_seq\n", con);
++              dout("clear_standby %p\n", con);
+               con->state = CEPH_CON_S_PREOPEN;
+-              con->v1.connect_seq++;
++              if (!ceph_msgr2(from_msgr(con->msgr)))
++                      con->v1.connect_seq++;
+               WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_WRITE_PENDING));
+               WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_KEEPALIVE_PENDING));
+       }
diff --git a/queue-6.12/series b/queue-6.12/series

index 696dbdd5507696972a5f6ae1fa227c2fc231e4c8..916ac5fa232228c5cb2bf8c87b9f1cc421838f3e 100644 (file)
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -72,3 +72,7 @@ netlink-specs-mptcp-add-missing-server-side-attr.patch
  netlink-specs-mptcp-clearly-mention-attributes.patch
  netlink-specs-mptcp-replace-underscores-with-dashes-in-names.patch
  netlink-specs-mptcp-fix-if-idx-attribute-type.patch
+kernfs-fix-uaf-in-polling-when-open-file-is-released.patch
+libceph-fix-invalid-accesses-to-ceph_connection_v1_info.patch
+ceph-fix-race-condition-validating-r_parent-before-applying-state.patch
+ceph-fix-race-condition-where-r_parent-becomes-stale-before-sending-message.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 14 Sep 2025 07:48:55 +0000 (09:48 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 14 Sep 2025 07:48:55 +0000 (09:48 +0200)
queue-6.12/ceph-fix-race-condition-validating-r_parent-before-applying-state.patch	[new file with mode: 0644]	patch | blob
queue-6.12/ceph-fix-race-condition-where-r_parent-becomes-stale-before-sending-message.patch	[new file with mode: 0644]	patch | blob
queue-6.12/kernfs-fix-uaf-in-polling-when-open-file-is-released.patch	[new file with mode: 0644]	patch | blob
queue-6.12/libceph-fix-invalid-accesses-to-ceph_connection_v1_info.patch	[new file with mode: 0644]	patch | blob
queue-6.12/series		patch | blob | blame | history