Merge tag 'vfs-6.17-rc1.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Jul 2025 21:10:15 +0000 (14:10 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Jul 2025 21:10:15 +0000 (14:10 -0700)
diff --cc fs/coredump.c

index fadf9d4be2e148d697afd6d66624b535caf1cf10,55d6a713a0fb0859c01ddb756b587a2d496eb3e2..fedbead956ed16636c333eea7d5fdc96c5497503
--- 1/fs/coredump.c
--- 2/fs/coredump.c
+++ b/fs/coredump.c
@@@ -662,439 -632,8 +662,434 @@@ static int umh_coredump_setup(struct su
         return 0;
   }
   
- -void do_coredump(const kernel_siginfo_t *siginfo)
+ +#ifdef CONFIG_UNIX
+ +static bool coredump_sock_connect(struct core_name *cn, struct coredump_params *cprm)
+ +{
+ +      struct file *file __free(fput) = NULL;
+ +      struct sockaddr_un addr = {
+ +              .sun_family = AF_UNIX,
+ +      };
+ +      ssize_t addr_len;
+ +      int retval;
+ +      struct socket *socket;
+ +
+ +      addr_len = strscpy(addr.sun_path, cn->corename);
+ +      if (addr_len < 0)
+ +              return false;
+ +      addr_len += offsetof(struct sockaddr_un, sun_path) + 1;
+ +
+ +      /*
+ +       * It is possible that the userspace process which is supposed
+ +       * to handle the coredump and is listening on the AF_UNIX socket
+ +       * coredumps. Userspace should just mark itself non dumpable.
+ +       */
+ +
+ +      retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket);
+ +      if (retval < 0)
+ +              return false;
+ +
+ +      file = sock_alloc_file(socket, 0, NULL);
+ +      if (IS_ERR(file))
+ +              return false;
+ +
+ +      /*
+ +       * Set the thread-group leader pid which is used for the peer
+ +       * credentials during connect() below. Then immediately register
+ +       * it in pidfs...
+ +       */
+ +      cprm->pid = task_tgid(current);
+ +      retval = pidfs_register_pid(cprm->pid);
+ +      if (retval)
+ +              return false;
+ +
+ +      /*
+ +       * ... and set the coredump information so userspace has it
+ +       * available after connect()...
+ +       */
+ +      pidfs_coredump(cprm);
+ +
+ +      retval = kernel_connect(socket, (struct sockaddr *)(&addr), addr_len,
+ +                              O_NONBLOCK | SOCK_COREDUMP);
-       /*
-        * ... Make sure to only put our reference after connect() took
-        * its own reference keeping the pidfs entry alive ...
-        */
-       pidfs_put_pid(cprm->pid);
+ +
+ +      if (retval) {
+ +              if (retval == -EAGAIN)
+ +                      coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path);
+ +              else
+ +                      coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval);
+ +              return false;
+ +      }
+ +
+ +      /* ... and validate that @sk_peer_pid matches @cprm.pid. */
+ +      if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm->pid))
+ +              return false;
+ +
+ +      cprm->limit = RLIM_INFINITY;
+ +      cprm->file = no_free_ptr(file);
+ +
+ +      return true;
+ +}
+ +
+ +static inline bool coredump_sock_recv(struct file *file, struct coredump_ack *ack, size_t size, int flags)
+ +{
+ +      struct msghdr msg = {};
+ +      struct kvec iov = { .iov_base = ack, .iov_len = size };
+ +      ssize_t ret;
+ +
+ +      memset(ack, 0, size);
+ +      ret = kernel_recvmsg(sock_from_file(file), &msg, &iov, 1, size, flags);
+ +      return ret == size;
+ +}
+ +
+ +static inline bool coredump_sock_send(struct file *file, struct coredump_req *req)
+ +{
+ +      struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
+ +      struct kvec iov = { .iov_base = req, .iov_len = sizeof(*req) };
+ +      ssize_t ret;
+ +
+ +      ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(*req));
+ +      return ret == sizeof(*req);
+ +}
+ +
+ +static_assert(sizeof(enum coredump_mark) == sizeof(__u32));
+ +
+ +static inline bool coredump_sock_mark(struct file *file, enum coredump_mark mark)
+ +{
+ +      struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
+ +      struct kvec iov = { .iov_base = &mark, .iov_len = sizeof(mark) };
+ +      ssize_t ret;
+ +
+ +      ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(mark));
+ +      return ret == sizeof(mark);
+ +}
+ +
+ +static inline void coredump_sock_wait(struct file *file)
+ +{
+ +      ssize_t n;
+ +
+ +      /*
+ +       * We use a simple read to wait for the coredump processing to
+ +       * finish. Either the socket is closed or we get sent unexpected
+ +       * data. In both cases, we're done.
+ +       */
+ +      n = __kernel_read(file, &(char){ 0 }, 1, NULL);
+ +      if (n > 0)
+ +              coredump_report_failure("Coredump socket had unexpected data");
+ +      else if (n < 0)
+ +              coredump_report_failure("Coredump socket failed");
+ +}
+ +
+ +static inline void coredump_sock_shutdown(struct file *file)
+ +{
+ +      struct socket *socket;
+ +
+ +      socket = sock_from_file(file);
+ +      if (!socket)
+ +              return;
+ +
+ +      /* Let userspace know we're done processing the coredump. */
+ +      kernel_sock_shutdown(socket, SHUT_WR);
+ +}
+ +
+ +static bool coredump_sock_request(struct core_name *cn, struct coredump_params *cprm)
+ +{
+ +      struct coredump_req req = {
+ +              .size           = sizeof(struct coredump_req),
+ +              .mask           = COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ +                                COREDUMP_REJECT | COREDUMP_WAIT,
+ +              .size_ack       = sizeof(struct coredump_ack),
+ +      };
+ +      struct coredump_ack ack = {};
+ +      ssize_t usize;
+ +
+ +      if (cn->core_type != COREDUMP_SOCK_REQ)
+ +              return true;
+ +
+ +      /* Let userspace know what we support. */
+ +      if (!coredump_sock_send(cprm->file, &req))
+ +              return false;
+ +
+ +      /* Peek the size of the coredump_ack. */
+ +      if (!coredump_sock_recv(cprm->file, &ack, sizeof(ack.size),
+ +                              MSG_PEEK | MSG_WAITALL))
+ +              return false;
+ +
+ +      /* Refuse unknown coredump_ack sizes. */
+ +      usize = ack.size;
+ +      if (usize < COREDUMP_ACK_SIZE_VER0) {
+ +              coredump_sock_mark(cprm->file, COREDUMP_MARK_MINSIZE);
+ +              return false;
+ +      }
+ +
+ +      if (usize > sizeof(ack)) {
+ +              coredump_sock_mark(cprm->file, COREDUMP_MARK_MAXSIZE);
+ +              return false;
+ +      }
+ +
+ +      /* Now retrieve the coredump_ack. */
+ +      if (!coredump_sock_recv(cprm->file, &ack, usize, MSG_WAITALL))
+ +              return false;
+ +      if (ack.size != usize)
+ +              return false;
+ +
+ +      /* Refuse unknown coredump_ack flags. */
+ +      if (ack.mask & ~req.mask) {
+ +              coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED);
+ +              return false;
+ +      }
+ +
+ +      /* Refuse mutually exclusive options. */
+ +      if (hweight64(ack.mask & (COREDUMP_USERSPACE | COREDUMP_KERNEL |
+ +                                COREDUMP_REJECT)) != 1) {
+ +              coredump_sock_mark(cprm->file, COREDUMP_MARK_CONFLICTING);
+ +              return false;
+ +      }
+ +
+ +      if (ack.spare) {
+ +              coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED);
+ +              return false;
+ +      }
+ +
+ +      cn->mask = ack.mask;
+ +      return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK);
+ +}
+ +
+ +static bool coredump_socket(struct core_name *cn, struct coredump_params *cprm)
+ +{
+ +      if (!coredump_sock_connect(cn, cprm))
+ +              return false;
+ +
+ +      return coredump_sock_request(cn, cprm);
+ +}
+ +#else
+ +static inline void coredump_sock_wait(struct file *file) { }
+ +static inline void coredump_sock_shutdown(struct file *file) { }
+ +static inline bool coredump_socket(struct core_name *cn, struct coredump_params *cprm) { return false; }
+ +#endif
+ +
+ +/* cprm->mm_flags contains a stable snapshot of dumpability flags. */
+ +static inline bool coredump_force_suid_safe(const struct coredump_params *cprm)
+ +{
+ +      /* Require nonrelative corefile path and be extra careful. */
+ +      return __get_dumpable(cprm->mm_flags) == SUID_DUMP_ROOT;
+ +}
+ +
+ +static bool coredump_file(struct core_name *cn, struct coredump_params *cprm,
+ +                        const struct linux_binfmt *binfmt)
+ +{
+ +      struct mnt_idmap *idmap;
+ +      struct inode *inode;
+ +      struct file *file __free(fput) = NULL;
+ +      int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | O_LARGEFILE | O_EXCL;
+ +
+ +      if (cprm->limit < binfmt->min_coredump)
+ +              return false;
+ +
+ +      if (coredump_force_suid_safe(cprm) && cn->corename[0] != '/') {
+ +              coredump_report_failure("this process can only dump core to a fully qualified path, skipping core dump");
+ +              return false;
+ +      }
+ +
+ +      /*
+ +       * Unlink the file if it exists unless this is a SUID
+ +       * binary - in that case, we're running around with root
+ +       * privs and don't want to unlink another user's coredump.
+ +       */
+ +      if (!coredump_force_suid_safe(cprm)) {
+ +              /*
+ +               * If it doesn't exist, that's fine. If there's some
+ +               * other problem, we'll catch it at the filp_open().
+ +               */
+ +              do_unlinkat(AT_FDCWD, getname_kernel(cn->corename));
+ +      }
+ +
+ +      /*
+ +       * There is a race between unlinking and creating the
+ +       * file, but if that causes an EEXIST here, that's
+ +       * fine - another process raced with us while creating
+ +       * the corefile, and the other process won. To userspace,
+ +       * what matters is that at least one of the two processes
+ +       * writes its coredump successfully, not which one.
+ +       */
+ +      if (coredump_force_suid_safe(cprm)) {
+ +              /*
+ +               * Using user namespaces, normal user tasks can change
+ +               * their current->fs->root to point to arbitrary
+ +               * directories. Since the intention of the "only dump
+ +               * with a fully qualified path" rule is to control where
+ +               * coredumps may be placed using root privileges,
+ +               * current->fs->root must not be used. Instead, use the
+ +               * root directory of init_task.
+ +               */
+ +              struct path root;
+ +
+ +              task_lock(&init_task);
+ +              get_fs_root(init_task.fs, &root);
+ +              task_unlock(&init_task);
+ +              file = file_open_root(&root, cn->corename, open_flags, 0600);
+ +              path_put(&root);
+ +      } else {
+ +              file = filp_open(cn->corename, open_flags, 0600);
+ +      }
+ +      if (IS_ERR(file))
+ +              return false;
+ +
+ +      inode = file_inode(file);
+ +      if (inode->i_nlink > 1)
+ +              return false;
+ +      if (d_unhashed(file->f_path.dentry))
+ +              return false;
+ +      /*
+ +       * AK: actually i see no reason to not allow this for named
+ +       * pipes etc, but keep the previous behaviour for now.
+ +       */
+ +      if (!S_ISREG(inode->i_mode))
+ +              return false;
+ +      /*
+ +       * Don't dump core if the filesystem changed owner or mode
+ +       * of the file during file creation. This is an issue when
+ +       * a process dumps core while its cwd is e.g. on a vfat
+ +       * filesystem.
+ +       */
+ +      idmap = file_mnt_idmap(file);
+ +      if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) {
+ +              coredump_report_failure("Core dump to %s aborted: cannot preserve file owner", cn->corename);
+ +              return false;
+ +      }
+ +      if ((inode->i_mode & 0677) != 0600) {
+ +              coredump_report_failure("Core dump to %s aborted: cannot preserve file permissions", cn->corename);
+ +              return false;
+ +      }
+ +      if (!(file->f_mode & FMODE_CAN_WRITE))
+ +              return false;
+ +      if (do_truncate(idmap, file->f_path.dentry, 0, 0, file))
+ +              return false;
+ +
+ +      cprm->file = no_free_ptr(file);
+ +      return true;
+ +}
+ +
+ +static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm,
+ +                        size_t *argv, int argc)
+ +{
+ +      int argi;
+ +      char **helper_argv __free(kfree) = NULL;
+ +      struct subprocess_info *sub_info;
+ +
+ +      if (cprm->limit == 1) {
+ +              /* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
+ +               *
+ +               * Normally core limits are irrelevant to pipes, since
+ +               * we're not writing to the file system, but we use
+ +               * cprm.limit of 1 here as a special value, this is a
+ +               * consistent way to catch recursive crashes.
+ +               * We can still crash if the core_pattern binary sets
+ +               * RLIM_CORE = !1, but it runs as root, and can do
+ +               * lots of stupid things.
+ +               *
+ +               * Note that we use task_tgid_vnr here to grab the pid
+ +               * of the process group leader.  That way we get the
+ +               * right pid if a thread in a multi-threaded
+ +               * core_pattern process dies.
+ +               */
+ +              coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
+ +              return false;
+ +      }
+ +      cprm->limit = RLIM_INFINITY;
+ +
+ +      cn->core_pipe_limit = atomic_inc_return(&core_pipe_count);
+ +      if (core_pipe_limit && (core_pipe_limit < cn->core_pipe_limit)) {
+ +              coredump_report_failure("over core_pipe_limit, skipping core dump");
+ +              return false;
+ +      }
+ +
+ +      helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), GFP_KERNEL);
+ +      if (!helper_argv) {
+ +              coredump_report_failure("%s failed to allocate memory", __func__);
+ +              return false;
+ +      }
+ +      for (argi = 0; argi < argc; argi++)
+ +              helper_argv[argi] = cn->corename + argv[argi];
+ +      helper_argv[argi] = NULL;
+ +
+ +      sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL,
+ +                                           GFP_KERNEL, umh_coredump_setup,
+ +                                           NULL, cprm);
+ +      if (!sub_info)
+ +              return false;
+ +
+ +      if (call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC)) {
+ +              coredump_report_failure("|%s pipe failed", cn->corename);
+ +              return false;
+ +      }
+ +
+ +      /*
+ +       * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
+ +       * have this set to NULL.
+ +       */
+ +      if (!cprm->file) {
+ +              coredump_report_failure("Core dump to |%s disabled", cn->corename);
+ +              return false;
+ +      }
+ +
+ +      return true;
+ +}
+ +
+ +static bool coredump_write(struct core_name *cn,
+ +                        struct coredump_params *cprm,
+ +                        struct linux_binfmt *binfmt)
+ +{
+ +
+ +      if (dump_interrupted())
+ +              return true;
+ +
+ +      if (!dump_vma_snapshot(cprm))
+ +              return false;
+ +
+ +      file_start_write(cprm->file);
+ +      cn->core_dumped = binfmt->core_dump(cprm);
+ +      /*
+ +       * Ensures that file size is big enough to contain the current
+ +       * file position. This prevents gdb from complaining about
+ +       * a truncated file if the last "write" to the file was
+ +       * dump_skip.
+ +       */
+ +      if (cprm->to_skip) {
+ +              cprm->to_skip--;
+ +              dump_emit(cprm, "", 1);
+ +      }
+ +      file_end_write(cprm->file);
+ +      free_vma_snapshot(cprm);
+ +      return true;
+ +}
+ +
+ +static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm)
+ +{
+ +      if (cprm->file)
+ +              filp_close(cprm->file, NULL);
+ +      if (cn->core_pipe_limit) {
+ +              VFS_WARN_ON_ONCE(cn->core_type != COREDUMP_PIPE);
+ +              atomic_dec(&core_pipe_count);
+ +      }
+ +      kfree(cn->corename);
+ +      coredump_finish(cn->core_dumped);
+ +}
+ +
+ +static inline bool coredump_skip(const struct coredump_params *cprm,
+ +                               const struct linux_binfmt *binfmt)
+ +{
+ +      if (!binfmt)
+ +              return true;
+ +      if (!binfmt->core_dump)
+ +              return true;
+ +      if (!__get_dumpable(cprm->mm_flags))
+ +              return true;
+ +      return false;
+ +}
+ +
+ +void vfs_coredump(const kernel_siginfo_t *siginfo)
   {
+ +      struct cred *cred __free(put_cred) = NULL;
+ +      size_t *argv __free(kfree) = NULL;
         struct core_state core_state;
         struct core_name cn;
         struct mm_struct *mm = current->mm;
diff --cc fs/exec.c
Simple merge
diff --cc fs/internal.h
Simple merge
diff --cc fs/libfs.c
Simple merge
diff --cc fs/namei.c
Simple merge
diff --cc fs/pidfs.c
Simple merge
diff --cc net/unix/af_unix.c

index 52b155123985a18632fc12dc986150e38f2fee70,c247fb9ac761d22c8d51f588458483913a783851..a8895786e0167cbcc0c923eb613525875e696d97
--- 1/net/unix/af_unix.c
--- 2/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@@ -1975,18 -1970,30 +1981,31 @@@ static void unix_skb_to_scm(struct sk_b
    * Some apps rely on write() giving SCM_CREDENTIALS
    * We include credentials if source or destination socket
    * asserted SOCK_PASSCRED.
+  *
+  * Context: May sleep.
+  * Return: On success zero, on error a negative error code is returned.
    */
- static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
-                                const struct sock *other)
+ static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
+                               const struct sock *other)
   {
         if (UNIXCB(skb).pid)
-               return;
+               return 0;
   
- -      if (unix_may_passcred(sk) || unix_may_passcred(other)) {
+ +      if (unix_may_passcred(sk) || unix_may_passcred(other) ||
+ +          !other->sk_socket) {
-               UNIXCB(skb).pid = get_pid(task_tgid(current));
+               struct pid *pid;
+               int err;
+ 
+               pid = task_tgid(current);
+               err = pidfs_register_pid(pid);
+               if (unlikely(err))
+                       return err;
+ 
+               UNIXCB(skb).pid = get_pid(pid);
                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
         }
+ 
+       return 0;
   }
   
   static bool unix_skb_scm_eq(struct sk_buff *skb,
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 28 Jul 2025 21:10:15 +0000 (14:10 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 28 Jul 2025 21:10:15 +0000 (14:10 -0700)
		1	2
fs/coredump.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/exec.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/internal.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/libfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/pidfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/unix/af_unix.c	patch \|	diff1 \|	diff2 \|	blob \| history