]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
fs: use do_splice_direct() for nfsd/ksmbd server-side-copy
authorAmir Goldstein <amir73il@gmail.com>
Thu, 30 Nov 2023 14:16:24 +0000 (16:16 +0200)
committerChristian Brauner <brauner@kernel.org>
Tue, 5 Dec 2023 11:58:02 +0000 (12:58 +0100)
nfsd/ksmbd call vfs_copy_file_range() with flag COPY_FILE_SPLICE to
perform kernel copy between two files on any two filesystems.

Splicing input file, while holding file_start_write() on the output file
which is on a different sb, posses a risk for fanotify related deadlocks.

We only need to call splice_file_range() from within the context of
->copy_file_range() filesystem methods with file_start_write() held.

To avoid the possible deadlocks, always use do_splice_direct() instead of
splice_file_range() for the kernel copy fallback in vfs_copy_file_range()
without holding file_start_write().

Reported-and-tested-by: Bert Karwatzki <spasswolf@web.de>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20231130141624.3338942-4-amir73il@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/read_write.c

index 0bc99f38e623d79df4ce68254329e895d15a0b38..01a14570015b17df29c824ad0fd2b9903226ab44 100644 (file)
@@ -1421,6 +1421,10 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
                                struct file *file_out, loff_t pos_out,
                                size_t len, unsigned int flags)
 {
+       /* May only be called from within ->copy_file_range() methods */
+       if (WARN_ON_ONCE(flags))
+               return -EINVAL;
+
        return splice_file_range(file_in, &pos_in, file_out, &pos_out,
                                 min_t(size_t, len, MAX_RW_COUNT));
 }
@@ -1510,6 +1514,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 {
        ssize_t ret;
        bool splice = flags & COPY_FILE_SPLICE;
+       bool samesb = file_inode(file_in)->i_sb == file_inode(file_out)->i_sb;
 
        if (flags & ~COPY_FILE_SPLICE)
                return -EINVAL;
@@ -1541,19 +1546,24 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
                ret = file_out->f_op->copy_file_range(file_in, pos_in,
                                                      file_out, pos_out,
                                                      len, flags);
-               goto done;
-       }
-
-       if (!splice && file_in->f_op->remap_file_range &&
-           file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
+       } else if (!splice && file_in->f_op->remap_file_range && samesb) {
                ret = file_in->f_op->remap_file_range(file_in, pos_in,
                                file_out, pos_out,
                                min_t(loff_t, MAX_RW_COUNT, len),
                                REMAP_FILE_CAN_SHORTEN);
-               if (ret > 0)
-                       goto done;
+               /* fallback to splice */
+               if (ret <= 0)
+                       splice = true;
+       } else if (samesb) {
+               /* Fallback to splice for same sb copy for backward compat */
+               splice = true;
        }
 
+       file_end_write(file_out);
+
+       if (!splice)
+               goto done;
+
        /*
         * We can get here for same sb copy of filesystems that do not implement
         * ->copy_file_range() in case filesystem does not support clone or in
@@ -1565,11 +1575,16 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
         * and which filesystems do not, that will allow userspace tools to
         * make consistent desicions w.r.t using copy_file_range().
         *
-        * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE.
+        * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE
+        * for server-side-copy between any two sb.
+        *
+        * In any case, we call do_splice_direct() and not splice_file_range(),
+        * without file_start_write() held, to avoid possible deadlocks related
+        * to splicing from input file, while file_start_write() is held on
+        * the output file on a different sb.
         */
-       ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
-                                     flags);
-
+       ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+                              min_t(size_t, len, MAX_RW_COUNT), 0);
 done:
        if (ret > 0) {
                fsnotify_access(file_in);
@@ -1581,8 +1596,6 @@ done:
        inc_syscr(current);
        inc_syscw(current);
 
-       file_end_write(file_out);
-
        return ret;
 }
 EXPORT_SYMBOL(vfs_copy_file_range);