From: Lennart Poettering Date: Fri, 26 Feb 2021 09:25:24 +0000 (+0100) Subject: copy: handle copy_file_range() weirdness on procfs/sysfs X-Git-Tag: v248-rc3~102 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ee1aa61c4710ae567a2b844e0f0bb8cb0456ab8c;p=thirdparty%2Fsystemd.git copy: handle copy_file_range() weirdness on procfs/sysfs This addresses the issue described in https://lwn.net/Articles/846403/ and makes sure we will be able to stream bytes from procfs/sysfs via copy_bytes() if people ask us to. --- diff --git a/src/basic/copy.c b/src/basic/copy.c index e66d1064b93..6bb02c03a66 100644 --- a/src/basic/copy.c +++ b/src/basic/copy.c @@ -112,7 +112,7 @@ int copy_bytes_full( copy_progress_bytes_t progress, void *userdata) { - bool try_cfr = true, try_sendfile = true, try_splice = true; + bool try_cfr = true, try_sendfile = true, try_splice = true, copied_something = false; int r, nonblock_pipe = -1; size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */ @@ -211,9 +211,20 @@ int copy_bytes_full( try_cfr = false; /* use fallback below */ - } else if (n == 0) /* EOF */ - break; - else + } else if (n == 0) { /* likely EOF */ + + if (copied_something) + break; + + /* So, we hit EOF immediately, without having copied a single byte. This + * could indicate two things: the file is actually empty, or we are on some + * virtual file system such as procfs/sysfs where the syscall actually + * doesn't work but doesn't return an error. Try to handle that, by falling + * back to simple read()s in case we encounter empty files. + * + * See: https://lwn.net/Articles/846403/ */ + try_cfr = try_sendfile = try_splice = false; + } else /* Success! */ goto next; } @@ -227,9 +238,14 @@ int copy_bytes_full( try_sendfile = false; /* use fallback below */ - } else if (n == 0) /* EOF */ + } else if (n == 0) { /* likely EOF */ + + if (copied_something) + break; + + try_sendfile = try_splice = false; /* same logic as above for copy_file_range() */ break; - else + } else /* Success! */ goto next; } @@ -239,14 +255,14 @@ int copy_bytes_full( /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the - * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and - * check if either of the specified fds are a pipe, and if so, let's pass the flag - * automatically, depending on O_NONBLOCK being set. + * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour + * here, and check if either of the specified fds are a pipe, and if so, let's pass + * the flag automatically, depending on O_NONBLOCK being set. * - * Here's a twist though: when we use it to move data between two pipes of which one has - * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK - * behaviour. Hence in that case we can't use splice() and still guarantee systematic - * O_NONBLOCK behaviour, hence don't. */ + * Here's a twist though: when we use it to move data between two pipes of which one + * has O_NONBLOCK set and the other has not, then we have no individual control over + * O_NONBLOCK behaviour. Hence in that case we can't use splice() and still guarantee + * systematic O_NONBLOCK behaviour, hence don't. */ if (nonblock_pipe < 0) { int a, b; @@ -264,12 +280,13 @@ int copy_bytes_full( (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) || (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE)) - /* splice() only works if one of the fds is a pipe. If neither is, let's skip - * this step right-away. As mentioned above, if one of the two fds refers to a - * blocking pipe and the other to a non-blocking pipe, we can't use splice() - * either, hence don't try either. This hence means we can only use splice() if - * either only one of the two fds is a pipe, or if both are pipes with the same - * nonblocking flag setting. */ + /* splice() only works if one of the fds is a pipe. If neither is, + * let's skip this step right-away. As mentioned above, if one of the + * two fds refers to a blocking pipe and the other to a non-blocking + * pipe, we can't use splice() either, hence don't try either. This + * hence means we can only use splice() if either only one of the two + * fds is a pipe, or if both are pipes with the same nonblocking flag + * setting. */ try_splice = false; else @@ -285,9 +302,13 @@ int copy_bytes_full( try_splice = false; /* use fallback below */ - } else if (n == 0) /* EOF */ - break; - else + } else if (n == 0) { /* likely EOF */ + + if (copied_something) + break; + + try_splice = false; /* same logic as above for copy_file_range() + sendfile() */ + } else /* Success! */ goto next; } @@ -345,11 +366,12 @@ int copy_bytes_full( max_bytes -= n; } - /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, - * so reduce our maximum by the amount we already copied, - * but don't go below our copy buffer size, unless we are - * close the limit of bytes we are allowed to copy. */ + /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, so reduce our maximum by the + * amount we already copied, but don't go below our copy buffer size, unless we are close the + * limit of bytes we are allowed to copy. */ m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n); + + copied_something = true; } return 0; /* return 0 if we hit EOF earlier than the size limit */ diff --git a/src/test/test-copy.c b/src/test/test-copy.c index ffa92978892..211b4a49259 100644 --- a/src/test/test-copy.c +++ b/src/test/test-copy.c @@ -304,6 +304,22 @@ static void test_copy_atomic(void) { assert_se(copy_file_atomic("/etc/fstab", q, 0644, 0, 0, COPY_REPLACE) >= 0); } +static void test_copy_proc(void) { + _cleanup_(rm_rf_physical_and_freep) char *p = NULL; + _cleanup_free_ char *f = NULL, *a = NULL, *b = NULL; + + /* Check if copying data from /proc/ works correctly, i.e. let's see if https://lwn.net/Articles/846403/ is a problem for us */ + + assert_se(mkdtemp_malloc(NULL, &p) >= 0); + assert_se(f = path_join(p, "version")); + assert_se(copy_file("/proc/version", f, 0, (mode_t) -1, 0, 0, 0) >= 0); + + assert_se(read_one_line_file("/proc/version", &a) >= 0); + assert_se(read_one_line_file(f, &b) >= 0); + assert_se(streq(a, b)); + assert_se(strlen(a) > 0); +} + int main(int argc, char *argv[]) { test_setup_logging(LOG_DEBUG); @@ -318,6 +334,7 @@ int main(int argc, char *argv[]) { test_copy_bytes_regular_file(argv[0], false, 32000); /* larger than copy buffer size */ test_copy_bytes_regular_file(argv[0], true, 32000); test_copy_atomic(); + test_copy_proc(); return 0; }