1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
8 #include "alloc-util.h"
10 #include "data-fd-util.h"
14 #include "memfd-util.h"
15 #include "tmpfile-util.h"
17 /* When the data is smaller than or equal to 64K, try to place the copy in a memfd/pipe */
18 #define DATA_FD_MEMORY_LIMIT (64U*1024U)
20 /* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's larger than that use /var/tmp instead. */
21 #define DATA_FD_TMP_LIMIT (1024U*1024U)
/* Produces a file descriptor from which the 'size' bytes at 'data' can be read back.
 *
 * data:  payload to expose; may be NULL only when size is 0 (asserted below).
 * size:  number of payload bytes.
 * flags: ACQUIRE_NO_DEV_NULL, ACQUIRE_NO_MEMFD, ACQUIRE_NO_PIPE, ACQUIRE_NO_TMPFILE and ACQUIRE_NO_REGULAR
 *        each disable the correspondingly named strategy below.
 *
 * Strategies, in the order they appear: /dev/null for an empty payload, a sealed memfd, a pipe whose
 * read end is handed back, an O_TMPFILE file in /dev/shm reopened O_RDONLY, and finally a regular
 * mkostemp_safe() file in /dev/shm that is reopened O_RDONLY by path and then unlinked.
 *
 * The success returns visible here are TAKE_FD(pipefds[0]) and the result of fd_reopen().
 *
 * NOTE(review): this listing appears to be missing lines (declarations of r, n, f and isz, the failure
 * branches after most calls, jump labels other than try_dev_shm_without_o_tmpfile, and closing braces).
 * Confirm error semantics against the complete file before relying on this description. */
23 int acquire_data_fd(const void *data
, size_t size
, unsigned flags
) {
24 _cleanup_close_pair_
int pipefds
[2] = { -1, -1 };
/* Template handed to mkostemp_safe() in the last strategy; it is also the path that gets unlinked. */
25 char pattern
[] = "/dev/shm/data-fd-XXXXXX";
26 _cleanup_close_
int fd
= -1;
/* A NULL buffer is only acceptable together with an empty payload. */
31 assert(data
|| size
== 0);
33 /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
34 * complex than I wish it was. But here's why:
36 * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
37 * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
39 * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
40 * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
41 * clients can only bump their size to a system-wide limit, which might be quite low.
43 * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
44 * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
45 * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
47 * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
49 * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
52 if (size
== 0 && ((flags
& ACQUIRE_NO_DEV_NULL
) == 0)) {
53 /* As a special case, return /dev/null if we have been called for an empty data block */
54 r
= open("/dev/null", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
/* Strategy a): memfd. Write the payload, rewind to offset 0, then seal the memfd. */
61 if ((flags
& ACQUIRE_NO_MEMFD
) == 0) {
62 fd
= memfd_new("data-fd");
66 n
= write(fd
, data
, size
);
/* Detects a short write; the statement taken on mismatch is not visible in this listing. */
69 if ((size_t) n
!= size
)
72 f
= lseek(fd
, 0, SEEK_SET
);
76 r
= memfd_set_sealed(fd
);
/* Strategy b): pipe. Created non-blocking, so an oversized write cannot stall on a full pipe. */
84 if ((flags
& ACQUIRE_NO_PIPE
) == 0) {
85 if (pipe2(pipefds
, O_CLOEXEC
|O_NONBLOCK
) < 0)
88 isz
= fcntl(pipefds
[1], F_GETPIPE_SZ
, 0);
/* The current pipe capacity is smaller than the payload, so try to enlarge it. */
92 if ((size_t) isz
< size
) {
/* NOTE(review): presumably isz was assigned from size on a line not shown here, in which case this
 * check rejects sizes that do not survive the conversion to int. Confirm against the full file. */
94 if (isz
< 0 || (size_t) isz
!= size
)
97 /* Try to bump the pipe size */
98 (void) fcntl(pipefds
[1], F_SETPIPE_SZ
, isz
);
100 /* See if that worked */
101 isz
= fcntl(pipefds
[1], F_GETPIPE_SZ
, 0);
105 if ((size_t) isz
< size
)
109 n
= write(pipefds
[1], data
, size
);
112 if ((size_t) n
!= size
)
/* The read end goes to the caller, so switch it back to blocking mode (best effort, result ignored). */
115 (void) fd_nonblock(pipefds
[0], false);
117 return TAKE_FD(pipefds
[0]);
/* Strategy c): unnamed O_TMPFILE file in /dev/shm, created with mode 0500. */
121 if ((flags
& ACQUIRE_NO_TMPFILE
) == 0) {
122 fd
= open("/dev/shm", O_RDWR
|O_TMPFILE
|O_CLOEXEC
, 0500);
124 goto try_dev_shm_without_o_tmpfile
;
126 n
= write(fd
, data
, size
);
129 if ((size_t) n
!= size
)
132 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
133 return fd_reopen(fd
, O_RDONLY
|O_CLOEXEC
);
136 try_dev_shm_without_o_tmpfile
:
/* Strategy d): named temporary file in /dev/shm, created from 'pattern'. */
137 if ((flags
& ACQUIRE_NO_REGULAR
) == 0) {
138 fd
= mkostemp_safe(pattern
);
142 n
= write(fd
, data
, size
);
145 goto unlink_and_return
;
147 if ((size_t) n
!= size
) {
149 goto unlink_and_return
;
152 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
153 r
= open(pattern
, O_RDONLY
|O_CLOEXEC
);
/* Remove the name from /dev/shm; the unlink result is deliberately ignored. */
158 (void) unlink(pattern
);
/* Copies everything readable from 'fd' into a new backing object and returns a descriptor for that copy.
 *
 * fd: source descriptor. It is fstat()ed first and its type is checked with S_ISDIR, S_ISLNK, S_ISREG,
 *     S_ISSOCK, S_ISFIFO and S_ISCHR.
 *
 * Backing objects, in the order they appear:
 *   1. a memfd (sealed before it is returned) or, per the in-line comment, a pipe when memfds are not
 *      available, both bounded by DATA_FD_MEMORY_LIMIT;
 *   2. an unlinkable temporary file from open_tmpfile_unlinkable(NULL, ...), bounded by DATA_FD_TMP_LIMIT;
 *   3. an unlinkable temporary file in the directory reported by var_tmp_dir().
 * When a stage turns out to be too small, the data it already holds (copy_fd) and any bytes read but
 * not yet written (remains/remains_size) are written to the next stage before copying continues.
 *
 * Returns visible here: TAKE_FD(copy_fd) for the memfd, TAKE_FD(pipefds[0]) for the pipe, the result
 * of fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC) for the file-backed cases, and r when copy_bytes_full()
 * fails with anything other than -EAGAIN.
 *
 * NOTE(review): this listing appears to be missing lines (declarations of st, r, f, isz and td, the
 * failure branches after most calls, the 'finish' label and closing braces). Confirm error semantics
 * against the complete file before relying on this description. */
165 int copy_data_fd(int fd
) {
166 _cleanup_close_
int copy_fd
= -1, tmp_fd
= -1;
167 _cleanup_free_
void *remains
= NULL
;
168 size_t remains_size
= 0;
173 /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
174 * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
175 * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
176 * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
179 if (fstat(fd
, &st
) < 0)
182 /* For now, let's only accept regular files, sockets, pipes and char devices */
183 if (S_ISDIR(st
.st_mode
))
185 if (S_ISLNK(st
.st_mode
))
187 if (!S_ISREG(st
.st_mode
) && !S_ISSOCK(st
.st_mode
) && !S_ISFIFO(st
.st_mode
) && !S_ISCHR(st
.st_mode
))
190 /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
191 * that we use the reported regular file size only as a hint, given that there are plenty special files in
192 * /proc and /sys which report a zero file size but can be read from. */
194 if (!S_ISREG(st
.st_mode
) || st
.st_size
< DATA_FD_MEMORY_LIMIT
) {
196 /* Try a memfd first */
197 copy_fd
= memfd_new("data-fd");
/* Copy at most DATA_FD_MEMORY_LIMIT bytes, so an unexpectedly large source cannot fill the memfd. */
201 r
= copy_bytes(fd
, copy_fd
, DATA_FD_MEMORY_LIMIT
, 0);
205 f
= lseek(copy_fd
, 0, SEEK_SET
);
210 /* Did it fit into the limit? If so, we are done. */
211 r
= memfd_set_sealed(copy_fd
);
215 return TAKE_FD(copy_fd
);
218 /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
221 _cleanup_(close_pairp
) int pipefds
[2] = { -1, -1 };
224 /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
225 * than block indefinitely when we hit the pipe size limit */
227 if (pipe2(pipefds
, O_CLOEXEC
|O_NONBLOCK
) < 0)
230 isz
= fcntl(pipefds
[1], F_GETPIPE_SZ
, 0);
234 /* Try to enlarge the pipe size if necessary */
235 if ((size_t) isz
< DATA_FD_MEMORY_LIMIT
) {
237 (void) fcntl(pipefds
[1], F_SETPIPE_SZ
, DATA_FD_MEMORY_LIMIT
);
239 isz
= fcntl(pipefds
[1], F_GETPIPE_SZ
, 0);
/* Only use the pipe if it can hold the full DATA_FD_MEMORY_LIMIT budget. */
244 if ((size_t) isz
>= DATA_FD_MEMORY_LIMIT
) {
/* Bytes that were read from the source but could not be written to the pipe come back in
 * remains/remains_size, so a later stage can still write them out. */
246 r
= copy_bytes_full(fd
, pipefds
[1], DATA_FD_MEMORY_LIMIT
, 0, &remains
, &remains_size
, NULL
, NULL
);
247 if (r
< 0 && r
!= -EAGAIN
)
248 return r
; /* If we get EAGAIN it could be because of the source or because of
249 * the destination fd, we can't know, as sendfile() and friends won't
250 * tell us. Hence, treat this as reason to fall back, just to be
253 /* Everything fit in, yay! */
254 (void) fd_nonblock(pipefds
[0], false);
256 return TAKE_FD(pipefds
[0]);
259 /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
260 * when writing the new file we incorporate this first. */
261 copy_fd
= TAKE_FD(pipefds
[0]);
266 /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
267 if ((!S_ISREG(st
.st_mode
) || st
.st_size
< DATA_FD_TMP_LIMIT
) &&
268 (DATA_FD_MEMORY_LIMIT
+ remains_size
) < DATA_FD_TMP_LIMIT
) {
271 tmp_fd
= open_tmpfile_unlinkable(NULL
/* NULL as directory means /tmp */, O_RDWR
|O_CLOEXEC
);
276 /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
277 * temporary file first. */
279 r
= copy_bytes(copy_fd
, tmp_fd
, UINT64_MAX
, 0);
286 if (remains_size
> 0) {
287 /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
288 * failed copy operation, let's flush them out next. */
290 r
= loop_write(tmp_fd
, remains
, remains_size
, false);
/* Continue from the source, capped at what is left of DATA_FD_TMP_LIMIT after subtracting the
 * DATA_FD_MEMORY_LIMIT budget of the earlier stage and the leftover bytes just flushed. */
295 r
= copy_bytes(fd
, tmp_fd
, DATA_FD_TMP_LIMIT
- DATA_FD_MEMORY_LIMIT
- remains_size
, COPY_REFLINK
);
299 goto finish
; /* Yay, it fit in */
301 /* It didn't fit in. Let's not forget to use what we already used */
302 f
= lseek(tmp_fd
, 0, SEEK_SET
);
/* The rewound /tmp file takes over copy_fd, i.e. it becomes the data copied first into the next
 * temporary file below. */
306 CLOSE_AND_REPLACE(copy_fd
, tmp_fd
);
/* The leftover buffer was written to the /tmp file above, so release it here. */
308 remains
= mfree(remains
);
312 /* As last fallback use /var/tmp */
313 r
= var_tmp_dir(&td
);
317 tmp_fd
= open_tmpfile_unlinkable(td
, O_RDWR
|O_CLOEXEC
);
322 /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, then copy this
323 * into the temporary file first. */
324 r
= copy_bytes(copy_fd
, tmp_fd
, UINT64_MAX
, COPY_REFLINK
);
331 if (remains_size
> 0) {
332 /* Then, copy in any read but not yet written bytes. */
333 r
= loop_write(tmp_fd
, remains
, remains_size
, false);
338 /* Copy in the rest */
339 r
= copy_bytes(fd
, tmp_fd
, UINT64_MAX
, COPY_REFLINK
);
346 /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
349 return fd_reopen(tmp_fd
, O_RDONLY
|O_CLOEXEC
);