1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
8 #include "alloc-util.h"
10 #include "data-fd-util.h"
14 #include "memfd-util.h"
15 #include "tmpfile-util.h"
17 /* When the data is smaller than or equal to 64K, try to place the copy in a memfd/pipe */
18 #define DATA_FD_MEMORY_LIMIT (64U*1024U)
20 /* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's larger than that, use /var/tmp instead. */
21 #define DATA_FD_TMP_LIMIT (1024U*1024U)
/* acquire_data_fd(): returns a read-only file descriptor that yields the 'size' bytes at 'data' when read.
 *
 * Backends are tried in this order, each of which can be disabled via 'flags': /dev/null for an empty data
 * block (ACQUIRE_NO_DEV_NULL), a memfd (ACQUIRE_NO_MEMFD), a pipe (ACQUIRE_NO_PIPE), an O_TMPFILE file in
 * /dev/shm (ACQUIRE_NO_TMPFILE) and finally a regular file in /dev/shm that is unlinked again
 * (ACQUIRE_NO_REGULAR).
 *
 * NOTE(review): several statements of this function (local declarations, error checks, closing braces) are
 * not visible in this excerpt; the comments below describe only what is shown here. */
23 int acquire_data_fd(const void *data
, size_t size
, unsigned flags
) {
24 _cleanup_close_pair_
int pipefds
[2] = { -1, -1 };
/* Template for the last-resort regular file; passed to mkostemp_safe() and later to open()/unlink() */
25 char pattern
[] = "/dev/shm/data-fd-XXXXXX";
26 _cleanup_close_
int fd
= -1;
/* A NULL data pointer is only acceptable for an empty block */
31 assert(data
|| size
== 0);
33 /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
34 * complex than I wish it was. But here's why:
36 * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
37 * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
39 * b) Then, we try classic pipes. They are the second best option, as we can close the writing side, retaining
40 * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
41 * clients can only bump their size to a system-wide limit, which might be quite low.
43 * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
44 * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
45 * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
47 * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
49 * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
52 if (size
== 0 && ((flags
& ACQUIRE_NO_DEV_NULL
) == 0))
53 /* As a special case, return /dev/null if we have been called for an empty data block */
54 return RET_NERRNO(open("/dev/null", O_RDONLY
|O_CLOEXEC
|O_NOCTTY
));
/* First choice: a memfd that is filled with the data, rewound to offset 0 and then sealed */
56 if ((flags
& ACQUIRE_NO_MEMFD
) == 0) {
57 fd
= memfd_new("data-fd");
61 n
= write(fd
, data
, size
);
/* A short write is detected by comparing the byte count against 'size' */
64 if ((size_t) n
!= size
)
67 f
= lseek(fd
, 0, SEEK_SET
);
71 r
= memfd_set_sealed(fd
);
/* Second choice: a pipe. Its buffer size is queried and compared against 'size' (and bumped if too small)
 * before the data is written in one go. */
79 if ((flags
& ACQUIRE_NO_PIPE
) == 0) {
/* O_NONBLOCK is set on both ends at creation time; the read end is adjusted again before it is returned */
80 if (pipe2(pipefds
, O_CLOEXEC
|O_NONBLOCK
) < 0)
83 isz
= fcntl(pipefds
[1], F_GETPIPE_SZ
, 0);
87 if ((size_t) isz
< size
) {
/* Guards against 'size' not being representable in 'isz'. NOTE(review): the assignment that precedes
 * this check is not visible in this excerpt. */
89 if (isz
< 0 || (size_t) isz
!= size
)
92 /* Try to bump the pipe size */
93 (void) fcntl(pipefds
[1], F_SETPIPE_SZ
, isz
);
95 /* See if that worked */
96 isz
= fcntl(pipefds
[1], F_GETPIPE_SZ
, 0);
100 if ((size_t) isz
< size
)
104 n
= write(pipefds
[1], data
, size
);
107 if ((size_t) n
!= size
)
/* Presumably switches the read end back to blocking mode (the pipe was opened with O_NONBLOCK above);
 * the result is deliberately ignored. */
110 (void) fd_nonblock(pipefds
[0], false);
/* Hand the read end to the caller; TAKE_FD() presumably keeps the cleanup handler from closing it */
112 return TAKE_FD(pipefds
[0]);
/* Third choice: an O_TMPFILE file in /dev/shm */
116 if ((flags
& ACQUIRE_NO_TMPFILE
) == 0) {
117 fd
= open("/dev/shm", O_RDWR
|O_TMPFILE
|O_CLOEXEC
, 0500);
119 goto try_dev_shm_without_o_tmpfile
;
121 n
= write(fd
, data
, size
);
124 if ((size_t) n
!= size
)
127 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
128 return fd_reopen(fd
, O_RDONLY
|O_CLOEXEC
);
131 try_dev_shm_without_o_tmpfile
:
/* Last choice: a named temporary file in /dev/shm created from 'pattern', which is unlinked again below */
132 if ((flags
& ACQUIRE_NO_REGULAR
) == 0) {
133 fd
= mkostemp_safe(pattern
);
137 n
= write(fd
, data
, size
);
140 goto unlink_and_return
;
142 if ((size_t) n
!= size
) {
144 goto unlink_and_return
;
147 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
148 r
= open(pattern
, O_RDONLY
|O_CLOEXEC
);
/* Remove the name from the file system; the unlink() result is deliberately ignored */
153 (void) unlink(pattern
);
/* copy_data_fd(): creates an independent, read-only copy of the data readable from 'fd' and returns a new file
 * descriptor for it.
 *
 * Visible strategy: for non-regular files, or regular files reporting fewer than DATA_FD_MEMORY_LIMIT bytes, a
 * memfd and then a pipe are tried. Data that does not fit is carried over into an unlinkable temporary file in
 * /tmp (bounded by DATA_FD_TMP_LIMIT) and, as a last fallback, into one in the directory reported by
 * var_tmp_dir().
 *
 * NOTE(review): several statements of this function (local declarations, error checks, closing braces) are
 * not visible in this excerpt; the comments below describe only what is shown here. */
160 int copy_data_fd(int fd
) {
161 _cleanup_close_
int copy_fd
= -1, tmp_fd
= -1;
/* Buffer and length filled in by copy_bytes_full() below: bytes read from the source but not yet written out */
162 _cleanup_free_
void *remains
= NULL
;
163 size_t remains_size
= 0;
168 /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
169 * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
170 * somewhat smart about where to place the data. In the best case uses a memfd(). If memfds are not supported
171 * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
174 if (fstat(fd
, &st
) < 0)
177 /* For now, let's only accept regular files, sockets, pipes and char devices */
178 if (S_ISDIR(st
.st_mode
))
180 if (S_ISLNK(st
.st_mode
))
182 if (!S_ISREG(st
.st_mode
) && !S_ISSOCK(st
.st_mode
) && !S_ISFIFO(st
.st_mode
) && !S_ISCHR(st
.st_mode
))
185 /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
186 * that we use the reported regular file size only as a hint, given that there are plenty special files in
187 * /proc and /sys which report a zero file size but can be read from. */
189 if (!S_ISREG(st
.st_mode
) || st
.st_size
< DATA_FD_MEMORY_LIMIT
) {
191 /* Try a memfd first */
192 copy_fd
= memfd_new("data-fd");
/* Copy at most DATA_FD_MEMORY_LIMIT bytes from the source into the memfd */
196 r
= copy_bytes(fd
, copy_fd
, DATA_FD_MEMORY_LIMIT
, 0);
/* Rewind, so that reading (by the caller or by the fallback copy below) starts at the beginning */
200 f
= lseek(copy_fd
, 0, SEEK_SET
);
205 /* Did it fit into the limit? If so, we are done. */
206 r
= memfd_set_sealed(copy_fd
);
210 return TAKE_FD(copy_fd
);
213 /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
216 _cleanup_(close_pairp
) int pipefds
[2] = { -1, -1 };
219 /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
220 * than block indefinitely when we hit the pipe size limit */
222 if (pipe2(pipefds
, O_CLOEXEC
|O_NONBLOCK
) < 0)
225 isz
= fcntl(pipefds
[1], F_GETPIPE_SZ
, 0);
229 /* Try to enlarge the pipe size if necessary */
230 if ((size_t) isz
< DATA_FD_MEMORY_LIMIT
) {
232 (void) fcntl(pipefds
[1], F_SETPIPE_SZ
, DATA_FD_MEMORY_LIMIT
);
234 isz
= fcntl(pipefds
[1], F_GETPIPE_SZ
, 0);
/* Only use the pipe if its buffer can hold DATA_FD_MEMORY_LIMIT bytes */
239 if ((size_t) isz
>= DATA_FD_MEMORY_LIMIT
) {
241 r
= copy_bytes_full(fd
, pipefds
[1], DATA_FD_MEMORY_LIMIT
, 0, &remains
, &remains_size
, NULL
, NULL
);
242 if (r
< 0 && r
!= -EAGAIN
)
243 return r
; /* If we get EAGAIN it could be because of the source or because of
244 * the destination fd, we can't know, as sendfile() and friends won't
245 * tell us. Hence, treat this as reason to fall back, just to be
248 /* Everything fit in, yay! */
249 (void) fd_nonblock(pipefds
[0], false);
251 return TAKE_FD(pipefds
[0]);
254 /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
255 * when writing the new file we incorporate this first. */
256 copy_fd
= TAKE_FD(pipefds
[0]);
261 /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
262 if ((!S_ISREG(st
.st_mode
) || st
.st_size
< DATA_FD_TMP_LIMIT
) &&
263 (DATA_FD_MEMORY_LIMIT
+ remains_size
) < DATA_FD_TMP_LIMIT
) {
266 tmp_fd
= open_tmpfile_unlinkable(NULL
/* NULL as directory means /tmp */, O_RDWR
|O_CLOEXEC
);
271 /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
272 * temporary file first. */
274 r
= copy_bytes(copy_fd
, tmp_fd
, UINT64_MAX
, 0);
281 if (remains_size
> 0) {
282 /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
283 * failed copy operation, let's flush them out next. */
285 r
= loop_write(tmp_fd
, remains
, remains_size
, false);
/* Copy the rest of the source, limited to what is left of the DATA_FD_TMP_LIMIT budget after the bytes
 * accounted for above */
290 r
= copy_bytes(fd
, tmp_fd
, DATA_FD_TMP_LIMIT
- DATA_FD_MEMORY_LIMIT
- remains_size
, COPY_REFLINK
);
294 goto finish
; /* Yay, it fit in */
296 /* It didn't fit in. Let's not forget to use what we already used */
297 f
= lseek(tmp_fd
, 0, SEEK_SET
);
/* The /tmp file now takes over the role of 'copy_fd' as the holder of the data copied so far */
301 close_and_replace(copy_fd
, tmp_fd
);
/* NOTE(review): 'remains_size' is tested again further down, but no reset is visible in this excerpt —
 * confirm it is cleared together with 'remains'. */
303 remains
= mfree(remains
);
307 /* As last fallback use /var/tmp */
308 r
= var_tmp_dir(&td
);
312 tmp_fd
= open_tmpfile_unlinkable(td
, O_RDWR
|O_CLOEXEC
);
317 /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, then copy this
318 * into the temporary file first. */
319 r
= copy_bytes(copy_fd
, tmp_fd
, UINT64_MAX
, COPY_REFLINK
);
326 if (remains_size
> 0) {
327 /* Then, copy in any read but not yet written bytes. */
328 r
= loop_write(tmp_fd
, remains
, remains_size
, false);
333 /* Copy in the rest */
334 r
= copy_bytes(fd
, tmp_fd
, UINT64_MAX
, COPY_REFLINK
);
341 /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
344 return fd_reopen(tmp_fd
, O_RDONLY
|O_CLOEXEC
);