/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

#include "errno-util.h"
#include "io-util.h"
#include "string-util.h"
#include "time-util.h"

int flush_fd(int fd) {
        int count = 0;

        /* Read from the specified file descriptor, until POLLIN is not set anymore, throwing away everything
         * read. Note that some file descriptors (notably IP sockets) will trigger POLLIN even when no data can be
         * read (due to IP packet checksum mismatches), hence this function is only guaranteed to be non-blocking
         * if the fd used was set to non-blocking too. */

        for (;;) {
                char buf[LINE_MAX];
                ssize_t l;
                int r;

                r = fd_wait_for_event(fd, POLLIN, 0);
                if (r < 0) {
                        if (r == -EINTR)
                                continue;

                        return r;
                }
                if (r == 0)
                        return count;

                l = read(fd, buf, sizeof(buf));
                if (l < 0) {
                        if (errno == EINTR)
                                continue;

                        if (errno == EAGAIN)
                                return count;

                        return -errno;
                } else if (l == 0)
                        return count;

                count += (int) l;
        }
}

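/* Usage sketch (illustrative only, kept out of the build with #if 0): drain whatever is pending on a
 * socket before reusing it. As the comment above demands, the fd is switched to non-blocking first;
 * this assumes <fcntl.h>, which this file does not include, and drain_socket_example() is a made-up
 * name. */
#if 0
static int drain_socket_example(int fd) {
        int flags, r;

        flags = fcntl(fd, F_GETFL, 0);
        if (flags < 0)
                return -errno;
        if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0)
                return -errno;

        /* flush_fd() returns the number of bytes thrown away, or a negative errno-style error. */
        r = flush_fd(fd);
        if (r < 0)
                return r;

        return 0;
}
#endif
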
ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll) {
        uint8_t *p = ASSERT_PTR(buf);
        ssize_t n = 0;

        assert(fd >= 0);

        /* If called with nbytes == 0, let's call read() at least once, to validate the operation */

        if (nbytes > (size_t) SSIZE_MAX)
                return -EINVAL;

        do {
                ssize_t k;

                k = read(fd, p, nbytes);
                if (k < 0) {
                        if (errno == EINTR)
                                continue;

                        if (errno == EAGAIN && do_poll) {

                                /* We knowingly ignore any return value here,
                                 * and expect that any error/EOF is reported
                                 * via read() */

                                (void) fd_wait_for_event(fd, POLLIN, USEC_INFINITY);
                                continue;
                        }

                        return n > 0 ? n : -errno;
                }

                if (k == 0)
                        return n;

                assert((size_t) k <= nbytes);

                p += k;
                nbytes -= k;
                n += k;
        } while (nbytes > 0);

        return n;
}

int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll) {
        ssize_t n;

        n = loop_read(fd, buf, nbytes, do_poll);
        if (n < 0)
                return (int) n;
        if ((size_t) n != nbytes)
                return -EIO;

        return 0;
}

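/* Usage sketch (illustrative only, kept out of the build with #if 0): read a fixed-size header in
 * full, treating a short read as an error. The struct and function name are made up for the
 * example. */
#if 0
struct example_header {
        uint32_t magic;
        uint32_t payload_size;
};

static int read_header_example(int fd, struct example_header *ret) {
        int r;

        /* loop_read_exact() retries on EINTR (and on EAGAIN if do_poll is true) and returns -EIO
         * if fewer bytes than requested could be read. */
        r = loop_read_exact(fd, ret, sizeof(*ret), /* do_poll= */ true);
        if (r < 0)
                return r;

        return 0;
}
#endif
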
int loop_write_full(int fd, const void *buf, size_t nbytes, usec_t timeout) {
        const uint8_t *p;
        usec_t end;
        int r;

        assert(fd >= 0);
        assert(buf || nbytes == 0);

        if (nbytes == 0) {
                static const dummy_t dummy[0];
                assert_cc(sizeof(dummy) == 0);
                p = (const void*) dummy; /* Some valid pointer, in case NULL was specified */
        } else {
                if (nbytes == SIZE_MAX)
                        nbytes = strlen(buf);
                else if (_unlikely_(nbytes > (size_t) SSIZE_MAX))
                        return -EINVAL;

                p = buf;
        }

        /* When timeout is 0 or USEC_INFINITY this is not used. But we initialize it to a sensible value. */
        end = timestamp_is_set(timeout) ? usec_add(now(CLOCK_MONOTONIC), timeout) : USEC_INFINITY;

        do {
                ssize_t k;

                k = write(fd, p, nbytes);
                if (k < 0) {
                        if (errno == EINTR)
                                continue;

                        if (errno != EAGAIN || timeout == 0)
                                return -errno;

                        usec_t wait_for;

                        if (timeout == USEC_INFINITY)
                                wait_for = USEC_INFINITY;
                        else {
                                usec_t t = now(CLOCK_MONOTONIC);
                                if (t >= end)
                                        return -ETIME;

                                wait_for = usec_sub_unsigned(end, t);
                        }

                        r = fd_wait_for_event(fd, POLLOUT, wait_for);
                        if (timeout == USEC_INFINITY || ERRNO_IS_NEG_TRANSIENT(r))
                                /* If timeout == USEC_INFINITY we knowingly ignore any return value
                                 * here, and expect that any error/EOF is reported via write() */
                                continue;
                        if (r < 0)
                                return r;
                        if (r == 0)
                                return -ETIME;

                        /* The fd is reportedly writable again, retry the write(). k is still negative
                         * here, hence we must not fall through to the length bookkeeping below. */
                        continue;
                }

                if (_unlikely_(nbytes > 0 && k == 0)) /* Can't really happen */
                        return -EIO;

                assert((size_t) k <= nbytes);

                p += k;
                nbytes -= k;
        } while (nbytes > 0);

        return 0;
}

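/* Usage sketch (illustrative only, kept out of the build with #if 0): write a NUL-terminated string
 * to a possibly slow fd, giving up after five seconds. Passing SIZE_MAX as the size makes
 * loop_write_full() call strlen() on the buffer, as implemented above; the function name is made up. */
#if 0
static int write_greeting_example(int fd) {
        /* Returns 0 on success, -ETIME if the deadline passed, or another negative errno-style error. */
        return loop_write_full(fd, "hello\n", SIZE_MAX, 5 * USEC_PER_SEC);
}
#endif
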
int pipe_eof(int fd) {
        int r;

        r = fd_wait_for_event(fd, POLLIN, 0);
        if (r <= 0)
                return r;

        return !!(r & POLLHUP);
}

int ppoll_usec(struct pollfd *fds, size_t nfds, usec_t timeout) {
        int r;

        assert(fds || nfds == 0);

        /* This is a wrapper around ppoll() that does primarily two things:
         *
         * ✅ Takes a usec_t instead of a struct timespec
         *
         * ✅ Guarantees that if an invalid fd is specified we return EBADF (i.e. converts POLLNVAL to
         *    EBADF). This is done because EBADF is usually a programming error, and hence should bubble up
         *    as an error, and not be eaten up as a non-error POLLNVAL event.
         *
         * ⚠️ ⚠️ ⚠️ Note that this function does not add any special handling for EINTR. Don't forget that
         * poll()/ppoll() will always return with EINTR when a signal is received; there is no automatic
         * restarting via SA_RESTART available. Thus, you typically want to handle EINTR not as an error,
         * but just as a reason to restart things, under the assumption that you use a more appropriate
         * mechanism to handle signals, such as signalfd() or signal handlers. ⚠️ ⚠️ ⚠️
         */

        if (nfds == 0)
                return 0;

        r = ppoll(fds, nfds, timeout == USEC_INFINITY ? NULL : TIMESPEC_STORE(timeout), NULL);
        if (r < 0)
                return -errno;
        if (r == 0)
                return 0;

        for (size_t i = 0, n = r; i < nfds && n > 0; i++) {
                if (fds[i].revents == 0)
                        continue;
                if (fds[i].revents & POLLNVAL)
                        return -EBADF;
                n--;
        }

        return r;
}

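/* Usage sketch (illustrative only, kept out of the build with #if 0): wait on two fds at once and,
 * as the comment above suggests, treat -EINTR as "poll again" rather than as an error. Assumes the
 * ELEMENTSOF() helper from macro.h; the function name is made up. */
#if 0
static int wait_for_either_example(int fd_a, int fd_b) {
        struct pollfd fds[] = {
                { .fd = fd_a, .events = POLLIN },
                { .fd = fd_b, .events = POLLIN },
        };

        for (;;) {
                int r;

                r = ppoll_usec(fds, ELEMENTSOF(fds), 10 * USEC_PER_SEC);
                if (r == -EINTR)
                        continue;      /* a signal arrived, simply restart the poll */
                if (r < 0)
                        return r;      /* includes -EBADF if one of the fds is invalid */
                if (r == 0)
                        return -ETIME; /* nothing became readable in time */

                /* At least one fd has events; the caller would now inspect fds[i].revents. */
                return 0;
        }
}
#endif
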
int fd_wait_for_event(int fd, int event, usec_t timeout) {
        struct pollfd pollfd = {
                .fd = fd,
                .events = event,
        };
        int r;

        /* ⚠️ ⚠️ ⚠️ Keep in mind you almost certainly want to handle -EINTR gracefully in the caller, see
         * ppoll_usec() above! ⚠️ ⚠️ ⚠️ */

        r = ppoll_usec(&pollfd, 1, timeout);
        if (r <= 0)
                return r;

        return pollfd.revents;
}

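/* Usage sketch (illustrative only, kept out of the build with #if 0): wait up to one second for a fd
 * to become readable, distinguishing readability, hangup, timeout and signal interruption. The
 * function name is made up. */
#if 0
static int wait_readable_example(int fd) {
        int r;

        r = fd_wait_for_event(fd, POLLIN, USEC_PER_SEC);
        if (r == -EINTR)
                return 0;              /* interrupted by a signal, let the caller retry */
        if (r < 0)
                return r;              /* real error, e.g. -EBADF */
        if (r == 0)
                return -ETIME;         /* nothing happened within the timeout */
        if (r & POLLHUP)
                return -ECONNRESET;    /* peer hung up */

        return !!(r & POLLIN);         /* 1 if readable */
}
#endif
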
static size_t nul_length(const uint8_t *p, size_t sz) {
        size_t n = 0;

        while (sz > 0) {
                if (*p != 0)
                        break;

                n++;
                p++;
                sz--;
        }

        return n;
}

ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length) {
        const uint8_t *q, *w, *e;
        ssize_t l;

        q = w = p;
        e = q + sz;
        while (q < e) {
                size_t n;

                n = nul_length(q, e - q);

                /* If there are more than the specified run length of
                 * NUL bytes, or if this is the beginning or the end
                 * of the buffer, then seek instead of write */
                if ((n > run_length) ||
                    (n > 0 && q == p) ||
                    (n > 0 && q + n >= e)) {
                        if (q > w) {
                                l = write(fd, w, q - w);
                                if (l < 0)
                                        return -errno;
                                if (l != q - w)
                                        return -EIO;
                        }

                        if (lseek(fd, n, SEEK_CUR) == (off_t) -1)
                                return -errno;

                        q += n;
                        w = q;
                } else if (n > 0)
                        q += n;
                else
                        q++;
        }

        if (q > w) {
                l = write(fd, w, q - w);
                if (l < 0)
                        return -errno;
                if (l != q - w)
                        return -EIO;
        }

        return q - (const uint8_t*) p;
}

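/* Usage sketch (illustrative only, kept out of the build with #if 0): copy a buffer into a freshly
 * created file, turning NUL runs longer than 64 bytes into holes. A trailing NUL run is only seeked
 * over, so the file is truncated to the full size afterwards to materialize the final hole; the
 * function name is made up. */
#if 0
static int write_sparse_example(int fd, const void *buf, size_t size) {
        ssize_t n;

        n = sparse_write(fd, buf, size, 64);
        if (n < 0)
                return (int) n;
        if ((size_t) n != size)
                return -EIO;

        if (ftruncate(fd, size) < 0)
                return -errno;

        return 0;
}
#endif
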
char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value) {
        char *x;

        x = strjoin(field, value);
        if (x)
                iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x);
        return x;
}

char* set_iovec_string_field_free(struct iovec *iovec, size_t *n_iovec, const char *field, char *value) {
        char *x;

        x = set_iovec_string_field(iovec, n_iovec, field, value);
        free(value);
        return x;
}

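/* Usage sketch (illustrative only, kept out of the build with #if 0): assemble journal-style
 * "FIELD=value" entries in a stack-allocated struct iovec array and free the strings that were
 * allocated. The field names and the function name are made up. */
#if 0
static void build_iovec_example(const char *unit, const char *message) {
        struct iovec iovec[2];
        size_t n = 0;

        /* Each call allocates "FIELD=value" with strjoin() and appends it on success; a NULL return
         * means the allocation failed and nothing was appended. */
        (void) set_iovec_string_field(iovec, &n, "UNIT=", unit);
        (void) set_iovec_string_field(iovec, &n, "MESSAGE=", message);

        /* ... hand (iovec, n) to a writer such as writev() here ... */

        for (size_t i = 0; i < n; i++)
                free(iovec[i].iov_base);
}
#endif
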
struct iovec_wrapper *iovw_new(void) {
        return malloc0(sizeof(struct iovec_wrapper));
}

void iovw_free_contents(struct iovec_wrapper *iovw, bool free_vectors) {
        if (free_vectors)
                for (size_t i = 0; i < iovw->count; i++)
                        free(iovw->iovec[i].iov_base);

        iovw->iovec = mfree(iovw->iovec);
        iovw->count = 0;
}

struct iovec_wrapper *iovw_free_free(struct iovec_wrapper *iovw) {
        iovw_free_contents(iovw, true);

        return mfree(iovw);
}

struct iovec_wrapper *iovw_free(struct iovec_wrapper *iovw) {
        iovw_free_contents(iovw, false);

        return mfree(iovw);
}

int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len) {
        if (iovw->count >= IOV_MAX)
                return -E2BIG;

        if (!GREEDY_REALLOC(iovw->iovec, iovw->count + 1))
                return -ENOMEM;

        iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len);
        return 0;
}

int iovw_put_string_field(struct iovec_wrapper *iovw, const char *field, const char *value) {
        _cleanup_free_ char *x = NULL;
        int r;

        x = strjoin(field, value);
        if (!x)
                return -ENOMEM;

        r = iovw_put(iovw, x, strlen(x));
        if (r >= 0)
                TAKE_PTR(x);

        return r;
}

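/* Usage sketch (illustrative only, kept out of the build with #if 0): collect key/value fields in a
 * dynamically grown iovec_wrapper and release everything, entries included, in one go. The field
 * names and the function name are made up. */
#if 0
static int build_iovw_example(const char *id) {
        struct iovec_wrapper *iovw;
        int r;

        iovw = iovw_new();
        if (!iovw)
                return -ENOMEM;

        r = iovw_put_string_field(iovw, "EXAMPLE_ID=", id);
        if (r < 0)
                goto finish;

        r = iovw_put_string_field(iovw, "EXAMPLE_STATE=", "running");
        if (r < 0)
                goto finish;

        /* iovw_size() reports the total payload size across all entries. */
        printf("queued %zu bytes in %zu iovecs\n", iovw_size(iovw), iovw->count);

finish:
        /* iovw_free_free() frees both the entries and the wrapper itself. */
        iovw = iovw_free_free(iovw);
        return r;
}
#endif
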
int iovw_put_string_field_free(struct iovec_wrapper *iovw, const char *field, char *value) {
        _cleanup_free_ _unused_ char *free_ptr = value;

        return iovw_put_string_field(iovw, field, value);
}

void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new) {
        for (size_t i = 0; i < iovw->count; i++)
                iovw->iovec[i].iov_base = (char*) iovw->iovec[i].iov_base - old + new;
}

size_t iovw_size(struct iovec_wrapper *iovw) {
        size_t n = 0;

        for (size_t i = 0; i < iovw->count; i++)
                n += iovw->iovec[i].iov_len;

        return n;
}

int iovw_append(struct iovec_wrapper *target, const struct iovec_wrapper *source) {
        size_t original_count;
        int r;

        assert(target);

        /* This duplicates the source and merges it into the target. */

        if (iovw_isempty(source))
                return 0;

        original_count = target->count;

        FOREACH_ARRAY(iovec, source->iovec, source->count) {
                void *dup;

                dup = memdup(iovec->iov_base, iovec->iov_len);
                if (!dup) {
                        r = -ENOMEM;
                        goto rollback;
                }

                r = iovw_consume(target, dup, iovec->iov_len);
                if (r < 0)
                        goto rollback;
        }

        return 0;

rollback:
        for (size_t i = original_count; i < target->count; i++)
                free(target->iovec[i].iov_base);

        target->count = original_count;
        return r;
}

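/* Usage sketch (illustrative only, kept out of the build with #if 0): merge one wrapper's entries
 * into another. iovw_append() deep-copies every entry and rolls the target back on failure, so the
 * two wrappers remain independently owned; the function name is made up. */
#if 0
static int merge_iovw_example(struct iovec_wrapper *target, const struct iovec_wrapper *extra) {
        int r;

        r = iovw_append(target, extra);
        if (r < 0)
                return r;      /* target is left unchanged on failure */

        /* "extra" still owns its own copies; both wrappers can now be freed independently,
         * e.g. with iovw_free_free(). */
        return 0;
}
#endif
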
void iovec_array_free(struct iovec *iov, size_t n) {
        if (!iov)
                return;

        for (size_t i = 0; i < n; i++)
                free(iov[i].iov_base);

        free(iov);
}